From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 21:33:14 +0200
Subject: Adding upstream version 115.7.0esr.

Signed-off-by: Daniel Baumann
---
 third_party/libwebrtc/modules/BUILD.gn | 256 ++
 .../modules/async_audio_processing/BUILD.gn | 43 +
 .../async_audio_processing.cc | 61 +
 .../async_audio_processing.h | 76 +
 .../async_audio_processing_gn/moz.build | 225 ++
 .../libwebrtc/modules/audio_coding/BUILD.gn | 1805 +++++++++
 third_party/libwebrtc/modules/audio_coding/DEPS | 7 +
 third_party/libwebrtc/modules/audio_coding/OWNERS | 4 +
 .../modules/audio_coding/acm2/acm_receive_test.cc | 161 +
 .../modules/audio_coding/acm2/acm_receive_test.h | 98 +
 .../modules/audio_coding/acm2/acm_receiver.cc | 352 ++
 .../modules/audio_coding/acm2/acm_receiver.h | 234 ++
 .../audio_coding/acm2/acm_receiver_unittest.cc | 455 +++
 .../modules/audio_coding/acm2/acm_remixing.cc | 114 +
 .../modules/audio_coding/acm2/acm_remixing.h | 34 +
 .../audio_coding/acm2/acm_remixing_unittest.cc | 191 +
 .../modules/audio_coding/acm2/acm_resampler.cc | 61 +
 .../modules/audio_coding/acm2/acm_resampler.h | 41 +
 .../modules/audio_coding/acm2/acm_send_test.cc | 174 +
 .../modules/audio_coding/acm2/acm_send_test.h | 91 +
 .../audio_coding/acm2/audio_coding_module.cc | 637 +++
 .../acm2/audio_coding_module_unittest.cc | 1278 ++++++
 .../modules/audio_coding/acm2/call_statistics.cc | 63 +
 .../modules/audio_coding/acm2/call_statistics.h | 64 +
 .../audio_coding/acm2/call_statistics_unittest.cc | 57 +
 .../modules/audio_coding/audio_coding.gni | 25 +
 .../modules/audio_coding/audio_coding_gn/moz.build | 236 ++
 .../audio_coding_module_typedefs_gn/moz.build | 201 +
 .../audio_coding_opus_common_gn/moz.build | 225 ++
 .../audio_coding/audio_encoder_cng_gn/moz.build | 232 ++
 .../audio_network_adaptor_config.cc | 34 +
 .../audio_network_adaptor_impl.cc | 171 +
 .../audio_network_adaptor_impl.h | 89 +
 .../audio_network_adaptor_impl_unittest.cc | 306 ++
 .../audio_network_adaptor/bitrate_controller.cc | 73 +
 .../audio_network_adaptor/bitrate_controller.h | 59 +
 .../bitrate_controller_unittest.cc | 236 ++
 .../audio_network_adaptor/channel_controller.cc | 62 +
 .../audio_network_adaptor/channel_controller.h | 58 +
 .../channel_controller_unittest.cc | 101 +
 .../audio_network_adaptor/config.proto | 196 +
 .../audio_network_adaptor/controller.cc | 19 +
 .../audio_network_adaptor/controller.h | 42 +
 .../audio_network_adaptor/controller_manager.cc | 454 +++
 .../audio_network_adaptor/controller_manager.h | 124 +
 .../controller_manager_unittest.cc | 486 +++
 .../audio_network_adaptor/debug_dump.proto | 42 +
 .../audio_network_adaptor/debug_dump_writer.cc | 163 +
 .../audio_network_adaptor/debug_dump_writer.h | 54 +
 .../audio_network_adaptor/dtx_controller.cc | 51 +
 .../audio_network_adaptor/dtx_controller.h | 52 +
 .../dtx_controller_unittest.cc | 82 +
 .../audio_network_adaptor/event_log_writer.cc | 78 +
 .../audio_network_adaptor/event_log_writer.h | 44 +
 .../event_log_writer_unittest.cc | 240 ++
 .../fec_controller_plr_based.cc | 113 +
 .../fec_controller_plr_based.h | 74 +
 .../fec_controller_plr_based_unittest.cc | 489 +++
 .../frame_length_controller.cc | 201 +
 .../frame_length_controller.h | 93 +
 .../frame_length_controller_unittest.cc | 444 +++
 .../frame_length_controller_v2.cc | 73 +
 .../frame_length_controller_v2.h | 44 +
 .../frame_length_controller_v2_unittest.cc | 121 +
 .../include/audio_network_adaptor.h | 49 +
 .../include/audio_network_adaptor_config.h | 51 +
 .../mock/mock_audio_network_adaptor.h | 57 +
 .../audio_network_adaptor/mock/mock_controller.h | 35 +
 .../mock/mock_controller_manager.h | 34 +
 .../mock/mock_debug_dump_writer.h | 44 +
 .../audio_network_adaptor/parse_ana_dump.py | 149 +
 .../audio_network_adaptor/util/threshold_curve.h | 118 +
 .../util/threshold_curve_unittest.cc | 632 +++
 .../audio_network_adaptor_config_gn/moz.build | 217 +
 .../audio_network_adaptor_gn/moz.build | 242 ++
 .../modules/audio_coding/codecs/audio_decoder.h | 20 +
 .../modules/audio_coding/codecs/audio_encoder.h | 20 +
 .../builtin_audio_decoder_factory_unittest.cc | 168 +
 .../builtin_audio_encoder_factory_unittest.cc | 178 +
 .../audio_coding/codecs/cng/audio_encoder_cng.cc | 322 ++
 .../audio_coding/codecs/cng/audio_encoder_cng.h | 49 +
 .../codecs/cng/audio_encoder_cng_unittest.cc | 520 +++
 .../audio_coding/codecs/cng/cng_unittest.cc | 252 ++
 .../modules/audio_coding/codecs/cng/webrtc_cng.cc | 436 ++
 .../modules/audio_coding/codecs/cng/webrtc_cng.h | 99 +
 .../audio_coding/codecs/g711/audio_decoder_pcm.cc | 102 +
 .../audio_coding/codecs/g711/audio_decoder_pcm.h | 81 +
 .../audio_coding/codecs/g711/audio_encoder_pcm.cc | 126 +
 .../audio_coding/codecs/g711/audio_encoder_pcm.h | 128 +
 .../audio_coding/codecs/g711/g711_interface.c | 59 +
 .../audio_coding/codecs/g711/g711_interface.h | 136 +
 .../audio_coding/codecs/g711/test/testG711.cc | 168 +
 .../audio_coding/codecs/g722/audio_decoder_g722.cc | 178 +
 .../audio_coding/codecs/g722/audio_decoder_g722.h | 86 +
 .../audio_coding/codecs/g722/audio_encoder_g722.cc | 156 +
 .../audio_coding/codecs/g722/audio_encoder_g722.h | 71 +
 .../audio_coding/codecs/g722/g722_interface.c | 104 +
 .../audio_coding/codecs/g722/g722_interface.h | 174 +
 .../audio_coding/codecs/g722/test/testG722.cc | 155 +
 .../modules/audio_coding/codecs/ilbc/abs_quant.c | 82 +
 .../modules/audio_coding/codecs/ilbc/abs_quant.h | 42 +
 .../audio_coding/codecs/ilbc/abs_quant_loop.c | 89 +
 .../audio_coding/codecs/ilbc/abs_quant_loop.h | 36 +
 .../audio_coding/codecs/ilbc/audio_decoder_ilbc.cc | 110 +
 .../audio_coding/codecs/ilbc/audio_decoder_ilbc.h | 54 +
 .../audio_coding/codecs/ilbc/audio_encoder_ilbc.cc | 151 +
 .../audio_coding/codecs/ilbc/audio_encoder_ilbc.h | 61 +
 .../audio_coding/codecs/ilbc/augmented_cb_corr.c | 64 +
 .../audio_coding/codecs/ilbc/augmented_cb_corr.h | 42 +
 .../modules/audio_coding/codecs/ilbc/bw_expand.c | 44 +
 .../modules/audio_coding/codecs/ilbc/bw_expand.h | 37 +
 .../audio_coding/codecs/ilbc/cb_construct.c | 80 +
 .../audio_coding/codecs/ilbc/cb_construct.h | 44 +
 .../audio_coding/codecs/ilbc/cb_mem_energy.c | 81 +
 .../audio_coding/codecs/ilbc/cb_mem_energy.h | 37 +
 .../codecs/ilbc/cb_mem_energy_augmentation.c | 69 +
 .../codecs/ilbc/cb_mem_energy_augmentation.h | 34 +
 .../audio_coding/codecs/ilbc/cb_mem_energy_calc.c | 67 +
 .../audio_coding/codecs/ilbc/cb_mem_energy_calc.h | 36 +
 .../modules/audio_coding/codecs/ilbc/cb_search.c | 405 ++
 .../modules/audio_coding/codecs/ilbc/cb_search.h | 40 +
 .../audio_coding/codecs/ilbc/cb_search_core.c | 115 +
 .../audio_coding/codecs/ilbc/cb_search_core.h | 41 +
 .../codecs/ilbc/cb_update_best_index.c | 89 +
 .../codecs/ilbc/cb_update_best_index.h | 39 +
 .../modules/audio_coding/codecs/ilbc/chebyshev.c | 76 +
 .../modules/audio_coding/codecs/ilbc/chebyshev.h | 38 +
 .../modules/audio_coding/codecs/ilbc/comp_corr.c | 51 +
 .../modules/audio_coding/codecs/ilbc/comp_corr.h | 39 +
 .../audio_coding/codecs/ilbc/complexityMeasures.m | 57 +
 .../modules/audio_coding/codecs/ilbc/constants.c | 667 ++++
 .../modules/audio_coding/codecs/ilbc/constants.h | 95 +
 .../codecs/ilbc/create_augmented_vec.c | 83 +
 .../codecs/ilbc/create_augmented_vec.h | 38 +
 .../modules/audio_coding/codecs/ilbc/decode.c | 261 ++
 .../modules/audio_coding/codecs/ilbc/decode.h | 42 +
 .../audio_coding/codecs/ilbc/decode_residual.c | 185 +
 .../audio_coding/codecs/ilbc/decode_residual.h | 45 +
 .../codecs/ilbc/decoder_interpolate_lsf.c | 85 +
 .../codecs/ilbc/decoder_interpolate_lsf.h | 41 +
 .../modules/audio_coding/codecs/ilbc/defines.h | 225 ++
 .../modules/audio_coding/codecs/ilbc/do_plc.c | 309 ++
 .../modules/audio_coding/codecs/ilbc/do_plc.h | 44 +
 .../modules/audio_coding/codecs/ilbc/encode.c | 517 +++
 .../modules/audio_coding/codecs/ilbc/encode.h | 38 +
 .../audio_coding/codecs/ilbc/energy_inverse.c | 46 +
 .../audio_coding/codecs/ilbc/energy_inverse.h | 36 +
 .../audio_coding/codecs/ilbc/enh_upsample.c | 112 +
 .../audio_coding/codecs/ilbc/enh_upsample.h | 33 +
 .../modules/audio_coding/codecs/ilbc/enhancer.c | 53 +
 .../modules/audio_coding/codecs/ilbc/enhancer.h | 40 +
 .../audio_coding/codecs/ilbc/enhancer_interface.c | 382 ++
 .../audio_coding/codecs/ilbc/enhancer_interface.h | 36 +
 .../audio_coding/codecs/ilbc/filtered_cb_vecs.c | 50 +
 .../audio_coding/codecs/ilbc/filtered_cb_vecs.h | 39 +
 .../audio_coding/codecs/ilbc/frame_classify.c | 90 +
 .../audio_coding/codecs/ilbc/frame_classify.h | 34 +
 .../audio_coding/codecs/ilbc/gain_dequant.c | 47 +
 .../audio_coding/codecs/ilbc/gain_dequant.h | 36 +
 .../modules/audio_coding/codecs/ilbc/gain_quant.c | 105 +
 .../modules/audio_coding/codecs/ilbc/gain_quant.h | 36 +
 .../modules/audio_coding/codecs/ilbc/get_cd_vec.c | 126 +
 .../modules/audio_coding/codecs/ilbc/get_cd_vec.h | 40 +
 .../audio_coding/codecs/ilbc/get_lsp_poly.c | 84 +
 .../audio_coding/codecs/ilbc/get_lsp_poly.h | 46 +
 .../audio_coding/codecs/ilbc/get_sync_seq.c | 111 +
 .../audio_coding/codecs/ilbc/get_sync_seq.h | 41 +
 .../modules/audio_coding/codecs/ilbc/hp_input.c | 90 +
 .../modules/audio_coding/codecs/ilbc/hp_input.h | 38 +
 .../modules/audio_coding/codecs/ilbc/hp_output.c | 91 +
 .../modules/audio_coding/codecs/ilbc/hp_output.h | 38 +
 .../modules/audio_coding/codecs/ilbc/ilbc.c | 288 ++
 .../modules/audio_coding/codecs/ilbc/ilbc.h | 251 ++
 .../audio_coding/codecs/ilbc/ilbc_unittest.cc | 140 +
 .../audio_coding/codecs/ilbc/index_conv_dec.c | 40 +
 .../audio_coding/codecs/ilbc/index_conv_dec.h | 27 +
 .../audio_coding/codecs/ilbc/index_conv_enc.c | 45 +
 .../audio_coding/codecs/ilbc/index_conv_enc.h | 31 +
 .../modules/audio_coding/codecs/ilbc/init_decode.c | 98 +
 .../modules/audio_coding/codecs/ilbc/init_decode.h | 38 +
 .../modules/audio_coding/codecs/ilbc/init_encode.c | 73 +
 .../modules/audio_coding/codecs/ilbc/init_encode.h | 36 +
 .../modules/audio_coding/codecs/ilbc/interpolate.c | 48 +
 .../modules/audio_coding/codecs/ilbc/interpolate.h | 35 +
 .../audio_coding/codecs/ilbc/interpolate_samples.c | 53 +
 .../audio_coding/codecs/ilbc/interpolate_samples.h | 35 +
 .../modules/audio_coding/codecs/ilbc/lpc_encode.c | 62 +
 .../modules/audio_coding/codecs/ilbc/lpc_encode.h | 42 +
 .../modules/audio_coding/codecs/ilbc/lsf_check.c | 73 +
 .../modules/audio_coding/codecs/ilbc/lsf_check.h | 32 +
 .../codecs/ilbc/lsf_interpolate_to_poly_dec.c | 44 +
 .../codecs/ilbc/lsf_interpolate_to_poly_dec.h | 37 +
 .../codecs/ilbc/lsf_interpolate_to_poly_enc.c | 48 +
 .../codecs/ilbc/lsf_interpolate_to_poly_enc.h | 38 +
 .../modules/audio_coding/codecs/ilbc/lsf_to_lsp.c | 63 +
 .../modules/audio_coding/codecs/ilbc/lsf_to_lsp.h | 34 +
 .../modules/audio_coding/codecs/ilbc/lsf_to_poly.c | 88 +
 .../modules/audio_coding/codecs/ilbc/lsf_to_poly.h | 33 +
 .../modules/audio_coding/codecs/ilbc/lsp_to_lsf.c | 86 +
 .../modules/audio_coding/codecs/ilbc/lsp_to_lsf.h | 35 +
 .../modules/audio_coding/codecs/ilbc/my_corr.c | 56 +
 .../modules/audio_coding/codecs/ilbc/my_corr.h | 36 +
 .../audio_coding/codecs/ilbc/nearest_neighbor.c | 35 +
 .../audio_coding/codecs/ilbc/nearest_neighbor.h | 37 +
 .../modules/audio_coding/codecs/ilbc/pack_bits.c | 253 ++
 .../modules/audio_coding/codecs/ilbc/pack_bits.h | 34 +
 .../modules/audio_coding/codecs/ilbc/poly_to_lsf.c | 32 +
 .../modules/audio_coding/codecs/ilbc/poly_to_lsf.h | 32 +
 .../modules/audio_coding/codecs/ilbc/poly_to_lsp.c | 159 +
 .../modules/audio_coding/codecs/ilbc/poly_to_lsp.h | 36 +
 .../modules/audio_coding/codecs/ilbc/refiner.c | 141 +
 .../modules/audio_coding/codecs/ilbc/refiner.h | 44 +
 .../codecs/ilbc/simple_interpolate_lsf.c | 133 +
 .../codecs/ilbc/simple_interpolate_lsf.h | 48 +
 .../audio_coding/codecs/ilbc/simple_lpc_analysis.c | 96 +
 .../audio_coding/codecs/ilbc/simple_lpc_analysis.h | 37 +
 .../audio_coding/codecs/ilbc/simple_lsf_dequant.c | 62 +
 .../audio_coding/codecs/ilbc/simple_lsf_dequant.h | 34 +
 .../audio_coding/codecs/ilbc/simple_lsf_quant.c | 49 +
 .../audio_coding/codecs/ilbc/simple_lsf_quant.h | 37 +
 .../modules/audio_coding/codecs/ilbc/smooth.c | 212 +
 .../modules/audio_coding/codecs/ilbc/smooth.h | 35 +
 .../audio_coding/codecs/ilbc/smooth_out_data.c | 56 +
 .../audio_coding/codecs/ilbc/smooth_out_data.h | 33 +
 .../modules/audio_coding/codecs/ilbc/sort_sq.c | 53 +
 .../modules/audio_coding/codecs/ilbc/sort_sq.h | 36 +
 .../modules/audio_coding/codecs/ilbc/split_vq.c | 63 +
 .../modules/audio_coding/codecs/ilbc/split_vq.h | 38 +
 .../audio_coding/codecs/ilbc/state_construct.c | 116 +
 .../audio_coding/codecs/ilbc/state_construct.h | 38 +
 .../audio_coding/codecs/ilbc/state_search.c | 121 +
 .../audio_coding/codecs/ilbc/state_search.h | 41 +
 .../modules/audio_coding/codecs/ilbc/swap_bytes.c | 35 +
 .../modules/audio_coding/codecs/ilbc/swap_bytes.h | 35 +
 .../modules/audio_coding/codecs/ilbc/test/empty.cc | 0
 .../audio_coding/codecs/ilbc/test/iLBC_test.c | 238 ++
 .../audio_coding/codecs/ilbc/test/iLBC_testLib.c | 215 +
 .../codecs/ilbc/test/iLBC_testprogram.c | 343 ++
 .../modules/audio_coding/codecs/ilbc/unpack_bits.c | 241 ++
 .../modules/audio_coding/codecs/ilbc/unpack_bits.h | 39 +
 .../modules/audio_coding/codecs/ilbc/vq3.c | 64 +
 .../modules/audio_coding/codecs/ilbc/vq3.h | 36 +
 .../modules/audio_coding/codecs/ilbc/vq4.c | 63 +
 .../modules/audio_coding/codecs/ilbc/vq4.h | 36 +
 .../audio_coding/codecs/ilbc/window32_w32.c | 64 +
 .../audio_coding/codecs/ilbc/window32_w32.h | 35 +
 .../modules/audio_coding/codecs/ilbc/xcorr_coef.c | 142 +
 .../modules/audio_coding/codecs/ilbc/xcorr_coef.h | 39 +
 .../audio_coding/codecs/isac/bandwidth_info.h | 24 +
 .../codecs/isac/main/source/filter_functions.c | 195 +
 .../codecs/isac/main/source/filter_functions.h | 25 +
 .../codecs/isac/main/source/isac_vad.c | 409 ++
 .../codecs/isac/main/source/isac_vad.h | 45 +
 .../codecs/isac/main/source/os_specific_inline.h | 42 +
 .../codecs/isac/main/source/pitch_estimator.c | 695 ++++
 .../codecs/isac/main/source/pitch_estimator.h | 32 +
 .../codecs/isac/main/source/pitch_filter.c | 388 ++
 .../codecs/isac/main/source/pitch_filter.h | 42 +
 .../codecs/isac/main/source/settings.h | 196 +
 .../audio_coding/codecs/isac/main/source/structs.h | 448 +++
 .../codecs/legacy_encoded_audio_frame.cc | 88 +
 .../codecs/legacy_encoded_audio_frame.h | 53 +
 .../codecs/legacy_encoded_audio_frame_unittest.cc | 179 +
 .../modules/audio_coding/codecs/opus/DEPS | 5 +
 .../codecs/opus/audio_coder_opus_common.cc | 52 +
 .../codecs/opus/audio_coder_opus_common.h | 89 +
 .../opus/audio_decoder_multi_channel_opus_impl.cc | 182 +
 .../opus/audio_decoder_multi_channel_opus_impl.h | 74 +
 .../audio_decoder_multi_channel_opus_unittest.cc | 148 +
 .../audio_coding/codecs/opus/audio_decoder_opus.cc | 128 +
 .../audio_coding/codecs/opus/audio_decoder_opus.h | 64 +
 .../opus/audio_encoder_multi_channel_opus_impl.cc | 366 ++
 .../opus/audio_encoder_multi_channel_opus_impl.h | 92 +
 .../audio_encoder_multi_channel_opus_unittest.cc | 156 +
 .../audio_coding/codecs/opus/audio_encoder_opus.cc | 824 ++++
 .../audio_coding/codecs/opus/audio_encoder_opus.h | 184 +
 .../codecs/opus/audio_encoder_opus_unittest.cc | 914 +++++
 .../codecs/opus/opus_bandwidth_unittest.cc | 152 +
 .../codecs/opus/opus_complexity_unittest.cc | 105 +
 .../audio_coding/codecs/opus/opus_fec_test.cc | 248 ++
 .../modules/audio_coding/codecs/opus/opus_inst.h | 43 +
 .../audio_coding/codecs/opus/opus_interface.cc | 881 +++++
 .../audio_coding/codecs/opus/opus_interface.h | 547 +++
 .../audio_coding/codecs/opus/opus_speed_test.cc | 147 +
 .../audio_coding/codecs/opus/opus_unittest.cc | 979 +++++
 .../modules/audio_coding/codecs/opus/test/BUILD.gn | 55 +
 .../codecs/opus/test/audio_ring_buffer.cc | 76 +
 .../codecs/opus/test/audio_ring_buffer.h | 57 +
 .../codecs/opus/test/audio_ring_buffer_unittest.cc | 111 +
 .../audio_coding/codecs/opus/test/blocker.cc | 215 +
 .../audio_coding/codecs/opus/test/blocker.h | 127 +
 .../codecs/opus/test/blocker_unittest.cc | 293 ++
 .../codecs/opus/test/lapped_transform.cc | 100 +
 .../codecs/opus/test/lapped_transform.h | 175 +
 .../codecs/opus/test/lapped_transform_unittest.cc | 203 +
 .../codecs/pcm16b/audio_decoder_pcm16b.cc | 70 +
 .../codecs/pcm16b/audio_decoder_pcm16b.h | 52 +
 .../codecs/pcm16b/audio_encoder_pcm16b.cc | 39 +
 .../codecs/pcm16b/audio_encoder_pcm16b.h | 46 +
 .../modules/audio_coding/codecs/pcm16b/pcm16b.c | 32 +
 .../modules/audio_coding/codecs/pcm16b/pcm16b.h | 63 +
 .../audio_coding/codecs/pcm16b/pcm16b_common.cc | 29 +
 .../audio_coding/codecs/pcm16b/pcm16b_common.h | 22 +
 .../codecs/red/audio_encoder_copy_red.cc | 272 ++
 .../codecs/red/audio_encoder_copy_red.h | 102 +
 .../codecs/red/audio_encoder_copy_red_unittest.cc | 641 +++
 .../codecs/tools/audio_codec_speed_test.cc | 126 +
 .../codecs/tools/audio_codec_speed_test.h | 93 +
 .../default_neteq_factory_gn/moz.build | 232 ++
 .../libwebrtc/modules/audio_coding/g3doc/index.md | 32 +
 .../modules/audio_coding/g711_c_gn/moz.build | 217 +
 .../modules/audio_coding/g711_gn/moz.build | 226 ++
 .../modules/audio_coding/g722_c_gn/moz.build | 217 +
 .../modules/audio_coding/g722_gn/moz.build | 226 ++
 .../modules/audio_coding/ilbc_c_gn/moz.build | 300 ++
 .../modules/audio_coding/ilbc_gn/moz.build | 233 ++
 .../audio_coding/include/audio_coding_module.h | 246 ++
 .../include/audio_coding_module_typedefs.h | 137 +
 .../modules/audio_coding/isac_bwinfo_gn/moz.build | 201 +
 .../modules/audio_coding/isac_vad_gn/moz.build | 220 ++
 .../legacy_encoded_audio_frame_gn/moz.build | 225 ++
 .../modules/audio_coding/neteq/accelerate.cc | 106 +
 .../modules/audio_coding/neteq/accelerate.h | 79 +
 .../audio_coding/neteq/audio_decoder_unittest.cc | 526 +++
 .../audio_coding/neteq/audio_multi_vector.cc | 222 ++
 .../audio_coding/neteq/audio_multi_vector.h | 138 +
 .../neteq/audio_multi_vector_unittest.cc | 323 ++
 .../modules/audio_coding/neteq/audio_vector.cc | 381 ++
 .../modules/audio_coding/neteq/audio_vector.h | 172 +
 .../audio_coding/neteq/audio_vector_unittest.cc | 384 ++
 .../modules/audio_coding/neteq/background_noise.cc | 309 ++
 .../modules/audio_coding/neteq/background_noise.h | 138 +
 .../neteq/background_noise_unittest.cc | 26 +
 .../audio_coding/neteq/buffer_level_filter.cc | 64 +
 .../audio_coding/neteq/buffer_level_filter.h | 54 +
 .../neteq/buffer_level_filter_unittest.cc | 116 +
 .../modules/audio_coding/neteq/comfort_noise.cc | 130 +
 .../modules/audio_coding/neteq/comfort_noise.h | 72 +
 .../audio_coding/neteq/comfort_noise_unittest.cc | 31 +
 .../audio_coding/neteq/cross_correlation.cc | 55 +
 .../modules/audio_coding/neteq/cross_correlation.h | 51 +
 .../modules/audio_coding/neteq/decision_logic.cc | 515 +++
 .../modules/audio_coding/neteq/decision_logic.h | 202 +
 .../audio_coding/neteq/decision_logic_unittest.cc | 219 +
 .../modules/audio_coding/neteq/decoder_database.cc | 285 ++
 .../modules/audio_coding/neteq/decoder_database.h | 204 +
 .../neteq/decoder_database_unittest.cc | 227 ++
 .../audio_coding/neteq/default_neteq_factory.cc | 31 +
 .../audio_coding/neteq/default_neteq_factory.h | 41 +
 .../modules/audio_coding/neteq/delay_manager.cc | 207 +
 .../modules/audio_coding/neteq/delay_manager.h | 121 +
 .../audio_coding/neteq/delay_manager_unittest.cc | 246 ++
 .../modules/audio_coding/neteq/dsp_helper.cc | 373 ++
 .../modules/audio_coding/neteq/dsp_helper.h | 161 +
 .../audio_coding/neteq/dsp_helper_unittest.cc | 88 +
 .../modules/audio_coding/neteq/dtmf_buffer.cc | 246 ++
 .../modules/audio_coding/neteq/dtmf_buffer.h | 104 +
 .../audio_coding/neteq/dtmf_buffer_unittest.cc | 297 ++
 .../audio_coding/neteq/dtmf_tone_generator.cc | 215 +
 .../audio_coding/neteq/dtmf_tone_generator.h | 57 +
 .../neteq/dtmf_tone_generator_unittest.cc | 180 +
 .../libwebrtc/modules/audio_coding/neteq/expand.cc | 888 +++++
 .../libwebrtc/modules/audio_coding/neteq/expand.h | 154 +
 .../audio_coding/neteq/expand_uma_logger.cc | 71 +
 .../modules/audio_coding/neteq/expand_uma_logger.h | 57 +
 .../modules/audio_coding/neteq/expand_unittest.cc | 203 +
 .../modules/audio_coding/neteq/g3doc/index.md | 102 +
 .../modules/audio_coding/neteq/histogram.cc | 149 +
 .../modules/audio_coding/neteq/histogram.h | 64 +
 .../audio_coding/neteq/histogram_unittest.cc | 73 +
 .../libwebrtc/modules/audio_coding/neteq/merge.cc | 391 ++
 .../libwebrtc/modules/audio_coding/neteq/merge.h | 101 +
 .../modules/audio_coding/neteq/merge_unittest.cc | 121 +
 .../neteq/mock/mock_buffer_level_filter.h | 28 +
 .../neteq/mock/mock_decoder_database.h | 50 +
 .../audio_coding/neteq/mock/mock_delay_manager.h | 29 +
 .../audio_coding/neteq/mock/mock_dtmf_buffer.h | 35 +
 .../neteq/mock/mock_dtmf_tone_generator.h | 33 +
 .../modules/audio_coding/neteq/mock/mock_expand.h | 60 +
 .../audio_coding/neteq/mock/mock_histogram.h | 30 +
 .../neteq/mock/mock_neteq_controller.h | 62 +
 .../audio_coding/neteq/mock/mock_packet_buffer.h | 82 +
 .../neteq/mock/mock_red_payload_splitter.h | 30 +
 .../neteq/mock/mock_statistics_calculator.h | 30 +
 .../modules/audio_coding/neteq/nack_tracker.cc | 267 ++
 .../modules/audio_coding/neteq/nack_tracker.h | 211 +
 .../audio_coding/neteq/nack_tracker_unittest.cc | 565 +++
 .../neteq/neteq_decoder_plc_unittest.cc | 313 ++
 .../modules/audio_coding/neteq/neteq_impl.cc | 2141 ++++++++++
 .../modules/audio_coding/neteq/neteq_impl.h | 404 ++
 .../audio_coding/neteq/neteq_impl_unittest.cc | 1871 +++++++++
 .../neteq/neteq_network_stats_unittest.cc | 345 ++
 .../audio_coding/neteq/neteq_stereo_unittest.cc | 424 ++
 .../modules/audio_coding/neteq/neteq_unittest.cc | 1009 +++++
 .../audio_coding/neteq/neteq_unittest.proto | 31 +
 .../libwebrtc/modules/audio_coding/neteq/normal.cc | 194 +
 .../libwebrtc/modules/audio_coding/neteq/normal.h | 76 +
 .../modules/audio_coding/neteq/normal_unittest.cc | 147 +
 .../libwebrtc/modules/audio_coding/neteq/packet.cc | 36 +
 .../libwebrtc/modules/audio_coding/neteq/packet.h | 128 +
 .../audio_coding/neteq/packet_arrival_history.cc | 105 +
 .../audio_coding/neteq/packet_arrival_history.h | 82 +
 .../neteq/packet_arrival_history_unittest.cc | 141 +
 .../modules/audio_coding/neteq/packet_buffer.cc | 405 ++
 .../modules/audio_coding/neteq/packet_buffer.h | 181 +
 .../audio_coding/neteq/packet_buffer_unittest.cc | 989 +++++
 .../modules/audio_coding/neteq/post_decode_vad.cc | 90 +
 .../modules/audio_coding/neteq/post_decode_vad.h | 71 +
 .../audio_coding/neteq/post_decode_vad_unittest.cc | 25 +
 .../audio_coding/neteq/preemptive_expand.cc | 117 +
 .../modules/audio_coding/neteq/preemptive_expand.h | 85 +
 .../modules/audio_coding/neteq/random_vector.cc | 63 +
 .../modules/audio_coding/neteq/random_vector.h | 46 +
 .../audio_coding/neteq/random_vector_unittest.cc | 25 +
 .../audio_coding/neteq/red_payload_splitter.cc | 190 +
 .../audio_coding/neteq/red_payload_splitter.h | 51 +
 .../neteq/red_payload_splitter_unittest.cc | 390 ++
 .../audio_coding/neteq/reorder_optimizer.cc | 75 +
 .../modules/audio_coding/neteq/reorder_optimizer.h | 43 +
 .../neteq/reorder_optimizer_unittest.cc | 70 +
 .../audio_coding/neteq/statistics_calculator.cc | 394 ++
 .../audio_coding/neteq/statistics_calculator.h | 210 +
 .../neteq/statistics_calculator_unittest.cc | 206 +
 .../modules/audio_coding/neteq/sync_buffer.cc | 118 +
 .../modules/audio_coding/neteq/sync_buffer.h | 110 +
 .../audio_coding/neteq/sync_buffer_unittest.cc | 174 +
 .../neteq/test/delay_tool/parse_delay_file.m | 201 +
 .../neteq/test/delay_tool/plot_neteq_delay.m | 197 +
 .../audio_coding/neteq/test/neteq_decoding_test.cc | 423 ++
 .../audio_coding/neteq/test/neteq_decoding_test.h | 96 +
 .../neteq/test/neteq_ilbc_quality_test.cc | 81 +
 .../neteq/test/neteq_opus_quality_test.cc | 183 +
 .../neteq/test/neteq_pcm16b_quality_test.cc | 81 +
 .../neteq/test/neteq_pcmu_quality_test.cc | 80 +
 .../neteq/test/neteq_performance_unittest.cc | 60 +
 .../audio_coding/neteq/test/neteq_speed_test.cc | 58 +
 .../modules/audio_coding/neteq/test/result_sink.cc | 109 +
 .../modules/audio_coding/neteq/test/result_sink.h | 50 +
 .../modules/audio_coding/neteq/time_stretch.cc | 216 +
 .../modules/audio_coding/neteq/time_stretch.h | 113 +
 .../audio_coding/neteq/time_stretch_unittest.cc | 124 +
 .../modules/audio_coding/neteq/timestamp_scaler.cc | 87 +
 .../modules/audio_coding/neteq/timestamp_scaler.h | 67 +
 .../neteq/timestamp_scaler_unittest.cc | 324 ++
 .../modules/audio_coding/neteq/tools/DEPS | 3 +
 .../modules/audio_coding/neteq/tools/README.md | 17 +
 .../audio_coding/neteq/tools/audio_checksum.h | 64 +
 .../modules/audio_coding/neteq/tools/audio_loop.cc | 61 +
 .../modules/audio_coding/neteq/tools/audio_loop.h | 57 +
 .../modules/audio_coding/neteq/tools/audio_sink.cc | 26 +
 .../modules/audio_coding/neteq/tools/audio_sink.h | 70 +
 .../neteq/tools/constant_pcm_packet_source.cc | 71 +
 .../neteq/tools/constant_pcm_packet_source.h | 55 +
 .../audio_coding/neteq/tools/encode_neteq_input.cc | 94 +
 .../audio_coding/neteq/tools/encode_neteq_input.h | 69 +
 .../neteq/tools/fake_decode_from_file.cc | 169 +
 .../neteq/tools/fake_decode_from_file.h | 77 +
 .../tools/initial_packet_inserter_neteq_input.cc | 79 +
 .../tools/initial_packet_inserter_neteq_input.h | 46 +
 .../audio_coding/neteq/tools/input_audio_file.cc | 96 +
 .../audio_coding/neteq/tools/input_audio_file.h | 62 +
 .../neteq/tools/input_audio_file_unittest.cc | 59 +
 .../neteq/tools/neteq_delay_analyzer.cc | 307 ++
 .../neteq/tools/neteq_delay_analyzer.h | 76 +
 .../neteq/tools/neteq_event_log_input.cc | 68 +
 .../neteq/tools/neteq_event_log_input.h | 51 +
 .../audio_coding/neteq/tools/neteq_input.cc | 93 +
 .../modules/audio_coding/neteq/tools/neteq_input.h | 107 +
 .../neteq/tools/neteq_packet_source_input.cc | 90 +
 .../neteq/tools/neteq_packet_source_input.h | 70 +
 .../neteq/tools/neteq_performance_test.cc | 128 +
 .../neteq/tools/neteq_performance_test.h | 32 +
 .../audio_coding/neteq/tools/neteq_quality_test.cc | 482 +++
 .../audio_coding/neteq/tools/neteq_quality_test.h | 176 +
 .../neteq/tools/neteq_replacement_input.cc | 116 +
 .../neteq/tools/neteq_replacement_input.h | 51 +
 .../audio_coding/neteq/tools/neteq_rtpplay.cc | 406 ++
 .../audio_coding/neteq/tools/neteq_rtpplay_test.sh | 183 +
 .../audio_coding/neteq/tools/neteq_stats_getter.cc | 139 +
 .../audio_coding/neteq/tools/neteq_stats_getter.h | 106 +
 .../neteq/tools/neteq_stats_plotter.cc | 111 +
 .../audio_coding/neteq/tools/neteq_stats_plotter.h | 47 +
 .../modules/audio_coding/neteq/tools/neteq_test.cc | 345 ++
 .../modules/audio_coding/neteq/tools/neteq_test.h | 129 +
 .../audio_coding/neteq/tools/neteq_test_factory.cc | 342 ++
 .../audio_coding/neteq/tools/neteq_test_factory.h | 172 +
 .../audio_coding/neteq/tools/output_audio_file.h | 51 +
 .../audio_coding/neteq/tools/output_wav_file.h | 46 +
 .../modules/audio_coding/neteq/tools/packet.cc | 133 +
 .../modules/audio_coding/neteq/tools/packet.h | 104 +
 .../audio_coding/neteq/tools/packet_source.cc | 25 +
 .../audio_coding/neteq/tools/packet_source.h | 43 +
 .../audio_coding/neteq/tools/packet_unittest.cc | 226 ++
 .../neteq/tools/resample_input_audio_file.cc | 48 +
 .../neteq/tools/resample_input_audio_file.h | 55 +
 .../neteq/tools/rtc_event_log_source.cc | 168 +
 .../neteq/tools/rtc_event_log_source.h | 71 +
 .../audio_coding/neteq/tools/rtp_analyze.cc | 166 +
 .../modules/audio_coding/neteq/tools/rtp_encode.cc | 351 ++
 .../audio_coding/neteq/tools/rtp_file_source.cc | 100 +
 .../audio_coding/neteq/tools/rtp_file_source.h | 68 +
 .../audio_coding/neteq/tools/rtp_generator.cc | 60 +
 .../audio_coding/neteq/tools/rtp_generator.h | 83 +
 .../modules/audio_coding/neteq/tools/rtp_jitter.cc | 148 +
 .../modules/audio_coding/neteq/tools/rtpcat.cc | 45 +
 .../audio_coding/neteq/underrun_optimizer.cc | 71 +
 .../audio_coding/neteq/underrun_optimizer.h | 50 +
 .../neteq/underrun_optimizer_unittest.cc | 42 +
 .../modules/audio_coding/neteq_gn/moz.build | 267 ++
 .../modules/audio_coding/pcm16b_c_gn/moz.build | 217 +
 .../modules/audio_coding/pcm16b_gn/moz.build | 227 ++
 .../modules/audio_coding/red_gn/moz.build | 232 ++
 .../libwebrtc/modules/audio_coding/test/Channel.cc | 274 ++
 .../libwebrtc/modules/audio_coding/test/Channel.h | 117 +
 .../modules/audio_coding/test/EncodeDecodeTest.cc | 269 ++
 .../modules/audio_coding/test/EncodeDecodeTest.h | 111 +
 .../libwebrtc/modules/audio_coding/test/PCMFile.cc | 240 ++
 .../libwebrtc/modules/audio_coding/test/PCMFile.h | 77 +
 .../modules/audio_coding/test/PacketLossTest.cc | 167 +
 .../modules/audio_coding/test/PacketLossTest.h | 77 +
 .../libwebrtc/modules/audio_coding/test/RTPFile.cc | 235 ++
 .../libwebrtc/modules/audio_coding/test/RTPFile.h | 133 +
 .../modules/audio_coding/test/TestAllCodecs.cc | 412 ++
 .../modules/audio_coding/test/TestAllCodecs.h | 83 +
 .../modules/audio_coding/test/TestRedFec.cc | 200 +
 .../modules/audio_coding/test/TestRedFec.h | 56 +
 .../modules/audio_coding/test/TestStereo.cc | 599 +++
 .../modules/audio_coding/test/TestStereo.h | 100 +
 .../modules/audio_coding/test/TestVADDTX.cc | 240 ++
 .../modules/audio_coding/test/TestVADDTX.h | 115 +
 .../libwebrtc/modules/audio_coding/test/Tester.cc | 102 +
 .../audio_coding/test/TwoWayCommunication.cc | 191 +
 .../audio_coding/test/TwoWayCommunication.h | 62 +
 .../modules/audio_coding/test/opus_test.cc | 402 ++
 .../modules/audio_coding/test/opus_test.h | 59 +
 .../audio_coding/test/target_delay_unittest.cc | 161 +
 .../modules/audio_coding/webrtc_cng_gn/moz.build | 232 ++
 .../audio_coding/webrtc_multiopus_gn/moz.build | 230 ++
 .../modules/audio_coding/webrtc_opus_gn/moz.build | 237 ++
 .../audio_coding/webrtc_opus_wrapper_gn/moz.build | 229 ++
 .../libwebrtc/modules/audio_device/BUILD.gn | 504 +++
 third_party/libwebrtc/modules/audio_device/DEPS | 13 +
 third_party/libwebrtc/modules/audio_device/OWNERS | 2 +
 .../modules/audio_device/android/aaudio_player.cc | 216 +
 .../modules/audio_device/android/aaudio_player.h | 141 +
 .../audio_device/android/aaudio_recorder.cc | 205 +
 .../modules/audio_device/android/aaudio_recorder.h | 124 +
 .../modules/audio_device/android/aaudio_wrapper.cc | 499 +++
 .../modules/audio_device/android/aaudio_wrapper.h | 127 +
 .../modules/audio_device/android/audio_common.h | 28 +
 .../audio_device/android/audio_device_template.h | 435 ++
 .../audio_device/android/audio_device_unittest.cc | 1018 +++++
 .../modules/audio_device/android/audio_manager.cc | 318 ++
 .../modules/audio_device/android/audio_manager.h | 225 ++
 .../audio_device/android/audio_manager_unittest.cc | 239 ++
 .../audio_device/android/audio_record_jni.cc | 280 ++
 .../audio_device/android/audio_record_jni.h | 168 +
 .../audio_device/android/audio_track_jni.cc | 296 ++
 .../modules/audio_device/android/audio_track_jni.h | 161 +
 .../modules/audio_device/android/build_info.cc | 59 +
 .../modules/audio_device/android/build_info.h | 86 +
 .../audio_device/android/ensure_initialized.cc | 42 +
 .../audio_device/android/ensure_initialized.h | 17 +
 .../java/src/org/webrtc/voiceengine/BuildInfo.java | 51 +
 .../org/webrtc/voiceengine/WebRtcAudioEffects.java | 312 ++
 .../org/webrtc/voiceengine/WebRtcAudioManager.java | 371 ++
 .../org/webrtc/voiceengine/WebRtcAudioRecord.java | 409 ++
 .../org/webrtc/voiceengine/WebRtcAudioTrack.java | 494 +++
 .../org/webrtc/voiceengine/WebRtcAudioUtils.java | 382 ++
 .../audio_device/android/opensles_common.cc | 103 +
 .../modules/audio_device/android/opensles_common.h | 62 +
 .../audio_device/android/opensles_player.cc | 434 ++
 .../modules/audio_device/android/opensles_player.h | 195 +
 .../audio_device/android/opensles_recorder.cc | 431 ++
 .../audio_device/android/opensles_recorder.h | 193 +
 .../modules/audio_device/audio_device_buffer.cc | 518 +++
 .../modules/audio_device/audio_device_buffer.h | 245 ++
 .../modules/audio_device/audio_device_config.h | 30 +
 .../audio_device/audio_device_data_observer.cc | 373 ++
 .../modules/audio_device/audio_device_generic.cc | 66 +
 .../modules/audio_device/audio_device_generic.h | 145 +
 .../modules/audio_device/audio_device_gn/moz.build | 201 +
 .../modules/audio_device/audio_device_impl.cc | 951 +++++
 .../modules/audio_device/audio_device_impl.h | 180 +
 .../modules/audio_device/audio_device_name.cc | 27 +
 .../modules/audio_device/audio_device_name.h | 50 +
 .../modules/audio_device/audio_device_unittest.cc | 1241 ++++++
 .../audio_device/dummy/audio_device_dummy.cc | 226 ++
 .../audio_device/dummy/audio_device_dummy.h | 117 +
 .../audio_device/dummy/file_audio_device.cc | 508 +++
 .../modules/audio_device/dummy/file_audio_device.h | 163 +
 .../dummy/file_audio_device_factory.cc | 62 +
 .../audio_device/dummy/file_audio_device_factory.h | 44 +
 .../modules/audio_device/fine_audio_buffer.cc | 130 +
 .../modules/audio_device/fine_audio_buffer.h | 94 +
 .../audio_device/fine_audio_buffer_unittest.cc | 158 +
 .../audio_device/g3doc/audio_device_module.md | 171 +
 .../modules/audio_device/include/audio_device.h | 194 +
 .../include/audio_device_data_observer.h | 72 +
 .../audio_device/include/audio_device_default.h | 132 +
 .../audio_device/include/audio_device_defines.h | 177 +
 .../audio_device/include/audio_device_factory.cc | 53 +
 .../audio_device/include/audio_device_factory.h | 59 +
 .../audio_device/include/fake_audio_device.h | 33 +
 .../audio_device/include/mock_audio_device.h | 156 +
 .../audio_device/include/mock_audio_transport.h | 81 +
 .../audio_device/include/test_audio_device.cc | 497 +++
 .../audio_device/include/test_audio_device.h | 149 +
 .../include/test_audio_device_unittest.cc | 192 +
 .../audio_device/linux/alsasymboltable_linux.cc | 40 +
 .../audio_device/linux/alsasymboltable_linux.h | 148 +
 .../audio_device/linux/audio_device_alsa_linux.cc | 1637 ++++++++
 .../audio_device/linux/audio_device_alsa_linux.h | 208 +
 .../audio_device/linux/audio_device_pulse_linux.cc | 2286 +++++++++++
 .../audio_device/linux/audio_device_pulse_linux.h | 349 ++
 .../linux/audio_mixer_manager_alsa_linux.cc | 979 +++++
 .../linux/audio_mixer_manager_alsa_linux.h | 71 +
 .../linux/audio_mixer_manager_pulse_linux.cc | 844 ++++
 .../linux/audio_mixer_manager_pulse_linux.h | 114 +
 .../linux/latebindingsymboltable_linux.cc | 106 +
 .../linux/latebindingsymboltable_linux.h | 168 +
 .../linux/pulseaudiosymboltable_linux.cc | 41 +
 .../linux/pulseaudiosymboltable_linux.h | 106 +
 .../modules/audio_device/mac/audio_device_mac.cc | 2500 ++++++++++++
 .../modules/audio_device/mac/audio_device_mac.h | 350 ++
 .../audio_device/mac/audio_mixer_manager_mac.cc | 924 +++++
 .../audio_device/mac/audio_mixer_manager_mac.h | 73 +
 .../audio_device/mock_audio_device_buffer.h | 35 +
 .../audio_device/win/audio_device_core_win.cc | 4178 ++++++++++++++++++++
 .../audio_device/win/audio_device_core_win.h | 300 ++
 .../audio_device/win/audio_device_module_win.cc | 522 +++
 .../audio_device/win/audio_device_module_win.h | 87 +
 .../audio_device/win/core_audio_base_win.cc | 948 +++++
 .../modules/audio_device/win/core_audio_base_win.h | 203 +
 .../audio_device/win/core_audio_input_win.cc | 453 +++
 .../audio_device/win/core_audio_input_win.h | 73 +
 .../audio_device/win/core_audio_output_win.cc | 422 ++
 .../audio_device/win/core_audio_output_win.h | 72 +
 .../audio_device/win/core_audio_utility_win.cc | 1529 +++++++
 .../audio_device/win/core_audio_utility_win.h | 560 +++
 .../win/core_audio_utility_win_unittest.cc | 876 ++++
 third_party/libwebrtc/modules/audio_mixer/BUILD.gn | 144 +
 third_party/libwebrtc/modules/audio_mixer/DEPS | 13 +
 third_party/libwebrtc/modules/audio_mixer/OWNERS | 2 +
 .../modules/audio_mixer/audio_frame_manipulator.cc | 92 +
 .../modules/audio_mixer/audio_frame_manipulator.h | 33 +
 .../audio_frame_manipulator_gn/moz.build | 232 ++
 .../audio_frame_manipulator_unittest.cc | 66 +
 .../modules/audio_mixer/audio_mixer_impl.cc | 266 ++
 .../modules/audio_mixer/audio_mixer_impl.h | 100 +
 .../audio_mixer/audio_mixer_impl_gn/moz.build | 235 ++
 .../audio_mixer/audio_mixer_impl_unittest.cc | 790 ++++
 .../modules/audio_mixer/audio_mixer_test.cc | 182 +
 .../audio_mixer/default_output_rate_calculator.cc | 41 +
 .../audio_mixer/default_output_rate_calculator.h | 36 +
 .../modules/audio_mixer/frame_combiner.cc | 213 +
 .../libwebrtc/modules/audio_mixer/frame_combiner.h | 56 +
 .../modules/audio_mixer/frame_combiner_unittest.cc | 337 ++
 .../libwebrtc/modules/audio_mixer/g3doc/index.md | 54 +
 .../modules/audio_mixer/gain_change_calculator.cc | 63 +
 .../modules/audio_mixer/gain_change_calculator.h | 42 +
 .../modules/audio_mixer/output_rate_calculator.h | 32 +
 .../modules/audio_mixer/sine_wave_generator.cc | 35 +
 .../modules/audio_mixer/sine_wave_generator.h | 40 +
 .../libwebrtc/modules/audio_processing/BUILD.gn | 677 ++++
 .../libwebrtc/modules/audio_processing/DEPS | 14 +
 .../libwebrtc/modules/audio_processing/OWNERS | 8 +
 .../modules/audio_processing/aec3/BUILD.gn | 384 ++
 .../audio_processing/aec3/adaptive_fir_filter.cc | 744 ++++
 .../audio_processing/aec3/adaptive_fir_filter.h | 192 +
 .../aec3/adaptive_fir_filter_avx2.cc | 188 +
 .../aec3/adaptive_fir_filter_erl.cc | 102 +
 .../aec3/adaptive_fir_filter_erl.h | 54 +
 .../aec3/adaptive_fir_filter_erl_avx2.cc | 37 +
 .../aec3/adaptive_fir_filter_erl_gn/moz.build | 205 +
 .../aec3/adaptive_fir_filter_erl_unittest.cc | 106 +
 .../aec3/adaptive_fir_filter_gn/moz.build | 216 +
 .../aec3/adaptive_fir_filter_unittest.cc | 594 +++
 .../audio_processing/aec3/aec3_avx2_gn/moz.build | 190 +
 .../modules/audio_processing/aec3/aec3_common.cc | 58 +
 .../modules/audio_processing/aec3/aec3_common.h | 114 +
 .../audio_processing/aec3/aec3_common_gn/moz.build | 201 +
 .../modules/audio_processing/aec3/aec3_fft.cc | 144 +
 .../modules/audio_processing/aec3/aec3_fft.h | 75 +
 .../audio_processing/aec3/aec3_fft_gn/moz.build | 216 +
 .../audio_processing/aec3/aec3_fft_unittest.cc | 213 +
 .../audio_processing/aec3/aec3_gn/moz.build | 289 ++
 .../modules/audio_processing/aec3/aec_state.cc | 481 +++
 .../modules/audio_processing/aec3/aec_state.h | 300 ++
 .../audio_processing/aec3/aec_state_unittest.cc | 297 ++
 .../audio_processing/aec3/alignment_mixer.cc | 163 +
 .../audio_processing/aec3/alignment_mixer.h | 57 +
 .../aec3/alignment_mixer_unittest.cc | 196 +
 .../aec3/api_call_jitter_metrics.cc | 121 +
 .../aec3/api_call_jitter_metrics.h | 60 +
 .../aec3/api_call_jitter_metrics_unittest.cc | 109 +
 .../modules/audio_processing/aec3/block.h | 91 +
 .../modules/audio_processing/aec3/block_buffer.cc | 23 +
 .../modules/audio_processing/aec3/block_buffer.h | 60 +
 .../audio_processing/aec3/block_delay_buffer.cc | 69 +
 .../audio_processing/aec3/block_delay_buffer.h | 43 +
 .../aec3/block_delay_buffer_unittest.cc | 105 +
 .../modules/audio_processing/aec3/block_framer.cc | 83 +
 .../modules/audio_processing/aec3/block_framer.h | 49 +
 .../audio_processing/aec3/block_framer_unittest.cc | 337 ++
 .../audio_processing/aec3/block_processor.cc | 290 ++
 .../audio_processing/aec3/block_processor.h | 81 +
 .../aec3/block_processor_metrics.cc | 104 +
 .../aec3/block_processor_metrics.h | 46 +
 .../aec3/block_processor_metrics_unittest.cc | 34 +
 .../aec3/block_processor_unittest.cc | 341 ++
 .../audio_processing/aec3/clockdrift_detector.cc | 61 +
 .../audio_processing/aec3/clockdrift_detector.h | 40 +
 .../aec3/clockdrift_detector_unittest.cc | 57 +
 .../aec3/coarse_filter_update_gain.cc | 103 +
 .../aec3/coarse_filter_update_gain.h | 74 +
 .../aec3/coarse_filter_update_gain_unittest.cc | 268 ++
 .../aec3/comfort_noise_generator.cc | 186 +
 .../aec3/comfort_noise_generator.h | 77 +
 .../aec3/comfort_noise_generator_unittest.cc | 72 +
 .../audio_processing/aec3/config_selector.cc | 71 +
 .../audio_processing/aec3/config_selector.h | 41 +
 .../aec3/config_selector_unittest.cc | 116 +
 .../modules/audio_processing/aec3/decimator.cc | 91 +
 .../modules/audio_processing/aec3/decimator.h | 41 +
 .../audio_processing/aec3/decimator_unittest.cc | 135 +
 .../modules/audio_processing/aec3/delay_estimate.h | 33 +
 .../aec3/dominant_nearend_detector.cc | 75 +
 .../aec3/dominant_nearend_detector.h | 56 +
 .../aec3/downsampled_render_buffer.cc | 25 +
 .../aec3/downsampled_render_buffer.h | 58 +
 .../audio_processing/aec3/echo_audibility.cc | 119 +
 .../audio_processing/aec3/echo_audibility.h | 85 +
 .../audio_processing/aec3/echo_canceller3.cc | 992 +++++
 .../audio_processing/aec3/echo_canceller3.h | 230 ++
 .../aec3/echo_canceller3_unittest.cc | 1160 ++++++
 .../aec3/echo_path_delay_estimator.cc | 127 +
 .../aec3/echo_path_delay_estimator.h | 80 +
 .../aec3/echo_path_delay_estimator_unittest.cc | 184 +
 .../audio_processing/aec3/echo_path_variability.cc | 22 +
 .../audio_processing/aec3/echo_path_variability.h | 37 +
 .../aec3/echo_path_variability_unittest.cc | 50 +
 .../modules/audio_processing/aec3/echo_remover.cc | 521 +++
 .../modules/audio_processing/aec3/echo_remover.h | 62 +
 .../audio_processing/aec3/echo_remover_metrics.cc | 157 +
 .../audio_processing/aec3/echo_remover_metrics.h | 78 +
 .../aec3/echo_remover_metrics_unittest.cc | 156 +
 .../audio_processing/aec3/echo_remover_unittest.cc | 210 +
 .../modules/audio_processing/aec3/erl_estimator.cc | 146 +
 .../modules/audio_processing/aec3/erl_estimator.h | 58 +
 .../aec3/erl_estimator_unittest.cc | 104 +
 .../audio_processing/aec3/erle_estimator.cc | 89 +
 .../modules/audio_processing/aec3/erle_estimator.h | 112 +
 .../aec3/erle_estimator_unittest.cc | 288 ++
 .../modules/audio_processing/aec3/fft_buffer.cc | 27 +
 .../modules/audio_processing/aec3/fft_buffer.h | 60 +
 .../modules/audio_processing/aec3/fft_data.h | 104 +
 .../modules/audio_processing/aec3/fft_data_avx2.cc | 33 +
 .../audio_processing/aec3/fft_data_gn/moz.build | 205 +
 .../audio_processing/aec3/fft_data_unittest.cc | 186 +
 .../audio_processing/aec3/filter_analyzer.cc | 289 ++
 .../audio_processing/aec3/filter_analyzer.h | 150 +
 .../aec3/filter_analyzer_unittest.cc | 33 +
 .../modules/audio_processing/aec3/frame_blocker.cc | 80 +
 .../modules/audio_processing/aec3/frame_blocker.h | 51 +
 .../aec3/frame_blocker_unittest.cc | 425 ++
 .../aec3/fullband_erle_estimator.cc | 191 +
 .../aec3/fullband_erle_estimator.h | 118 +
 .../audio_processing/aec3/matched_filter.cc | 900 +++++
 .../modules/audio_processing/aec3/matched_filter.h | 190 +
 .../audio_processing/aec3/matched_filter_avx2.cc | 261 ++
 .../aec3/matched_filter_gn/moz.build | 205 +
 .../aec3/matched_filter_lag_aggregator.cc | 166 +
 .../aec3/matched_filter_lag_aggregator.h | 97 +
 .../aec3/matched_filter_lag_aggregator_unittest.cc | 113 +
 .../aec3/matched_filter_unittest.cc | 612 +++
 .../aec3/mock/mock_block_processor.cc | 20 +
 .../aec3/mock/mock_block_processor.h | 53 +
 .../aec3/mock/mock_echo_remover.cc | 20 +
 .../audio_processing/aec3/mock/mock_echo_remover.h | 56 +
 .../aec3/mock/mock_render_delay_buffer.cc | 36 +
 .../aec3/mock/mock_render_delay_buffer.h | 67 +
 .../aec3/mock/mock_render_delay_controller.cc | 20 +
 .../aec3/mock/mock_render_delay_controller.h | 42 +
 .../audio_processing/aec3/moving_average.cc | 60 +
 .../modules/audio_processing/aec3/moving_average.h | 45 +
 .../aec3/moving_average_unittest.cc | 89 +
 .../aec3/multi_channel_content_detector.cc | 148 +
 .../aec3/multi_channel_content_detector.h | 96 +
 .../multi_channel_content_detector_unittest.cc | 470 +++
 .../audio_processing/aec3/nearend_detector.h | 42 +
 .../aec3/refined_filter_update_gain.cc | 173 +
 .../aec3/refined_filter_update_gain.h | 91 +
 .../aec3/refined_filter_update_gain_unittest.cc | 392 ++
 .../modules/audio_processing/aec3/render_buffer.cc | 81 +
 .../modules/audio_processing/aec3/render_buffer.h | 115 +
 .../aec3/render_buffer_gn/moz.build | 205 +
 .../aec3/render_buffer_unittest.cc | 46 +
 .../audio_processing/aec3/render_delay_buffer.cc | 519 +++
 .../audio_processing/aec3/render_delay_buffer.h | 86 +
 .../aec3/render_delay_buffer_unittest.cc | 130 +
 .../aec3/render_delay_controller.cc | 186 +
 .../aec3/render_delay_controller.h | 51 +
 .../aec3/render_delay_controller_metrics.cc | 132 +
 .../aec3/render_delay_controller_metrics.h | 49 +
 .../render_delay_controller_metrics_unittest.cc | 72 +
 .../aec3/render_delay_controller_unittest.cc | 334 ++
 .../aec3/render_signal_analyzer.cc | 156 +
 .../audio_processing/aec3/render_signal_analyzer.h | 62 +
 .../aec3/render_signal_analyzer_unittest.cc | 171 +
 .../aec3/residual_echo_estimator.cc | 379 ++
 .../aec3/residual_echo_estimator.h | 85 +
 .../aec3/residual_echo_estimator_unittest.cc | 199 +
 .../aec3/reverb_decay_estimator.cc | 410 ++
 .../audio_processing/aec3/reverb_decay_estimator.h | 120 +
 .../aec3/reverb_frequency_response.cc | 108 +
 .../aec3/reverb_frequency_response.h | 55 +
 .../modules/audio_processing/aec3/reverb_model.cc | 59 +
 .../modules/audio_processing/aec3/reverb_model.h | 58 +
 .../aec3/reverb_model_estimator.cc | 57 +
 .../audio_processing/aec3/reverb_model_estimator.h | 72 +
 .../aec3/reverb_model_estimator_unittest.cc | 157 +
 .../aec3/signal_dependent_erle_estimator.cc | 416 ++
 .../aec3/signal_dependent_erle_estimator.h | 104 +
 .../signal_dependent_erle_estimator_unittest.cc | 208 +
 .../audio_processing/aec3/spectrum_buffer.cc | 30 +
 .../audio_processing/aec3/spectrum_buffer.h | 62 +
 .../aec3/stationarity_estimator.cc | 241 ++
 .../audio_processing/aec3/stationarity_estimator.h | 123 +
 .../aec3/subband_erle_estimator.cc | 251 ++
 .../audio_processing/aec3/subband_erle_estimator.h | 106 +
 .../aec3/subband_nearend_detector.cc | 70 +
 .../aec3/subband_nearend_detector.h | 52 +
 .../modules/audio_processing/aec3/subtractor.cc | 364 ++
 .../modules/audio_processing/aec3/subtractor.h | 150 +
 .../audio_processing/aec3/subtractor_output.cc | 58 +
 .../audio_processing/aec3/subtractor_output.h | 52 +
 .../aec3/subtractor_output_analyzer.cc | 64 +
 .../aec3/subtractor_output_analyzer.h | 45 +
 .../audio_processing/aec3/subtractor_unittest.cc | 320 ++
 .../audio_processing/aec3/suppression_filter.cc | 180 +
 .../audio_processing/aec3/suppression_filter.h | 51 +
 .../aec3/suppression_filter_unittest.cc | 257 ++
 .../audio_processing/aec3/suppression_gain.cc | 465 +++
 .../audio_processing/aec3/suppression_gain.h | 145 +
 .../aec3/suppression_gain_unittest.cc | 149 +
 .../audio_processing/aec3/transparent_mode.cc | 243 ++
 .../audio_processing/aec3/transparent_mode.h | 47 +
 .../modules/audio_processing/aec3/vector_math.h | 229 ++
 .../audio_processing/aec3/vector_math_avx2.cc | 82 +
 .../audio_processing/aec3/vector_math_gn/moz.build | 205 +
 .../audio_processing/aec3/vector_math_unittest.cc | 209 +
 .../modules/audio_processing/aec_dump/BUILD.gn | 112 +
 .../audio_processing/aec_dump/aec_dump_factory.h | 48 +
 .../aec_dump/aec_dump_gn/moz.build | 209 +
 .../audio_processing/aec_dump/aec_dump_impl.cc | 281 ++
 .../audio_processing/aec_dump/aec_dump_impl.h | 85 +
 .../aec_dump/aec_dump_integration_test.cc | 93 +
 .../audio_processing/aec_dump/aec_dump_unittest.cc | 87 +
 .../aec_dump/capture_stream_info.cc | 61 +
 .../aec_dump/capture_stream_info.h | 66 +
 .../audio_processing/aec_dump/mock_aec_dump.cc | 19 +
 .../audio_processing/aec_dump/mock_aec_dump.h | 82 +
 .../aec_dump/null_aec_dump_factory.cc | 34 +
 .../aec_dump/null_aec_dump_factory_gn/moz.build | 225 ++
 .../aec_dump_interface_gn/moz.build | 225 ++
 .../modules/audio_processing/aecm/BUILD.gn | 44 +
 .../modules/audio_processing/aecm/aecm_core.cc | 1125 ++++++
 .../modules/audio_processing/aecm/aecm_core.h | 441 +++
 .../modules/audio_processing/aecm/aecm_core_c.cc | 671 ++++
 .../audio_processing/aecm/aecm_core_gn/moz.build | 293 ++
 .../audio_processing/aecm/aecm_core_mips.cc | 1656 ++++++++
 .../audio_processing/aecm/aecm_core_neon.cc | 206 +
 .../modules/audio_processing/aecm/aecm_defines.h | 87 +
 .../audio_processing/aecm/echo_control_mobile.cc | 599 +++
 .../audio_processing/aecm/echo_control_mobile.h | 209 +
 .../modules/audio_processing/agc/BUILD.gn | 126 +
 .../libwebrtc/modules/audio_processing/agc/agc.cc | 98 +
 .../libwebrtc/modules/audio_processing/agc/agc.h | 52 +
 .../modules/audio_processing/agc/agc_gn/moz.build | 233 ++
 .../audio_processing/agc/agc_manager_direct.cc | 713 ++++
 .../audio_processing/agc/agc_manager_direct.h | 278 ++
 .../agc/agc_manager_direct_unittest.cc | 2184 ++++++++++
 .../modules/audio_processing/agc/gain_control.h | 105 +
 .../agc/gain_control_interface_gn/moz.build | 201 +
 .../audio_processing/agc/legacy/analog_agc.cc | 1238 ++++++
 .../audio_processing/agc/legacy/analog_agc.h | 118 +
 .../audio_processing/agc/legacy/digital_agc.cc | 704 ++++
 .../audio_processing/agc/legacy/digital_agc.h | 75 +
 .../audio_processing/agc/legacy/gain_control.h | 256 ++
 .../audio_processing/agc/legacy_agc_gn/moz.build | 233 ++
 .../agc/level_estimation_gn/moz.build | 234 ++
 .../audio_processing/agc/loudness_histogram.cc | 229 ++
 .../audio_processing/agc/loudness_histogram.h | 90 +
 .../agc/loudness_histogram_unittest.cc | 107 +
 .../modules/audio_processing/agc/mock_agc.h | 32 +
 .../modules/audio_processing/agc/utility.cc | 39 +
 .../modules/audio_processing/agc/utility.h | 27 +
 .../modules/audio_processing/agc2/BUILD.gn | 511 +++
 .../agc2/adaptive_digital_gain_controller.cc | 216 +
 .../agc2/adaptive_digital_gain_controller.h | 66 +
 .../adaptive_digital_gain_controller_gn/moz.build | 233 ++
 .../adaptive_digital_gain_controller_unittest.cc | 312 ++
 .../modules/audio_processing/agc2/agc2_common.h | 62 +
 .../audio_processing/agc2/agc2_testing_common.cc | 93 +
 .../audio_processing/agc2/agc2_testing_common.h | 82 +
 .../agc2/agc2_testing_common_unittest.cc | 27 +
 .../modules/audio_processing/agc2/biquad_filter.cc | 60 +
 .../modules/audio_processing/agc2/biquad_filter.h | 56 +
 .../agc2/biquad_filter_gn/moz.build | 221 ++
 .../agc2/biquad_filter_unittest.cc | 175 +
 .../audio_processing/agc2/clipping_predictor.cc | 384 ++
 .../audio_processing/agc2/clipping_predictor.h | 62 +
 .../agc2/clipping_predictor_gn/moz.build | 233 ++
 .../agc2/clipping_predictor_level_buffer.cc | 77 +
 .../agc2/clipping_predictor_level_buffer.h | 71 +
 .../clipping_predictor_level_buffer_unittest.cc | 131 +
 .../agc2/clipping_predictor_unittest.cc | 491 +++
 .../audio_processing/agc2/common_gn/moz.build | 201 +
 .../agc2/compute_interpolated_gain_curve.cc | 229 ++
 .../agc2/compute_interpolated_gain_curve.h | 48 +
 .../modules/audio_processing/agc2/cpu_features.cc | 62 +
 .../modules/audio_processing/agc2/cpu_features.h | 39 +
 .../agc2/cpu_features_gn/moz.build | 232 ++
 .../agc2/fixed_digital_gn/moz.build | 235 ++
 .../agc2/fixed_digital_level_estimator.cc | 121 +
 .../agc2/fixed_digital_level_estimator.h | 66 +
 .../agc2/fixed_digital_level_estimator_unittest.cc | 159 +
 .../modules/audio_processing/agc2/gain_applier.cc | 103 +
 .../modules/audio_processing/agc2/gain_applier.h | 44 +
 .../agc2/gain_applier_gn/moz.build | 221 ++
 .../audio_processing/agc2/gain_applier_unittest.cc | 93 +
 .../audio_processing/agc2/gain_map_gn/moz.build | 201 +
 .../audio_processing/agc2/gain_map_internal.h | 46 +
 .../agc2/input_volume_controller.cc | 580 +++
 .../agc2/input_volume_controller.h | 282 ++
 .../agc2/input_volume_controller_gn/moz.build | 234 ++
 .../agc2/input_volume_controller_unittest.cc | 1857 +++++++++
 .../agc2/input_volume_stats_reporter.cc | 171 +
 .../agc2/input_volume_stats_reporter.h | 96 +
 .../agc2/input_volume_stats_reporter_gn/moz.build | 225 ++
 .../agc2/input_volume_stats_reporter_unittest.cc | 246 ++
 .../agc2/interpolated_gain_curve.cc | 204 +
 .../agc2/interpolated_gain_curve.h | 152 +
 .../agc2/interpolated_gain_curve_unittest.cc | 203 +
 .../modules/audio_processing/agc2/limiter.cc | 155 +
 .../modules/audio_processing/agc2/limiter.h | 63 +
 .../audio_processing/agc2/limiter_db_gain_curve.cc | 138 +
 .../audio_processing/agc2/limiter_db_gain_curve.h | 76 +
 .../agc2/limiter_db_gain_curve_unittest.cc | 60 +
 .../audio_processing/agc2/limiter_unittest.cc | 60 +
 .../audio_processing/agc2/noise_level_estimator.cc | 172 +
 .../audio_processing/agc2/noise_level_estimator.h | 36 +
 .../agc2/noise_level_estimator_gn/moz.build | 233 ++
 .../agc2/noise_level_estimator_unittest.cc | 98 +
 .../modules/audio_processing/agc2/rnn_vad/BUILD.gn | 334 ++
 .../modules/audio_processing/agc2/rnn_vad/DEPS | 3 +
 .../agc2/rnn_vad/auto_correlation.cc | 91 +
 .../agc2/rnn_vad/auto_correlation.h | 49 +
 .../agc2/rnn_vad/auto_correlation_unittest.cc | 66 +
 .../modules/audio_processing/agc2/rnn_vad/common.h | 77 +
 .../agc2/rnn_vad/features_extraction.cc | 90 +
 .../agc2/rnn_vad/features_extraction.h | 61 +
 .../agc2/rnn_vad/features_extraction_unittest.cc | 103 +
 .../audio_processing/agc2/rnn_vad/lp_residual.cc | 141 +
 .../audio_processing/agc2/rnn_vad/lp_residual.h | 41 +
 .../agc2/rnn_vad/lp_residual_unittest.cc | 80 +
 .../audio_processing/agc2/rnn_vad/pitch_search.cc | 70 +
 .../audio_processing/agc2/rnn_vad/pitch_search.h | 54 +
 .../agc2/rnn_vad/pitch_search_internal.cc | 513 +++
 .../agc2/rnn_vad/pitch_search_internal.h | 114 +
 .../agc2/rnn_vad/pitch_search_internal_unittest.cc | 217 +
 .../agc2/rnn_vad/pitch_search_unittest.cc | 53 +
 .../audio_processing/agc2/rnn_vad/ring_buffer.h | 65 +
 .../agc2/rnn_vad/ring_buffer_unittest.cc | 112 +
 .../modules/audio_processing/agc2/rnn_vad/rnn.cc | 91 +
 .../modules/audio_processing/agc2/rnn_vad/rnn.h | 53 +
 .../audio_processing/agc2/rnn_vad/rnn_fc.cc | 103 +
 .../modules/audio_processing/agc2/rnn_vad/rnn_fc.h | 72 +
 .../agc2/rnn_vad/rnn_fc_unittest.cc | 111 +
 .../audio_processing/agc2/rnn_vad/rnn_gru.cc | 198 +
 .../audio_processing/agc2/rnn_vad/rnn_gru.h | 70 +
 .../agc2/rnn_vad/rnn_gru_unittest.cc | 186 +
 .../audio_processing/agc2/rnn_vad/rnn_unittest.cc | 70 +
 .../rnn_vad/rnn_vad_auto_correlation_gn/moz.build | 232 ++
 .../agc2/rnn_vad/rnn_vad_common_gn/moz.build | 216 +
 .../agc2/rnn_vad/rnn_vad_gn/moz.build | 233 ++
 .../agc2/rnn_vad/rnn_vad_layers_gn/moz.build | 233 ++
 .../agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build | 221 ++
 .../agc2/rnn_vad/rnn_vad_pitch_gn/moz.build | 233 ++
 .../agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build | 205 +
 .../rnn_vad/rnn_vad_sequence_buffer_gn/moz.build | 205 +
 .../rnn_vad/rnn_vad_spectral_features_gn/moz.build | 233 ++
 .../rnn_vad_symmetric_matrix_buffer_gn/moz.build | 205 +
 .../audio_processing/agc2/rnn_vad/rnn_vad_tool.cc | 123 +
 .../agc2/rnn_vad/rnn_vad_unittest.cc | 185 +
 .../agc2/rnn_vad/sequence_buffer.h | 79 +
 .../agc2/rnn_vad/sequence_buffer_unittest.cc | 102 +
 .../agc2/rnn_vad/spectral_features.cc | 214 +
 .../agc2/rnn_vad/spectral_features.h | 79 +
 .../agc2/rnn_vad/spectral_features_internal.cc | 188 +
 .../agc2/rnn_vad/spectral_features_internal.h | 100 +
 .../rnn_vad/spectral_features_internal_unittest.cc | 160 +
 .../agc2/rnn_vad/spectral_features_unittest.cc | 160 +
 .../agc2/rnn_vad/symmetric_matrix_buffer.h | 95 +
 .../rnn_vad/symmetric_matrix_buffer_unittest.cc | 107 +
 .../audio_processing/agc2/rnn_vad/test_utils.cc | 143 +
 .../audio_processing/agc2/rnn_vad/test_utils.h | 130 +
 .../audio_processing/agc2/rnn_vad/vector_math.h | 114 +
 .../agc2/rnn_vad/vector_math_avx2.cc | 55 +
 .../agc2/rnn_vad/vector_math_avx2_gn/moz.build | 185 +
 .../agc2/rnn_vad/vector_math_gn/moz.build | 216 +
 .../agc2/rnn_vad/vector_math_unittest.cc | 71 +
 .../audio_processing/agc2/saturation_protector.cc | 183 +
 .../audio_processing/agc2/saturation_protector.h | 46 +
 .../agc2/saturation_protector_buffer.cc | 77 +
 .../agc2/saturation_protector_buffer.h | 59 +
 .../agc2/saturation_protector_buffer_unittest.cc | 73 +
 .../agc2/saturation_protector_gn/moz.build | 234 ++
 .../agc2/saturation_protector_unittest.cc | 140 +
 .../agc2/speech_level_estimator.cc | 174 +
 .../audio_processing/agc2/speech_level_estimator.h | 81 +
 .../agc2/speech_level_estimator_gn/moz.build | 233 ++
 .../agc2/speech_level_estimator_unittest.cc | 207 +
 .../agc2/speech_probability_buffer.cc | 105 +
 .../agc2/speech_probability_buffer.h | 80 +
 .../agc2/speech_probability_buffer_unittest.cc | 346 ++
 .../modules/audio_processing/agc2/vad_wrapper.cc | 113 +
 .../modules/audio_processing/agc2/vad_wrapper.h | 82 +
 .../audio_processing/agc2/vad_wrapper_gn/moz.build | 232 ++
 .../audio_processing/agc2/vad_wrapper_unittest.cc | 181 +
 .../audio_processing/agc2/vector_float_frame.cc | 39 +
 .../audio_processing/agc2/vector_float_frame.h | 42 +
 .../modules/audio_processing/api_gn/moz.build | 225 ++
 .../audio_processing/apm_logging_gn/moz.build | 233 ++
 .../modules/audio_processing/audio_buffer.cc | 396 ++
 .../modules/audio_processing/audio_buffer.h | 172 +
 .../audio_processing/audio_buffer_gn/moz.build | 235 ++
 .../audio_processing/audio_buffer_unittest.cc | 93 +
 .../audio_frame_proxies_gn/moz.build | 225 ++
 .../audio_processing/audio_frame_view_gn/moz.build | 205 +
 .../audio_processing/audio_frame_view_unittest.cc | 51 +
 .../audio_processing_builder_impl.cc | 34 +
 .../audio_processing/audio_processing_gn/moz.build | 239 ++
 .../audio_processing/audio_processing_impl.cc | 2649 +++++++++++++
 .../audio_processing/audio_processing_impl.h | 603 +++
 .../audio_processing_impl_locking_unittest.cc | 1012 +++++
 .../audio_processing_impl_unittest.cc | 1569 ++++++++
 .../audio_processing_performance_unittest.cc | 568 +++
 .../audio_processing_statistics_gn/moz.build | 217 +
 .../audio_processing/audio_processing_unittest.cc | 3441 ++++++++++++++++
 .../capture_levels_adjuster/BUILD.gn | 45 +
 .../audio_samples_scaler.cc | 92 +
 .../capture_levels_adjuster/audio_samples_scaler.h | 46 +
 .../audio_samples_scaler_unittest.cc | 204 +
 .../capture_levels_adjuster.cc | 96 +
 .../capture_levels_adjuster.h | 88 +
 .../capture_levels_adjuster_gn/moz.build | 233 ++
 .../capture_levels_adjuster_unittest.cc | 187 +
 .../libwebrtc/modules/audio_processing/debug.proto | 115 +
 .../echo_control_mobile_bit_exact_unittest.cc | 221 ++
 .../audio_processing/echo_control_mobile_impl.cc | 287 ++
 .../audio_processing/echo_control_mobile_impl.h | 86 +
 .../echo_control_mobile_unittest.cc | 43 +
 .../echo_detector/circular_buffer.cc | 49 +
 .../echo_detector/circular_buffer.h | 44 +
 .../echo_detector/circular_buffer_unittest.cc | 53 +
 .../echo_detector/mean_variance_estimator.cc | 47 +
 .../echo_detector/mean_variance_estimator.h | 33 +
 .../mean_variance_estimator_unittest.cc | 65 +
 .../audio_processing/echo_detector/moving_max.cc | 52 +
 .../audio_processing/echo_detector/moving_max.h | 36 +
 .../echo_detector/moving_max_unittest.cc | 68 +
 .../normalized_covariance_estimator.cc | 43 +
 .../normalized_covariance_estimator.h | 43 +
 .../normalized_covariance_estimator_unittest.cc | 41 +
 .../g3doc/audio_processing_module.md | 26 +
 .../modules/audio_processing/gain_control_impl.cc | 373 ++
 .../modules/audio_processing/gain_control_impl.h | 91 +
 .../audio_processing/gain_control_unittest.cc | 393 ++
 .../modules/audio_processing/gain_controller2.cc | 283 ++
 .../modules/audio_processing/gain_controller2.h | 110 +
 .../audio_processing/gain_controller2_gn/moz.build | 233 ++
 .../audio_processing/gain_controller2_unittest.cc | 615 +++
 .../modules/audio_processing/high_pass_filter.cc | 115 +
 .../modules/audio_processing/high_pass_filter.h | 45 +
 .../audio_processing/high_pass_filter_gn/moz.build | 232 ++
 .../audio_processing/high_pass_filter_unittest.cc | 301 ++
 .../modules/audio_processing/include/aec_dump.cc | 41 +
 .../modules/audio_processing/include/aec_dump.h | 116 +
 .../include/audio_frame_proxies.cc | 66 +
 .../audio_processing/include/audio_frame_proxies.h | 41 +
 .../audio_processing/include/audio_frame_view.h | 68 +
 .../audio_processing/include/audio_processing.cc | 210 +
 .../audio_processing/include/audio_processing.h | 941 +++++
 .../include/audio_processing_statistics.cc | 22 +
 .../include/audio_processing_statistics.h | 67 +
 .../include/mock_audio_processing.h | 178 +
 .../audio_processing/logging/apm_data_dumper.cc | 100 +
 .../audio_processing/logging/apm_data_dumper.h | 452 +++
 .../libwebrtc/modules/audio_processing/ns/BUILD.gn | 104 +
 .../modules/audio_processing/ns/fast_math.cc | 84 +
 .../modules/audio_processing/ns/fast_math.h | 38 +
 .../modules/audio_processing/ns/histograms.cc | 47 +
 .../modules/audio_processing/ns/histograms.h | 55 +
 .../modules/audio_processing/ns/noise_estimator.cc | 195 +
 .../modules/audio_processing/ns/noise_estimator.h | 77 +
 .../audio_processing/ns/noise_suppressor.cc | 555 +++
 .../modules/audio_processing/ns/noise_suppressor.h | 92 +
 .../ns/noise_suppressor_unittest.cc | 102 +
 .../modules/audio_processing/ns/ns_common.h | 34 +
 .../modules/audio_processing/ns/ns_config.h | 24 +
 .../modules/audio_processing/ns/ns_fft.cc | 64 +
 .../libwebrtc/modules/audio_processing/ns/ns_fft.h | 45 +
 .../modules/audio_processing/ns/ns_gn/moz.build | 245 ++
 .../audio_processing/ns/prior_signal_model.cc | 18 +
 .../audio_processing/ns/prior_signal_model.h | 32 +
 .../ns/prior_signal_model_estimator.cc | 170 +
 .../ns/prior_signal_model_estimator.h | 39 +
 .../ns/quantile_noise_estimator.cc | 88 +
 .../audio_processing/ns/quantile_noise_estimator.h | 45 +
 .../modules/audio_processing/ns/signal_model.cc | 24 +
 .../modules/audio_processing/ns/signal_model.h | 34 +
.../audio_processing/ns/signal_model_estimator.cc | 175 +
.../audio_processing/ns/signal_model_estimator.h | 58 +
.../ns/speech_probability_estimator.cc | 103 +
.../ns/speech_probability_estimator.h | 51 +
.../audio_processing/ns/suppression_params.cc | 49 +
.../audio_processing/ns/suppression_params.h | 30 +
.../modules/audio_processing/ns/wiener_filter.cc | 120 +
.../modules/audio_processing/ns/wiener_filter.h | 57 +
.../optionally_built_submodule_creators.cc | 36 +
.../optionally_built_submodule_creators.h | 42 +
.../moz.build | 232 ++
.../audio_processing/render_queue_item_verifier.h | 36 +
.../audio_processing/residual_echo_detector.cc | 205 +
.../audio_processing/residual_echo_detector.h | 91 +
.../residual_echo_detector_unittest.cc | 138 +
.../modules/audio_processing/rms_level.cc | 138 +
.../libwebrtc/modules/audio_processing/rms_level.h | 77 +
.../audio_processing/rms_level_gn/moz.build | 221 ++
.../modules/audio_processing/rms_level_unittest.cc | 197 +
.../modules/audio_processing/splitting_filter.cc | 144 +
.../modules/audio_processing/splitting_filter.h | 72 +
.../audio_processing/splitting_filter_unittest.cc | 103 +
.../test/aec_dump_based_simulator.cc | 656 +++
.../test/aec_dump_based_simulator.h | 82 +
.../test/android/apmtest/AndroidManifest.xml | 30 +
.../test/android/apmtest/default.properties | 11 +
.../test/android/apmtest/jni/main.c | 307 ++
.../test/android/apmtest/res/values/strings.xml | 4 +
.../audio_processing/test/api_call_statistics.cc | 95 +
.../audio_processing/test/api_call_statistics.h | 47 +
.../modules/audio_processing/test/apmtest.m | 365 ++
.../audio_processing/test/audio_buffer_tools.cc | 68 +
.../audio_processing/test/audio_buffer_tools.h | 42 +
.../test/audio_processing_builder_for_testing.cc | 51 +
.../test/audio_processing_builder_for_testing.h | 95 +
.../test/audio_processing_simulator.cc | 630 +++
.../test/audio_processing_simulator.h | 247 ++
.../audio_processing/test/audioproc_float_impl.cc | 821 ++++
.../audio_processing/test/audioproc_float_impl.h | 51 +
.../audio_processing/test/bitexactness_tools.cc | 148 +
.../audio_processing/test/bitexactness_tools.h | 56 +
.../test/conversational_speech/BUILD.gn | 81 +
.../test/conversational_speech/OWNERS | 3 +
.../test/conversational_speech/README.md | 74 +
.../test/conversational_speech/config.cc | 31 +
.../test/conversational_speech/config.h | 43 +
.../test/conversational_speech/generator.cc | 89 +
.../conversational_speech/generator_unittest.cc | 675 ++++
.../test/conversational_speech/mock_wavreader.cc | 34 +
.../test/conversational_speech/mock_wavreader.h | 48 +
.../mock_wavreader_factory.cc | 66 +
.../conversational_speech/mock_wavreader_factory.h | 59 +
.../test/conversational_speech/multiend_call.cc | 193 +
.../test/conversational_speech/multiend_call.h | 104 +
.../test/conversational_speech/simulator.cc | 235 ++
.../test/conversational_speech/simulator.h | 44 +
.../test/conversational_speech/timing.cc | 73 +
.../test/conversational_speech/timing.h | 51 +
.../wavreader_abstract_factory.h | 34 +
.../conversational_speech/wavreader_factory.cc | 65 +
.../test/conversational_speech/wavreader_factory.h | 36 +
.../conversational_speech/wavreader_interface.h | 40 +
.../audio_processing/test/debug_dump_replayer.cc | 250 ++
.../audio_processing/test/debug_dump_replayer.h | 78 +
.../audio_processing/test/debug_dump_test.cc | 504 +++
.../test/echo_canceller_test_tools.cc | 47 +
.../test/echo_canceller_test_tools.h | 47 +
.../test/echo_canceller_test_tools_unittest.cc | 82 +
.../audio_processing/test/echo_control_mock.h | 46 +
.../audio_processing/test/fake_recording_device.cc | 190 +
.../audio_processing/test/fake_recording_device.h | 74 +
.../test/fake_recording_device_unittest.cc | 231 ++
.../audio_processing/test/performance_timer.cc | 75 +
.../audio_processing/test/performance_timer.h | 47 +
.../audio_processing/test/protobuf_utils.cc | 79 +
.../modules/audio_processing/test/protobuf_utils.h | 40 +
.../test/py_quality_assessment/BUILD.gn | 170 +
.../test/py_quality_assessment/OWNERS | 5 +
.../test/py_quality_assessment/README.md | 125 +
.../py_quality_assessment/apm_configs/default.json | 1 +
.../apm_quality_assessment.py | 217 +
.../apm_quality_assessment.sh | 91 +
.../apm_quality_assessment_boxplot.py | 154 +
.../apm_quality_assessment_export.py | 63 +
.../apm_quality_assessment_gencfgs.py | 128 +
.../apm_quality_assessment_optimize.py | 189 +
.../apm_quality_assessment_unittest.py | 28 +
.../test/py_quality_assessment/output/README.md | 1 +
.../quality_assessment/__init__.py | 7 +
.../quality_assessment/annotations.py | 296 ++
.../quality_assessment/annotations_unittest.py | 160 +
.../quality_assessment/apm_configs/default.json | 1 +
.../quality_assessment/apm_vad.cc | 96 +
.../quality_assessment/audioproc_wrapper.py | 100 +
.../quality_assessment/collect_data.py | 243 ++
.../quality_assessment/data_access.py | 154 +
.../quality_assessment/echo_path_simulation.py | 136 +
.../echo_path_simulation_factory.py | 48 +
.../echo_path_simulation_unittest.py | 82 +
.../quality_assessment/eval_scores.py | 427 ++
.../quality_assessment/eval_scores_factory.py | 55 +
.../quality_assessment/eval_scores_unittest.py | 137 +
.../quality_assessment/evaluation.py | 57 +
.../quality_assessment/exceptions.py | 45 +
.../quality_assessment/export.py | 426 ++
.../quality_assessment/export_unittest.py | 86 +
.../quality_assessment/external_vad.py | 75 +
.../quality_assessment/fake_external_vad.py | 25 +
.../quality_assessment/fake_polqa.cc | 56 +
.../quality_assessment/input_mixer.py | 97 +
.../quality_assessment/input_mixer_unittest.py | 140 +
.../quality_assessment/input_signal_creator.py | 68 +
.../quality_assessment/results.css | 32 +
.../quality_assessment/results.js | 376 ++
.../quality_assessment/signal_processing.py | 359 ++
.../signal_processing_unittest.py | 183 +
.../quality_assessment/simulation.py | 446 +++
.../quality_assessment/simulation_unittest.py | 203 +
.../quality_assessment/sound_level.cc | 127 +
.../quality_assessment/test_data_generation.py | 526 +++
.../test_data_generation_factory.py | 71 +
.../test_data_generation_unittest.py | 207 +
.../quality_assessment/vad.cc | 103 +
.../audio_processing/test/runtime_setting_util.cc | 50 +
.../audio_processing/test/runtime_setting_util.h | 23 +
.../audio_processing/test/simulator_buffers.cc | 86 +
.../audio_processing/test/simulator_buffers.h | 66 +
.../modules/audio_processing/test/test_utils.cc | 89 +
.../modules/audio_processing/test/test_utils.h | 170 +
.../modules/audio_processing/test/unittest.proto | 48 +
.../audio_processing/test/wav_based_simulator.cc | 202 +
.../audio_processing/test/wav_based_simulator.h | 63 +
.../audio_processing/three_band_filter_bank.cc | 278 ++
.../audio_processing/three_band_filter_bank.h | 77 +
.../modules/audio_processing/transient/BUILD.gn | 133 +
.../audio_processing/transient/click_annotate.cc | 107 +
.../modules/audio_processing/transient/common.h | 27 +
.../transient/daubechies_8_wavelet_coeffs.h | 44 +
.../audio_processing/transient/dyadic_decimator.h | 68 +
.../transient/dyadic_decimator_unittest.cc | 111 +
.../audio_processing/transient/file_utils.cc | 257 ++
.../audio_processing/transient/file_utils.h | 117 +
.../transient/file_utils_unittest.cc | 501 +++
.../audio_processing/transient/moving_moments.cc | 50 +
.../audio_processing/transient/moving_moments.h | 53 +
.../transient/moving_moments_unittest.cc | 207 +
.../transient/test/plotDetection.m | 22 +
.../transient/test/readDetection.m | 26 +
.../audio_processing/transient/test/readPCM.m | 26 +
.../transient/transient_detector.cc | 176 +
.../transient/transient_detector.h | 89 +
.../transient/transient_detector_unittest.cc | 95 +
.../transient/transient_suppression_test.cc | 238 ++
.../transient/transient_suppressor.h | 75 +
.../transient_suppressor_api_gn/moz.build | 201 +
.../transient/transient_suppressor_impl.cc | 455 +++
.../transient/transient_suppressor_impl.h | 115 +
.../transient_suppressor_impl_gn/moz.build | 236 ++
.../transient/transient_suppressor_unittest.cc | 175 +
.../transient/voice_probability_delay_unit.cc | 56 +
.../transient/voice_probability_delay_unit.h | 43 +
.../voice_probability_delay_unit_gn/moz.build | 221 ++
.../voice_probability_delay_unit_unittest.cc | 108 +
.../audio_processing/transient/windows_private.h | 557 +++
.../modules/audio_processing/transient/wpd_node.cc | 72 +
.../modules/audio_processing/transient/wpd_node.h | 45 +
.../transient/wpd_node_unittest.cc | 64 +
.../modules/audio_processing/transient/wpd_tree.cc | 118 +
.../modules/audio_processing/transient/wpd_tree.h | 92 +
.../transient/wpd_tree_unittest.cc | 177 +
.../modules/audio_processing/utility/BUILD.gn | 79 +
.../modules/audio_processing/utility/DEPS | 3 +
.../utility/cascaded_biquad_filter.cc | 126 +
.../utility/cascaded_biquad_filter.h | 80 +
.../utility/cascaded_biquad_filter_gn/moz.build | 221 ++
.../utility/cascaded_biquad_filter_unittest.cc | 157 +
.../audio_processing/utility/delay_estimator.cc | 708 ++++
.../audio_processing/utility/delay_estimator.h | 257 ++
.../utility/delay_estimator_internal.h | 51 +
.../utility/delay_estimator_unittest.cc | 621 +++
.../utility/delay_estimator_wrapper.cc | 489 +++
.../utility/delay_estimator_wrapper.h | 248 ++
.../utility/legacy_delay_estimator_gn/moz.build | 222 ++
.../audio_processing/utility/pffft_wrapper.cc | 135 +
.../audio_processing/utility/pffft_wrapper.h | 94 +
.../utility/pffft_wrapper_gn/moz.build | 221 ++
.../utility/pffft_wrapper_unittest.cc | 182 +
.../modules/audio_processing/vad/BUILD.gn | 69 +
.../modules/audio_processing/vad/common.h | 29 +
.../libwebrtc/modules/audio_processing/vad/gmm.cc | 61 +
.../libwebrtc/modules/audio_processing/vad/gmm.h | 45 +
.../modules/audio_processing/vad/gmm_unittest.cc | 65 +
.../audio_processing/vad/noise_gmm_tables.h | 82 +
.../audio_processing/vad/pitch_based_vad.cc | 120 +
.../modules/audio_processing/vad/pitch_based_vad.h | 57 +
.../vad/pitch_based_vad_unittest.cc | 75 +
.../modules/audio_processing/vad/pitch_internal.cc | 55 +
.../modules/audio_processing/vad/pitch_internal.h | 30 +
.../vad/pitch_internal_unittest.cc | 54 +
.../audio_processing/vad/pole_zero_filter.cc | 107 +
.../audio_processing/vad/pole_zero_filter.h | 51 +
.../vad/pole_zero_filter_unittest.cc | 103 +
.../modules/audio_processing/vad/standalone_vad.cc | 91 +
.../modules/audio_processing/vad/standalone_vad.h | 69 +
.../vad/standalone_vad_unittest.cc | 107 +
.../modules/audio_processing/vad/vad_audio_proc.cc | 275 ++
.../modules/audio_processing/vad/vad_audio_proc.h | 90 +
.../audio_processing/vad/vad_audio_proc_internal.h | 81 +
.../vad/vad_audio_proc_unittest.cc | 62 +
.../audio_processing/vad/vad_circular_buffer.cc | 135 +
.../audio_processing/vad/vad_circular_buffer.h | 69 +
.../vad/vad_circular_buffer_unittest.cc | 134 +
.../modules/audio_processing/vad/vad_gn/moz.build | 239 ++
.../vad/voice_activity_detector.cc | 85 +
.../audio_processing/vad/voice_activity_detector.h | 74 +
.../vad/voice_activity_detector_unittest.cc | 168 +
.../audio_processing/vad/voice_gmm_tables.h | 77 +
.../modules/congestion_controller/BUILD.gn | 68 +
.../libwebrtc/modules/congestion_controller/DEPS | 5 +
.../libwebrtc/modules/congestion_controller/OWNERS | 7 +
.../congestion_controller_gn/moz.build | 234 ++
.../modules/congestion_controller/goog_cc/BUILD.gn | 369 ++
.../goog_cc/acknowledged_bitrate_estimator.cc | 70 +
.../goog_cc/acknowledged_bitrate_estimator.h | 52 +
.../acknowledged_bitrate_estimator_interface.cc | 92 +
.../acknowledged_bitrate_estimator_interface.h | 85 +
.../acknowledged_bitrate_estimator_unittest.cc | 136 +
.../congestion_controller/goog_cc/alr_detector.cc | 111 +
.../congestion_controller/goog_cc/alr_detector.h | 76 +
.../goog_cc/alr_detector_gn/moz.build | 225 ++
.../goog_cc/alr_detector_unittest.cc | 206 +
.../goog_cc/bitrate_estimator.cc | 166 +
.../goog_cc/bitrate_estimator.h | 62 +
.../congestion_window_pushback_controller.cc | 81 +
.../congestion_window_pushback_controller.h | 48 +
...ngestion_window_pushback_controller_unittest.cc | 105 +
.../goog_cc/delay_based_bwe.cc | 305 ++
.../goog_cc/delay_based_bwe.h | 133 +
.../goog_cc/delay_based_bwe_gn/moz.build | 234 ++
.../goog_cc/delay_based_bwe_unittest.cc | 309 ++
.../goog_cc/delay_based_bwe_unittest_helper.cc | 529 +++
.../goog_cc/delay_based_bwe_unittest_helper.h | 189 +
.../goog_cc/delay_increase_detector_interface.h | 43 +
.../goog_cc/estimators_gn/moz.build | 238 ++
.../goog_cc/goog_cc_gn/moz.build | 233 ++
.../goog_cc/goog_cc_network_control.cc | 725 ++++
.../goog_cc/goog_cc_network_control.h | 147 +
.../goog_cc/goog_cc_network_control_unittest.cc | 934 +++++
.../goog_cc/inter_arrival_delta.cc | 140 +
.../goog_cc/inter_arrival_delta.h | 90 +
.../goog_cc/link_capacity_estimator.cc | 77 +
.../goog_cc/link_capacity_estimator.h | 38 +
.../goog_cc/link_capacity_estimator_gn/moz.build | 221 ++
.../goog_cc/loss_based_bandwidth_estimation.cc | 260 ++
.../goog_cc/loss_based_bandwidth_estimation.h | 97 +
.../goog_cc/loss_based_bwe_v1_gn/moz.build | 226 ++
.../goog_cc/loss_based_bwe_v2.cc | 1080 +++++
.../goog_cc/loss_based_bwe_v2.h | 203 +
.../goog_cc/loss_based_bwe_v2_gn/moz.build | 232 ++
.../goog_cc/loss_based_bwe_v2_test.cc | 1526 +++++++
.../goog_cc/probe_bitrate_estimator.cc | 201 +
.../goog_cc/probe_bitrate_estimator.h | 58 +
.../goog_cc/probe_bitrate_estimator_unittest.cc | 228 ++
.../goog_cc/probe_controller.cc | 558 +++
.../goog_cc/probe_controller.h | 196 +
.../goog_cc/probe_controller_gn/moz.build | 225 ++
.../goog_cc/probe_controller_unittest.cc | 1131 ++++++
.../goog_cc/pushback_controller_gn/moz.build | 225 ++
.../goog_cc/robust_throughput_estimator.cc | 189 +
.../goog_cc/robust_throughput_estimator.h | 50 +
.../robust_throughput_estimator_unittest.cc | 427 ++
.../goog_cc/send_side_bandwidth_estimation.cc | 695 ++++
.../goog_cc/send_side_bandwidth_estimation.h | 210 +
.../send_side_bandwidth_estimation_unittest.cc | 206 +
.../goog_cc/send_side_bwe_gn/moz.build | 233 ++
.../goog_cc/test/goog_cc_printer.cc | 200 +
.../goog_cc/test/goog_cc_printer.h | 75 +
.../goog_cc/trendline_estimator.cc | 332 ++
.../goog_cc/trendline_estimator.h | 125 +
.../goog_cc/trendline_estimator_unittest.cc | 151 +
.../include/receive_side_congestion_controller.h | 91 +
.../modules/congestion_controller/pcc/BUILD.gn | 123 +
.../pcc/bitrate_controller.cc | 139 +
.../congestion_controller/pcc/bitrate_controller.h | 74 +
.../pcc/bitrate_controller_unittest.cc | 303 ++
.../congestion_controller/pcc/monitor_interval.cc | 135 +
.../congestion_controller/pcc/monitor_interval.h | 71 +
.../pcc/monitor_interval_unittest.cc | 190 +
.../congestion_controller/pcc/pcc_factory.cc | 30 +
.../congestion_controller/pcc/pcc_factory.h | 30 +
.../pcc/pcc_network_controller.cc | 391 ++
.../pcc/pcc_network_controller.h | 125 +
.../pcc/pcc_network_controller_unittest.cc | 119 +
.../congestion_controller/pcc/rtt_tracker.cc | 41 +
.../congestion_controller/pcc/rtt_tracker.h | 39 +
.../pcc/rtt_tracker_unittest.cc | 71 +
.../congestion_controller/pcc/utility_function.cc | 86 +
.../congestion_controller/pcc/utility_function.h | 78 +
.../pcc/utility_function_unittest.cc | 113 +
.../receive_side_congestion_controller.cc | 134 +
.../receive_side_congestion_controller_unittest.cc | 126 +
.../congestion_controller/remb_throttler.cc | 63 +
.../modules/congestion_controller/remb_throttler.h | 54 +
.../remb_throttler_unittest.cc | 100 +
.../modules/congestion_controller/rtp/BUILD.gn | 100 +
.../congestion_controller/rtp/control_handler.cc | 89 +
.../congestion_controller/rtp/control_handler.h | 54 +
.../rtp/control_handler_gn/moz.build | 232 ++
.../rtp/transport_feedback_adapter.cc | 275 ++
.../rtp/transport_feedback_adapter.h | 102 +
.../rtp/transport_feedback_adapter_unittest.cc | 407 ++
.../rtp/transport_feedback_demuxer.cc | 94 +
.../rtp/transport_feedback_demuxer.h | 62 +
.../rtp/transport_feedback_demuxer_unittest.cc | 99 +
.../rtp/transport_feedback_gn/moz.build | 233 ++
.../libwebrtc/modules/desktop_capture/BUILD.gn | 652 +++
third_party/libwebrtc/modules/desktop_capture/DEPS | 19 +
.../libwebrtc/modules/desktop_capture/OWNERS | 2 +
.../blank_detector_desktop_capturer_wrapper.cc | 139 +
.../blank_detector_desktop_capturer_wrapper.h | 83 +
...k_detector_desktop_capturer_wrapper_unittest.cc | 165 +
.../desktop_capture/cropped_desktop_frame.cc | 66 +
.../desktop_capture/cropped_desktop_frame.h | 33 +
.../cropped_desktop_frame_unittest.cc | 115 +
.../desktop_capture/cropping_window_capturer.cc | 135 +
.../desktop_capture/cropping_window_capturer.h | 84 +
.../cropping_window_capturer_win.cc | 335 ++
.../delegated_source_list_controller.h | 70 +
.../desktop_capture/desktop_and_cursor_composer.cc | 260 ++
.../desktop_capture/desktop_and_cursor_composer.h | 98 +
.../desktop_and_cursor_composer_unittest.cc | 479 +++
.../desktop_capture_differ_sse2_gn/moz.build | 153 +
.../desktop_capture/desktop_capture_gn/moz.build | 678 ++++
.../desktop_capture/desktop_capture_metadata.h | 31 +
.../desktop_capture_metrics_helper.cc | 60 +
.../desktop_capture_metrics_helper.h | 22 +
.../desktop_capture_objc_gn/moz.build | 77 +
.../desktop_capture/desktop_capture_options.cc | 61 +
.../desktop_capture/desktop_capture_options.h | 262 ++
.../desktop_capture/desktop_capture_types.h | 75 +
.../modules/desktop_capture/desktop_capturer.cc | 144 +
.../modules/desktop_capture/desktop_capturer.h | 219 +
.../desktop_capturer_differ_wrapper.cc | 232 ++
.../desktop_capturer_differ_wrapper.h | 72 +
.../desktop_capturer_differ_wrapper_unittest.cc | 291 ++
.../desktop_capture/desktop_capturer_wrapper.cc | 60 +
.../desktop_capture/desktop_capturer_wrapper.h | 48 +
.../modules/desktop_capture/desktop_frame.cc | 205 +
.../modules/desktop_capture/desktop_frame.h | 227 ++
.../desktop_capture/desktop_frame_generator.cc | 184 +
.../desktop_capture/desktop_frame_generator.h | 121 +
.../desktop_capture/desktop_frame_rotation.cc | 117 +
.../desktop_capture/desktop_frame_rotation.h | 52 +
.../desktop_frame_rotation_unittest.cc | 449 +++
.../desktop_capture/desktop_frame_unittest.cc | 336 ++
.../modules/desktop_capture/desktop_frame_win.cc | 73 +
.../modules/desktop_capture/desktop_frame_win.h | 49 +
.../modules/desktop_capture/desktop_geometry.cc | 79 +
.../modules/desktop_capture/desktop_geometry.h | 169 +
.../desktop_capture/desktop_geometry_unittest.cc | 106 +
.../modules/desktop_capture/desktop_region.cc | 567 +++
.../modules/desktop_capture/desktop_region.h | 169 +
.../desktop_capture/desktop_region_unittest.cc | 834 ++++
.../modules/desktop_capture/differ_block.cc | 76 +
.../modules/desktop_capture/differ_block.h | 42 +
.../desktop_capture/differ_block_unittest.cc | 89 +
.../modules/desktop_capture/differ_vector_sse2.cc | 102 +
.../modules/desktop_capture/differ_vector_sse2.h | 31 +
.../desktop_capture/fake_desktop_capturer.cc | 84 +
.../desktop_capture/fake_desktop_capturer.h | 76 +
.../fallback_desktop_capturer_wrapper.cc | 183 +
.../fallback_desktop_capturer_wrapper.h | 64 +
.../fallback_desktop_capturer_wrapper_unittest.cc | 207 +
.../full_screen_application_handler.cc | 30 +
.../full_screen_application_handler.h | 50 +
.../desktop_capture/full_screen_window_detector.cc | 84 +
.../desktop_capture/full_screen_window_detector.h | 83 +
.../linux/wayland/base_capturer_pipewire.cc | 230 ++
.../linux/wayland/base_capturer_pipewire.h | 92 +
.../desktop_capture/linux/wayland/egl_dmabuf.cc | 781 ++++
.../desktop_capture/linux/wayland/egl_dmabuf.h | 74 +
.../linux/wayland/mouse_cursor_monitor_pipewire.cc | 59 +
.../linux/wayland/mouse_cursor_monitor_pipewire.h | 44 +
.../linux/wayland/portal_request_response.h | 17 +
.../linux/wayland/restore_token_manager.cc | 37 +
.../linux/wayland/restore_token_manager.h | 46 +
.../desktop_capture/linux/wayland/scoped_glib.h | 17 +
.../wayland/screen_capture_portal_interface.cc | 127 +
.../wayland/screen_capture_portal_interface.h | 76 +
.../linux/wayland/screencast_portal.cc | 471 +++
.../linux/wayland/screencast_portal.h | 218 +
.../linux/wayland/screencast_stream_utils.cc | 123 +
.../linux/wayland/screencast_stream_utils.h | 51 +
.../linux/wayland/shared_screencast_stream.cc | 965 +++++
.../linux/wayland/shared_screencast_stream.h | 95 +
.../wayland/shared_screencast_stream_unittest.cc | 160 +
.../test/test_screencast_stream_provider.cc | 361 ++
.../wayland/test/test_screencast_stream_provider.h | 93 +
.../linux/wayland/xdg_desktop_portal_utils.h | 17 +
.../linux/wayland/xdg_session_details.h | 17 +
.../linux/x11/mouse_cursor_monitor_x11.cc | 258 ++
.../linux/x11/mouse_cursor_monitor_x11.h | 68 +
.../linux/x11/screen_capturer_x11.cc | 517 +++
.../linux/x11/screen_capturer_x11.h | 147 +
.../desktop_capture/linux/x11/shared_x_display.cc | 108 +
.../desktop_capture/linux/x11/shared_x_display.h | 88 +
.../linux/x11/window_capturer_x11.cc | 256 +
.../linux/x11/window_capturer_x11.h | 78 +
.../desktop_capture/linux/x11/window_finder_x11.cc | 52 +
.../desktop_capture/linux/x11/window_finder_x11.h | 35 +
.../desktop_capture/linux/x11/window_list_utils.cc | 198 +
.../desktop_capture/linux/x11/window_list_utils.h | 56 +
.../desktop_capture/linux/x11/x_atom_cache.cc | 51 +
.../desktop_capture/linux/x11/x_atom_cache.h | 45 +
.../desktop_capture/linux/x11/x_error_trap.cc | 70 +
.../desktop_capture/linux/x11/x_error_trap.h | 51 +
.../linux/x11/x_server_pixel_buffer.cc | 379 ++
.../linux/x11/x_server_pixel_buffer.h | 89 +
.../desktop_capture/linux/x11/x_window_property.cc | 43 +
.../desktop_capture/linux/x11/x_window_property.h | 63 +
.../desktop_capture/mac/desktop_configuration.h | 96 +
.../desktop_capture/mac/desktop_configuration.mm | 189 +
.../mac/desktop_configuration_monitor.cc | 73 +
.../mac/desktop_configuration_monitor.h | 55 +
.../desktop_capture/mac/desktop_frame_cgimage.h | 58 +
.../desktop_capture/mac/desktop_frame_cgimage.mm | 108 +
.../desktop_capture/mac/desktop_frame_iosurface.h | 45 +
.../desktop_capture/mac/desktop_frame_iosurface.mm | 61 +
.../desktop_capture/mac/desktop_frame_provider.h | 59 +
.../desktop_capture/mac/desktop_frame_provider.mm | 70 +
.../mac/full_screen_mac_application_handler.cc | 238 ++
.../mac/full_screen_mac_application_handler.h | 24 +
.../desktop_capture/mac/screen_capturer_mac.h | 128 +
.../desktop_capture/mac/screen_capturer_mac.mm | 633 +++
.../desktop_capture/mac/window_list_utils.cc | 430 ++
.../desktop_capture/mac/window_list_utils.h | 117 +
.../mock_desktop_capturer_callback.cc | 23 +
.../mock_desktop_capturer_callback.h | 39 +
.../modules/desktop_capture/mouse_cursor.cc | 36 +
.../modules/desktop_capture/mouse_cursor.h | 49 +
.../modules/desktop_capture/mouse_cursor_monitor.h | 111 +
.../desktop_capture/mouse_cursor_monitor_linux.cc | 65 +
.../desktop_capture/mouse_cursor_monitor_mac.mm | 213 +
.../desktop_capture/mouse_cursor_monitor_null.cc | 38 +
.../mouse_cursor_monitor_unittest.cc | 128 +
.../desktop_capture/mouse_cursor_monitor_win.cc | 220 ++
.../desktop_capture/primitives_gn/moz.build | 214 +
.../modules/desktop_capture/resolution_tracker.cc | 34 +
.../modules/desktop_capture/resolution_tracker.h | 34 +
.../modules/desktop_capture/rgba_color.cc | 61 +
.../libwebrtc/modules/desktop_capture/rgba_color.h | 59 +
.../modules/desktop_capture/rgba_color_unittest.cc | 45 +
.../desktop_capture/screen_capture_frame_queue.h | 75 +
.../desktop_capture/screen_capturer_darwin.mm | 33 +
.../desktop_capture/screen_capturer_fuchsia.cc | 417 ++
.../desktop_capture/screen_capturer_fuchsia.h | 65 +
.../desktop_capture/screen_capturer_helper.cc | 90 +
.../desktop_capture/screen_capturer_helper.h | 91 +
.../screen_capturer_helper_unittest.cc | 193 +
.../screen_capturer_integration_test.cc | 380 ++
.../desktop_capture/screen_capturer_linux.cc | 44 +
.../screen_capturer_mac_unittest.cc | 101 +
.../desktop_capture/screen_capturer_null.cc | 21 +
.../desktop_capture/screen_capturer_unittest.cc | 224 ++
.../modules/desktop_capture/screen_capturer_win.cc | 62 +
.../modules/desktop_capture/screen_drawer.cc | 30 +
.../modules/desktop_capture/screen_drawer.h | 79 +
.../modules/desktop_capture/screen_drawer_linux.cc | 185 +
.../desktop_capture/screen_drawer_lock_posix.cc | 59 +
.../desktop_capture/screen_drawer_lock_posix.h | 39 +
.../modules/desktop_capture/screen_drawer_mac.cc | 30 +
.../desktop_capture/screen_drawer_unittest.cc | 160 +
.../modules/desktop_capture/screen_drawer_win.cc | 209 +
.../desktop_capture/shared_desktop_frame.cc | 59 +
.../modules/desktop_capture/shared_desktop_frame.h | 65 +
.../modules/desktop_capture/shared_memory.cc | 24 +
.../modules/desktop_capture/shared_memory.h | 82 +
.../modules/desktop_capture/test_utils.cc | 50 +
.../libwebrtc/modules/desktop_capture/test_utils.h | 27 +
.../modules/desktop_capture/test_utils_unittest.cc | 110 +
.../modules/desktop_capture/win/cursor.cc | 233 ++
.../libwebrtc/modules/desktop_capture/win/cursor.h | 25 +
.../win/cursor_test_data/1_24bpp.cur | Bin 0 -> 3262 bytes
.../win/cursor_test_data/1_32bpp.cur | Bin 0 -> 4286 bytes
.../win/cursor_test_data/1_8bpp.cur | Bin 0 -> 2238 bytes
.../win/cursor_test_data/2_1bpp.cur | Bin 0 -> 326 bytes
.../win/cursor_test_data/2_32bpp.cur | Bin 0 -> 4286 bytes
.../win/cursor_test_data/3_32bpp.cur | Bin 0 -> 4286 bytes
.../win/cursor_test_data/3_4bpp.cur | Bin 0 -> 766 bytes
.../modules/desktop_capture/win/cursor_unittest.cc | 91 +
.../win/cursor_unittest_resources.h | 24 +
.../win/cursor_unittest_resources.rc | 28 +
.../modules/desktop_capture/win/d3d_device.cc | 100 +
.../modules/desktop_capture/win/d3d_device.h | 59 +
.../modules/desktop_capture/win/desktop.cc | 111 +
.../modules/desktop_capture/win/desktop.h | 65 +
.../desktop_capture/win/desktop_capture_utils.cc | 32 +
.../desktop_capture/win/desktop_capture_utils.h | 29 +
.../win/display_configuration_monitor.cc | 37 +
.../win/display_configuration_monitor.h | 38 +
.../desktop_capture/win/dxgi_adapter_duplicator.cc | 185 +
.../desktop_capture/win/dxgi_adapter_duplicator.h | 92 +
.../modules/desktop_capture/win/dxgi_context.cc | 33 +
.../modules/desktop_capture/win/dxgi_context.h | 62 +
.../win/dxgi_duplicator_controller.cc | 514 +++
.../win/dxgi_duplicator_controller.h | 253 ++
.../modules/desktop_capture/win/dxgi_frame.cc | 77 +
.../modules/desktop_capture/win/dxgi_frame.h | 63 +
.../desktop_capture/win/dxgi_output_duplicator.cc | 390 ++
.../desktop_capture/win/dxgi_output_duplicator.h | 149 +
.../modules/desktop_capture/win/dxgi_texture.cc | 81 +
.../modules/desktop_capture/win/dxgi_texture.h | 73 +
.../desktop_capture/win/dxgi_texture_mapping.cc | 58 +
.../desktop_capture/win/dxgi_texture_mapping.h | 47 +
.../desktop_capture/win/dxgi_texture_staging.cc | 132 +
.../desktop_capture/win/dxgi_texture_staging.h | 68 +
.../win/full_screen_win_application_handler.cc | 294 ++
.../win/full_screen_win_application_handler.h | 24 +
.../desktop_capture/win/scoped_gdi_object.h | 91 +
.../desktop_capture/win/scoped_thread_desktop.cc | 54 +
.../desktop_capture/win/scoped_thread_desktop.h | 55 +
.../desktop_capture/win/screen_capture_utils.cc | 184 +
.../desktop_capture/win/screen_capture_utils.h | 75 +
.../win/screen_capture_utils_unittest.cc | 81 +
.../win/screen_capturer_win_directx.cc | 230 ++
.../win/screen_capturer_win_directx.h | 105 +
.../win/screen_capturer_win_directx_unittest.cc | 41 +
.../desktop_capture/win/screen_capturer_win_gdi.cc | 241 ++
.../desktop_capture/win/screen_capturer_win_gdi.h | 83 +
.../win/screen_capturer_win_magnifier.cc | 398 ++
.../win/screen_capturer_win_magnifier.h | 140 +
.../desktop_capture/win/selected_window_context.cc | 59 +
.../desktop_capture/win/selected_window_context.h | 45 +
.../win/test_support/test_window.cc | 104 +
.../desktop_capture/win/test_support/test_window.h | 51 +
.../desktop_capture/win/wgc_capture_session.cc | 429 ++
.../desktop_capture/win/wgc_capture_session.h | 137 +
.../desktop_capture/win/wgc_capture_source.cc | 218 +
.../desktop_capture/win/wgc_capture_source.h | 141 +
.../win/wgc_capture_source_unittest.cc | 148 +
.../desktop_capture/win/wgc_capturer_win.cc | 365 ++
.../modules/desktop_capture/win/wgc_capturer_win.h | 169 +
.../win/wgc_capturer_win_unittest.cc | 572 +++
.../desktop_capture/win/wgc_desktop_frame.cc | 25 +
.../desktop_capture/win/wgc_desktop_frame.h | 46 +
.../desktop_capture/win/window_capture_utils.cc | 486 +++
.../desktop_capture/win/window_capture_utils.h | 136 +
.../win/window_capture_utils_unittest.cc | 153 +
.../desktop_capture/win/window_capturer_win_gdi.cc | 403 ++
.../desktop_capture/win/window_capturer_win_gdi.h | 78 +
.../desktop_capture/window_capturer_linux.cc | 44 +
.../modules/desktop_capture/window_capturer_mac.mm | 221 ++
.../desktop_capture/window_capturer_null.cc | 70 +
.../desktop_capture/window_capturer_unittest.cc | 105 +
.../modules/desktop_capture/window_capturer_win.cc | 48 +
.../modules/desktop_capture/window_finder.cc | 20 +
.../modules/desktop_capture/window_finder.h | 65 +
.../modules/desktop_capture/window_finder_mac.h | 37 +
.../modules/desktop_capture/window_finder_mac.mm | 52 +
.../desktop_capture/window_finder_unittest.cc | 178 +
.../modules/desktop_capture/window_finder_win.cc | 46 +
.../modules/desktop_capture/window_finder_win.h | 30 +
.../modules/include/module_common_types.h | 66 +
.../modules/include/module_common_types_public.h | 62 +
.../libwebrtc/modules/include/module_fec_types.h | 34 +
.../libwebrtc/modules/module_api_gn/moz.build | 201 +
.../modules/module_api_public_gn/moz.build | 201 +
.../modules/module_common_types_unittest.cc | 116 +
.../libwebrtc/modules/module_fec_api_gn/moz.build | 201 +
third_party/libwebrtc/modules/pacing/BUILD.gn | 117 +
third_party/libwebrtc/modules/pacing/DEPS | 6 +
third_party/libwebrtc/modules/pacing/OWNERS | 5 +
.../libwebrtc/modules/pacing/bitrate_prober.cc | 185 +
.../libwebrtc/modules/pacing/bitrate_prober.h | 126 +
.../modules/pacing/bitrate_prober_unittest.cc | 371 ++
.../libwebrtc/modules/pacing/g3doc/index.md | 164 +
.../libwebrtc/modules/pacing/interval_budget.cc | 68 +
.../libwebrtc/modules/pacing/interval_budget.h | 44 +
.../modules/pacing/interval_budget_gn/moz.build | 221 ++
.../modules/pacing/interval_budget_unittest.cc | 123 +
.../libwebrtc/modules/pacing/pacing_controller.cc | 710 ++++
.../libwebrtc/modules/pacing/pacing_controller.h | 251 ++
.../modules/pacing/pacing_controller_unittest.cc | 2175 ++++++++++
.../libwebrtc/modules/pacing/pacing_gn/moz.build | 239 ++
.../libwebrtc/modules/pacing/packet_router.cc | 356 ++
.../libwebrtc/modules/pacing/packet_router.h | 118 +
.../modules/pacing/packet_router_unittest.cc | 736 ++++
.../modules/pacing/prioritized_packet_queue.cc | 343 ++
.../modules/pacing/prioritized_packet_queue.h | 171 +
.../pacing/prioritized_packet_queue_unittest.cc | 363 ++
.../libwebrtc/modules/pacing/rtp_packet_pacer.h | 74 +
.../modules/pacing/task_queue_paced_sender.cc | 372 ++
.../modules/pacing/task_queue_paced_sender.h | 209 +
.../pacing/task_queue_paced_sender_unittest.cc | 913 +++++
third_party/libwebrtc/modules/portal/BUILD.gn | 148 +
third_party/libwebrtc/modules/portal/OWNERS | 2 +
third_party/libwebrtc/modules/portal/pipewire.sigs | 52 +
.../modules/portal/pipewire_stub_header.fragment | 8 +
.../libwebrtc/modules/portal/pipewire_utils.cc | 53 +
.../libwebrtc/modules/portal/pipewire_utils.h | 35 +
.../libwebrtc/modules/portal/portal_gn/moz.build | 109 +
.../modules/portal/portal_request_response.h | 34 +
.../libwebrtc/modules/portal/scoped_glib.cc | 64 +
third_party/libwebrtc/modules/portal/scoped_glib.h | 76 +
.../modules/portal/xdg_desktop_portal_utils.cc | 195 +
.../modules/portal/xdg_desktop_portal_utils.h | 113 +
.../libwebrtc/modules/portal/xdg_session_details.h | 33 +
.../modules/remote_bitrate_estimator/BUILD.gn | 143 +
.../modules/remote_bitrate_estimator/DEPS | 6 +
.../modules/remote_bitrate_estimator/OWNERS | 5 +
.../remote_bitrate_estimator/aimd_rate_control.cc | 431 ++
.../remote_bitrate_estimator/aimd_rate_control.h | 122 +
.../aimd_rate_control_unittest.cc | 484 +++
.../remote_bitrate_estimator/bwe_defines.cc | 24 +
.../remote_bitrate_estimator/include/bwe_defines.h | 46 +
.../include/remote_bitrate_estimator.h | 70 +
.../remote_bitrate_estimator/inter_arrival.cc | 163 +
.../remote_bitrate_estimator/inter_arrival.h | 95 +
.../inter_arrival_unittest.cc | 531 +++
.../remote_bitrate_estimator/overuse_detector.cc | 111 +
.../remote_bitrate_estimator/overuse_detector.h | 58 +
.../overuse_detector_unittest.cc | 682 ++++
.../remote_bitrate_estimator/overuse_estimator.cc | 164 +
.../remote_bitrate_estimator/overuse_estimator.h | 83 +
.../remote_bitrate_estimator/packet_arrival_map.cc | 164 +
.../remote_bitrate_estimator/packet_arrival_map.h | 144 +
.../packet_arrival_map_test.cc | 272 ++
.../remote_bitrate_estimator_abs_send_time.cc | 399 ++
.../remote_bitrate_estimator_abs_send_time.h | 135 +
...ote_bitrate_estimator_abs_send_time_unittest.cc | 296 ++
.../remote_bitrate_estimator_gn/moz.build | 244 ++
.../remote_bitrate_estimator_single_stream.cc | 245 ++
.../remote_bitrate_estimator_single_stream.h | 82 +
...ote_bitrate_estimator_single_stream_unittest.cc | 78 +
.../remote_bitrate_estimator_unittest_helper.cc | 594 +++
.../remote_bitrate_estimator_unittest_helper.h | 225 ++
.../remote_estimator_proxy.cc | 328 ++
.../remote_estimator_proxy.h | 124 +
.../remote_estimator_proxy_unittest.cc | 644 +++
.../test/bwe_test_logging.cc | 262 ++
.../test/bwe_test_logging.h | 360 ++
.../remote_bitrate_estimator/tools/bwe_rtp.cc | 109 +
.../remote_bitrate_estimator/tools/bwe_rtp.h | 25 +
.../remote_bitrate_estimator/tools/rtp_to_text.cc | 67 +
third_party/libwebrtc/modules/rtp_rtcp/BUILD.gn | 713 ++++
third_party/libwebrtc/modules/rtp_rtcp/DEPS | 8 +
third_party/libwebrtc/modules/rtp_rtcp/OWNERS | 6 +
.../modules/rtp_rtcp/include/flexfec_receiver.h | 80 +
.../modules/rtp_rtcp/include/flexfec_sender.h | 103 +
.../modules/rtp_rtcp/include/receive_statistics.h | 83 +
.../rtp_rtcp/include/recovered_packet_receiver.h | 30 +
.../rtp_rtcp/include/remote_ntp_time_estimator.h | 74 +
.../modules/rtp_rtcp/include/report_block_data.cc | 44 +
.../modules/rtp_rtcp/include/report_block_data.h | 59 +
.../modules/rtp_rtcp/include/rtcp_statistics.h | 77 +
.../libwebrtc/modules/rtp_rtcp/include/rtp_cvo.h | 56 +
.../rtp_rtcp/include/rtp_header_extension_map.h | 75 +
.../modules/rtp_rtcp/include/rtp_packet_sender.h | 40 +
.../libwebrtc/modules/rtp_rtcp/include/rtp_rtcp.h | 38 +
.../modules/rtp_rtcp/include/rtp_rtcp_defines.cc | 75 +
.../modules/rtp_rtcp/include/rtp_rtcp_defines.h | 495 +++
.../libwebrtc/modules/rtp_rtcp/leb128_gn/moz.build | 217 +
.../mocks/mock_recovered_packet_receiver.h | 30 +
.../rtp_rtcp/mocks/mock_rtcp_bandwidth_observer.h | 28 +
.../modules/rtp_rtcp/mocks/mock_rtcp_rtt_stats.h | 25 +
.../modules/rtp_rtcp/mocks/mock_rtp_rtcp.h | 192 +
.../modules/rtp_rtcp/rtp_rtcp_format_gn/moz.build | 274 ++
.../modules/rtp_rtcp/rtp_rtcp_gn/moz.build | 284 ++
.../modules/rtp_rtcp/rtp_video_header_gn/moz.build | 225 ++
.../source/absolute_capture_time_interpolator.cc | 125 +
.../source/absolute_capture_time_interpolator.h | 86 +
.../absolute_capture_time_interpolator_unittest.cc | 353 ++
.../source/absolute_capture_time_sender.cc | 124 +
.../rtp_rtcp/source/absolute_capture_time_sender.h | 88 +
.../absolute_capture_time_sender_unittest.cc | 374 ++
.../source/active_decode_targets_helper.cc | 124 +
.../rtp_rtcp/source/active_decode_targets_helper.h | 63 +
.../active_decode_targets_helper_unittest.cc | 272 ++
.../libwebrtc/modules/rtp_rtcp/source/byte_io.h | 402 ++
.../modules/rtp_rtcp/source/byte_io_unittest.cc | 270 ++
.../source/capture_clock_offset_updater.cc | 33 +
.../rtp_rtcp/source/capture_clock_offset_updater.h | 51 +
.../capture_clock_offset_updater_unittest.cc | 58 +
.../source/create_video_rtp_depacketizer.cc | 43 +
.../source/create_video_rtp_depacketizer.h | 26 +
.../deprecated/deprecated_rtp_sender_egress.cc | 465 +++
.../deprecated/deprecated_rtp_sender_egress.h | 151 +
.../modules/rtp_rtcp/source/dtmf_queue.cc | 51 +
.../libwebrtc/modules/rtp_rtcp/source/dtmf_queue.h | 43 +
.../rtp_rtcp/source/fec_private_tables_bursty.cc | 660 ++++
.../rtp_rtcp/source/fec_private_tables_bursty.h | 37 +
.../source/fec_private_tables_bursty_unittest.cc | 82 +
.../rtp_rtcp/source/fec_private_tables_random.cc | 660 ++++
.../rtp_rtcp/source/fec_private_tables_random.h | 27 +
.../modules/rtp_rtcp/source/fec_test_helper.cc | 230 ++
.../modules/rtp_rtcp/source/fec_test_helper.h | 125 +
.../source/flexfec_header_reader_writer.cc | 320 ++
.../rtp_rtcp/source/flexfec_header_reader_writer.h | 88 +
.../flexfec_header_reader_writer_unittest.cc | 560 +++
.../modules/rtp_rtcp/source/flexfec_receiver.cc | 196 +
.../rtp_rtcp/source/flexfec_receiver_unittest.cc | 691 ++++
.../modules/rtp_rtcp/source/flexfec_sender.cc | 204 +
.../rtp_rtcp/source/flexfec_sender_unittest.cc | 342 ++
.../rtp_rtcp/source/forward_error_correction.cc | 816 ++++
.../rtp_rtcp/source/forward_error_correction.h | 424 ++
.../source/forward_error_correction_internal.cc | 519 +++
.../source/forward_error_correction_internal.h | 121 +
.../source/frame_transformer_factory_unittest.cc | 69 +
.../libwebrtc/modules/rtp_rtcp/source/leb128.cc | 63 +
.../libwebrtc/modules/rtp_rtcp/source/leb128.h | 31 +
.../modules/rtp_rtcp/source/leb128_unittest.cc | 138 +
.../modules/rtp_rtcp/source/nack_rtx_unittest.cc | 293 ++
.../modules/rtp_rtcp/source/packet_loss_stats.cc | 141 +
.../modules/rtp_rtcp/source/packet_loss_stats.h | 58 +
.../rtp_rtcp/source/packet_loss_stats_unittest.cc | 198 +
.../modules/rtp_rtcp/source/packet_sequencer.cc | 156 +
.../modules/rtp_rtcp/source/packet_sequencer.h | 77 +
.../rtp_rtcp/source/packet_sequencer_unittest.cc | 250 ++
.../rtp_rtcp/source/receive_statistics_impl.cc | 431 ++
.../rtp_rtcp/source/receive_statistics_impl.h | 250 ++
.../rtp_rtcp/source/receive_statistics_unittest.cc | 901 +++++
.../rtp_rtcp/source/remote_ntp_time_estimator.cc | 109 +
.../source/remote_ntp_time_estimator_unittest.cc | 128 +
.../modules/rtp_rtcp/source/rtcp_nack_stats.cc | 29 +
.../modules/rtp_rtcp/source/rtcp_nack_stats.h | 40 +
.../rtp_rtcp/source/rtcp_nack_stats_unittest.cc | 64 +
.../modules/rtp_rtcp/source/rtcp_packet.cc | 99 +
.../modules/rtp_rtcp/source/rtcp_packet.h | 111 +
.../modules/rtp_rtcp/source/rtcp_packet/app.cc | 103 +
.../modules/rtp_rtcp/source/rtcp_packet/app.h | 67 +
.../rtp_rtcp/source/rtcp_packet/app_unittest.cc | 110 +
.../modules/rtp_rtcp/source/rtcp_packet/bye.cc | 141 +
.../modules/rtp_rtcp/source/rtcp_packet/bye.h | 57 +
.../rtp_rtcp/source/rtcp_packet/bye_unittest.cc | 147 +
.../rtp_rtcp/source/rtcp_packet/common_header.cc | 89 +
.../rtp_rtcp/source/rtcp_packet/common_header.h | 52 +
.../source/rtcp_packet/common_header_unittest.cc | 103 +
.../rtp_rtcp/source/rtcp_packet/compound_packet.cc | 50 +
.../rtp_rtcp/source/rtcp_packet/compound_packet.h | 47 +
.../source/rtcp_packet/compound_packet_unittest.cc | 155 +
.../modules/rtp_rtcp/source/rtcp_packet/dlrr.cc | 94 +
.../modules/rtp_rtcp/source/rtcp_packet/dlrr.h | 80 +
.../rtp_rtcp/source/rtcp_packet/dlrr_unittest.cc | 92 +
.../source/rtcp_packet/extended_reports.cc | 195 +
.../rtp_rtcp/source/rtcp_packet/extended_reports.h | 73 +
.../rtcp_packet/extended_reports_unittest.cc | 169 +
.../modules/rtp_rtcp/source/rtcp_packet/fir.cc | 113 +
.../modules/rtp_rtcp/source/rtcp_packet/fir.h | 62 +
.../rtp_rtcp/source/rtcp_packet/fir_unittest.cc | 93 +
.../source/rtcp_packet/loss_notification.cc | 133 +
.../source/rtcp_packet/loss_notification.h | 82 +
.../rtcp_packet/loss_notification_unittest.cc | 136 +
.../modules/rtp_rtcp/source/rtcp_packet/nack.cc | 176 +
.../modules/rtp_rtcp/source/rtcp_packet/nack.h | 59 +
.../rtp_rtcp/source/rtcp_packet/nack_unittest.cc | 171 +
.../modules/rtp_rtcp/source/rtcp_packet/pli.cc | 79 +
.../modules/rtp_rtcp/source/rtcp_packet/pli.h | 39 +
.../rtp_rtcp/source/rtcp_packet/pli_unittest.cc | 58 +
.../modules/rtp_rtcp/source/rtcp_packet/psfb.cc | 47 +
.../modules/rtp_rtcp/source/rtcp_packet/psfb.h | 48 +
.../source/rtcp_packet/rapid_resync_request.cc | 68 +
.../source/rtcp_packet/rapid_resync_request.h | 40 +
.../rtcp_packet/rapid_resync_request_unittest.cc | 64 +
.../rtp_rtcp/source/rtcp_packet/receiver_report.cc | 112 +
.../rtp_rtcp/source/rtcp_packet/receiver_report.h | 60 +
.../source/rtcp_packet/receiver_report_unittest.cc | 161 +
.../modules/rtp_rtcp/source/rtcp_packet/remb.cc | 143 +
.../modules/rtp_rtcp/source/rtcp_packet/remb.h | 59 +
.../rtp_rtcp/source/rtcp_packet/remb_unittest.cc | 141 +
.../rtp_rtcp/source/rtcp_packet/remote_estimate.cc | 148 +
.../rtp_rtcp/source/rtcp_packet/remote_estimate.h | 59 +
.../source/rtcp_packet/remote_estimate_unittest.cc | 56 +
.../rtp_rtcp/source/rtcp_packet/report_block.cc | 100 +
.../rtp_rtcp/source/rtcp_packet/report_block.h | 72 +
.../source/rtcp_packet/report_block_unittest.cc | 110 +
.../modules/rtp_rtcp/source/rtcp_packet/rrtr.cc | 49 +
.../modules/rtp_rtcp/source/rtcp_packet/rrtr.h | 59 +
.../rtp_rtcp/source/rtcp_packet/rrtr_unittest.cc | 50 +
.../modules/rtp_rtcp/source/rtcp_packet/rtpfb.cc | 45 +
.../modules/rtp_rtcp/source/rtcp_packet/rtpfb.h | 47 +
.../modules/rtp_rtcp/source/rtcp_packet/sdes.cc | 199 +
.../modules/rtp_rtcp/source/rtcp_packet/sdes.h | 56 +
.../rtp_rtcp/source/rtcp_packet/sdes_unittest.cc | 244 ++
.../rtp_rtcp/source/rtcp_packet/sender_report.cc | 141 +
.../rtp_rtcp/source/rtcp_packet/sender_report.h | 81 +
.../source/rtcp_packet/sender_report_unittest.cc | 142 +
.../rtp_rtcp/source/rtcp_packet/target_bitrate.cc | 127 +
.../rtp_rtcp/source/rtcp_packet/target_bitrate.h | 63 +
.../source/rtcp_packet/target_bitrate_unittest.cc | 96 +
.../rtp_rtcp/source/rtcp_packet/tmmb_item.cc | 71 +
.../rtp_rtcp/source/rtcp_packet/tmmb_item.h | 52 +
.../modules/rtp_rtcp/source/rtcp_packet/tmmbn.cc | 109 +
.../modules/rtp_rtcp/source/rtcp_packet/tmmbn.h | 55 +
.../rtp_rtcp/source/rtcp_packet/tmmbn_unittest.cc | 105 +
.../modules/rtp_rtcp/source/rtcp_packet/tmmbr.cc | 111 +
.../modules/rtp_rtcp/source/rtcp_packet/tmmbr.h | 54 +
.../rtp_rtcp/source/rtcp_packet/tmmbr_unittest.cc | 89 +
.../source/rtcp_packet/transport_feedback.cc | 737 ++++
.../source/rtcp_packet/transport_feedback.h | 185 +
.../rtcp_packet/transport_feedback_unittest.cc | 667 ++++
.../rtp_rtcp/source/rtcp_packet_unittest.cc | 42 +
.../modules/rtp_rtcp/source/rtcp_receiver.cc | 1292 ++++++
.../modules/rtp_rtcp/source/rtcp_receiver.h | 457 +++
.../rtp_rtcp/source/rtcp_receiver_unittest.cc | 2012 ++++++++++
.../modules/rtp_rtcp/source/rtcp_sender.cc | 974 +++++
.../modules/rtp_rtcp/source/rtcp_sender.h | 332 ++
.../rtp_rtcp/source/rtcp_sender_unittest.cc | 844 ++++
.../modules/rtp_rtcp/source/rtcp_transceiver.cc | 150 +
.../modules/rtp_rtcp/source/rtcp_transceiver.h | 105 +
.../rtp_rtcp/source/rtcp_transceiver_config.cc | 80 +
.../rtp_rtcp/source/rtcp_transceiver_config.h | 188 +
.../rtp_rtcp/source/rtcp_transceiver_impl.cc | 857 ++++
.../rtp_rtcp/source/rtcp_transceiver_impl.h | 170 +
.../source/rtcp_transceiver_impl_unittest.cc | 1741 ++++++++
.../rtp_rtcp/source/rtcp_transceiver_unittest.cc | 366 ++
.../source/rtp_dependency_descriptor_extension.cc | 55 +
.../source/rtp_dependency_descriptor_extension.h | 59 +
...rtp_dependency_descriptor_extension_unittest.cc | 137 +
.../source/rtp_dependency_descriptor_reader.cc | 239 ++
.../source/rtp_dependency_descriptor_reader.h | 69 +
.../source/rtp_dependency_descriptor_writer.cc | 396 ++
.../source/rtp_dependency_descriptor_writer.h | 89 +
.../source/rtp_descriptor_authentication.cc | 58 +
.../source/rtp_descriptor_authentication.h | 27 +
.../modules/rtp_rtcp/source/rtp_fec_unittest.cc | 1129 ++++++
.../modules/rtp_rtcp/source/rtp_format.cc | 144 +
.../libwebrtc/modules/rtp_rtcp/source/rtp_format.h | 61 +
.../modules/rtp_rtcp/source/rtp_format_h264.cc | 313 ++
.../modules/rtp_rtcp/source/rtp_format_h264.h | 99 +
.../rtp_rtcp/source/rtp_format_h264_unittest.cc | 496 +++
.../modules/rtp_rtcp/source/rtp_format_unittest.cc | 283 ++
.../rtp_rtcp/source/rtp_format_video_generic.cc | 100 +
.../rtp_rtcp/source/rtp_format_video_generic.h | 71 +
.../source/rtp_format_video_generic_unittest.cc | 172 +
.../modules/rtp_rtcp/source/rtp_format_vp8.cc | 169 +
.../modules/rtp_rtcp/source/rtp_format_vp8.h | 74 +
.../rtp_rtcp/source/rtp_format_vp8_test_helper.cc | 174 +
.../rtp_rtcp/source/rtp_format_vp8_test_helper.h | 56 +
.../rtp_rtcp/source/rtp_format_vp8_unittest.cc | 115 +
.../modules/rtp_rtcp/source/rtp_format_vp9.cc | 451 +++
.../modules/rtp_rtcp/source/rtp_format_vp9.h | 72 +
.../rtp_rtcp/source/rtp_format_vp9_unittest.cc | 608 +++
.../source/rtp_generic_frame_descriptor.cc | 100 +
.../rtp_rtcp/source/rtp_generic_frame_descriptor.h | 79 +
.../rtp_generic_frame_descriptor_extension.cc | 173 +
.../rtp_generic_frame_descriptor_extension.h | 45 +
..._generic_frame_descriptor_extension_unittest.cc | 264 ++
.../rtp_rtcp/source/rtp_header_extension_map.cc | 169 +
.../source/rtp_header_extension_map_unittest.cc | 115 +
.../rtp_rtcp/source/rtp_header_extension_size.cc | 48 +
.../rtp_rtcp/source/rtp_header_extension_size.h | 32 +
.../source/rtp_header_extension_size_unittest.cc | 92 +
.../rtp_rtcp/source/rtp_header_extensions.cc | 934 +++++
.../rtp_rtcp/source/rtp_header_extensions.h | 386 ++
.../modules/rtp_rtcp/source/rtp_packet.cc | 712 ++++
.../libwebrtc/modules/rtp_rtcp/source/rtp_packet.h | 283 ++
.../modules/rtp_rtcp/source/rtp_packet_history.cc | 428 ++
.../modules/rtp_rtcp/source/rtp_packet_history.h | 196 +
.../rtp_rtcp/source/rtp_packet_history_unittest.cc | 681 ++++
.../modules/rtp_rtcp/source/rtp_packet_received.cc | 81 +
.../modules/rtp_rtcp/source/rtp_packet_received.h | 77 +
.../modules/rtp_rtcp/source/rtp_packet_to_send.cc | 31 +
.../modules/rtp_rtcp/source/rtp_packet_to_send.h | 147 +
.../modules/rtp_rtcp/source/rtp_packet_unittest.cc | 1285 ++++++
.../modules/rtp_rtcp/source/rtp_packetizer_av1.cc | 402 ++
.../modules/rtp_rtcp/source/rtp_packetizer_av1.h | 72 +
.../source/rtp_packetizer_av1_test_helper.cc | 57 +
.../source/rtp_packetizer_av1_test_helper.h | 51 +
.../rtp_rtcp/source/rtp_packetizer_av1_unittest.cc | 342 ++
.../modules/rtp_rtcp/source/rtp_rtcp_config.h | 27 +
.../modules/rtp_rtcp/source/rtp_rtcp_impl.cc | 768 ++++
.../modules/rtp_rtcp/source/rtp_rtcp_impl.h | 332 ++
.../modules/rtp_rtcp/source/rtp_rtcp_impl2.cc | 832 ++++
.../modules/rtp_rtcp/source/rtp_rtcp_impl2.h | 341 ++
.../rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc | 1157 ++++++
.../rtp_rtcp/source/rtp_rtcp_impl_unittest.cc | 705 ++++
.../modules/rtp_rtcp/source/rtp_rtcp_interface.h | 468 +++
.../modules/rtp_rtcp/source/rtp_sender.cc | 834 ++++
.../libwebrtc/modules/rtp_rtcp/source/rtp_sender.h | 217 +
.../modules/rtp_rtcp/source/rtp_sender_audio.cc | 400 ++
.../modules/rtp_rtcp/source/rtp_sender_audio.h | 122 +
.../rtp_rtcp/source/rtp_sender_audio_unittest.cc | 264 ++
.../modules/rtp_rtcp/source/rtp_sender_egress.cc | 617 +++
.../modules/rtp_rtcp/source/rtp_sender_egress.h | 186 +
.../rtp_rtcp/source/rtp_sender_egress_unittest.cc | 990 +++++
.../modules/rtp_rtcp/source/rtp_sender_unittest.cc | 1373 +++++++
.../modules/rtp_rtcp/source/rtp_sender_video.cc | 929 +++++
.../modules/rtp_rtcp/source/rtp_sender_video.h | 255 ++
.../rtp_sender_video_frame_transformer_delegate.cc | 227 ++
.../rtp_sender_video_frame_transformer_delegate.h | 94 +
.../rtp_rtcp/source/rtp_sender_video_unittest.cc | 1697 ++++++++
.../rtp_rtcp/source/rtp_sequence_number_map.cc | 129 +
.../rtp_rtcp/source/rtp_sequence_number_map.h | 85 +
.../source/rtp_sequence_number_map_unittest.cc | 502 +++
.../libwebrtc/modules/rtp_rtcp/source/rtp_util.cc | 63 +
.../libwebrtc/modules/rtp_rtcp/source/rtp_util.h | 31 +
.../modules/rtp_rtcp/source/rtp_util_unittest.cc | 86 +
.../modules/rtp_rtcp/source/rtp_video_header.cc | 110 +
.../modules/rtp_rtcp/source/rtp_video_header.h | 101 +
.../rtp_rtcp/source/rtp_video_header_unittest.cc | 351 ++
.../rtp_video_layers_allocation_extension.cc | 323 ++
.../source/rtp_video_layers_allocation_extension.h | 39 +
...p_video_layers_allocation_extension_unittest.cc | 287 ++
.../modules/rtp_rtcp/source/source_tracker.cc | 114 +
.../modules/rtp_rtcp/source/source_tracker.h | 138 +
.../rtp_rtcp/source/source_tracker_unittest.cc | 533 +++
.../libwebrtc/modules/rtp_rtcp/source/time_util.cc | 54 +
.../libwebrtc/modules/rtp_rtcp/source/time_util.h | 56 +
.../modules/rtp_rtcp/source/time_util_unittest.cc | 128 +
.../modules/rtp_rtcp/source/tmmbr_help.cc | 184 +
.../libwebrtc/modules/rtp_rtcp/source/tmmbr_help.h | 35 +
.../modules/rtp_rtcp/source/ulpfec_generator.cc | 268 ++
.../modules/rtp_rtcp/source/ulpfec_generator.h | 123 +
.../rtp_rtcp/source/ulpfec_generator_unittest.cc | 273 ++
.../rtp_rtcp/source/ulpfec_header_reader_writer.cc | 141 +
.../rtp_rtcp/source/ulpfec_header_reader_writer.h | 69 +
.../source/ulpfec_header_reader_writer_unittest.cc | 245 ++
.../modules/rtp_rtcp/source/ulpfec_receiver.cc | 245 ++
.../modules/rtp_rtcp/source/ulpfec_receiver.h | 80 +
.../rtp_rtcp/source/ulpfec_receiver_unittest.cc | 543 +++
.../modules/rtp_rtcp/source/video_fec_generator.h | 54 +
.../rtp_rtcp/source/video_rtp_depacketizer.cc | 42 +
.../rtp_rtcp/source/video_rtp_depacketizer.h | 41 +
.../rtp_rtcp/source/video_rtp_depacketizer_av1.cc | 395 ++
.../rtp_rtcp/source/video_rtp_depacketizer_av1.h | 42 +
.../source/video_rtp_depacketizer_av1_unittest.cc | 392 ++
.../source/video_rtp_depacketizer_generic.cc | 72 +
.../source/video_rtp_depacketizer_generic.h | 30 +
.../video_rtp_depacketizer_generic_unittest.cc | 71 +
.../rtp_rtcp/source/video_rtp_depacketizer_h264.cc | 310 ++
.../rtp_rtcp/source/video_rtp_depacketizer_h264.h | 28 +
.../source/video_rtp_depacketizer_h264_unittest.cc | 425 ++
.../rtp_rtcp/source/video_rtp_depacketizer_raw.cc | 28 +
.../rtp_rtcp/source/video_rtp_depacketizer_raw.h | 30 +
.../source/video_rtp_depacketizer_raw_unittest.cc | 51 +
.../rtp_rtcp/source/video_rtp_depacketizer_vp8.cc | 201 +
.../rtp_rtcp/source/video_rtp_depacketizer_vp8.h | 42 +
.../source/video_rtp_depacketizer_vp8_unittest.cc | 244 ++
.../rtp_rtcp/source/video_rtp_depacketizer_vp9.cc | 226 ++
.../rtp_rtcp/source/video_rtp_depacketizer_vp9.h | 42 +
.../source/video_rtp_depacketizer_vp9_unittest.cc | 373 ++
.../test/testFec/average_residual_loss_xor_codes.h | 57 +
.../modules/rtp_rtcp/test/testFec/test_fec.cc | 474 +++
.../test/testFec/test_packet_masks_metrics.cc | 1060 +++++
.../libwebrtc/modules/third_party/fft/BUILD.gn | 16 +
.../libwebrtc/modules/third_party/fft/LICENSE | 25 +
.../modules/third_party/fft/README.chromium | 12 +
.../libwebrtc/modules/third_party/fft/fft.c | 942 +++++
.../libwebrtc/modules/third_party/fft/fft.h | 58 +
.../modules/third_party/fft/fft_gn/moz.build | 217 +
.../libwebrtc/modules/third_party/g711/BUILD.gn | 17 +
.../libwebrtc/modules/third_party/g711/LICENSE | 14 +
.../modules/third_party/g711/README.chromium | 11 +
.../libwebrtc/modules/third_party/g711/g711.c | 72 +
.../libwebrtc/modules/third_party/g711/g711.h | 350 ++
.../modules/third_party/g711/g711_3p_gn/moz.build | 217 +
.../libwebrtc/modules/third_party/g722/BUILD.gn | 18 +
.../libwebrtc/modules/third_party/g722/LICENSE | 20 +
.../modules/third_party/g722/README.chromium | 11 +
.../modules/third_party/g722/g722_3p_gn/moz.build | 221 ++
.../modules/third_party/g722/g722_decode.c | 399 ++
.../modules/third_party/g722/g722_enc_dec.h | 154 +
.../modules/third_party/g722/g722_encode.c | 429 ++
.../modules/third_party/portaudio/BUILD.gn | 18 +
.../modules/third_party/portaudio/LICENSE | 91 +
.../modules/third_party/portaudio/README.chromium | 14 +
.../third_party/portaudio/pa_memorybarrier.h | 144 +
.../modules/third_party/portaudio/pa_ringbuffer.c | 237 ++
.../modules/third_party/portaudio/pa_ringbuffer.h | 263 ++
third_party/libwebrtc/modules/utility/BUILD.gn | 78 +
third_party/libwebrtc/modules/utility/DEPS | 5 +
third_party/libwebrtc/modules/utility/OWNERS | 1 +
.../modules/utility/include/helpers_android.h | 80 +
.../modules/utility/include/jvm_android.h | 193 +
.../modules/utility/maybe_worker_thread.cc | 99 +
.../modules/utility/maybe_worker_thread.h | 86 +
.../utility/maybe_worker_thread_unittests.cc | 161 +
.../modules/utility/source/helpers_android.cc | 117 +
.../modules/utility/source/jvm_android.cc | 292 ++
.../libwebrtc/modules/utility/utility_gn/moz.build | 229 ++
.../libwebrtc/modules/video_capture/BUILD.gn | 152 +
third_party/libwebrtc/modules/video_capture/DEPS | 6 +
third_party/libwebrtc/modules/video_capture/OWNERS | 4 +
.../modules/video_capture/device_info_impl.cc | 224 ++
.../modules/video_capture/device_info_impl.h | 63 +
.../video_capture/linux/device_info_linux.cc | 42 +
.../video_capture/linux/device_info_v4l2.cc | 517 +++
.../modules/video_capture/linux/device_info_v4l2.h | 71 +
.../video_capture/linux/video_capture_linux.cc | 51 +
.../video_capture/linux/video_capture_v4l2.cc | 489 +++
.../video_capture/linux/video_capture_v4l2.h | 65 +
.../video_capture/raw_video_sink_interface.h | 34 +
.../video_capture/test/video_capture_unittest.cc | 376 ++
.../modules/video_capture/video_capture.h | 169 +
.../modules/video_capture/video_capture_config.h | 33 +
.../modules/video_capture/video_capture_defines.h | 59 +
.../modules/video_capture/video_capture_factory.cc | 26 +
.../modules/video_capture/video_capture_factory.h | 40 +
.../video_capture/video_capture_factory_null.cc | 27 +
.../modules/video_capture/video_capture_impl.cc | 334 ++
.../modules/video_capture/video_capture_impl.h | 119 +
.../video_capture_internal_impl_gn/moz.build | 254 ++
.../video_capture_module_gn/moz.build | 237 ++
.../video_capture/windows/device_info_ds.cc | 713 ++++
.../modules/video_capture/windows/device_info_ds.h | 107 +
.../video_capture/windows/help_functions_ds.cc | 158 +
.../video_capture/windows/help_functions_ds.h | 118 +
.../video_capture/windows/sink_filter_ds.cc | 959 +++++
.../modules/video_capture/windows/sink_filter_ds.h | 162 +
.../video_capture/windows/video_capture_ds.cc | 322 ++
.../video_capture/windows/video_capture_ds.h | 74 +
.../windows/video_capture_factory_windows.cc | 38 +
.../libwebrtc/modules/video_coding/BUILD.gn | 1329 +++++++
third_party/libwebrtc/modules/video_coding/DEPS | 25 +
third_party/libwebrtc/modules/video_coding/OWNERS | 7 +
.../modules/video_coding/chain_diff_calculator.cc | 62 +
.../modules/video_coding/chain_diff_calculator.h | 46 +
.../chain_diff_calculator_gn/moz.build | 225 ++
.../video_coding/chain_diff_calculator_unittest.cc | 126 +
.../codec_globals_headers_gn/moz.build | 205 +
.../modules/video_coding/codecs/av1/BUILD.gn | 110 +
.../libwebrtc/modules/video_coding/codecs/av1/DEPS | 4 +
.../video_coding/codecs/av1/av1_svc_config.cc | 118 +
.../video_coding/codecs/av1/av1_svc_config.h | 32 +
.../codecs/av1/av1_svc_config_gn/moz.build | 225 ++
.../codecs/av1/av1_svc_config_unittest.cc | 171 +
.../video_coding/codecs/av1/dav1d_decoder.cc | 205 +
.../video_coding/codecs/av1/dav1d_decoder.h | 23 +
.../video_coding/codecs/av1/libaom_av1_encoder.cc | 825 ++++
.../video_coding/codecs/av1/libaom_av1_encoder.h | 31 +
.../codecs/av1/libaom_av1_encoder_unittest.cc | 264 ++
.../video_coding/codecs/av1/libaom_av1_unittest.cc | 368 ++
.../modules/video_coding/codecs/h264/DEPS | 5 +
.../modules/video_coding/codecs/h264/OWNERS | 2 +
.../modules/video_coding/codecs/h264/h264.cc | 166 +
.../video_coding/codecs/h264/h264_color_space.cc | 178 +
.../video_coding/codecs/h264/h264_color_space.h | 38 +
.../video_coding/codecs/h264/h264_decoder_impl.cc | 657 +++
.../video_coding/codecs/h264/h264_decoder_impl.h | 109 +
.../video_coding/codecs/h264/h264_encoder_impl.cc | 713 ++++
.../video_coding/codecs/h264/h264_encoder_impl.h | 125 +
.../codecs/h264/h264_encoder_impl_unittest.cc | 89 +
.../codecs/h264/h264_simulcast_unittest.cc | 107 +
.../video_coding/codecs/h264/include/h264.h | 72 +
.../codecs/h264/include/h264_globals.h | 85 +
.../codecs/h264/test/h264_impl_unittest.cc | 99 +
.../codecs/interface/common_constants.h | 28 +
.../codecs/interface/libvpx_interface.cc | 373 ++
.../codecs/interface/libvpx_interface.h | 128 +
.../codecs/interface/mock_libvpx_interface.h | 147 +
.../multiplex/augmented_video_frame_buffer.cc | 65 +
.../include/augmented_video_frame_buffer.h | 62 +
.../multiplex/include/multiplex_decoder_adapter.h | 80 +
.../multiplex/include/multiplex_encoder_adapter.h | 91 +
.../codecs/multiplex/multiplex_decoder_adapter.cc | 266 ++
.../multiplex/multiplex_encoded_image_packer.cc | 277 ++
.../multiplex/multiplex_encoded_image_packer.h | 120 +
.../codecs/multiplex/multiplex_encoder_adapter.cc | 353 ++
.../multiplex/test/multiplex_adapter_unittest.cc | 319 ++
.../codecs/test/android_codec_factory_helper.cc | 78 +
.../codecs/test/android_codec_factory_helper.h | 30 +
.../codecs/test/batch/empty-runtime-deps | 1 +
.../codecs/test/batch/run-instantiation-tests.sh | 56 +
.../codecs/test/batch/run-videoprocessor-tests.sh | 70 +
.../codecs/test/encoded_video_frame_producer.cc | 77 +
.../codecs/test/encoded_video_frame_producer.h | 100 +
.../codecs/test/objc_codec_factory_helper.h | 28 +
.../codecs/test/objc_codec_factory_helper.mm | 30 +
.../codecs/test/plot_webrtc_test_logs.py | 438 ++
.../codecs/test/video_codec_analyzer.cc | 186 +
.../codecs/test/video_codec_analyzer.h | 65 +
.../codecs/test/video_codec_analyzer_unittest.cc | 141 +
.../video_coding/codecs/test/video_codec_test.cc | 456 +++
.../codecs/test/video_codec_tester_impl.cc | 325 ++
.../codecs/test/video_codec_tester_impl.h | 53 +
.../test/video_codec_tester_impl_unittest.cc | 259 ++
.../codecs/test/video_codec_unittest.cc | 182 +
.../codecs/test/video_codec_unittest.h | 128 +
.../video_encoder_decoder_instantiation_tests.cc | 155 +
.../codecs/test/videocodec_test_av1.cc | 101 +
.../videocodec_test_fixture_config_unittest.cc | 63 +
.../codecs/test/videocodec_test_fixture_impl.cc | 860 ++++
.../codecs/test/videocodec_test_fixture_impl.h | 107 +
.../codecs/test/videocodec_test_libvpx.cc | 465 +++
.../codecs/test/videocodec_test_mediacodec.cc | 267 ++
.../codecs/test/videocodec_test_openh264.cc | 87 +
.../codecs/test/videocodec_test_stats_impl.cc | 441 +++
.../codecs/test/videocodec_test_stats_impl.h | 95 +
.../test/videocodec_test_stats_impl_unittest.cc | 105 +
.../codecs/test/videocodec_test_videotoolbox.cc | 88 +
.../video_coding/codecs/test/videoprocessor.cc | 722 ++++
.../video_coding/codecs/test/videoprocessor.h | 263 ++
.../codecs/test/videoprocessor_unittest.cc | 197 +
.../codecs/vp8/default_temporal_layers.cc | 884 +++++
.../codecs/vp8/default_temporal_layers.h | 168 +
.../codecs/vp8/default_temporal_layers_unittest.cc | 781 ++++
.../codecs/vp8/include/temporal_layers_checker.h | 63 +
.../modules/video_coding/codecs/vp8/include/vp8.h | 50 +
.../video_coding/codecs/vp8/include/vp8_globals.h | 49 +
.../video_coding/codecs/vp8/libvpx_vp8_decoder.cc | 384 ++
.../video_coding/codecs/vp8/libvpx_vp8_decoder.h | 74 +
.../video_coding/codecs/vp8/libvpx_vp8_encoder.cc | 1438 +++++++
.../video_coding/codecs/vp8/libvpx_vp8_encoder.h | 159 +
.../codecs/vp8/libvpx_vp8_simulcast_test.cc | 112 +
.../video_coding/codecs/vp8/screenshare_layers.cc | 624 +++
.../video_coding/codecs/vp8/screenshare_layers.h | 164 +
.../codecs/vp8/screenshare_layers_unittest.cc | 788 ++++
.../video_coding/codecs/vp8/temporal_layers.h | 17 +
.../codecs/vp8/temporal_layers_checker.cc | 146 +
.../codecs/vp8/test/vp8_impl_unittest.cc | 913 +++++
.../video_coding/codecs/vp8/vp8_scalability.cc | 24 +
.../video_coding/codecs/vp8/vp8_scalability.h | 24 +
.../libwebrtc/modules/video_coding/codecs/vp9/DEPS | 3 +
.../modules/video_coding/codecs/vp9/include/vp9.h | 54 +
.../video_coding/codecs/vp9/include/vp9_globals.h | 179 +
.../video_coding/codecs/vp9/libvpx_vp9_decoder.cc | 403 ++
.../video_coding/codecs/vp9/libvpx_vp9_decoder.h | 60 +
.../video_coding/codecs/vp9/libvpx_vp9_encoder.cc | 2194 ++++++++++
.../video_coding/codecs/vp9/libvpx_vp9_encoder.h | 251 ++
.../modules/video_coding/codecs/vp9/svc_config.cc | 240 ++
.../modules/video_coding/codecs/vp9/svc_config.h | 39 +
.../video_coding/codecs/vp9/svc_config_unittest.cc | 285 ++
.../codecs/vp9/test/vp9_impl_unittest.cc | 2446 ++++++++++++
.../modules/video_coding/codecs/vp9/vp9.cc | 118 +
.../codecs/vp9/vp9_frame_buffer_pool.cc | 182 +
.../codecs/vp9/vp9_frame_buffer_pool.h | 134 +
.../modules/video_coding/decoder_database.cc | 152 + .../modules/video_coding/decoder_database.h | 73 + .../video_coding/decoder_database_unittest.cc | 84 + .../modules/video_coding/decoding_state.cc | 368 ++ .../modules/video_coding/decoding_state.h | 89 + .../video_coding/decoding_state_unittest.cc | 713 ++++ .../modules/video_coding/encoded_frame.cc | 151 + .../libwebrtc/modules/video_coding/encoded_frame.h | 127 + .../video_coding/encoded_frame_gn/moz.build | 232 ++ .../modules/video_coding/event_wrapper.cc | 41 + .../libwebrtc/modules/video_coding/event_wrapper.h | 47 + .../modules/video_coding/fec_controller_default.cc | 211 + .../modules/video_coding/fec_controller_default.h | 68 + .../video_coding/fec_controller_unittest.cc | 114 + .../modules/video_coding/fec_rate_table.h | 461 +++ .../libwebrtc/modules/video_coding/frame_buffer.cc | 265 ++ .../libwebrtc/modules/video_coding/frame_buffer.h | 89 + .../modules/video_coding/frame_buffer2.cc | 625 +++ .../libwebrtc/modules/video_coding/frame_buffer2.h | 193 + .../modules/video_coding/frame_buffer2_unittest.cc | 665 ++++ .../video_coding/frame_dependencies_calculator.cc | 75 + .../video_coding/frame_dependencies_calculator.h | 49 + .../frame_dependencies_calculator_gn/moz.build | 225 ++ .../frame_dependencies_calculator_unittest.cc | 121 + .../modules/video_coding/frame_helpers.cc | 96 + .../libwebrtc/modules/video_coding/frame_helpers.h | 30 + .../video_coding/frame_helpers_gn/moz.build | 232 ++ .../modules/video_coding/frame_helpers_unittest.cc | 34 + .../libwebrtc/modules/video_coding/frame_object.cc | 131 + .../libwebrtc/modules/video_coding/frame_object.h | 68 + .../libwebrtc/modules/video_coding/g3doc/index.md | 177 + .../modules/video_coding/generic_decoder.cc | 325 ++ .../modules/video_coding/generic_decoder.h | 124 + .../video_coding/generic_decoder_unittest.cc | 190 + .../modules/video_coding/h264_packet_buffer.cc | 287 ++ .../modules/video_coding/h264_packet_buffer.h | 56 + .../video_coding/h264_packet_buffer_unittest.cc | 778 ++++ .../video_coding/h264_sprop_parameter_sets.cc | 53 + .../video_coding/h264_sprop_parameter_sets.h | 38 + .../h264_sprop_parameter_sets_unittest.cc | 45 + .../modules/video_coding/h264_sps_pps_tracker.cc | 271 ++ .../modules/video_coding/h264_sps_pps_tracker.h | 76 + .../video_coding/h264_sps_pps_tracker_unittest.cc | 368 ++ .../libwebrtc/modules/video_coding/histogram.cc | 61 + .../libwebrtc/modules/video_coding/histogram.h | 46 + .../modules/video_coding/histogram_unittest.cc | 77 + .../video_coding/include/video_codec_initializer.h | 45 + .../video_coding/include/video_codec_interface.cc | 20 + .../video_coding/include/video_codec_interface.h | 121 + .../modules/video_coding/include/video_coding.h | 150 + .../video_coding/include/video_coding_defines.h | 121 + .../video_coding/include/video_error_codes.h | 31 + .../modules/video_coding/internal_defines.h | 23 + .../modules/video_coding/jitter_buffer.cc | 892 +++++ .../libwebrtc/modules/video_coding/jitter_buffer.h | 275 ++ .../modules/video_coding/jitter_buffer_common.h | 59 + .../modules/video_coding/jitter_buffer_unittest.cc | 1848 +++++++++ .../video_coding/loss_notification_controller.cc | 173 + .../video_coding/loss_notification_controller.h | 111 + .../loss_notification_controller_unittest.cc | 607 +++ .../modules/video_coding/media_opt_util.cc | 704 ++++ .../modules/video_coding/media_opt_util.h | 350 ++ .../modules/video_coding/nack_requester.cc | 340 ++ .../modules/video_coding/nack_requester.h | 157 + 
.../video_coding/nack_requester_gn/moz.build | 233 ++ .../video_coding/nack_requester_unittest.cc | 402 ++ .../libwebrtc/modules/video_coding/packet.cc | 69 + .../libwebrtc/modules/video_coding/packet.h | 80 + .../modules/video_coding/packet_buffer.cc | 422 ++ .../libwebrtc/modules/video_coding/packet_buffer.h | 134 + .../video_coding/packet_buffer_gn/moz.build | 232 ++ .../modules/video_coding/packet_buffer_unittest.cc | 828 ++++ .../libwebrtc/modules/video_coding/receiver.cc | 191 + .../libwebrtc/modules/video_coding/receiver.h | 69 + .../modules/video_coding/receiver_unittest.cc | 493 +++ .../video_coding/rtp_frame_id_only_ref_finder.cc | 33 + .../video_coding/rtp_frame_id_only_ref_finder.h | 38 + .../video_coding/rtp_frame_reference_finder.cc | 189 + .../video_coding/rtp_frame_reference_finder.h | 60 + .../rtp_frame_reference_finder_unittest.cc | 322 ++ .../modules/video_coding/rtp_generic_ref_finder.cc | 44 + .../modules/video_coding/rtp_generic_ref_finder.h | 32 + .../video_coding/rtp_seq_num_only_ref_finder.cc | 186 + .../video_coding/rtp_seq_num_only_ref_finder.h | 70 + .../modules/video_coding/rtp_vp8_ref_finder.cc | 254 ++ .../modules/video_coding/rtp_vp8_ref_finder.h | 83 + .../video_coding/rtp_vp8_ref_finder_unittest.cc | 370 ++ .../modules/video_coding/rtp_vp9_ref_finder.cc | 367 ++ .../modules/video_coding/rtp_vp9_ref_finder.h | 105 + .../video_coding/rtp_vp9_ref_finder_unittest.cc | 637 +++ .../libwebrtc/modules/video_coding/session_info.cc | 540 +++ .../libwebrtc/modules/video_coding/session_info.h | 122 + .../modules/video_coding/session_info_unittest.cc | 469 +++ .../libwebrtc/modules/video_coding/svc/BUILD.gn | 135 + .../svc/create_scalability_structure.cc | 295 ++ .../svc/create_scalability_structure.h | 35 + .../video_coding/svc/scalability_mode_util.cc | 390 ++ .../video_coding/svc/scalability_mode_util.h | 46 + .../svc/scalability_mode_util_gn/moz.build | 225 ++ .../svc/scalability_mode_util_unittest.cc | 116 + .../svc/scalability_structure_full_svc.cc | 444 +++ .../svc/scalability_structure_full_svc.h | 190 + .../svc/scalability_structure_full_svc_unittest.cc | 123 + .../svc/scalability_structure_key_svc.cc | 427 ++ .../svc/scalability_structure_key_svc.h | 138 + .../svc/scalability_structure_key_svc_unittest.cc | 245 ++ .../svc/scalability_structure_l2t2_key_shift.cc | 177 + .../svc/scalability_structure_l2t2_key_shift.h | 64 + ...calability_structure_l2t2_key_shift_unittest.cc | 358 ++ .../svc/scalability_structure_simulcast.cc | 353 ++ .../svc/scalability_structure_simulcast.h | 145 + .../svc/scalability_structure_test_helpers.cc | 101 + .../svc/scalability_structure_test_helpers.h | 59 + .../svc/scalability_structure_unittest.cc | 395 ++ .../svc/scalability_structures_gn/moz.build | 232 ++ .../video_coding/svc/scalable_video_controller.h | 139 + .../svc/scalable_video_controller_gn/moz.build | 221 ++ .../svc/scalable_video_controller_no_layering.cc | 88 + .../svc/scalable_video_controller_no_layering.h | 40 + .../modules/video_coding/svc/svc_rate_allocator.cc | 452 +++ .../modules/video_coding/svc/svc_rate_allocator.h | 69 + .../svc/svc_rate_allocator_gn/moz.build | 225 ++ .../svc/svc_rate_allocator_unittest.cc | 584 +++ .../modules/video_coding/test/stream_generator.cc | 128 + .../modules/video_coding/test/stream_generator.h | 74 + .../libwebrtc/modules/video_coding/timing/BUILD.gn | 153 + .../modules/video_coding/timing/codec_timer.cc | 58 + .../modules/video_coding/timing/codec_timer.h | 50 + .../video_coding/timing/codec_timer_gn/moz.build | 221 ++ 
.../timing/frame_delay_variation_kalman_filter.cc | 148 + .../timing/frame_delay_variation_kalman_filter.h | 106 + .../moz.build | 221 ++ ...frame_delay_variation_kalman_filter_unittest.cc | 115 + .../video_coding/timing/inter_frame_delay.cc | 71 + .../video_coding/timing/inter_frame_delay.h | 46 + .../timing/inter_frame_delay_gn/moz.build | 221 ++ .../timing/inter_frame_delay_unittest.cc | 190 + .../video_coding/timing/jitter_estimator.cc | 476 +++ .../modules/video_coding/timing/jitter_estimator.h | 218 + .../timing/jitter_estimator_gn/moz.build | 232 ++ .../timing/jitter_estimator_unittest.cc | 305 ++ .../modules/video_coding/timing/rtt_filter.cc | 161 + .../modules/video_coding/timing/rtt_filter.h | 69 + .../video_coding/timing/rtt_filter_gn/moz.build | 221 ++ .../video_coding/timing/rtt_filter_unittest.cc | 105 + .../video_coding/timing/timestamp_extrapolator.cc | 169 + .../video_coding/timing/timestamp_extrapolator.h | 48 + .../timing/timestamp_extrapolator_gn/moz.build | 221 ++ .../timing/timestamp_extrapolator_unittest.cc | 221 ++ .../modules/video_coding/timing/timing.cc | 297 ++ .../libwebrtc/modules/video_coding/timing/timing.h | 160 + .../video_coding/timing/timing_module_gn/moz.build | 232 ++ .../modules/video_coding/timing/timing_unittest.cc | 339 ++ .../utility/bandwidth_quality_scaler.cc | 148 + .../utility/bandwidth_quality_scaler.h | 93 + .../utility/bandwidth_quality_scaler_unittest.cc | 278 ++ .../video_coding/utility/decoded_frames_history.cc | 92 + .../video_coding/utility/decoded_frames_history.h | 52 + .../utility/decoded_frames_history_unittest.cc | 114 + .../modules/video_coding/utility/frame_dropper.cc | 268 ++ .../modules/video_coding/utility/frame_dropper.h | 94 + .../video_coding/utility/frame_dropper_unittest.cc | 160 + .../utility/framerate_controller_deprecated.cc | 85 + .../utility/framerate_controller_deprecated.h | 47 + .../framerate_controller_deprecated_unittest.cc | 90 + .../modules/video_coding/utility/ivf_defines.h | 25 + .../video_coding/utility/ivf_file_reader.cc | 238 ++ .../modules/video_coding/utility/ivf_file_reader.h | 82 + .../utility/ivf_file_reader_unittest.cc | 188 + .../video_coding/utility/ivf_file_writer.cc | 245 ++ .../modules/video_coding/utility/ivf_file_writer.h | 66 + .../utility/ivf_file_writer_unittest.cc | 311 ++ .../modules/video_coding/utility/qp_parser.cc | 53 + .../modules/video_coding/utility/qp_parser.h | 45 + .../video_coding/utility/qp_parser_unittest.cc | 118 + .../modules/video_coding/utility/quality_scaler.cc | 334 ++ .../modules/video_coding/utility/quality_scaler.h | 120 + .../utility/quality_scaler_unittest.cc | 254 ++ .../utility/simulcast_rate_allocator.cc | 343 ++ .../utility/simulcast_rate_allocator.h | 70 + .../utility/simulcast_rate_allocator_unittest.cc | 824 ++++ .../utility/simulcast_test_fixture_impl.cc | 967 +++++ .../utility/simulcast_test_fixture_impl.h | 95 + .../video_coding/utility/simulcast_utility.cc | 93 + .../video_coding/utility/simulcast_utility.h | 33 + .../modules/video_coding/utility/vp8_constants.h | 27 + .../video_coding/utility/vp8_header_parser.cc | 200 + .../video_coding/utility/vp8_header_parser.h | 40 + .../modules/video_coding/utility/vp9_constants.h | 198 + .../utility/vp9_uncompressed_header_parser.cc | 533 +++ .../utility/vp9_uncompressed_header_parser.h | 155 + .../vp9_uncompressed_header_parser_unittest.cc | 94 + .../video_coding/video_codec_initializer.cc | 352 ++ .../video_codec_initializer_unittest.cc | 493 +++ .../video_codec_interface_gn/moz.build | 226 ++ 
.../modules/video_coding/video_coding_defines.cc | 20 +
.../modules/video_coding/video_coding_gn/moz.build | 249 ++
.../modules/video_coding/video_coding_impl.cc | 254 ++
.../modules/video_coding/video_coding_impl.h | 179 +
.../video_coding/video_coding_utility_gn/moz.build | 243 ++
.../modules/video_coding/video_receiver.cc | 278 ++
.../modules/video_coding/video_receiver2.cc | 108 +
.../modules/video_coding/video_receiver2.h | 67 +
.../video_coding/video_receiver2_unittest.cc | 145 +
.../video_coding/video_receiver_unittest.cc | 236 ++
.../webrtc_libvpx_interface_gn/moz.build | 221 ++
.../modules/video_coding/webrtc_vp8_gn/moz.build | 235 ++
.../webrtc_vp8_scalability_gn/moz.build | 221 ++
.../webrtc_vp8_temporal_layers_gn/moz.build | 237 ++
.../modules/video_coding/webrtc_vp9_gn/moz.build | 238 ++
.../video_coding/webrtc_vp9_helpers_gn/moz.build | 233 ++
2460 files changed, 471722 insertions(+)
create mode 100644 third_party/libwebrtc/modules/BUILD.gn
create mode 100644 third_party/libwebrtc/modules/async_audio_processing/BUILD.gn
create mode 100644 third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc
create mode 100644 third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.h
create mode 100644 third_party/libwebrtc/modules/async_audio_processing/async_audio_processing_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/BUILD.gn
create mode 100644 third_party/libwebrtc/modules/audio_coding/DEPS
create mode 100644 third_party/libwebrtc/modules/audio_coding/OWNERS
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_coding.gni
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_coding_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_coding_module_typedefs_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_coding_opus_common_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_encoder_cng_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_config.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/config.proto
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump.proto
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller_manager.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h
create mode 100755 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/parse_ana_dump.py
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_config_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/defines.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testLib.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/bandwidth_info.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/default_neteq_factory_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/g3doc/index.md
create mode 100644 third_party/libwebrtc/modules/audio_coding/g711_c_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/g711_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/g722_c_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/g722_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/ilbc_c_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/ilbc_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/include/audio_coding_module.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/include/audio_coding_module_typedefs.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/isac_bwinfo_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/isac_vad_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/legacy_encoded_audio_frame_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/expand.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/expand.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/expand_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/g3doc/index.md
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/histogram.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/merge.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/merge.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/normal.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/normal.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc
create mode 100755 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/pcm16b_c_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/pcm16b_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/red_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/Channel.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/Channel.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/PCMFile.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/PCMFile.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/RTPFile.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/RTPFile.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestRedFec.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestRedFec.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestStereo.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestStereo.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/Tester.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/opus_test.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/opus_test.h
create mode 100644 third_party/libwebrtc/modules/audio_coding/test/target_delay_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_coding/webrtc_cng_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/webrtc_multiopus_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/webrtc_opus_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_coding/webrtc_opus_wrapper_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_device/BUILD.gn
create mode 100644 third_party/libwebrtc/modules/audio_device/DEPS
create mode 100644 third_party/libwebrtc/modules/audio_device/OWNERS
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_player.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_player.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_common.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_device_template.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_device_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_manager.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_manager.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_manager_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_record_jni.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_record_jni.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_track_jni.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/audio_track_jni.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/build_info.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/build_info.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/ensure_initialized.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/ensure_initialized.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/BuildInfo.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioEffects.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioManager.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioRecord.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioTrack.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioUtils.java
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_common.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_common.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_player.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_player.h
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_recorder.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/android/opensles_recorder.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_config.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_data_observer.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_generic.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_generic.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_gn/moz.build
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_impl.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_impl.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_name.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_name.h
create mode 100644 third_party/libwebrtc/modules/audio_device/audio_device_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.h
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.h
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.h
create mode 100644 third_party/libwebrtc/modules/audio_device/fine_audio_buffer.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/fine_audio_buffer.h
create mode 100644 third_party/libwebrtc/modules/audio_device/fine_audio_buffer_unittest.cc
create mode 100644 third_party/libwebrtc/modules/audio_device/g3doc/audio_device_module.md
create mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device.h
create mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device_data_observer.h
create mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device_default.h
create mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device_defines.h create
mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device_factory.cc create mode 100644 third_party/libwebrtc/modules/audio_device/include/audio_device_factory.h create mode 100644 third_party/libwebrtc/modules/audio_device/include/fake_audio_device.h create mode 100644 third_party/libwebrtc/modules/audio_device/include/mock_audio_device.h create mode 100644 third_party/libwebrtc/modules/audio_device/include/mock_audio_transport.h create mode 100644 third_party/libwebrtc/modules/audio_device/include/test_audio_device.cc create mode 100644 third_party/libwebrtc/modules/audio_device/include/test_audio_device.h create mode 100644 third_party/libwebrtc/modules/audio_device/include/test_audio_device_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/linux/pulseaudiosymboltable_linux.cc create mode 100644 third_party/libwebrtc/modules/audio_device/linux/pulseaudiosymboltable_linux.h create mode 100644 third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.cc create mode 100644 third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.h create mode 100644 third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.cc create mode 100644 third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.h create mode 100644 third_party/libwebrtc/modules/audio_device/mock_audio_device_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.cc create mode 100644 third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.cc create mode 100644 third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.cc create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.cc create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.cc create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.cc create mode 
100644 third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.h create mode 100644 third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_mixer/DEPS create mode 100644 third_party/libwebrtc/modules/audio_mixer/OWNERS create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/audio_mixer_test.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/frame_combiner.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/frame_combiner.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/frame_combiner_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/g3doc/index.md create mode 100644 third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/output_rate_calculator.h create mode 100644 third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.cc create mode 100644 third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/DEPS create mode 100644 third_party/libwebrtc/modules/audio_processing/OWNERS create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h create mode 
100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc 
create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/gain_control.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/utility.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/utility.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h create mode 100644 third_party/libwebrtc/modules/audio_processing/api_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build create mode 100644 
third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/debug.proto create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc create 
mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_impl.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/aec_dump.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/fast_math.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/histograms.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/histograms.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_common.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_config.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/apmtest.m
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/performance_timer.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json
 create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
 create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py
 create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
 create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py
 create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/vad.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/test_utils.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/test_utils.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/unittest.proto
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/common.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/windows_private.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/DEPS
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/common.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/DEPS
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/OWNERS
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/congestion_controller_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_increase_detector_interface.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/estimators_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v1_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_test.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/pushback_controller_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bwe_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/include/receive_side_congestion_controller.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/pcc/utility_function_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/remb_throttler.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/remb_throttler.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/remb_throttler_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/control_handler_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.h
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/DEPS
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/OWNERS
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/delegated_source_list_controller.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_metadata.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_objc_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capture_types.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_capturer_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_generator.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_generator.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_rotation.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_rotation.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_rotation_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_frame_win.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_geometry.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_geometry.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_geometry_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_region.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_region.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/desktop_region_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/differ_block.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/differ_block.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/differ_block_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/differ_vector_sse2.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/differ_vector_sse2.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/fake_desktop_capturer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/fake_desktop_capturer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/fallback_desktop_capturer_wrapper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/fallback_desktop_capturer_wrapper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/fallback_desktop_capturer_wrapper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/full_screen_application_handler.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/full_screen_application_handler.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/full_screen_window_detector.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/full_screen_window_detector.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/portal_request_response.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/scoped_glib.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/test/test_screencast_stream_provider.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/test/test_screencast_stream_provider.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/xdg_desktop_portal_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/wayland/xdg_session_details.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration_monitor.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration_monitor.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_cgimage.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_cgimage.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_iosurface.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_iosurface.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_provider.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_provider.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/full_screen_mac_application_handler.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/full_screen_mac_application_handler.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/screen_capturer_mac.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/screen_capturer_mac.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/window_list_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mac/window_list_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mock_desktop_capturer_callback.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mock_desktop_capturer_callback.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_linux.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_mac.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_null.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/resolution_tracker.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/resolution_tracker.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/rgba_color.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/rgba_color.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/rgba_color_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capture_frame_queue.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_darwin.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_fuchsia.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_fuchsia.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_helper.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_helper.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_helper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_integration_test.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_linux.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_mac_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_null.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_capturer_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_linux.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_lock_posix.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_lock_posix.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_mac.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/screen_drawer_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/shared_desktop_frame.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/shared_desktop_frame.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/shared_memory.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/shared_memory.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/test_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/test_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/test_utils_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/1_24bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/1_32bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/1_8bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/2_1bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/2_32bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/3_32bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_test_data/3_4bpp.cur
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_unittest_resources.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/cursor_unittest_resources.rc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/d3d_device.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/d3d_device.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/desktop.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/desktop.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/desktop_capture_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/desktop_capture_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/display_configuration_monitor.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/display_configuration_monitor.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_adapter_duplicator.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_adapter_duplicator.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_context.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_context.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_duplicator_controller.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_duplicator_controller.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_frame.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_frame.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_output_duplicator.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_output_duplicator.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_mapping.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_mapping.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_staging.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_staging.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/full_screen_win_application_handler.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/full_screen_win_application_handler.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/scoped_gdi_object.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/scoped_thread_desktop.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/scoped_thread_desktop.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capture_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capture_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capture_utils_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_directx.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_directx.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_directx_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_gdi.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_gdi.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_magnifier.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_magnifier.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/selected_window_context.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/selected_window_context.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/test_support/test_window.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/test_support/test_window.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capture_session.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capture_session.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capture_source.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capture_source.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capture_source_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capturer_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capturer_win.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_capturer_win_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_desktop_frame.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/wgc_desktop_frame.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/window_capture_utils.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/window_capture_utils.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/window_capture_utils_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/window_capturer_win_gdi.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/win/window_capturer_win_gdi.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_capturer_linux.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_capturer_mac.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_capturer_null.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_capturer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_capturer_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder_mac.h
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder_mac.mm
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder_win.cc
 create mode 100644 third_party/libwebrtc/modules/desktop_capture/window_finder_win.h
 create mode 100644 third_party/libwebrtc/modules/include/module_common_types.h
 create mode 100644 third_party/libwebrtc/modules/include/module_common_types_public.h
 create mode 100644 third_party/libwebrtc/modules/include/module_fec_types.h
 create mode 100644 third_party/libwebrtc/modules/module_api_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/module_api_public_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/module_common_types_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/module_fec_api_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/pacing/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/pacing/DEPS
 create mode 100644 third_party/libwebrtc/modules/pacing/OWNERS
 create mode 100644 third_party/libwebrtc/modules/pacing/bitrate_prober.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/bitrate_prober.h
 create mode 100644 third_party/libwebrtc/modules/pacing/bitrate_prober_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/g3doc/index.md
 create mode 100644 third_party/libwebrtc/modules/pacing/interval_budget.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/interval_budget.h
 create mode 100644 third_party/libwebrtc/modules/pacing/interval_budget_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/pacing/interval_budget_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/pacing_controller.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/pacing_controller.h
 create mode 100644 third_party/libwebrtc/modules/pacing/pacing_controller_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/pacing_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/pacing/packet_router.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/packet_router.h
 create mode 100644 third_party/libwebrtc/modules/pacing/packet_router_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/prioritized_packet_queue.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/prioritized_packet_queue.h
 create mode 100644 third_party/libwebrtc/modules/pacing/prioritized_packet_queue_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/rtp_packet_pacer.h
 create mode 100644 third_party/libwebrtc/modules/pacing/task_queue_paced_sender.cc
 create mode 100644 third_party/libwebrtc/modules/pacing/task_queue_paced_sender.h
 create mode 100644 third_party/libwebrtc/modules/pacing/task_queue_paced_sender_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/portal/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/portal/OWNERS
 create mode 100644 third_party/libwebrtc/modules/portal/pipewire.sigs
 create mode 100644 third_party/libwebrtc/modules/portal/pipewire_stub_header.fragment
 create mode 100644 third_party/libwebrtc/modules/portal/pipewire_utils.cc
 create mode 100644 third_party/libwebrtc/modules/portal/pipewire_utils.h
 create mode 100644 third_party/libwebrtc/modules/portal/portal_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/portal/portal_request_response.h
 create mode 100644 third_party/libwebrtc/modules/portal/scoped_glib.cc
 create mode 100644 third_party/libwebrtc/modules/portal/scoped_glib.h
 create mode 100644 third_party/libwebrtc/modules/portal/xdg_desktop_portal_utils.cc
 create mode 100644 third_party/libwebrtc/modules/portal/xdg_desktop_portal_utils.h
 create mode 100644 third_party/libwebrtc/modules/portal/xdg_session_details.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/DEPS
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/OWNERS
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/aimd_rate_control.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/aimd_rate_control.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/aimd_rate_control_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/bwe_defines.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/include/bwe_defines.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/include/remote_bitrate_estimator.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/inter_arrival.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/inter_arrival.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/inter_arrival_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/overuse_detector.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/overuse_detector.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/overuse_detector_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/overuse_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/overuse_estimator.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/packet_arrival_map.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/packet_arrival_map.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/packet_arrival_map_test.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_abs_send_time.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_abs_send_time.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_abs_send_time_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_single_stream.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_single_stream.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_single_stream_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_unittest_helper.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_bitrate_estimator_unittest_helper.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_estimator_proxy.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_estimator_proxy.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/remote_estimator_proxy_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/test/bwe_test_logging.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/test/bwe_test_logging.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/tools/bwe_rtp.cc
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/tools/bwe_rtp.h
 create mode 100644 third_party/libwebrtc/modules/remote_bitrate_estimator/tools/rtp_to_text.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/DEPS
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/OWNERS
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/flexfec_receiver.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/flexfec_sender.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/receive_statistics.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/recovered_packet_receiver.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/remote_ntp_time_estimator.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/report_block_data.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/report_block_data.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtcp_statistics.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_cvo.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_header_extension_map.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_packet_sender.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_rtcp.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/leb128_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/mocks/mock_recovered_packet_receiver.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/mocks/mock_rtcp_bandwidth_observer.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/mocks/mock_rtcp_rtt_stats.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/rtp_rtcp_format_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/rtp_rtcp_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/rtp_video_header_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_interpolator.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_interpolator.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_interpolator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_sender.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_sender.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/absolute_capture_time_sender_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/active_decode_targets_helper.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/active_decode_targets_helper.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/active_decode_targets_helper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/byte_io.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/byte_io_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/capture_clock_offset_updater.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/capture_clock_offset_updater.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/capture_clock_offset_updater_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/create_video_rtp_depacketizer.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/deprecated/deprecated_rtp_sender_egress.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/deprecated/deprecated_rtp_sender_egress.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/dtmf_queue.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/dtmf_queue.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_private_tables_bursty.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_private_tables_bursty.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_private_tables_bursty_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_private_tables_random.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_private_tables_random.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_test_helper.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/fec_test_helper.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_header_reader_writer.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_header_reader_writer.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_header_reader_writer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_receiver.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_receiver_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_sender.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/flexfec_sender_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/forward_error_correction.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/forward_error_correction.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/forward_error_correction_internal.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/forward_error_correction_internal.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/frame_transformer_factory_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/leb128.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/leb128.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/leb128_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/nack_rtx_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_loss_stats.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_loss_stats.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_loss_stats_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_sequencer.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_sequencer.h
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/packet_sequencer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/receive_statistics_impl.cc
 create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/receive_statistics_impl.h
 create mode 100644
third_party/libwebrtc/modules/rtp_rtcp/source/receive_statistics_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/remote_ntp_time_estimator.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/remote_ntp_time_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_nack_stats.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_nack_stats.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_nack_stats_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/app.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/app.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/app_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/bye.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/bye.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/bye_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/common_header.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/common_header.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/common_header_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/compound_packet.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/compound_packet.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/compound_packet_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/dlrr.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/dlrr.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/dlrr_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/extended_reports.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/extended_reports.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/extended_reports_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/fir.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/fir.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/fir_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/loss_notification.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/loss_notification.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/loss_notification_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/nack.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/nack.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/nack_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/pli.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/pli.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/pli_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/psfb.cc create mode 100644 
third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/psfb.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rapid_resync_request.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rapid_resync_request.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rapid_resync_request_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/receiver_report.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/receiver_report.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/receiver_report_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remb.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remb.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remb_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remote_estimate.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remote_estimate.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/remote_estimate_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/report_block.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/report_block.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/report_block_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rrtr.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rrtr.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rrtr_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rtpfb.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/rtpfb.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sdes.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sdes.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sdes_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sender_report.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sender_report.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/sender_report_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/target_bitrate.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/target_bitrate.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/target_bitrate_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmb_item.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmb_item.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbn.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbn.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbn_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbr.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbr.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/tmmbr_unittest.cc create mode 100644 
third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/transport_feedback.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet/transport_feedback_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_packet_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_receiver.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_receiver.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_receiver_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_sender.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_sender.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_sender_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_config.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_config.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_impl.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_impl.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtcp_transceiver_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_extension_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_reader.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_reader.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_writer.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_dependency_descriptor_writer.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_descriptor_authentication.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_descriptor_authentication.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_fec_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_h264.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_h264.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_video_generic.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp8.cc create mode 100644 
third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp8.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp8_test_helper.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp8_test_helper.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp8_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp9.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extension_map.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extension_map_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extension_size.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extension_size.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extension_size_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extensions.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_header_extensions.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_history.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_history.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_history_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_received.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_received.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_to_send.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_to_send.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packet_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packetizer_av1.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packetizer_av1.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packetizer_av1_test_helper.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packetizer_av1_test_helper.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_packetizer_av1_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_config.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl2.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl2.h create mode 100644 
third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_rtcp_interface.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_audio.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_audio.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_egress.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_egress.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_egress_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_video.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_video.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sequence_number_map.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sequence_number_map.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_sequence_number_map_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_util.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_util.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_util_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_header.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_header.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_header_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/rtp_video_layers_allocation_extension_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/source_tracker.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/source_tracker.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/source_tracker_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/time_util.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/time_util.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/time_util_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/tmmbr_help.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/tmmbr_help.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_generator.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_generator.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_generator_unittest.cc create mode 
100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_header_reader_writer.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_header_reader_writer.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_header_reader_writer_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_receiver.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_receiver.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/ulpfec_receiver_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_fec_generator.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_av1_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_generic_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_raw.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_raw_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp8_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9_unittest.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/test/testFec/average_residual_loss_xor_codes.h create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/test/testFec/test_fec.cc create mode 100644 third_party/libwebrtc/modules/rtp_rtcp/test/testFec/test_packet_masks_metrics.cc create mode 100644 third_party/libwebrtc/modules/third_party/fft/BUILD.gn create mode 100644 third_party/libwebrtc/modules/third_party/fft/LICENSE create mode 100644 third_party/libwebrtc/modules/third_party/fft/README.chromium create mode 100644 third_party/libwebrtc/modules/third_party/fft/fft.c create mode 100644 third_party/libwebrtc/modules/third_party/fft/fft.h create mode 100644 third_party/libwebrtc/modules/third_party/fft/fft_gn/moz.build create mode 100644 third_party/libwebrtc/modules/third_party/g711/BUILD.gn create mode 100644 third_party/libwebrtc/modules/third_party/g711/LICENSE create mode 100644 
third_party/libwebrtc/modules/third_party/g711/README.chromium create mode 100644 third_party/libwebrtc/modules/third_party/g711/g711.c create mode 100644 third_party/libwebrtc/modules/third_party/g711/g711.h create mode 100644 third_party/libwebrtc/modules/third_party/g711/g711_3p_gn/moz.build create mode 100644 third_party/libwebrtc/modules/third_party/g722/BUILD.gn create mode 100644 third_party/libwebrtc/modules/third_party/g722/LICENSE create mode 100644 third_party/libwebrtc/modules/third_party/g722/README.chromium create mode 100644 third_party/libwebrtc/modules/third_party/g722/g722_3p_gn/moz.build create mode 100644 third_party/libwebrtc/modules/third_party/g722/g722_decode.c create mode 100644 third_party/libwebrtc/modules/third_party/g722/g722_enc_dec.h create mode 100644 third_party/libwebrtc/modules/third_party/g722/g722_encode.c create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/BUILD.gn create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/LICENSE create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/README.chromium create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/pa_memorybarrier.h create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/pa_ringbuffer.c create mode 100644 third_party/libwebrtc/modules/third_party/portaudio/pa_ringbuffer.h create mode 100644 third_party/libwebrtc/modules/utility/BUILD.gn create mode 100644 third_party/libwebrtc/modules/utility/DEPS create mode 100644 third_party/libwebrtc/modules/utility/OWNERS create mode 100644 third_party/libwebrtc/modules/utility/include/helpers_android.h create mode 100644 third_party/libwebrtc/modules/utility/include/jvm_android.h create mode 100644 third_party/libwebrtc/modules/utility/maybe_worker_thread.cc create mode 100644 third_party/libwebrtc/modules/utility/maybe_worker_thread.h create mode 100644 third_party/libwebrtc/modules/utility/maybe_worker_thread_unittests.cc create mode 100644 third_party/libwebrtc/modules/utility/source/helpers_android.cc create mode 100644 third_party/libwebrtc/modules/utility/source/jvm_android.cc create mode 100644 third_party/libwebrtc/modules/utility/utility_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_capture/BUILD.gn create mode 100644 third_party/libwebrtc/modules/video_capture/DEPS create mode 100644 third_party/libwebrtc/modules/video_capture/OWNERS create mode 100644 third_party/libwebrtc/modules/video_capture/device_info_impl.cc create mode 100644 third_party/libwebrtc/modules/video_capture/device_info_impl.h create mode 100644 third_party/libwebrtc/modules/video_capture/linux/device_info_linux.cc create mode 100644 third_party/libwebrtc/modules/video_capture/linux/device_info_v4l2.cc create mode 100644 third_party/libwebrtc/modules/video_capture/linux/device_info_v4l2.h create mode 100644 third_party/libwebrtc/modules/video_capture/linux/video_capture_linux.cc create mode 100644 third_party/libwebrtc/modules/video_capture/linux/video_capture_v4l2.cc create mode 100644 third_party/libwebrtc/modules/video_capture/linux/video_capture_v4l2.h create mode 100644 third_party/libwebrtc/modules/video_capture/raw_video_sink_interface.h create mode 100644 third_party/libwebrtc/modules/video_capture/test/video_capture_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture.h create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_config.h create mode 100644 
third_party/libwebrtc/modules/video_capture/video_capture_defines.h create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_factory.cc create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_factory.h create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_factory_null.cc create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_impl.cc create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_impl.h create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_internal_impl_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_capture/video_capture_module_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_capture/windows/device_info_ds.cc create mode 100644 third_party/libwebrtc/modules/video_capture/windows/device_info_ds.h create mode 100644 third_party/libwebrtc/modules/video_capture/windows/help_functions_ds.cc create mode 100644 third_party/libwebrtc/modules/video_capture/windows/help_functions_ds.h create mode 100644 third_party/libwebrtc/modules/video_capture/windows/sink_filter_ds.cc create mode 100644 third_party/libwebrtc/modules/video_capture/windows/sink_filter_ds.h create mode 100644 third_party/libwebrtc/modules/video_capture/windows/video_capture_ds.cc create mode 100644 third_party/libwebrtc/modules/video_capture/windows/video_capture_ds.h create mode 100644 third_party/libwebrtc/modules/video_capture/windows/video_capture_factory_windows.cc create mode 100644 third_party/libwebrtc/modules/video_coding/BUILD.gn create mode 100644 third_party/libwebrtc/modules/video_coding/DEPS create mode 100644 third_party/libwebrtc/modules/video_coding/OWNERS create mode 100644 third_party/libwebrtc/modules/video_coding/chain_diff_calculator.cc create mode 100644 third_party/libwebrtc/modules/video_coding/chain_diff_calculator.h create mode 100644 third_party/libwebrtc/modules/video_coding/chain_diff_calculator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/chain_diff_calculator_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codec_globals_headers_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/BUILD.gn create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/DEPS create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/DEPS create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/OWNERS create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264.cc 
create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264_globals.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/interface/common_constants.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/interface/mock_libvpx_interface.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/batch/empty-runtime-deps create mode 100755 third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-instantiation-tests.sh create mode 100755 third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-videoprocessor-tests.sh create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.mm create mode 100755 third_party/libwebrtc/modules/video_coding/codecs/test/plot_webrtc_test_logs.py create mode 100644 
third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_analyzer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_analyzer.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_analyzer_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_test.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_tester_impl.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_tester_impl.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_tester_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/video_encoder_decoder_instantiation_tests.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_av1.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_config_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_libvpx.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_mediacodec.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_openh264.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_videotoolbox.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/include/temporal_layers_checker.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/include/vp8.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/include/vp8_globals.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_simulcast_test.cc create mode 100644 
third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers_checker.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc create mode 100644 third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h create mode 100644 third_party/libwebrtc/modules/video_coding/decoder_database.cc create mode 100644 third_party/libwebrtc/modules/video_coding/decoder_database.h create mode 100644 third_party/libwebrtc/modules/video_coding/decoder_database_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/decoding_state.cc create mode 100644 third_party/libwebrtc/modules/video_coding/decoding_state.h create mode 100644 third_party/libwebrtc/modules/video_coding/decoding_state_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/encoded_frame.cc create mode 100644 third_party/libwebrtc/modules/video_coding/encoded_frame.h create mode 100644 third_party/libwebrtc/modules/video_coding/encoded_frame_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/event_wrapper.cc create mode 100644 third_party/libwebrtc/modules/video_coding/event_wrapper.h create mode 100644 third_party/libwebrtc/modules/video_coding/fec_controller_default.cc create mode 100644 third_party/libwebrtc/modules/video_coding/fec_controller_default.h create mode 100644 third_party/libwebrtc/modules/video_coding/fec_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/fec_rate_table.h create mode 100644 third_party/libwebrtc/modules/video_coding/frame_buffer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_buffer.h create mode 100644 third_party/libwebrtc/modules/video_coding/frame_buffer2.cc create mode 100644 
third_party/libwebrtc/modules/video_coding/frame_buffer2.h create mode 100644 third_party/libwebrtc/modules/video_coding/frame_buffer2_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_dependencies_calculator.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_dependencies_calculator.h create mode 100644 third_party/libwebrtc/modules/video_coding/frame_dependencies_calculator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/frame_dependencies_calculator_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_helpers.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_helpers.h create mode 100644 third_party/libwebrtc/modules/video_coding/frame_helpers_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/frame_helpers_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_object.cc create mode 100644 third_party/libwebrtc/modules/video_coding/frame_object.h create mode 100644 third_party/libwebrtc/modules/video_coding/g3doc/index.md create mode 100644 third_party/libwebrtc/modules/video_coding/generic_decoder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/generic_decoder.h create mode 100644 third_party/libwebrtc/modules/video_coding/generic_decoder_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_packet_buffer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_packet_buffer.h create mode 100644 third_party/libwebrtc/modules/video_coding/h264_packet_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sprop_parameter_sets.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sprop_parameter_sets.h create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sprop_parameter_sets_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sps_pps_tracker.cc create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sps_pps_tracker.h create mode 100644 third_party/libwebrtc/modules/video_coding/h264_sps_pps_tracker_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/histogram.cc create mode 100644 third_party/libwebrtc/modules/video_coding/histogram.h create mode 100644 third_party/libwebrtc/modules/video_coding/histogram_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_codec_initializer.h create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_codec_interface.cc create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_codec_interface.h create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_coding.h create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_coding_defines.h create mode 100644 third_party/libwebrtc/modules/video_coding/include/video_error_codes.h create mode 100644 third_party/libwebrtc/modules/video_coding/internal_defines.h create mode 100644 third_party/libwebrtc/modules/video_coding/jitter_buffer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/jitter_buffer.h create mode 100644 third_party/libwebrtc/modules/video_coding/jitter_buffer_common.h create mode 100644 third_party/libwebrtc/modules/video_coding/jitter_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/loss_notification_controller.cc create mode 100644 third_party/libwebrtc/modules/video_coding/loss_notification_controller.h 
create mode 100644 third_party/libwebrtc/modules/video_coding/loss_notification_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/media_opt_util.cc create mode 100644 third_party/libwebrtc/modules/video_coding/media_opt_util.h create mode 100644 third_party/libwebrtc/modules/video_coding/nack_requester.cc create mode 100644 third_party/libwebrtc/modules/video_coding/nack_requester.h create mode 100644 third_party/libwebrtc/modules/video_coding/nack_requester_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/nack_requester_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/packet.cc create mode 100644 third_party/libwebrtc/modules/video_coding/packet.h create mode 100644 third_party/libwebrtc/modules/video_coding/packet_buffer.cc create mode 100644 third_party/libwebrtc/modules/video_coding/packet_buffer.h create mode 100644 third_party/libwebrtc/modules/video_coding/packet_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/packet_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/receiver.cc create mode 100644 third_party/libwebrtc/modules/video_coding/receiver.h create mode 100644 third_party/libwebrtc/modules/video_coding/receiver_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_frame_id_only_ref_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_frame_id_only_ref_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_frame_reference_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_frame_reference_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_frame_reference_finder_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_generic_ref_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_generic_ref_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_seq_num_only_ref_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_seq_num_only_ref_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp8_ref_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp8_ref_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp8_ref_finder_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.cc create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder.h create mode 100644 third_party/libwebrtc/modules/video_coding/rtp_vp9_ref_finder_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/session_info.cc create mode 100644 third_party/libwebrtc/modules/video_coding/session_info.h create mode 100644 third_party/libwebrtc/modules/video_coding/session_info_unittest.cc create mode 100644 third_party/libwebrtc/modules/video_coding/svc/BUILD.gn create mode 100644 third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.cc create mode 100644 third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.h create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.cc create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.h create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_gn/moz.build create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_unittest.cc 
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structure_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalability_structures_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/test/stream_generator.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/test/stream_generator.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/BUILD.gn
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/codec_timer.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/codec_timer.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/codec_timer_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/frame_delay_variation_kalman_filter.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/frame_delay_variation_kalman_filter.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/frame_delay_variation_kalman_filter_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/frame_delay_variation_kalman_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/inter_frame_delay.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/inter_frame_delay.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/inter_frame_delay_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/inter_frame_delay_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/jitter_estimator.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/jitter_estimator.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/jitter_estimator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/jitter_estimator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/rtt_filter.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/rtt_filter.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/rtt_filter_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/rtt_filter_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timestamp_extrapolator.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timestamp_extrapolator.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timestamp_extrapolator_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timestamp_extrapolator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timing.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timing.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timing_module_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/timing/timing_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/bandwidth_quality_scaler.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/bandwidth_quality_scaler.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/bandwidth_quality_scaler_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/decoded_frames_history.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/decoded_frames_history.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/decoded_frames_history_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/frame_dropper.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/frame_dropper.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/frame_dropper_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/framerate_controller_deprecated.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/framerate_controller_deprecated.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/framerate_controller_deprecated_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_defines.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_reader.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_reader.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_reader_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_writer.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_writer.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/ivf_file_writer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/qp_parser.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/qp_parser.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/qp_parser_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/quality_scaler.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/quality_scaler.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/quality_scaler_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_rate_allocator.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_rate_allocator.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_rate_allocator_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_test_fixture_impl.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_test_fixture_impl.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_utility.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/simulcast_utility.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp8_constants.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp8_header_parser.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp8_header_parser.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp9_constants.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp9_uncompressed_header_parser.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_codec_initializer.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_codec_initializer_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_codec_interface_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_coding_defines.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_coding_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_coding_impl.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_coding_impl.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_coding_utility_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_receiver.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_receiver2.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_receiver2.h
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_receiver2_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/video_receiver_unittest.cc
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_libvpx_interface_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_vp8_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_vp8_scalability_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_vp8_temporal_layers_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_vp9_gn/moz.build
 create mode 100644 third_party/libwebrtc/modules/video_coding/webrtc_vp9_helpers_gn/moz.build
(limited to 'third_party/libwebrtc/modules')
diff --git a/third_party/libwebrtc/modules/BUILD.gn b/third_party/libwebrtc/modules/BUILD.gn new file mode 100644 index 0000000000..ffb7491b31 --- /dev/null +++ b/third_party/libwebrtc/modules/BUILD.gn @@ -0,0 +1,256 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../webrtc.gni") +import("audio_coding/audio_coding.gni") + +group("modules") { + deps = [ + "audio_coding", + "audio_device", + "audio_mixer", + "audio_processing", + "congestion_controller", + "pacing", + "remote_bitrate_estimator", + "rtp_rtcp", + "utility", + "video_coding", + ] + + if (rtc_desktop_capture_supported) { + deps += [ "desktop_capture" ] + } +} + +rtc_source_set("module_api_public") { + sources = [ "include/module_common_types_public.h" ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("module_api") { + visibility = [ "*" ] + sources = [ "include/module_common_types.h" ] +} + +rtc_source_set("module_fec_api") { + visibility = [ "*" ] + sources = [ "include/module_fec_types.h" ] +} + +if (rtc_include_tests && !build_with_chromium) { + modules_tests_resources = [ + "../resources/audio_coding/testfile16kHz.pcm", + "../resources/audio_coding/testfile32kHz.pcm", + "../resources/audio_coding/teststereo32kHz.pcm", + "../resources/foreman_cif.yuv", + ] + + if (is_ios) { + bundle_data("modules_tests_bundle_data") { + testonly = true + sources = modules_tests_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_test("modules_tests") { + testonly = true + + deps = [ + "../test:test_main", + "../test:video_test_common", + "audio_coding:audio_coding_modules_tests", + "rtp_rtcp:rtp_rtcp_modules_tests", + "video_coding:video_coding_modules_tests", + "//testing/gtest", + ] + + if (rtc_desktop_capture_supported) { + deps += [ "desktop_capture:desktop_capture_modules_tests" ] + } + + data = modules_tests_resources + + if (is_android) { + use_default_launcher = false + deps += [ + # NOTE(brandtr): Including Java classes seems only to be possible from + # rtc_test targets. Therefore we include this target here, instead of + # in video_coding_modules_tests, where it is actually used. 
+ "../sdk/android:libjingle_peerconnection_java", + "//sdk/android:native_test_jni_onload", + "//testing/android/native_test:native_test_support", + ] + shard_timeout = 900 + } + + if (is_ios) { + deps += [ ":modules_tests_bundle_data" ] + } + } + + modules_unittests_resources = [ + "../resources/audio_coding/neteq_opus.rtp", + "../resources/audio_coding/neteq_opus_dtx.rtp", + "../resources/audio_coding/neteq_universal_new.rtp", + "../resources/audio_coding/speech_4_channels_48k_one_second.wav", + "../resources/audio_coding/speech_mono_16kHz.pcm", + "../resources/audio_coding/speech_mono_32_48kHz.pcm", + "../resources/audio_coding/testfile16kHz.pcm", + "../resources/audio_coding/testfile32kHz.pcm", + "../resources/audio_coding/testfile_fake_stereo_32kHz.pcm", + "../resources/audio_coding/teststereo32kHz.pcm", + "../resources/audio_device/audio_short16.pcm", + "../resources/audio_device/audio_short44.pcm", + "../resources/audio_device/audio_short48.pcm", + "../resources/audio_processing/agc/agc_audio.pcm", + "../resources/audio_processing/agc/agc_no_circular_buffer.dat", + "../resources/audio_processing/agc/agc_pitch_gain.dat", + "../resources/audio_processing/agc/agc_pitch_lag.dat", + "../resources/audio_processing/agc/agc_spectral_peak.dat", + "../resources/audio_processing/agc/agc_vad.dat", + "../resources/audio_processing/agc/agc_voicing_prob.dat", + "../resources/audio_processing/agc/agc_with_circular_buffer.dat", + "../resources/audio_processing/output_data_fixed.pb", + "../resources/audio_processing/output_data_float.pb", + "../resources/audio_processing/output_data_float_avx2.pb", + "../resources/audio_processing/output_data_mac.pb", + "../resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm", + "../resources/audio_processing/transient/audio16kHz.pcm", + "../resources/audio_processing/transient/audio32kHz.pcm", + "../resources/audio_processing/transient/audio48kHz.pcm", + "../resources/audio_processing/transient/audio8kHz.pcm", + "../resources/audio_processing/transient/detect16kHz.dat", + "../resources/audio_processing/transient/detect32kHz.dat", + "../resources/audio_processing/transient/detect48kHz.dat", + "../resources/audio_processing/transient/detect8kHz.dat", + "../resources/audio_processing/transient/double-utils.dat", + "../resources/audio_processing/transient/float-utils.dat", + "../resources/audio_processing/transient/suppressed16kHz.pcm", + "../resources/audio_processing/transient/suppressed32kHz.pcm", + "../resources/audio_processing/transient/suppressed8kHz.pcm", + "../resources/audio_processing/transient/wpd0.dat", + "../resources/audio_processing/transient/wpd1.dat", + "../resources/audio_processing/transient/wpd2.dat", + "../resources/audio_processing/transient/wpd3.dat", + "../resources/audio_processing/transient/wpd4.dat", + "../resources/audio_processing/transient/wpd5.dat", + "../resources/audio_processing/transient/wpd6.dat", + "../resources/audio_processing/transient/wpd7.dat", + "../resources/deflicker_before_cif_short.yuv", + "../resources/far16_stereo.pcm", + "../resources/far176_stereo.pcm", + "../resources/far192_stereo.pcm", + "../resources/far22_stereo.pcm", + "../resources/far32_stereo.pcm", + "../resources/far44_stereo.pcm", + "../resources/far48_stereo.pcm", + "../resources/far88_stereo.pcm", + "../resources/far8_stereo.pcm", + "../resources/far96_stereo.pcm", + "../resources/foremanColorEnhanced_cif_short.yuv", + "../resources/foreman_cif.yuv", + "../resources/foreman_cif_short.yuv", + "../resources/near16_stereo.pcm", + 
"../resources/near176_stereo.pcm", + "../resources/near192_stereo.pcm", + "../resources/near22_stereo.pcm", + "../resources/near32_stereo.pcm", + "../resources/near44_stereo.pcm", + "../resources/near48_stereo.pcm", + "../resources/near88_stereo.pcm", + "../resources/near8_stereo.pcm", + "../resources/near96_stereo.pcm", + "../resources/ref03.aecdump", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_1_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_1_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke2_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke2_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke2_1_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke2_1_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingDelay1_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingDelay1_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingLoss1_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingLoss1_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_Multi1_1_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_Multi1_1_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyChoke_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyChoke_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyChoke_1_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyChoke_1_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyDelay_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyDelay_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyLoss_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_SteadyLoss_0_TOF.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_UnlimitedSpeed_0_AST.bin", + "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_UnlimitedSpeed_0_TOF.bin", + "../resources/short_mixed_mono_48.dat", + "../resources/short_mixed_mono_48.pcm", + "../resources/short_mixed_mono_48_arm.dat", + "../resources/short_mixed_stereo_48.dat", + "../resources/short_mixed_stereo_48.pcm", + "../resources/voice_engine/audio_tiny48.wav", + ] + if (is_ios) { + bundle_data("modules_unittests_bundle_data") { + testonly = true + sources = modules_unittests_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_test("modules_unittests") { + testonly = true + defines = [] + sources = [ "module_common_types_unittest.cc" ] + + deps = [ + ":module_api", + ":module_api_public", + "../test:test_main", + "../test:test_support", + "audio_coding:audio_coding_unittests", + "audio_device:audio_device_unittests", + "audio_mixer:audio_mixer_unittests", + "audio_processing:audio_processing_unittests", + "audio_processing/aec3:aec3_unittests", + "audio_processing/ns:ns_unittests", + 
"congestion_controller:congestion_controller_unittests", + "pacing:pacing_unittests", + "remote_bitrate_estimator:remote_bitrate_estimator_unittests", + "rtp_rtcp:rtp_rtcp_unittests", + "utility:utility_unittests", + "video_coding:video_coding_unittests", + "video_coding/timing:timing_unittests", + ] + + if (rtc_desktop_capture_supported) { + deps += [ "desktop_capture:desktop_capture_unittests" ] + } + + data = modules_unittests_resources + + if (is_android) { + use_default_launcher = false + deps += [ + "../sdk/android:libjingle_peerconnection_java", + "//testing/android/native_test:native_test_support", + ] + shard_timeout = 900 + } + if (is_ios) { + info_plist = "../test/ios/Info.plist" + deps += [ ":modules_unittests_bundle_data" ] + configs += [ "..:common_objc" ] + ldflags = [ "-ObjC" ] + } + } +} diff --git a/third_party/libwebrtc/modules/async_audio_processing/BUILD.gn b/third_party/libwebrtc/modules/async_audio_processing/BUILD.gn new file mode 100644 index 0000000000..7a7ca20df1 --- /dev/null +++ b/third_party/libwebrtc/modules/async_audio_processing/BUILD.gn @@ -0,0 +1,43 @@ +# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +rtc_library("async_audio_processing") { + sources = [ + "async_audio_processing.cc", + "async_audio_processing.h", + ] + + public = [ "async_audio_processing.h" ] + + deps = [ + "../../api:scoped_refptr", + "../../api:sequence_checker", + "../../api/audio:audio_frame_api", + "../../api/audio:audio_frame_processor", + "../../api/task_queue:task_queue", + "../../rtc_base:checks", + "../../rtc_base:refcount", + "../../rtc_base:rtc_task_queue", + ] +} + +if (rtc_include_tests) { + rtc_library("async_audio_processing_test") { + testonly = true + + sources = [] + + deps = [ + ":async_audio_processing", + "../../api/audio:audio_frame_api", + "../../rtc_base:checks", + ] + } +} diff --git a/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc new file mode 100644 index 0000000000..9452f3bcf9 --- /dev/null +++ b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc @@ -0,0 +1,61 @@ + +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
diff --git a/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc
new file mode 100644
index 0000000000..9452f3bcf9
--- /dev/null
+++ b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc
@@ -0,0 +1,61 @@
+
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/async_audio_processing/async_audio_processing.h"
+
+#include <utility>
+
+#include "api/audio/audio_frame.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+AsyncAudioProcessing::Factory::~Factory() = default;
+AsyncAudioProcessing::Factory::Factory(AudioFrameProcessor& frame_processor,
+                                       TaskQueueFactory& task_queue_factory)
+    : frame_processor_(frame_processor),
+      task_queue_factory_(task_queue_factory) {}
+
+std::unique_ptr<AsyncAudioProcessing>
+AsyncAudioProcessing::Factory::CreateAsyncAudioProcessing(
+    AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback) {
+  return std::make_unique<AsyncAudioProcessing>(
+      frame_processor_, task_queue_factory_,
+      std::move(on_frame_processed_callback));
+}
+
+AsyncAudioProcessing::~AsyncAudioProcessing() {
+  frame_processor_.SetSink(nullptr);
+}
+
+AsyncAudioProcessing::AsyncAudioProcessing(
+    AudioFrameProcessor& frame_processor,
+    TaskQueueFactory& task_queue_factory,
+    AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback)
+    : on_frame_processed_callback_(std::move(on_frame_processed_callback)),
+      frame_processor_(frame_processor),
+      task_queue_(task_queue_factory.CreateTaskQueue(
+          "AsyncAudioProcessing",
+          TaskQueueFactory::Priority::NORMAL)) {
+  frame_processor_.SetSink([this](std::unique_ptr<AudioFrame> frame) {
+    task_queue_.PostTask([this, frame = std::move(frame)]() mutable {
+      on_frame_processed_callback_(std::move(frame));
+    });
+  });
+}
+
+void AsyncAudioProcessing::Process(std::unique_ptr<AudioFrame> frame) {
+  task_queue_.PostTask([this, frame = std::move(frame)]() mutable {
+    frame_processor_.Process(std::move(frame));
+  });
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.h b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.h
new file mode 100644
index 0000000000..bbd0f69b1b
--- /dev/null
+++ b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_
+#define MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_
+
+#include <memory>
+
+#include "api/audio/audio_frame_processor.h"
+#include "rtc_base/ref_count.h"
+#include "rtc_base/task_queue.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class TaskQueueFactory;
+
+// Helper class taking care of interactions with AudioFrameProcessor
+// in asynchronous manner. Offloads AudioFrameProcessor::Process calls
+// to a dedicated task queue. Makes sure that it's always safe for
+// AudioFrameProcessor to pass processed frames back to its sink.
+class AsyncAudioProcessing final {
+ public:
+  // Helper class passing AudioFrameProcessor and TaskQueueFactory into
+  // AsyncAudioProcessing constructor.
+  class Factory : public rtc::RefCountInterface {
+   public:
+    Factory(const Factory&) = delete;
+    Factory& operator=(const Factory&) = delete;
+
+    ~Factory();
+    Factory(AudioFrameProcessor& frame_processor,
+            TaskQueueFactory& task_queue_factory);
+
+    std::unique_ptr<AsyncAudioProcessing> CreateAsyncAudioProcessing(
+        AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback);
+
+   private:
+    AudioFrameProcessor& frame_processor_;
+    TaskQueueFactory& task_queue_factory_;
+  };
+
+  AsyncAudioProcessing(const AsyncAudioProcessing&) = delete;
+  AsyncAudioProcessing& operator=(const AsyncAudioProcessing&) = delete;
+
+  ~AsyncAudioProcessing();
+
+  // Creates AsyncAudioProcessing which will pass audio frames to
+  // `frame_processor` on `task_queue_` and reply with processed frames passed
+  // into `on_frame_processed_callback`, which is posted back onto
+  // `task_queue_`. `task_queue_` is created using the provided
+  // `task_queue_factory`.
+  AsyncAudioProcessing(
+      AudioFrameProcessor& frame_processor,
+      TaskQueueFactory& task_queue_factory,
+      AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback);
+
+  // Accepts `frame` for asynchronous processing. Thread-safe.
+  void Process(std::unique_ptr<AudioFrame> frame);
+
+ private:
+  AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback_;
+  AudioFrameProcessor& frame_processor_;
+  rtc::TaskQueue task_queue_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_
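Taken together, the implementation and header above give the following usage pattern. This is a rough sketch, not part of the patch: CreateDefaultTaskQueueFactory() comes from api/task_queue/default_task_queue_factory.h, and PassthroughProcessor is the hypothetical processor sketched earlier.

  #include <memory>

  #include "api/audio/audio_frame.h"
  #include "api/task_queue/default_task_queue_factory.h"
  #include "modules/async_audio_processing/async_audio_processing.h"

  void RunPipeline() {
    auto task_queue_factory = webrtc::CreateDefaultTaskQueueFactory();
    PassthroughProcessor processor;  // Hypothetical, sketched earlier.

    // The constructor installs a sink on `processor`; processed frames are
    // posted back to the callback on the internal "AsyncAudioProcessing"
    // task queue, never on the caller's thread.
    webrtc::AsyncAudioProcessing async_processing(
        processor, *task_queue_factory,
        [](std::unique_ptr<webrtc::AudioFrame> frame) {
          // Consume the processed frame, e.g. hand it to the encoder.
        });

    // Thread-safe: Process() only posts the frame onto the task queue.
    async_processing.Process(std::make_unique<webrtc::AudioFrame>());
  }

Note that the destructor unsets the processor's sink before the task-queue member is torn down, so the processor stops handing frames back before the queue goes away.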
diff --git a/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing_gn/moz.build b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing_gn/moz.build
new file mode 100644
index 0000000000..f599fadae2
--- /dev/null
+++ b/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/async_audio_processing/async_audio_processing.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("async_audio_processing_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/BUILD.gn b/third_party/libwebrtc/modules/audio_coding/BUILD.gn new file mode 100644 index 0000000000..eac0650a26 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/BUILD.gn @@ -0,0 +1,1805 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import("../../webrtc.gni") +import("audio_coding.gni") +if (rtc_enable_protobuf) { + import("//third_party/protobuf/proto_library.gni") +} + +visibility = [ ":*" ] + +rtc_source_set("audio_coding_module_typedefs") { + visibility += [ "*" ] + sources = [ "include/audio_coding_module_typedefs.h" ] +} + +rtc_library("audio_coding") { + visibility += [ "*" ] + sources = [ + "acm2/acm_receiver.cc", + "acm2/acm_receiver.h", + "acm2/acm_remixing.cc", + "acm2/acm_remixing.h", + "acm2/acm_resampler.cc", + "acm2/acm_resampler.h", + "acm2/audio_coding_module.cc", + "acm2/call_statistics.cc", + "acm2/call_statistics.h", + "include/audio_coding_module.h", + ] + + defines = [] + + deps = [ + ":audio_coding_module_typedefs", + ":default_neteq_factory", + ":neteq", + "..:module_api", + "..:module_api_public", + "../../api:array_view", + "../../api:function_view", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/neteq:neteq_api", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:audio_format_to_string", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:safe_conversions", + "../../rtc_base/synchronization:mutex", + "../../system_wrappers", + "../../system_wrappers:metrics", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("legacy_encoded_audio_frame") { + sources = [ + "codecs/legacy_encoded_audio_frame.cc", + "codecs/legacy_encoded_audio_frame.h", + ] + deps = [ + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../rtc_base:buffer", + "../../rtc_base:checks", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("webrtc_cng") { + visibility += webrtc_default_visibility + sources = [ + "codecs/cng/webrtc_cng.cc", + "codecs/cng/webrtc_cng.h", + ] + + deps = [ + "../../api:array_view", + "../../common_audio:common_audio_c", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + ] +} + +rtc_library("audio_encoder_cng") { + visibility += [ "*" ] + sources = [ + "codecs/cng/audio_encoder_cng.cc", + "codecs/cng/audio_encoder_cng.h", + ] + + deps = [ + ":webrtc_cng", + "../../api/audio_codecs:audio_codecs_api", + "../../api/units:time_delta", + "../../common_audio", + "../../rtc_base:checks", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("red") { + visibility += [ "*" ] + sources = [ + "codecs/red/audio_encoder_copy_red.cc", + "codecs/red/audio_encoder_copy_red.h", + ] + + deps = [ + "../../api:array_view", + "../../api:field_trials_view", + "../../api/audio_codecs:audio_codecs_api", + "../../api/units:time_delta", + "../../common_audio", + "../../rtc_base:buffer", + "../../rtc_base:byte_order", + "../../rtc_base:checks", + "../../rtc_base:logging", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("g711") { + visibility += [ "*" ] + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/g711/audio_decoder_pcm.cc", + "codecs/g711/audio_decoder_pcm.h", + "codecs/g711/audio_encoder_pcm.cc", + "codecs/g711/audio_encoder_pcm.h", + ] + + deps = [ + ":legacy_encoded_audio_frame", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../api/units:time_delta", + "../../rtc_base:buffer", + "../../rtc_base:checks", + ] + absl_deps = [ 
"//third_party/abseil-cpp/absl/types:optional" ] + public_deps = [ ":g711_c" ] # no-presubmit-check TODO(webrtc:8603) +} + +rtc_library("g711_c") { + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/g711/g711_interface.c", + "codecs/g711/g711_interface.h", + ] + deps = [ "../third_party/g711:g711_3p" ] +} + +rtc_library("g722") { + visibility += [ "*" ] + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/g722/audio_decoder_g722.cc", + "codecs/g722/audio_decoder_g722.h", + "codecs/g722/audio_encoder_g722.cc", + "codecs/g722/audio_encoder_g722.h", + ] + + deps = [ + ":legacy_encoded_audio_frame", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs/g722:audio_encoder_g722_config", + "../../api/units:time_delta", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + public_deps = [ ":g722_c" ] # no-presubmit-check TODO(webrtc:8603) +} + +rtc_library("g722_c") { + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/g722/g722_interface.c", + "codecs/g722/g722_interface.h", + ] + deps = [ "../third_party/g722:g722_3p" ] +} + +rtc_library("ilbc") { + visibility += webrtc_default_visibility + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/ilbc/audio_decoder_ilbc.cc", + "codecs/ilbc/audio_decoder_ilbc.h", + "codecs/ilbc/audio_encoder_ilbc.cc", + "codecs/ilbc/audio_encoder_ilbc.h", + ] + + deps = [ + ":legacy_encoded_audio_frame", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs/ilbc:audio_encoder_ilbc_config", + "../../api/units:time_delta", + "../../common_audio", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:safe_conversions", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + public_deps = [ ":ilbc_c" ] # no-presubmit-check TODO(webrtc:8603) +} + +rtc_library("ilbc_c") { + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/ilbc/abs_quant.c", + "codecs/ilbc/abs_quant.h", + "codecs/ilbc/abs_quant_loop.c", + "codecs/ilbc/abs_quant_loop.h", + "codecs/ilbc/augmented_cb_corr.c", + "codecs/ilbc/augmented_cb_corr.h", + "codecs/ilbc/bw_expand.c", + "codecs/ilbc/bw_expand.h", + "codecs/ilbc/cb_construct.c", + "codecs/ilbc/cb_construct.h", + "codecs/ilbc/cb_mem_energy.c", + "codecs/ilbc/cb_mem_energy.h", + "codecs/ilbc/cb_mem_energy_augmentation.c", + "codecs/ilbc/cb_mem_energy_augmentation.h", + "codecs/ilbc/cb_mem_energy_calc.c", + "codecs/ilbc/cb_mem_energy_calc.h", + "codecs/ilbc/cb_search.c", + "codecs/ilbc/cb_search.h", + "codecs/ilbc/cb_search_core.c", + "codecs/ilbc/cb_search_core.h", + "codecs/ilbc/cb_update_best_index.c", + "codecs/ilbc/cb_update_best_index.h", + "codecs/ilbc/chebyshev.c", + "codecs/ilbc/chebyshev.h", + "codecs/ilbc/comp_corr.c", + "codecs/ilbc/comp_corr.h", + "codecs/ilbc/constants.c", + "codecs/ilbc/constants.h", + "codecs/ilbc/create_augmented_vec.c", + "codecs/ilbc/create_augmented_vec.h", + "codecs/ilbc/decode.c", + "codecs/ilbc/decode.h", + "codecs/ilbc/decode_residual.c", + "codecs/ilbc/decode_residual.h", + "codecs/ilbc/decoder_interpolate_lsf.c", + "codecs/ilbc/decoder_interpolate_lsf.h", + "codecs/ilbc/defines.h", + "codecs/ilbc/do_plc.c", + "codecs/ilbc/do_plc.h", + "codecs/ilbc/encode.c", + "codecs/ilbc/encode.h", + "codecs/ilbc/energy_inverse.c", + "codecs/ilbc/energy_inverse.h", + "codecs/ilbc/enh_upsample.c", + "codecs/ilbc/enh_upsample.h", + "codecs/ilbc/enhancer.c", + 
"codecs/ilbc/enhancer.h", + "codecs/ilbc/enhancer_interface.c", + "codecs/ilbc/enhancer_interface.h", + "codecs/ilbc/filtered_cb_vecs.c", + "codecs/ilbc/filtered_cb_vecs.h", + "codecs/ilbc/frame_classify.c", + "codecs/ilbc/frame_classify.h", + "codecs/ilbc/gain_dequant.c", + "codecs/ilbc/gain_dequant.h", + "codecs/ilbc/gain_quant.c", + "codecs/ilbc/gain_quant.h", + "codecs/ilbc/get_cd_vec.c", + "codecs/ilbc/get_cd_vec.h", + "codecs/ilbc/get_lsp_poly.c", + "codecs/ilbc/get_lsp_poly.h", + "codecs/ilbc/get_sync_seq.c", + "codecs/ilbc/get_sync_seq.h", + "codecs/ilbc/hp_input.c", + "codecs/ilbc/hp_input.h", + "codecs/ilbc/hp_output.c", + "codecs/ilbc/hp_output.h", + "codecs/ilbc/ilbc.c", + "codecs/ilbc/ilbc.h", + "codecs/ilbc/index_conv_dec.c", + "codecs/ilbc/index_conv_dec.h", + "codecs/ilbc/index_conv_enc.c", + "codecs/ilbc/index_conv_enc.h", + "codecs/ilbc/init_decode.c", + "codecs/ilbc/init_decode.h", + "codecs/ilbc/init_encode.c", + "codecs/ilbc/init_encode.h", + "codecs/ilbc/interpolate.c", + "codecs/ilbc/interpolate.h", + "codecs/ilbc/interpolate_samples.c", + "codecs/ilbc/interpolate_samples.h", + "codecs/ilbc/lpc_encode.c", + "codecs/ilbc/lpc_encode.h", + "codecs/ilbc/lsf_check.c", + "codecs/ilbc/lsf_check.h", + "codecs/ilbc/lsf_interpolate_to_poly_dec.c", + "codecs/ilbc/lsf_interpolate_to_poly_dec.h", + "codecs/ilbc/lsf_interpolate_to_poly_enc.c", + "codecs/ilbc/lsf_interpolate_to_poly_enc.h", + "codecs/ilbc/lsf_to_lsp.c", + "codecs/ilbc/lsf_to_lsp.h", + "codecs/ilbc/lsf_to_poly.c", + "codecs/ilbc/lsf_to_poly.h", + "codecs/ilbc/lsp_to_lsf.c", + "codecs/ilbc/lsp_to_lsf.h", + "codecs/ilbc/my_corr.c", + "codecs/ilbc/my_corr.h", + "codecs/ilbc/nearest_neighbor.c", + "codecs/ilbc/nearest_neighbor.h", + "codecs/ilbc/pack_bits.c", + "codecs/ilbc/pack_bits.h", + "codecs/ilbc/poly_to_lsf.c", + "codecs/ilbc/poly_to_lsf.h", + "codecs/ilbc/poly_to_lsp.c", + "codecs/ilbc/poly_to_lsp.h", + "codecs/ilbc/refiner.c", + "codecs/ilbc/refiner.h", + "codecs/ilbc/simple_interpolate_lsf.c", + "codecs/ilbc/simple_interpolate_lsf.h", + "codecs/ilbc/simple_lpc_analysis.c", + "codecs/ilbc/simple_lpc_analysis.h", + "codecs/ilbc/simple_lsf_dequant.c", + "codecs/ilbc/simple_lsf_dequant.h", + "codecs/ilbc/simple_lsf_quant.c", + "codecs/ilbc/simple_lsf_quant.h", + "codecs/ilbc/smooth.c", + "codecs/ilbc/smooth.h", + "codecs/ilbc/smooth_out_data.c", + "codecs/ilbc/smooth_out_data.h", + "codecs/ilbc/sort_sq.c", + "codecs/ilbc/sort_sq.h", + "codecs/ilbc/split_vq.c", + "codecs/ilbc/split_vq.h", + "codecs/ilbc/state_construct.c", + "codecs/ilbc/state_construct.h", + "codecs/ilbc/state_search.c", + "codecs/ilbc/state_search.h", + "codecs/ilbc/swap_bytes.c", + "codecs/ilbc/swap_bytes.h", + "codecs/ilbc/unpack_bits.c", + "codecs/ilbc/unpack_bits.h", + "codecs/ilbc/vq3.c", + "codecs/ilbc/vq3.h", + "codecs/ilbc/vq4.c", + "codecs/ilbc/vq4.h", + "codecs/ilbc/window32_w32.c", + "codecs/ilbc/window32_w32.h", + "codecs/ilbc/xcorr_coef.c", + "codecs/ilbc/xcorr_coef.h", + ] + + deps = [ + "../../api/audio_codecs:audio_codecs_api", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:checks", + "../../rtc_base:sanitizer", + "../../rtc_base/system:arch", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers" ] +} + +rtc_library("isac_vad") { + visibility += [ "../audio_processing/vad:*" ] + sources = [ + "codecs/isac/main/source/filter_functions.c", + "codecs/isac/main/source/filter_functions.h", + "codecs/isac/main/source/isac_vad.c", + "codecs/isac/main/source/isac_vad.h", + 
"codecs/isac/main/source/os_specific_inline.h", + "codecs/isac/main/source/pitch_estimator.c", + "codecs/isac/main/source/pitch_estimator.h", + "codecs/isac/main/source/pitch_filter.c", + "codecs/isac/main/source/pitch_filter.h", + "codecs/isac/main/source/settings.h", + "codecs/isac/main/source/structs.h", + ] + deps = [ + ":isac_bwinfo", + "../../rtc_base:compile_assert_c", + "../../rtc_base/system:arch", + "../../rtc_base/system:ignore_warnings", + "../third_party/fft", + ] +} + +rtc_source_set("isac_bwinfo") { + sources = [ "codecs/isac/bandwidth_info.h" ] + deps = [] +} + +rtc_library("pcm16b") { + visibility += [ "*" ] + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/pcm16b/audio_decoder_pcm16b.cc", + "codecs/pcm16b/audio_decoder_pcm16b.h", + "codecs/pcm16b/audio_encoder_pcm16b.cc", + "codecs/pcm16b/audio_encoder_pcm16b.h", + "codecs/pcm16b/pcm16b_common.cc", + "codecs/pcm16b/pcm16b_common.h", + ] + + deps = [ + ":g711", + ":legacy_encoded_audio_frame", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../rtc_base:buffer", + "../../rtc_base:checks", + ] + public_deps = [ ":pcm16b_c" ] # no-presubmit-check TODO(webrtc:8603) +} + +rtc_library("pcm16b_c") { + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/pcm16b/pcm16b.c", + "codecs/pcm16b/pcm16b.h", + ] +} + +rtc_library("audio_coding_opus_common") { + sources = [ + "codecs/opus/audio_coder_opus_common.cc", + "codecs/opus/audio_coder_opus_common.h", + ] + + deps = [ + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../rtc_base:checks", + "../../rtc_base:stringutils", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("webrtc_opus") { + visibility += webrtc_default_visibility + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/opus/audio_decoder_opus.cc", + "codecs/opus/audio_decoder_opus.h", + "codecs/opus/audio_encoder_opus.cc", + "codecs/opus/audio_encoder_opus.h", + ] + + deps = [ + ":audio_coding_opus_common", + ":audio_network_adaptor", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs/opus:audio_encoder_opus_config", + "../../common_audio", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_numerics", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../system_wrappers:field_trial", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + public_deps = # no-presubmit-check TODO(webrtc:8603) + [ ":webrtc_opus_wrapper" ] + + defines = audio_codec_defines +} + +rtc_library("webrtc_multiopus") { + visibility += webrtc_default_visibility + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/opus/audio_decoder_multi_channel_opus_impl.cc", + "codecs/opus/audio_decoder_multi_channel_opus_impl.h", + "codecs/opus/audio_encoder_multi_channel_opus_impl.cc", + "codecs/opus/audio_encoder_multi_channel_opus_impl.h", + ] + + deps = [ + ":audio_coding_opus_common", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs/opus:audio_decoder_opus_config", + "../../api/audio_codecs/opus:audio_encoder_opus_config", + "../../api/units:time_delta", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + 
"../../rtc_base:safe_minmax", + "../../rtc_base:stringutils", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/memory", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + public_deps = # no-presubmit-check TODO(webrtc:8603) + [ ":webrtc_opus_wrapper" ] + + defines = audio_codec_defines +} + +rtc_library("webrtc_opus_wrapper") { + poisonous = [ "audio_codecs" ] + sources = [ + "codecs/opus/opus_inst.h", + "codecs/opus/opus_interface.cc", + "codecs/opus/opus_interface.h", + ] + + defines = audio_coding_defines + + deps = [ + "../../api:array_view", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../system_wrappers:field_trial", + ] + + if (rtc_build_opus) { + deps += [ rtc_opus_dir ] + public_configs = [ "//third_party/opus:opus_config" ] + } else if (build_with_mozilla) { + public_configs = [ "//third_party/opus:opus_config" ] + } +} + +if (rtc_enable_protobuf) { + proto_library("ana_debug_dump_proto") { + visibility += webrtc_default_visibility + sources = [ "audio_network_adaptor/debug_dump.proto" ] + link_deps = [ ":ana_config_proto" ] + proto_out_dir = "modules/audio_coding/audio_network_adaptor" + } + proto_library("ana_config_proto") { + visibility += [ "*" ] + sources = [ "audio_network_adaptor/config.proto" ] + proto_out_dir = "modules/audio_coding/audio_network_adaptor" + } +} + +rtc_library("audio_network_adaptor_config") { + visibility += webrtc_default_visibility + sources = [ + "audio_network_adaptor/audio_network_adaptor_config.cc", + "audio_network_adaptor/include/audio_network_adaptor_config.h", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_network_adaptor") { + visibility += webrtc_default_visibility + sources = [ + "audio_network_adaptor/audio_network_adaptor_impl.cc", + "audio_network_adaptor/audio_network_adaptor_impl.h", + "audio_network_adaptor/bitrate_controller.cc", + "audio_network_adaptor/bitrate_controller.h", + "audio_network_adaptor/channel_controller.cc", + "audio_network_adaptor/channel_controller.h", + "audio_network_adaptor/controller.cc", + "audio_network_adaptor/controller.h", + "audio_network_adaptor/controller_manager.cc", + "audio_network_adaptor/controller_manager.h", + "audio_network_adaptor/debug_dump_writer.cc", + "audio_network_adaptor/debug_dump_writer.h", + "audio_network_adaptor/dtx_controller.cc", + "audio_network_adaptor/dtx_controller.h", + "audio_network_adaptor/event_log_writer.cc", + "audio_network_adaptor/event_log_writer.h", + "audio_network_adaptor/fec_controller_plr_based.cc", + "audio_network_adaptor/fec_controller_plr_based.h", + "audio_network_adaptor/frame_length_controller.cc", + "audio_network_adaptor/frame_length_controller.h", + "audio_network_adaptor/frame_length_controller_v2.cc", + "audio_network_adaptor/frame_length_controller_v2.h", + "audio_network_adaptor/include/audio_network_adaptor.h", + "audio_network_adaptor/util/threshold_curve.h", + ] + + public_deps = # no-presubmit-check TODO(webrtc:8603) + [ ":audio_network_adaptor_config" ] + + deps = [ + "../../api/audio_codecs:audio_codecs_api", + "../../api/rtc_event_log", + "../../common_audio", + "../../logging:rtc_event_audio", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:protobuf_utils", + "../../rtc_base:safe_conversions", + "../../rtc_base:timeutils", + "../../rtc_base/system:file_wrapper", + "../../system_wrappers", + "../../system_wrappers:field_trial", + ] + absl_deps = [ + 
"//third_party/abseil-cpp/absl/algorithm:container", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + if (rtc_enable_protobuf) { + deps += [ + ":ana_config_proto", + ":ana_debug_dump_proto", + ] + } +} + +rtc_library("neteq") { + visibility += webrtc_default_visibility + sources = [ + "neteq/accelerate.cc", + "neteq/accelerate.h", + "neteq/audio_multi_vector.cc", + "neteq/audio_multi_vector.h", + "neteq/audio_vector.cc", + "neteq/audio_vector.h", + "neteq/background_noise.cc", + "neteq/background_noise.h", + "neteq/buffer_level_filter.cc", + "neteq/buffer_level_filter.h", + "neteq/comfort_noise.cc", + "neteq/comfort_noise.h", + "neteq/cross_correlation.cc", + "neteq/cross_correlation.h", + "neteq/decision_logic.cc", + "neteq/decision_logic.h", + "neteq/decoder_database.cc", + "neteq/decoder_database.h", + "neteq/delay_manager.cc", + "neteq/delay_manager.h", + "neteq/dsp_helper.cc", + "neteq/dsp_helper.h", + "neteq/dtmf_buffer.cc", + "neteq/dtmf_buffer.h", + "neteq/dtmf_tone_generator.cc", + "neteq/dtmf_tone_generator.h", + "neteq/expand.cc", + "neteq/expand.h", + "neteq/expand_uma_logger.cc", + "neteq/expand_uma_logger.h", + "neteq/histogram.cc", + "neteq/histogram.h", + "neteq/merge.cc", + "neteq/merge.h", + "neteq/nack_tracker.cc", + "neteq/nack_tracker.h", + "neteq/neteq_impl.cc", + "neteq/neteq_impl.h", + "neteq/normal.cc", + "neteq/normal.h", + "neteq/packet.cc", + "neteq/packet.h", + "neteq/packet_arrival_history.cc", + "neteq/packet_arrival_history.h", + "neteq/packet_buffer.cc", + "neteq/packet_buffer.h", + "neteq/post_decode_vad.cc", + "neteq/post_decode_vad.h", + "neteq/preemptive_expand.cc", + "neteq/preemptive_expand.h", + "neteq/random_vector.cc", + "neteq/random_vector.h", + "neteq/red_payload_splitter.cc", + "neteq/red_payload_splitter.h", + "neteq/reorder_optimizer.cc", + "neteq/reorder_optimizer.h", + "neteq/statistics_calculator.cc", + "neteq/statistics_calculator.h", + "neteq/sync_buffer.cc", + "neteq/sync_buffer.h", + "neteq/time_stretch.cc", + "neteq/time_stretch.h", + "neteq/timestamp_scaler.cc", + "neteq/timestamp_scaler.h", + "neteq/underrun_optimizer.cc", + "neteq/underrun_optimizer.h", + ] + + deps = [ + ":audio_coding_module_typedefs", + ":webrtc_cng", + "..:module_api_public", + "../../api:array_view", + "../../api:rtp_headers", + "../../api:rtp_packet_info", + "../../api:scoped_refptr", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/neteq:neteq_api", + "../../api/neteq:neteq_controller_api", + "../../api/neteq:tick_timer", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:audio_format_to_string", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:gtest_prod", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:rtc_numerics", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + "../../rtc_base:sanitizer", + "../../rtc_base/experiments:field_trial_parser", + "../../rtc_base/synchronization:mutex", + "../../system_wrappers", + "../../system_wrappers:field_trial", + "../../system_wrappers:metrics", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_source_set("default_neteq_factory") { + visibility += webrtc_default_visibility + sources = [ + "neteq/default_neteq_factory.cc", + "neteq/default_neteq_factory.h", + ] + deps = [ + ":neteq", + "../../api:scoped_refptr", + 
"../../api/audio_codecs:audio_codecs_api", + "../../api/neteq:default_neteq_controller_factory", + "../../api/neteq:neteq_api", + "../../system_wrappers:system_wrappers", + ] +} + +# Although providing only test support, this target must be outside of the +# rtc_include_tests conditional. The reason is that it supports fuzzer tests +# that ultimately are built and run as a part of the Chromium ecosystem, which +# does not set the rtc_include_tests flag. +rtc_library("neteq_tools_minimal") { + visibility += webrtc_default_visibility + sources = [ + "neteq/tools/audio_sink.cc", + "neteq/tools/audio_sink.h", + "neteq/tools/encode_neteq_input.cc", + "neteq/tools/encode_neteq_input.h", + "neteq/tools/neteq_input.cc", + "neteq/tools/neteq_input.h", + "neteq/tools/neteq_test.cc", + "neteq/tools/neteq_test.h", + "neteq/tools/packet.cc", + "neteq/tools/packet.h", + "neteq/tools/packet_source.cc", + "neteq/tools/packet_source.h", + ] + + deps = [ + ":default_neteq_factory", + ":neteq", + "../../api:array_view", + "../../api:neteq_simulator_api", + "../../api:rtp_headers", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/neteq:custom_neteq_factory", + "../../api/neteq:default_neteq_controller_factory", + "../../api/neteq:neteq_api", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:copy_on_write_buffer", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../system_wrappers", + "../rtp_rtcp:rtp_rtcp_format", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + defines = audio_codec_defines +} + +rtc_library("neteq_test_tools") { + visibility += webrtc_default_visibility + testonly = true + sources = [ + "neteq/tools/audio_checksum.h", + "neteq/tools/audio_loop.cc", + "neteq/tools/audio_loop.h", + "neteq/tools/constant_pcm_packet_source.cc", + "neteq/tools/constant_pcm_packet_source.h", + "neteq/tools/initial_packet_inserter_neteq_input.cc", + "neteq/tools/initial_packet_inserter_neteq_input.h", + "neteq/tools/neteq_packet_source_input.cc", + "neteq/tools/neteq_packet_source_input.h", + "neteq/tools/output_audio_file.h", + "neteq/tools/output_wav_file.h", + "neteq/tools/rtp_file_source.cc", + "neteq/tools/rtp_file_source.h", + "neteq/tools/rtp_generator.cc", + "neteq/tools/rtp_generator.h", + ] + + deps = [ + ":neteq_tools", + ":neteq_tools_minimal", + ":pcm16b", + "../../api:array_view", + "../../api:rtp_headers", + "../../common_audio", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:ssl", + "../../rtc_base:stringutils", + "../../rtc_base/system:arch", + "../../test:rtp_test_utils", + "../rtp_rtcp:rtp_rtcp_format", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + if (rtc_enable_protobuf) { + sources += [ + "neteq/tools/neteq_event_log_input.cc", + "neteq/tools/neteq_event_log_input.h", + ] + deps += [ ":rtc_event_log_source" ] + } +} + +rtc_library("neteq_tools") { + visibility += webrtc_default_visibility + sources = [ + "neteq/tools/fake_decode_from_file.cc", + "neteq/tools/fake_decode_from_file.h", + "neteq/tools/neteq_delay_analyzer.cc", + "neteq/tools/neteq_delay_analyzer.h", + "neteq/tools/neteq_replacement_input.cc", + "neteq/tools/neteq_replacement_input.h", + "neteq/tools/neteq_stats_getter.cc", + "neteq/tools/neteq_stats_getter.h", + "neteq/tools/neteq_stats_plotter.cc", + "neteq/tools/neteq_stats_plotter.h", + ] + + deps = [ + ":neteq_input_audio_tools", + ":neteq_tools_minimal", + 
"..:module_api_public", + "../../api:array_view", + "../../api/audio_codecs:audio_codecs_api", + "../../rtc_base:checks", + "../../rtc_base:rtc_numerics", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../rtp_rtcp", + "../rtp_rtcp:rtp_rtcp_format", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("neteq_input_audio_tools") { + visibility += webrtc_default_visibility + sources = [ + "neteq/tools/input_audio_file.cc", + "neteq/tools/input_audio_file.h", + "neteq/tools/resample_input_audio_file.cc", + "neteq/tools/resample_input_audio_file.h", + ] + + deps = [ + "../../common_audio", + "../../rtc_base:checks", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +if (rtc_enable_protobuf) { + rtc_library("rtc_event_log_source") { + testonly = true + + sources = [ + "neteq/tools/rtc_event_log_source.cc", + "neteq/tools/rtc_event_log_source.h", + ] + + deps = [ + ":neteq_tools_minimal", + "../../logging:rtc_event_log_parser", + "../../rtc_base:checks", + "../rtp_rtcp", + "../rtp_rtcp:rtp_rtcp_format", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + public_deps = # no-presubmit-check TODO(webrtc:8603) + [ "../../logging:rtc_event_log_proto" ] + } + + # Only used for test purpose. Since we want to use it from chromium + # (see audio_coding_modules_tests_shared below), we cannot guard it + # under rtc_include_tests. + proto_library("neteq_unittest_proto") { + testonly = true + sources = [ "neteq/neteq_unittest.proto" ] + proto_out_dir = "modules/audio_coding/neteq" + } +} + +# Allow to re-use some test classes from chromium. +rtc_library("audio_coding_modules_tests_shared") { + testonly = true + visibility = [] + visibility = [ "*" ] + + sources = [ + "neteq/test/neteq_decoding_test.cc", + "neteq/test/neteq_decoding_test.h", + "neteq/test/result_sink.cc", + "neteq/test/result_sink.h", + "test/PCMFile.cc", + "test/PCMFile.h", + "test/TestStereo.cc", + "test/TestStereo.h", + "test/opus_test.cc", + "test/opus_test.h", + ] + + deps = [ + ":audio_coding", + ":audio_coding_module_typedefs", + ":default_neteq_factory", + ":neteq_test_tools", + ":neteq_tools_minimal", + ":webrtc_opus_wrapper", + "..:module_api", + "../../api:rtp_headers", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/audio_codecs:builtin_audio_encoder_factory", + "../../api/neteq:neteq_api", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:ssl", + "../../rtc_base:stringutils", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + defines = audio_coding_defines + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_NETEQ_UNITTEST_BITEXACT" ] + deps += [ ":neteq_unittest_proto" ] + } +} + +if (rtc_include_tests) { + audio_coding_deps = [ + ":audio_encoder_cng", + ":g711", + ":g722", + ":pcm16b", + "../../common_audio", + "../../system_wrappers", + ] + if (rtc_include_ilbc) { + audio_coding_deps += [ ":ilbc" ] + } + if (rtc_include_opus) { + audio_coding_deps += [ ":webrtc_opus" ] + } + if (!build_with_mozilla && !build_with_chromium) { + audio_coding_deps += [ ":red" ] + } + + rtc_source_set("mocks") { + testonly = true + sources = [ + 
"audio_network_adaptor/mock/mock_audio_network_adaptor.h", + "audio_network_adaptor/mock/mock_controller.h", + "audio_network_adaptor/mock/mock_controller_manager.h", + "audio_network_adaptor/mock/mock_debug_dump_writer.h", + ] + deps = [ + ":audio_network_adaptor", + "../../test:test_support", + ] + } + + if (!build_with_chromium) { + group("audio_coding_tests") { + visibility += webrtc_default_visibility + testonly = true + public_deps = [ # no-presubmit-check TODO(webrtc:8603) + ":acm_receive_test", + ":acm_send_test", + ":audio_codec_speed_tests", + ":audio_decoder_unittests", + ":audio_decoder_unittests", + ":g711_test", + ":g722_test", + ":ilbc_test", + ":neteq_ilbc_quality_test", + ":neteq_opus_quality_test", + ":neteq_pcm16b_quality_test", + ":neteq_pcmu_quality_test", + ":neteq_speed_test", + ":rtp_analyze", + ":rtp_encode", + ":rtp_jitter", + ":rtpcat", + ":webrtc_opus_fec_test", + ] + if (rtc_enable_protobuf) { + public_deps += # no-presubmit-check TODO(webrtc:8603) + [ ":neteq_rtpplay" ] + } + } + } + + rtc_library("audio_coding_modules_tests") { + testonly = true + visibility += webrtc_default_visibility + + sources = [ + "test/Channel.cc", + "test/Channel.h", + "test/EncodeDecodeTest.cc", + "test/EncodeDecodeTest.h", + "test/PacketLossTest.cc", + "test/PacketLossTest.h", + "test/RTPFile.cc", + "test/RTPFile.h", + "test/TestAllCodecs.cc", + "test/TestAllCodecs.h", + "test/TestRedFec.cc", + "test/TestRedFec.h", + "test/TestVADDTX.cc", + "test/TestVADDTX.h", + "test/Tester.cc", + "test/TwoWayCommunication.cc", + "test/TwoWayCommunication.h", + "test/target_delay_unittest.cc", + ] + deps = [ + ":audio_coding", + ":audio_coding_module_typedefs", + ":audio_coding_modules_tests_shared", + ":audio_encoder_cng", + ":pcm16b_c", + ":red", + ":webrtc_opus_wrapper", + "..:module_api", + "../../api:rtp_headers", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/audio_codecs:builtin_audio_encoder_factory", + "../../api/audio_codecs/L16:audio_decoder_L16", + "../../api/audio_codecs/L16:audio_encoder_L16", + "../../api/audio_codecs/g711:audio_decoder_g711", + "../../api/audio_codecs/g711:audio_encoder_g711", + "../../api/audio_codecs/g722:audio_decoder_g722", + "../../api/audio_codecs/g722:audio_encoder_g722", + "../../api/audio_codecs/ilbc:audio_decoder_ilbc", + "../../api/audio_codecs/ilbc:audio_encoder_ilbc", + "../../api/audio_codecs/opus:audio_decoder_opus", + "../../api/audio_codecs/opus:audio_encoder_opus", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../test:fileutils", + "../../test:scoped_key_value_config", + "../../test:test_support", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + defines = audio_coding_defines + } + + rtc_library("audio_coding_perf_tests") { + testonly = true + visibility += webrtc_default_visibility + + sources = [ + "codecs/opus/opus_complexity_unittest.cc", + "neteq/test/neteq_performance_unittest.cc", + ] + deps = [ + ":neteq_test_support", + ":neteq_test_tools", + "../../api/audio_codecs/opus:audio_encoder_opus", + "../../api/test/metrics:global_metrics_logger_and_exporter", + "../../api/test/metrics:metric", + "../../rtc_base:macromagic", + "../../rtc_base:timeutils", + "../../system_wrappers", + 
"../../system_wrappers:field_trial", + "../../test:fileutils", + "../../test:test_support", + ] + } + + rtc_library("acm_receive_test") { + testonly = true + sources = [ + "acm2/acm_receive_test.cc", + "acm2/acm_receive_test.h", + ] + + defines = audio_coding_defines + + deps = [ + ":audio_coding", + ":neteq_tools", + ":neteq_tools_minimal", + "../../api:scoped_refptr", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../test:test_support", + "//testing/gtest", + ] + + deps += audio_coding_deps + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + rtc_library("acm_send_test") { + testonly = true + sources = [ + "acm2/acm_send_test.cc", + "acm2/acm_send_test.h", + ] + + defines = audio_coding_defines + + deps = [ + ":audio_coding", + ":neteq_input_audio_tools", + ":neteq_tools", + ":neteq_tools_minimal", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/audio_codecs:builtin_audio_encoder_factory", + "../../rtc_base:checks", + "../../rtc_base:stringutils", + "../../test:test_support", + "//testing/gtest", + ] + deps += audio_coding_deps + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + if (!build_with_chromium) { + audio_decoder_unittests_resources = + [ "../../resources/audio_coding/testfile32kHz.pcm" ] + + if (is_ios) { + bundle_data("audio_decoder_unittests_bundle_data") { + testonly = true + sources = audio_decoder_unittests_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_test("audio_decoder_unittests") { + testonly = true + sources = [ "neteq/audio_decoder_unittest.cc" ] + + defines = neteq_defines + + deps = [ + ":ilbc", + ":neteq", + ":neteq_input_audio_tools", + ":neteq_tools", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs/opus:audio_encoder_opus", + "../../common_audio", + "../../rtc_base/system:arch", + "../../test:fileutils", + "../../test:test_main", + "../../test:test_support", + "//testing/gtest", + ] + audio_coding_deps + + data = audio_decoder_unittests_resources + + if (is_android) { + use_default_launcher = false + deps += [ + "//build/android/gtest_apk:native_test_instrumentation_test_runner_java", + "//testing/android/native_test:native_test_java", + "//testing/android/native_test:native_test_support", + ] + shard_timeout = 900 + } + if (is_ios) { + deps += [ ":audio_decoder_unittests_bundle_data" ] + } + } + } + + if (rtc_enable_protobuf) { + rtc_library("neteq_test_factory") { + testonly = true + visibility += webrtc_default_visibility + defines = audio_codec_defines + deps = [ + ":neteq_input_audio_tools", + ":neteq_tools", + ":neteq_tools_minimal", + "../../rtc_base:checks", + "../../rtc_base:refcount", + "../../test:fileutils", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + sources = [ + "neteq/tools/neteq_test_factory.cc", + "neteq/tools/neteq_test_factory.h", + ] + + deps += [ + ":neteq", + ":neteq_test_tools", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/neteq:neteq_api", + "../../test:audio_test_common", + "../../test:field_trial", + "../../test:test_support", + ] + } + } + + if (rtc_enable_protobuf && !build_with_chromium) { + rtc_executable("neteq_rtpplay") { + testonly = true + visibility += [ "*" ] + defines = [] + deps = [ + ":neteq_test_factory", + ":neteq_test_tools", + ":neteq_tools_minimal", + 
"../../rtc_base:stringutils", + "../../system_wrappers:field_trial", + "../../test:field_trial", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + sources = [ "neteq/tools/neteq_rtpplay.cc" ] + } + } + + if (!build_with_chromium) { + audio_codec_speed_tests_resources = [ + "//resources/audio_coding/music_stereo_48kHz.pcm", + "//resources/audio_coding/speech_mono_16kHz.pcm", + "//resources/audio_coding/speech_mono_32_48kHz.pcm", + ] + + if (is_ios) { + bundle_data("audio_codec_speed_tests_data") { + testonly = true + sources = audio_codec_speed_tests_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_test("audio_codec_speed_tests") { + testonly = true + defines = [] + deps = [ + "../../rtc_base:macromagic", + "../../test:fileutils", + ] + sources = [ + "codecs/opus/opus_speed_test.cc", + "codecs/tools/audio_codec_speed_test.cc", + "codecs/tools/audio_codec_speed_test.h", + ] + + data = audio_codec_speed_tests_resources + + if (is_android) { + use_default_launcher = false + deps += [ + "//build/android/gtest_apk:native_test_instrumentation_test_runner_java", + "//testing/android/native_test:native_test_java", + "//testing/android/native_test:native_test_support", + ] + shard_timeout = 900 + } + + if (is_ios) { + deps += [ ":audio_codec_speed_tests_data" ] + } + + deps += [ + ":webrtc_opus", + "../../rtc_base:checks", + "../../test:test_main", + "../../test:test_support", + "../audio_processing", + "//testing/gtest", + ] + } + } + + rtc_library("neteq_test_support") { + testonly = true + sources = [ + "neteq/tools/neteq_performance_test.cc", + "neteq/tools/neteq_performance_test.h", + ] + + deps = [ + ":default_neteq_factory", + ":neteq", + ":neteq_test_tools", + ":pcm16b", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/neteq:neteq_api", + "../../rtc_base:checks", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + "//testing/gtest", + ] + } + + if (!build_with_chromium) { + rtc_library("neteq_quality_test_support") { + testonly = true + sources = [ + "neteq/tools/neteq_quality_test.cc", + "neteq/tools/neteq_quality_test.h", + ] + + deps = [ + ":default_neteq_factory", + ":neteq", + ":neteq_input_audio_tools", + ":neteq_test_tools", + ":neteq_tools_minimal", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/neteq:neteq_api", + "../../rtc_base:checks", + "../../rtc_base:stringutils", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/strings", + ] + } + + rtc_executable("rtp_encode") { + testonly = true + + deps = [ + ":audio_coding", + ":audio_encoder_cng", + ":neteq_input_audio_tools", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs/L16:audio_encoder_L16", + "../../api/audio_codecs/g711:audio_encoder_g711", + "../../api/audio_codecs/g722:audio_encoder_g722", + "../../api/audio_codecs/ilbc:audio_encoder_ilbc", + "../../api/audio_codecs/opus:audio_encoder_opus", + "../../rtc_base:safe_conversions", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + "//third_party/abseil-cpp/absl/memory", + ] + + deps += audio_coding_deps + + sources = [ 
"neteq/tools/rtp_encode.cc" ] + + defines = audio_coding_defines + } + + rtc_executable("rtp_jitter") { + testonly = true + + deps = [ + "../../api:array_view", + "../../rtc_base:buffer", + "../rtp_rtcp:rtp_rtcp_format", + ] + + deps += audio_coding_deps + + sources = [ "neteq/tools/rtp_jitter.cc" ] + + defines = audio_coding_defines + } + + rtc_executable("rtpcat") { + testonly = true + + sources = [ "neteq/tools/rtpcat.cc" ] + + deps = [ + "../../rtc_base:checks", + "../../test:rtp_test_utils", + "//testing/gtest", + ] + } + + rtc_executable("rtp_analyze") { + testonly = true + + sources = [ "neteq/tools/rtp_analyze.cc" ] + + deps = [ + ":neteq", + ":neteq_test_tools", + ":neteq_tools_minimal", + ":pcm16b", + "//testing/gtest", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + + rtc_executable("neteq_opus_quality_test") { + testonly = true + + sources = [ "neteq/test/neteq_opus_quality_test.cc" ] + + deps = [ + ":neteq", + ":neteq_quality_test_support", + ":neteq_tools", + ":webrtc_opus", + "../../test:test_main", + "//testing/gtest", + "//third_party/abseil-cpp/absl/flags:flag", + ] + } + + rtc_executable("neteq_speed_test") { + testonly = true + + sources = [ "neteq/test/neteq_speed_test.cc" ] + + deps = [ + ":neteq", + ":neteq_test_support", + "../../rtc_base:checks", + "../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + + rtc_executable("neteq_ilbc_quality_test") { + testonly = true + + sources = [ "neteq/test/neteq_ilbc_quality_test.cc" ] + + deps = [ + ":ilbc", + ":neteq", + ":neteq_quality_test_support", + ":neteq_tools", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + "../../test:fileutils", + "../../test:test_main", + "//testing/gtest", + "//third_party/abseil-cpp/absl/flags:flag", + ] + } + + rtc_executable("neteq_pcmu_quality_test") { + testonly = true + + sources = [ "neteq/test/neteq_pcmu_quality_test.cc" ] + + deps = [ + ":g711", + ":neteq", + ":neteq_quality_test_support", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + "../../test:fileutils", + "../../test:test_main", + "//testing/gtest", + "//third_party/abseil-cpp/absl/flags:flag", + ] + } + + rtc_executable("neteq_pcm16b_quality_test") { + testonly = true + + sources = [ "neteq/test/neteq_pcm16b_quality_test.cc" ] + + deps = [ + ":neteq", + ":neteq_quality_test_support", + ":pcm16b", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + "../../test:fileutils", + "../../test:test_main", + "//testing/gtest", + "//third_party/abseil-cpp/absl/flags:flag", + ] + } + } + + rtc_executable("g711_test") { + testonly = true + + sources = [ "codecs/g711/test/testG711.cc" ] + + deps = [ ":g711" ] + } + + rtc_executable("g722_test") { + testonly = true + + sources = [ "codecs/g722/test/testG722.cc" ] + + deps = [ ":g722" ] + } + + if (!build_with_chromium) { + rtc_executable("ilbc_test") { + testonly = true + + sources = [ "codecs/ilbc/test/iLBC_test.c" ] + + deps = [ ":ilbc" ] + } + + rtc_executable("webrtc_opus_fec_test") { + testonly = true + + sources = [ "codecs/opus/opus_fec_test.cc" ] + + deps = [ + ":webrtc_opus", + "../../common_audio", + "../../rtc_base:macromagic", + "../../test:fileutils", + "../../test:test_main", + "../../test:test_support", + "//testing/gtest", + ] + } + + rtc_library("audio_coding_unittests") { + testonly = true + visibility += webrtc_default_visibility + + sources = [ + "acm2/acm_receiver_unittest.cc", + 
"acm2/acm_remixing_unittest.cc", + "acm2/audio_coding_module_unittest.cc", + "acm2/call_statistics_unittest.cc", + "audio_network_adaptor/audio_network_adaptor_impl_unittest.cc", + "audio_network_adaptor/bitrate_controller_unittest.cc", + "audio_network_adaptor/channel_controller_unittest.cc", + "audio_network_adaptor/controller_manager_unittest.cc", + "audio_network_adaptor/dtx_controller_unittest.cc", + "audio_network_adaptor/event_log_writer_unittest.cc", + "audio_network_adaptor/fec_controller_plr_based_unittest.cc", + "audio_network_adaptor/frame_length_controller_unittest.cc", + "audio_network_adaptor/frame_length_controller_v2_unittest.cc", + "audio_network_adaptor/util/threshold_curve_unittest.cc", + "codecs/builtin_audio_decoder_factory_unittest.cc", + "codecs/builtin_audio_encoder_factory_unittest.cc", + "codecs/cng/audio_encoder_cng_unittest.cc", + "codecs/cng/cng_unittest.cc", + "codecs/ilbc/ilbc_unittest.cc", + "codecs/legacy_encoded_audio_frame_unittest.cc", + "codecs/opus/audio_decoder_multi_channel_opus_unittest.cc", + "codecs/opus/audio_encoder_multi_channel_opus_unittest.cc", + "codecs/opus/audio_encoder_opus_unittest.cc", + "codecs/opus/opus_bandwidth_unittest.cc", + "codecs/opus/opus_unittest.cc", + "codecs/red/audio_encoder_copy_red_unittest.cc", + "neteq/audio_multi_vector_unittest.cc", + "neteq/audio_vector_unittest.cc", + "neteq/background_noise_unittest.cc", + "neteq/buffer_level_filter_unittest.cc", + "neteq/comfort_noise_unittest.cc", + "neteq/decision_logic_unittest.cc", + "neteq/decoder_database_unittest.cc", + "neteq/delay_manager_unittest.cc", + "neteq/dsp_helper_unittest.cc", + "neteq/dtmf_buffer_unittest.cc", + "neteq/dtmf_tone_generator_unittest.cc", + "neteq/expand_unittest.cc", + "neteq/histogram_unittest.cc", + "neteq/merge_unittest.cc", + "neteq/mock/mock_buffer_level_filter.h", + "neteq/mock/mock_decoder_database.h", + "neteq/mock/mock_delay_manager.h", + "neteq/mock/mock_dtmf_buffer.h", + "neteq/mock/mock_dtmf_tone_generator.h", + "neteq/mock/mock_expand.h", + "neteq/mock/mock_histogram.h", + "neteq/mock/mock_neteq_controller.h", + "neteq/mock/mock_packet_buffer.h", + "neteq/mock/mock_red_payload_splitter.h", + "neteq/mock/mock_statistics_calculator.h", + "neteq/nack_tracker_unittest.cc", + "neteq/neteq_decoder_plc_unittest.cc", + "neteq/neteq_impl_unittest.cc", + "neteq/neteq_network_stats_unittest.cc", + "neteq/neteq_stereo_unittest.cc", + "neteq/neteq_unittest.cc", + "neteq/normal_unittest.cc", + "neteq/packet_arrival_history_unittest.cc", + "neteq/packet_buffer_unittest.cc", + "neteq/post_decode_vad_unittest.cc", + "neteq/random_vector_unittest.cc", + "neteq/red_payload_splitter_unittest.cc", + "neteq/reorder_optimizer_unittest.cc", + "neteq/statistics_calculator_unittest.cc", + "neteq/sync_buffer_unittest.cc", + "neteq/time_stretch_unittest.cc", + "neteq/timestamp_scaler_unittest.cc", + "neteq/tools/input_audio_file_unittest.cc", + "neteq/tools/packet_unittest.cc", + "neteq/underrun_optimizer_unittest.cc", + ] + + deps = [ + ":acm_receive_test", + ":acm_send_test", + ":audio_coding", + ":audio_coding_module_typedefs", + ":audio_coding_modules_tests_shared", + ":audio_coding_opus_common", + ":audio_encoder_cng", + ":audio_network_adaptor", + ":default_neteq_factory", + ":g711", + ":ilbc", + ":legacy_encoded_audio_frame", + ":mocks", + ":neteq", + ":neteq_input_audio_tools", + ":neteq_test_support", + ":neteq_test_tools", + ":neteq_tools", + ":neteq_tools_minimal", + ":pcm16b", + ":red", + ":webrtc_cng", + ":webrtc_opus", + "..:module_api", + 
"..:module_api_public", + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/audio_codecs:builtin_audio_decoder_factory", + "../../api/audio_codecs:builtin_audio_encoder_factory", + "../../api/audio_codecs/opus:audio_decoder_multiopus", + "../../api/audio_codecs/opus:audio_decoder_opus", + "../../api/audio_codecs/opus:audio_encoder_multiopus", + "../../api/audio_codecs/opus:audio_encoder_opus", + "../../api/neteq:default_neteq_controller_factory", + "../../api/neteq:neteq_api", + "../../api/neteq:neteq_controller_api", + "../../api/neteq:tick_timer", + "../../api/neteq:tick_timer_unittest", + "../../api/rtc_event_log", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../common_audio:mock_common_audio", + "../../logging:mocks", + "../../logging:rtc_event_audio", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread", + "../../rtc_base:refcount", + "../../rtc_base:rtc_base_tests_utils", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../rtc_base:sanitizer", + "../../rtc_base:ssl", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:arch", + "../../system_wrappers", + "../../test:audio_codec_mocks", + "../../test:audio_test_common", + "../../test:field_trial", + "../../test:fileutils", + "../../test:rtc_expect_death", + "../../test:rtp_test_utils", + "../../test:scoped_key_value_config", + "../../test:test_common", + "../../test:test_support", + "codecs/opus/test", + "codecs/opus/test:test_unittest", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/memory", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + defines = audio_coding_defines + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_NETEQ_UNITTEST_BITEXACT" ] + deps += [ + ":ana_config_proto", + ":neteq_unittest_proto", + ] + } + } + } +} + +# For backwards compatibility only! Use +# webrtc/api/audio_codecs:audio_codecs_api instead. +# TODO(kwiberg): Remove this. +rtc_source_set("audio_decoder_interface") { + visibility += [ "*" ] + sources = [ "codecs/audio_decoder.h" ] + deps = [ "../../api/audio_codecs:audio_codecs_api" ] +} + +# For backwards compatibility only! Use +# webrtc/api/audio_codecs:audio_codecs_api instead. +# TODO(ossu): Remove this. +rtc_source_set("audio_encoder_interface") { + visibility += [ "*" ] + sources = [ "codecs/audio_encoder.h" ] + deps = [ "../../api/audio_codecs:audio_codecs_api" ] +} diff --git a/third_party/libwebrtc/modules/audio_coding/DEPS b/third_party/libwebrtc/modules/audio_coding/DEPS new file mode 100644 index 0000000000..3dc9624a4b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/DEPS @@ -0,0 +1,7 @@ +include_rules = [ + "+call", + "+common_audio", + "+logging/rtc_event_log", + "+audio_coding/neteq/neteq_unittest.pb.h", # Different path. 
+ "+system_wrappers", +] diff --git a/third_party/libwebrtc/modules/audio_coding/OWNERS b/third_party/libwebrtc/modules/audio_coding/OWNERS new file mode 100644 index 0000000000..c27c2a8d2d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/OWNERS @@ -0,0 +1,4 @@ +henrik.lundin@webrtc.org +minyue@webrtc.org +ivoc@webrtc.org +jakobi@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc new file mode 100644 index 0000000000..8bc76cd2af --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/acm_receive_test.h" + +#include + +#include + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +namespace { +AudioCodingModule::Config MakeAcmConfig( + Clock* clock, + rtc::scoped_refptr decoder_factory) { + AudioCodingModule::Config config; + config.clock = clock; + config.decoder_factory = std::move(decoder_factory); + return config; +} +} // namespace + +AcmReceiveTestOldApi::AcmReceiveTestOldApi( + PacketSource* packet_source, + AudioSink* audio_sink, + int output_freq_hz, + NumOutputChannels exptected_output_channels, + rtc::scoped_refptr decoder_factory) + : clock_(0), + acm_(webrtc::AudioCodingModule::Create( + MakeAcmConfig(&clock_, std::move(decoder_factory)))), + packet_source_(packet_source), + audio_sink_(audio_sink), + output_freq_hz_(output_freq_hz), + exptected_output_channels_(exptected_output_channels) {} + +AcmReceiveTestOldApi::~AcmReceiveTestOldApi() = default; + +void AcmReceiveTestOldApi::RegisterDefaultCodecs() { + acm_->SetReceiveCodecs({{103, {"ISAC", 16000, 1}}, + {104, {"ISAC", 32000, 1}}, + {107, {"L16", 8000, 1}}, + {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, + {111, {"L16", 8000, 2}}, + {112, {"L16", 16000, 2}}, + {113, {"L16", 32000, 2}}, + {0, {"PCMU", 8000, 1}}, + {110, {"PCMU", 8000, 2}}, + {8, {"PCMA", 8000, 1}}, + {118, {"PCMA", 8000, 2}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {119, {"G722", 8000, 2}}, + {120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); +} + +// Remaps payload types from ACM's default to those used in the resource file +// neteq_universal_new.rtp. 
+void AcmReceiveTestOldApi::RegisterNetEqTestCodecs() {
+  acm_->SetReceiveCodecs({{103, {"ISAC", 16000, 1}},
+                          {104, {"ISAC", 32000, 1}},
+                          {93, {"L16", 8000, 1}},
+                          {94, {"L16", 16000, 1}},
+                          {95, {"L16", 32000, 1}},
+                          {0, {"PCMU", 8000, 1}},
+                          {8, {"PCMA", 8000, 1}},
+                          {102, {"ILBC", 8000, 1}},
+                          {9, {"G722", 8000, 1}},
+                          {120, {"OPUS", 48000, 2}},
+                          {13, {"CN", 8000, 1}},
+                          {98, {"CN", 16000, 1}},
+                          {99, {"CN", 32000, 1}}});
+}
+
+void AcmReceiveTestOldApi::Run() {
+  for (std::unique_ptr<Packet> packet(packet_source_->NextPacket()); packet;
+       packet = packet_source_->NextPacket()) {
+    // Pull audio until it is time to insert the packet.
+    while (clock_.TimeInMilliseconds() < packet->time_ms()) {
+      AudioFrame output_frame;
+      bool muted;
+      EXPECT_EQ(0,
+                acm_->PlayoutData10Ms(output_freq_hz_, &output_frame, &muted));
+      ASSERT_EQ(output_freq_hz_, output_frame.sample_rate_hz_);
+      ASSERT_FALSE(muted);
+      const size_t samples_per_block =
+          static_cast<size_t>(output_freq_hz_ * 10 / 1000);
+      EXPECT_EQ(samples_per_block, output_frame.samples_per_channel_);
+      if (expected_output_channels_ != kArbitraryChannels) {
+        if (output_frame.speech_type_ == webrtc::AudioFrame::kPLC) {
+          // Don't check number of channels for PLC output, since each test run
+          // usually starts with a short period of mono PLC before decoding the
+          // first packet.
+        } else {
+          EXPECT_EQ(expected_output_channels_, output_frame.num_channels_);
+        }
+      }
+      ASSERT_TRUE(audio_sink_->WriteAudioFrame(output_frame));
+      clock_.AdvanceTimeMilliseconds(10);
+      AfterGetAudio();
+    }
+
+    EXPECT_EQ(0, acm_->IncomingPacket(
+                     packet->payload(),
+                     static_cast<size_t>(packet->payload_length_bytes()),
+                     packet->header()))
+        << "Failure when inserting packet:" << std::endl
+        << "  PT = " << static_cast<int>(packet->header().payloadType)
+        << std::endl
+        << "  TS = " << packet->header().timestamp << std::endl
+        << "  SN = " << packet->header().sequenceNumber;
+  }
+}
+
+AcmReceiveTestToggleOutputFreqOldApi::AcmReceiveTestToggleOutputFreqOldApi(
+    PacketSource* packet_source,
+    AudioSink* audio_sink,
+    int output_freq_hz_1,
+    int output_freq_hz_2,
+    int toggle_period_ms,
+    NumOutputChannels expected_output_channels)
+    : AcmReceiveTestOldApi(packet_source,
+                           audio_sink,
+                           output_freq_hz_1,
+                           expected_output_channels,
+                           CreateBuiltinAudioDecoderFactory()),
+      output_freq_hz_1_(output_freq_hz_1),
+      output_freq_hz_2_(output_freq_hz_2),
+      toggle_period_ms_(toggle_period_ms),
+      last_toggle_time_ms_(clock_.TimeInMilliseconds()) {}
+
+void AcmReceiveTestToggleOutputFreqOldApi::AfterGetAudio() {
+  if (clock_.TimeInMilliseconds() >= last_toggle_time_ms_ + toggle_period_ms_) {
+    output_freq_hz_ = (output_freq_hz_ == output_freq_hz_1_)
+                          ? output_freq_hz_2_
+                          : output_freq_hz_1_;
+    last_toggle_time_ms_ = clock_.TimeInMilliseconds();
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h
new file mode 100644
index 0000000000..2095ef9025
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_
+
+#include <stddef.h>  // for size_t
+
+#include <memory>
+#include
+
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/scoped_refptr.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+class AudioCodingModule;
+class AudioDecoder;
+
+namespace test {
+class AudioSink;
+class PacketSource;
+
+class AcmReceiveTestOldApi {
+ public:
+  enum NumOutputChannels : size_t {
+    kArbitraryChannels = 0,
+    kMonoOutput = 1,
+    kStereoOutput = 2,
+    kQuadOutput = 4
+  };
+
+  AcmReceiveTestOldApi(PacketSource* packet_source,
+                       AudioSink* audio_sink,
+                       int output_freq_hz,
+                       NumOutputChannels expected_output_channels,
+                       rtc::scoped_refptr<AudioDecoderFactory> decoder_factory);
+  virtual ~AcmReceiveTestOldApi();
+
+  AcmReceiveTestOldApi(const AcmReceiveTestOldApi&) = delete;
+  AcmReceiveTestOldApi& operator=(const AcmReceiveTestOldApi&) = delete;
+
+  // Registers the codecs with default parameters from ACM.
+  void RegisterDefaultCodecs();
+
+  // Registers codecs with payload types matching the pre-encoded NetEq test
+  // files.
+  void RegisterNetEqTestCodecs();
+
+  // Runs the test.
+  void Run();
+
+  AudioCodingModule* get_acm() { return acm_.get(); }
+
+ protected:
+  // Method is called after each block of output audio is received from ACM.
+  virtual void AfterGetAudio() {}
+
+  SimulatedClock clock_;
+  std::unique_ptr<AudioCodingModule> acm_;
+  PacketSource* packet_source_;
+  AudioSink* audio_sink_;
+  int output_freq_hz_;
+  NumOutputChannels expected_output_channels_;
+};
+
+// This test toggles the output frequency every `toggle_period_ms`. The test
+// starts with `output_freq_hz_1`. Except for the toggling, it does the same
+// thing as AcmReceiveTestOldApi.
+class AcmReceiveTestToggleOutputFreqOldApi : public AcmReceiveTestOldApi {
+ public:
+  AcmReceiveTestToggleOutputFreqOldApi(
+      PacketSource* packet_source,
+      AudioSink* audio_sink,
+      int output_freq_hz_1,
+      int output_freq_hz_2,
+      int toggle_period_ms,
+      NumOutputChannels expected_output_channels);
+
+ protected:
+  void AfterGetAudio() override;
+
+  const int output_freq_hz_1_;
+  const int output_freq_hz_2_;
+  const int toggle_period_ms_;
+  int64_t last_toggle_time_ms_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc
new file mode 100644
index 0000000000..b078af1d2d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_receiver.h"
+
+#include
+#include
+
+#include
+#include
+
+#include "absl/strings/match.h"
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/neteq/neteq.h"
+#include "modules/audio_coding/acm2/acm_resampler.h"
+#include "modules/audio_coding/acm2/call_statistics.h"
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/strings/audio_format_to_string.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+
+namespace acm2 {
+
+namespace {
+
+std::unique_ptr<NetEq> CreateNetEq(
+    NetEqFactory* neteq_factory,
+    const NetEq::Config& config,
+    Clock* clock,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
+  if (neteq_factory) {
+    return neteq_factory->CreateNetEq(config, decoder_factory, clock);
+  }
+  return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
+}
+
+}  // namespace
+
+AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
+    : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
+      neteq_(CreateNetEq(config.neteq_factory,
+                         config.neteq_config,
+                         config.clock,
+                         config.decoder_factory)),
+      clock_(config.clock),
+      resampled_last_output_frame_(true) {
+  RTC_DCHECK(clock_);
+  memset(last_audio_buffer_.get(), 0,
+         sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples);
+}
+
+AcmReceiver::~AcmReceiver() = default;
+
+int AcmReceiver::SetMinimumDelay(int delay_ms) {
+  if (neteq_->SetMinimumDelay(delay_ms))
+    return 0;
+  RTC_LOG(LS_ERROR) << "AcmReceiver::SetMinimumDelay " << delay_ms;
+  return -1;
+}
+
+int AcmReceiver::SetMaximumDelay(int delay_ms) {
+  if (neteq_->SetMaximumDelay(delay_ms))
+    return 0;
+  RTC_LOG(LS_ERROR) << "AcmReceiver::SetMaximumDelay " << delay_ms;
+  return -1;
+}
+
+bool AcmReceiver::SetBaseMinimumDelayMs(int delay_ms) {
+  return neteq_->SetBaseMinimumDelayMs(delay_ms);
+}
+
+int AcmReceiver::GetBaseMinimumDelayMs() const {
+  return neteq_->GetBaseMinimumDelayMs();
+}
+
+absl::optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
+  MutexLock lock(&mutex_);
+  if (!last_decoder_) {
+    return absl::nullopt;
+  }
+  return last_decoder_->sample_rate_hz;
+}
+
+int AcmReceiver::last_output_sample_rate_hz() const {
+  return neteq_->last_output_sample_rate_hz();
+}
+
+int AcmReceiver::InsertPacket(const RTPHeader& rtp_header,
+                              rtc::ArrayView<const uint8_t> incoming_payload) {
+  if (incoming_payload.empty()) {
+    neteq_->InsertEmptyPacket(rtp_header);
+    return 0;
+  }
+
+  int payload_type = rtp_header.payloadType;
+  auto format = neteq_->GetDecoderFormat(payload_type);
+  if (format && absl::EqualsIgnoreCase(format->sdp_format.name, "red")) {
+    // This is a RED packet. Get the format of the audio codec.
+    payload_type = incoming_payload[0] & 0x7f;
+    format = neteq_->GetDecoderFormat(payload_type);
+  }
+  if (!format) {
+    RTC_LOG_F(LS_ERROR) << "Payload-type " << payload_type
+                        << " is not registered.";
+    return -1;
+  }
+
+  {
+    MutexLock lock(&mutex_);
+    if (absl::EqualsIgnoreCase(format->sdp_format.name, "cn")) {
+      if (last_decoder_ && last_decoder_->num_channels > 1) {
+        // This is CNG and the audio codec is not mono, so skip pushing the
+        // packet into NetEq.
+        return 0;
+      }
+    } else {
+      last_decoder_ = DecoderInfo{/*payload_type=*/payload_type,
+                                  /*sample_rate_hz=*/format->sample_rate_hz,
+                                  /*num_channels=*/format->num_channels,
+                                  /*sdp_format=*/std::move(format->sdp_format)};
+    }
+  }  // `mutex_` is released.
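+
+  // NetEq itself is thread-safe, so the packet is inserted without holding
+  // `mutex_`; the lock above only guards `last_decoder_`.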
+  if (neteq_->InsertPacket(rtp_header, incoming_payload) < 0) {
+    RTC_LOG(LS_ERROR) << "AcmReceiver::InsertPacket "
+                      << static_cast<int>(rtp_header.payloadType)
+                      << " Failed to insert packet";
+    return -1;
+  }
+  return 0;
+}
+
+int AcmReceiver::GetAudio(int desired_freq_hz,
+                          AudioFrame* audio_frame,
+                          bool* muted) {
+  RTC_DCHECK(muted);
+
+  int current_sample_rate_hz = 0;
+  if (neteq_->GetAudio(audio_frame, muted, &current_sample_rate_hz) !=
+      NetEq::kOK) {
+    RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
+    return -1;
+  }
+
+  RTC_DCHECK_NE(current_sample_rate_hz, 0);
+
+  // Determine whether resampling is required.
+  const bool need_resampling =
+      (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);
+
+  // Accessing members; take the lock.
+  MutexLock lock(&mutex_);
+  if (need_resampling && !resampled_last_output_frame_) {
+    // Prime the resampler with the last frame.
+    int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
+    int samples_per_channel_int = resampler_.Resample10Msec(
+        last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
+        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
+        temp_output);
+    if (samples_per_channel_int < 0) {
+      RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - "
+                           "Resampling last_audio_buffer_ failed.";
+      return -1;
+    }
+  }
+
+  // TODO(bugs.webrtc.org/3923) Glitches in the output may appear if the output
+  // rate from NetEq changes.
+  if (need_resampling) {
+    // TODO(yujo): handle this more efficiently for muted frames.
+    int samples_per_channel_int = resampler_.Resample10Msec(
+        audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
+        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
+        audio_frame->mutable_data());
+    if (samples_per_channel_int < 0) {
+      RTC_LOG(LS_ERROR)
+          << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
+      return -1;
+    }
+    audio_frame->samples_per_channel_ =
+        static_cast<size_t>(samples_per_channel_int);
+    audio_frame->sample_rate_hz_ = desired_freq_hz;
+    RTC_DCHECK_EQ(
+        audio_frame->sample_rate_hz_,
+        rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
+    resampled_last_output_frame_ = true;
+  } else {
+    resampled_last_output_frame_ = false;
+    // We might end up here only if the codec changed.
+  }
+
+  // Store current audio in `last_audio_buffer_` for next time.
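+  // On a later call it is used to prime the resampler if the output rate
+  // switches while the previous frame was not resampled.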
+  memcpy(last_audio_buffer_.get(), audio_frame->data(),
+         sizeof(int16_t) * audio_frame->samples_per_channel_ *
+             audio_frame->num_channels_);
+
+  call_stats_.DecodedByNetEq(audio_frame->speech_type_, *muted);
+  return 0;
+}
+
+void AcmReceiver::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) {
+  neteq_->SetCodecs(codecs);
+}
+
+void AcmReceiver::FlushBuffers() {
+  neteq_->FlushBuffers();
+}
+
+void AcmReceiver::RemoveAllCodecs() {
+  MutexLock lock(&mutex_);
+  neteq_->RemoveAllPayloadTypes();
+  last_decoder_ = absl::nullopt;
+}
+
+absl::optional<uint32_t> AcmReceiver::GetPlayoutTimestamp() {
+  return neteq_->GetPlayoutTimestamp();
+}
+
+int AcmReceiver::FilteredCurrentDelayMs() const {
+  return neteq_->FilteredCurrentDelayMs();
+}
+
+int AcmReceiver::TargetDelayMs() const {
+  return neteq_->TargetDelayMs();
+}
+
+absl::optional<std::pair<int, SdpAudioFormat>> AcmReceiver::LastDecoder()
+    const {
+  MutexLock lock(&mutex_);
+  if (!last_decoder_) {
+    return absl::nullopt;
+  }
+  RTC_DCHECK_NE(-1, last_decoder_->payload_type);
+  return std::make_pair(last_decoder_->payload_type, last_decoder_->sdp_format);
+}
+
+void AcmReceiver::GetNetworkStatistics(
+    NetworkStatistics* acm_stat,
+    bool get_and_clear_legacy_stats /* = true */) const {
+  NetEqNetworkStatistics neteq_stat;
+  if (get_and_clear_legacy_stats) {
+    // The NetEq function always returns zero, so we don't check the return
+    // value.
+    neteq_->NetworkStatistics(&neteq_stat);
+
+    acm_stat->currentExpandRate = neteq_stat.expand_rate;
+    acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
+    acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
+    acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
+    acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
+    acm_stat->currentSecondaryDiscardedRate =
+        neteq_stat.secondary_discarded_rate;
+    acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
+    acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
+  } else {
+    neteq_stat = neteq_->CurrentNetworkStatistics();
+    acm_stat->currentExpandRate = 0;
+    acm_stat->currentSpeechExpandRate = 0;
+    acm_stat->currentPreemptiveRate = 0;
+    acm_stat->currentAccelerateRate = 0;
+    acm_stat->currentSecondaryDecodedRate = 0;
+    acm_stat->currentSecondaryDiscardedRate = 0;
+    acm_stat->meanWaitingTimeMs = -1;
+    acm_stat->maxWaitingTimeMs = 1;
+  }
+  acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
+  acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
+  acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found;
+
+  NetEqLifetimeStatistics neteq_lifetime_stat = neteq_->GetLifetimeStatistics();
+  acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received;
+  acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples;
+  acm_stat->silentConcealedSamples =
+      neteq_lifetime_stat.silent_concealed_samples;
+  acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events;
+  acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
+  acm_stat->jitterBufferTargetDelayMs =
+      neteq_lifetime_stat.jitter_buffer_target_delay_ms;
+  acm_stat->jitterBufferMinimumDelayMs =
+      neteq_lifetime_stat.jitter_buffer_minimum_delay_ms;
+  acm_stat->jitterBufferEmittedCount =
+      neteq_lifetime_stat.jitter_buffer_emitted_count;
+  acm_stat->delayedPacketOutageSamples =
+      neteq_lifetime_stat.delayed_packet_outage_samples;
+  acm_stat->relativePacketArrivalDelayMs =
+      neteq_lifetime_stat.relative_packet_arrival_delay_ms;
+  acm_stat->interruptionCount = neteq_lifetime_stat.interruption_count;
+  acm_stat->totalInterruptionDurationMs =
+      neteq_lifetime_stat.total_interruption_duration_ms;
+  acm_stat->insertedSamplesForDeceleration =
+      neteq_lifetime_stat.inserted_samples_for_deceleration;
+  acm_stat->removedSamplesForAcceleration =
+      neteq_lifetime_stat.removed_samples_for_acceleration;
+  acm_stat->fecPacketsReceived = neteq_lifetime_stat.fec_packets_received;
+  acm_stat->fecPacketsDiscarded = neteq_lifetime_stat.fec_packets_discarded;
+  acm_stat->packetsDiscarded = neteq_lifetime_stat.packets_discarded;
+
+  NetEqOperationsAndState neteq_operations_and_state =
+      neteq_->GetOperationsAndState();
+  acm_stat->packetBufferFlushes =
+      neteq_operations_and_state.packet_buffer_flushes;
+}
+
+int AcmReceiver::EnableNack(size_t max_nack_list_size) {
+  neteq_->EnableNack(max_nack_list_size);
+  return 0;
+}
+
+void AcmReceiver::DisableNack() {
+  neteq_->DisableNack();
+}
+
+std::vector<uint16_t> AcmReceiver::GetNackList(
+    int64_t round_trip_time_ms) const {
+  return neteq_->GetNackList(round_trip_time_ms);
+}
+
+void AcmReceiver::ResetInitialDelay() {
+  neteq_->SetMinimumDelay(0);
+  // TODO(turajs): Should NetEq Buffer be flushed?
+}
+
+uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
+  // Down-cast the time to (32-6)-bit since we only care about
+  // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
+  // The 6 most significant bits of the 32-bit value are masked, so there is
+  // no overflow in the conversion from milliseconds to timestamp.
+  const uint32_t now_in_ms =
+      static_cast<uint32_t>(clock_->TimeInMilliseconds() & 0x03ffffff);
+  return static_cast<uint32_t>((decoder_sampling_rate / 1000) * now_in_ms);
+}
+
+void AcmReceiver::GetDecodingCallStatistics(
+    AudioDecodingCallStats* stats) const {
+  MutexLock lock(&mutex_);
+  *stats = call_stats_.GetDecodingStatistics();
+}
+
+}  // namespace acm2
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h
new file mode 100644
index 0000000000..a61247627f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/audio_codecs/audio_format.h"
+#include "modules/audio_coding/acm2/acm_resampler.h"
+#include "modules/audio_coding/acm2/call_statistics.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+class Clock;
+class NetEq;
+struct RTPHeader;
+
+namespace acm2 {
+
+class AcmReceiver {
+ public:
+  // Constructor of the class.
+  explicit AcmReceiver(const AudioCodingModule::Config& config);
+
+  // Destructor of the class.
+  ~AcmReceiver();
+
+  //
+  // Inserts a payload with its associated RTP header into NetEq.
+  //
+  // Input:
+  //   - rtp_header           : RTP header for the incoming payload containing
+  //                            information about payload type, sequence
+  //                            number, timestamp, SSRC and marker bit.
+  //   - incoming_payload     : Incoming audio payload.
+  //
+  // Return value             : 0 if OK.
+  //                           <0 if NetEq returned an error.
+  //
+  int InsertPacket(const RTPHeader& rtp_header,
+                   rtc::ArrayView<const uint8_t> incoming_payload);
+
+  //
+  // Asks NetEq for 10 milliseconds of decoded audio.
+  //
+  // Input:
+  //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
+  //                            audio. If set to -1, no resampling is performed
+  //                            and the audio is returned at the sample rate of
+  //                            the decoder.
+  //
+  // Output:
+  //   -audio_frame           : an audio frame where output data and
+  //                            associated parameters are written to.
+  //   -muted                 : if true, the sample data in audio_frame is not
+  //                            populated, and must be interpreted as all zero.
+  //
+  // Return value             : 0 if OK.
+  //                           -1 if NetEq returned an error.
+  //
+  int GetAudio(int desired_freq_hz, AudioFrame* audio_frame, bool* muted);
+
+  // Replaces the current set of decoders with the specified set.
+  void SetCodecs(const std::map<int, SdpAudioFormat>& codecs);
+
+  //
+  // Sets a minimum delay for the packet buffer. The given delay is maintained,
+  // unless channel conditions dictate a higher delay.
+  //
+  // Input:
+  //   - delay_ms             : minimum delay in milliseconds.
+  //
+  // Return value             : 0 if OK.
+  //                           <0 if NetEq returned an error.
+  //
+  int SetMinimumDelay(int delay_ms);
+
+  //
+  // Sets a maximum delay [ms] for the packet buffer. The target delay does not
+  // exceed the given value, even if channel conditions require it.
+  //
+  // Input:
+  //   - delay_ms             : maximum delay in milliseconds.
+  //
+  // Return value             : 0 if OK.
+  //                           <0 if NetEq returned an error.
+  //
+  int SetMaximumDelay(int delay_ms);
+
+  // Sets a base minimum delay in milliseconds for the packet buffer.
+  // The base minimum delay sets a lower bound for the minimum delay value
+  // set via SetMinimumDelay.
+  //
+  // Returns true if the value was successfully set, false otherwise.
+  bool SetBaseMinimumDelayMs(int delay_ms);
+
+  // Returns the current value of the base minimum delay in milliseconds.
+  int GetBaseMinimumDelayMs() const;
+
+  //
+  // Resets the initial delay to zero.
+  //
+  void ResetInitialDelay();
+
+  // Returns the sample rate of the decoder associated with the last incoming
+  // packet. If no packet of a registered non-CNG codec has been received,
+  // the return value is empty. Also, if the decoder was unregistered since
+  // the last packet was inserted, the return value is empty.
+  absl::optional<int> last_packet_sample_rate_hz() const;
+
+  // Returns last_output_sample_rate_hz from the NetEq instance.
+  int last_output_sample_rate_hz() const;
+
+  //
+  // Gets the current network statistics from NetEq.
+  //
+  // Output:
+  //   - statistics           : The current network statistics.
+  //
+  void GetNetworkStatistics(NetworkStatistics* statistics,
+                            bool get_and_clear_legacy_stats = true) const;
+
+  //
+  // Flushes the NetEq packet and speech buffers.
+  //
+  void FlushBuffers();
+
+  //
+  // Removes all registered codecs.
+  //
+  void RemoveAllCodecs();
+
+  // Returns the RTP timestamp for the last sample delivered by GetAudio().
+  // The return value will be empty if no valid timestamp is available.
+  absl::optional<uint32_t> GetPlayoutTimestamp();
+
+  // Returns the current total delay from NetEq (packet buffer and sync
+  // buffer) in ms, with smoothing applied to even out short-time fluctuations
+  // due to jitter. The packet buffer part of the delay is not updated during
+  // DTX/CNG periods.
+  //
+  int FilteredCurrentDelayMs() const;
+
+  // Returns the current target delay for NetEq in ms.
+  //
+  int TargetDelayMs() const;
+
+  //
+  // Gets the payload type and format of the last non-CNG/non-DTMF received
+  // payload. If no non-CNG/non-DTMF packet has been received, absl::nullopt
+  // is returned.
+  //
+  absl::optional<std::pair<int, SdpAudioFormat>> LastDecoder() const;
+
+  //
+  // Enables NACK and sets the maximum size of the NACK list. If NACK is
+  // already enabled then the maximum NACK list size is modified accordingly.
+  //
+  // If the sequence number of the last received packet is N, the sequence
+  // numbers in the NACK list are in the range
+  // [N - `max_nack_list_size`, N).
+  //
+  // `max_nack_list_size` should be positive (nonzero) and less than or equal
+  // to `Nack::kNackListSizeLimit`. Otherwise, no change is applied and -1 is
+  // returned; 0 is returned on success.
+  //
+  int EnableNack(size_t max_nack_list_size);
+
+  // Disables NACK.
+  void DisableNack();
+
+  //
+  // Gets a list of packets to be retransmitted. `round_trip_time_ms` is an
+  // estimate of the round-trip time (in milliseconds). Missing packets which
+  // will be played out in a shorter time than the round-trip time (with
+  // respect to the time this API is called) will not be included in the list.
+  //
+  // A negative `round_trip_time_ms` results in an error message, and an empty
+  // list is returned.
+  //
+  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;
+
+  //
+  // Gets statistics of calls to GetAudio().
+  void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
+
+ private:
+  struct DecoderInfo {
+    int payload_type;
+    int sample_rate_hz;
+    int num_channels;
+    SdpAudioFormat sdp_format;
+  };
+
+  uint32_t NowInTimestamp(int decoder_sampling_rate) const;
+
+  mutable Mutex mutex_;
+  absl::optional<DecoderInfo> last_decoder_ RTC_GUARDED_BY(mutex_);
+  ACMResampler resampler_;
+
+  // After construction, this is only ever touched on the thread that calls
+  // AcmReceiver::GetAudio, and only modified in this method.
+  std::unique_ptr<int16_t[]> last_audio_buffer_;
+  CallStatistics call_stats_;
+  const std::unique_ptr<NetEq> neteq_;  // NetEq is thread-safe; no lock needed.
+  Clock* const clock_;
+  std::atomic<bool> resampled_last_output_frame_;
+};
+
+}  // namespace acm2
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
new file mode 100644
index 0000000000..6dd44b696e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_receiver.h"
+
+#include <algorithm>  // std::min
+#include
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/neteq/tools/rtp_generator.h"
+#include "modules/include/module_common_types.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+namespace acm2 {
+
+class AcmReceiverTestOldApi : public AudioPacketizationCallback,
+                              public ::testing::Test {
+ protected:
+  AcmReceiverTestOldApi()
+      : timestamp_(0),
+        packet_sent_(false),
+        last_packet_send_timestamp_(timestamp_),
+        last_frame_type_(AudioFrameType::kEmptyFrame) {
+    config_.decoder_factory = decoder_factory_;
+  }
+
+  ~AcmReceiverTestOldApi() {}
+
+  void SetUp() override {
+    acm_.reset(AudioCodingModule::Create(config_));
+    receiver_.reset(new AcmReceiver(config_));
+    ASSERT_TRUE(receiver_.get() != NULL);
+    ASSERT_TRUE(acm_.get() != NULL);
+    acm_->InitializeReceiver();
+    acm_->RegisterTransportCallback(this);
+
+    rtp_header_.sequenceNumber = 0;
+    rtp_header_.timestamp = 0;
+    rtp_header_.markerBit = false;
+    rtp_header_.ssrc = 0x12345678;  // Arbitrary.
+    rtp_header_.numCSRCs = 0;
+    rtp_header_.payloadType = 0;
+  }
+
+  void TearDown() override {}
+
+  AudioCodecInfo SetEncoder(int payload_type,
+                            const SdpAudioFormat& format,
+                            const std::map<int, int> cng_payload_types = {}) {
+    // Create the speech encoder.
+    absl::optional<AudioCodecInfo> info =
+        encoder_factory_->QueryAudioEncoder(format);
+    RTC_CHECK(info.has_value());
+    std::unique_ptr<AudioEncoder> enc =
+        encoder_factory_->MakeAudioEncoder(payload_type, format, absl::nullopt);
+
+    // If we have a compatible CN specification, stack a CNG on top.
+    auto it = cng_payload_types.find(info->sample_rate_hz);
+    if (it != cng_payload_types.end()) {
+      AudioEncoderCngConfig config;
+      config.speech_encoder = std::move(enc);
+      config.num_channels = 1;
+      config.payload_type = it->second;
+      config.vad_mode = Vad::kVadNormal;
+      enc = CreateComfortNoiseEncoder(std::move(config));
+    }
+
+    // Actually start using the new encoder.
+    acm_->SetEncoder(std::move(enc));
+    return *info;
+  }
+
+  int InsertOnePacketOfSilence(const AudioCodecInfo& info) {
+    // Frame setup according to the codec.
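+    // A muted (all-zero) 10 ms frame is repeatedly passed to the encoder
+    // until it emits a packet; some codecs bundle several 10 ms blocks into
+    // one packet, so the number of frames used is returned to the caller.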
+    AudioFrame frame;
+    frame.sample_rate_hz_ = info.sample_rate_hz;
+    frame.samples_per_channel_ = info.sample_rate_hz / 100;  // 10 ms.
+    frame.num_channels_ = info.num_channels;
+    frame.Mute();
+    packet_sent_ = false;
+    last_packet_send_timestamp_ = timestamp_;
+    int num_10ms_frames = 0;
+    while (!packet_sent_) {
+      frame.timestamp_ = timestamp_;
+      timestamp_ += rtc::checked_cast<uint32_t>(frame.samples_per_channel_);
+      EXPECT_GE(acm_->Add10MsData(frame), 0);
+      ++num_10ms_frames;
+    }
+    return num_10ms_frames;
+  }
+
+  int SendData(AudioFrameType frame_type,
+               uint8_t payload_type,
+               uint32_t timestamp,
+               const uint8_t* payload_data,
+               size_t payload_len_bytes,
+               int64_t absolute_capture_timestamp_ms) override {
+    if (frame_type == AudioFrameType::kEmptyFrame)
+      return 0;
+
+    rtp_header_.payloadType = payload_type;
+    rtp_header_.timestamp = timestamp;
+
+    int ret_val = receiver_->InsertPacket(
+        rtp_header_,
+        rtc::ArrayView<const uint8_t>(payload_data, payload_len_bytes));
+    if (ret_val < 0) {
+      RTC_DCHECK_NOTREACHED();
+      return -1;
+    }
+    rtp_header_.sequenceNumber++;
+    packet_sent_ = true;
+    last_frame_type_ = frame_type;
+    return 0;
+  }
+
+  const rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_ =
+      CreateBuiltinAudioEncoderFactory();
+  const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_ =
+      CreateBuiltinAudioDecoderFactory();
+  AudioCodingModule::Config config_;
+  std::unique_ptr<AcmReceiver> receiver_;
+  std::unique_ptr<AudioCodingModule> acm_;
+  RTPHeader rtp_header_;
+  uint32_t timestamp_;
+  bool packet_sent_;  // Set when SendData is called; reset when inserting audio.
+  uint32_t last_packet_send_timestamp_;
+  AudioFrameType last_frame_type_;
+};
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_SampleRate DISABLED_SampleRate
+#else
+#define MAYBE_SampleRate SampleRate
+#endif
+TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) {
+  const std::map<int, SdpAudioFormat> codecs = {{0, {"OPUS", 48000, 2}}};
+  receiver_->SetCodecs(codecs);
+
+  constexpr int kOutSampleRateHz = 8000;  // Different from the codec rate.
+  for (size_t i = 0; i < codecs.size(); ++i) {
+    const int payload_type = rtc::checked_cast<int>(i);
+    const int num_10ms_frames =
+        InsertOnePacketOfSilence(SetEncoder(payload_type, codecs.at(i)));
+    for (int k = 0; k < num_10ms_frames; ++k) {
+      AudioFrame frame;
+      bool muted;
+      EXPECT_EQ(0, receiver_->GetAudio(kOutSampleRateHz, &frame, &muted));
+    }
+    EXPECT_EQ(encoder_factory_->QueryAudioEncoder(codecs.at(i))->sample_rate_hz,
+              receiver_->last_output_sample_rate_hz());
+  }
+}
+
+class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
+ protected:
+  AcmReceiverTestFaxModeOldApi() {
+    config_.neteq_config.for_test_no_time_stretching = true;
+  }
+
+  void RunVerifyAudioFrame(const SdpAudioFormat& codec) {
+    // Make sure "fax mode" is enabled. This will avoid delay changes unless
+    // packet-loss concealment is performed. We do this in order to make the
+    // timestamp increments predictable; in normal mode, NetEq may decide to
+    // do accelerate or pre-emptive expand operations after some time,
+    // offsetting the timestamp.
+    EXPECT_TRUE(config_.neteq_config.for_test_no_time_stretching);
+
+    constexpr int payload_type = 17;
+    receiver_->SetCodecs({{payload_type, codec}});
+
+    const AudioCodecInfo info = SetEncoder(payload_type, codec);
+    const int output_sample_rate_hz = info.sample_rate_hz;
+    const size_t output_channels = info.num_channels;
+    const size_t samples_per_ms = rtc::checked_cast<size_t>(
+        rtc::CheckedDivExact(output_sample_rate_hz, 1000));
+    const AudioFrame::VADActivity expected_vad_activity =
+        output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
+                                      : AudioFrame::kVadPassive;
+
+    // Expect the first output timestamp to be 5*fs/8000 samples before the
+    // first inserted timestamp (because of NetEq's look-ahead). (This value is
+    // defined in Expand::overlap_length_.)
+    uint32_t expected_output_ts =
+        last_packet_send_timestamp_ -
+        rtc::CheckedDivExact(5 * output_sample_rate_hz, 8000);
+
+    AudioFrame frame;
+    bool muted;
+    EXPECT_EQ(0, receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
+    // Expect timestamp = 0 before first packet is inserted.
+    EXPECT_EQ(0u, frame.timestamp_);
+    for (int i = 0; i < 5; ++i) {
+      const int num_10ms_frames = InsertOnePacketOfSilence(info);
+      for (int k = 0; k < num_10ms_frames; ++k) {
+        EXPECT_EQ(0,
+                  receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
+        EXPECT_EQ(expected_output_ts, frame.timestamp_);
+        expected_output_ts += rtc::checked_cast<uint32_t>(10 * samples_per_ms);
+        EXPECT_EQ(10 * samples_per_ms, frame.samples_per_channel_);
+        EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
+        EXPECT_EQ(output_channels, frame.num_channels_);
+        EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
+        EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
+        EXPECT_FALSE(muted);
+      }
+    }
+  }
+};
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_VerifyAudioFramePCMU DISABLED_VerifyAudioFramePCMU
+#else
+#define MAYBE_VerifyAudioFramePCMU VerifyAudioFramePCMU
+#endif
+TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFramePCMU) {
+  RunVerifyAudioFrame({"PCMU", 8000, 1});
+}
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_VerifyAudioFrameOpus DISABLED_VerifyAudioFrameOpus
+#else
+#define MAYBE_VerifyAudioFrameOpus VerifyAudioFrameOpus
+#endif
+TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
+  RunVerifyAudioFrame({"opus", 48000, 2});
+}
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
+#else
+#define MAYBE_PostdecodingVad PostdecodingVad
+#endif
+TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
+  EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
+  constexpr int payload_type = 34;
+  const SdpAudioFormat codec = {"L16", 16000, 1};
+  const AudioCodecInfo info = SetEncoder(payload_type, codec);
+  receiver_->SetCodecs({{payload_type, codec}});
+  constexpr int kNumPackets = 5;
+  AudioFrame frame;
+  for (int n = 0; n < kNumPackets; ++n) {
+    const int num_10ms_frames = InsertOnePacketOfSilence(info);
+    for (int k = 0; k < num_10ms_frames; ++k) {
+      bool muted;
+      ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
+    }
+  }
+  EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
+}
+
+class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
+ protected:
+  AcmReceiverTestPostDecodeVadPassiveOldApi() {
+    config_.neteq_config.enable_post_decode_vad = false;
+  }
+};
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
+#else
+#define MAYBE_PostdecodingVad PostdecodingVad
+#endif
+TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
+  EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
+  constexpr int payload_type = 34;
+  const SdpAudioFormat codec = {"L16", 16000, 1};
+  const AudioCodecInfo info = SetEncoder(payload_type, codec);
+  auto const value = encoder_factory_->QueryAudioEncoder(codec);
+  ASSERT_TRUE(value.has_value());
+  receiver_->SetCodecs({{payload_type, codec}});
+  const int kNumPackets = 5;
+  AudioFrame frame;
+  for (int n = 0; n < kNumPackets; ++n) {
+    const int num_10ms_frames = InsertOnePacketOfSilence(info);
+    for (int k = 0; k < num_10ms_frames; ++k) {
+      bool muted;
+      ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
+    }
+  }
+  EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
+}
+
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
+#else
+#define MAYBE_LastAudioCodec LastAudioCodec
+#endif
+#if defined(WEBRTC_CODEC_OPUS)
+TEST_F(AcmReceiverTestOldApi, MAYBE_LastAudioCodec) {
+  const std::map<int, SdpAudioFormat> codecs = {
+      {0, {"PCMU", 8000, 1}}, {1, {"PCMA", 8000, 1}}, {2, {"L16", 32000, 1}}};
+  const std::map<int, int> cng_payload_types = {
+      {8000, 100}, {16000, 101}, {32000, 102}};
+  {
+    std::map<int, SdpAudioFormat> receive_codecs = codecs;
+    for (const auto& cng_type : cng_payload_types) {
+      receive_codecs.emplace(std::make_pair(
+          cng_type.second, SdpAudioFormat("CN", cng_type.first, 1)));
+    }
+    receiver_->SetCodecs(receive_codecs);
+  }
+
+  // No audio payload is received.
+  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());
+
+  // Start with sending DTX.
+  packet_sent_ = false;
+  InsertOnePacketOfSilence(
+      SetEncoder(0, codecs.at(0), cng_payload_types));  // Enough to test
+                                                        // with one codec.
+  ASSERT_TRUE(packet_sent_);
+  EXPECT_EQ(AudioFrameType::kAudioFrameCN, last_frame_type_);
+
+  // Only DTX has been received so far; the last audio codec is undefined.
+  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());
+  EXPECT_EQ(absl::nullopt, receiver_->last_packet_sample_rate_hz());
+
+  for (size_t i = 0; i < codecs.size(); ++i) {
+    // Set DTX off to send an audio payload.
+    packet_sent_ = false;
+    const int payload_type = rtc::checked_cast<int>(i);
+    const AudioCodecInfo info_without_cng =
+        SetEncoder(payload_type, codecs.at(i));
+    InsertOnePacketOfSilence(info_without_cng);
+
+    // Sanity check that an audio payload was actually received, and that it
+    // is of type "speech."
+    ASSERT_TRUE(packet_sent_);
+    ASSERT_EQ(AudioFrameType::kAudioFrameSpeech, last_frame_type_);
+    EXPECT_EQ(info_without_cng.sample_rate_hz,
+              receiver_->last_packet_sample_rate_hz());
+
+    // Set VAD on to send DTX. Then check if the "last audio codec" returns
+    // the expected codec. Encode repeatedly until a DTX is sent.
+    const AudioCodecInfo info_with_cng =
+        SetEncoder(payload_type, codecs.at(i), cng_payload_types);
+    while (last_frame_type_ != AudioFrameType::kAudioFrameCN) {
+      packet_sent_ = false;
+      InsertOnePacketOfSilence(info_with_cng);
+      ASSERT_TRUE(packet_sent_);
+    }
+    EXPECT_EQ(info_with_cng.sample_rate_hz,
+              receiver_->last_packet_sample_rate_hz());
+    EXPECT_EQ(codecs.at(i), receiver_->LastDecoder()->second);
+  }
+}
+#endif
+
+// Check that the statistics are initialized correctly. Before any call to
+// ACM, all fields have to be zero.
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_InitializedToZero DISABLED_InitializedToZero
+#else
+#define MAYBE_InitializedToZero InitializedToZero
+#endif
+TEST_F(AcmReceiverTestOldApi, MAYBE_InitializedToZero) {
+  AudioDecodingCallStats stats;
+  receiver_->GetDecodingCallStatistics(&stats);
+  EXPECT_EQ(0, stats.calls_to_neteq);
+  EXPECT_EQ(0, stats.calls_to_silence_generator);
+  EXPECT_EQ(0, stats.decoded_normal);
+  EXPECT_EQ(0, stats.decoded_cng);
+  EXPECT_EQ(0, stats.decoded_neteq_plc);
+  EXPECT_EQ(0, stats.decoded_plc_cng);
+  EXPECT_EQ(0, stats.decoded_muted_output);
+}
+
+// Insert some packets and pull audio. Check that the statistics are valid.
+// Then, simulate packet loss and check if the PLC and PLC-to-CNG statistics
+// are correctly updated.
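+// (Packet loss is simulated simply by pulling audio without inserting any
+// packets, which forces NetEq into concealment.)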
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_NetEqCalls DISABLED_NetEqCalls
+#else
+#define MAYBE_NetEqCalls NetEqCalls
+#endif
+TEST_F(AcmReceiverTestOldApi, MAYBE_NetEqCalls) {
+  AudioDecodingCallStats stats;
+  const int kNumNormalCalls = 10;
+  const int kSampleRateHz = 16000;
+  const int kNumSamples10ms = kSampleRateHz / 100;
+  const int kFrameSizeMs = 10;  // Multiple of 10.
+  const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms;
+  const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t);
+  const uint8_t kPayloadType = 111;
+  RTPHeader rtp_header;
+  AudioFrame audio_frame;
+  bool muted;
+
+  receiver_->SetCodecs(
+      {{kPayloadType, SdpAudioFormat("L16", kSampleRateHz, 1)}});
+  rtp_header.sequenceNumber = 0xABCD;
+  rtp_header.timestamp = 0xABCDEF01;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.markerBit = false;
+  rtp_header.ssrc = 0x1234;
+  rtp_header.numCSRCs = 0;
+  rtp_header.payload_type_frequency = kSampleRateHz;
+
+  for (int num_calls = 0; num_calls < kNumNormalCalls; ++num_calls) {
+    const uint8_t kPayload[kPayloadSizeBytes] = {0};
+    ASSERT_EQ(0, receiver_->InsertPacket(rtp_header, kPayload));
+    ++rtp_header.sequenceNumber;
+    rtp_header.timestamp += kFrameSizeSamples;
+    ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted));
+    EXPECT_FALSE(muted);
+  }
+  receiver_->GetDecodingCallStatistics(&stats);
+  EXPECT_EQ(kNumNormalCalls, stats.calls_to_neteq);
+  EXPECT_EQ(0, stats.calls_to_silence_generator);
+  EXPECT_EQ(kNumNormalCalls, stats.decoded_normal);
+  EXPECT_EQ(0, stats.decoded_cng);
+  EXPECT_EQ(0, stats.decoded_neteq_plc);
+  EXPECT_EQ(0, stats.decoded_plc_cng);
+  EXPECT_EQ(0, stats.decoded_muted_output);
+
+  const int kNumPlc = 3;
+  const int kNumPlcCng = 5;
+
+  // Simulate packet-loss. NetEq first performs PLC, then PLC fades to CNG.
+  for (int n = 0; n < kNumPlc + kNumPlcCng; ++n) {
+    ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted));
+    EXPECT_FALSE(muted);
+  }
+  receiver_->GetDecodingCallStatistics(&stats);
+  EXPECT_EQ(kNumNormalCalls + kNumPlc + kNumPlcCng, stats.calls_to_neteq);
+  EXPECT_EQ(0, stats.calls_to_silence_generator);
+  EXPECT_EQ(kNumNormalCalls, stats.decoded_normal);
+  EXPECT_EQ(0, stats.decoded_cng);
+  EXPECT_EQ(kNumPlc, stats.decoded_neteq_plc);
+  EXPECT_EQ(kNumPlcCng, stats.decoded_plc_cng);
+  EXPECT_EQ(0, stats.decoded_muted_output);
+  // TODO(henrik.lundin) Add a test with muted state enabled.
+}
+
+}  // namespace acm2
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc
new file mode 100644
index 0000000000..13709dbbee
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output) {
+  RTC_DCHECK_EQ(input.num_channels_, 2);
+  RTC_DCHECK_EQ(output.size(), input.samples_per_channel_);
+
+  if (input.muted()) {
+    std::fill(output.begin(), output.begin() + input.samples_per_channel_, 0);
+  } else {
+    const int16_t* const input_data = input.data();
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      output[n] = rtc::dchecked_cast<int16_t>(
+          (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1);
+    }
+  }
+}
+
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output) {
+  const size_t output_size = num_output_channels * input.samples_per_channel_;
+  RTC_DCHECK(!(input.num_channels_ == 0 && num_output_channels > 0 &&
+               input.samples_per_channel_ > 0));
+
+  if (output->size() != output_size) {
+    output->resize(output_size);
+  }
+
+  // For muted frames, fill the frame with zeros.
+  if (input.muted()) {
+    std::fill(output->begin(), output->end(), 0);
+    return;
+  }
+
+  // Ensure that the special case of zero input channels is handled correctly
+  // (zero samples per channel is already handled correctly in the code below).
+  if (input.num_channels_ == 0) {
+    return;
+  }
+
+  const int16_t* const input_data = input.data();
+  size_t out_index = 0;
+
+  // When upmixing is needed and the input is mono, copy the mono channel into
+  // the left and right channels, and set any remaining channels to zero.
+  if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      (*output)[out_index++] = input_data[k];
+      (*output)[out_index++] = input_data[k];
+      for (size_t j = 2; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+    return;
+  }
+
+  size_t in_index = 0;
+
+  // When upmixing is needed and the output is surround, copy the available
+  // channels directly, and set the remaining channels to zero.
+  if (input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      for (size_t j = 0; j < input.num_channels_; ++j) {
+        (*output)[out_index++] = input_data[in_index++];
+      }
+      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_);
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_);
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+
+    return;
+  }
+
+  // When downmixing is needed, and the input is stereo, average the channels.
+  if (input.num_channels_ == 2) {
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      (*output)[n] = rtc::dchecked_cast<int16_t>(
+          (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1);
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is multichannel, drop the surplus
+  // channels.
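+  // (Illustrative example: a hypothetical 6-channel input remixed to stereo
+  // keeps channels 0 and 1 of each sample frame and skips the remaining four
+  // interleaved samples via num_channels_to_drop below.)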
+  const size_t num_channels_to_drop =
+      input.num_channels_ - num_output_channels;
+  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < num_output_channels; ++j) {
+      (*output)[out_index++] = input_data[in_index++];
+    }
+    in_index += num_channels_to_drop;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h
new file mode 100644
index 0000000000..661569b033
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+
+namespace webrtc {
+
+// Stereo-to-mono downmixing. The length of the output must equal the number
+// of samples per channel in the input.
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output);
+
+// Remixes the interleaved input frame to an interleaved output data vector.
+// The remixed data replaces the data in the output vector, which is resized
+// if needed. The remixing supports any combination of input and output
+// channels, as well as any number of samples per channel.
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc
new file mode 100644
index 0000000000..a1a816f727
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc
@@ -0,0 +1,191 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::AllOf;
+using ::testing::Each;
+using ::testing::ElementsAreArray;
+using ::testing::SizeIs;
+
+namespace webrtc {
+
+TEST(AcmRemixing, DownMixFrame) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(480), Each(1)));
+}
+
+TEST(AcmRemixing, DownMixMutedFrame) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  in.Mute();
+
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(480), Each(0)));
+}
+
+TEST(AcmRemixing, RemixMutedStereoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 1;
+    in_data[2 * k + 1] = 2;
+  }
+  in.Mute();
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_ * 6), Each(0)));
+}
+
+TEST(AcmRemixing, RemixStereoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 1;
+    in_data[2 * k + 1] = 2;
+  }
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 6);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    expected_output[6 * k] = 1;
+    expected_output[6 * k + 1] = 2;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+TEST(AcmRemixing, RemixMonoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[k] = 1;
+  }
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 6, 0);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    expected_output[6 * k] = 1;
+    expected_output[6 * k + 1] = 1;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+TEST(AcmRemixing, RemixStereoFrameToMono) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  ReMixFrame(in, 1, &out);
+  EXPECT_EQ(480u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_), Each(1)));
+}
+
+TEST(AcmRemixing, RemixMonoFrameToStereo) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[k] = 1;
+  }
+
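+  // Upmixing mono to stereo duplicates each input sample into both output
+  // channels, so every sample of `out` is expected to be 1 below.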
+  ReMixFrame(in, 2, &out);
+  EXPECT_EQ(960u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(2 * in.samples_per_channel_), Each(1)));
+}
+
+TEST(AcmRemixing, Remix3ChannelFrameToStereo) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 3;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < 3; ++j) {
+      in_data[3 * k + j] = j;
+    }
+  }
+
+  ReMixFrame(in, 2, &out);
+  EXPECT_EQ(2 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 2);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < 2; ++j) {
+      expected_output[2 * k + j] = static_cast<int16_t>(j);
+    }
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc
new file mode 100644
index 0000000000..e307c6ca57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_resampler.h"
+
+#include <string.h>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace acm2 {
+
+ACMResampler::ACMResampler() {}
+
+ACMResampler::~ACMResampler() {}
+
+int ACMResampler::Resample10Msec(const int16_t* in_audio,
+                                 int in_freq_hz,
+                                 int out_freq_hz,
+                                 size_t num_audio_channels,
+                                 size_t out_capacity_samples,
+                                 int16_t* out_audio) {
+  size_t in_length = in_freq_hz * num_audio_channels / 100;
+  if (in_freq_hz == out_freq_hz) {
+    if (out_capacity_samples < in_length) {
+      RTC_DCHECK_NOTREACHED();
+      return -1;
+    }
+    memcpy(out_audio, in_audio, in_length * sizeof(int16_t));
+    return static_cast<int>(in_length / num_audio_channels);
+  }
+
+  if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
+                                    num_audio_channels) != 0) {
+    RTC_LOG(LS_ERROR) << "InitializeIfNeeded(" << in_freq_hz << ", "
+                      << out_freq_hz << ", " << num_audio_channels
+                      << ") failed.";
+    return -1;
+  }
+
+  int out_length =
+      resampler_.Resample(in_audio, in_length, out_audio, out_capacity_samples);
+  if (out_length == -1) {
+    RTC_LOG(LS_ERROR) << "Resample(" << in_audio << ", " << in_length << ", "
+                      << out_audio << ", " << out_capacity_samples
+                      << ") failed.";
+    return -1;
+  }
+
+  return static_cast<int>(out_length / num_audio_channels);
+}
+
+}  // namespace acm2
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h
new file mode 100644
index 0000000000..96ba93a762
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.
+ *  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common_audio/resampler/include/push_resampler.h"
+
+namespace webrtc {
+namespace acm2 {
+
+class ACMResampler {
+ public:
+  ACMResampler();
+  ~ACMResampler();
+
+  int Resample10Msec(const int16_t* in_audio,
+                     int in_freq_hz,
+                     int out_freq_hz,
+                     size_t num_audio_channels,
+                     size_t out_capacity_samples,
+                     int16_t* out_audio);
+
+ private:
+  PushResampler<int16_t> resampler_;
+};
+
+}  // namespace acm2
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc
new file mode 100644
index 0000000000..3e65f94b0d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc
@@ -0,0 +1,174 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_send_test.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/string_encode.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+AcmSendTestOldApi::AcmSendTestOldApi(InputAudioFile* audio_source,
+                                     int source_rate_hz,
+                                     int test_duration_ms)
+    : clock_(0),
+      acm_(webrtc::AudioCodingModule::Create([this] {
+        AudioCodingModule::Config config;
+        config.clock = &clock_;
+        config.decoder_factory = CreateBuiltinAudioDecoderFactory();
+        return config;
+      }())),
+      audio_source_(audio_source),
+      source_rate_hz_(source_rate_hz),
+      input_block_size_samples_(
+          static_cast<size_t>(source_rate_hz_ * kBlockSizeMs / 1000)),
+      codec_registered_(false),
+      test_duration_ms_(test_duration_ms),
+      frame_type_(AudioFrameType::kAudioFrameSpeech),
+      payload_type_(0),
+      timestamp_(0),
+      sequence_number_(0) {
+  input_frame_.sample_rate_hz_ = source_rate_hz_;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = input_block_size_samples_;
+  RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_,
+                AudioFrame::kMaxDataSizeSamples);
+  acm_->RegisterTransportCallback(this);
+}
+
+AcmSendTestOldApi::~AcmSendTestOldApi() = default;
+
+bool AcmSendTestOldApi::RegisterCodec(absl::string_view payload_name,
+                                      int clockrate_hz,
+                                      int num_channels,
+                                      int payload_type,
+                                      int frame_size_samples) {
+  SdpAudioFormat format(payload_name, clockrate_hz, num_channels);
+  if (absl::EqualsIgnoreCase(payload_name, "g722")) {
+    RTC_CHECK_EQ(16000, clockrate_hz);
+    format.clockrate_hz = 8000;
+  } else if (absl::EqualsIgnoreCase(payload_name, "opus")) {
+    RTC_CHECK(num_channels == 1 || num_channels == 2);
+    if (num_channels == 2) {
+      format.parameters["stereo"] = "1";
+    }
+    format.num_channels = 2;
+  }
+  format.parameters["ptime"] = rtc::ToString(rtc::CheckedDivExact(
+      frame_size_samples, rtc::CheckedDivExact(clockrate_hz, 1000)));
+  auto factory = CreateBuiltinAudioEncoderFactory();
+  acm_->SetEncoder(
+      factory->MakeAudioEncoder(payload_type, format, absl::nullopt));
+  codec_registered_ = true;
+  input_frame_.num_channels_ = num_channels;
+  RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_,
+                AudioFrame::kMaxDataSizeSamples);
+  return codec_registered_;
+}
+
+void AcmSendTestOldApi::RegisterExternalCodec(
+    std::unique_ptr<AudioEncoder> external_speech_encoder) {
+  input_frame_.num_channels_ = external_speech_encoder->NumChannels();
+  acm_->SetEncoder(std::move(external_speech_encoder));
+  RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_,
+                AudioFrame::kMaxDataSizeSamples);
+  codec_registered_ = true;
+}
+
+std::unique_ptr<Packet> AcmSendTestOldApi::NextPacket() {
+  RTC_DCHECK(codec_registered_);
+  if (filter_.test(static_cast<size_t>(payload_type_))) {
+    // This payload type should be filtered out. Since the payload type is the
+    // same throughout the whole test run, no packet at all will be delivered.
+    // We can just as well signal that the test is over by returning NULL.
+    return nullptr;
+  }
+  // Insert audio and process until one packet is produced.
+  while (clock_.TimeInMilliseconds() < test_duration_ms_) {
+    clock_.AdvanceTimeMilliseconds(kBlockSizeMs);
+    RTC_CHECK(audio_source_->Read(
+        input_block_size_samples_ * input_frame_.num_channels_,
+        input_frame_.mutable_data()));
+    data_to_send_ = false;
+    RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ +=
+        static_cast<uint32_t>(input_block_size_samples_);
+    if (data_to_send_) {
+      // Encoded packet received.
+      return CreatePacket();
+    }
+  }
+  // Test ended.
+  return nullptr;
+}
+
+// This method receives the callback from ACM when a new packet is produced.
+int32_t AcmSendTestOldApi::SendData(AudioFrameType frame_type,
+                                    uint8_t payload_type,
+                                    uint32_t timestamp,
+                                    const uint8_t* payload_data,
+                                    size_t payload_len_bytes,
+                                    int64_t absolute_capture_timestamp_ms) {
+  // Store the packet locally.
+  frame_type_ = frame_type;
+  payload_type_ = payload_type;
+  timestamp_ = timestamp;
+  last_payload_vec_.assign(payload_data, payload_data + payload_len_bytes);
+  RTC_DCHECK_EQ(last_payload_vec_.size(), payload_len_bytes);
+  data_to_send_ = true;
+  return 0;
+}
+
+std::unique_ptr<Packet> AcmSendTestOldApi::CreatePacket() {
+  const size_t kRtpHeaderSize = 12;
+  rtc::CopyOnWriteBuffer packet_buffer(last_payload_vec_.size() +
+                                       kRtpHeaderSize);
+  uint8_t* packet_memory = packet_buffer.MutableData();
+  // Populate the header bytes.
+  packet_memory[0] = 0x80;
+  packet_memory[1] = static_cast<uint8_t>(payload_type_);
+  packet_memory[2] = (sequence_number_ >> 8) & 0xFF;
+  packet_memory[3] = sequence_number_ & 0xFF;
+  packet_memory[4] = (timestamp_ >> 24) & 0xFF;
+  packet_memory[5] = (timestamp_ >> 16) & 0xFF;
+  packet_memory[6] = (timestamp_ >> 8) & 0xFF;
+  packet_memory[7] = timestamp_ & 0xFF;
+  // Set SSRC to 0x12345678.
+  packet_memory[8] = 0x12;
+  packet_memory[9] = 0x34;
+  packet_memory[10] = 0x56;
+  packet_memory[11] = 0x78;
+
+  ++sequence_number_;
+
+  // Copy the payload data.
+  memcpy(packet_memory + kRtpHeaderSize, &last_payload_vec_[0],
+         last_payload_vec_.size());
+  auto packet = std::make_unique<Packet>(std::move(packet_buffer),
+                                         clock_.TimeInMilliseconds());
+  RTC_DCHECK(packet);
+  RTC_DCHECK(packet->valid_header());
+  return packet;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h
new file mode 100644
index 0000000000..0bd24705fd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/audio_frame.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+class AudioEncoder;
+
+namespace test {
+class InputAudioFile;
+class Packet;
+
+class AcmSendTestOldApi : public AudioPacketizationCallback,
+                          public PacketSource {
+ public:
+  AcmSendTestOldApi(InputAudioFile* audio_source,
+                    int source_rate_hz,
+                    int test_duration_ms);
+  ~AcmSendTestOldApi() override;
+
+  AcmSendTestOldApi(const AcmSendTestOldApi&) = delete;
+  AcmSendTestOldApi& operator=(const AcmSendTestOldApi&) = delete;
+
+  // Registers the send codec. Returns true on success, false otherwise.
+  bool RegisterCodec(absl::string_view payload_name,
+                     int sampling_freq_hz,
+                     int channels,
+                     int payload_type,
+                     int frame_size_samples);
+
+  // Registers an external send codec.
+  void RegisterExternalCodec(
+      std::unique_ptr<AudioEncoder> external_speech_encoder);
+
+  // Inherited from PacketSource.
+  std::unique_ptr<Packet> NextPacket() override;
+
+  // Inherited from AudioPacketizationCallback.
+  int32_t SendData(AudioFrameType frame_type,
+                   uint8_t payload_type,
+                   uint32_t timestamp,
+                   const uint8_t* payload_data,
+                   size_t payload_len_bytes,
+                   int64_t absolute_capture_timestamp_ms) override;
+
+  AudioCodingModule* acm() { return acm_.get(); }
+
+ private:
+  static const int kBlockSizeMs = 10;
+
+  // Creates a Packet object from the last packet produced by ACM (and received
+  // through the SendData method as a callback).
+  std::unique_ptr<Packet> CreatePacket();
+
+  SimulatedClock clock_;
+  std::unique_ptr<AudioCodingModule> acm_;
+  InputAudioFile* audio_source_;
+  int source_rate_hz_;
+  const size_t input_block_size_samples_;
+  AudioFrame input_frame_;
+  bool codec_registered_;
+  int test_duration_ms_;
+  // The following member variables are set whenever SendData() is called.
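+  // (They capture the most recent encoded packet, so that NextPacket() can
+  // wrap it in an RTP packet via CreatePacket().)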
+  AudioFrameType frame_type_;
+  int payload_type_;
+  uint32_t timestamp_;
+  uint16_t sequence_number_;
+  std::vector<uint8_t> last_payload_vec_;
+  bool data_to_send_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc
new file mode 100644
index 0000000000..2c186273b6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc
@@ -0,0 +1,637 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/include/audio_coding_module.h"
+
+#include <algorithm>
+#include <cstdint>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_coding/acm2/acm_receiver.h"
+#include "modules/audio_coding/acm2/acm_remixing.h"
+#include "modules/audio_coding/acm2/acm_resampler.h"
+#include "modules/include/module_common_types.h"
+#include "modules/include/module_common_types_public.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+// Initial size for the buffer in InputBuffer. This matches 6 channels of 10 ms
+// 48 kHz data.
+constexpr size_t kInitialInputDataBufferSize = 6 * 480;
+
+constexpr int32_t kMaxInputSampleRateHz = 192000;
+
+class AudioCodingModuleImpl final : public AudioCodingModule {
+ public:
+  explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
+  ~AudioCodingModuleImpl() override;
+
+  /////////////////////////////////////////
+  //   Sender
+  //
+
+  void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)>
+                         modifier) override;
+
+  // Register a transport callback which will be
+  // called to deliver the encoded buffers.
+  int RegisterTransportCallback(AudioPacketizationCallback* transport) override;
+
+  // Add 10 ms of raw (PCM) audio data to the encoder.
+  int Add10MsData(const AudioFrame& audio_frame) override;
+
+  /////////////////////////////////////////
+  // (FEC) Forward Error Correction (codec internal)
+  //
+
+  // Set target packet loss rate
+  int SetPacketLossRate(int loss_rate) override;
+
+  /////////////////////////////////////////
+  //   Receiver
+  //
+
+  // Initialize receiver, resets codec database etc.
+  int InitializeReceiver() override;
+
+  void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
+
+  // Incoming packet from network parsed and ready for decode.
+  int IncomingPacket(const uint8_t* incoming_payload,
+                     const size_t payload_length,
+                     const RTPHeader& rtp_info) override;
+
+  // Get 10 milliseconds of raw audio data to play out; automatically resamples
+  // to the requested frequency if > 0.
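+  // (Passing -1, as the unit tests in this patch do, requests audio at the
+  // receiver's current output rate instead.)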
+  int PlayoutData10Ms(int desired_freq_hz,
+                      AudioFrame* audio_frame,
+                      bool* muted) override;
+
+  /////////////////////////////////////////
+  //   Statistics
+  //
+
+  int GetNetworkStatistics(NetworkStatistics* statistics) override;
+
+  ANAStats GetANAStats() const override;
+
+  int GetTargetBitrate() const override;
+
+ private:
+  struct InputData {
+    InputData() : buffer(kInitialInputDataBufferSize) {}
+    uint32_t input_timestamp;
+    const int16_t* audio;
+    size_t length_per_channel;
+    size_t audio_channel;
+    // If a re-mix is required (up or down), this buffer will store a re-mixed
+    // version of the input.
+    std::vector<int16_t> buffer;
+  };
+
+  InputData input_data_ RTC_GUARDED_BY(acm_mutex_);
+
+  // This member class writes values to the named UMA histogram, but only if
+  // the value has changed since the last time (and always for the first call).
+  class ChangeLogger {
+   public:
+    explicit ChangeLogger(absl::string_view histogram_name)
+        : histogram_name_(histogram_name) {}
+    // Logs the new value if it is different from the last logged value, or if
+    // this is the first call.
+    void MaybeLog(int value);
+
+   private:
+    int last_value_ = 0;
+    bool first_time_ = true;
+    const std::string histogram_name_;
+  };
+
+  int Add10MsDataInternal(const AudioFrame& audio_frame, InputData* input_data)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
+
+  // TODO(bugs.webrtc.org/10739): change `absolute_capture_timestamp_ms` to
+  // int64_t when it always receives a valid value.
+  int Encode(const InputData& input_data,
+             absl::optional<int64_t> absolute_capture_timestamp_ms)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
+
+  int InitializeReceiverSafe() RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
+
+  bool HaveValidEncoder(absl::string_view caller_name) const
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
+
+  // Preprocessing of input audio, including resampling and down-mixing if
+  // required, before pushing audio into encoder's buffer.
+  //
+  // in_frame: input audio-frame
+  // ptr_out: pointer to output audio_frame. If no preprocessing is required
+  //          `ptr_out` will be pointing to `in_frame`, otherwise pointing to
+  //          `preprocess_frame_`.
+  //
+  // Return value:
+  //   -1: if encountering an error.
+  //    0: otherwise.
+  int PreprocessToAddData(const AudioFrame& in_frame,
+                          const AudioFrame** ptr_out)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);
+
+  // Change required states after starting to receive the codec corresponding
+  // to `index`.
+  int UpdateUponReceivingCodec(int index);
+
+  mutable Mutex acm_mutex_;
+  rtc::Buffer encode_buffer_ RTC_GUARDED_BY(acm_mutex_);
+  uint32_t expected_codec_ts_ RTC_GUARDED_BY(acm_mutex_);
+  uint32_t expected_in_ts_ RTC_GUARDED_BY(acm_mutex_);
+  acm2::ACMResampler resampler_ RTC_GUARDED_BY(acm_mutex_);
+  acm2::AcmReceiver receiver_;  // AcmReceiver has its own internal lock.
+  ChangeLogger bitrate_logger_ RTC_GUARDED_BY(acm_mutex_);
+
+  // Current encoder stack, provided by a call to RegisterEncoder.
+  std::unique_ptr<AudioEncoder> encoder_stack_ RTC_GUARDED_BY(acm_mutex_);
+
+  // This is to keep track of CN instances where we can send DTMFs.
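+  // (Encode() below also reuses it as the payload type reported for empty
+  // frames.)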
+  uint8_t previous_pltype_ RTC_GUARDED_BY(acm_mutex_);
+
+  bool receiver_initialized_ RTC_GUARDED_BY(acm_mutex_);
+
+  AudioFrame preprocess_frame_ RTC_GUARDED_BY(acm_mutex_);
+  bool first_10ms_data_ RTC_GUARDED_BY(acm_mutex_);
+
+  bool first_frame_ RTC_GUARDED_BY(acm_mutex_);
+  uint32_t last_timestamp_ RTC_GUARDED_BY(acm_mutex_);
+  uint32_t last_rtp_timestamp_ RTC_GUARDED_BY(acm_mutex_);
+
+  Mutex callback_mutex_;
+  AudioPacketizationCallback* packetization_callback_
+      RTC_GUARDED_BY(callback_mutex_);
+
+  int codec_histogram_bins_log_[static_cast<size_t>(
+      AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes)];
+  int number_of_consecutive_empty_packets_;
+};
+
+// Adds a codec usage sample to the histogram.
+void UpdateCodecTypeHistogram(size_t codec_type) {
+  RTC_HISTOGRAM_ENUMERATION(
+      "WebRTC.Audio.Encoder.CodecType", static_cast<int>(codec_type),
+      static_cast<int>(
+          webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
+}
+
+void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
+  if (value != last_value_ || first_time_) {
+    first_time_ = false;
+    last_value_ = value;
+    RTC_HISTOGRAM_COUNTS_SPARSE_100(histogram_name_, value);
+  }
+}
+
+AudioCodingModuleImpl::AudioCodingModuleImpl(
+    const AudioCodingModule::Config& config)
+    : expected_codec_ts_(0xD87F3F9F),
+      expected_in_ts_(0xD87F3F9F),
+      receiver_(config),
+      bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"),
+      encoder_stack_(nullptr),
+      previous_pltype_(255),
+      receiver_initialized_(false),
+      first_10ms_data_(false),
+      first_frame_(true),
+      packetization_callback_(NULL),
+      codec_histogram_bins_log_(),
+      number_of_consecutive_empty_packets_(0) {
+  if (InitializeReceiverSafe() < 0) {
+    RTC_LOG(LS_ERROR) << "Cannot initialize receiver";
+  }
+  RTC_LOG(LS_INFO) << "Created";
+}
+
+AudioCodingModuleImpl::~AudioCodingModuleImpl() = default;
+
+int32_t AudioCodingModuleImpl::Encode(
+    const InputData& input_data,
+    absl::optional<int64_t> absolute_capture_timestamp_ms) {
+  // TODO(bugs.webrtc.org/10739): add dcheck that
+  // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
+  AudioEncoder::EncodedInfo encoded_info;
+  uint8_t previous_pltype;
+
+  // Check if there is an encoder before.
+  if (!HaveValidEncoder("Process"))
+    return -1;
+
+  if (!first_frame_) {
+    RTC_DCHECK(IsNewerTimestamp(input_data.input_timestamp, last_timestamp_))
+        << "Time should not move backwards";
+  }
+
+  // Scale the timestamp to the codec's RTP timestamp rate.
+  uint32_t rtp_timestamp =
+      first_frame_
+          ? input_data.input_timestamp
+          : last_rtp_timestamp_ +
+                rtc::dchecked_cast<uint32_t>(rtc::CheckedDivExact(
+                    int64_t{input_data.input_timestamp - last_timestamp_} *
+                        encoder_stack_->RtpTimestampRateHz(),
+                    int64_t{encoder_stack_->SampleRateHz()}));
+
+  last_timestamp_ = input_data.input_timestamp;
+  last_rtp_timestamp_ = rtp_timestamp;
+  first_frame_ = false;
+
+  // Clear the buffer before reuse - encoded data will get appended.
+  encode_buffer_.Clear();
+  encoded_info = encoder_stack_->Encode(
+      rtp_timestamp,
+      rtc::ArrayView<const int16_t>(
+          input_data.audio,
+          input_data.audio_channel * input_data.length_per_channel),
+      &encode_buffer_);
+
+  bitrate_logger_.MaybeLog(encoder_stack_->GetTargetBitrate() / 1000);
+  if (encode_buffer_.size() == 0 && !encoded_info.send_even_if_empty) {
+    // Not enough data.
+    return 0;
+  }
+  previous_pltype = previous_pltype_;  // Read it while we have the critsect.
+
+  // Log codec type to histogram once every 500 packets.
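+  // (Empty packets are counted and attributed to the codec of the next
+  // non-empty packet, as implemented below.)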
+  if (encoded_info.encoded_bytes == 0) {
+    ++number_of_consecutive_empty_packets_;
+  } else {
+    size_t codec_type = static_cast<size_t>(encoded_info.encoder_type);
+    codec_histogram_bins_log_[codec_type] +=
+        number_of_consecutive_empty_packets_ + 1;
+    number_of_consecutive_empty_packets_ = 0;
+    if (codec_histogram_bins_log_[codec_type] >= 500) {
+      codec_histogram_bins_log_[codec_type] -= 500;
+      UpdateCodecTypeHistogram(codec_type);
+    }
+  }
+
+  AudioFrameType frame_type;
+  if (encode_buffer_.size() == 0 && encoded_info.send_even_if_empty) {
+    frame_type = AudioFrameType::kEmptyFrame;
+    encoded_info.payload_type = previous_pltype;
+  } else {
+    RTC_DCHECK_GT(encode_buffer_.size(), 0);
+    frame_type = encoded_info.speech ? AudioFrameType::kAudioFrameSpeech
+                                     : AudioFrameType::kAudioFrameCN;
+  }
+
+  {
+    MutexLock lock(&callback_mutex_);
+    if (packetization_callback_) {
+      packetization_callback_->SendData(
+          frame_type, encoded_info.payload_type,
+          encoded_info.encoded_timestamp, encode_buffer_.data(),
+          encode_buffer_.size(), absolute_capture_timestamp_ms.value_or(-1));
+    }
+  }
+  previous_pltype_ = encoded_info.payload_type;
+  return static_cast<int32_t>(encode_buffer_.size());
+}
+
+/////////////////////////////////////////
+//   Sender
+//
+
+void AudioCodingModuleImpl::ModifyEncoder(
+    rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) {
+  MutexLock lock(&acm_mutex_);
+  modifier(&encoder_stack_);
+}
+
+// Register a transport callback which will be called to deliver
+// the encoded buffers.
+int AudioCodingModuleImpl::RegisterTransportCallback(
+    AudioPacketizationCallback* transport) {
+  MutexLock lock(&callback_mutex_);
+  packetization_callback_ = transport;
+  return 0;
+}
+
+// Add 10 ms of raw (PCM) audio data to the encoder.
+int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
+  MutexLock lock(&acm_mutex_);
+  int r = Add10MsDataInternal(audio_frame, &input_data_);
+  // TODO(bugs.webrtc.org/10739): add dcheck that
+  // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
+  return r < 0
+             ? r
+             : Encode(input_data_, audio_frame.absolute_capture_timestamp_ms());
+}
+
+int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
+                                               InputData* input_data) {
+  if (audio_frame.samples_per_channel_ == 0) {
+    RTC_DCHECK_NOTREACHED();
+    RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, payload length is zero";
+    return -1;
+  }
+
+  if (audio_frame.sample_rate_hz_ > kMaxInputSampleRateHz) {
+    RTC_DCHECK_NOTREACHED();
+    RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, input frequency not valid";
+    return -1;
+  }
+
+  // Check that the length and frequency match. We currently only support raw
+  // PCM.
+  if (static_cast<size_t>(audio_frame.sample_rate_hz_ / 100) !=
+      audio_frame.samples_per_channel_) {
+    RTC_LOG(LS_ERROR)
+        << "Cannot add 10 ms audio, input frequency and length don't match";
+    return -1;
+  }
+
+  if (audio_frame.num_channels_ != 1 && audio_frame.num_channels_ != 2 &&
+      audio_frame.num_channels_ != 4 && audio_frame.num_channels_ != 6 &&
+      audio_frame.num_channels_ != 8) {
+    RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, invalid number of channels.";
+    return -1;
+  }
+
+  // Do we have a codec registered?
+  if (!HaveValidEncoder("Add10MsData")) {
+    return -1;
+  }
+
+  const AudioFrame* ptr_frame;
+  // Perform a resampling, and also a down-mix if required and possible before
+  // resampling (a down-mix prior to resampling will take place if both the
+  // primary and secondary encoders are mono and the input is stereo).
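+  // (For example, a stereo 48 kHz capture feeding a mono 16 kHz encoder is
+  // first averaged down to mono and then resampled to 160 samples per 10 ms.)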
+  if (PreprocessToAddData(audio_frame, &ptr_frame) < 0) {
+    return -1;
+  }
+
+  // Check whether we need an up-mix or down-mix.
+  const size_t current_num_channels = encoder_stack_->NumChannels();
+  const bool same_num_channels =
+      ptr_frame->num_channels_ == current_num_channels;
+
+  // TODO(yujo): Skip encode of muted frames.
+  input_data->input_timestamp = ptr_frame->timestamp_;
+  input_data->length_per_channel = ptr_frame->samples_per_channel_;
+  input_data->audio_channel = current_num_channels;
+
+  if (!same_num_channels) {
+    // Remixes the input frame to the output data and in the process resizes
+    // the output data if needed.
+    ReMixFrame(*ptr_frame, current_num_channels, &input_data->buffer);
+
+    // For pushing data to primary, point `ptr_audio` to the correct buffer.
+    input_data->audio = input_data->buffer.data();
+    RTC_DCHECK_GE(input_data->buffer.size(),
+                  input_data->length_per_channel * input_data->audio_channel);
+  } else {
+    // When adding data to encoders this pointer is pointing to an audio buffer
+    // with correct number of channels.
+    input_data->audio = ptr_frame->data();
+  }
+
+  return 0;
+}
+
+// Perform a resampling and down-mix if required. We down-mix only if the
+// encoder is mono and the input is stereo. In case of dual-streaming, both
+// encoders have to be mono for the down-mix to take place.
+// |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
+// is required, |*ptr_out| points to `in_frame`.
+// TODO(yujo): Make this more efficient for muted frames.
+int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
+                                               const AudioFrame** ptr_out) {
+  const bool resample =
+      in_frame.sample_rate_hz_ != encoder_stack_->SampleRateHz();
+
+  // This variable is true if primary codec and secondary codec (if exists)
+  // are both mono and input is stereo.
+  // TODO(henrik.lundin): This condition should probably be
+  //   in_frame.num_channels_ > encoder_stack_->NumChannels()
+  const bool down_mix =
+      in_frame.num_channels_ == 2 && encoder_stack_->NumChannels() == 1;
+
+  if (!first_10ms_data_) {
+    expected_in_ts_ = in_frame.timestamp_;
+    expected_codec_ts_ = in_frame.timestamp_;
+    first_10ms_data_ = true;
+  } else if (in_frame.timestamp_ != expected_in_ts_) {
+    RTC_LOG(LS_WARNING) << "Unexpected input timestamp: "
+                        << in_frame.timestamp_
+                        << ", expected: " << expected_in_ts_;
+    expected_codec_ts_ +=
+        (in_frame.timestamp_ - expected_in_ts_) *
+        static_cast<uint32_t>(
+            static_cast<double>(encoder_stack_->SampleRateHz()) /
+            static_cast<double>(in_frame.sample_rate_hz_));
+    expected_in_ts_ = in_frame.timestamp_;
+  }
+
+  if (!down_mix && !resample) {
+    // No pre-processing is required.
+    if (expected_in_ts_ == expected_codec_ts_) {
+      // If we've never resampled, we can use the input frame as-is.
+      *ptr_out = &in_frame;
+    } else {
+      // Otherwise we'll need to alter the timestamp. Since in_frame is const,
+      // we'll have to make a copy of it.
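+      // (expected_codec_ts_ drifts apart from expected_in_ts_ once a rate
+      // change or a timestamp jump has been compensated above.)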
+      preprocess_frame_.CopyFrom(in_frame);
+      preprocess_frame_.timestamp_ = expected_codec_ts_;
+      *ptr_out = &preprocess_frame_;
+    }
+
+    expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+    expected_codec_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+    return 0;
+  }
+
+  *ptr_out = &preprocess_frame_;
+  preprocess_frame_.num_channels_ = in_frame.num_channels_;
+  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
+  std::array<int16_t, AudioFrame::kMaxDataSizeSamples> audio;
+  const int16_t* src_ptr_audio;
+  if (down_mix) {
+    // If a resampling is required, the output of a down-mix is written into a
+    // local buffer, otherwise, it will be written to the output frame.
+    int16_t* dest_ptr_audio =
+        resample ? audio.data() : preprocess_frame_.mutable_data();
+    RTC_DCHECK_GE(audio.size(), preprocess_frame_.samples_per_channel_);
+    RTC_DCHECK_GE(audio.size(), in_frame.samples_per_channel_);
+    DownMixFrame(in_frame,
+                 rtc::ArrayView<int16_t>(
+                     dest_ptr_audio, preprocess_frame_.samples_per_channel_));
+    preprocess_frame_.num_channels_ = 1;
+
+    // Set the input of the resampler to the down-mixed signal.
+    src_ptr_audio = audio.data();
+  } else {
+    // Set the input of the resampler to the original data.
+    src_ptr_audio = in_frame.data();
+  }
+
+  preprocess_frame_.timestamp_ = expected_codec_ts_;
+  preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
+  // If it is required, we have to do a resampling.
+  if (resample) {
+    // The result of the resampler is written to the output frame.
+    int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();
+
+    int samples_per_channel = resampler_.Resample10Msec(
+        src_ptr_audio, in_frame.sample_rate_hz_,
+        encoder_stack_->SampleRateHz(), preprocess_frame_.num_channels_,
+        AudioFrame::kMaxDataSizeSamples, dest_ptr_audio);
+
+    if (samples_per_channel < 0) {
+      RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, resampling failed";
+      return -1;
+    }
+    preprocess_frame_.samples_per_channel_ =
+        static_cast<size_t>(samples_per_channel);
+    preprocess_frame_.sample_rate_hz_ = encoder_stack_->SampleRateHz();
+  }
+
+  expected_codec_ts_ +=
+      static_cast<uint32_t>(preprocess_frame_.samples_per_channel_);
+  expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+
+  return 0;
+}
+
+/////////////////////////////////////////
+// (FEC) Forward Error Correction (codec internal)
+//
+
+int AudioCodingModuleImpl::SetPacketLossRate(int loss_rate) {
+  MutexLock lock(&acm_mutex_);
+  if (HaveValidEncoder("SetPacketLossRate")) {
+    encoder_stack_->OnReceivedUplinkPacketLossFraction(loss_rate / 100.0);
+  }
+  return 0;
+}
+
+/////////////////////////////////////////
+//   Receiver
+//
+
+int AudioCodingModuleImpl::InitializeReceiver() {
+  MutexLock lock(&acm_mutex_);
+  return InitializeReceiverSafe();
+}
+
+// Initialize receiver, resets codec database etc.
+int AudioCodingModuleImpl::InitializeReceiverSafe() {
+  // If the receiver is already initialized then we want to destroy any
+  // existing decoders. After a call to this function, we should have a clean
+  // start-up.
+  if (receiver_initialized_)
+    receiver_.RemoveAllCodecs();
+  receiver_.FlushBuffers();
+
+  receiver_initialized_ = true;
+  return 0;
+}
+
+void AudioCodingModuleImpl::SetReceiveCodecs(
+    const std::map<int, SdpAudioFormat>& codecs) {
+  MutexLock lock(&acm_mutex_);
+  receiver_.SetCodecs(codecs);
+}
+
+// Incoming packet from network parsed and ready for decode.
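+// (A zero payload_length must be paired with a null payload pointer; the
+// DCHECK below enforces that contract.)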
+int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
+                                          const size_t payload_length,
+                                          const RTPHeader& rtp_header) {
+  RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
+  return receiver_.InsertPacket(
+      rtp_header,
+      rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
+}
+
+// Get 10 milliseconds of raw audio data to play out.
+// Automatically resamples to the requested frequency.
+int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
+                                           AudioFrame* audio_frame,
+                                           bool* muted) {
+  // GetAudio always returns 10 ms, at the requested sample rate.
+  if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
+    RTC_LOG(LS_ERROR) << "PlayoutData failed, RecOut Failed";
+    return -1;
+  }
+  return 0;
+}
+
+/////////////////////////////////////////
+//   Statistics
+//
+
+// TODO(turajs) change the return value to void. Also change the corresponding
+// NetEq function.
+int AudioCodingModuleImpl::GetNetworkStatistics(
+    NetworkStatistics* statistics) {
+  receiver_.GetNetworkStatistics(statistics);
+  return 0;
+}
+
+bool AudioCodingModuleImpl::HaveValidEncoder(
+    absl::string_view caller_name) const {
+  if (!encoder_stack_) {
+    RTC_LOG(LS_ERROR) << caller_name
+                      << " failed: No send codec is registered.";
+    return false;
+  }
+  return true;
+}
+
+ANAStats AudioCodingModuleImpl::GetANAStats() const {
+  MutexLock lock(&acm_mutex_);
+  if (encoder_stack_)
+    return encoder_stack_->GetANAStats();
+  // If no encoder is set, return default stats.
+  return ANAStats();
+}
+
+int AudioCodingModuleImpl::GetTargetBitrate() const {
+  MutexLock lock(&acm_mutex_);
+  if (!encoder_stack_) {
+    return -1;
+  }
+  return encoder_stack_->GetTargetBitrate();
+}
+
+}  // namespace
+
+AudioCodingModule::Config::Config(
+    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory)
+    : neteq_config(),
+      clock(Clock::GetRealTimeClockRaw()),
+      decoder_factory(decoder_factory) {
+  // Post-decode VAD is disabled by default in NetEq, however, Audio
+  // Conference Mixer relies on VAD decisions and fails without them.
+  neteq_config.enable_post_decode_vad = true;
+}
+
+AudioCodingModule::Config::Config(const Config&) = default;
+AudioCodingModule::Config::~Config() = default;
+
+AudioCodingModule* AudioCodingModule::Create(const Config& config) {
+  return new AudioCodingModuleImpl(config);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
new file mode 100644
index 0000000000..f1eb81c015
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -0,0 +1,1278 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/include/audio_coding_module.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h"
+#include "api/audio_codecs/opus/audio_decoder_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "modules/audio_coding/acm2/acm_receive_test.h"
+#include "modules/audio_coding/acm2/acm_send_test.h"
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
+#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/audio_coding/neteq/tools/audio_checksum.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "modules/audio_coding/neteq/tools/constant_pcm_packet_source.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_wav_file.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+#include "rtc_base/event.h"
+#include "rtc_base/message_digest.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/system/arch.h"
+#include "rtc_base/thread_annotations.h"
+#include "system_wrappers/include/clock.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "system_wrappers/include/sleep.h"
+#include "test/audio_decoder_proxy_factory.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder.h"
+#include "test/mock_audio_encoder.h"
+#include "test/testsupport/file_utils.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Invoke;
+
+namespace webrtc {
+
+namespace {
+const int kSampleRateHz = 16000;
+const int kNumSamples10ms = kSampleRateHz / 100;
+const int kFrameSizeMs = 10;  // Multiple of 10.
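+// 10 ms at 16 kHz is 160 samples per frame, i.e. 320 payload bytes below.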
+const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms;
+const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t);
+const uint8_t kPayloadType = 111;
+}  // namespace
+
+class RtpData {
+ public:
+  RtpData(int samples_per_packet, uint8_t payload_type)
+      : samples_per_packet_(samples_per_packet), payload_type_(payload_type) {}
+
+  virtual ~RtpData() {}
+
+  void Populate(RTPHeader* rtp_header) {
+    rtp_header->sequenceNumber = 0xABCD;
+    rtp_header->timestamp = 0xABCDEF01;
+    rtp_header->payloadType = payload_type_;
+    rtp_header->markerBit = false;
+    rtp_header->ssrc = 0x1234;
+    rtp_header->numCSRCs = 0;
+
+    rtp_header->payload_type_frequency = kSampleRateHz;
+  }
+
+  void Forward(RTPHeader* rtp_header) {
+    ++rtp_header->sequenceNumber;
+    rtp_header->timestamp += samples_per_packet_;
+  }
+
+ private:
+  int samples_per_packet_;
+  uint8_t payload_type_;
+};
+
+class PacketizationCallbackStubOldApi : public AudioPacketizationCallback {
+ public:
+  PacketizationCallbackStubOldApi()
+      : num_calls_(0),
+        last_frame_type_(AudioFrameType::kEmptyFrame),
+        last_payload_type_(-1),
+        last_timestamp_(0) {}
+
+  int32_t SendData(AudioFrameType frame_type,
+                   uint8_t payload_type,
+                   uint32_t timestamp,
+                   const uint8_t* payload_data,
+                   size_t payload_len_bytes,
+                   int64_t absolute_capture_timestamp_ms) override {
+    MutexLock lock(&mutex_);
+    ++num_calls_;
+    last_frame_type_ = frame_type;
+    last_payload_type_ = payload_type;
+    last_timestamp_ = timestamp;
+    last_payload_vec_.assign(payload_data, payload_data + payload_len_bytes);
+    return 0;
+  }
+
+  int num_calls() const {
+    MutexLock lock(&mutex_);
+    return num_calls_;
+  }
+
+  int last_payload_len_bytes() const {
+    MutexLock lock(&mutex_);
+    return rtc::checked_cast<int>(last_payload_vec_.size());
+  }
+
+  AudioFrameType last_frame_type() const {
+    MutexLock lock(&mutex_);
+    return last_frame_type_;
+  }
+
+  int last_payload_type() const {
+    MutexLock lock(&mutex_);
+    return last_payload_type_;
+  }
+
+  uint32_t last_timestamp() const {
+    MutexLock lock(&mutex_);
+    return last_timestamp_;
+  }
+
+  void SwapBuffers(std::vector<uint8_t>* payload) {
+    MutexLock lock(&mutex_);
+    last_payload_vec_.swap(*payload);
+  }
+
+ private:
+  int num_calls_ RTC_GUARDED_BY(mutex_);
+  AudioFrameType last_frame_type_ RTC_GUARDED_BY(mutex_);
+  int last_payload_type_ RTC_GUARDED_BY(mutex_);
+  uint32_t last_timestamp_ RTC_GUARDED_BY(mutex_);
+  std::vector<uint8_t> last_payload_vec_ RTC_GUARDED_BY(mutex_);
+  mutable Mutex mutex_;
+};
+
+class AudioCodingModuleTestOldApi : public ::testing::Test {
+ protected:
+  AudioCodingModuleTestOldApi()
+      : rtp_utility_(new RtpData(kFrameSizeSamples, kPayloadType)),
+        clock_(Clock::GetRealTimeClock()) {}
+
+  ~AudioCodingModuleTestOldApi() {}
+
+  void TearDown() {}
+
+  void SetUp() {
+    acm_.reset(AudioCodingModule::Create([this] {
+      AudioCodingModule::Config config;
+      config.clock = clock_;
+      config.decoder_factory = CreateBuiltinAudioDecoderFactory();
+      return config;
+    }()));
+
+    rtp_utility_->Populate(&rtp_header_);
+
+    input_frame_.sample_rate_hz_ = kSampleRateHz;
+    input_frame_.num_channels_ = 1;
+    input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000;  // 10 ms.
+    static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples,
+                  "audio frame too small");
+    input_frame_.Mute();
+
+    ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_));
+
+    SetUpL16Codec();
+  }
+
+  // Set up L16 codec.
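+  // (L16 is raw 16-bit linear PCM; pac_size_ = 160 below corresponds to one
+  // 10 ms frame at 16 kHz.)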
+  virtual void SetUpL16Codec() {
+    audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
+    pac_size_ = 160;
+  }
+
+  virtual void RegisterCodec() {
+    acm_->SetReceiveCodecs({{kPayloadType, *audio_format_}});
+    acm_->SetEncoder(CreateBuiltinAudioEncoderFactory()->MakeAudioEncoder(
+        kPayloadType, *audio_format_, absl::nullopt));
+  }
+
+  virtual void InsertPacketAndPullAudio() {
+    InsertPacket();
+    PullAudio();
+  }
+
+  virtual void InsertPacket() {
+    const uint8_t kPayload[kPayloadSizeBytes] = {0};
+    ASSERT_EQ(0,
+              acm_->IncomingPacket(kPayload, kPayloadSizeBytes, rtp_header_));
+    rtp_utility_->Forward(&rtp_header_);
+  }
+
+  virtual void PullAudio() {
+    AudioFrame audio_frame;
+    bool muted;
+    ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &audio_frame, &muted));
+    ASSERT_FALSE(muted);
+  }
+
+  virtual void InsertAudio() {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kNumSamples10ms;
+  }
+
+  virtual void VerifyEncoding() {
+    int last_length = packet_cb_.last_payload_len_bytes();
+    EXPECT_TRUE(last_length == 2 * pac_size_ || last_length == 0)
+        << "Last encoded packet was " << last_length << " bytes.";
+  }
+
+  virtual void InsertAudioAndVerifyEncoding() {
+    InsertAudio();
+    VerifyEncoding();
+  }
+
+  std::unique_ptr<RtpData> rtp_utility_;
+  std::unique_ptr<AudioCodingModule> acm_;
+  PacketizationCallbackStubOldApi packet_cb_;
+  RTPHeader rtp_header_;
+  AudioFrame input_frame_;
+
+  absl::optional<SdpAudioFormat> audio_format_;
+  int pac_size_ = -1;
+
+  Clock* clock_;
+};
+
+class AudioCodingModuleTestOldApiDeathTest
+    : public AudioCodingModuleTestOldApi {};
+
+TEST_F(AudioCodingModuleTestOldApi, VerifyOutputFrame) {
+  AudioFrame audio_frame;
+  const int kSampleRateHz = 32000;
+  bool muted;
+  EXPECT_EQ(0, acm_->PlayoutData10Ms(kSampleRateHz, &audio_frame, &muted));
+  ASSERT_FALSE(muted);
+  EXPECT_EQ(0u, audio_frame.timestamp_);
+  EXPECT_GT(audio_frame.num_channels_, 0u);
+  EXPECT_EQ(static_cast<size_t>(kSampleRateHz / 100),
+            audio_frame.samples_per_channel_);
+  EXPECT_EQ(kSampleRateHz, audio_frame.sample_rate_hz_);
+}
+
+// The below test is temporarily disabled on Windows due to problems
+// with clang debug builds.
+// TODO(tommi): Re-enable when we've figured out what the problem is.
+// http://crbug.com/615050
+#if !defined(WEBRTC_WIN) && defined(__clang__) && RTC_DCHECK_IS_ON && \
+    GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST_F(AudioCodingModuleTestOldApiDeathTest, FailOnZeroDesiredFrequency) {
+  AudioFrame audio_frame;
+  bool muted;
+  RTC_EXPECT_DEATH(acm_->PlayoutData10Ms(0, &audio_frame, &muted),
+                   "dst_sample_rate_hz");
+}
+#endif
+
+// Checks that the transport callback is invoked once for each speech packet.
+// Also checks that the frame type is kAudioFrameSpeech.
+TEST_F(AudioCodingModuleTestOldApi, TransportCallbackIsInvokedForEachPacket) {
+  const int k10MsBlocksPerPacket = 3;
+  pac_size_ = k10MsBlocksPerPacket * kSampleRateHz / 100;
+  audio_format_->parameters["ptime"] = "30";
+  RegisterCodec();
+  const int kLoops = 10;
+  for (int i = 0; i < kLoops; ++i) {
+    EXPECT_EQ(i / k10MsBlocksPerPacket, packet_cb_.num_calls());
+    if (packet_cb_.num_calls() > 0)
+      EXPECT_EQ(AudioFrameType::kAudioFrameSpeech,
+                packet_cb_.last_frame_type());
+    InsertAudioAndVerifyEncoding();
+  }
+  EXPECT_EQ(kLoops / k10MsBlocksPerPacket, packet_cb_.num_calls());
+  EXPECT_EQ(AudioFrameType::kAudioFrameSpeech, packet_cb_.last_frame_type());
+}
+
+// Introduce this class to set different expectations on the number of encoded
+// bytes.
+// Introduce this class to set different expectations on the number of encoded
+// bytes. This class expects all encoded packets to be 9 bytes (matching one
+// CNG SID frame) or 0 bytes. This test depends on `input_frame_` containing
+// (near-)zero values. It also introduces a way to register comfort noise with
+// a custom payload type.
+class AudioCodingModuleTestWithComfortNoiseOldApi
+    : public AudioCodingModuleTestOldApi {
+ protected:
+  void RegisterCngCodec(int rtp_payload_type) {
+    acm_->SetReceiveCodecs({{kPayloadType, *audio_format_},
+                            {rtp_payload_type, {"cn", kSampleRateHz, 1}}});
+    acm_->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) {
+      AudioEncoderCngConfig config;
+      config.speech_encoder = std::move(*enc);
+      config.num_channels = 1;
+      config.payload_type = rtp_payload_type;
+      config.vad_mode = Vad::kVadNormal;
+      *enc = CreateComfortNoiseEncoder(std::move(config));
+    });
+  }
+
+  void VerifyEncoding() override {
+    int last_length = packet_cb_.last_payload_len_bytes();
+    EXPECT_TRUE(last_length == 9 || last_length == 0)
+        << "Last encoded packet was " << last_length << " bytes.";
+  }
+
+  void DoTest(int blocks_per_packet, int cng_pt) {
+    const int kLoops = 40;
+    // This array defines the expected frame types, and when they should
+    // arrive. We expect a frame to arrive each time the speech encoder would
+    // have produced a packet, and once every 100 ms the frame should be
+    // non-empty, that is, contain comfort noise.
+    const struct {
+      int ix;
+      AudioFrameType type;
+    } expectation[] = {{2, AudioFrameType::kAudioFrameCN},
+                       {5, AudioFrameType::kEmptyFrame},
+                       {8, AudioFrameType::kEmptyFrame},
+                       {11, AudioFrameType::kAudioFrameCN},
+                       {14, AudioFrameType::kEmptyFrame},
+                       {17, AudioFrameType::kEmptyFrame},
+                       {20, AudioFrameType::kAudioFrameCN},
+                       {23, AudioFrameType::kEmptyFrame},
+                       {26, AudioFrameType::kEmptyFrame},
+                       {29, AudioFrameType::kEmptyFrame},
+                       {32, AudioFrameType::kAudioFrameCN},
+                       {35, AudioFrameType::kEmptyFrame},
+                       {38, AudioFrameType::kEmptyFrame}};
+    for (int i = 0; i < kLoops; ++i) {
+      int num_calls_before = packet_cb_.num_calls();
+      EXPECT_EQ(i / blocks_per_packet, num_calls_before);
+      InsertAudioAndVerifyEncoding();
+      int num_calls = packet_cb_.num_calls();
+      if (num_calls == num_calls_before + 1) {
+        EXPECT_EQ(expectation[num_calls - 1].ix, i);
+        EXPECT_EQ(expectation[num_calls - 1].type,
+                  packet_cb_.last_frame_type())
+            << "Wrong frame type for lap " << i;
+        EXPECT_EQ(cng_pt, packet_cb_.last_payload_type());
+      } else {
+        EXPECT_EQ(num_calls, num_calls_before);
+      }
+    }
+  }
+};
+
+// Checks that the transport callback is invoked once per frame period of the
+// underlying speech encoder, even when comfort noise is produced.
+// Also checks that the frame type is kAudioFrameCN or kEmptyFrame.
+TEST_F(AudioCodingModuleTestWithComfortNoiseOldApi,
+       TransportCallbackTestForComfortNoiseRegisterCngLast) {
+  const int k10MsBlocksPerPacket = 3;
+  pac_size_ = k10MsBlocksPerPacket * kSampleRateHz / 100;
+  audio_format_->parameters["ptime"] = "30";
+  RegisterCodec();
+  const int kCngPayloadType = 105;
+  RegisterCngCodec(kCngPayloadType);
+  DoTest(k10MsBlocksPerPacket, kCngPayloadType);
+}
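+
+// Editor's note, not upstream text: the 9-byte expectation in
+// VerifyEncoding() above matches the usual WebRTC comfort-noise SID
+// payload -- 1 energy byte plus AudioEncoderCngConfig's num_cng_coefficients
+// reflection coefficients (assumed here to be the default of 8):
+//
+//   AudioEncoderCngConfig config;  // config.num_cng_coefficients == 8
+//   // SID frame = 1 + 8 = 9 bytes; all other frames are empty (0 bytes).
+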
+// A multi-threaded test for ACM that uses the PCM16b 16 kHz codec.
+class AudioCodingModuleMtTestOldApi : public AudioCodingModuleTestOldApi {
+ protected:
+  static const int kNumPackets = 500;
+  static const int kNumPullCalls = 500;
+
+  AudioCodingModuleMtTestOldApi()
+      : AudioCodingModuleTestOldApi(),
+        send_count_(0),
+        insert_packet_count_(0),
+        pull_audio_count_(0),
+        next_insert_packet_time_ms_(0),
+        fake_clock_(new SimulatedClock(0)) {
+    clock_ = fake_clock_.get();
+  }
+
+  void SetUp() {
+    AudioCodingModuleTestOldApi::SetUp();
+    RegisterCodec();  // Must be called before the threads start below.
+    StartThreads();
+  }
+
+  void StartThreads() {
+    quit_.store(false);
+
+    const auto attributes =
+        rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime);
+    send_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!quit_.load()) {
+            CbSendImpl();
+          }
+        },
+        "send", attributes);
+    insert_packet_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!quit_.load()) {
+            CbInsertPacketImpl();
+          }
+        },
+        "insert_packet", attributes);
+    pull_audio_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!quit_.load()) {
+            CbPullAudioImpl();
+          }
+        },
+        "pull_audio", attributes);
+  }
+
+  void TearDown() {
+    AudioCodingModuleTestOldApi::TearDown();
+    quit_.store(true);
+    pull_audio_thread_.Finalize();
+    send_thread_.Finalize();
+    insert_packet_thread_.Finalize();
+  }
+
+  bool RunTest() { return test_complete_.Wait(TimeDelta::Minutes(10)); }
+
+  virtual bool TestDone() {
+    if (packet_cb_.num_calls() > kNumPackets) {
+      MutexLock lock(&mutex_);
+      if (pull_audio_count_ > kNumPullCalls) {
+        // Both conditions for completion are met. End the test.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // The send thread doesn't have to care about the current simulated time,
+  // since only the AcmReceiver is using the clock.
+  void CbSendImpl() {
+    SleepMs(1);
+    if (HasFatalFailure()) {
+      // End the test early if a fatal failure (ASSERT_*) has occurred.
+      test_complete_.Set();
+    }
+    ++send_count_;
+    InsertAudioAndVerifyEncoding();
+    if (TestDone()) {
+      test_complete_.Set();
+    }
+  }
+
+  void CbInsertPacketImpl() {
+    SleepMs(1);
+    {
+      MutexLock lock(&mutex_);
+      if (clock_->TimeInMilliseconds() < next_insert_packet_time_ms_) {
+        return;
+      }
+      next_insert_packet_time_ms_ += 10;
+    }
+    // Now we're not holding the crit sect when calling ACM.
+    ++insert_packet_count_;
+    InsertPacket();
+  }
+
+  void CbPullAudioImpl() {
+    SleepMs(1);
+    {
+      MutexLock lock(&mutex_);
+      // Don't let the insert thread fall behind.
+      if (next_insert_packet_time_ms_ < clock_->TimeInMilliseconds()) {
+        return;
+      }
+      ++pull_audio_count_;
+    }
+    // Now we're not holding the crit sect when calling ACM.
+    PullAudio();
+    fake_clock_->AdvanceTimeMilliseconds(10);
+  }
+
+  rtc::PlatformThread send_thread_;
+  rtc::PlatformThread insert_packet_thread_;
+  rtc::PlatformThread pull_audio_thread_;
+  // Used to force worker threads to stop looping.
+  std::atomic<bool> quit_;
+
+  rtc::Event test_complete_;
+  int send_count_;
+  int insert_packet_count_;
+  int pull_audio_count_ RTC_GUARDED_BY(mutex_);
+  Mutex mutex_;
+  int64_t next_insert_packet_time_ms_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<SimulatedClock> fake_clock_;
+};
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_DoTest DISABLED_DoTest
+#else
+#define MAYBE_DoTest DoTest
+#endif
+TEST_F(AudioCodingModuleMtTestOldApi, MAYBE_DoTest) {
+  EXPECT_TRUE(RunTest());
+}
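+
+// Editor's sketch, not upstream code: the worker/shutdown pattern the fixture
+// above uses, in miniature. Each thread loops on an atomic flag and is joined
+// via Finalize():
+//
+//   std::atomic<bool> quit{false};
+//   auto worker = rtc::PlatformThread::SpawnJoinable(
+//       [&] {
+//         while (!quit.load()) {
+//           // Poll some work, then sleep ~1 ms to avoid busy-waiting.
+//         }
+//       },
+//       "worker");
+//   quit.store(true);   // request shutdown
+//   worker.Finalize();  // joins the thread
+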
+// Disabling all of these tests on iOS until file support has been added.
+// See https://code.google.com/p/webrtc/issues/detail?id=4752 for details.
+#if !defined(WEBRTC_IOS)
+
+// This test verifies bit exactness for the send-side of ACM. The test setup
+// is a chain of three different test classes:
+//
+// test::AcmSendTest -> AcmSenderBitExactness -> test::AcmReceiveTest
+//
+// The receiver side is driving the test by requesting new packets from
+// AcmSenderBitExactness::NextPacket(). This method, in turn, asks for the
+// packet from test::AcmSendTest::NextPacket, which inserts audio from the
+// input file until one packet is produced. (The input file loops
+// indefinitely.) Before passing the packet to the receiver, this test class
+// verifies the packet header and updates a payload checksum with the new
+// payload. The decoded output from the receiver is also verified with a
+// (separate) checksum.
+class AcmSenderBitExactnessOldApi : public ::testing::Test,
+                                    public test::PacketSource {
+ protected:
+  static const int kTestDurationMs = 1000;
+
+  AcmSenderBitExactnessOldApi()
+      : frame_size_rtp_timestamps_(0),
+        packet_count_(0),
+        payload_type_(0),
+        last_sequence_number_(0),
+        last_timestamp_(0),
+        payload_checksum_(
+            rtc::MessageDigestFactory::Create(rtc::DIGEST_MD5)) {}
+
+  // Sets up the test::AcmSendTest object. Returns true on success, otherwise
+  // false.
+  bool SetUpSender(absl::string_view input_file_name, int source_rate) {
+    // Note that `audio_source_` will loop forever. The test duration is set
+    // explicitly by `kTestDurationMs`.
+    audio_source_.reset(new test::InputAudioFile(input_file_name));
+    send_test_.reset(new test::AcmSendTestOldApi(audio_source_.get(),
+                                                 source_rate,
+                                                 kTestDurationMs));
+    return send_test_.get() != NULL;
+  }
+
+  // Registers a send codec in the test::AcmSendTest object. Returns true on
+  // success, false on failure.
+  bool RegisterSendCodec(absl::string_view payload_name,
+                         int sampling_freq_hz,
+                         int channels,
+                         int payload_type,
+                         int frame_size_samples,
+                         int frame_size_rtp_timestamps) {
+    payload_type_ = payload_type;
+    frame_size_rtp_timestamps_ = frame_size_rtp_timestamps;
+    return send_test_->RegisterCodec(payload_name, sampling_freq_hz, channels,
+                                     payload_type, frame_size_samples);
+  }
+
+  void RegisterExternalSendCodec(
+      std::unique_ptr<AudioEncoder> external_speech_encoder,
+      int payload_type) {
+    payload_type_ = payload_type;
+    frame_size_rtp_timestamps_ = rtc::checked_cast<uint32_t>(
+        external_speech_encoder->Num10MsFramesInNextPacket() *
+        external_speech_encoder->RtpTimestampRateHz() / 100);
+    send_test_->RegisterExternalCodec(std::move(external_speech_encoder));
+  }
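+
+  // Editor's note, not upstream code: a worked example of the expression
+  // above for Opus as configured later in this file (20 ms frames, 48 kHz
+  // RTP clock):
+  //
+  //   frame_size_rtp_timestamps_ = 2   // 10 ms frames per packet
+  //                              * 48000 / 100 = 960 RTP ticks per packet,
+  //
+  // which is the per-packet timestamp step VerifyPacket() checks below.
+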
+  // Runs the test. SetUpSender() and RegisterSendCodec() must have been
+  // called before calling this method.
+  void Run(absl::string_view audio_checksum_ref,
+           absl::string_view payload_checksum_ref,
+           int expected_packets,
+           test::AcmReceiveTestOldApi::NumOutputChannels expected_channels,
+           rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr) {
+    if (!decoder_factory) {
+      decoder_factory = CreateBuiltinAudioDecoderFactory();
+    }
+    // Set up the receiver used to decode the packets and verify the decoded
+    // output.
+    test::AudioChecksum audio_checksum;
+    const std::string output_file_name =
+        webrtc::test::OutputPath() +
+        ::testing::UnitTest::GetInstance()
+            ->current_test_info()
+            ->test_case_name() +
+        "_" + ::testing::UnitTest::GetInstance()->current_test_info()->name() +
+        "_output.wav";
+    const int kOutputFreqHz = 8000;
+    test::OutputWavFile output_file(output_file_name, kOutputFreqHz,
+                                    expected_channels);
+    // Have the output audio sent both to file and to the checksum
+    // calculator.
+    test::AudioSinkFork output(&audio_checksum, &output_file);
+    test::AcmReceiveTestOldApi receive_test(this, &output, kOutputFreqHz,
+                                            expected_channels,
+                                            decoder_factory);
+    ASSERT_NO_FATAL_FAILURE(receive_test.RegisterDefaultCodecs());
+
+    // This is where the actual test is executed.
+    receive_test.Run();
+
+    // Extract and verify the audio checksum.
+    std::string checksum_string = audio_checksum.Finish();
+    ExpectChecksumEq(audio_checksum_ref, checksum_string);
+
+    // Extract and verify the payload checksum.
+    rtc::Buffer checksum_result(payload_checksum_->Size());
+    payload_checksum_->Finish(checksum_result.data(), checksum_result.size());
+    checksum_string = rtc::hex_encode(checksum_result);
+    ExpectChecksumEq(payload_checksum_ref, checksum_string);
+
+    // Verify number of packets produced.
+    EXPECT_EQ(expected_packets, packet_count_);
+
+    // Delete the output file.
+    remove(output_file_name.c_str());
+  }
+
+  // Helper: result must be one of the "|"-separated checksums.
+  void ExpectChecksumEq(absl::string_view ref, absl::string_view result) {
+    if (ref.size() == result.size()) {
+      // Only one checksum: clearer message.
+      EXPECT_EQ(ref, result);
+    } else {
+      EXPECT_NE(ref.find(result), absl::string_view::npos)
+          << result << " must be one of these:\n"
+          << ref;
+    }
+  }
+
+  // Inherited from test::PacketSource.
+  std::unique_ptr<test::Packet> NextPacket() override {
+    auto packet = send_test_->NextPacket();
+    if (!packet)
+      return NULL;
+
+    VerifyPacket(packet.get());
+    // TODO(henrik.lundin) Save the packet to file as well.
+
+    // Pass it on to the caller. The caller becomes the owner of `packet`.
+    return packet;
+  }
+
+  // Verifies the packet.
+  void VerifyPacket(const test::Packet* packet) {
+    EXPECT_TRUE(packet->valid_header());
+    // (We can check the header fields even if valid_header() is false.)
+    EXPECT_EQ(payload_type_, packet->header().payloadType);
+    if (packet_count_ > 0) {
+      // This is not the first packet.
+      uint16_t sequence_number_diff =
+          packet->header().sequenceNumber - last_sequence_number_;
+      EXPECT_EQ(1, sequence_number_diff);
+      uint32_t timestamp_diff = packet->header().timestamp - last_timestamp_;
+      EXPECT_EQ(frame_size_rtp_timestamps_, timestamp_diff);
+    }
+    ++packet_count_;
+    last_sequence_number_ = packet->header().sequenceNumber;
+    last_timestamp_ = packet->header().timestamp;
+    // Update the checksum.
+    payload_checksum_->Update(packet->payload(),
+                              packet->payload_length_bytes());
+  }
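+
+  // Editor's note, not upstream code: how ExpectChecksumEq() treats its
+  // reference string. Each alternative is one MD5 hex digest (32
+  // characters); several alternatives are "|"-separated when builds
+  // legitimately differ:
+  //
+  //   ExpectChecksumEq("aaaa...", "aaaa...");         // exact, same length
+  //   ExpectChecksumEq("aaaa...|bbbb...", "bbbb..."); // found in the list
+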
+  void SetUpTest(absl::string_view codec_name,
+                 int codec_sample_rate_hz,
+                 int channels,
+                 int payload_type,
+                 int codec_frame_size_samples,
+                 int codec_frame_size_rtp_timestamps) {
+    ASSERT_TRUE(SetUpSender(
+        channels == 1 ? kTestFileMono32kHz : kTestFileFakeStereo32kHz,
+        32000));
+    ASSERT_TRUE(RegisterSendCodec(codec_name, codec_sample_rate_hz, channels,
+                                  payload_type, codec_frame_size_samples,
+                                  codec_frame_size_rtp_timestamps));
+  }
+
+  void SetUpTestExternalEncoder(
+      std::unique_ptr<AudioEncoder> external_speech_encoder,
+      int payload_type) {
+    ASSERT_TRUE(send_test_);
+    RegisterExternalSendCodec(std::move(external_speech_encoder),
+                              payload_type);
+  }
+
+  std::unique_ptr<test::AcmSendTestOldApi> send_test_;
+  std::unique_ptr<test::InputAudioFile> audio_source_;
+  uint32_t frame_size_rtp_timestamps_;
+  int packet_count_;
+  uint8_t payload_type_;
+  uint16_t last_sequence_number_;
+  uint32_t last_timestamp_;
+  std::unique_ptr<rtc::MessageDigest> payload_checksum_;
+  const std::string kTestFileMono32kHz =
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+  const std::string kTestFileFakeStereo32kHz =
+      webrtc::test::ResourcePath("audio_coding/testfile_fake_stereo_32kHz",
+                                 "pcm");
+  const std::string kTestFileQuad48kHz = webrtc::test::ResourcePath(
+      "audio_coding/speech_4_channels_48k_one_second",
+      "wav");
+};
+
+class AcmSenderBitExactnessNewApi : public AcmSenderBitExactnessOldApi {};
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_8000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 8000, 1, 107, 80, 80));
+  Run(/*audio_checksum_ref=*/"69118ed438ac76252d023e0463819471",
+      /*payload_checksum_ref=*/"c1edd36339ce0326cc4550041ad719a0",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_16000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 16000, 1, 108, 160, 160));
+  Run(/*audio_checksum_ref=*/"f95c87bdd33f631bcf80f4b19445bbd2",
+      /*payload_checksum_ref=*/"ad786526383178b08d80d6eee06e9bad",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_32000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 32000, 1, 109, 320, 320));
+  Run(/*audio_checksum_ref=*/"c50244419c5c3a2f04cc69a022c266a2",
+      /*payload_checksum_ref=*/"5ef82ea885e922263606c6fdbc49f651",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_8000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 8000, 2, 111, 80, 80));
+  Run(/*audio_checksum_ref=*/"4fccf4cc96f1e8e8de4b9fadf62ded9e",
+      /*payload_checksum_ref=*/"62ce5adb0d4965d0a52ec98ae7f98974",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_16000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 16000, 2, 112, 160, 160));
+  Run(/*audio_checksum_ref=*/"e15e388d9d4af8c02a59fe1552fedee3",
+      /*payload_checksum_ref=*/"41ca8edac4b8c71cd54fd9f25ec14870",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_32000khz_10ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 32000, 2, 113, 320, 320));
+  Run(/*audio_checksum_ref=*/"b240520c0d05003fde7a174ae5957286",
+      /*payload_checksum_ref=*/"50e58502fb04421bf5b857dda4c96879",
+      /*expected_packets=*/100,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
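+
+// Editor's note, not upstream text: expected_packets follows directly from
+// kTestDurationMs == 1000. The 10 ms L16 frames above give 1000 / 10 == 100
+// packets; the 20 ms PCMU/PCMA, G722 and Opus cases below give
+// 1000 / 20 == 50, and 30 ms iLBC gives 33.
+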
+TEST_F(AcmSenderBitExactnessOldApi, Pcmu_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMU", 8000, 1, 0, 160, 160));
+  Run(/*audio_checksum_ref=*/"c8d1fc677f33c2022ec5f83c7f302280",
+      /*payload_checksum_ref=*/"8f9b8750bd80fe26b6cbf6659b89f0f9",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcma_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMA", 8000, 1, 8, 160, 160));
+  Run(/*audio_checksum_ref=*/"47eb60e855eb12d1b0e6da9c975754a4",
+      /*payload_checksum_ref=*/"6ad745e55aa48981bfc790d0eeef2dd1",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcmu_stereo_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMU", 8000, 2, 110, 160, 160));
+  Run(/*audio_checksum_ref=*/"6ef2f57d4934714787fd0a834e3ea18e",
+      /*payload_checksum_ref=*/"60b6f25e8d1e74cb679cfe756dd9bca5",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, Pcma_stereo_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMA", 8000, 2, 118, 160, 160));
+  Run(/*audio_checksum_ref=*/"a84d75e098d87ab6b260687eb4b612a2",
+      /*payload_checksum_ref=*/"92b282c83efd20e7eeef52ba40842cf7",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+
+#if defined(WEBRTC_CODEC_ILBC) && defined(WEBRTC_LINUX) && \
+    defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessOldApi, Ilbc_30ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("ILBC", 8000, 1, 102, 240, 240));
+  Run(/*audio_checksum_ref=*/"b14dba0de36efa5ec88a32c0b320b70f",
+      /*payload_checksum_ref=*/"cfae2e9f6aba96e145f2bcdd5050ce78",
+      /*expected_packets=*/33,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+#endif
+
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessOldApi, G722_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("G722", 16000, 1, 9, 320, 160));
+  Run(/*audio_checksum_ref=*/"f5264affff25cf2cbd2e1e8a5217f9a3",
+      /*payload_checksum_ref=*/"fc68a87e1380614e658087cb35d5ca10",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput);
+}
+#endif
+
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessOldApi, G722_stereo_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("G722", 16000, 2, 119, 320, 160));
+  Run(/*audio_checksum_ref=*/"be0b8528ff9db3a2219f55ddd36faf7f",
+      /*payload_checksum_ref=*/"66516152eeaa1e650ad94ff85f668dac",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+#endif
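+
+// Editor's note, not upstream text: the Linux/x86_64 guards above and below
+// exist because these are bit-exactness tests. The MD5 digests only
+// reproduce where the codec builds with identical optimizations; the
+// "|"-separated Opus digests below cover SSE and non-SSE libopus builds.
+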
+namespace {
+// Checksum depends on libopus being compiled with or without SSE.
+const std::string audio_checksum =
+    "6a76fe2ffba057c06eb63239b3c47abe"
+    "|0c4f9d33b4a7379a34ee0c0d5718afe6";
+const std::string payload_checksum =
+    "b43bdf7638b2bc2a5a6f30bdc640b9ed"
+    "|c30d463e7ed10bdd1da9045f80561f27";
+}  // namespace
+
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessOldApi, Opus_stereo_20ms) {
+  ASSERT_NO_FATAL_FAILURE(SetUpTest("opus", 48000, 2, 120, 960, 960));
+  Run(audio_checksum, payload_checksum, /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+#endif
+
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessNewApi, OpusFromFormat_stereo_20ms) {
+  const auto config = AudioEncoderOpus::SdpToConfig(
+      SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}));
+  ASSERT_TRUE(SetUpSender(kTestFileFakeStereo32kHz, 32000));
+  ASSERT_NO_FATAL_FAILURE(SetUpTestExternalEncoder(
+      AudioEncoderOpus::MakeAudioEncoder(*config, 120), 120));
+  Run(audio_checksum, payload_checksum, /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+#endif
+
+// TODO(webrtc:8649): Disabled until the Encoder counterpart of
+// https://webrtc-review.googlesource.com/c/src/+/129768 lands.
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessNewApi, DISABLED_OpusManyChannels) {
+  constexpr int kNumChannels = 4;
+  constexpr int kOpusPayloadType = 120;
+
+  // Read a 4 channel file at 48kHz.
+  ASSERT_TRUE(SetUpSender(kTestFileQuad48kHz, 48000));
+
+  const auto sdp_format = SdpAudioFormat("multiopus", 48000, kNumChannels,
+                                         {{"channel_mapping", "0,1,2,3"},
+                                          {"coupled_streams", "2"},
+                                          {"num_streams", "2"}});
+  const auto encoder_config =
+      AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
+
+  ASSERT_TRUE(encoder_config.has_value());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SetUpTestExternalEncoder(AudioEncoderMultiChannelOpus::MakeAudioEncoder(
+                                   *encoder_config, kOpusPayloadType),
+                               kOpusPayloadType));
+
+  const auto decoder_config =
+      AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+  const auto opus_decoder =
+      AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
+
+  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory =
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(
+          opus_decoder.get());
+
+  // Set up an EXTERNAL DECODER to parse 4 channels.
+  Run("audio checksum check downstream|8051617907766bec5f4e4a4f7c6d5291",
+      "payload checksum check downstream|b09c52e44b2bdd9a0809e3a5b1623a76",
+      /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kQuadOutput,
+      decoder_factory);
+}
+#endif
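+
+// Editor's sketch, not upstream code: the SDP-to-encoder flow used by the
+// tests above and below. SdpToConfig() maps fmtp-style parameters onto an
+// encoder config, and MakeAudioEncoder() instantiates it for a payload type:
+//
+//   const absl::optional<AudioEncoderOpusConfig> cfg =
+//       AudioEncoderOpus::SdpToConfig(
+//           SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}));
+//   std::unique_ptr<AudioEncoder> enc =
+//       AudioEncoderOpus::MakeAudioEncoder(*cfg, /*payload_type=*/120);
+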
+#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64)
+TEST_F(AcmSenderBitExactnessNewApi, OpusFromFormat_stereo_20ms_voip) {
+  auto config = AudioEncoderOpus::SdpToConfig(
+      SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}));
+  // If not set, default will be kAudio in case of stereo.
+  config->application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
+  ASSERT_TRUE(SetUpSender(kTestFileFakeStereo32kHz, 32000));
+  ASSERT_NO_FATAL_FAILURE(SetUpTestExternalEncoder(
+      AudioEncoderOpus::MakeAudioEncoder(*config, 120), 120));
+  const std::string audio_maybe_sse =
+      "1010e60ad34cee73c939edaf563d0593"
+      "|c05b4523d4c3fad2bab96d2a56baa2d0";
+
+  const std::string payload_maybe_sse =
+      "ea48d94e43217793af9b7e15ece94e54"
+      "|bd93c492087093daf662cdd968f6cdda";
+
+  Run(audio_maybe_sse, payload_maybe_sse, /*expected_packets=*/50,
+      /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput);
+}
+#endif
+
+// This test is for verifying the SetBitRate function. The bitrate is changed
+// at the beginning, and the number of generated bytes is checked.
+class AcmSetBitRateTest : public ::testing::Test {
+ protected:
+  static const int kTestDurationMs = 1000;
+
+  // Sets up the test::AcmSendTest object. Returns true on success, otherwise
+  // false.
+  bool SetUpSender() {
+    const std::string input_file_name =
+        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+    // Note that `audio_source_` will loop forever. The test duration is set
+    // explicitly by `kTestDurationMs`.
+    audio_source_.reset(new test::InputAudioFile(input_file_name));
+    static const int kSourceRateHz = 32000;
+    send_test_.reset(new test::AcmSendTestOldApi(
+        audio_source_.get(), kSourceRateHz, kTestDurationMs));
+    return send_test_.get();
+  }
+
+  // Registers a send codec in the test::AcmSendTest object. Returns true on
+  // success, false on failure.
+  virtual bool RegisterSendCodec(absl::string_view payload_name,
+                                 int sampling_freq_hz,
+                                 int channels,
+                                 int payload_type,
+                                 int frame_size_samples,
+                                 int frame_size_rtp_timestamps) {
+    return send_test_->RegisterCodec(payload_name, sampling_freq_hz, channels,
+                                     payload_type, frame_size_samples);
+  }
+
+  void RegisterExternalSendCodec(
+      std::unique_ptr<AudioEncoder> external_speech_encoder,
+      int payload_type) {
+    send_test_->RegisterExternalCodec(std::move(external_speech_encoder));
+  }
+
+  void RunInner(int min_expected_total_bits, int max_expected_total_bits) {
+    int nr_bytes = 0;
+    while (std::unique_ptr<test::Packet> next_packet =
+               send_test_->NextPacket()) {
+      nr_bytes += rtc::checked_cast<int>(next_packet->payload_length_bytes());
+    }
+    EXPECT_LE(min_expected_total_bits, nr_bytes * 8);
+    EXPECT_GE(max_expected_total_bits, nr_bytes * 8);
+  }
+
+  void SetUpTest(absl::string_view codec_name,
+                 int codec_sample_rate_hz,
+                 int channels,
+                 int payload_type,
+                 int codec_frame_size_samples,
+                 int codec_frame_size_rtp_timestamps) {
+    ASSERT_TRUE(SetUpSender());
+    ASSERT_TRUE(RegisterSendCodec(codec_name, codec_sample_rate_hz, channels,
+                                  payload_type, codec_frame_size_samples,
+                                  codec_frame_size_rtp_timestamps));
+  }
+
+  std::unique_ptr<test::AcmSendTestOldApi> send_test_;
+  std::unique_ptr<test::InputAudioFile> audio_source_;
+};
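+
+// Editor's worked example, not upstream code: RunInner() accumulates payload
+// bytes over kTestDurationMs == 1000 ms and compares bits. For the 10 kbps
+// case below, an ideal encoder would emit about 10000 bits in that second,
+// so the accepted window [7000, 12000] leaves room for rate-control
+// settling; likewise [40000, 60000] for 50 kbps.
+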
+class AcmSetBitRateNewApi : public AcmSetBitRateTest {
+ protected:
+  // Runs the test. SetUpSender() must have been called and a codec must be
+  // set up before calling this method.
+  void Run(int min_expected_total_bits, int max_expected_total_bits) {
+    RunInner(min_expected_total_bits, max_expected_total_bits);
+  }
+};
+
+TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_10kbps) {
+  const auto config = AudioEncoderOpus::SdpToConfig(
+      SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "10000"}}));
+  ASSERT_TRUE(SetUpSender());
+  RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107),
+                            107);
+  RunInner(7000, 12000);
+}
+
+TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
+  const auto config = AudioEncoderOpus::SdpToConfig(
+      SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "50000"}}));
+  ASSERT_TRUE(SetUpSender());
+  RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107),
+                            107);
+  RunInner(40000, 60000);
+}
+
+// Verify that it works when the data to send is mono and the encoder is set
+// to send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = kSampleRateHz * 10 / 1000;
+
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is stereo and the encoder is
+// set to send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 2;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is mono and the encoder is set
+// to send stereo audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 2);
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
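+
+// Editor's note, not upstream text: in the "multiopus" format above the
+// fmtp-style parameters describe how 6 input channels map onto Opus
+// streams -- num_streams=4 encoded streams of which coupled_streams=2 are
+// stereo pairs (2*2 + 2*1 = 6 channels), with channel_mapping giving the
+// order in which input channels feed those streams.
+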
+// Verify that it works when the data to send is stereo and the encoder is
+// set to send mono audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMonoForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// The result on the Android platforms is inconsistent for this test case.
+// On android_rel the result is different from android and android arm64 rel.
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_OpusFromFormat_48khz_20ms_100kbps \
+  DISABLED_OpusFromFormat_48khz_20ms_100kbps
+#else
+#define MAYBE_OpusFromFormat_48khz_20ms_100kbps \
+  OpusFromFormat_48khz_20ms_100kbps
+#endif
+TEST_F(AcmSetBitRateNewApi, MAYBE_OpusFromFormat_48khz_20ms_100kbps) {
+  const auto config = AudioEncoderOpus::SdpToConfig(
+      SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "100000"}}));
+  ASSERT_TRUE(SetUpSender());
+  RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107),
+                            107);
+  RunInner(80000, 120000);
+}
+
+TEST_F(AcmSenderBitExactnessOldApi, External_Pcmu_20ms) {
+  AudioEncoderPcmU::Config config;
+  config.frame_size_ms = 20;
+  config.num_channels = 1;
+  config.payload_type = 0;
+  AudioEncoderPcmU encoder(config);
+  auto mock_encoder = std::make_unique<MockAudioEncoder>();
+  // Set expectations on the mock encoder and also delegate the calls to the
+  // real encoder.
+  EXPECT_CALL(*mock_encoder, SampleRateHz())
+      .Times(AtLeast(1))
+      .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::SampleRateHz));
+  EXPECT_CALL(*mock_encoder, NumChannels())
+      .Times(AtLeast(1))
+      .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::NumChannels));
+  EXPECT_CALL(*mock_encoder, RtpTimestampRateHz())
+      .Times(AtLeast(1))
+      .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::RtpTimestampRateHz));
+  EXPECT_CALL(*mock_encoder, Num10MsFramesInNextPacket())
+      .Times(AtLeast(1))
+      .WillRepeatedly(
+          Invoke(&encoder, &AudioEncoderPcmU::Num10MsFramesInNextPacket));
+  EXPECT_CALL(*mock_encoder, GetTargetBitrate())
+      .Times(AtLeast(1))
+      .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::GetTargetBitrate));
+  EXPECT_CALL(*mock_encoder, EncodeImpl(_, _, _))
+      .Times(AtLeast(1))
+      .WillRepeatedly(Invoke(
+          &encoder, static_cast<AudioEncoder::EncodedInfo (AudioEncoder::*)(
+                        uint32_t, rtc::ArrayView<const int16_t>,
+                        rtc::Buffer*)>(&AudioEncoderPcmU::Encode)));
+  ASSERT_TRUE(SetUpSender(kTestFileMono32kHz, 32000));
+  ASSERT_NO_FATAL_FAILURE(
+      SetUpTestExternalEncoder(std::move(mock_encoder), config.payload_type));
+  Run("c8d1fc677f33c2022ec5f83c7f302280", "8f9b8750bd80fe26b6cbf6659b89f0f9",
+      50, test::AcmReceiveTestOldApi::kMonoOutput);
+}
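+
+// Editor's note, not upstream text: the static_cast in the EncodeImpl
+// expectation above pins down the exact member-function signature,
+//   AudioEncoder::EncodedInfo (AudioEncoder::*)(
+//       uint32_t, rtc::ArrayView<const int16_t>, rtc::Buffer*),
+// so that gMock's Invoke() can bind &AudioEncoderPcmU::Encode unambiguously.
+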
+// This test fixture is implemented to run ACM and change the desired output
+// frequency during the call. The input packets are simply PCM16b-wb encoded
+// payloads with a constant value of `kSampleValue`. The test fixture itself
+// acts as PacketSource in between the receive test class and the constant-
+// payload packet source class. The output is both written to file and
+// analyzed in this test fixture.
+class AcmSwitchingOutputFrequencyOldApi : public ::testing::Test,
+                                          public test::PacketSource,
+                                          public test::AudioSink {
+ protected:
+  static const size_t kTestNumPackets = 50;
+  static const int kEncodedSampleRateHz = 16000;
+  static const size_t kPayloadLenSamples = 30 * kEncodedSampleRateHz / 1000;
+  static const int kPayloadType = 108;  // Default payload type for PCM16b-wb.
+
+  AcmSwitchingOutputFrequencyOldApi()
+      : first_output_(true),
+        num_packets_(0),
+        packet_source_(kPayloadLenSamples,
+                       kSampleValue,
+                       kEncodedSampleRateHz,
+                       kPayloadType),
+        output_freq_2_(0),
+        has_toggled_(false) {}
+
+  void Run(int output_freq_1, int output_freq_2, int toggle_period_ms) {
+    // Set up the receiver used to decode the packets and verify the decoded
+    // output.
+    const std::string output_file_name =
+        webrtc::test::OutputPath() +
+        ::testing::UnitTest::GetInstance()
+            ->current_test_info()
+            ->test_case_name() +
+        "_" + ::testing::UnitTest::GetInstance()->current_test_info()->name() +
+        "_output.pcm";
+    test::OutputAudioFile output_file(output_file_name);
+    // Have the output audio sent both to file and to the WriteArray method
+    // in this class.
+    test::AudioSinkFork output(this, &output_file);
+    test::AcmReceiveTestToggleOutputFreqOldApi receive_test(
+        this, &output, output_freq_1, output_freq_2, toggle_period_ms,
+        test::AcmReceiveTestOldApi::kMonoOutput);
+    ASSERT_NO_FATAL_FAILURE(receive_test.RegisterDefaultCodecs());
+    output_freq_2_ = output_freq_2;
+
+    // This is where the actual test is executed.
+    receive_test.Run();
+
+    // Delete output file.
+    remove(output_file_name.c_str());
+  }
+
+  // Inherited from test::PacketSource.
+  std::unique_ptr<test::Packet> NextPacket() override {
+    // Check if it is time to terminate the test. The packet source is of
+    // type ConstantPcmPacketSource, which is infinite, so we must end the
+    // test "manually".
+    if (num_packets_++ > kTestNumPackets) {
+      EXPECT_TRUE(has_toggled_);
+      return NULL;  // Test ended.
+    }
+
+    // Get the next packet from the source.
+    return packet_source_.NextPacket();
+  }
+
+  // Inherited from test::AudioSink.
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    // Skip checking the first output frame, since it has a number of zeros
+    // due to how NetEq is initialized.
+    if (first_output_) {
+      first_output_ = false;
+      return true;
+    }
+    for (size_t i = 0; i < num_samples; ++i) {
+      EXPECT_EQ(kSampleValue, audio[i]);
+    }
+    if (num_samples ==
+        static_cast<size_t>(output_freq_2_ / 100))  // Size of 10 ms frame.
+      has_toggled_ = true;
+    // The return value does not say if the values match the expectation, just
+    // that the method could process the samples.
+    return true;
+  }
+
+  const int16_t kSampleValue = 1000;
+  bool first_output_;
+  size_t num_packets_;
+  test::ConstantPcmPacketSource packet_source_;
+  int output_freq_2_;
+  bool has_toggled_;
+};
+
+TEST_F(AcmSwitchingOutputFrequencyOldApi, TestWithoutToggling) {
+  Run(16000, 16000, 1000);
+}
+
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo32Khz) {
+  Run(16000, 32000, 1000);
+}
+
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle32KhzTo16Khz) {
+  Run(32000, 16000, 1000);
+}
+
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo8Khz) {
+  Run(16000, 8000, 1000);
+}
+
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle8KhzTo16Khz) {
+  Run(8000, 16000, 1000);
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc
new file mode 100644
index 0000000000..9f3bdadc88
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/call_statistics.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace acm2 {
+
+void CallStatistics::DecodedByNetEq(AudioFrame::SpeechType speech_type,
+                                    bool muted) {
+  ++decoding_stat_.calls_to_neteq;
+  if (muted) {
+    ++decoding_stat_.decoded_muted_output;
+  }
+  switch (speech_type) {
+    case AudioFrame::kNormalSpeech: {
+      ++decoding_stat_.decoded_normal;
+      break;
+    }
+    case AudioFrame::kPLC: {
+      ++decoding_stat_.decoded_neteq_plc;
+      break;
+    }
+    case AudioFrame::kCodecPLC: {
+      ++decoding_stat_.decoded_codec_plc;
+      break;
+    }
+    case AudioFrame::kCNG: {
+      ++decoding_stat_.decoded_cng;
+      break;
+    }
+    case AudioFrame::kPLCCNG: {
+      ++decoding_stat_.decoded_plc_cng;
+      break;
+    }
+    case AudioFrame::kUndefined: {
+      // If the audio is decoded by NetEq, `kUndefined` is not an option.
+      RTC_DCHECK_NOTREACHED();
+    }
+  }
+}
+
+void CallStatistics::DecodedBySilenceGenerator() {
+  ++decoding_stat_.calls_to_silence_generator;
+}
+
+const AudioDecodingCallStats& CallStatistics::GetDecodingStatistics() const {
+  return decoding_stat_;
+}
+
+}  // namespace acm2
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h
new file mode 100644
index 0000000000..a2db2a29f4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_
+#define MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_
+
+#include "api/audio/audio_frame.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+
+//
+// This class is for bookkeeping of calls to ACM. It is not useful to log API
+// calls which are supposed to be called every 10 ms, e.g. PlayoutData10Ms();
+// however, it is useful to know the number of such calls in a given time
+// interval. The current implementation covers calls to PlayoutData10Ms() with
+// detailed accounting of the decoded speech type.
+//
+// Thread Safety
+// =============
+// Please note that this class is not thread safe. The class must be protected
+// if different APIs are called from different threads.
+//
+
+namespace webrtc {
+
+namespace acm2 {
+
+class CallStatistics {
+ public:
+  CallStatistics() {}
+  ~CallStatistics() {}
+
+  // Call this method to indicate that NetEq engaged in decoding. `speech_type`
+  // is the audio-type according to NetEq, and `muted` indicates if the decoded
+  // frame was produced in muted state.
+  void DecodedByNetEq(AudioFrame::SpeechType speech_type, bool muted);
+
+  // Call this method to indicate that a decoding call resulted in generating
+  // silence, i.e. the call to NetEq is bypassed and the output audio is zero.
+  void DecodedBySilenceGenerator();
+
+  // Get statistics for decoding. The statistics include the number of calls
+  // to NetEq and the silence generator, as well as the type of speech pulled
+  // off of NetEq; cf. the declaration of AudioDecodingCallStats for a
+  // detailed description.
+  const AudioDecodingCallStats& GetDecodingStatistics() const;
+
+ private:
+  // Reset the decoding statistics.
+  void ResetDecodingStatistics();
+
+  AudioDecodingCallStats decoding_stat_;
+};
+
+}  // namespace acm2
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc
new file mode 100644
index 0000000000..b96977b8e4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/acm2/call_statistics.h" + +#include "test/gtest.h" + +namespace webrtc { + +namespace acm2 { + +TEST(CallStatisticsTest, InitializedZero) { + CallStatistics call_stats; + AudioDecodingCallStats stats; + + stats = call_stats.GetDecodingStatistics(); + EXPECT_EQ(0, stats.calls_to_neteq); + EXPECT_EQ(0, stats.calls_to_silence_generator); + EXPECT_EQ(0, stats.decoded_normal); + EXPECT_EQ(0, stats.decoded_cng); + EXPECT_EQ(0, stats.decoded_neteq_plc); + EXPECT_EQ(0, stats.decoded_plc_cng); + EXPECT_EQ(0, stats.decoded_muted_output); +} + +TEST(CallStatisticsTest, AllCalls) { + CallStatistics call_stats; + AudioDecodingCallStats stats; + + call_stats.DecodedBySilenceGenerator(); + call_stats.DecodedByNetEq(AudioFrame::kNormalSpeech, false); + call_stats.DecodedByNetEq(AudioFrame::kPLC, false); + call_stats.DecodedByNetEq(AudioFrame::kCodecPLC, false); + call_stats.DecodedByNetEq(AudioFrame::kPLCCNG, true); // Let this be muted. + call_stats.DecodedByNetEq(AudioFrame::kCNG, false); + + stats = call_stats.GetDecodingStatistics(); + EXPECT_EQ(5, stats.calls_to_neteq); + EXPECT_EQ(1, stats.calls_to_silence_generator); + EXPECT_EQ(1, stats.decoded_normal); + EXPECT_EQ(1, stats.decoded_cng); + EXPECT_EQ(1, stats.decoded_neteq_plc); + EXPECT_EQ(1, stats.decoded_codec_plc); + EXPECT_EQ(1, stats.decoded_plc_cng); + EXPECT_EQ(1, stats.decoded_muted_output); +} + +} // namespace acm2 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/audio_coding.gni b/third_party/libwebrtc/modules/audio_coding/audio_coding.gni new file mode 100644 index 0000000000..3b147091de --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_coding.gni @@ -0,0 +1,25 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +audio_codec_defines = [] +if (rtc_include_ilbc) { + audio_codec_defines += [ "WEBRTC_CODEC_ILBC" ] +} +if (rtc_include_opus) { + audio_codec_defines += [ "WEBRTC_CODEC_OPUS" ] +} +if (rtc_opus_support_120ms_ptime) { + audio_codec_defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=1" ] +} else { + audio_codec_defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=0" ] +} + +audio_coding_defines = audio_codec_defines +neteq_defines = audio_codec_defines diff --git a/third_party/libwebrtc/modules/audio_coding/audio_coding_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_coding_gn/moz.build new file mode 100644 index 0000000000..03d99c93b8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_coding_gn/moz.build @@ -0,0 +1,236 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc", + "/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc", + "/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc", + "/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc", + "/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + 
DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_coding_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/audio_coding_module_typedefs_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_coding_module_typedefs_gn/moz.build new file mode 100644 index 0000000000..2826ed8642 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_coding_module_typedefs_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] 
= True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_coding_module_typedefs_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/audio_coding_opus_common_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_coding_opus_common_gn/moz.build new file mode 100644 index 0000000000..bf852e8442 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_coding_opus_common_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_coding_opus_common_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/audio_encoder_cng_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_encoder_cng_gn/moz.build new file mode 100644 index 0000000000..56b6d50f6c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_encoder_cng_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_encoder_cng_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_config.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_config.cc new file mode 100644 index 0000000000..16fd2a1b9a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_config.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+AudioEncoderRuntimeConfig::AudioEncoderRuntimeConfig() = default;
+
+AudioEncoderRuntimeConfig::AudioEncoderRuntimeConfig(
+    const AudioEncoderRuntimeConfig& other) = default;
+
+AudioEncoderRuntimeConfig::~AudioEncoderRuntimeConfig() = default;
+
+AudioEncoderRuntimeConfig& AudioEncoderRuntimeConfig::operator=(
+    const AudioEncoderRuntimeConfig& other) = default;
+
+bool AudioEncoderRuntimeConfig::operator==(
+    const AudioEncoderRuntimeConfig& other) const {
+  return bitrate_bps == other.bitrate_bps &&
+         frame_length_ms == other.frame_length_ms &&
+         uplink_packet_loss_fraction == other.uplink_packet_loss_fraction &&
+         enable_fec == other.enable_fec && enable_dtx == other.enable_dtx &&
+         num_channels == other.num_channels;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.cc
new file mode 100644
index 0000000000..64163f9118
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h"
+
+#include <stdio.h>
+
+#include <memory>
+#include <utility>
+
+#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
+#include "modules/audio_coding/audio_network_adaptor/debug_dump_writer.h"
+#include "modules/audio_coding/audio_network_adaptor/event_log_writer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/time_utils.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int kEventLogMinBitrateChangeBps = 5000;
+constexpr float kEventLogMinBitrateChangeFraction = 0.25;
+constexpr float kEventLogMinPacketLossChangeFraction = 0.5;
+}  // namespace
+
+AudioNetworkAdaptorImpl::Config::Config() : event_log(nullptr) {}
+
+AudioNetworkAdaptorImpl::Config::~Config() = default;
+
+AudioNetworkAdaptorImpl::AudioNetworkAdaptorImpl(
+    const Config& config,
+    std::unique_ptr<ControllerManager> controller_manager,
+    std::unique_ptr<DebugDumpWriter> debug_dump_writer)
+    : config_(config),
+      controller_manager_(std::move(controller_manager)),
+      debug_dump_writer_(std::move(debug_dump_writer)),
+      event_log_writer_(
+          config.event_log
+              ? new EventLogWriter(config.event_log,
+                                   kEventLogMinBitrateChangeBps,
+                                   kEventLogMinBitrateChangeFraction,
+                                   kEventLogMinPacketLossChangeFraction)
+              : nullptr) {
+  RTC_DCHECK(controller_manager_);
+}
+
+AudioNetworkAdaptorImpl::~AudioNetworkAdaptorImpl() = default;
+
+void AudioNetworkAdaptorImpl::SetUplinkBandwidth(int uplink_bandwidth_bps) {
+  last_metrics_.uplink_bandwidth_bps = uplink_bandwidth_bps;
+  DumpNetworkMetrics();
+
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+  UpdateNetworkMetrics(network_metrics);
+}
+
+void AudioNetworkAdaptorImpl::SetUplinkPacketLossFraction(
+    float uplink_packet_loss_fraction) {
+  last_metrics_.uplink_packet_loss_fraction = uplink_packet_loss_fraction;
+  DumpNetworkMetrics();
+
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.uplink_packet_loss_fraction = uplink_packet_loss_fraction;
+  UpdateNetworkMetrics(network_metrics);
+}
+
+void AudioNetworkAdaptorImpl::SetRtt(int rtt_ms) {
+  last_metrics_.rtt_ms = rtt_ms;
+  DumpNetworkMetrics();
+
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.rtt_ms = rtt_ms;
+  UpdateNetworkMetrics(network_metrics);
+}
+
+void AudioNetworkAdaptorImpl::SetTargetAudioBitrate(
+    int target_audio_bitrate_bps) {
+  last_metrics_.target_audio_bitrate_bps = target_audio_bitrate_bps;
+  DumpNetworkMetrics();
+
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.target_audio_bitrate_bps = target_audio_bitrate_bps;
+  UpdateNetworkMetrics(network_metrics);
+}
+
+void AudioNetworkAdaptorImpl::SetOverhead(size_t overhead_bytes_per_packet) {
+  last_metrics_.overhead_bytes_per_packet = overhead_bytes_per_packet;
+  DumpNetworkMetrics();
+
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.overhead_bytes_per_packet = overhead_bytes_per_packet;
+  UpdateNetworkMetrics(network_metrics);
+}
+
+AudioEncoderRuntimeConfig AudioNetworkAdaptorImpl::GetEncoderRuntimeConfig() {
+  AudioEncoderRuntimeConfig config;
+  for (auto& controller :
+       controller_manager_->GetSortedControllers(last_metrics_))
+    controller->MakeDecision(&config);
+
+  // Update ANA stats.
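+  // Each stats counter below is an absl::optional<uint32_t>: increment_opt
+  // lazily initializes a counter to 0 before incrementing, so a counter only
+  // shows up in ANAStats once its action has actually been taken.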
+  auto increment_opt = [](absl::optional<uint32_t>& a) {
+    a = a.value_or(0) + 1;
+  };
+  if (prev_config_) {
+    if (config.bitrate_bps != prev_config_->bitrate_bps) {
+      increment_opt(stats_.bitrate_action_counter);
+    }
+    if (config.enable_dtx != prev_config_->enable_dtx) {
+      increment_opt(stats_.dtx_action_counter);
+    }
+    if (config.enable_fec != prev_config_->enable_fec) {
+      increment_opt(stats_.fec_action_counter);
+    }
+    if (config.frame_length_ms && prev_config_->frame_length_ms) {
+      if (*config.frame_length_ms > *prev_config_->frame_length_ms) {
+        increment_opt(stats_.frame_length_increase_counter);
+      } else if (*config.frame_length_ms < *prev_config_->frame_length_ms) {
+        increment_opt(stats_.frame_length_decrease_counter);
+      }
+    }
+    if (config.num_channels != prev_config_->num_channels) {
+      increment_opt(stats_.channel_action_counter);
+    }
+    if (config.uplink_packet_loss_fraction) {
+      stats_.uplink_packet_loss_fraction = *config.uplink_packet_loss_fraction;
+    }
+  }
+  prev_config_ = config;
+
+  if (debug_dump_writer_)
+    debug_dump_writer_->DumpEncoderRuntimeConfig(config, rtc::TimeMillis());
+
+  if (event_log_writer_)
+    event_log_writer_->MaybeLogEncoderConfig(config);
+
+  return config;
+}
+
+void AudioNetworkAdaptorImpl::StartDebugDump(FILE* file_handle) {
+  debug_dump_writer_ = DebugDumpWriter::Create(file_handle);
+}
+
+void AudioNetworkAdaptorImpl::StopDebugDump() {
+  debug_dump_writer_.reset(nullptr);
+}
+
+ANAStats AudioNetworkAdaptorImpl::GetStats() const {
+  return stats_;
+}
+
+void AudioNetworkAdaptorImpl::DumpNetworkMetrics() {
+  if (debug_dump_writer_)
+    debug_dump_writer_->DumpNetworkMetrics(last_metrics_, rtc::TimeMillis());
+}
+
+void AudioNetworkAdaptorImpl::UpdateNetworkMetrics(
+    const Controller::NetworkMetrics& network_metrics) {
+  for (auto& controller : controller_manager_->GetControllers())
+    controller->UpdateNetworkMetrics(network_metrics);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h
new file mode 100644
index 0000000000..664e76bda5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_AUDIO_NETWORK_ADAPTOR_IMPL_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_AUDIO_NETWORK_ADAPTOR_IMPL_H_
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/debug_dump_writer.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+class ControllerManager;
+class EventLogWriter;
+class RtcEventLog;
+
+class AudioNetworkAdaptorImpl final : public AudioNetworkAdaptor {
+ public:
+  struct Config {
+    Config();
+    ~Config();
+    RtcEventLog* event_log;
+  };
+
+  AudioNetworkAdaptorImpl(
+      const Config& config,
+      std::unique_ptr<ControllerManager> controller_manager,
+      std::unique_ptr<DebugDumpWriter> debug_dump_writer = nullptr);
+
+  ~AudioNetworkAdaptorImpl() override;
+
+  AudioNetworkAdaptorImpl(const AudioNetworkAdaptorImpl&) = delete;
+  AudioNetworkAdaptorImpl& operator=(const AudioNetworkAdaptorImpl&) = delete;
+
+  void SetUplinkBandwidth(int uplink_bandwidth_bps) override;
+
+  void SetUplinkPacketLossFraction(float uplink_packet_loss_fraction) override;
+
+  void SetRtt(int rtt_ms) override;
+
+  void SetTargetAudioBitrate(int target_audio_bitrate_bps) override;
+
+  void SetOverhead(size_t overhead_bytes_per_packet) override;
+
+  AudioEncoderRuntimeConfig GetEncoderRuntimeConfig() override;
+
+  void StartDebugDump(FILE* file_handle) override;
+
+  void StopDebugDump() override;
+
+  ANAStats GetStats() const override;
+
+ private:
+  void DumpNetworkMetrics();
+
+  void UpdateNetworkMetrics(const Controller::NetworkMetrics& network_metrics);
+
+  const Config config_;
+
+  std::unique_ptr<ControllerManager> controller_manager_;
+
+  std::unique_ptr<DebugDumpWriter> debug_dump_writer_;
+
+  const std::unique_ptr<EventLogWriter> event_log_writer_;
+
+  Controller::NetworkMetrics last_metrics_;
+
+  absl::optional<AudioEncoderRuntimeConfig> prev_config_;
+
+  ANAStats stats_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_AUDIO_NETWORK_ADAPTOR_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl_unittest.cc
new file mode 100644
index 0000000000..70a50d6de7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl_unittest.cc
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h"
+
+#include <memory>
+#include <utility>
+
+#include "api/rtc_event_log/rtc_event.h"
+#include "logging/rtc_event_log/events/rtc_event_audio_network_adaptation.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "modules/audio_coding/audio_network_adaptor/mock/mock_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/mock/mock_controller_manager.h"
+#include "modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h"
+#include "rtc_base/fake_clock.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+using ::testing::_;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace {
+
+constexpr size_t kNumControllers = 2;
+
+constexpr int64_t kClockInitialTimeMs = 12345678;
+
+MATCHER_P(NetworkMetricsIs, metric, "") {
+  return arg.uplink_bandwidth_bps == metric.uplink_bandwidth_bps &&
+         arg.target_audio_bitrate_bps == metric.target_audio_bitrate_bps &&
+         arg.rtt_ms == metric.rtt_ms &&
+         arg.overhead_bytes_per_packet == metric.overhead_bytes_per_packet &&
+         arg.uplink_packet_loss_fraction == metric.uplink_packet_loss_fraction;
+}
+
+MATCHER_P(IsRtcEventAnaConfigEqualTo, config, "") {
+  if (arg->GetType() != RtcEvent::Type::AudioNetworkAdaptation) {
+    return false;
+  }
+  auto ana_event = static_cast<RtcEventAudioNetworkAdaptation*>(arg);
+  return ana_event->config() == config;
+}
+
+MATCHER_P(EncoderRuntimeConfigIs, config, "") {
+  return arg.bitrate_bps == config.bitrate_bps &&
+         arg.frame_length_ms == config.frame_length_ms &&
+         arg.uplink_packet_loss_fraction ==
+             config.uplink_packet_loss_fraction &&
+         arg.enable_fec == config.enable_fec &&
+         arg.enable_dtx == config.enable_dtx &&
+         arg.num_channels == config.num_channels;
+}
+
+struct AudioNetworkAdaptorStates {
+  std::unique_ptr<AudioNetworkAdaptorImpl> audio_network_adaptor;
+  std::vector<std::unique_ptr<MockController>> mock_controllers;
+  std::unique_ptr<MockRtcEventLog> event_log;
+  MockDebugDumpWriter* mock_debug_dump_writer;
+};
+
+AudioNetworkAdaptorStates CreateAudioNetworkAdaptor() {
+  AudioNetworkAdaptorStates states;
+  std::vector<Controller*> controllers;
+  for (size_t i = 0; i < kNumControllers; ++i) {
+    auto controller =
+        std::unique_ptr<MockController>(new NiceMock<MockController>());
+    EXPECT_CALL(*controller, Die());
+    controllers.push_back(controller.get());
+    states.mock_controllers.push_back(std::move(controller));
+  }
+
+  auto controller_manager = std::unique_ptr<MockControllerManager>(
+      new NiceMock<MockControllerManager>());
+
+  EXPECT_CALL(*controller_manager, Die());
+  EXPECT_CALL(*controller_manager, GetControllers())
+      .WillRepeatedly(Return(controllers));
+  EXPECT_CALL(*controller_manager, GetSortedControllers(_))
+      .WillRepeatedly(Return(controllers));
+
+  states.event_log.reset(new NiceMock<MockRtcEventLog>());
+
+  auto debug_dump_writer =
+      std::unique_ptr<MockDebugDumpWriter>(new NiceMock<MockDebugDumpWriter>());
+  EXPECT_CALL(*debug_dump_writer, Die());
+  states.mock_debug_dump_writer = debug_dump_writer.get();
+
+  AudioNetworkAdaptorImpl::Config config;
+  config.event_log = states.event_log.get();
+  // AudioNetworkAdaptorImpl governs the lifetime of controller manager.
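+  // The raw pointers kept in `controllers` and `mock_debug_dump_writer`
+  // remain valid afterwards: the adaptor owns the controller manager and the
+  // debug dump writer for the rest of the test.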
+  states.audio_network_adaptor.reset(new AudioNetworkAdaptorImpl(
+      config, std::move(controller_manager), std::move(debug_dump_writer)));
+
+  return states;
+}
+
+void SetExpectCallToUpdateNetworkMetrics(
+    const std::vector<std::unique_ptr<MockController>>& controllers,
+    const Controller::NetworkMetrics& check) {
+  for (auto& mock_controller : controllers) {
+    EXPECT_CALL(*mock_controller,
+                UpdateNetworkMetrics(NetworkMetricsIs(check)));
+  }
+}
+
+}  // namespace
+
+TEST(AudioNetworkAdaptorImplTest,
+     UpdateNetworkMetricsIsCalledOnSetUplinkBandwidth) {
+  auto states = CreateAudioNetworkAdaptor();
+  constexpr int kBandwidth = 16000;
+  Controller::NetworkMetrics check;
+  check.uplink_bandwidth_bps = kBandwidth;
+  SetExpectCallToUpdateNetworkMetrics(states.mock_controllers, check);
+  states.audio_network_adaptor->SetUplinkBandwidth(kBandwidth);
+}
+
+TEST(AudioNetworkAdaptorImplTest,
+     UpdateNetworkMetricsIsCalledOnSetUplinkPacketLossFraction) {
+  auto states = CreateAudioNetworkAdaptor();
+  constexpr float kPacketLoss = 0.7f;
+  Controller::NetworkMetrics check;
+  check.uplink_packet_loss_fraction = kPacketLoss;
+  SetExpectCallToUpdateNetworkMetrics(states.mock_controllers, check);
+  states.audio_network_adaptor->SetUplinkPacketLossFraction(kPacketLoss);
+}
+
+TEST(AudioNetworkAdaptorImplTest, UpdateNetworkMetricsIsCalledOnSetRtt) {
+  auto states = CreateAudioNetworkAdaptor();
+  constexpr int kRtt = 100;
+  Controller::NetworkMetrics check;
+  check.rtt_ms = kRtt;
+  SetExpectCallToUpdateNetworkMetrics(states.mock_controllers, check);
+  states.audio_network_adaptor->SetRtt(kRtt);
+}
+
+TEST(AudioNetworkAdaptorImplTest,
+     UpdateNetworkMetricsIsCalledOnSetTargetAudioBitrate) {
+  auto states = CreateAudioNetworkAdaptor();
+  constexpr int kTargetAudioBitrate = 15000;
+  Controller::NetworkMetrics check;
+  check.target_audio_bitrate_bps = kTargetAudioBitrate;
+  SetExpectCallToUpdateNetworkMetrics(states.mock_controllers, check);
+  states.audio_network_adaptor->SetTargetAudioBitrate(kTargetAudioBitrate);
+}
+
+TEST(AudioNetworkAdaptorImplTest, UpdateNetworkMetricsIsCalledOnSetOverhead) {
+  auto states = CreateAudioNetworkAdaptor();
+  constexpr size_t kOverhead = 64;
+  Controller::NetworkMetrics check;
+  check.overhead_bytes_per_packet = kOverhead;
+  SetExpectCallToUpdateNetworkMetrics(states.mock_controllers, check);
+  states.audio_network_adaptor->SetOverhead(kOverhead);
+}
+
+TEST(AudioNetworkAdaptorImplTest,
+     MakeDecisionIsCalledOnGetEncoderRuntimeConfig) {
+  auto states = CreateAudioNetworkAdaptor();
+  for (auto& mock_controller : states.mock_controllers)
+    EXPECT_CALL(*mock_controller, MakeDecision(_));
+  states.audio_network_adaptor->GetEncoderRuntimeConfig();
+}
+
+TEST(AudioNetworkAdaptorImplTest,
+     DumpEncoderRuntimeConfigIsCalledOnGetEncoderRuntimeConfig) {
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-Audio-BitrateAdaptation/Enabled/WebRTC-Audio-FecAdaptation/"
+      "Enabled/");
+  rtc::ScopedFakeClock fake_clock;
+  fake_clock.AdvanceTime(TimeDelta::Millis(kClockInitialTimeMs));
+  auto states = CreateAudioNetworkAdaptor();
+  AudioEncoderRuntimeConfig config;
+  config.bitrate_bps = 32000;
+  config.enable_fec = true;
+
+  EXPECT_CALL(*states.mock_controllers[0], MakeDecision(_))
+      .WillOnce(SetArgPointee<0>(config));
+
+  EXPECT_CALL(*states.mock_debug_dump_writer,
+              DumpEncoderRuntimeConfig(EncoderRuntimeConfigIs(config),
+                                       kClockInitialTimeMs));
+  states.audio_network_adaptor->GetEncoderRuntimeConfig();
+}
+
+TEST(AudioNetworkAdaptorImplTest,
+     DumpNetworkMetricsIsCalledOnSetNetworkMetrics) {
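+  // Each Set*() call below should produce one DumpNetworkMetrics() call with
+  // the metrics accumulated so far and the fake-clock time at that moment.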
rtc::ScopedFakeClock fake_clock; + fake_clock.AdvanceTime(TimeDelta::Millis(kClockInitialTimeMs)); + + auto states = CreateAudioNetworkAdaptor(); + + constexpr int kBandwidth = 16000; + constexpr float kPacketLoss = 0.7f; + constexpr int kRtt = 100; + constexpr int kTargetAudioBitrate = 15000; + constexpr size_t kOverhead = 64; + + Controller::NetworkMetrics check; + check.uplink_bandwidth_bps = kBandwidth; + int64_t timestamp_check = kClockInitialTimeMs; + + EXPECT_CALL(*states.mock_debug_dump_writer, + DumpNetworkMetrics(NetworkMetricsIs(check), timestamp_check)); + states.audio_network_adaptor->SetUplinkBandwidth(kBandwidth); + + fake_clock.AdvanceTime(TimeDelta::Millis(100)); + timestamp_check += 100; + check.uplink_packet_loss_fraction = kPacketLoss; + EXPECT_CALL(*states.mock_debug_dump_writer, + DumpNetworkMetrics(NetworkMetricsIs(check), timestamp_check)); + states.audio_network_adaptor->SetUplinkPacketLossFraction(kPacketLoss); + + fake_clock.AdvanceTime(TimeDelta::Millis(50)); + timestamp_check += 50; + + fake_clock.AdvanceTime(TimeDelta::Millis(200)); + timestamp_check += 200; + check.rtt_ms = kRtt; + EXPECT_CALL(*states.mock_debug_dump_writer, + DumpNetworkMetrics(NetworkMetricsIs(check), timestamp_check)); + states.audio_network_adaptor->SetRtt(kRtt); + + fake_clock.AdvanceTime(TimeDelta::Millis(150)); + timestamp_check += 150; + check.target_audio_bitrate_bps = kTargetAudioBitrate; + EXPECT_CALL(*states.mock_debug_dump_writer, + DumpNetworkMetrics(NetworkMetricsIs(check), timestamp_check)); + states.audio_network_adaptor->SetTargetAudioBitrate(kTargetAudioBitrate); + + fake_clock.AdvanceTime(TimeDelta::Millis(50)); + timestamp_check += 50; + check.overhead_bytes_per_packet = kOverhead; + EXPECT_CALL(*states.mock_debug_dump_writer, + DumpNetworkMetrics(NetworkMetricsIs(check), timestamp_check)); + states.audio_network_adaptor->SetOverhead(kOverhead); +} + +TEST(AudioNetworkAdaptorImplTest, LogRuntimeConfigOnGetEncoderRuntimeConfig) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Audio-BitrateAdaptation/Enabled/WebRTC-Audio-FecAdaptation/" + "Enabled/"); + auto states = CreateAudioNetworkAdaptor(); + + AudioEncoderRuntimeConfig config; + config.bitrate_bps = 32000; + config.enable_fec = true; + + EXPECT_CALL(*states.mock_controllers[0], MakeDecision(_)) + .WillOnce(SetArgPointee<0>(config)); + + EXPECT_CALL(*states.event_log, LogProxy(IsRtcEventAnaConfigEqualTo(config))) + .Times(1); + states.audio_network_adaptor->GetEncoderRuntimeConfig(); +} + +TEST(AudioNetworkAdaptorImplTest, TestANAStats) { + auto states = CreateAudioNetworkAdaptor(); + + // Simulate some adaptation, otherwise the stats will not show anything. 
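+  // The first GetEncoderRuntimeConfig() call only seeds prev_config_; the two
+  // following calls each toggle bitrate, channels, DTX and FEC once, and move
+  // the frame length down (120 -> 60 ms) and then back up, which yields the
+  // counter values expected below.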
+  AudioEncoderRuntimeConfig config1, config2;
+  config1.bitrate_bps = 32000;
+  config1.num_channels = 2;
+  config1.enable_fec = true;
+  config1.enable_dtx = true;
+  config1.frame_length_ms = 120;
+  config1.uplink_packet_loss_fraction = 0.1f;
+  config2.bitrate_bps = 16000;
+  config2.num_channels = 1;
+  config2.enable_fec = false;
+  config2.enable_dtx = false;
+  config2.frame_length_ms = 60;
+  config2.uplink_packet_loss_fraction = 0.1f;
+
+  EXPECT_CALL(*states.mock_controllers[0], MakeDecision(_))
+      .WillOnce(SetArgPointee<0>(config1));
+  states.audio_network_adaptor->GetEncoderRuntimeConfig();
+  EXPECT_CALL(*states.mock_controllers[0], MakeDecision(_))
+      .WillOnce(SetArgPointee<0>(config2));
+  states.audio_network_adaptor->GetEncoderRuntimeConfig();
+  EXPECT_CALL(*states.mock_controllers[0], MakeDecision(_))
+      .WillOnce(SetArgPointee<0>(config1));
+  states.audio_network_adaptor->GetEncoderRuntimeConfig();
+
+  auto ana_stats = states.audio_network_adaptor->GetStats();
+
+  EXPECT_EQ(ana_stats.bitrate_action_counter, 2u);
+  EXPECT_EQ(ana_stats.channel_action_counter, 2u);
+  EXPECT_EQ(ana_stats.dtx_action_counter, 2u);
+  EXPECT_EQ(ana_stats.fec_action_counter, 2u);
+  EXPECT_EQ(ana_stats.frame_length_increase_counter, 1u);
+  EXPECT_EQ(ana_stats.frame_length_decrease_counter, 1u);
+  EXPECT_EQ(ana_stats.uplink_packet_loss_fraction, 0.1f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.cc
new file mode 100644
index 0000000000..88ca38d074
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/bitrate_controller.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace audio_network_adaptor {
+
+BitrateController::Config::Config(int initial_bitrate_bps,
+                                  int initial_frame_length_ms,
+                                  int fl_increase_overhead_offset,
+                                  int fl_decrease_overhead_offset)
+    : initial_bitrate_bps(initial_bitrate_bps),
+      initial_frame_length_ms(initial_frame_length_ms),
+      fl_increase_overhead_offset(fl_increase_overhead_offset),
+      fl_decrease_overhead_offset(fl_decrease_overhead_offset) {}
+
+BitrateController::Config::~Config() = default;
+
+BitrateController::BitrateController(const Config& config)
+    : config_(config),
+      bitrate_bps_(config_.initial_bitrate_bps),
+      frame_length_ms_(config_.initial_frame_length_ms) {
+  RTC_DCHECK_GT(bitrate_bps_, 0);
+  RTC_DCHECK_GT(frame_length_ms_, 0);
+}
+
+BitrateController::~BitrateController() = default;
+
+void BitrateController::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.target_audio_bitrate_bps)
+    target_audio_bitrate_bps_ = network_metrics.target_audio_bitrate_bps;
+  if (network_metrics.overhead_bytes_per_packet) {
+    RTC_DCHECK_GT(*network_metrics.overhead_bytes_per_packet, 0);
+    overhead_bytes_per_packet_ = network_metrics.overhead_bytes_per_packet;
+  }
+}
+
+void BitrateController::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  // Decision on `bitrate_bps` should not have been made.
+  RTC_DCHECK(!config->bitrate_bps);
+  if (target_audio_bitrate_bps_ && overhead_bytes_per_packet_) {
+    if (config->frame_length_ms)
+      frame_length_ms_ = *config->frame_length_ms;
+    int offset = config->last_fl_change_increase
+                     ? config_.fl_increase_overhead_offset
+                     : config_.fl_decrease_overhead_offset;
+    // Check that
+    // -(*overhead_bytes_per_packet_) <= offset <= (*overhead_bytes_per_packet_)
+    RTC_DCHECK_GE(*overhead_bytes_per_packet_, -offset);
+    RTC_DCHECK_LE(offset, *overhead_bytes_per_packet_);
+    int overhead_rate_bps = static_cast<int>(
+        (*overhead_bytes_per_packet_ + offset) * 8 * 1000 / frame_length_ms_);
+    bitrate_bps_ = std::max(0, *target_audio_bitrate_bps_ - overhead_rate_bps);
+  }
+  config->bitrate_bps = bitrate_bps_;
+}
+
+}  // namespace audio_network_adaptor
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.h
new file mode 100644
index 0000000000..c1032146cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_BITRATE_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_BITRATE_CONTROLLER_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+namespace audio_network_adaptor {
+
+class BitrateController final : public Controller {
+ public:
+  struct Config {
+    Config(int initial_bitrate_bps,
+           int initial_frame_length_ms,
+           int fl_increase_overhead_offset,
+           int fl_decrease_overhead_offset);
+    ~Config();
+    int initial_bitrate_bps;
+    int initial_frame_length_ms;
+    int fl_increase_overhead_offset;
+    int fl_decrease_overhead_offset;
+  };
+
+  explicit BitrateController(const Config& config);
+
+  ~BitrateController() override;
+
+  BitrateController(const BitrateController&) = delete;
+  BitrateController& operator=(const BitrateController&) = delete;
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  const Config config_;
+  int bitrate_bps_;
+  int frame_length_ms_;
+  absl::optional<int> target_audio_bitrate_bps_;
+  absl::optional<size_t> overhead_bytes_per_packet_;
+};
+
+}  // namespace audio_network_adaptor
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_BITRATE_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller_unittest.cc
new file mode 100644
index 0000000000..9c593b818b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller_unittest.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/bitrate_controller.h"
+
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace audio_network_adaptor {
+
+namespace {
+
+void UpdateNetworkMetrics(
+    BitrateController* controller,
+    const absl::optional<int>& target_audio_bitrate_bps,
+    const absl::optional<size_t>& overhead_bytes_per_packet) {
+  // UpdateNetworkMetrics can accept multiple network metric updates at once.
+  // However, currently, the most used case is to update one metric at a time.
+  // To reflect this fact, we separate the calls.
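+  // (The UpdateMultipleNetworkMetricsAtOnce test below exercises the
+  // single-call path.)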
+  if (target_audio_bitrate_bps) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.target_audio_bitrate_bps = target_audio_bitrate_bps;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+  if (overhead_bytes_per_packet) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.overhead_bytes_per_packet = overhead_bytes_per_packet;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+}
+
+void CheckDecision(BitrateController* controller,
+                   const absl::optional<int>& frame_length_ms,
+                   int expected_bitrate_bps) {
+  AudioEncoderRuntimeConfig config;
+  config.frame_length_ms = frame_length_ms;
+  controller->MakeDecision(&config);
+  EXPECT_EQ(expected_bitrate_bps, config.bitrate_bps);
+}
+
+}  // namespace
+
+// These tests are named AnaBitrateControllerTest to distinguish from
+// BitrateControllerTest in
+// modules/bitrate_controller/bitrate_controller_unittest.cc.
+
+TEST(AnaBitrateControllerTest, OutputInitValueWhenTargetBitrateUnknown) {
+  constexpr int kInitialBitrateBps = 32000;
+  constexpr int kInitialFrameLengthMs = 20;
+  constexpr size_t kOverheadBytesPerPacket = 64;
+  BitrateController controller(BitrateController::Config(
+      kInitialBitrateBps, kInitialFrameLengthMs, 0, 0));
+  UpdateNetworkMetrics(&controller, absl::nullopt, kOverheadBytesPerPacket);
+  CheckDecision(&controller, kInitialFrameLengthMs * 2, kInitialBitrateBps);
+}
+
+TEST(AnaBitrateControllerTest, OutputInitValueWhenOverheadUnknown) {
+  constexpr int kInitialBitrateBps = 32000;
+  constexpr int kInitialFrameLengthMs = 20;
+  constexpr int kTargetBitrateBps = 48000;
+  BitrateController controller(BitrateController::Config(
+      kInitialBitrateBps, kInitialFrameLengthMs, 0, 0));
+  UpdateNetworkMetrics(&controller, kTargetBitrateBps, absl::nullopt);
+  CheckDecision(&controller, kInitialFrameLengthMs * 2, kInitialBitrateBps);
+}
+
+TEST(AnaBitrateControllerTest, ChangeBitrateOnTargetBitrateChanged) {
+  constexpr int kInitialFrameLengthMs = 20;
+  BitrateController controller(
+      BitrateController::Config(32000, kInitialFrameLengthMs, 0, 0));
+  constexpr int kTargetBitrateBps = 48000;
+  constexpr size_t kOverheadBytesPerPacket = 64;
+  constexpr int kBitrateBps = kTargetBitrateBps - kOverheadBytesPerPacket * 8 *
+                                                      1000 /
+                                                      kInitialFrameLengthMs;
+  // Frame length unchanged, bitrate changes in accordance with
+  // `metrics.target_audio_bitrate_bps` and
+  // `metrics.overhead_bytes_per_packet`.
+  UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket);
+  CheckDecision(&controller, kInitialFrameLengthMs, kBitrateBps);
+}
+
+TEST(AnaBitrateControllerTest, UpdateMultipleNetworkMetricsAtOnce) {
+  // This test is similar to ChangeBitrateOnTargetBitrateChanged. But instead
+  // of using ::UpdateNetworkMetrics(...), which calls
+  // BitrateController::UpdateNetworkMetrics(...) multiple times, we call it
+  // only once. This is to verify that
+  // BitrateController::UpdateNetworkMetrics(...) can handle multiple network
+  // updates at once. This is, however, not a common use case in current
+  // audio_network_adaptor_impl.cc.
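+  // With 20 ms frames and 64 bytes of per-packet overhead, the overhead rate
+  // is 64 * 8 * 1000 / 20 = 25600 bps, so the expected payload bitrate is
+  // 48000 - 25600 = 22400 bps.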
+ constexpr int kInitialFrameLengthMs = 20; + BitrateController controller( + BitrateController::Config(32000, kInitialFrameLengthMs, 0, 0)); + constexpr int kTargetBitrateBps = 48000; + constexpr size_t kOverheadBytesPerPacket = 64; + constexpr int kBitrateBps = kTargetBitrateBps - kOverheadBytesPerPacket * 8 * + 1000 / + kInitialFrameLengthMs; + Controller::NetworkMetrics network_metrics; + network_metrics.target_audio_bitrate_bps = kTargetBitrateBps; + network_metrics.overhead_bytes_per_packet = kOverheadBytesPerPacket; + controller.UpdateNetworkMetrics(network_metrics); + CheckDecision(&controller, kInitialFrameLengthMs, kBitrateBps); +} + +TEST(AnaBitrateControllerTest, TreatUnknownFrameLengthAsFrameLengthUnchanged) { + constexpr int kInitialFrameLengthMs = 20; + BitrateController controller( + BitrateController::Config(32000, kInitialFrameLengthMs, 0, 0)); + constexpr int kTargetBitrateBps = 48000; + constexpr size_t kOverheadBytesPerPacket = 64; + constexpr int kBitrateBps = kTargetBitrateBps - kOverheadBytesPerPacket * 8 * + 1000 / + kInitialFrameLengthMs; + UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket); + CheckDecision(&controller, absl::nullopt, kBitrateBps); +} + +TEST(AnaBitrateControllerTest, IncreaseBitrateOnFrameLengthIncreased) { + constexpr int kInitialFrameLengthMs = 20; + BitrateController controller( + BitrateController::Config(32000, kInitialFrameLengthMs, 0, 0)); + + constexpr int kTargetBitrateBps = 48000; + constexpr size_t kOverheadBytesPerPacket = 64; + constexpr int kBitrateBps = kTargetBitrateBps - kOverheadBytesPerPacket * 8 * + 1000 / + kInitialFrameLengthMs; + UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket); + CheckDecision(&controller, absl::nullopt, kBitrateBps); + + constexpr int kFrameLengthMs = 60; + constexpr size_t kPacketOverheadRateDiff = + kOverheadBytesPerPacket * 8 * 1000 / 20 - + kOverheadBytesPerPacket * 8 * 1000 / 60; + UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket); + CheckDecision(&controller, kFrameLengthMs, + kBitrateBps + kPacketOverheadRateDiff); +} + +TEST(AnaBitrateControllerTest, DecreaseBitrateOnFrameLengthDecreased) { + constexpr int kInitialFrameLengthMs = 60; + BitrateController controller( + BitrateController::Config(32000, kInitialFrameLengthMs, 0, 0)); + + constexpr int kTargetBitrateBps = 48000; + constexpr size_t kOverheadBytesPerPacket = 64; + constexpr int kBitrateBps = kTargetBitrateBps - kOverheadBytesPerPacket * 8 * + 1000 / + kInitialFrameLengthMs; + UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket); + CheckDecision(&controller, absl::nullopt, kBitrateBps); + + constexpr int kFrameLengthMs = 20; + constexpr size_t kPacketOverheadRateDiff = + kOverheadBytesPerPacket * 8 * 1000 / 20 - + kOverheadBytesPerPacket * 8 * 1000 / 60; + UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket); + CheckDecision(&controller, kFrameLengthMs, + kBitrateBps - kPacketOverheadRateDiff); +} + +TEST(AnaBitrateControllerTest, BitrateNeverBecomesNegative) { + BitrateController controller(BitrateController::Config(32000, 20, 0, 0)); + constexpr size_t kOverheadBytesPerPacket = 64; + constexpr int kFrameLengthMs = 60; + // Set a target rate smaller than overhead rate, the bitrate is bounded by 0. 
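+  // At 60 ms frames, overhead alone costs 64 * 8 * 1000 / 60 = 8533 bps
+  // (integer division), so the 8532 bps target below would drive the payload
+  // rate negative were it not clamped by std::max(0, ...) in the controller.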
+  constexpr int kTargetBitrateBps =
+      kOverheadBytesPerPacket * 8 * 1000 / kFrameLengthMs - 1;
+  UpdateNetworkMetrics(&controller, kTargetBitrateBps, kOverheadBytesPerPacket);
+  CheckDecision(&controller, kFrameLengthMs, 0);
+}
+
+TEST(AnaBitrateControllerTest, CheckBehaviorOnChangingCondition) {
+  BitrateController controller(BitrateController::Config(32000, 20, 0, 0));
+
+  // Start from an arbitrary overall bitrate.
+  int overall_bitrate = 34567;
+  size_t overhead_bytes_per_packet = 64;
+  int frame_length_ms = 20;
+  int current_bitrate = rtc::checked_cast<int>(
+      overall_bitrate - overhead_bytes_per_packet * 8 * 1000 / frame_length_ms);
+
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+
+  // Next: increase overall bitrate.
+  overall_bitrate += 100;
+  current_bitrate += 100;
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+
+  // Next: change frame length.
+  frame_length_ms = 60;
+  current_bitrate +=
+      rtc::checked_cast<int>(overhead_bytes_per_packet * 8 * 1000 / 20 -
+                             overhead_bytes_per_packet * 8 * 1000 / 60);
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+
+  // Next: change overhead.
+  overhead_bytes_per_packet -= 30;
+  current_bitrate += 30 * 8 * 1000 / frame_length_ms;
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+
+  // Next: change frame length.
+  frame_length_ms = 20;
+  current_bitrate -=
+      rtc::checked_cast<int>(overhead_bytes_per_packet * 8 * 1000 / 20 -
+                             overhead_bytes_per_packet * 8 * 1000 / 60);
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+
+  // Next: decrease overall bitrate and frame length.
+  overall_bitrate -= 100;
+  current_bitrate -= 100;
+  frame_length_ms = 60;
+  current_bitrate +=
+      rtc::checked_cast<int>(overhead_bytes_per_packet * 8 * 1000 / 20 -
+                             overhead_bytes_per_packet * 8 * 1000 / 60);
+
+  UpdateNetworkMetrics(&controller, overall_bitrate, overhead_bytes_per_packet);
+  CheckDecision(&controller, frame_length_ms, current_bitrate);
+}
+
+}  // namespace audio_network_adaptor
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.cc
new file mode 100644
index 0000000000..2ef2f4c4d8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/channel_controller.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ChannelController::Config::Config(size_t num_encoder_channels,
+                                  size_t intial_channels_to_encode,
+                                  int channel_1_to_2_bandwidth_bps,
+                                  int channel_2_to_1_bandwidth_bps)
+    : num_encoder_channels(num_encoder_channels),
+      intial_channels_to_encode(intial_channels_to_encode),
+      channel_1_to_2_bandwidth_bps(channel_1_to_2_bandwidth_bps),
+      channel_2_to_1_bandwidth_bps(channel_2_to_1_bandwidth_bps) {}
+
+ChannelController::ChannelController(const Config& config)
+    : config_(config), channels_to_encode_(config_.intial_channels_to_encode) {
+  RTC_DCHECK_GT(config_.intial_channels_to_encode, 0lu);
+  // Currently, we require `intial_channels_to_encode` to be <= 2.
+  RTC_DCHECK_LE(config_.intial_channels_to_encode, 2lu);
+  RTC_DCHECK_GE(config_.num_encoder_channels,
+                config_.intial_channels_to_encode);
+}
+
+ChannelController::~ChannelController() = default;
+
+void ChannelController::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.uplink_bandwidth_bps)
+    uplink_bandwidth_bps_ = network_metrics.uplink_bandwidth_bps;
+}
+
+void ChannelController::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  // Decision on `num_channels` should not have been made.
+  RTC_DCHECK(!config->num_channels);
+
+  if (uplink_bandwidth_bps_) {
+    if (channels_to_encode_ == 2 &&
+        *uplink_bandwidth_bps_ <= config_.channel_2_to_1_bandwidth_bps) {
+      channels_to_encode_ = 1;
+    } else if (channels_to_encode_ == 1 &&
+               *uplink_bandwidth_bps_ >= config_.channel_1_to_2_bandwidth_bps) {
+      channels_to_encode_ =
+          std::min(static_cast<size_t>(2), config_.num_encoder_channels);
+    }
+  }
+  config->num_channels = channels_to_encode_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.h
new file mode 100644
index 0000000000..3cd4bb7dec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CHANNEL_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CHANNEL_CONTROLLER_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+class ChannelController final : public Controller {
+ public:
+  struct Config {
+    Config(size_t num_encoder_channels,
+           size_t intial_channels_to_encode,
+           int channel_1_to_2_bandwidth_bps,
+           int channel_2_to_1_bandwidth_bps);
+    size_t num_encoder_channels;
+    size_t intial_channels_to_encode;
+    // Uplink bandwidth above which the number of encoded channels should
+    // switch from 1 to 2.
+    int channel_1_to_2_bandwidth_bps;
+    // Uplink bandwidth below which the number of encoded channels should
+    // switch from 2 to 1.
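+    // This is expected to be lower than channel_1_to_2_bandwidth_bps, giving
+    // the switching decision hysteresis instead of flapping around one value.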
+    int channel_2_to_1_bandwidth_bps;
+  };
+
+  explicit ChannelController(const Config& config);
+
+  ~ChannelController() override;
+
+  ChannelController(const ChannelController&) = delete;
+  ChannelController& operator=(const ChannelController&) = delete;
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  const Config config_;
+  size_t channels_to_encode_;
+  absl::optional<int> uplink_bandwidth_bps_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CHANNEL_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller_unittest.cc
new file mode 100644
index 0000000000..21504bcec0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller_unittest.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/channel_controller.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kNumChannels = 2;
+constexpr int kChannel1To2BandwidthBps = 31000;
+constexpr int kChannel2To1BandwidthBps = 29000;
+constexpr int kMediumBandwidthBps =
+    (kChannel1To2BandwidthBps + kChannel2To1BandwidthBps) / 2;
+
+std::unique_ptr<ChannelController> CreateChannelController(int init_channels) {
+  std::unique_ptr<ChannelController> controller(
+      new ChannelController(ChannelController::Config(
+          kNumChannels, init_channels, kChannel1To2BandwidthBps,
+          kChannel2To1BandwidthBps)));
+  return controller;
+}
+
+void CheckDecision(ChannelController* controller,
+                   const absl::optional<int>& uplink_bandwidth_bps,
+                   size_t expected_num_channels) {
+  if (uplink_bandwidth_bps) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+  AudioEncoderRuntimeConfig config;
+  controller->MakeDecision(&config);
+  EXPECT_EQ(expected_num_channels, config.num_channels);
+}
+
+}  // namespace
+
+TEST(ChannelControllerTest, OutputInitValueWhenUplinkBandwidthUnknown) {
+  constexpr int kInitChannels = 2;
+  auto controller = CreateChannelController(kInitChannels);
+  CheckDecision(controller.get(), absl::nullopt, kInitChannels);
+}
+
+TEST(ChannelControllerTest, SwitchTo2ChannelsOnHighUplinkBandwidth) {
+  constexpr int kInitChannels = 1;
+  auto controller = CreateChannelController(kInitChannels);
+  // Use high bandwidth to check output switch to 2.
+  CheckDecision(controller.get(), kChannel1To2BandwidthBps, 2);
+}
+
+TEST(ChannelControllerTest, SwitchTo1ChannelOnLowUplinkBandwidth) {
+  constexpr int kInitChannels = 2;
+  auto controller = CreateChannelController(kInitChannels);
+  // Use low bandwidth to check output switch to 1.
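+  // kChannel2To1BandwidthBps is exactly the switch-down threshold; the
+  // controller compares with <=, so this is enough to drop to one channel.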
+ CheckDecision(controller.get(), kChannel2To1BandwidthBps, 1); +} + +TEST(ChannelControllerTest, Maintain1ChannelOnMediumUplinkBandwidth) { + constexpr int kInitChannels = 1; + auto controller = CreateChannelController(kInitChannels); + // Use between-thresholds bandwidth to check output remains at 1. + CheckDecision(controller.get(), kMediumBandwidthBps, 1); +} + +TEST(ChannelControllerTest, Maintain2ChannelsOnMediumUplinkBandwidth) { + constexpr int kInitChannels = 2; + auto controller = CreateChannelController(kInitChannels); + // Use between-thresholds bandwidth to check output remains at 2. + CheckDecision(controller.get(), kMediumBandwidthBps, 2); +} + +TEST(ChannelControllerTest, CheckBehaviorOnChangingUplinkBandwidth) { + constexpr int kInitChannels = 1; + auto controller = CreateChannelController(kInitChannels); + + // Use between-thresholds bandwidth to check output remains at 1. + CheckDecision(controller.get(), kMediumBandwidthBps, 1); + + // Use high bandwidth to check output switch to 2. + CheckDecision(controller.get(), kChannel1To2BandwidthBps, 2); + + // Use between-thresholds bandwidth to check output remains at 2. + CheckDecision(controller.get(), kMediumBandwidthBps, 2); + + // Use low bandwidth to check output switch to 1. + CheckDecision(controller.get(), kChannel2To1BandwidthBps, 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/config.proto b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/config.proto new file mode 100644 index 0000000000..a815451993 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/config.proto @@ -0,0 +1,196 @@ +syntax = "proto2"; + +package webrtc.audio_network_adaptor.config; + +option optimize_for = LITE_RUNTIME; +option java_package = "org.webrtc.AudioNetworkAdaptor"; +option java_outer_classname = "Config"; +option objc_class_prefix = "WANA"; + +message FecController { + message Threshold { + // Threshold defines a curve in the bandwidth/packet-loss domain. The + // curve is characterized by the two conjunction points: A and B. + // + // packet ^ | + // loss | A| + // | \ A: (low_bandwidth_bps, low_bandwidth_packet_loss) + // | \ B: (high_bandwidth_bps, high_bandwidth_packet_loss) + // | B\________ + // |---------------> bandwidth + optional int32 low_bandwidth_bps = 1; + optional float low_bandwidth_packet_loss = 2; + optional int32 high_bandwidth_bps = 3; + optional float high_bandwidth_packet_loss = 4; + } + + // `fec_enabling_threshold` defines a curve, above which FEC should be + // enabled. `fec_disabling_threshold` defines a curve, under which FEC + // should be disabled. See below + // + // packet-loss ^ | | + // | | | FEC + // | \ \ ON + // | FEC \ \_______ fec_enabling_threshold + // | OFF \_________ fec_disabling_threshold + // |-----------------> bandwidth + optional Threshold fec_enabling_threshold = 1; + optional Threshold fec_disabling_threshold = 2; + + // `time_constant_ms` is the time constant for an exponential filter, which + // is used for smoothing the packet loss fraction. + optional int32 time_constant_ms = 3; +} + +message FecControllerRplrBased { + message Threshold { + // Threshold defines a curve in the bandwidth/recoverable-packet-loss + // domain. + // The curve is characterized by the two conjunction points: A and B. 
+    //
+    // recoverable ^
+    //      packet |  |
+    //        loss | A|
+    //             |  \   A: (low_bandwidth_bps,
+    //             |   \      low_bandwidth_recoverable_packet_loss)
+    //             |    \  B: (high_bandwidth_bps,
+    //             |     \     high_bandwidth_recoverable_packet_loss)
+    //             |     B\________
+    //             |---------------> bandwidth
+    optional int32 low_bandwidth_bps = 1;
+    optional float low_bandwidth_recoverable_packet_loss = 2;
+    optional int32 high_bandwidth_bps = 3;
+    optional float high_bandwidth_recoverable_packet_loss = 4;
+  }
+
+  // `fec_enabling_threshold` defines a curve, above which FEC should be
+  // enabled. `fec_disabling_threshold` defines a curve, under which FEC
+  // should be disabled. See below
+  //
+  // packet-loss ^   |  |
+  //             |   |  |   FEC
+  //             |    \  \   ON
+  //             | FEC \  \_______ fec_enabling_threshold
+  //             | OFF  \_________ fec_disabling_threshold
+  //             |-----------------> bandwidth
+  optional Threshold fec_enabling_threshold = 1;
+  optional Threshold fec_disabling_threshold = 2;
+}
+
+message FrameLengthController {
+  // Uplink packet loss fraction below which frame length can increase.
+  optional float fl_increasing_packet_loss_fraction = 1;
+
+  // Uplink packet loss fraction above which frame length should decrease.
+  optional float fl_decreasing_packet_loss_fraction = 2;
+
+  // Uplink bandwidth below which frame length can switch from 20ms to 60ms.
+  optional int32 fl_20ms_to_60ms_bandwidth_bps = 3;
+
+  // Uplink bandwidth above which frame length should switch from 60ms to 20ms.
+  optional int32 fl_60ms_to_20ms_bandwidth_bps = 4;
+
+  // Uplink bandwidth below which frame length can switch from 60ms to 120ms.
+  optional int32 fl_60ms_to_120ms_bandwidth_bps = 5;
+
+  // Uplink bandwidth above which frame length should switch from 120ms to 60ms.
+  optional int32 fl_120ms_to_60ms_bandwidth_bps = 6;
+
+  // Offset to apply to the per-packet overhead when increasing frame length.
+  optional int32 fl_increase_overhead_offset = 7;
+
+  // Offset to apply to the per-packet overhead when decreasing frame length.
+  optional int32 fl_decrease_overhead_offset = 8;
+
+  // Uplink bandwidth below which frame length can switch from 20ms to 40ms. In
+  // the current implementation, defining this will invalidate
+  // fl_20ms_to_60ms_bandwidth_bps.
+  optional int32 fl_20ms_to_40ms_bandwidth_bps = 9;
+
+  // Uplink bandwidth above which frame length should switch from 40ms to 20ms.
+  optional int32 fl_40ms_to_20ms_bandwidth_bps = 10;
+
+  // Uplink bandwidth below which frame length can switch from 40ms to 60ms.
+  optional int32 fl_40ms_to_60ms_bandwidth_bps = 11;
+
+  // Uplink bandwidth above which frame length should switch from 60ms to 40ms.
+  // In the current implementation, defining this will invalidate
+  // fl_60ms_to_20ms_bandwidth_bps.
+  optional int32 fl_60ms_to_40ms_bandwidth_bps = 12;
+}
+
+message FrameLengthControllerV2 {
+  // FrameLengthControllerV2 chooses the frame length by taking the target
+  // bitrate and subtracting the overhead bitrate to obtain the remaining
+  // bitrate for the payload. The chosen frame length is the shortest possible
+  // where the payload bitrate is more than `min_payload_bitrate_bps`.
+  optional int32 min_payload_bitrate_bps = 1;
+
+  // If true, uses the stable target bitrate to decide the frame length. This
+  // will result in less frame length toggling but spending more time at longer
+  // frame lengths compared to using the normal target bitrate.
+  optional bool use_slow_adaptation = 2;
+}
+
+message ChannelController {
+  // Uplink bandwidth above which the number of encoded channels should switch
+  // from 1 to 2.
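+  // Keeping this threshold above `channel_2_to_1_bandwidth_bps` gives the
+  // controller a hysteresis band, so the channel count does not oscillate
+  // when the uplink bandwidth estimate hovers around a single cut-off value.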
+  optional int32 channel_1_to_2_bandwidth_bps = 1;
+
+  // Uplink bandwidth below which the number of encoded channels should switch
+  // from 2 to 1.
+  optional int32 channel_2_to_1_bandwidth_bps = 2;
+}
+
+message DtxController {
+  // Uplink bandwidth below which DTX should be switched on.
+  optional int32 dtx_enabling_bandwidth_bps = 1;
+
+  // Uplink bandwidth above which DTX should be switched off.
+  optional int32 dtx_disabling_bandwidth_bps = 2;
+}
+
+message BitrateController {
+  // Offset to apply to per-packet overhead when the frame length is increased.
+  optional int32 fl_increase_overhead_offset = 1;
+  // Offset to apply to per-packet overhead when the frame length is decreased.
+  optional int32 fl_decrease_overhead_offset = 2;
+}
+
+message Controller {
+  message ScoringPoint {
+    // `ScoringPoint` is a subspace of network condition. It is used for
+    // comparing the significance of controllers.
+    optional int32 uplink_bandwidth_bps = 1;
+    optional float uplink_packet_loss_fraction = 2;
+  }
+
+  // The distance from `scoring_point` to a given network condition defines
+  // the significance of this controller with respect to that network
+  // condition. Shorter distance means higher significance. The significances
+  // of controllers determine their order in the processing pipeline.
+  // Controllers without `scoring_point` follow their default order in
+  // `ControllerManager::controllers`.
+  optional ScoringPoint scoring_point = 1;
+
+  oneof controller {
+    FecController fec_controller = 21;
+    FrameLengthController frame_length_controller = 22;
+    ChannelController channel_controller = 23;
+    DtxController dtx_controller = 24;
+    BitrateController bitrate_controller = 25;
+    FecControllerRplrBased fec_controller_rplr_based = 26;
+    FrameLengthControllerV2 frame_length_controller_v2 = 27;
+  }
+}
+
+message ControllerManager {
+  repeated Controller controllers = 1;
+
+  // Least time since last reordering for a new reordering to be made.
+  optional int32 min_reordering_time_ms = 2;
+
+  // Least squared distance from last scoring point for a new reordering to be
+  // made.
+  optional float min_reordering_squared_distance = 3;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.cc
new file mode 100644
index 0000000000..5e2dc859bf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.cc
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+
+namespace webrtc {
+
+Controller::NetworkMetrics::NetworkMetrics() = default;
+
+Controller::NetworkMetrics::~NetworkMetrics() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.h
new file mode 100644
index 0000000000..b70ada01a4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_H_
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
+
+namespace webrtc {
+
+class Controller {
+ public:
+  struct NetworkMetrics {
+    NetworkMetrics();
+    ~NetworkMetrics();
+    absl::optional<int> uplink_bandwidth_bps;
+    absl::optional<float> uplink_packet_loss_fraction;
+    absl::optional<int> target_audio_bitrate_bps;
+    absl::optional<int> rtt_ms;
+    absl::optional<size_t> overhead_bytes_per_packet;
+  };
+
+  virtual ~Controller() = default;
+
+  // Informs this controller of a network metrics update. Any non-empty field
+  // indicates an update of the corresponding network metric.
+  virtual void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) = 0;
+
+  virtual void MakeDecision(AudioEncoderRuntimeConfig* config) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.cc
new file mode 100644
index 0000000000..42dd8a8786
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.cc
@@ -0,0 +1,454 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/audio_network_adaptor/bitrate_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/channel_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/debug_dump_writer.h"
+#include "modules/audio_coding/audio_network_adaptor/dtx_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h"
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h"
+#include "modules/audio_coding/audio_network_adaptor/util/threshold_curve.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/time_utils.h"
+
+#if WEBRTC_ENABLE_PROTOBUF
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_coding/audio_network_adaptor/config.pb.h"
+#else
+#include "modules/audio_coding/audio_network_adaptor/config.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+#endif
+
+namespace webrtc {
+
+namespace {
+
+#if WEBRTC_ENABLE_PROTOBUF
+
+std::unique_ptr<FecControllerPlrBased> CreateFecControllerPlrBased(
+    const audio_network_adaptor::config::FecController& config,
+    bool initial_fec_enabled) {
+  RTC_CHECK(config.has_fec_enabling_threshold());
+  RTC_CHECK(config.has_fec_disabling_threshold());
+  RTC_CHECK(config.has_time_constant_ms());
+
+  auto& fec_enabling_threshold = config.fec_enabling_threshold();
+  RTC_CHECK(fec_enabling_threshold.has_low_bandwidth_bps());
+  RTC_CHECK(fec_enabling_threshold.has_low_bandwidth_packet_loss());
+  RTC_CHECK(fec_enabling_threshold.has_high_bandwidth_bps());
+  RTC_CHECK(fec_enabling_threshold.has_high_bandwidth_packet_loss());
+
+  auto& fec_disabling_threshold = config.fec_disabling_threshold();
+  RTC_CHECK(fec_disabling_threshold.has_low_bandwidth_bps());
+  RTC_CHECK(fec_disabling_threshold.has_low_bandwidth_packet_loss());
+  RTC_CHECK(fec_disabling_threshold.has_high_bandwidth_bps());
+  RTC_CHECK(fec_disabling_threshold.has_high_bandwidth_packet_loss());
+
+  return std::unique_ptr<FecControllerPlrBased>(
+      new FecControllerPlrBased(FecControllerPlrBased::Config(
+          initial_fec_enabled,
+          ThresholdCurve(fec_enabling_threshold.low_bandwidth_bps(),
+                         fec_enabling_threshold.low_bandwidth_packet_loss(),
+                         fec_enabling_threshold.high_bandwidth_bps(),
+                         fec_enabling_threshold.high_bandwidth_packet_loss()),
+          ThresholdCurve(fec_disabling_threshold.low_bandwidth_bps(),
+                         fec_disabling_threshold.low_bandwidth_packet_loss(),
+                         fec_disabling_threshold.high_bandwidth_bps(),
+                         fec_disabling_threshold.high_bandwidth_packet_loss()),
+          config.time_constant_ms())));
+}
+
+std::unique_ptr<FrameLengthController> CreateFrameLengthController(
+    const audio_network_adaptor::config::FrameLengthController& config,
+    rtc::ArrayView<const int> encoder_frame_lengths_ms,
+    int initial_frame_length_ms,
+    int min_encoder_bitrate_bps) {
+  RTC_CHECK(config.has_fl_increasing_packet_loss_fraction());
+  RTC_CHECK(config.has_fl_decreasing_packet_loss_fraction());
+
+  std::map<FrameLengthController::Config::FrameLengthChange, int>
+      fl_changing_bandwidths_bps;
+
+  if (config.has_fl_20ms_to_60ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(20, 60),
+                       config.fl_20ms_to_60ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_60ms_to_20ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(60, 20),
+                       config.fl_60ms_to_20ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_20ms_to_40ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(20, 40),
+                       config.fl_20ms_to_40ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_40ms_to_20ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(40, 20),
+                       config.fl_40ms_to_20ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_40ms_to_60ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(40, 60),
+                       config.fl_40ms_to_60ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_60ms_to_40ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(
+        std::make_pair(FrameLengthController::Config::FrameLengthChange(60, 40),
+                       config.fl_60ms_to_40ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_60ms_to_120ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(std::make_pair(
+        FrameLengthController::Config::FrameLengthChange(60, 120),
+        config.fl_60ms_to_120ms_bandwidth_bps()));
+  }
+
+  if (config.has_fl_120ms_to_60ms_bandwidth_bps()) {
+    fl_changing_bandwidths_bps.insert(std::make_pair(
+        FrameLengthController::Config::FrameLengthChange(120, 60),
+        config.fl_120ms_to_60ms_bandwidth_bps()));
+  }
+
+  int fl_increase_overhead_offset = 0;
+  if (config.has_fl_increase_overhead_offset()) {
+    fl_increase_overhead_offset = config.fl_increase_overhead_offset();
+  }
+  int fl_decrease_overhead_offset = 0;
+  if (config.has_fl_decrease_overhead_offset()) {
+    fl_decrease_overhead_offset = config.fl_decrease_overhead_offset();
+  }
+
+  FrameLengthController::Config ctor_config(
+      std::set<int>(), initial_frame_length_ms, min_encoder_bitrate_bps,
+      config.fl_increasing_packet_loss_fraction(),
+      config.fl_decreasing_packet_loss_fraction(), fl_increase_overhead_offset,
+      fl_decrease_overhead_offset, std::move(fl_changing_bandwidths_bps));
+
+  for (auto frame_length : encoder_frame_lengths_ms)
+    ctor_config.encoder_frame_lengths_ms.insert(frame_length);
+
+  return std::unique_ptr<FrameLengthController>(
+      new FrameLengthController(ctor_config));
+}
+
+std::unique_ptr<ChannelController> CreateChannelController(
+    const audio_network_adaptor::config::ChannelController& config,
+    size_t num_encoder_channels,
+    size_t initial_channels_to_encode) {
+  RTC_CHECK(config.has_channel_1_to_2_bandwidth_bps());
+  RTC_CHECK(config.has_channel_2_to_1_bandwidth_bps());
+
+  return std::unique_ptr<ChannelController>(new ChannelController(
+      ChannelController::Config(num_encoder_channels,
+                                initial_channels_to_encode,
+                                config.channel_1_to_2_bandwidth_bps(),
+                                config.channel_2_to_1_bandwidth_bps())));
+}
+
+std::unique_ptr<DtxController> CreateDtxController(
+    const audio_network_adaptor::config::DtxController& dtx_config,
+    bool initial_dtx_enabled) {
+  RTC_CHECK(dtx_config.has_dtx_enabling_bandwidth_bps());
+  RTC_CHECK(dtx_config.has_dtx_disabling_bandwidth_bps());
+
+  return std::unique_ptr<DtxController>(
+      new DtxController(DtxController::Config(
+          initial_dtx_enabled, dtx_config.dtx_enabling_bandwidth_bps(),
+          dtx_config.dtx_disabling_bandwidth_bps())));
+}
+
+using audio_network_adaptor::BitrateController;
+std::unique_ptr<BitrateController> CreateBitrateController(
+    const audio_network_adaptor::config::BitrateController& bitrate_config,
+    int initial_bitrate_bps,
+    int initial_frame_length_ms) {
+  int fl_increase_overhead_offset = 0;
+  if (bitrate_config.has_fl_increase_overhead_offset()) {
+    fl_increase_overhead_offset = bitrate_config.fl_increase_overhead_offset();
+  }
+  int fl_decrease_overhead_offset = 0;
+  if (bitrate_config.has_fl_decrease_overhead_offset()) {
+    fl_decrease_overhead_offset = bitrate_config.fl_decrease_overhead_offset();
+  }
+  return std::unique_ptr<BitrateController>(
+      new BitrateController(BitrateController::Config(
+          initial_bitrate_bps, initial_frame_length_ms,
+          fl_increase_overhead_offset, fl_decrease_overhead_offset)));
+}
+
+std::unique_ptr<FrameLengthControllerV2> CreateFrameLengthControllerV2(
+    const audio_network_adaptor::config::FrameLengthControllerV2& config,
+    rtc::ArrayView<const int> encoder_frame_lengths_ms) {
+  return std::make_unique<FrameLengthControllerV2>(
+      encoder_frame_lengths_ms, config.min_payload_bitrate_bps(),
+      config.use_slow_adaptation());
+}
+#endif  // WEBRTC_ENABLE_PROTOBUF
+
+}  // namespace
+
+ControllerManagerImpl::Config::Config(int min_reordering_time_ms,
+                                      float min_reordering_squared_distance)
+    : min_reordering_time_ms(min_reordering_time_ms),
+      min_reordering_squared_distance(min_reordering_squared_distance) {}
+
+ControllerManagerImpl::Config::~Config() = default;
+
+std::unique_ptr<ControllerManager> ControllerManagerImpl::Create(
+    absl::string_view config_string,
+    size_t num_encoder_channels,
+    rtc::ArrayView<const int> encoder_frame_lengths_ms,
+    int min_encoder_bitrate_bps,
+    size_t initial_channels_to_encode,
+    int initial_frame_length_ms,
+    int initial_bitrate_bps,
+    bool initial_fec_enabled,
+    bool initial_dtx_enabled) {
+  return Create(config_string, num_encoder_channels, encoder_frame_lengths_ms,
+                min_encoder_bitrate_bps, initial_channels_to_encode,
+                initial_frame_length_ms, initial_bitrate_bps,
+                initial_fec_enabled, initial_dtx_enabled, nullptr);
+}
+
+std::unique_ptr<ControllerManager> ControllerManagerImpl::Create(
+    absl::string_view config_string,
+    size_t num_encoder_channels,
+    rtc::ArrayView<const int> encoder_frame_lengths_ms,
+    int min_encoder_bitrate_bps,
+    size_t initial_channels_to_encode,
+    int initial_frame_length_ms,
+    int initial_bitrate_bps,
+    bool initial_fec_enabled,
+    bool initial_dtx_enabled,
+    DebugDumpWriter* debug_dump_writer) {
+#if WEBRTC_ENABLE_PROTOBUF
+  audio_network_adaptor::config::ControllerManager controller_manager_config;
+  RTC_CHECK(
+      controller_manager_config.ParseFromString(std::string(config_string)));
+  if (debug_dump_writer)
+    debug_dump_writer->DumpControllerManagerConfig(controller_manager_config,
+                                                   rtc::TimeMillis());
+
+  std::vector<std::unique_ptr<Controller>> controllers;
+  std::map<const Controller*, std::pair<int, float>> scoring_points;
+
+  for (int i = 0; i < controller_manager_config.controllers_size(); ++i) {
+    auto& controller_config = controller_manager_config.controllers(i);
+    std::unique_ptr<Controller> controller;
+    switch (controller_config.controller_case()) {
+      case audio_network_adaptor::config::Controller::kFecController:
+        controller = CreateFecControllerPlrBased(
+            controller_config.fec_controller(), initial_fec_enabled);
+        break;
+      case audio_network_adaptor::config::Controller::kFecControllerRplrBased:
+        // FecControllerRplrBased has been removed and can't be used anymore.
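+        // Configs that still select it are skipped below; the DCHECK only
+        // fires in debug builds.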
+        RTC_DCHECK_NOTREACHED();
+        continue;
+      case audio_network_adaptor::config::Controller::kFrameLengthController:
+        controller = CreateFrameLengthController(
+            controller_config.frame_length_controller(),
+            encoder_frame_lengths_ms, initial_frame_length_ms,
+            min_encoder_bitrate_bps);
+        break;
+      case audio_network_adaptor::config::Controller::kChannelController:
+        controller = CreateChannelController(
+            controller_config.channel_controller(), num_encoder_channels,
+            initial_channels_to_encode);
+        break;
+      case audio_network_adaptor::config::Controller::kDtxController:
+        controller = CreateDtxController(controller_config.dtx_controller(),
+                                         initial_dtx_enabled);
+        break;
+      case audio_network_adaptor::config::Controller::kBitrateController:
+        controller = CreateBitrateController(
+            controller_config.bitrate_controller(), initial_bitrate_bps,
+            initial_frame_length_ms);
+        break;
+      case audio_network_adaptor::config::Controller::kFrameLengthControllerV2:
+        controller = CreateFrameLengthControllerV2(
+            controller_config.frame_length_controller_v2(),
+            encoder_frame_lengths_ms);
+        break;
+      default:
+        RTC_DCHECK_NOTREACHED();
+    }
+    if (controller_config.has_scoring_point()) {
+      auto& scoring_point = controller_config.scoring_point();
+      RTC_CHECK(scoring_point.has_uplink_bandwidth_bps());
+      RTC_CHECK(scoring_point.has_uplink_packet_loss_fraction());
+      scoring_points[controller.get()] = std::make_pair(
+          scoring_point.uplink_bandwidth_bps(),
+          scoring_point.uplink_packet_loss_fraction());
+    }
+    controllers.push_back(std::move(controller));
+  }
+
+  if (scoring_points.size() == 0) {
+    return std::unique_ptr<ControllerManagerImpl>(
+        new ControllerManagerImpl(ControllerManagerImpl::Config(0, 0),
+                                  std::move(controllers), scoring_points));
+  } else {
+    RTC_CHECK(controller_manager_config.has_min_reordering_time_ms());
+    RTC_CHECK(controller_manager_config.has_min_reordering_squared_distance());
+    return std::unique_ptr<ControllerManagerImpl>(new ControllerManagerImpl(
+        ControllerManagerImpl::Config(
+            controller_manager_config.min_reordering_time_ms(),
+            controller_manager_config.min_reordering_squared_distance()),
+        std::move(controllers), scoring_points));
+  }
+
+#else
+  RTC_DCHECK_NOTREACHED();
+  return nullptr;
+#endif  // WEBRTC_ENABLE_PROTOBUF
+}
+
+ControllerManagerImpl::ControllerManagerImpl(const Config& config)
+    : ControllerManagerImpl(
+          config,
+          std::vector<std::unique_ptr<Controller>>(),
+          std::map<const Controller*, std::pair<int, float>>()) {}
+
+ControllerManagerImpl::ControllerManagerImpl(
+    const Config& config,
+    std::vector<std::unique_ptr<Controller>> controllers,
+    const std::map<const Controller*, std::pair<int, float>>& scoring_points)
+    : config_(config),
+      controllers_(std::move(controllers)),
+      last_reordering_time_ms_(absl::nullopt),
+      last_scoring_point_(0, 0.0) {
+  for (auto& controller : controllers_)
+    default_sorted_controllers_.push_back(controller.get());
+  sorted_controllers_ = default_sorted_controllers_;
+  for (auto& controller_point : scoring_points) {
+    controller_scoring_points_.insert(std::make_pair(
+        controller_point.first, ScoringPoint(controller_point.second.first,
+                                             controller_point.second.second)));
+  }
+}
+
+ControllerManagerImpl::~ControllerManagerImpl() = default;
+
+std::vector<Controller*> ControllerManagerImpl::GetSortedControllers(
+    const Controller::NetworkMetrics& metrics) {
+  if (controller_scoring_points_.size() == 0)
+    return default_sorted_controllers_;
+
+  if (!metrics.uplink_bandwidth_bps || !metrics.uplink_packet_loss_fraction)
+    return sorted_controllers_;
+
+  const int64_t now_ms = rtc::TimeMillis();
+  if (last_reordering_time_ms_ &&
+      now_ms - *last_reordering_time_ms_ < config_.min_reordering_time_ms)
+    return sorted_controllers_;
+
+  ScoringPoint scoring_point(*metrics.uplink_bandwidth_bps,
+                             *metrics.uplink_packet_loss_fraction);
+
+  if (last_reordering_time_ms_ &&
+      last_scoring_point_.SquaredDistanceTo(scoring_point) <
+          config_.min_reordering_squared_distance)
+    return sorted_controllers_;
+
+  // Sort controllers according to the distances of `scoring_point` to the
+  // scoring points of controllers.
+  //
+  // Controllers that are not associated with any scoring point are treated
+  // as if
+  // 1) they are less important than any controller that has a scoring point,
+  // 2) they are equally important to any other controller that has no scoring
+  //    point, and their relative order will follow
+  //    `default_sorted_controllers_`.
+  std::vector<Controller*> sorted_controllers(default_sorted_controllers_);
+  std::stable_sort(
+      sorted_controllers.begin(), sorted_controllers.end(),
+      [this, &scoring_point](const Controller* lhs, const Controller* rhs) {
+        auto lhs_scoring_point = controller_scoring_points_.find(lhs);
+        auto rhs_scoring_point = controller_scoring_points_.find(rhs);
+
+        if (lhs_scoring_point == controller_scoring_points_.end())
+          return false;
+
+        if (rhs_scoring_point == controller_scoring_points_.end())
+          return true;
+
+        return lhs_scoring_point->second.SquaredDistanceTo(scoring_point) <
+               rhs_scoring_point->second.SquaredDistanceTo(scoring_point);
+      });
+
+  if (sorted_controllers_ != sorted_controllers) {
+    sorted_controllers_ = sorted_controllers;
+    last_reordering_time_ms_ = now_ms;
+    last_scoring_point_ = scoring_point;
+  }
+  return sorted_controllers_;
+}
+
+std::vector<Controller*> ControllerManagerImpl::GetControllers() const {
+  return default_sorted_controllers_;
+}
+
+ControllerManagerImpl::ScoringPoint::ScoringPoint(
+    int uplink_bandwidth_bps,
+    float uplink_packet_loss_fraction)
+    : uplink_bandwidth_bps(uplink_bandwidth_bps),
+      uplink_packet_loss_fraction(uplink_packet_loss_fraction) {}
+
+namespace {
+
+constexpr int kMinUplinkBandwidthBps = 0;
+constexpr int kMaxUplinkBandwidthBps = 120000;
+
+float NormalizeUplinkBandwidth(int uplink_bandwidth_bps) {
+  uplink_bandwidth_bps =
+      std::min(kMaxUplinkBandwidthBps,
+               std::max(kMinUplinkBandwidthBps, uplink_bandwidth_bps));
+  return static_cast<float>(uplink_bandwidth_bps - kMinUplinkBandwidthBps) /
+         (kMaxUplinkBandwidthBps - kMinUplinkBandwidthBps);
+}
+
+float NormalizePacketLossFraction(float uplink_packet_loss_fraction) {
+  // `uplink_packet_loss_fraction` is seldom larger than 0.3, so we scale it up
+  // by 3.3333f.
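+  // That is, fractions in [0, 0.3] map linearly onto [0, 1] (3.3333 is
+  // roughly 1 / 0.3), and anything above 0.3 saturates at 1.0.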
+  return std::min(uplink_packet_loss_fraction * 3.3333f, 1.0f);
+}
+
+}  // namespace
+
+float ControllerManagerImpl::ScoringPoint::SquaredDistanceTo(
+    const ScoringPoint& scoring_point) const {
+  float diff_normalized_bitrate_bps =
+      NormalizeUplinkBandwidth(scoring_point.uplink_bandwidth_bps) -
+      NormalizeUplinkBandwidth(uplink_bandwidth_bps);
+  float diff_normalized_packet_loss =
+      NormalizePacketLossFraction(scoring_point.uplink_packet_loss_fraction) -
+      NormalizePacketLossFraction(uplink_packet_loss_fraction);
+  return std::pow(diff_normalized_bitrate_bps, 2) +
+         std::pow(diff_normalized_packet_loss, 2);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.h
new file mode 100644
index 0000000000..47e8e0f5a0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.h
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_MANAGER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_MANAGER_H_
+
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+
+namespace webrtc {
+
+class DebugDumpWriter;
+
+class ControllerManager {
+ public:
+  virtual ~ControllerManager() = default;
+
+  // Sort controllers based on their significance.
+  virtual std::vector<Controller*> GetSortedControllers(
+      const Controller::NetworkMetrics& metrics) = 0;
+
+  virtual std::vector<Controller*> GetControllers() const = 0;
+};
+
+class ControllerManagerImpl final : public ControllerManager {
+ public:
+  struct Config {
+    Config(int min_reordering_time_ms, float min_reordering_squared_distance);
+    ~Config();
+    // Least time since last reordering for a new reordering to be made.
+    int min_reordering_time_ms;
+    // Least squared distance from last scoring point for a new reordering to
+    // be made.
+    float min_reordering_squared_distance;
+  };
+
+  static std::unique_ptr<ControllerManager> Create(
+      absl::string_view config_string,
+      size_t num_encoder_channels,
+      rtc::ArrayView<const int> encoder_frame_lengths_ms,
+      int min_encoder_bitrate_bps,
+      size_t initial_channels_to_encode,
+      int initial_frame_length_ms,
+      int initial_bitrate_bps,
+      bool initial_fec_enabled,
+      bool initial_dtx_enabled);
+
+  static std::unique_ptr<ControllerManager> Create(
+      absl::string_view config_string,
+      size_t num_encoder_channels,
+      rtc::ArrayView<const int> encoder_frame_lengths_ms,
+      int min_encoder_bitrate_bps,
+      size_t initial_channels_to_encode,
+      int initial_frame_length_ms,
+      int initial_bitrate_bps,
+      bool initial_fec_enabled,
+      bool initial_dtx_enabled,
+      DebugDumpWriter* debug_dump_writer);
+
+  explicit ControllerManagerImpl(const Config& config);
+
+  // Dependency injection for testing.
+  ControllerManagerImpl(
+      const Config& config,
+      std::vector<std::unique_ptr<Controller>> controllers,
+      const std::map<const Controller*, std::pair<int, float>>&
+          characteristic_points);
+
+  ~ControllerManagerImpl() override;
+
+  ControllerManagerImpl(const ControllerManagerImpl&) = delete;
+  ControllerManagerImpl& operator=(const ControllerManagerImpl&) = delete;
+
+  // Sort controllers based on their significance.
+  std::vector<Controller*> GetSortedControllers(
+      const Controller::NetworkMetrics& metrics) override;
+
+  std::vector<Controller*> GetControllers() const override;
+
+ private:
+  // Scoring point is a subset of NetworkMetrics that is used for comparing the
+  // significance of controllers.
+  struct ScoringPoint {
+    // TODO(eladalon): Do we want to experiment with RPLR-based scoring?
+    ScoringPoint(int uplink_bandwidth_bps, float uplink_packet_loss_fraction);
+
+    // Calculate the normalized [0,1] distance between two scoring points.
+    float SquaredDistanceTo(const ScoringPoint& scoring_point) const;
+
+    int uplink_bandwidth_bps;
+    float uplink_packet_loss_fraction;
+  };
+
+  const Config config_;
+
+  std::vector<std::unique_ptr<Controller>> controllers_;
+
+  absl::optional<int64_t> last_reordering_time_ms_;
+  ScoringPoint last_scoring_point_;
+
+  std::vector<Controller*> default_sorted_controllers_;
+
+  std::vector<Controller*> sorted_controllers_;
+
+  // `controller_scoring_points_` saves the scoring points of various
+  // controllers.
+  std::map<const Controller*, ScoringPoint> controller_scoring_points_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_CONTROLLER_MANAGER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager_unittest.cc
new file mode 100644
index 0000000000..3e6ecf6def
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager_unittest.cc
@@ -0,0 +1,486 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
+
+#include <memory>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/audio_network_adaptor/mock/mock_controller.h"
+#include "modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h"
+#include "rtc_base/fake_clock.h"
+#include "rtc_base/ignore_wundef.h"
+#include "test/gtest.h"
+
+#if WEBRTC_ENABLE_PROTOBUF
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_coding/audio_network_adaptor/config.pb.h"
+#else
+#include "modules/audio_coding/audio_network_adaptor/config.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+#endif
+
+namespace webrtc {
+
+using ::testing::_;
+using ::testing::NiceMock;
+
+namespace {
+
+constexpr size_t kNumControllers = 4;
+constexpr int kCharacteristicBandwidthBps[2] = {15000, 0};
+constexpr float kCharacteristicPacketLossFraction[2] = {0.2f, 0.0f};
+constexpr int kMinReorderingTimeMs = 200;
+constexpr int kFactor = 100;
+constexpr float kMinReorderingSquareDistance = 1.0f / kFactor / kFactor;
+
+// `kMinUplinkBandwidthBps` and `kMaxUplinkBandwidthBps` are copied from
+// controller_manager.cc
+constexpr int kMinUplinkBandwidthBps = 0;
+constexpr int kMaxUplinkBandwidthBps = 120000;
+constexpr int kMinBandwidthChangeBps =
+    (kMaxUplinkBandwidthBps - kMinUplinkBandwidthBps) / kFactor;
+
+struct ControllerManagerStates {
+  std::unique_ptr<ControllerManager> controller_manager;
+  std::vector<MockController*> mock_controllers;
+};
+
+ControllerManagerStates CreateControllerManager() {
+  ControllerManagerStates states;
+  std::vector<std::unique_ptr<Controller>> controllers;
+  std::map<const Controller*, std::pair<int, float>> characteristic_points;
+  for (size_t i = 0; i < kNumControllers; ++i) {
+    auto controller =
+        std::unique_ptr<MockController>(new NiceMock<MockController>());
+    EXPECT_CALL(*controller, Die());
+    states.mock_controllers.push_back(controller.get());
+    controllers.push_back(std::move(controller));
+  }
+
+  // Assign characteristic points to the last two controllers.
+  characteristic_points[states.mock_controllers[kNumControllers - 2]] =
+      std::make_pair(kCharacteristicBandwidthBps[0],
+                     kCharacteristicPacketLossFraction[0]);
+  characteristic_points[states.mock_controllers[kNumControllers - 1]] =
+      std::make_pair(kCharacteristicBandwidthBps[1],
+                     kCharacteristicPacketLossFraction[1]);
+
+  states.controller_manager.reset(new ControllerManagerImpl(
+      ControllerManagerImpl::Config(kMinReorderingTimeMs,
+                                    kMinReorderingSquareDistance),
+      std::move(controllers), characteristic_points));
+  return states;
+}
+
+// `expected_order` contains the expected indices of all controllers in the
+// vector of controllers returned by GetSortedControllers(). A negative index
+// means that we do not care about its exact place, but we do check that it
+// exists in the vector.
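+// For example, with four controllers, an `expected_order` of {2, 0, -1, 1}
+// requires controller 0 at index 2, controller 1 at index 0, controller 3 at
+// index 1, and controller 2 merely to be present somewhere in the result.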
+void CheckControllersOrder(
+    ControllerManagerStates* states,
+    const absl::optional<int>& uplink_bandwidth_bps,
+    const absl::optional<float>& uplink_packet_loss_fraction,
+    const std::vector<int>& expected_order) {
+  RTC_DCHECK_EQ(kNumControllers, expected_order.size());
+  Controller::NetworkMetrics metrics;
+  metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+  metrics.uplink_packet_loss_fraction = uplink_packet_loss_fraction;
+  auto check = states->controller_manager->GetSortedControllers(metrics);
+  EXPECT_EQ(states->mock_controllers.size(), check.size());
+  for (size_t i = 0; i < states->mock_controllers.size(); ++i) {
+    if (expected_order[i] >= 0) {
+      EXPECT_EQ(states->mock_controllers[i], check[expected_order[i]]);
+    } else {
+      EXPECT_NE(check.end(), std::find(check.begin(), check.end(),
+                                       states->mock_controllers[i]));
+    }
+  }
+}
+
+}  // namespace
+
+TEST(ControllerManagerTest, GetControllersReturnAllControllers) {
+  auto states = CreateControllerManager();
+  auto check = states.controller_manager->GetControllers();
+  // Verify that controllers in `check` are one-to-one mapped to those in
+  // `mock_controllers_`.
+  EXPECT_EQ(states.mock_controllers.size(), check.size());
+  for (auto& controller : check)
+    EXPECT_NE(states.mock_controllers.end(),
+              std::find(states.mock_controllers.begin(),
+                        states.mock_controllers.end(), controller));
+}
+
+TEST(ControllerManagerTest, ControllersInDefaultOrderOnEmptyNetworkMetrics) {
+  auto states = CreateControllerManager();
+  // `network_metrics` is empty, and the controllers are supposed to follow the
+  // default order.
+  CheckControllersOrder(&states, absl::nullopt, absl::nullopt, {0, 1, 2, 3});
+}
+
+TEST(ControllerManagerTest, ControllersWithoutCharPointAtEndAndInDefaultOrder) {
+  auto states = CreateControllerManager();
+  CheckControllersOrder(&states, 0, 0.0,
+                        {kNumControllers - 2, kNumControllers - 1, -1, -1});
+}
+
+TEST(ControllerManagerTest, ControllersWithCharPointDependOnNetworkMetrics) {
+  auto states = CreateControllerManager();
+  CheckControllersOrder(&states, kCharacteristicBandwidthBps[1],
+                        kCharacteristicPacketLossFraction[1],
+                        {kNumControllers - 2, kNumControllers - 1, 1, 0});
+}
+
+TEST(ControllerManagerTest, DoNotReorderBeforeMinReorderingTime) {
+  rtc::ScopedFakeClock fake_clock;
+  auto states = CreateControllerManager();
+  CheckControllersOrder(&states, kCharacteristicBandwidthBps[0],
+                        kCharacteristicPacketLossFraction[0],
+                        {kNumControllers - 2, kNumControllers - 1, 0, 1});
+  fake_clock.AdvanceTime(TimeDelta::Millis(kMinReorderingTimeMs - 1));
+  // Move uplink bandwidth and packet loss fraction to the other controller's
+  // characteristic point, which would cause controller manager to reorder the
+  // controllers if time had reached min reordering time.
+  CheckControllersOrder(&states, kCharacteristicBandwidthBps[1],
+                        kCharacteristicPacketLossFraction[1],
+                        {kNumControllers - 2, kNumControllers - 1, 0, 1});
+}
+
+TEST(ControllerManagerTest, ReorderBeyondMinReorderingTimeAndMinDistance) {
+  rtc::ScopedFakeClock fake_clock;
+  auto states = CreateControllerManager();
+  constexpr int kBandwidthBps =
+      (kCharacteristicBandwidthBps[0] + kCharacteristicBandwidthBps[1]) / 2;
+  constexpr float kPacketLossFraction =
+      (kCharacteristicPacketLossFraction[0] +
+       kCharacteristicPacketLossFraction[1]) /
+      2.0f;
+  // Set network metrics to be in the middle between the characteristic points
+  // of two controllers.
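+  // With the constants above, this midpoint is (15000 + 0) / 2 = 7500 bps and
+  // (0.2f + 0.0f) / 2 = 0.1f.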
+  CheckControllersOrder(&states, kBandwidthBps, kPacketLossFraction,
+                        {kNumControllers - 2, kNumControllers - 1, 0, 1});
+  fake_clock.AdvanceTime(TimeDelta::Millis(kMinReorderingTimeMs));
+  // Then let network metrics move a little towards the other controller.
+  CheckControllersOrder(&states, kBandwidthBps - kMinBandwidthChangeBps - 1,
+                        kPacketLossFraction,
+                        {kNumControllers - 2, kNumControllers - 1, 1, 0});
+}
+
+TEST(ControllerManagerTest, DoNotReorderIfNetworkMetricsChangeTooSmall) {
+  rtc::ScopedFakeClock fake_clock;
+  auto states = CreateControllerManager();
+  constexpr int kBandwidthBps =
+      (kCharacteristicBandwidthBps[0] + kCharacteristicBandwidthBps[1]) / 2;
+  constexpr float kPacketLossFraction =
+      (kCharacteristicPacketLossFraction[0] +
+       kCharacteristicPacketLossFraction[1]) /
+      2.0f;
+  // Set network metrics to be in the middle between the characteristic points
+  // of two controllers.
+  CheckControllersOrder(&states, kBandwidthBps, kPacketLossFraction,
+                        {kNumControllers - 2, kNumControllers - 1, 0, 1});
+  fake_clock.AdvanceTime(TimeDelta::Millis(kMinReorderingTimeMs));
+  // Then let network metrics move a little towards the other controller.
+  CheckControllersOrder(&states, kBandwidthBps - kMinBandwidthChangeBps + 1,
+                        kPacketLossFraction,
+                        {kNumControllers - 2, kNumControllers - 1, 0, 1});
+}
+
+#if WEBRTC_ENABLE_PROTOBUF
+
+namespace {
+
+void AddBitrateControllerConfig(
+    audio_network_adaptor::config::ControllerManager* config) {
+  config->add_controllers()->mutable_bitrate_controller();
+}
+
+void AddChannelControllerConfig(
+    audio_network_adaptor::config::ControllerManager* config) {
+  auto controller_config =
+      config->add_controllers()->mutable_channel_controller();
+  controller_config->set_channel_1_to_2_bandwidth_bps(31000);
+  controller_config->set_channel_2_to_1_bandwidth_bps(29000);
+}
+
+void AddDtxControllerConfig(
+    audio_network_adaptor::config::ControllerManager* config) {
+  auto controller_config = config->add_controllers()->mutable_dtx_controller();
+  controller_config->set_dtx_enabling_bandwidth_bps(55000);
+  controller_config->set_dtx_disabling_bandwidth_bps(65000);
+}
+
+void AddFecControllerConfig(
+    audio_network_adaptor::config::ControllerManager* config) {
+  auto controller_config_ext = config->add_controllers();
+  auto controller_config = controller_config_ext->mutable_fec_controller();
+  auto fec_enabling_threshold =
+      controller_config->mutable_fec_enabling_threshold();
+  fec_enabling_threshold->set_low_bandwidth_bps(17000);
+  fec_enabling_threshold->set_low_bandwidth_packet_loss(0.1f);
+  fec_enabling_threshold->set_high_bandwidth_bps(64000);
+  fec_enabling_threshold->set_high_bandwidth_packet_loss(0.05f);
+  auto fec_disabling_threshold =
+      controller_config->mutable_fec_disabling_threshold();
+  fec_disabling_threshold->set_low_bandwidth_bps(15000);
+  fec_disabling_threshold->set_low_bandwidth_packet_loss(0.08f);
+  fec_disabling_threshold->set_high_bandwidth_bps(64000);
+  fec_disabling_threshold->set_high_bandwidth_packet_loss(0.01f);
+  controller_config->set_time_constant_ms(500);
+
+  auto scoring_point = controller_config_ext->mutable_scoring_point();
+  scoring_point->set_uplink_bandwidth_bps(kCharacteristicBandwidthBps[0]);
+  scoring_point->set_uplink_packet_loss_fraction(
+      kCharacteristicPacketLossFraction[0]);
+}
+
+void AddFrameLengthControllerConfig(
+    audio_network_adaptor::config::ControllerManager* config) {
+  auto controller_config_ext = config->add_controllers();
+  auto controller_config =
+      controller_config_ext->mutable_frame_length_controller();
+  controller_config->set_fl_decreasing_packet_loss_fraction(0.05f);
+  controller_config->set_fl_increasing_packet_loss_fraction(0.04f);
+  controller_config->set_fl_20ms_to_40ms_bandwidth_bps(80000);
+  controller_config->set_fl_40ms_to_20ms_bandwidth_bps(88000);
+  controller_config->set_fl_40ms_to_60ms_bandwidth_bps(72000);
+  controller_config->set_fl_60ms_to_40ms_bandwidth_bps(80000);
+
+  auto scoring_point = controller_config_ext->mutable_scoring_point();
+  scoring_point->set_uplink_bandwidth_bps(kCharacteristicBandwidthBps[1]);
+  scoring_point->set_uplink_packet_loss_fraction(
+      kCharacteristicPacketLossFraction[1]);
+}
+
+void AddFrameLengthControllerV2Config(
+    audio_network_adaptor::config::ControllerManager* config) {
+  auto controller =
+      config->add_controllers()->mutable_frame_length_controller_v2();
+  controller->set_min_payload_bitrate_bps(16000);
+  controller->set_use_slow_adaptation(true);
+}
+
+constexpr int kInitialBitrateBps = 24000;
+constexpr size_t kInitialChannelsToEncode = 1;
+constexpr bool kInitialDtxEnabled = true;
+constexpr bool kInitialFecEnabled = true;
+constexpr int kInitialFrameLengthMs = 60;
+constexpr int kMinBitrateBps = 6000;
+
+ControllerManagerStates CreateControllerManager(
+    absl::string_view config_string) {
+  ControllerManagerStates states;
+  constexpr size_t kNumEncoderChannels = 2;
+  const std::vector<int> encoder_frame_lengths_ms = {20, 60};
+  states.controller_manager = ControllerManagerImpl::Create(
+      config_string, kNumEncoderChannels, encoder_frame_lengths_ms,
+      kMinBitrateBps, kInitialChannelsToEncode, kInitialFrameLengthMs,
+      kInitialBitrateBps, kInitialFecEnabled, kInitialDtxEnabled);
+  return states;
+}
+
+enum class ControllerType : int8_t {
+  FEC,
+  CHANNEL,
+  DTX,
+  FRAME_LENGTH,
+  BIT_RATE
+};
+
+void CheckControllersOrder(const std::vector<Controller*>& controllers,
+                           const std::vector<ControllerType>& expected_types) {
+  ASSERT_EQ(expected_types.size(), controllers.size());
+
+  // We also check that the controllers follow the initial settings.
+  for (size_t i = 0; i < controllers.size(); ++i) {
+    AudioEncoderRuntimeConfig encoder_config;
+    // We check the order of `controllers` by judging their decisions.
+    controllers[i]->MakeDecision(&encoder_config);
+
+    // Since controllers are not provided with network metrics, they give the
+    // initial values.
+    switch (expected_types[i]) {
+      case ControllerType::FEC:
+        EXPECT_EQ(kInitialFecEnabled, encoder_config.enable_fec);
+        break;
+      case ControllerType::CHANNEL:
+        EXPECT_EQ(kInitialChannelsToEncode, encoder_config.num_channels);
+        break;
+      case ControllerType::DTX:
+        EXPECT_EQ(kInitialDtxEnabled, encoder_config.enable_dtx);
+        break;
+      case ControllerType::FRAME_LENGTH:
+        EXPECT_EQ(kInitialFrameLengthMs, encoder_config.frame_length_ms);
+        break;
+      case ControllerType::BIT_RATE:
+        EXPECT_EQ(kInitialBitrateBps, encoder_config.bitrate_bps);
+    }
+  }
+}
+
+MATCHER_P(ControllerManagerEqual, value, "") {
+  std::string value_string;
+  std::string arg_string;
+  EXPECT_TRUE(arg.SerializeToString(&arg_string));
+  EXPECT_TRUE(value.SerializeToString(&value_string));
+  return arg_string == value_string;
+}
+
+}  // namespace
+
+TEST(ControllerManagerTest, DebugDumpLoggedWhenCreateFromConfigString) {
+  audio_network_adaptor::config::ControllerManager config;
+  config.set_min_reordering_time_ms(kMinReorderingTimeMs);
+  config.set_min_reordering_squared_distance(kMinReorderingSquareDistance);
+
+  AddFecControllerConfig(&config);
+  AddChannelControllerConfig(&config);
+  AddDtxControllerConfig(&config);
+  AddFrameLengthControllerConfig(&config);
+  AddBitrateControllerConfig(&config);
+
+  std::string config_string;
+  config.SerializeToString(&config_string);
+
+  constexpr size_t kNumEncoderChannels = 2;
+  const std::vector<int> encoder_frame_lengths_ms = {20, 60};
+
+  constexpr int64_t kClockInitialTimeMs = 12345678;
+  rtc::ScopedFakeClock fake_clock;
+  fake_clock.AdvanceTime(TimeDelta::Millis(kClockInitialTimeMs));
+  auto debug_dump_writer =
+      std::unique_ptr<MockDebugDumpWriter>(new NiceMock<MockDebugDumpWriter>());
+  EXPECT_CALL(*debug_dump_writer, Die());
+  EXPECT_CALL(*debug_dump_writer,
+              DumpControllerManagerConfig(ControllerManagerEqual(config),
+                                          kClockInitialTimeMs));
+
+  ControllerManagerImpl::Create(config_string, kNumEncoderChannels,
+                                encoder_frame_lengths_ms, kMinBitrateBps,
+                                kInitialChannelsToEncode, kInitialFrameLengthMs,
+                                kInitialBitrateBps, kInitialFecEnabled,
+                                kInitialDtxEnabled, debug_dump_writer.get());
+}
+
+TEST(ControllerManagerTest, CreateFromConfigStringAndCheckDefaultOrder) {
+  audio_network_adaptor::config::ControllerManager config;
+  config.set_min_reordering_time_ms(kMinReorderingTimeMs);
+  config.set_min_reordering_squared_distance(kMinReorderingSquareDistance);
+
+  AddFecControllerConfig(&config);
+  AddChannelControllerConfig(&config);
+  AddDtxControllerConfig(&config);
+  AddFrameLengthControllerConfig(&config);
+  AddBitrateControllerConfig(&config);
+
+  std::string config_string;
+  config.SerializeToString(&config_string);
+
+  auto states = CreateControllerManager(config_string);
+  Controller::NetworkMetrics metrics;
+
+  auto controllers = states.controller_manager->GetSortedControllers(metrics);
+  CheckControllersOrder(
+      controllers,
+      std::vector<ControllerType>{
+          ControllerType::FEC, ControllerType::CHANNEL, ControllerType::DTX,
+          ControllerType::FRAME_LENGTH, ControllerType::BIT_RATE});
+}
+
+TEST(ControllerManagerTest, CreateCharPointFreeConfigAndCheckDefaultOrder) {
+  audio_network_adaptor::config::ControllerManager config;
+
+  // The following controllers have no characteristic points.
+  AddChannelControllerConfig(&config);
+  AddDtxControllerConfig(&config);
+  AddBitrateControllerConfig(&config);
+
+  std::string config_string;
+  config.SerializeToString(&config_string);
+
+  auto states = CreateControllerManager(config_string);
+  Controller::NetworkMetrics metrics;
+
+  auto controllers = states.controller_manager->GetSortedControllers(metrics);
+  CheckControllersOrder(
+      controllers,
+      std::vector<ControllerType>{ControllerType::CHANNEL, ControllerType::DTX,
+                                  ControllerType::BIT_RATE});
+}
+
+TEST(ControllerManagerTest, CreateFromConfigStringAndCheckReordering) {
+  rtc::ScopedFakeClock fake_clock;
+  audio_network_adaptor::config::ControllerManager config;
+  config.set_min_reordering_time_ms(kMinReorderingTimeMs);
+  config.set_min_reordering_squared_distance(kMinReorderingSquareDistance);
+
+  AddChannelControllerConfig(&config);
+
+  // Internally associated with characteristic point 0.
+  AddFecControllerConfig(&config);
+
+  AddDtxControllerConfig(&config);
+
+  // Internally associated with characteristic point 1.
+  AddFrameLengthControllerConfig(&config);
+
+  AddBitrateControllerConfig(&config);
+
+  std::string config_string;
+  config.SerializeToString(&config_string);
+
+  auto states = CreateControllerManager(config_string);
+
+  Controller::NetworkMetrics metrics;
+  metrics.uplink_bandwidth_bps = kCharacteristicBandwidthBps[0];
+  metrics.uplink_packet_loss_fraction = kCharacteristicPacketLossFraction[0];
+
+  auto controllers = states.controller_manager->GetSortedControllers(metrics);
+  CheckControllersOrder(controllers,
+                        std::vector<ControllerType>{
+                            ControllerType::FEC, ControllerType::FRAME_LENGTH,
+                            ControllerType::CHANNEL, ControllerType::DTX,
+                            ControllerType::BIT_RATE});
+
+  metrics.uplink_bandwidth_bps = kCharacteristicBandwidthBps[1];
+  metrics.uplink_packet_loss_fraction = kCharacteristicPacketLossFraction[1];
+  fake_clock.AdvanceTime(TimeDelta::Millis(kMinReorderingTimeMs - 1));
+  controllers = states.controller_manager->GetSortedControllers(metrics);
+  // Should not reorder since min reordering time is not met.
+  CheckControllersOrder(controllers,
+                        std::vector<ControllerType>{
+                            ControllerType::FEC, ControllerType::FRAME_LENGTH,
+                            ControllerType::CHANNEL, ControllerType::DTX,
+                            ControllerType::BIT_RATE});
+
+  fake_clock.AdvanceTime(TimeDelta::Millis(1));
+  controllers = states.controller_manager->GetSortedControllers(metrics);
+  // Reorder now.
+  CheckControllersOrder(controllers,
+                        std::vector<ControllerType>{
+                            ControllerType::FRAME_LENGTH, ControllerType::FEC,
+                            ControllerType::CHANNEL, ControllerType::DTX,
+                            ControllerType::BIT_RATE});
+}
+
+TEST(ControllerManagerTest, CreateFrameLengthControllerV2) {
+  audio_network_adaptor::config::ControllerManager config;
+  AddFrameLengthControllerV2Config(&config);
+  auto states = CreateControllerManager(config.SerializeAsString());
+  auto controllers = states.controller_manager->GetControllers();
+  EXPECT_TRUE(controllers.size() == 1);
+}
+#endif  // WEBRTC_ENABLE_PROTOBUF
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump.proto b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump.proto
new file mode 100644
index 0000000000..3aa6a504f3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump.proto
@@ -0,0 +1,42 @@
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package webrtc.audio_network_adaptor.debug_dump;
+
+import "config.proto";
+
+message NetworkMetrics {
+  optional int32 uplink_bandwidth_bps = 1;
+  optional float uplink_packet_loss_fraction = 2;
+  optional int32 target_audio_bitrate_bps = 3;
+  optional int32 rtt_ms = 4;
+  optional int32 uplink_recoverable_packet_loss_fraction = 5;
+}
+
+message EncoderRuntimeConfig {
+  optional int32 bitrate_bps = 1;
+  optional int32 frame_length_ms = 2;
+  // Note: This is what we tell the encoder. It doesn't have to reflect
+  // the actual NetworkMetrics; it's subject to our decision.
+  optional float uplink_packet_loss_fraction = 3;
+  optional bool enable_fec = 4;
+  optional bool enable_dtx = 5;
+  // Some encoders can encode fewer channels than the actual input to make
+  // better use of the bandwidth. `num_channels` sets the number of channels
+  // to encode.
+  optional uint32 num_channels = 6;
+}
+
+message Event {
+  enum Type {
+    NETWORK_METRICS = 0;
+    ENCODER_RUNTIME_CONFIG = 1;
+    CONTROLLER_MANAGER_CONFIG = 2;
+  }
+  required Type type = 1;
+  required uint32 timestamp = 2;
+  optional NetworkMetrics network_metrics = 3;
+  optional EncoderRuntimeConfig encoder_runtime_config = 4;
+  optional webrtc.audio_network_adaptor.config.ControllerManager
+      controller_manager_config = 5;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.cc
new file mode 100644
index 0000000000..2616706ee5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.cc
@@ -0,0 +1,163 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/debug_dump_writer.h"
+
+#include <string>
+
+#include "absl/types/optional.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/system/file_wrapper.h"
+
+#if WEBRTC_ENABLE_PROTOBUF
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_coding/audio_network_adaptor/debug_dump.pb.h"
+#else
+#include "modules/audio_coding/audio_network_adaptor/debug_dump.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+#endif
+
+namespace webrtc {
+
+#if WEBRTC_ENABLE_PROTOBUF
+namespace {
+
+using audio_network_adaptor::debug_dump::EncoderRuntimeConfig;
+using audio_network_adaptor::debug_dump::Event;
+using audio_network_adaptor::debug_dump::NetworkMetrics;
+
+void DumpEventToFile(const Event& event, FileWrapper* dump_file) {
+  RTC_CHECK(dump_file->is_open());
+  std::string dump_data;
+  event.SerializeToString(&dump_data);
+  int32_t size = rtc::checked_cast<int32_t>(event.ByteSizeLong());
+  dump_file->Write(&size, sizeof(size));
+  dump_file->Write(dump_data.data(), dump_data.length());
+}
+
+}  // namespace
+#endif  // WEBRTC_ENABLE_PROTOBUF
+
+class DebugDumpWriterImpl final : public DebugDumpWriter {
+ public:
+  explicit DebugDumpWriterImpl(FILE* file_handle);
+  ~DebugDumpWriterImpl() override = default;
+
+  void DumpEncoderRuntimeConfig(const AudioEncoderRuntimeConfig& config,
+                                int64_t timestamp) override;
+
+  void DumpNetworkMetrics(const Controller::NetworkMetrics& metrics,
+                          int64_t timestamp) override;
+
+#if WEBRTC_ENABLE_PROTOBUF
+  void DumpControllerManagerConfig(
+      const audio_network_adaptor::config::ControllerManager&
+          controller_manager_config,
+      int64_t timestamp) override;
+#endif
+
+ private:
+  FileWrapper dump_file_;
+};
+
+DebugDumpWriterImpl::DebugDumpWriterImpl(FILE* file_handle) {
+#if WEBRTC_ENABLE_PROTOBUF
+  dump_file_ = FileWrapper(file_handle);
+  RTC_CHECK(dump_file_.is_open());
+#else
+  RTC_DCHECK_NOTREACHED();
+#endif
+}
+
+void DebugDumpWriterImpl::DumpNetworkMetrics(
+    const Controller::NetworkMetrics& metrics,
+    int64_t timestamp) {
+#if WEBRTC_ENABLE_PROTOBUF
+  Event event;
+  event.set_timestamp(timestamp);
+  event.set_type(Event::NETWORK_METRICS);
+  auto dump_metrics = event.mutable_network_metrics();
+
+  if (metrics.uplink_bandwidth_bps)
+    dump_metrics->set_uplink_bandwidth_bps(*metrics.uplink_bandwidth_bps);
+
+  if (metrics.uplink_packet_loss_fraction) {
+    dump_metrics->set_uplink_packet_loss_fraction(
+        *metrics.uplink_packet_loss_fraction);
+  }
+
+  if (metrics.target_audio_bitrate_bps) {
+    dump_metrics->set_target_audio_bitrate_bps(
+        *metrics.target_audio_bitrate_bps);
+  }
+
+  if (metrics.rtt_ms)
+    dump_metrics->set_rtt_ms(*metrics.rtt_ms);
+
+  DumpEventToFile(event, &dump_file_);
+#endif  // WEBRTC_ENABLE_PROTOBUF
+}
+
+void DebugDumpWriterImpl::DumpEncoderRuntimeConfig(
+    const AudioEncoderRuntimeConfig& config,
+    int64_t timestamp) {
+#if WEBRTC_ENABLE_PROTOBUF
+  Event event;
+  event.set_timestamp(timestamp);
+  event.set_type(Event::ENCODER_RUNTIME_CONFIG);
+  auto dump_config = event.mutable_encoder_runtime_config();
+
+  if (config.bitrate_bps)
+    dump_config->set_bitrate_bps(*config.bitrate_bps);
+
+  if (config.frame_length_ms)
+    dump_config->set_frame_length_ms(*config.frame_length_ms);
+
+  if (config.uplink_packet_loss_fraction) {
+    dump_config->set_uplink_packet_loss_fraction(
+        *config.uplink_packet_loss_fraction);
+  }
+
+  if (config.enable_fec)
+    dump_config->set_enable_fec(*config.enable_fec);
+
+  if (config.enable_dtx)
+    dump_config->set_enable_dtx(*config.enable_dtx);
+
+  if (config.num_channels)
+    dump_config->set_num_channels(*config.num_channels);
+
+  DumpEventToFile(event, &dump_file_);
+#endif  // WEBRTC_ENABLE_PROTOBUF
+}
+
+#if WEBRTC_ENABLE_PROTOBUF
+void DebugDumpWriterImpl::DumpControllerManagerConfig(
+    const audio_network_adaptor::config::ControllerManager&
+        controller_manager_config,
+    int64_t timestamp) {
+  Event event;
+  event.set_timestamp(timestamp);
+  event.set_type(Event::CONTROLLER_MANAGER_CONFIG);
+  event.mutable_controller_manager_config()->CopyFrom(
+      controller_manager_config);
+  DumpEventToFile(event, &dump_file_);
+}
+#endif  // WEBRTC_ENABLE_PROTOBUF
+
+std::unique_ptr<DebugDumpWriter> DebugDumpWriter::Create(FILE* file_handle) {
+  return std::unique_ptr<DebugDumpWriter>(
+      new DebugDumpWriterImpl(file_handle));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.h
new file mode 100644
index 0000000000..8fdf2f7728
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DEBUG_DUMP_WRITER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DEBUG_DUMP_WRITER_H_
+
+#include <memory>
+
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/system/file_wrapper.h"
+#if WEBRTC_ENABLE_PROTOBUF
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_coding/audio_network_adaptor/config.pb.h"
+#else
+#include "modules/audio_coding/audio_network_adaptor/config.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+#endif
+
+namespace webrtc {
+
+class DebugDumpWriter {
+ public:
+  static std::unique_ptr<DebugDumpWriter> Create(FILE* file_handle);
+
+  virtual ~DebugDumpWriter() = default;
+
+  virtual void DumpEncoderRuntimeConfig(const AudioEncoderRuntimeConfig& config,
+                                        int64_t timestamp) = 0;
+
+  virtual void DumpNetworkMetrics(const Controller::NetworkMetrics& metrics,
+                                  int64_t timestamp) = 0;
+
+#if WEBRTC_ENABLE_PROTOBUF
+  virtual void DumpControllerManagerConfig(
+      const audio_network_adaptor::config::ControllerManager&
+          controller_manager_config,
+      int64_t timestamp) = 0;
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DEBUG_DUMP_WRITER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.cc
new file mode 100644
index 0000000000..b0a7d5d59d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.cc
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/dtx_controller.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+DtxController::Config::Config(bool initial_dtx_enabled,
+                              int dtx_enabling_bandwidth_bps,
+                              int dtx_disabling_bandwidth_bps)
+    : initial_dtx_enabled(initial_dtx_enabled),
+      dtx_enabling_bandwidth_bps(dtx_enabling_bandwidth_bps),
+      dtx_disabling_bandwidth_bps(dtx_disabling_bandwidth_bps) {}
+
+DtxController::DtxController(const Config& config)
+    : config_(config), dtx_enabled_(config_.initial_dtx_enabled) {}
+
+DtxController::~DtxController() = default;
+
+void DtxController::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.uplink_bandwidth_bps)
+    uplink_bandwidth_bps_ = network_metrics.uplink_bandwidth_bps;
+}
+
+void DtxController::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  // Decision on `enable_dtx` should not have been made.
+  RTC_DCHECK(!config->enable_dtx);
+
+  if (uplink_bandwidth_bps_) {
+    if (dtx_enabled_ &&
+        *uplink_bandwidth_bps_ >= config_.dtx_disabling_bandwidth_bps) {
+      dtx_enabled_ = false;
+    } else if (!dtx_enabled_ &&
+               *uplink_bandwidth_bps_ <= config_.dtx_enabling_bandwidth_bps) {
+      dtx_enabled_ = true;
+    }
+  }
+  config->enable_dtx = dtx_enabled_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.h
new file mode 100644
index 0000000000..b8a8e476e4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DTX_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DTX_CONTROLLER_H_
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+class DtxController final : public Controller {
+ public:
+  struct Config {
+    Config(bool initial_dtx_enabled,
+           int dtx_enabling_bandwidth_bps,
+           int dtx_disabling_bandwidth_bps);
+    bool initial_dtx_enabled;
+    // Uplink bandwidth below which DTX should be switched on.
+    int dtx_enabling_bandwidth_bps;
+    // Uplink bandwidth above which DTX should be switched off.
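+    // (Editorial note: keeping this above `dtx_enabling_bandwidth_bps` gives
+    // the controller hysteresis. With the values used in
+    // dtx_controller_unittest.cc (55000 and 65000 bps), an uplink estimate
+    // hovering around 60000 bps leaves the DTX state unchanged rather than
+    // toggling it on every update.)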
+    int dtx_disabling_bandwidth_bps;
+  };
+
+  explicit DtxController(const Config& config);
+
+  ~DtxController() override;
+
+  DtxController(const DtxController&) = delete;
+  DtxController& operator=(const DtxController&) = delete;
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  const Config config_;
+  bool dtx_enabled_;
+  absl::optional<int> uplink_bandwidth_bps_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_DTX_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller_unittest.cc
new file mode 100644
index 0000000000..567df6f76e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller_unittest.cc
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/dtx_controller.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kDtxEnablingBandwidthBps = 55000;
+constexpr int kDtxDisablingBandwidthBps = 65000;
+constexpr int kMediumBandwidthBps =
+    (kDtxEnablingBandwidthBps + kDtxDisablingBandwidthBps) / 2;
+
+std::unique_ptr<DtxController> CreateController(int initial_dtx_enabled) {
+  std::unique_ptr<DtxController> controller(new DtxController(
+      DtxController::Config(initial_dtx_enabled, kDtxEnablingBandwidthBps,
+                            kDtxDisablingBandwidthBps)));
+  return controller;
+}
+
+void CheckDecision(DtxController* controller,
+                   const absl::optional<int>& uplink_bandwidth_bps,
+                   bool expected_dtx_enabled) {
+  if (uplink_bandwidth_bps) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+  AudioEncoderRuntimeConfig config;
+  controller->MakeDecision(&config);
+  EXPECT_EQ(expected_dtx_enabled, config.enable_dtx);
+}
+
+}  // namespace
+
+TEST(DtxControllerTest, OutputInitValueWhenUplinkBandwidthUnknown) {
+  constexpr bool kInitialDtxEnabled = true;
+  auto controller = CreateController(kInitialDtxEnabled);
+  CheckDecision(controller.get(), absl::nullopt, kInitialDtxEnabled);
+}
+
+TEST(DtxControllerTest, TurnOnDtxForLowUplinkBandwidth) {
+  auto controller = CreateController(false);
+  CheckDecision(controller.get(), kDtxEnablingBandwidthBps, true);
+}
+
+TEST(DtxControllerTest, TurnOffDtxForHighUplinkBandwidth) {
+  auto controller = CreateController(true);
+  CheckDecision(controller.get(), kDtxDisablingBandwidthBps, false);
+}
+
+TEST(DtxControllerTest, MaintainDtxOffForMediumUplinkBandwidth) {
+  auto controller = CreateController(false);
+  CheckDecision(controller.get(), kMediumBandwidthBps, false);
+}
+
+TEST(DtxControllerTest, MaintainDtxOnForMediumUplinkBandwidth) {
+  auto controller = CreateController(true);
+  CheckDecision(controller.get(), kMediumBandwidthBps, true);
+}
+
+TEST(DtxControllerTest, CheckBehaviorOnChangingUplinkBandwidth) {
+  auto controller = CreateController(false);
+  CheckDecision(controller.get(),
+                kMediumBandwidthBps, false);
+  CheckDecision(controller.get(), kDtxEnablingBandwidthBps, true);
+  CheckDecision(controller.get(), kMediumBandwidthBps, true);
+  CheckDecision(controller.get(), kDtxDisablingBandwidthBps, false);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.cc
new file mode 100644
index 0000000000..0a79484a16
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.cc
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/event_log_writer.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/rtc_event_log/rtc_event.h"
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "logging/rtc_event_log/events/rtc_event_audio_network_adaptation.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+EventLogWriter::EventLogWriter(RtcEventLog* event_log,
+                               int min_bitrate_change_bps,
+                               float min_bitrate_change_fraction,
+                               float min_packet_loss_change_fraction)
+    : event_log_(event_log),
+      min_bitrate_change_bps_(min_bitrate_change_bps),
+      min_bitrate_change_fraction_(min_bitrate_change_fraction),
+      min_packet_loss_change_fraction_(min_packet_loss_change_fraction) {
+  RTC_DCHECK(event_log_);
+}
+
+EventLogWriter::~EventLogWriter() = default;
+
+void EventLogWriter::MaybeLogEncoderConfig(
+    const AudioEncoderRuntimeConfig& config) {
+  if (last_logged_config_.num_channels != config.num_channels)
+    return LogEncoderConfig(config);
+  if (last_logged_config_.enable_dtx != config.enable_dtx)
+    return LogEncoderConfig(config);
+  if (last_logged_config_.enable_fec != config.enable_fec)
+    return LogEncoderConfig(config);
+  if (last_logged_config_.frame_length_ms != config.frame_length_ms)
+    return LogEncoderConfig(config);
+  if ((!last_logged_config_.bitrate_bps && config.bitrate_bps) ||
+      (last_logged_config_.bitrate_bps && config.bitrate_bps &&
+       std::abs(*last_logged_config_.bitrate_bps - *config.bitrate_bps) >=
+           std::min(static_cast<int>(*last_logged_config_.bitrate_bps *
+                                     min_bitrate_change_fraction_),
+                    min_bitrate_change_bps_))) {
+    return LogEncoderConfig(config);
+  }
+  if ((!last_logged_config_.uplink_packet_loss_fraction &&
+       config.uplink_packet_loss_fraction) ||
+      (last_logged_config_.uplink_packet_loss_fraction &&
+       config.uplink_packet_loss_fraction &&
+       fabs(*last_logged_config_.uplink_packet_loss_fraction -
+            *config.uplink_packet_loss_fraction) >=
+           min_packet_loss_change_fraction_ *
+               *last_logged_config_.uplink_packet_loss_fraction)) {
+    return LogEncoderConfig(config);
+  }
+}
+
+void EventLogWriter::LogEncoderConfig(const AudioEncoderRuntimeConfig& config) {
+  auto config_copy = std::make_unique<AudioEncoderRuntimeConfig>(config);
+  event_log_->Log(std::make_unique<RtcEventAudioNetworkAdaptation>(
+      std::move(config_copy)));
+  last_logged_config_ = config;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.h
new file mode 100644
index 0000000000..a147311fc7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_EVENT_LOG_WRITER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_EVENT_LOG_WRITER_H_
+
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+class RtcEventLog;
+
+class EventLogWriter final {
+ public:
+  EventLogWriter(RtcEventLog* event_log,
+                 int min_bitrate_change_bps,
+                 float min_bitrate_change_fraction,
+                 float min_packet_loss_change_fraction);
+  ~EventLogWriter();
+
+  EventLogWriter(const EventLogWriter&) = delete;
+  EventLogWriter& operator=(const EventLogWriter&) = delete;
+
+  void MaybeLogEncoderConfig(const AudioEncoderRuntimeConfig& config);
+
+ private:
+  void LogEncoderConfig(const AudioEncoderRuntimeConfig& config);
+
+  RtcEventLog* const event_log_;
+  const int min_bitrate_change_bps_;
+  const float min_bitrate_change_fraction_;
+  const float min_packet_loss_change_fraction_;
+  AudioEncoderRuntimeConfig last_logged_config_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_EVENT_LOG_WRITER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer_unittest.cc
new file mode 100644
index 0000000000..2c344534ea
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer_unittest.cc
@@ -0,0 +1,240 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/event_log_writer.h"
+
+#include <memory>
+
+#include "logging/rtc_event_log/events/rtc_event_audio_network_adaptation.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kMinBitrateChangeBps = 5000;
+constexpr float kMinPacketLossChangeFraction = 0.5;
+constexpr float kMinBitrateChangeFraction = 0.25;
+
+constexpr int kHighBitrateBps = 70000;
+constexpr int kLowBitrateBps = 10000;
+constexpr int kFrameLengthMs = 60;
+constexpr bool kEnableFec = true;
+constexpr bool kEnableDtx = true;
+constexpr float kPacketLossFraction = 0.05f;
+constexpr size_t kNumChannels = 1;
+
+MATCHER_P(IsRtcEventAnaConfigEqualTo, config, "") {
+  if (arg->GetType() != RtcEvent::Type::AudioNetworkAdaptation) {
+    return false;
+  }
+  auto ana_event = static_cast<const RtcEventAudioNetworkAdaptation*>(arg);
+  return ana_event->config() == config;
+}
+
+struct EventLogWriterStates {
+  std::unique_ptr<EventLogWriter> event_log_writer;
+  std::unique_ptr<testing::StrictMock<MockRtcEventLog>> event_log;
+  AudioEncoderRuntimeConfig runtime_config;
+};
+
+EventLogWriterStates CreateEventLogWriter() {
+  EventLogWriterStates state;
+  state.event_log.reset(new ::testing::StrictMock<MockRtcEventLog>());
+  state.event_log_writer.reset(new EventLogWriter(
+      state.event_log.get(), kMinBitrateChangeBps, kMinBitrateChangeFraction,
+      kMinPacketLossChangeFraction));
+  state.runtime_config.bitrate_bps = kHighBitrateBps;
+  state.runtime_config.frame_length_ms = kFrameLengthMs;
+  state.runtime_config.uplink_packet_loss_fraction = kPacketLossFraction;
+  state.runtime_config.enable_fec = kEnableFec;
+  state.runtime_config.enable_dtx = kEnableDtx;
+  state.runtime_config.num_channels = kNumChannels;
+  return state;
+}
+}  // namespace
+
+TEST(EventLogWriterTest, FirstConfigIsLogged) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, SameConfigIsNotLogged) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogFecStateChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+
+  state.runtime_config.enable_fec = !kEnableFec;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogDtxStateChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+
+  state.runtime_config.enable_dtx = !kEnableDtx;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogChannelChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+
+  state.runtime_config.num_channels = kNumChannels + 1;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogFrameLengthChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+
+  state.runtime_config.frame_length_ms = 20;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, DoNotLogSmallBitrateChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.runtime_config.bitrate_bps = kHighBitrateBps + kMinBitrateChangeBps - 1;
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogLargeBitrateChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  // At high bitrate, the min fraction rule requires a larger change than the
+  // min change rule. We make sure that the min change rule applies.
+  RTC_DCHECK_GT(kHighBitrateBps * kMinBitrateChangeFraction,
+                kMinBitrateChangeBps);
+  state.runtime_config.bitrate_bps = kHighBitrateBps + kMinBitrateChangeBps;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogMinBitrateChangeFractionOnLowBitrateChange) {
+  auto state = CreateEventLogWriter();
+  state.runtime_config.bitrate_bps = kLowBitrateBps;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  // At low bitrate, the min change rule requires a larger change than the min
+  // fraction rule. We make sure that the min fraction rule applies.
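+  // (Editorial note, derived from the constants above: the min change rule
+  // needs a 5000 bps step, while the min fraction rule at kLowBitrateBps
+  // needs only 0.25 * 10000 = 2500 bps, so the smaller, fraction-based
+  // threshold is the one that triggers logging below.)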
+  state.runtime_config.bitrate_bps =
+      kLowBitrateBps + kLowBitrateBps * kMinBitrateChangeFraction;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, DoNotLogSmallPacketLossFractionChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.runtime_config.uplink_packet_loss_fraction =
+      kPacketLossFraction + kMinPacketLossChangeFraction * kPacketLossFraction -
+      0.001f;
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogLargePacketLossFractionChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.runtime_config.uplink_packet_loss_fraction =
+      kPacketLossFraction + kMinPacketLossChangeFraction * kPacketLossFraction;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogJustOnceOnMultipleChanges) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.runtime_config.uplink_packet_loss_fraction =
+      kPacketLossFraction + kMinPacketLossChangeFraction * kPacketLossFraction;
+  state.runtime_config.frame_length_ms = 20;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+}
+
+TEST(EventLogWriterTest, LogAfterGradualChange) {
+  auto state = CreateEventLogWriter();
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  state.runtime_config.bitrate_bps = kHighBitrateBps + kMinBitrateChangeBps;
+  EXPECT_CALL(*state.event_log,
+              LogProxy(IsRtcEventAnaConfigEqualTo(state.runtime_config)))
+      .Times(1);
+  for (int bitrate_bps = kHighBitrateBps;
+       bitrate_bps <= kHighBitrateBps + kMinBitrateChangeBps; bitrate_bps++) {
+    state.runtime_config.bitrate_bps = bitrate_bps;
+    state.event_log_writer->MaybeLogEncoderConfig(state.runtime_config);
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.cc
new file mode 100644
index 0000000000..c5e5fa76e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.cc
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h"
+
+#include <string>
+#include <utility>
+
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+namespace {
+class NullSmoothingFilter final : public SmoothingFilter {
+ public:
+  void AddSample(float sample) override { last_sample_ = sample; }
+
+  absl::optional<float> GetAverage() override { return last_sample_; }
+
+  bool SetTimeConstantMs(int time_constant_ms) override {
+    RTC_DCHECK_NOTREACHED();
+    return false;
+  }
+
+ private:
+  absl::optional<float> last_sample_;
+};
+}  // namespace
+
+FecControllerPlrBased::Config::Config(
+    bool initial_fec_enabled,
+    const ThresholdCurve& fec_enabling_threshold,
+    const ThresholdCurve& fec_disabling_threshold,
+    int time_constant_ms)
+    : initial_fec_enabled(initial_fec_enabled),
+      fec_enabling_threshold(fec_enabling_threshold),
+      fec_disabling_threshold(fec_disabling_threshold),
+      time_constant_ms(time_constant_ms) {}
+
+FecControllerPlrBased::FecControllerPlrBased(
+    const Config& config,
+    std::unique_ptr<SmoothingFilter> smoothing_filter)
+    : config_(config),
+      fec_enabled_(config.initial_fec_enabled),
+      packet_loss_smoother_(std::move(smoothing_filter)) {
+  RTC_DCHECK(config_.fec_disabling_threshold <= config_.fec_enabling_threshold);
+}
+
+FecControllerPlrBased::FecControllerPlrBased(const Config& config)
+    : FecControllerPlrBased(
+          config,
+          webrtc::field_trial::FindFullName("UseTwccPlrForAna") == "Enabled"
+              ? std::unique_ptr<SmoothingFilter>(new NullSmoothingFilter())
+              : std::unique_ptr<SmoothingFilter>(
+                    new SmoothingFilterImpl(config.time_constant_ms))) {}
+
+FecControllerPlrBased::~FecControllerPlrBased() = default;
+
+void FecControllerPlrBased::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.uplink_bandwidth_bps)
+    uplink_bandwidth_bps_ = network_metrics.uplink_bandwidth_bps;
+  if (network_metrics.uplink_packet_loss_fraction) {
+    packet_loss_smoother_->AddSample(
+        *network_metrics.uplink_packet_loss_fraction);
+  }
+}
+
+void FecControllerPlrBased::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  RTC_DCHECK(!config->enable_fec);
+  RTC_DCHECK(!config->uplink_packet_loss_fraction);
+
+  const auto& packet_loss = packet_loss_smoother_->GetAverage();
+
+  fec_enabled_ = fec_enabled_ ? !FecDisablingDecision(packet_loss)
+                              : FecEnablingDecision(packet_loss);
+
+  config->enable_fec = fec_enabled_;
+
+  config->uplink_packet_loss_fraction = packet_loss ? *packet_loss : 0.0;
+}
+
+bool FecControllerPlrBased::FecEnablingDecision(
+    const absl::optional<float>& packet_loss) const {
+  if (!uplink_bandwidth_bps_ || !packet_loss) {
+    return false;
+  } else {
+    // Enable when above the curve or exactly on it.
+    return !config_.fec_enabling_threshold.IsBelowCurve(
+        {static_cast<float>(*uplink_bandwidth_bps_), *packet_loss});
+  }
+}
+
+bool FecControllerPlrBased::FecDisablingDecision(
+    const absl::optional<float>& packet_loss) const {
+  if (!uplink_bandwidth_bps_ || !packet_loss) {
+    return false;
+  } else {
+    // Disable when below the curve.
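+    // (Editorial note: a point exactly on the disabling curve does not count
+    // as below it, so FEC stays on there; together with FecEnablingDecision
+    // above, the on-curve case always favors FEC.)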
+    return config_.fec_disabling_threshold.IsBelowCurve(
+        {static_cast<float>(*uplink_bandwidth_bps_), *packet_loss});
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h
new file mode 100644
index 0000000000..0c57ad1d1e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FEC_CONTROLLER_PLR_BASED_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FEC_CONTROLLER_PLR_BASED_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "common_audio/smoothing_filter.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+#include "modules/audio_coding/audio_network_adaptor/util/threshold_curve.h"
+
+namespace webrtc {
+
+class FecControllerPlrBased final : public Controller {
+ public:
+  struct Config {
+    // `fec_enabling_threshold` defines a curve, above which FEC should be
+    // enabled. `fec_disabling_threshold` defines a curve, under which FEC
+    // should be disabled. See below.
+    //
+    // packet-loss ^   |  |
+    //             |   |  |  FEC
+    //             |    \  \ ON
+    //             | FEC \  \_______ fec_enabling_threshold
+    //             | OFF  \_________ fec_disabling_threshold
+    //             |-----------------> bandwidth
+    Config(bool initial_fec_enabled,
+           const ThresholdCurve& fec_enabling_threshold,
+           const ThresholdCurve& fec_disabling_threshold,
+           int time_constant_ms);
+    bool initial_fec_enabled;
+    ThresholdCurve fec_enabling_threshold;
+    ThresholdCurve fec_disabling_threshold;
+    int time_constant_ms;
+  };
+
+  // Dependency injection for testing.
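+  // (Editorial note: the unit tests exercise this constructor with a mocked
+  // SmoothingFilter; production code uses the single-argument constructor
+  // below.)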
+  FecControllerPlrBased(const Config& config,
+                        std::unique_ptr<SmoothingFilter> smoothing_filter);
+
+  explicit FecControllerPlrBased(const Config& config);
+
+  ~FecControllerPlrBased() override;
+
+  FecControllerPlrBased(const FecControllerPlrBased&) = delete;
+  FecControllerPlrBased& operator=(const FecControllerPlrBased&) = delete;
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  bool FecEnablingDecision(const absl::optional<float>& packet_loss) const;
+  bool FecDisablingDecision(const absl::optional<float>& packet_loss) const;
+
+  const Config config_;
+  bool fec_enabled_;
+  absl::optional<int> uplink_bandwidth_bps_;
+  const std::unique_ptr<SmoothingFilter> packet_loss_smoother_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FEC_CONTROLLER_PLR_BASED_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based_unittest.cc
new file mode 100644
index 0000000000..743b087163
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based_unittest.cc
@@ -0,0 +1,489 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.h"
+
+#include <utility>
+
+#include "common_audio/mocks/mock_smoothing_filter.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+using ::testing::_;
+using ::testing::NiceMock;
+using ::testing::Return;
+
+namespace {
+
+// The test uses the following settings:
+//
+// packet-loss ^   |  |
+//             |  A|  C|       FEC
+//             |   \   \       ON
+//             | FEC \   D\_______
+//             | OFF  B\_________
+//             |-----------------> bandwidth
+//
+// A : (kDisablingBandwidthLow, kDisablingPacketLossAtLowBw)
+// B : (kDisablingBandwidthHigh, kDisablingPacketLossAtHighBw)
+// C : (kEnablingBandwidthLow, kEnablingPacketLossAtLowBw)
+// D : (kEnablingBandwidthHigh, kEnablingPacketLossAtHighBw)
+
+constexpr int kDisablingBandwidthLow = 15000;
+constexpr float kDisablingPacketLossAtLowBw = 0.08f;
+constexpr int kDisablingBandwidthHigh = 64000;
+constexpr float kDisablingPacketLossAtHighBw = 0.01f;
+constexpr int kEnablingBandwidthLow = 17000;
+constexpr float kEnablingPacketLossAtLowBw = 0.1f;
+constexpr int kEnablingBandwidthHigh = 64000;
+constexpr float kEnablingPacketLossAtHighBw = 0.05f;
+
+constexpr float kEpsilon = 1e-5f;
+
+struct FecControllerPlrBasedTestStates {
+  std::unique_ptr<FecControllerPlrBased> controller;
+  MockSmoothingFilter* packet_loss_smoother;
+};
+
+FecControllerPlrBasedTestStates CreateFecControllerPlrBased(
+    bool initial_fec_enabled,
+    const ThresholdCurve& enabling_curve,
+    const ThresholdCurve& disabling_curve) {
+  FecControllerPlrBasedTestStates states;
+  std::unique_ptr<MockSmoothingFilter> mock_smoothing_filter(
+      new NiceMock<MockSmoothingFilter>());
+  states.packet_loss_smoother = mock_smoothing_filter.get();
+  states.controller.reset(new FecControllerPlrBased(
+      FecControllerPlrBased::Config(initial_fec_enabled, enabling_curve,
+                                    disabling_curve, 0),
+      std::move(mock_smoothing_filter)));
+  return states;
+}
+
+FecControllerPlrBasedTestStates
+CreateFecControllerPlrBased(bool initial_fec_enabled) {
+  return CreateFecControllerPlrBased(
+      initial_fec_enabled,
+      ThresholdCurve(kEnablingBandwidthLow, kEnablingPacketLossAtLowBw,
+                     kEnablingBandwidthHigh, kEnablingPacketLossAtHighBw),
+      ThresholdCurve(kDisablingBandwidthLow, kDisablingPacketLossAtLowBw,
+                     kDisablingBandwidthHigh, kDisablingPacketLossAtHighBw));
+}
+
+void UpdateNetworkMetrics(FecControllerPlrBasedTestStates* states,
+                          const absl::optional<int>& uplink_bandwidth_bps,
+                          const absl::optional<float>& uplink_packet_loss) {
+  // UpdateNetworkMetrics can accept multiple network metric updates at once.
+  // However, currently, the most common case is to update one metric at a
+  // time. To reflect this fact, we separate the calls.
+  if (uplink_bandwidth_bps) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+    states->controller->UpdateNetworkMetrics(network_metrics);
+  }
+  if (uplink_packet_loss) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_packet_loss_fraction = uplink_packet_loss;
+    EXPECT_CALL(*states->packet_loss_smoother, AddSample(*uplink_packet_loss));
+    states->controller->UpdateNetworkMetrics(network_metrics);
+    // This is called during CheckDecision().
+    EXPECT_CALL(*states->packet_loss_smoother, GetAverage())
+        .WillOnce(Return(*uplink_packet_loss));
+  }
+}
+
+// Checks that the FEC decision and `uplink_packet_loss_fraction` given by
+// `states->controller->MakeDecision` match `expected_enable_fec` and
+// `expected_uplink_packet_loss_fraction`, respectively.
+void CheckDecision(FecControllerPlrBasedTestStates* states,
+                   bool expected_enable_fec,
+                   float expected_uplink_packet_loss_fraction) {
+  AudioEncoderRuntimeConfig config;
+  states->controller->MakeDecision(&config);
+  EXPECT_EQ(expected_enable_fec, config.enable_fec);
+  EXPECT_EQ(expected_uplink_packet_loss_fraction,
+            config.uplink_packet_loss_fraction);
+}
+
+}  // namespace
+
+TEST(FecControllerPlrBasedTest, OutputInitValueBeforeAnyInputsAreReceived) {
+  for (bool initial_fec_enabled : {false, true}) {
+    auto states = CreateFecControllerPlrBased(initial_fec_enabled);
+    CheckDecision(&states, initial_fec_enabled, 0);
+  }
+}
+
+TEST(FecControllerPlrBasedTest, OutputInitValueWhenUplinkBandwidthUnknown) {
+  // Regardless of the initial FEC state and the packet-loss rate,
+  // the initial FEC state is maintained as long as the BWE is unknown.
+  for (bool initial_fec_enabled : {false, true}) {
+    for (float packet_loss :
+         {kDisablingPacketLossAtLowBw - kEpsilon, kDisablingPacketLossAtLowBw,
+          kDisablingPacketLossAtLowBw + kEpsilon,
+          kEnablingPacketLossAtLowBw - kEpsilon, kEnablingPacketLossAtLowBw,
+          kEnablingPacketLossAtLowBw + kEpsilon}) {
+      auto states = CreateFecControllerPlrBased(initial_fec_enabled);
+      UpdateNetworkMetrics(&states, absl::nullopt, packet_loss);
+      CheckDecision(&states, initial_fec_enabled, packet_loss);
+    }
+  }
+}
+
+TEST(FecControllerPlrBasedTest,
+     OutputInitValueWhenUplinkPacketLossFractionUnknown) {
+  // Regardless of the initial FEC state and the BWE, the initial FEC state
+  // is maintained as long as the packet-loss rate is unknown.
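+  // (Editorial note: with the NiceMock smoothing filter, GetAverage() is not
+  // stubbed here and returns absl::nullopt by default, which is what makes
+  // the packet-loss rate "unknown" to the controller.)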
+  for (bool initial_fec_enabled : {false, true}) {
+    for (int bandwidth : {kDisablingBandwidthLow - 1, kDisablingBandwidthLow,
+                          kDisablingBandwidthLow + 1, kEnablingBandwidthLow - 1,
+                          kEnablingBandwidthLow, kEnablingBandwidthLow + 1}) {
+      auto states = CreateFecControllerPlrBased(initial_fec_enabled);
+      UpdateNetworkMetrics(&states, bandwidth, absl::nullopt);
+      CheckDecision(&states, initial_fec_enabled, 0.0);
+    }
+  }
+}
+
+TEST(FecControllerPlrBasedTest, EnableFecForHighBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  UpdateNetworkMetrics(&states, kEnablingBandwidthHigh,
+                       kEnablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kEnablingPacketLossAtHighBw);
+}
+
+TEST(FecControllerPlrBasedTest, UpdateMultipleNetworkMetricsAtOnce) {
+  // This test is similar to EnableFecForHighBandwidth. But instead of
+  // using ::UpdateNetworkMetrics(...), which calls
+  // FecControllerPlrBased::UpdateNetworkMetrics(...) multiple times, we
+  // call it only once. This is to verify that
+  // FecControllerPlrBased::UpdateNetworkMetrics(...) can handle multiple
+  // network updates at once. This is, however, not a common use case in
+  // current audio_network_adaptor_impl.cc.
+  auto states = CreateFecControllerPlrBased(false);
+  Controller::NetworkMetrics network_metrics;
+  network_metrics.uplink_bandwidth_bps = kEnablingBandwidthHigh;
+  network_metrics.uplink_packet_loss_fraction = kEnablingPacketLossAtHighBw;
+  EXPECT_CALL(*states.packet_loss_smoother, GetAverage())
+      .WillOnce(Return(kEnablingPacketLossAtHighBw));
+  states.controller->UpdateNetworkMetrics(network_metrics);
+  CheckDecision(&states, true, kEnablingPacketLossAtHighBw);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOffForHighBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  constexpr float kPacketLoss = kEnablingPacketLossAtHighBw * 0.99f;
+  UpdateNetworkMetrics(&states, kEnablingBandwidthHigh, kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, EnableFecForMediumBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  constexpr float kPacketLoss =
+      (kEnablingPacketLossAtLowBw + kEnablingPacketLossAtHighBw) / 2.0;
+  UpdateNetworkMetrics(&states,
+                       (kEnablingBandwidthHigh + kEnablingBandwidthLow) / 2,
+                       kPacketLoss);
+  CheckDecision(&states, true, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOffForMediumBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  constexpr float kPacketLoss =
+      kEnablingPacketLossAtLowBw * 0.49f + kEnablingPacketLossAtHighBw * 0.51f;
+  UpdateNetworkMetrics(&states,
+                       (kEnablingBandwidthHigh + kEnablingBandwidthLow) / 2,
+                       kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, EnableFecForLowBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  UpdateNetworkMetrics(&states, kEnablingBandwidthLow,
+                       kEnablingPacketLossAtLowBw);
+  CheckDecision(&states, true, kEnablingPacketLossAtLowBw);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOffForLowBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  constexpr float kPacketLoss = kEnablingPacketLossAtLowBw * 0.99f;
+  UpdateNetworkMetrics(&states, kEnablingBandwidthLow, kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOffForVeryLowBandwidth) {
+  auto states = CreateFecControllerPlrBased(false);
+  // Below `kEnablingBandwidthLow`, no packet loss fraction can cause FEC to
+  // turn on.
+  UpdateNetworkMetrics(&states, kEnablingBandwidthLow - 1, 1.0);
+  CheckDecision(&states, false, 1.0);
+}
+
+TEST(FecControllerPlrBasedTest, DisableFecForHighBandwidth) {
+  auto states = CreateFecControllerPlrBased(true);
+  constexpr float kPacketLoss = kDisablingPacketLossAtHighBw - kEpsilon;
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh, kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOnForHighBandwidth) {
+  // Note: Disabling happens when the value is strictly below the threshold.
+  auto states = CreateFecControllerPlrBased(true);
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh,
+                       kDisablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kDisablingPacketLossAtHighBw);
+}
+
+TEST(FecControllerPlrBasedTest, DisableFecOnMediumBandwidth) {
+  auto states = CreateFecControllerPlrBased(true);
+  constexpr float kPacketLoss =
+      (kDisablingPacketLossAtLowBw + kDisablingPacketLossAtHighBw) / 2.0f -
+      kEpsilon;
+  UpdateNetworkMetrics(&states,
+                       (kDisablingBandwidthHigh + kDisablingBandwidthLow) / 2,
+                       kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, MaintainFecOnForMediumBandwidth) {
+  auto states = CreateFecControllerPlrBased(true);
+  constexpr float kPacketLoss = kDisablingPacketLossAtLowBw * 0.51f +
+                                kDisablingPacketLossAtHighBw * 0.49f - kEpsilon;
+  UpdateNetworkMetrics(&states,
+                       (kEnablingBandwidthHigh + kDisablingBandwidthLow) / 2,
+                       kPacketLoss);
+  CheckDecision(&states, true, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, DisableFecForLowBandwidth) {
+  auto states = CreateFecControllerPlrBased(true);
+  constexpr float kPacketLoss = kDisablingPacketLossAtLowBw - kEpsilon;
+  UpdateNetworkMetrics(&states, kDisablingBandwidthLow, kPacketLoss);
+  CheckDecision(&states, false, kPacketLoss);
+}
+
+TEST(FecControllerPlrBasedTest, DisableFecForVeryLowBandwidth) {
+  auto states = CreateFecControllerPlrBased(true);
+  // Below `kDisablingBandwidthLow`, any packet loss fraction can cause FEC to
+  // turn off.
+  UpdateNetworkMetrics(&states, kDisablingBandwidthLow - 1, 1.0);
+  CheckDecision(&states, false, 1.0);
+}
+
+TEST(FecControllerPlrBasedTest, CheckBehaviorOnChangingNetworkMetrics) {
+  // In this test, we let the network metrics traverse from 1 to 5.
+  // packet-loss ^ 1 |  |
+  //             |   | 2|
+  //             |    \  \ 3
+  //             |     \4 \_______
+  //             |      \_________
+  //             |---------5-------> bandwidth
+
+  auto states = CreateFecControllerPlrBased(true);
+  UpdateNetworkMetrics(&states, kDisablingBandwidthLow - 1, 1.0);
+  CheckDecision(&states, false, 1.0);
+
+  UpdateNetworkMetrics(&states, kEnablingBandwidthLow,
+                       kEnablingPacketLossAtLowBw * 0.99f);
+  CheckDecision(&states, false, kEnablingPacketLossAtLowBw * 0.99f);
+
+  UpdateNetworkMetrics(&states, kEnablingBandwidthHigh,
+                       kEnablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kEnablingPacketLossAtHighBw);
+
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh,
+                       kDisablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kDisablingPacketLossAtHighBw);
+
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh + 1, 0.0);
+  CheckDecision(&states, false, 0.0);
+}
+
+TEST(FecControllerPlrBasedTest, CheckBehaviorOnSpecialCurves) {
+  // We test a special configuration, where the points that define the FEC
+  // enabling/disabling curves are placed as follows; otherwise the test is
+  // the same as CheckBehaviorOnChangingNetworkMetrics.
+  //
+  // packet-loss ^   |  |
+  //             |   | C|
+  //             |   |  |
+  //             |   | D|_______
+  //             |  A|___B______
+  //             |-----------------> bandwidth
+
+  constexpr int kEnablingBandwidthHigh = kEnablingBandwidthLow;
+  constexpr float kDisablingPacketLossAtLowBw = kDisablingPacketLossAtHighBw;
+  FecControllerPlrBasedTestStates states;
+  std::unique_ptr<MockSmoothingFilter> mock_smoothing_filter(
+      new NiceMock<MockSmoothingFilter>());
+  states.packet_loss_smoother = mock_smoothing_filter.get();
+  states.controller.reset(new FecControllerPlrBased(
+      FecControllerPlrBased::Config(
+          true,
+          ThresholdCurve(kEnablingBandwidthLow, kEnablingPacketLossAtLowBw,
+                         kEnablingBandwidthHigh, kEnablingPacketLossAtHighBw),
+          ThresholdCurve(kDisablingBandwidthLow, kDisablingPacketLossAtLowBw,
+                         kDisablingBandwidthHigh, kDisablingPacketLossAtHighBw),
+          0),
+      std::move(mock_smoothing_filter)));
+
+  UpdateNetworkMetrics(&states, kDisablingBandwidthLow - 1, 1.0);
+  CheckDecision(&states, false, 1.0);
+
+  UpdateNetworkMetrics(&states, kEnablingBandwidthLow,
+                       kEnablingPacketLossAtHighBw * 0.99f);
+  CheckDecision(&states, false, kEnablingPacketLossAtHighBw * 0.99f);
+
+  UpdateNetworkMetrics(&states, kEnablingBandwidthHigh,
+                       kEnablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kEnablingPacketLossAtHighBw);
+
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh,
+                       kDisablingPacketLossAtHighBw);
+  CheckDecision(&states, true, kDisablingPacketLossAtHighBw);
+
+  UpdateNetworkMetrics(&states, kDisablingBandwidthHigh + 1, 0.0);
+  CheckDecision(&states, false, 0.0);
+}
+
+TEST(FecControllerPlrBasedTest, SingleThresholdCurveForEnablingAndDisabling) {
+  // Note: To avoid numerical errors, keep kPacketLossAtLowBw and
+  // kPacketLossAtHighBw as (negative) integer powers of 2.
+  // This is mostly relevant for the O3 case.
+  constexpr int kBandwidthLow = 10000;
+  constexpr float kPacketLossAtLowBw = 0.25f;
+  constexpr int kBandwidthHigh = 20000;
+  constexpr float kPacketLossAtHighBw = 0.125f;
+  auto curve = ThresholdCurve(kBandwidthLow, kPacketLossAtLowBw, kBandwidthHigh,
+                              kPacketLossAtHighBw);
+
+  // B* stands for "below-curve", O* for "on-curve", and A* for "above-curve".
+  //
+  // packet-loss ^                //
+  //             |  |             //
+  //             | B1 O1          //
+  //             |  |             //
+  //             | O2             //
+  //             |   \ A1         //
+  //             |    \           //
+  //             |     O3  A2     //
+  //             |  B2  \         //
+  //             |       \        //
+  //             |        O4--O5----
+  //             |                //
+  //             |           B3   //
+  //             |-----------------> bandwidth
+
+  struct NetworkState {
+    int bandwidth;
+    float packet_loss;
+  };
+
+  std::vector<NetworkState> below{
+      {kBandwidthLow - 1, kPacketLossAtLowBw + 0.1f},  // B1
+      {(kBandwidthLow + kBandwidthHigh) / 2,
+       (kPacketLossAtLowBw + kPacketLossAtHighBw) / 2 - kEpsilon},  // B2
+      {kBandwidthHigh + 1, kPacketLossAtHighBw - kEpsilon}          // B3
+  };
+
+  std::vector<NetworkState> on{
+      {kBandwidthLow, kPacketLossAtLowBw + 0.1f},  // O1
+      {kBandwidthLow, kPacketLossAtLowBw},         // O2
+      {(kBandwidthLow + kBandwidthHigh) / 2,
+       (kPacketLossAtLowBw + kPacketLossAtHighBw) / 2},  // O3
+      {kBandwidthHigh, kPacketLossAtHighBw},             // O4
+      {kBandwidthHigh + 1, kPacketLossAtHighBw},         // O5
+  };
+
+  std::vector<NetworkState> above{
+      {(kBandwidthLow + kBandwidthHigh) / 2,
+       (kPacketLossAtLowBw + kPacketLossAtHighBw) / 2 + kEpsilon},  // A1
+      {kBandwidthHigh + 1, kPacketLossAtHighBw + kEpsilon},         // A2
+  };
+
+  // Test that FEC is turned off whenever we're below the curve, independent
+  // of the starting FEC state.
+  for (NetworkState net_state : below) {
+    for (bool initial_fec_enabled : {false, true}) {
+      auto states =
+          CreateFecControllerPlrBased(initial_fec_enabled, curve, curve);
+      UpdateNetworkMetrics(&states, net_state.bandwidth, net_state.packet_loss);
+      CheckDecision(&states, false, net_state.packet_loss);
+    }
+  }
+
+  // Test that FEC is turned on whenever we're on the curve or above it,
+  // independent of the starting FEC state.
+  for (const std::vector<NetworkState>& states_list : {on, above}) {
+    for (NetworkState net_state : states_list) {
+      for (bool initial_fec_enabled : {false, true}) {
+        auto states =
+            CreateFecControllerPlrBased(initial_fec_enabled, curve, curve);
+        UpdateNetworkMetrics(&states, net_state.bandwidth,
+                             net_state.packet_loss);
+        CheckDecision(&states, true, net_state.packet_loss);
+      }
+    }
+  }
+}
+
+TEST(FecControllerPlrBasedTest, FecAlwaysOff) {
+  ThresholdCurve always_off_curve(0, 1.0f + kEpsilon, 0, 1.0f + kEpsilon);
+  for (bool initial_fec_enabled : {false, true}) {
+    for (int bandwidth : {0, 10000}) {
+      for (float packet_loss : {0.0f, 0.5f, 1.0f}) {
+        auto states = CreateFecControllerPlrBased(
+            initial_fec_enabled, always_off_curve, always_off_curve);
+        UpdateNetworkMetrics(&states, bandwidth, packet_loss);
+        CheckDecision(&states, false, packet_loss);
+      }
+    }
+  }
+}
+
+TEST(FecControllerPlrBasedTest, FecAlwaysOn) {
+  ThresholdCurve always_on_curve(0, 0.0f, 0, 0.0f);
+  for (bool initial_fec_enabled : {false, true}) {
+    for (int bandwidth : {0, 10000}) {
+      for (float packet_loss : {0.0f, 0.5f, 1.0f}) {
+        auto states = CreateFecControllerPlrBased(
+            initial_fec_enabled, always_on_curve, always_on_curve);
+        UpdateNetworkMetrics(&states, bandwidth, packet_loss);
+        CheckDecision(&states, true, packet_loss);
+      }
+    }
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST(FecControllerPlrBasedDeathTest, InvalidConfig) {
+  FecControllerPlrBasedTestStates states;
+  std::unique_ptr<MockSmoothingFilter> mock_smoothing_filter(
+      new NiceMock<MockSmoothingFilter>());
+  states.packet_loss_smoother = mock_smoothing_filter.get();
+  EXPECT_DEATH(
+      states.controller.reset(new FecControllerPlrBased(
+          FecControllerPlrBased::Config(
+              true,
+              ThresholdCurve(kDisablingBandwidthLow - 1,
+                             kEnablingPacketLossAtLowBw, kEnablingBandwidthHigh,
+                             kEnablingPacketLossAtHighBw),
+              ThresholdCurve(
+                  kDisablingBandwidthLow, kDisablingPacketLossAtLowBw,
+                  kDisablingBandwidthHigh, kDisablingPacketLossAtHighBw),
+              0),
+          std::move(mock_smoothing_filter))),
+      "Check failed");
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.cc
new file mode 100644
index 0000000000..c47434f9aa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.cc
@@ -0,0 +1,201 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller.h"
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int kPreventOveruseMarginBps = 5000;
+
+int OverheadRateBps(size_t overhead_bytes_per_packet, int frame_length_ms) {
+  return static_cast<int>(overhead_bytes_per_packet * 8 * 1000 /
+                          frame_length_ms);
+}
+}  // namespace
+
+FrameLengthController::Config::Config(
+    const std::set<int>& encoder_frame_lengths_ms,
+    int initial_frame_length_ms,
+    int min_encoder_bitrate_bps,
+    float fl_increasing_packet_loss_fraction,
+    float fl_decreasing_packet_loss_fraction,
+    int fl_increase_overhead_offset,
+    int fl_decrease_overhead_offset,
+    std::map<FrameLengthChange, int> fl_changing_bandwidths_bps)
+    : encoder_frame_lengths_ms(encoder_frame_lengths_ms),
+      initial_frame_length_ms(initial_frame_length_ms),
+      min_encoder_bitrate_bps(min_encoder_bitrate_bps),
+      fl_increasing_packet_loss_fraction(fl_increasing_packet_loss_fraction),
+      fl_decreasing_packet_loss_fraction(fl_decreasing_packet_loss_fraction),
+      fl_increase_overhead_offset(fl_increase_overhead_offset),
+      fl_decrease_overhead_offset(fl_decrease_overhead_offset),
+      fl_changing_bandwidths_bps(std::move(fl_changing_bandwidths_bps)) {}
+
+FrameLengthController::Config::Config(const Config& other) = default;
+
+FrameLengthController::Config::~Config() = default;
+
+FrameLengthController::FrameLengthController(const Config& config)
+    : config_(config) {
+  frame_length_ms_ = std::find(config_.encoder_frame_lengths_ms.begin(),
+                               config_.encoder_frame_lengths_ms.end(),
+                               config_.initial_frame_length_ms);
+  // `encoder_frame_lengths_ms` must contain `initial_frame_length_ms`.
+  RTC_DCHECK(frame_length_ms_ != config_.encoder_frame_lengths_ms.end());
+}
+
+FrameLengthController::~FrameLengthController() = default;
+
+void FrameLengthController::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.uplink_bandwidth_bps)
+    uplink_bandwidth_bps_ = network_metrics.uplink_bandwidth_bps;
+  if (network_metrics.uplink_packet_loss_fraction)
+    uplink_packet_loss_fraction_ = network_metrics.uplink_packet_loss_fraction;
+  if (network_metrics.overhead_bytes_per_packet)
+    overhead_bytes_per_packet_ = network_metrics.overhead_bytes_per_packet;
+}
+
+void FrameLengthController::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  // Decision on `frame_length_ms` should not have been made.
+  RTC_DCHECK(!config->frame_length_ms);
+
+  if (FrameLengthIncreasingDecision(*config)) {
+    prev_decision_increase_ = true;
+  } else if (FrameLengthDecreasingDecision(*config)) {
+    prev_decision_increase_ = false;
+  }
+  config->last_fl_change_increase = prev_decision_increase_;
+  config->frame_length_ms = *frame_length_ms_;
+}
+
+FrameLengthController::Config::FrameLengthChange::FrameLengthChange(
+    int from_frame_length_ms,
+    int to_frame_length_ms)
+    : from_frame_length_ms(from_frame_length_ms),
+      to_frame_length_ms(to_frame_length_ms) {}
+
+bool FrameLengthController::Config::FrameLengthChange::operator<(
+    const FrameLengthChange& rhs) const {
+  return from_frame_length_ms < rhs.from_frame_length_ms ||
+         (from_frame_length_ms == rhs.from_frame_length_ms &&
+          to_frame_length_ms < rhs.to_frame_length_ms);
+}
+
+bool FrameLengthController::FrameLengthIncreasingDecision(
+    const AudioEncoderRuntimeConfig& config) {
+  // Increase frame length if
+  // 1.
+  //    `uplink_bandwidth_bps` is known to be smaller than or equal to
+  //    `min_encoder_bitrate_bps` plus `prevent_overuse_margin_bps` plus the
+  //    current overhead rate OR all of the following:
+  // 2. longer frame length is available AND
+  // 3. `uplink_bandwidth_bps` is known to be smaller than a threshold AND
+  // 4. `uplink_packet_loss_fraction` is known to be smaller than a threshold.
+
+  // Find next frame length to which a criterion is defined to shift from
+  // current frame length.
+  auto longer_frame_length_ms = std::next(frame_length_ms_);
+  auto increase_threshold = config_.fl_changing_bandwidths_bps.end();
+  while (longer_frame_length_ms != config_.encoder_frame_lengths_ms.end()) {
+    increase_threshold = config_.fl_changing_bandwidths_bps.find(
+        Config::FrameLengthChange(*frame_length_ms_, *longer_frame_length_ms));
+    if (increase_threshold != config_.fl_changing_bandwidths_bps.end())
+      break;
+    longer_frame_length_ms = std::next(longer_frame_length_ms);
+  }
+
+  if (increase_threshold == config_.fl_changing_bandwidths_bps.end())
+    return false;
+
+  // Check that
+  // -(*overhead_bytes_per_packet_) <= offset <= (*overhead_bytes_per_packet_)
+  RTC_DCHECK(
+      !overhead_bytes_per_packet_ ||
+      (overhead_bytes_per_packet_ &&
+       static_cast<size_t>(std::max(0, -config_.fl_increase_overhead_offset)) <=
+           *overhead_bytes_per_packet_ &&
+       static_cast<size_t>(std::max(0, config_.fl_increase_overhead_offset)) <=
+           *overhead_bytes_per_packet_));
+
+  if (uplink_bandwidth_bps_ && overhead_bytes_per_packet_ &&
+      *uplink_bandwidth_bps_ <=
+          config_.min_encoder_bitrate_bps + kPreventOveruseMarginBps +
+              OverheadRateBps(*overhead_bytes_per_packet_ +
+                                  config_.fl_increase_overhead_offset,
+                              *frame_length_ms_)) {
+    frame_length_ms_ = longer_frame_length_ms;
+    return true;
+  }
+
+  if ((uplink_bandwidth_bps_ &&
+       *uplink_bandwidth_bps_ <= increase_threshold->second) &&
+      (uplink_packet_loss_fraction_ &&
+       *uplink_packet_loss_fraction_ <=
+           config_.fl_increasing_packet_loss_fraction)) {
+    frame_length_ms_ = longer_frame_length_ms;
+    return true;
+  }
+  return false;
+}
+
+bool FrameLengthController::FrameLengthDecreasingDecision(
+    const AudioEncoderRuntimeConfig& config) {
+  // Decrease frame length if
+  // 1. shorter frame length is available AND
+  // 2. `uplink_bandwidth_bps` is known to be bigger than
+  //    `min_encoder_bitrate_bps` plus `prevent_overuse_margin_bps` plus the
+  //    overhead which would be produced with the shorter frame length AND
+  //    one or more of the following:
+  // 3. `uplink_bandwidth_bps` is known to be larger than a threshold,
+  // 4. `uplink_packet_loss_fraction` is known to be larger than a threshold.
+
+  // Find next frame length to which a criterion is defined to shift from
+  // current frame length.
+  auto shorter_frame_length_ms = frame_length_ms_;
+  auto decrease_threshold = config_.fl_changing_bandwidths_bps.end();
+  while (shorter_frame_length_ms != config_.encoder_frame_lengths_ms.begin()) {
+    shorter_frame_length_ms = std::prev(shorter_frame_length_ms);
+    decrease_threshold = config_.fl_changing_bandwidths_bps.find(
+        Config::FrameLengthChange(*frame_length_ms_, *shorter_frame_length_ms));
+    if (decrease_threshold != config_.fl_changing_bandwidths_bps.end())
+      break;
+  }
+
+  if (decrease_threshold == config_.fl_changing_bandwidths_bps.end())
+    return false;
+
+  if (uplink_bandwidth_bps_ && overhead_bytes_per_packet_ &&
+      *uplink_bandwidth_bps_ <=
+          config_.min_encoder_bitrate_bps + kPreventOveruseMarginBps +
+              OverheadRateBps(*overhead_bytes_per_packet_ +
+                                  config_.fl_decrease_overhead_offset,
+                              *shorter_frame_length_ms)) {
+    return false;
+  }
+
+  if ((uplink_bandwidth_bps_ &&
+       *uplink_bandwidth_bps_ >= decrease_threshold->second) ||
+      (uplink_packet_loss_fraction_ &&
+       *uplink_packet_loss_fraction_ >=
+           config_.fl_decreasing_packet_loss_fraction)) {
+    frame_length_ms_ = shorter_frame_length_ms;
+    return true;
+  }
+  return false;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.h
new file mode 100644
index 0000000000..04693f8db7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.h
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_H_
+
+#include <stddef.h>
+
+#include <map>
+#include <set>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+// Determines target frame length based on the network metrics and the decision
+// of FEC controller.
+class FrameLengthController final : public Controller {
+ public:
+  struct Config {
+    struct FrameLengthChange {
+      FrameLengthChange(int from_frame_length_ms, int to_frame_length_ms);
+      bool operator<(const FrameLengthChange& rhs) const;
+      int from_frame_length_ms;
+      int to_frame_length_ms;
+    };
+    Config(const std::set<int>& encoder_frame_lengths_ms,
+           int initial_frame_length_ms,
+           int min_encoder_bitrate_bps,
+           float fl_increasing_packet_loss_fraction,
+           float fl_decreasing_packet_loss_fraction,
+           int fl_increase_overhead_offset,
+           int fl_decrease_overhead_offset,
+           std::map<FrameLengthChange, int> fl_changing_bandwidths_bps);
+    Config(const Config& other);
+    ~Config();
+    std::set<int> encoder_frame_lengths_ms;
+    int initial_frame_length_ms;
+    int min_encoder_bitrate_bps;
+    // Uplink packet loss fraction below which frame length can increase.
+    float fl_increasing_packet_loss_fraction;
+    // Uplink packet loss fraction above which frame length should decrease.
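+    // (Editorial note: making this slightly larger than
+    // fl_increasing_packet_loss_fraction, as the unit tests do with 0.04 and
+    // 0.05, leaves a hysteresis gap in which the loss rate triggers neither
+    // loss-based rule.)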
+    float fl_decreasing_packet_loss_fraction;
+    // Offset to apply to overhead calculation when increasing frame length.
+    int fl_increase_overhead_offset;
+    // Offset to apply to overhead calculation when decreasing frame length.
+    int fl_decrease_overhead_offset;
+    std::map<FrameLengthChange, int> fl_changing_bandwidths_bps;
+  };
+
+  explicit FrameLengthController(const Config& config);
+
+  ~FrameLengthController() override;
+
+  FrameLengthController(const FrameLengthController&) = delete;
+  FrameLengthController& operator=(const FrameLengthController&) = delete;
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  bool FrameLengthIncreasingDecision(const AudioEncoderRuntimeConfig& config);
+
+  bool FrameLengthDecreasingDecision(const AudioEncoderRuntimeConfig& config);
+
+  const Config config_;
+
+  std::set<int>::const_iterator frame_length_ms_;
+
+  absl::optional<int> uplink_bandwidth_bps_;
+
+  absl::optional<float> uplink_packet_loss_fraction_;
+
+  absl::optional<size_t> overhead_bytes_per_packet_;
+
+  // True if the previous frame length decision was an increase, otherwise
+  // false.
+  bool prev_decision_increase_ = false;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_H_
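Before the unit tests, a sketch of how a client of the header above might wire the controller up, feed it metrics, and read back a decision. All numeric thresholds below are illustrative values chosen for the example, not values taken from this patch:

    #include <map>
    #include <set>

    #include "modules/audio_coding/audio_network_adaptor/frame_length_controller.h"

    int main() {
      using Config = webrtc::FrameLengthController::Config;
      // Bandwidth thresholds keyed by (from, to) frame-length transitions.
      std::map<Config::FrameLengthChange, int> criteria = {
          {Config::FrameLengthChange(20, 60), 40000},  // increase below 40 kbps
          {Config::FrameLengthChange(60, 20), 50000},  // decrease above 50 kbps
      };
      webrtc::FrameLengthController controller(Config(
          /*encoder_frame_lengths_ms=*/{20, 60}, /*initial_frame_length_ms=*/20,
          /*min_encoder_bitrate_bps=*/6000,
          /*fl_increasing_packet_loss_fraction=*/0.04f,
          /*fl_decreasing_packet_loss_fraction=*/0.05f,
          /*fl_increase_overhead_offset=*/0, /*fl_decrease_overhead_offset=*/0,
          criteria));

      webrtc::Controller::NetworkMetrics metrics;
      metrics.uplink_bandwidth_bps = 35000;          // below the 40 kbps bar
      metrics.uplink_packet_loss_fraction = 0.01f;   // below the 0.04 bar
      metrics.overhead_bytes_per_packet = 20;
      controller.UpdateNetworkMetrics(metrics);

      webrtc::AudioEncoderRuntimeConfig config;
      controller.MakeDecision(&config);
      // With these inputs the 20 -> 60 ms increase criteria are met, so
      // config.frame_length_ms should come back as 60.
    }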
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_unittest.cc
new file mode 100644
index 0000000000..23123934dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_unittest.cc
@@ -0,0 +1,444 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller.h"
+
+#include <memory>
+#include <utility>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr float kFlIncreasingPacketLossFraction = 0.04f;
+constexpr float kFlDecreasingPacketLossFraction = 0.05f;
+constexpr int kFlIncreaseOverheadOffset = 0;
+constexpr int kFlDecreaseOverheadOffset = 0;
+constexpr int kMinEncoderBitrateBps = 6000;
+constexpr int kPreventOveruseMarginBps = 5000;
+constexpr size_t kOverheadBytesPerPacket = 20;
+constexpr int kFl20msTo60msBandwidthBps = 40000;
+constexpr int kFl60msTo20msBandwidthBps = 50000;
+constexpr int kFl60msTo120msBandwidthBps = 30000;
+constexpr int kFl120msTo60msBandwidthBps = 40000;
+constexpr int kFl20msTo40msBandwidthBps = 45000;
+constexpr int kFl40msTo20msBandwidthBps = 50000;
+constexpr int kFl40msTo60msBandwidthBps = 40000;
+constexpr int kFl60msTo40msBandwidthBps = 45000;
+
+constexpr int kMediumBandwidthBps =
+    (kFl40msTo20msBandwidthBps + kFl20msTo40msBandwidthBps) / 2;
+constexpr float kMediumPacketLossFraction =
+    (kFlDecreasingPacketLossFraction + kFlIncreasingPacketLossFraction) / 2;
+const std::set<int> kDefaultEncoderFrameLengthsMs = {20, 40, 60, 120};
+
+int VeryLowBitrate(int frame_length_ms) {
+  return kMinEncoderBitrateBps + kPreventOveruseMarginBps +
+         (kOverheadBytesPerPacket * 8 * 1000 / frame_length_ms);
+}
+
+std::unique_ptr<FrameLengthController> CreateController(
+    const std::map<FrameLengthController::Config::FrameLengthChange, int>&
+        frame_length_change_criteria,
+    const std::set<int>& encoder_frame_lengths_ms,
+    int initial_frame_length_ms) {
+  std::unique_ptr<FrameLengthController> controller(
+      new FrameLengthController(FrameLengthController::Config(
+          encoder_frame_lengths_ms, initial_frame_length_ms,
+          kMinEncoderBitrateBps, kFlIncreasingPacketLossFraction,
+          kFlDecreasingPacketLossFraction, kFlIncreaseOverheadOffset,
+          kFlDecreaseOverheadOffset, frame_length_change_criteria)));
+
+  return controller;
+}
+
+std::map<FrameLengthController::Config::FrameLengthChange, int>
+CreateChangeCriteriaFor20msAnd60ms() {
+  return std::map<FrameLengthController::Config::FrameLengthChange, int>{
+      {FrameLengthController::Config::FrameLengthChange(20, 60),
+       kFl20msTo60msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 20),
+       kFl60msTo20msBandwidthBps}};
+}
+
+std::map<FrameLengthController::Config::FrameLengthChange, int>
+CreateChangeCriteriaFor20msAnd40ms() {
+  return std::map<FrameLengthController::Config::FrameLengthChange, int>{
+      {FrameLengthController::Config::FrameLengthChange(20, 40),
+       kFl20msTo40msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(40, 20),
+       kFl40msTo20msBandwidthBps}};
+}
+
+std::map<FrameLengthController::Config::FrameLengthChange, int>
+CreateChangeCriteriaFor20ms60msAnd120ms() {
+  return std::map<FrameLengthController::Config::FrameLengthChange, int>{
+      {FrameLengthController::Config::FrameLengthChange(20, 60),
+       kFl20msTo60msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 20),
+       kFl60msTo20msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 120),
+       kFl60msTo120msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(120, 60),
+       kFl120msTo60msBandwidthBps}};
+}
+
+std::map<FrameLengthController::Config::FrameLengthChange, int>
+CreateChangeCriteriaFor20ms40ms60msAnd120ms() {
+  return std::map<FrameLengthController::Config::FrameLengthChange, int>{
+      {FrameLengthController::Config::FrameLengthChange(20, 60),
+       kFl20msTo60msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 20),
+       kFl60msTo20msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(20, 40),
+       kFl20msTo40msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(40, 20),
+       kFl40msTo20msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(40, 60),
+       kFl40msTo60msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 40),
+       kFl60msTo40msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 120),
+       kFl60msTo120msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(120, 60),
+       kFl120msTo60msBandwidthBps}};
+}
+
+std::map<FrameLengthController::Config::FrameLengthChange, int>
+CreateChangeCriteriaFor40msAnd60ms() {
+  return std::map<FrameLengthController::Config::FrameLengthChange, int>{
+      {FrameLengthController::Config::FrameLengthChange(40, 60),
+       kFl40msTo60msBandwidthBps},
+      {FrameLengthController::Config::FrameLengthChange(60, 40),
+       kFl60msTo40msBandwidthBps}};
+}
+
+void UpdateNetworkMetrics(
+    FrameLengthController* controller,
+    const absl::optional<int>& uplink_bandwidth_bps,
+    const absl::optional<float>& uplink_packet_loss_fraction,
+    const absl::optional<size_t>& overhead_bytes_per_packet) {
+  // UpdateNetworkMetrics can accept multiple network metric updates at once.
+  // However, currently, the most used case is to update one metric at a time.
+  // To reflect this fact, we separate the calls.
+  if (uplink_bandwidth_bps) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+  if (uplink_packet_loss_fraction) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.uplink_packet_loss_fraction = uplink_packet_loss_fraction;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+  if (overhead_bytes_per_packet) {
+    Controller::NetworkMetrics network_metrics;
+    network_metrics.overhead_bytes_per_packet = overhead_bytes_per_packet;
+    controller->UpdateNetworkMetrics(network_metrics);
+  }
+}
+
+void CheckDecision(FrameLengthController* controller,
+                   int expected_frame_length_ms) {
+  AudioEncoderRuntimeConfig config;
+  controller->MakeDecision(&config);
+  EXPECT_EQ(expected_frame_length_ms, config.frame_length_ms);
+}
+
+}  // namespace
+
+TEST(FrameLengthControllerTest, DecreaseTo20MsOnHighUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  UpdateNetworkMetrics(controller.get(), kFl60msTo20msBandwidthBps,
+                       absl::nullopt, kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 20);
+}
+
+TEST(FrameLengthControllerTest, DecreaseTo20MsOnHighUplinkPacketLossFraction) {
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  UpdateNetworkMetrics(controller.get(), absl::nullopt,
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 20);
+}
+
+TEST(FrameLengthControllerTest,
+     Maintain60MsIf20MsNotInReceiverFrameLengthRange) {
+  auto controller =
+      CreateController(CreateChangeCriteriaFor20msAnd60ms(), {60}, 60);
+  // Set FEC on, which would cause frame length to decrease if the receiver
+  // frame length range included 20ms.
+  CheckDecision(controller.get(), 60);
+}
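Note how the criteria maps above pair each transition with asymmetric thresholds. A quick standalone restatement of the resulting hysteresis, with the constants copied from the top of this unittest:

    // Values copied from the constants defined earlier in this unittest.
    constexpr int kFl20msTo60msBandwidthBps = 40000;
    constexpr int kFl60msTo20msBandwidthBps = 50000;
    constexpr int kFl60msTo120msBandwidthBps = 30000;
    constexpr int kFl120msTo60msBandwidthBps = 40000;

    // The "increase" threshold sits below the "decrease" threshold, so a
    // bandwidth estimate between the two changes nothing: the controller
    // cannot flap between two frame lengths on a steady estimate.
    static_assert(kFl20msTo60msBandwidthBps < kFl60msTo20msBandwidthBps,
                  "40-50 kbps estimates keep the current 20/60 ms choice");
    static_assert(kFl60msTo120msBandwidthBps < kFl120msTo60msBandwidthBps,
                  "30-40 kbps estimates keep the current 60/120 ms choice");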
+TEST(FrameLengthControllerTest, IncreaseTo40MsOnMultipleConditions) {
+  // Increase to 40ms frame length if
+  // 1. `uplink_bandwidth_bps` is known to be smaller than a threshold AND
+  // 2. `uplink_packet_loss_fraction` is known to be smaller than a threshold
+  //    AND
+  // 3. FEC is not decided or OFF.
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd40ms(),
+                                     kDefaultEncoderFrameLengthsMs, 20);
+  UpdateNetworkMetrics(controller.get(), kFl20msTo40msBandwidthBps,
+                       kFlIncreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 40);
+}
+
+TEST(FrameLengthControllerTest, DecreaseTo40MsOnHighUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor40msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  UpdateNetworkMetrics(controller.get(), kFl60msTo40msBandwidthBps,
+                       absl::nullopt, kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 40);
+}
+
+TEST(FrameLengthControllerTest, Maintain60MsOnMultipleConditions) {
+  // Maintain 60ms frame length if
+  // 1. `uplink_bandwidth_bps` is at medium level,
+  // 2. `uplink_packet_loss_fraction` is at medium,
+  // 3. FEC is not decided ON.
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  UpdateNetworkMetrics(controller.get(), kMediumBandwidthBps,
+                       kMediumPacketLossFraction, kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 60);
+}
+
+TEST(FrameLengthControllerTest, IncreaseTo60MsOnMultipleConditions) {
+  // Increase to 60ms frame length if
+  // 1. `uplink_bandwidth_bps` is known to be smaller than a threshold AND
+  // 2. `uplink_packet_loss_fraction` is known to be smaller than a threshold
+  //    AND
+  // 3. FEC is not decided or OFF.
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 20);
+  UpdateNetworkMetrics(controller.get(), kFl20msTo60msBandwidthBps,
+                       kFlIncreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 60);
+}
+
+TEST(FrameLengthControllerTest, IncreaseTo60MsOnVeryLowUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 20);
+  // We set packet loss fraction to kFlDecreasingPacketLossFraction, which
+  // should have prevented the frame length from increasing, if the uplink
+  // bandwidth was not this low.
+  UpdateNetworkMetrics(controller.get(), VeryLowBitrate(20),
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 60);
+}
+
+TEST(FrameLengthControllerTest, Maintain60MsOnVeryLowUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  // We set packet loss fraction to kFlDecreasingPacketLossFraction, which
+  // should have caused the frame length to decrease, if the uplink bandwidth
+  // was not this low.
+  UpdateNetworkMetrics(controller.get(), VeryLowBitrate(20),
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 60);
+}
+
+TEST(FrameLengthControllerTest, UpdateMultipleNetworkMetricsAtOnce) {
+  // This test is similar to IncreaseTo60MsOnMultipleConditions. But instead of
+  // using ::UpdateNetworkMetrics(...), which calls
+  // FrameLengthController::UpdateNetworkMetrics(...) multiple times, we call
+  // it only once. This is to verify that
+  // FrameLengthController::UpdateNetworkMetrics(...) can handle multiple
+  // network updates at once. This is, however, not a common use case in
+  // current audio_network_adaptor_impl.cc.
+ auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(), + kDefaultEncoderFrameLengthsMs, 20); + Controller::NetworkMetrics network_metrics; + network_metrics.uplink_bandwidth_bps = kFl20msTo60msBandwidthBps; + network_metrics.uplink_packet_loss_fraction = kFlIncreasingPacketLossFraction; + controller->UpdateNetworkMetrics(network_metrics); + CheckDecision(controller.get(), 60); +} + +TEST(FrameLengthControllerTest, + Maintain20MsIf60MsNotInReceiverFrameLengthRange) { + auto controller = + CreateController(CreateChangeCriteriaFor20msAnd60ms(), {20}, 20); + // Use a low uplink bandwidth and a low uplink packet loss fraction that would + // cause frame length to increase if receiver frame length included 60ms. + UpdateNetworkMetrics(controller.get(), kFl20msTo60msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); +} + +TEST(FrameLengthControllerTest, Maintain20MsOnMediumUplinkBandwidth) { + auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(), + kDefaultEncoderFrameLengthsMs, 20); + UpdateNetworkMetrics(controller.get(), kMediumBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); +} + +TEST(FrameLengthControllerTest, Maintain20MsOnMediumUplinkPacketLossFraction) { + auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(), + kDefaultEncoderFrameLengthsMs, 20); + // Use a low uplink bandwidth that would cause frame length to increase if + // uplink packet loss fraction was low. + UpdateNetworkMetrics(controller.get(), kFl20msTo60msBandwidthBps, + kMediumPacketLossFraction, kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); +} + +TEST(FrameLengthControllerTest, Maintain60MsWhenNo120msCriteriaIsSet) { + auto controller = CreateController(CreateChangeCriteriaFor20msAnd60ms(), + kDefaultEncoderFrameLengthsMs, 60); + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); +} + +TEST(FrameLengthControllerTest, From120MsTo20MsOnHighUplinkBandwidth) { + auto controller = CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(), + kDefaultEncoderFrameLengthsMs, 120); + // It takes two steps for frame length to go from 120ms to 20ms. + UpdateNetworkMetrics(controller.get(), kFl60msTo20msBandwidthBps, + absl::nullopt, kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + + UpdateNetworkMetrics(controller.get(), kFl60msTo20msBandwidthBps, + absl::nullopt, kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); +} + +TEST(FrameLengthControllerTest, From120MsTo20MsOnHighUplinkPacketLossFraction) { + auto controller = CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(), + kDefaultEncoderFrameLengthsMs, 120); + // It takes two steps for frame length to go from 120ms to 20ms. 
+  UpdateNetworkMetrics(controller.get(), absl::nullopt,
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 60);
+
+  UpdateNetworkMetrics(controller.get(), absl::nullopt,
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 20);
+}
+
+TEST(FrameLengthControllerTest, Maintain120MsOnVeryLowUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(),
+                                     kDefaultEncoderFrameLengthsMs, 120);
+  // We set packet loss fraction to kFlDecreasingPacketLossFraction, which
+  // should have caused the frame length to decrease, if the uplink bandwidth
+  // was not this low.
+  UpdateNetworkMetrics(controller.get(), VeryLowBitrate(60),
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 120);
+}
+
+TEST(FrameLengthControllerTest, From60MsTo120MsOnVeryLowUplinkBandwidth) {
+  auto controller = CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(),
+                                     kDefaultEncoderFrameLengthsMs, 60);
+  // We set packet loss fraction to kFlDecreasingPacketLossFraction, which
+  // should have prevented the frame length from increasing, if the uplink
+  // bandwidth was not this low.
+  UpdateNetworkMetrics(controller.get(), VeryLowBitrate(60),
+                       kFlDecreasingPacketLossFraction,
+                       kOverheadBytesPerPacket);
+  CheckDecision(controller.get(), 120);
+}
+
+TEST(FrameLengthControllerTest, From20MsTo120MsOnMultipleConditions) {
+  // Increase to 120ms frame length if
+  // 1. `uplink_bandwidth_bps` is known to be smaller than a threshold AND
+  // 2. `uplink_packet_loss_fraction` is known to be smaller than a threshold.
+  auto controller = CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(),
+                                     kDefaultEncoderFrameLengthsMs, 20);
+  // It takes two steps for frame length to go from 20ms to 120ms.
+ UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 120); +} + +TEST(FrameLengthControllerTest, Stall60MsIf120MsNotInReceiverFrameLengthRange) { + auto controller = + CreateController(CreateChangeCriteriaFor20ms60msAnd120ms(), {20, 60}, 20); + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); +} + +TEST(FrameLengthControllerTest, CheckBehaviorOnChangingNetworkMetrics) { + auto controller = + CreateController(CreateChangeCriteriaFor20ms40ms60msAnd120ms(), + kDefaultEncoderFrameLengthsMs, 20); + UpdateNetworkMetrics(controller.get(), kMediumBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); + + UpdateNetworkMetrics(controller.get(), kFl20msTo40msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 40); + + UpdateNetworkMetrics(controller.get(), kFl60msTo40msBandwidthBps, + kMediumPacketLossFraction, kOverheadBytesPerPacket); + CheckDecision(controller.get(), 40); + + UpdateNetworkMetrics(controller.get(), kFl20msTo60msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kMediumPacketLossFraction, kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + + UpdateNetworkMetrics(controller.get(), kFl60msTo120msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 120); + + UpdateNetworkMetrics(controller.get(), kFl120msTo60msBandwidthBps, + kFlIncreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 60); + + UpdateNetworkMetrics(controller.get(), kFl60msTo40msBandwidthBps, + kFlDecreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 40); + + UpdateNetworkMetrics(controller.get(), kMediumBandwidthBps, + kFlDecreasingPacketLossFraction, + kOverheadBytesPerPacket); + CheckDecision(controller.get(), 20); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.cc new file mode 100644 index 0000000000..36fc10ba82 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h"
+
+#include <algorithm>
+
+#include "absl/algorithm/container.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+int OverheadBps(int overhead_bytes_per_packet, int frame_length_ms) {
+  return overhead_bytes_per_packet * 8 * 1000 / frame_length_ms;
+}
+
+}  // namespace
+
+FrameLengthControllerV2::FrameLengthControllerV2(
+    rtc::ArrayView<const int> encoder_frame_lengths_ms,
+    int min_payload_bitrate_bps,
+    bool use_slow_adaptation)
+    : encoder_frame_lengths_ms_(encoder_frame_lengths_ms.begin(),
+                                encoder_frame_lengths_ms.end()),
+      min_payload_bitrate_bps_(min_payload_bitrate_bps),
+      use_slow_adaptation_(use_slow_adaptation) {
+  RTC_CHECK(!encoder_frame_lengths_ms_.empty());
+  absl::c_sort(encoder_frame_lengths_ms_);
+}
+
+void FrameLengthControllerV2::UpdateNetworkMetrics(
+    const NetworkMetrics& network_metrics) {
+  if (network_metrics.target_audio_bitrate_bps) {
+    target_bitrate_bps_ = network_metrics.target_audio_bitrate_bps;
+  }
+  if (network_metrics.overhead_bytes_per_packet) {
+    overhead_bytes_per_packet_ = network_metrics.overhead_bytes_per_packet;
+  }
+  if (network_metrics.uplink_bandwidth_bps) {
+    uplink_bandwidth_bps_ = network_metrics.uplink_bandwidth_bps;
+  }
+}
+
+void FrameLengthControllerV2::MakeDecision(AudioEncoderRuntimeConfig* config) {
+  if (!target_bitrate_bps_ || !overhead_bytes_per_packet_ ||
+      !uplink_bandwidth_bps_) {
+    return;
+  }
+
+  auto it =
+      absl::c_find_if(encoder_frame_lengths_ms_, [&](int frame_length_ms) {
+        int target = use_slow_adaptation_ ? *uplink_bandwidth_bps_
+                                          : *target_bitrate_bps_;
+        return target -
+                   OverheadBps(*overhead_bytes_per_packet_, frame_length_ms) >
+               min_payload_bitrate_bps_;
+      });
+
+  // Longest frame length is chosen if none match our criteria.
+  config->frame_length_ms = it != encoder_frame_lengths_ms_.end()
+                                ? *it
+                                : encoder_frame_lengths_ms_.back();
+}
+
+}  // namespace webrtc
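The selection rule above can be restated standalone: walk the sorted frame lengths and take the first (shortest) one whose payload bitrate stays above the floor. A sketch using the values exercised by the unit test further below (50-byte overhead, 16 kbps minimum payload; those numbers are assumptions borrowed from that test):

    #include <cstdio>
    #include <vector>

    // Standalone restatement of FrameLengthControllerV2's rule: pick the first
    // (shortest) frame length whose payload bitrate stays above the minimum.
    int ChooseFrameLengthMs(int target_bps, int overhead_bytes_per_packet,
                            int min_payload_bps,
                            const std::vector<int>& sorted_frame_lengths_ms) {
      for (int frame_length_ms : sorted_frame_lengths_ms) {
        int overhead_bps =
            overhead_bytes_per_packet * 8 * 1000 / frame_length_ms;
        if (target_bps - overhead_bps > min_payload_bps)
          return frame_length_ms;
      }
      return sorted_frame_lengths_ms.back();  // fall back to the longest
    }

    int main() {
      const std::vector<int> lengths = {20, 40, 60, 120};
      // 50-byte packets, 16 kbps payload floor:
      std::printf("%d\n", ChooseFrameLengthMs(50000, 50, 16000, lengths));  // 20
      std::printf("%d\n", ChooseFrameLengthMs(30000, 50, 16000, lengths));  // 40
      std::printf("%d\n", ChooseFrameLengthMs(20000, 50, 16000, lengths));  // 120
    }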
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h
new file mode 100644
index 0000000000..d7102b0b44
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_V2_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_V2_H_
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
+
+namespace webrtc {
+
+class FrameLengthControllerV2 final : public Controller {
+ public:
+  FrameLengthControllerV2(rtc::ArrayView<const int> encoder_frame_lengths_ms,
+                          int min_payload_bitrate_bps,
+                          bool use_slow_adaptation);
+
+  void UpdateNetworkMetrics(const NetworkMetrics& network_metrics) override;
+
+  void MakeDecision(AudioEncoderRuntimeConfig* config) override;
+
+ private:
+  std::vector<int> encoder_frame_lengths_ms_;
+  const int min_payload_bitrate_bps_;
+  const bool use_slow_adaptation_;
+
+  absl::optional<int> uplink_bandwidth_bps_;
+  absl::optional<int> target_bitrate_bps_;
+  absl::optional<int> overhead_bytes_per_packet_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_FRAME_LENGTH_CONTROLLER_V2_H_
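With use_slow_adaptation set, the same rule is fed the smoother uplink bandwidth estimate instead of the allocator's target bitrate. The slow-adaptation expectations of the unit test that follows fall out of the arithmetic directly; a compile-time check under those same assumed values (50-byte overhead, 16 kbps floor):

    constexpr int OverheadBps(int bytes, int frame_ms) {
      return bytes * 8 * 1000 / frame_ms;
    }
    // 40 kbps estimate: even 20 ms frames leave enough payload bitrate.
    static_assert(40000 - OverheadBps(50, 20) > 16000, "40 kbps -> 20 ms");
    // 30 kbps estimate: 20 ms starves the payload, 40 ms is the shortest fit.
    static_assert(30000 - OverheadBps(50, 20) <= 16000, "20 ms starves payload");
    static_assert(30000 - OverheadBps(50, 40) > 16000, "so 40 ms is chosen");
    // 20 kbps estimate: only the longest frame length clears the floor.
    static_assert(20000 - OverheadBps(50, 120) > 16000, "20 kbps -> 120 ms");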
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2_unittest.cc
new file mode 100644
index 0000000000..1c88f47c58
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2_unittest.cc
@@ -0,0 +1,121 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.h"
+
+#include <memory>
+#include <utility>
+
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kANASupportedFrameLengths[] = {20, 40, 60, 120};
+constexpr int kMinPayloadBitrateBps = 16000;
+
+}  // namespace
+
+class FrameLengthControllerV2Test : public testing::Test {
+ protected:
+  AudioEncoderRuntimeConfig GetDecision() {
+    AudioEncoderRuntimeConfig config;
+    controller_->MakeDecision(&config);
+    return config;
+  }
+
+  void SetOverhead(int overhead_bytes_per_packet) {
+    overhead_bytes_per_packet_ = overhead_bytes_per_packet;
+    Controller::NetworkMetrics metrics;
+    metrics.overhead_bytes_per_packet = overhead_bytes_per_packet;
+    controller_->UpdateNetworkMetrics(metrics);
+  }
+
+  void SetTargetBitrate(int target_audio_bitrate_bps) {
+    target_audio_bitrate_bps_ = target_audio_bitrate_bps;
+    Controller::NetworkMetrics metrics;
+    metrics.target_audio_bitrate_bps = target_audio_bitrate_bps;
+    controller_->UpdateNetworkMetrics(metrics);
+  }
+
+  void SetUplinkBandwidth(int uplink_bandwidth_bps) {
+    Controller::NetworkMetrics metrics;
+    metrics.uplink_bandwidth_bps = uplink_bandwidth_bps;
+    controller_->UpdateNetworkMetrics(metrics);
+  }
+
+  void ExpectFrameLengthDecision(int expected_frame_length_ms) {
+    auto config = GetDecision();
+    EXPECT_EQ(*config.frame_length_ms, expected_frame_length_ms);
+  }
+
+  std::unique_ptr<FrameLengthControllerV2> controller_ =
+      std::make_unique<FrameLengthControllerV2>(kANASupportedFrameLengths,
+                                                kMinPayloadBitrateBps,
+                                                /*use_slow_adaptation=*/false);
+  absl::optional<int> target_audio_bitrate_bps_;
+  absl::optional<int> overhead_bytes_per_packet_;
+};
+
+// Don't return any decision if we haven't received all required network
+// metrics.
+TEST_F(FrameLengthControllerV2Test, RequireNetworkMetrics) {
+  auto config = GetDecision();
+  EXPECT_FALSE(config.bitrate_bps);
+  EXPECT_FALSE(config.frame_length_ms);
+
+  SetOverhead(30);
+  config = GetDecision();
+  EXPECT_FALSE(config.frame_length_ms);
+
+  SetTargetBitrate(32000);
+  config = GetDecision();
+  EXPECT_FALSE(config.frame_length_ms);
+
+  SetUplinkBandwidth(32000);
+  config = GetDecision();
+  EXPECT_TRUE(config.frame_length_ms);
+}
+
+TEST_F(FrameLengthControllerV2Test, UseFastAdaptation) {
+  SetOverhead(50);
+  SetTargetBitrate(50000);
+  SetUplinkBandwidth(50000);
+  ExpectFrameLengthDecision(20);
+
+  SetTargetBitrate(20000);
+  ExpectFrameLengthDecision(120);
+
+  SetTargetBitrate(30000);
+  ExpectFrameLengthDecision(40);
+
+  SetTargetBitrate(25000);
+  ExpectFrameLengthDecision(60);
+}
+
+TEST_F(FrameLengthControllerV2Test, UseSlowAdaptation) {
+  controller_ = std::make_unique<FrameLengthControllerV2>(
+      kANASupportedFrameLengths, kMinPayloadBitrateBps,
+      /*use_slow_adaptation=*/true);
+  SetOverhead(50);
+  SetTargetBitrate(50000);
+  SetUplinkBandwidth(20000);
+  ExpectFrameLengthDecision(120);
+
+  SetUplinkBandwidth(30000);
+  ExpectFrameLengthDecision(40);
+
+  SetUplinkBandwidth(40000);
+  ExpectFrameLengthDecision(20);
+}
+
+}  // namespace webrtc
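Before the adaptor interface header below, a sketch of the call sequence a sender would typically drive against it. The metric values are illustrative, and the concrete implementation lives in audio_network_adaptor_impl.cc elsewhere in this patch series:

    #include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"

    // Feed fresh network state into the adaptor, then pull a decision.
    void OnNetworkUpdate(webrtc::AudioNetworkAdaptor& ana) {
      ana.SetUplinkBandwidth(32000);           // bps, from the bandwidth estimator
      ana.SetUplinkPacketLossFraction(0.02f);  // from RTCP feedback
      ana.SetRtt(80);                          // ms
      ana.SetTargetAudioBitrate(28000);        // bps, from the bitrate allocator
      ana.SetOverhead(50);                     // bytes per packet

      webrtc::AudioEncoderRuntimeConfig config = ana.GetEncoderRuntimeConfig();
      // config.bitrate_bps / frame_length_ms / enable_fec / enable_dtx are
      // then applied to the encoder by the caller.
    }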
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h
new file mode 100644
index 0000000000..346ed5db1a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_H_
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
+
+namespace webrtc {
+
+// An AudioNetworkAdaptor optimizes the audio experience by suggesting a
+// suitable runtime configuration (bit rate, frame length, FEC, etc.) to the
+// encoder based on network metrics.
+class AudioNetworkAdaptor {
+ public:
+  virtual ~AudioNetworkAdaptor() = default;
+
+  virtual void SetUplinkBandwidth(int uplink_bandwidth_bps) = 0;
+
+  virtual void SetUplinkPacketLossFraction(
+      float uplink_packet_loss_fraction) = 0;
+
+  virtual void SetRtt(int rtt_ms) = 0;
+
+  virtual void SetTargetAudioBitrate(int target_audio_bitrate_bps) = 0;
+
+  virtual void SetOverhead(size_t overhead_bytes_per_packet) = 0;
+
+  virtual AudioEncoderRuntimeConfig GetEncoderRuntimeConfig() = 0;
+
+  virtual void StartDebugDump(FILE* file_handle) = 0;
+
+  virtual void StopDebugDump() = 0;
+
+  virtual ANAStats GetStats() const = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h
new file mode 100644
index 0000000000..bd16292f7e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_CONFIG_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_CONFIG_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+struct AudioEncoderRuntimeConfig {
+  AudioEncoderRuntimeConfig();
+  AudioEncoderRuntimeConfig(const AudioEncoderRuntimeConfig& other);
+  ~AudioEncoderRuntimeConfig();
+  AudioEncoderRuntimeConfig& operator=(const AudioEncoderRuntimeConfig& other);
+  bool operator==(const AudioEncoderRuntimeConfig& other) const;
+  absl::optional<int> bitrate_bps;
+  absl::optional<int> frame_length_ms;
+  // Note: This is what we tell the encoder. It doesn't have to reflect
+  // the actual NetworkMetrics; it's subject to our decision.
+  absl::optional<float> uplink_packet_loss_fraction;
+  absl::optional<bool> enable_fec;
+  absl::optional<bool> enable_dtx;
+
+  // Some encoders can encode fewer channels than the actual input to make
+  // better use of the bandwidth. `num_channels` sets the number of channels
+  // to encode.
+  absl::optional<size_t> num_channels;
+
+  // This is true if the last frame length change was an increase; otherwise
+  // false.
+  // The value of this boolean is used to apply a different offset to the
+  // per-packet overhead that is reported by the BWE. The exact offset value
+  // is most important right after a frame length change, because the frame
+  // length change affects the overhead. In the steady state, the exact value
+  // is not important because the BWE will compensate.
+  bool last_fl_change_increase = false;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_INCLUDE_AUDIO_NETWORK_ADAPTOR_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h
new file mode 100644
index 0000000000..26a9061745
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_AUDIO_NETWORK_ADAPTOR_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_AUDIO_NETWORK_ADAPTOR_H_
+
+#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockAudioNetworkAdaptor : public AudioNetworkAdaptor {
+ public:
+  ~MockAudioNetworkAdaptor() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+
+  MOCK_METHOD(void, SetUplinkBandwidth, (int uplink_bandwidth_bps), (override));
+
+  MOCK_METHOD(void,
+              SetUplinkPacketLossFraction,
+              (float uplink_packet_loss_fraction),
+              (override));
+
+  MOCK_METHOD(void, SetRtt, (int rtt_ms), (override));
+
+  MOCK_METHOD(void,
+              SetTargetAudioBitrate,
+              (int target_audio_bitrate_bps),
+              (override));
+
+  MOCK_METHOD(void,
+              SetOverhead,
+              (size_t overhead_bytes_per_packet),
+              (override));
+
+  MOCK_METHOD(AudioEncoderRuntimeConfig,
+              GetEncoderRuntimeConfig,
+              (),
+              (override));
+
+  MOCK_METHOD(void, StartDebugDump, (FILE * file_handle), (override));
+
+  MOCK_METHOD(void, StopDebugDump, (), (override));
+
+  MOCK_METHOD(ANAStats, GetStats, (), (const, override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_AUDIO_NETWORK_ADAPTOR_H_
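A hypothetical gtest/gmock usage of the mock above, to show the intended shape of tests written against it; the test name and all values are made up for illustration:

    #include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h"
    #include "test/gmock.h"
    #include "test/gtest.h"

    using ::testing::Return;

    TEST(ExampleAudioEncoderTest, ForwardsMetricsToAna) {
      webrtc::MockAudioNetworkAdaptor ana;
      // Pin down what the code under test feeds the adaptor...
      EXPECT_CALL(ana, SetUplinkBandwidth(32000));

      // ...and script the decision it hands back.
      webrtc::AudioEncoderRuntimeConfig config;
      config.frame_length_ms = 60;
      EXPECT_CALL(ana, GetEncoderRuntimeConfig()).WillOnce(Return(config));

      ana.SetUplinkBandwidth(32000);  // normally done by the encoder under test
      EXPECT_EQ(*ana.GetEncoderRuntimeConfig().frame_length_ms, 60);
    }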
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller.h
new file mode 100644
index 0000000000..de554c0517
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller.h
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_H_
+
+#include "modules/audio_coding/audio_network_adaptor/controller.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockController : public Controller {
+ public:
+  ~MockController() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(void,
+              UpdateNetworkMetrics,
+              (const NetworkMetrics& network_metrics),
+              (override));
+  MOCK_METHOD(void,
+              MakeDecision,
+              (AudioEncoderRuntimeConfig * config),
+              (override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller_manager.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller_manager.h
new file mode 100644
index 0000000000..9e2fa466fc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_controller_manager.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_MANAGER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_MANAGER_H_
+
+#include <vector>
+
+#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockControllerManager : public ControllerManager {
+ public:
+  ~MockControllerManager() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(std::vector<Controller*>,
+              GetSortedControllers,
+              (const Controller::NetworkMetrics& metrics),
+              (override));
+  MOCK_METHOD(std::vector<Controller*>, GetControllers, (), (const, override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_CONTROLLER_MANAGER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h
new file mode 100644
index 0000000000..0c6a9efe1d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/mock/mock_debug_dump_writer.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_DEBUG_DUMP_WRITER_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_DEBUG_DUMP_WRITER_H_
+
+#include "modules/audio_coding/audio_network_adaptor/debug_dump_writer.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockDebugDumpWriter : public DebugDumpWriter {
+ public:
+  ~MockDebugDumpWriter() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+
+  MOCK_METHOD(void,
+              DumpEncoderRuntimeConfig,
+              (const AudioEncoderRuntimeConfig& config, int64_t timestamp),
+              (override));
+  MOCK_METHOD(void,
+              DumpNetworkMetrics,
+              (const Controller::NetworkMetrics& metrics, int64_t timestamp),
+              (override));
+#if WEBRTC_ENABLE_PROTOBUF
+  MOCK_METHOD(void,
+              DumpControllerManagerConfig,
+              (const audio_network_adaptor::config::ControllerManager&
+                   controller_manager_config,
+               int64_t timestamp),
+              (override));
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_MOCK_MOCK_DEBUG_DUMP_WRITER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/parse_ana_dump.py b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/parse_ana_dump.py
new file mode 100755
index 0000000000..9c07c18c84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/parse_ana_dump.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python2
+#  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+#  To run this script, please copy "out/<build_name>/pyproto/webrtc/modules/
+#  audio_coding/audio_network_adaptor/debug_dump_pb2.py" to this folder.
+#  Then you can run this script with:
+#  "python parse_ana_dump.py -m uplink_bandwidth_bps -f dump_file.dat"
+#  You can add as many metrics or decisions to the plot as you like.
+#  For more information, call:
+#  "python parse_ana_dump.py --help"
+
+import struct
+from optparse import OptionParser
+
+import matplotlib.pyplot as plt
+
+import debug_dump_pb2
+
+
+def GetNextMessageSize(file_to_parse):
+    data = file_to_parse.read(4)
+    if data == '':
+        return 0
+    return struct.unpack('<I', data)[0]
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve.h b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve.h
new file mode 100644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve.h
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_UTIL_THRESHOLD_CURVE_H_
+#define MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_UTIL_THRESHOLD_CURVE_H_
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+class ThresholdCurve {
+ public:
+  struct Point {
+    float x;
+    float y;
+  };
+
+  // The curve is defined by two points, a and b, with a.x <= b.x and
+  // a.y >= b.y. It divides the plane into a region below the curve, the
+  // curve itself, and a region above it:
+  //
+  // y-axis ^ |
+  //        | | a
+  //        |  \
+  //        |   \
+  //        |    \ b
+  //        |     \__________
+  //        |---------------> bandwidth
+  //
+  // If either a.x == b.x or a.y == b.y, the curve can be defined
+  // by a single point. (We merge the two points into one - either the lower or
+  // the leftmost one - for easier treatment.)
+  //
+  // y-axis ^ |
+  //        | |
+  //        | |
+  //        | |
+  //        | P|__________
+  //        |---------------> bandwidth
+  ThresholdCurve(const Point& left, const Point& right)
+      : a(GetPoint(left, right, true)),
+        b(GetPoint(left, right, false)),
+        slope(b.x - a.x == 0.0f ? 0.0f : (b.y - a.y) / (b.x - a.x)),
+        offset(a.y - slope * a.x) {
+    // TODO(eladalon): We might want to introduce some numerical validations.
+  }
+
+  ThresholdCurve(float a_x, float a_y, float b_x, float b_y)
+      : ThresholdCurve(Point{a_x, a_y}, Point{b_x, b_y}) {}
+
+  // Checks if a point is strictly below the curve.
+  bool IsBelowCurve(const Point& p) const {
+    if (p.x < a.x) {
+      return true;
+    } else if (p.x == a.x) {
+      // In principle, we could merge this into the next else, but to avoid
+      // numerical errors, we treat it separately.
+      return p.y < a.y;
+    } else if (a.x < p.x && p.x < b.x) {
+      return p.y < offset + slope * p.x;
+    } else {  // if (b.x <= p.x)
+      return p.y < b.y;
+    }
+  }
+
+  // Checks if a point is strictly above the curve.
+  bool IsAboveCurve(const Point& p) const {
+    if (p.x <= a.x) {
+      return false;
+    } else if (a.x < p.x && p.x < b.x) {
+      return p.y > offset + slope * p.x;
+    } else {  // if (b.x <= p.x)
+      return p.y > b.y;
+    }
+  }
+
+  bool operator<=(const ThresholdCurve& rhs) const {
+    // This curve is <= the rhs curve if no point from this curve is
+    // above a corresponding point from the rhs curve.
+    return !IsBelowCurve(rhs.a) && !IsBelowCurve(rhs.b) &&
+           !rhs.IsAboveCurve(a) && !rhs.IsAboveCurve(b);
+  }
+
+ private:
+  static const Point& GetPoint(const Point& left,
+                               const Point& right,
+                               bool is_for_left) {
+    RTC_DCHECK_LE(left.x, right.x);
+    RTC_DCHECK_GE(left.y, right.y);
+
+    // A shared X-value or Y-value merges the two points into the lower or
+    // the leftmost one, respectively.
+    if (left.x == right.x) {
+      return right;
+    } else if (left.y == right.y) {
+      return left;
+    }
+
+    // If unmerged, the boolean flag determines which of the points is desired.
+    return is_for_left ? left : right;
+  }
+
+  const Point a;
+  const Point b;
+  const float slope;
+  const float offset;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_AUDIO_NETWORK_ADAPTOR_UTIL_THRESHOLD_CURVE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve_unittest.cc b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve_unittest.cc
new file mode 100644
index 0000000000..dc3aec0b18
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/util/threshold_curve_unittest.cc
@@ -0,0 +1,632 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/audio_network_adaptor/util/threshold_curve.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+// A threshold curve divides 2D space into three domains - below, on and above
+// the threshold curve.
+// The curve is defined by two points. Those points, P1 and P2, are ordered so
+// that (P1.x <= P2.x && P1.y >= P2.y).
+// The part of the curve which is between the two points is hereon referred
+// to as the "segment".
+// A "ray" extends from P1 directly upwards into infinity; that's the "vertical
+// ray". Likewise, a "horizontal ray" extends from P2 directly rightwards.
+// +// ^ | // +// | | vertical ray // +// | | // +// | | // +// | P1| // +// | \ // +// | \ segment // +// | \ // +// | \ horizontal ray // +// | P2 ------------------ // +// *---------------------------> // + +namespace webrtc { + +namespace { +enum RelativePosition { kBelow, kOn, kAbove }; + +void CheckRelativePosition(const ThresholdCurve& curve, + ThresholdCurve::Point point, + RelativePosition pos) { + RTC_CHECK(pos == kBelow || pos == kOn || pos == kAbove); + + EXPECT_EQ(pos == kBelow, curve.IsBelowCurve(point)); + EXPECT_EQ(pos == kAbove, curve.IsAboveCurve(point)); +} +} // namespace + +// Test that the curve correctly reports the below/above position of points, +// when the curve is a "normal" one - P1 and P2 are different in both their +// X and Y values. +TEST(ThresholdCurveTest, PointPositionToCommonCurve) { + // The points (P1-P2) define the curve. // + // All other points are above/below/on the curve. // + // // + // ^ // + // | | // + // | A F J R V // + // | | // + // | B P1 K S W // + // | \ // + // | \ // + // | \ L // + // | \ // + // | C G M T X // + // | \ // + // | N \ // + // | \ // + // | D H O P2--Y---------------- // + // | E I Q U Z // + // *----------------------------------> // + constexpr ThresholdCurve::Point p1{1000, 2000}; + constexpr ThresholdCurve::Point p2{2000, 1000}; + + RTC_CHECK_GT((p1.x + p2.x) / 2, p1.x); + RTC_CHECK_LT((p1.x + p2.x) / 2, p2.x); + RTC_CHECK_LT((p1.y + p2.y) / 2, p1.y); + RTC_CHECK_GT((p1.y + p2.y) / 2, p2.y); + + const ThresholdCurve curve(p1, p2); + + { + // All cases where the point lies to the left of P1. + constexpr float x = p1.x - 1; + CheckRelativePosition(curve, {x, p1.y + 1}, kBelow); // A + CheckRelativePosition(curve, {x, p1.y + 0}, kBelow); // B + CheckRelativePosition(curve, {x, (p1.y + p2.y) / 2}, kBelow); // C + CheckRelativePosition(curve, {x, p2.y + 0}, kBelow); // D + CheckRelativePosition(curve, {x, p2.y - 1}, kBelow); // E + } + + { + // All cases where the point has the same x-value as P1. + constexpr float x = p1.x; + CheckRelativePosition(curve, {x, p1.y + 1}, kOn); // F + CheckRelativePosition(curve, {x, p1.y + 0}, kOn); // P1 + CheckRelativePosition(curve, {x, (p1.y + p2.y) / 2}, kBelow); // G + CheckRelativePosition(curve, {x, p2.y + 0}, kBelow); // H + CheckRelativePosition(curve, {x, p2.y - 1}, kBelow); // I + } + + { + // To make sure we're really covering all of the cases, make sure that P1 + // and P2 were chosen so that L would really be below K, and O would really + // be below N. (This would not hold if the Y values are too close together.) + RTC_CHECK_LT(((p1.y + p2.y) / 2) + 1, p1.y); + RTC_CHECK_LT(p2.y, ((p1.y + p2.y) / 2) - 1); + + // All cases where the point's x-value is between P1 and P2. + constexpr float x = (p1.x + p2.x) / 2; + CheckRelativePosition(curve, {x, p1.y + 1}, kAbove); // J + CheckRelativePosition(curve, {x, p1.y + 0}, kAbove); // K + CheckRelativePosition(curve, {x, ((p1.y + p2.y) / 2) + 1}, kAbove); // L + CheckRelativePosition(curve, {x, (p1.y + p2.y) / 2}, kOn); // M + CheckRelativePosition(curve, {x, ((p1.y + p2.y) / 2) - 1}, kBelow); // N + CheckRelativePosition(curve, {x, p2.y + 0}, kBelow); // O + CheckRelativePosition(curve, {x, p2.y - 1}, kBelow); // Q + } + + { + // All cases where the point has the same x-value as P2. 
+    constexpr float x = p2.x;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kAbove);           // R
+    CheckRelativePosition(curve, {x, p1.y + 0}, kAbove);           // S
+    CheckRelativePosition(curve, {x, (p1.y + p2.y) / 2}, kAbove);  // T
+    CheckRelativePosition(curve, {x, p2.y + 0}, kOn);              // P2
+    CheckRelativePosition(curve, {x, p2.y - 1}, kBelow);           // U
+  }
+
+  {
+    // All cases where the point lies to the right of P2.
+    constexpr float x = p2.x + 1;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kAbove);           // V
+    CheckRelativePosition(curve, {x, p1.y + 0}, kAbove);           // W
+    CheckRelativePosition(curve, {x, (p1.y + p2.y) / 2}, kAbove);  // X
+    CheckRelativePosition(curve, {x, p2.y + 0}, kOn);              // Y
+    CheckRelativePosition(curve, {x, p2.y - 1}, kBelow);           // Z
+  }
+}
+
+// Test that the curve correctly reports the below/above position of points,
+// when the curve is defined by two points with the same Y value.
+TEST(ThresholdCurveTest, PointPositionToCurveWithHorizontalSegment) {
+  // The points (P1-P2) define the curve.
+  // All other points are above/below/on the curve.
+  //
+  //  ^
+  //  |  |
+  //  |  |
+  //  |  A   D   F   I   K
+  //  |  |
+  //  |  |
+  //  |  B   P1--G--P2-L--
+  //  |  C   E   H   J   M
+  //  *------------------>
+
+  constexpr ThresholdCurve::Point p1{100, 200};
+  constexpr ThresholdCurve::Point p2{p1.x + 1, p1.y};
+
+  RTC_CHECK_GT((p1.x + p2.x) / 2, p1.x);
+  RTC_CHECK_LT((p1.x + p2.x) / 2, p2.x);
+
+  const ThresholdCurve curve(p1, p2);
+
+  {
+    // All cases where the point lies to the left of P1.
+    constexpr float x = p1.x - 1;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kBelow);  // A
+    CheckRelativePosition(curve, {x, p1.y + 0}, kBelow);  // B
+    CheckRelativePosition(curve, {x, p1.y - 1}, kBelow);  // C
+  }
+
+  {
+    // All cases where the point has the same x-value as P1.
+    constexpr float x = p1.x;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kOn);     // D
+    CheckRelativePosition(curve, {x, p1.y + 0}, kOn);     // P1
+    CheckRelativePosition(curve, {x, p1.y - 1}, kBelow);  // E
+  }
+
+  {
+    // All cases where the point's x-value is between P1 and P2.
+    constexpr float x = (p1.x + p2.x) / 2;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kAbove);  // F
+    CheckRelativePosition(curve, {x, p1.y + 0}, kOn);     // G
+    CheckRelativePosition(curve, {x, p1.y - 1}, kBelow);  // H
+  }
+
+  {
+    // All cases where the point has the same x-value as P2.
+    constexpr float x = p2.x;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kAbove);  // I
+    CheckRelativePosition(curve, {x, p1.y + 0}, kOn);     // P2
+    CheckRelativePosition(curve, {x, p1.y - 1}, kBelow);  // J
+  }
+
+  {
+    // All cases where the point lies to the right of P2.
+    constexpr float x = p2.x + 1;
+    CheckRelativePosition(curve, {x, p1.y + 1}, kAbove);  // K
+    CheckRelativePosition(curve, {x, p1.y + 0}, kOn);     // L
+    CheckRelativePosition(curve, {x, p1.y - 1}, kBelow);  // M
+  }
+}
+
+// Test that the curve correctly reports the below/above position of points,
+// when the curve is defined by two points with the same X value.
+TEST(ThresholdCurveTest, PointPositionToCurveWithVerticalSegment) {
+  // The points (P1-P2) define the curve.
+  // All other points are above/below/on the curve.
+ // + // ^ + // | | + // | A B C + // | | + // | D P1 E + // | | + // | F G H + // | | + // | I P2--J------ + // | K L M + // *------------------> + + constexpr ThresholdCurve::Point p1{100, 200}; + constexpr ThresholdCurve::Point p2{p1.x, p1.y - 1}; + + constexpr float left = p1.x - 1; + constexpr float on = p1.x; + constexpr float right = p1.x + 1; + + RTC_CHECK_LT((p1.y + p2.y) / 2, p1.y); + RTC_CHECK_GT((p1.y + p2.y) / 2, p2.y); + + const ThresholdCurve curve(p1, p2); + + { + // All cases where the point lies above P1. + constexpr float y = p1.y + 1; + CheckRelativePosition(curve, {left, y}, kBelow); // A + CheckRelativePosition(curve, {on, y}, kOn); // B + CheckRelativePosition(curve, {right, y}, kAbove); // C + } + + { + // All cases where the point has the same y-value as P1. + constexpr float y = p1.y; + CheckRelativePosition(curve, {left, y}, kBelow); // D + CheckRelativePosition(curve, {on, y}, kOn); // P1 + CheckRelativePosition(curve, {right, y}, kAbove); // E + } + + { + // All cases where the point's y-value is between P1 and P2. + constexpr float y = (p1.y + p2.y) / 2; + CheckRelativePosition(curve, {left, y}, kBelow); // F + CheckRelativePosition(curve, {on, y}, kOn); // G + CheckRelativePosition(curve, {right, y}, kAbove); // H + } + + { + // All cases where the point has the same y-value as P2. + constexpr float y = p2.y; + CheckRelativePosition(curve, {left, y}, kBelow); // I + CheckRelativePosition(curve, {on, y}, kOn); // P2 + CheckRelativePosition(curve, {right, y}, kOn); // J + } + + { + // All cases where the point lies below P2. + constexpr float y = p2.y - 1; + CheckRelativePosition(curve, {left, y}, kBelow); // K + CheckRelativePosition(curve, {on, y}, kBelow); // L + CheckRelativePosition(curve, {right, y}, kBelow); // M + } +} + +// Test that the curve correctly reports the below/above position of points, +// when the curve is defined by two points which are identical. +TEST(ThresholdCurveTest, PointPositionCurveWithNullSegment) { + // The points (P1-P2) define the curve. + // All other points are above/below/on the curve. + // + // ^ + // | | + // | A D F + // | | + // | B P---G------ + // | C E H + // *------------------> + + constexpr ThresholdCurve::Point p{100, 200}; + + const ThresholdCurve curve(p, p); + + { + // All cases where the point lies to the left of P. + constexpr float x = p.x - 1; + CheckRelativePosition(curve, {x, p.y + 1}, kBelow); // A + CheckRelativePosition(curve, {x, p.y + 0}, kBelow); // B + CheckRelativePosition(curve, {x, p.y - 1}, kBelow); // C + } + + { + // All cases where the point has the same x-value as P. + constexpr float x = p.x + 0; + CheckRelativePosition(curve, {x, p.y + 1}, kOn); // D + CheckRelativePosition(curve, {x, p.y + 0}, kOn); // P + CheckRelativePosition(curve, {x, p.y - 1}, kBelow); // E + } + + { + // All cases where the point lies to the right of P. + constexpr float x = p.x + 1; + CheckRelativePosition(curve, {x, p.y + 1}, kAbove); // F + CheckRelativePosition(curve, {x, p.y + 0}, kOn); // G + CheckRelativePosition(curve, {x, p.y - 1}, kBelow); // H + } +} + +// Test that the relative position of two curves is computed correctly when +// the two curves have the same projection on the X-axis. 
+TEST(ThresholdCurveTest, TwoCurvesSegmentHasSameProjectionAxisX) {
+  //  ^                        //
+  //  | C1 + C2                //
+  //  |  |                     //
+  //  |  |\                    //
+  //  |  | \                   //
+  //  |  \  \                  //
+  //  |   \  \                 //
+  //  |    \  \                //
+  //  |     \  -------- C2     //
+  //  |      --------- C1      //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  // Same x-values, but higher on Y. (Can be parallel, but doesn't have to be.)
+  constexpr ThresholdCurve::Point c2_left{c1_left.x, c1_left.y + 20};
+  constexpr ThresholdCurve::Point c2_right{c1_right.x, c1_right.y + 10};
+  const ThresholdCurve c2_curve(c2_left, c2_right);
+
+  EXPECT_TRUE(c1_curve <= c2_curve);
+  EXPECT_FALSE(c2_curve <= c1_curve);
+}
+
+// Test that the relative position of two curves is computed correctly when
+// the higher curve's projection on the X-axis is a strict subset of the
+// lower curve's projection on the X-axis (on both ends).
+TEST(ThresholdCurveTest, TwoCurvesSegmentOfHigherSubsetProjectionAxisX) {
+  //  ^                        //
+  //  | C1  C2                 //
+  //  |  |  |                  //
+  //  |  |  |                  //
+  //  |  \  |                  //
+  //  |   \ |                  //
+  //  |    \ \                 //
+  //  |     \ \                //
+  //  |      \ --------- C2    //
+  //  |       \                //
+  //  |        \               //
+  //  |         ---------C1    //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  constexpr ThresholdCurve::Point c2_left{6, 11};
+  constexpr ThresholdCurve::Point c2_right{9, 7};
+  const ThresholdCurve c2_curve(c2_left, c2_right);
+
+  EXPECT_TRUE(c1_curve <= c2_curve);
+  EXPECT_FALSE(c2_curve <= c1_curve);
+}
+
+// Test that the relative position of two curves is computed correctly when
+// the higher curve's right point is above lower curve's horizontal ray
+// (meaning the higher curve's projection on the X-axis extends further right
+// than the lower curve's).
+TEST(ThresholdCurveTest,
+     TwoCurvesRightPointOfHigherCurveAboveHorizontalRayOfLower) {
+  //  ^                        //
+  //  | C1 + C2                //
+  //  |  |                     //
+  //  |  |\                    //
+  //  |  | \                   //
+  //  |  |  \                  //
+  //  |  |   \                 //
+  //  |  |    \                //
+  //  |  \     \               //
+  //  |   \     \              //
+  //  |    \     \             //
+  //  |     \     ----- C2     //
+  //  |      --------- C1      //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  constexpr ThresholdCurve::Point c2_left{c1_left.x, c1_left.y + 1};
+  constexpr ThresholdCurve::Point c2_right{c1_right.x + 1, c1_right.y + 1};
+  const ThresholdCurve c2_curve(c2_left, c2_right);
+
+  EXPECT_TRUE(c1_curve <= c2_curve);
+  EXPECT_FALSE(c2_curve <= c1_curve);
+}
+
+// Test that the relative position of two curves is computed correctly when
+// the higher curve's points are on the lower curve's rays (left point on the
+// vertical ray, right point on the horizontal ray).
+TEST(ThresholdCurveTest, TwoCurvesPointsOfHigherOnRaysOfLower) {
+  //  ^                        //
+  //  | C1 + C2                //
+  //  |  |                     //
+  //  |  |\                    //
+  //  |  | \                   //
+  //  |  \  \                  //
+  //  |   \  \                 //
+  //  |    \  \                //
+  //  |     \  \               //
+  //  |      ----- C1 + C2     //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  // Same x-values, but one of the points is higher on Y (the other isn't).
+ constexpr ThresholdCurve::Point c2_left{c1_left.x, c1_left.y + 2}; + constexpr ThresholdCurve::Point c2_right{c1_right.x + 3, c1_right.y}; + const ThresholdCurve c2_curve(c2_left, c2_right); + + EXPECT_TRUE(c1_curve <= c2_curve); + EXPECT_FALSE(c2_curve <= c1_curve); +} + +// Test that the relative position of two curves is computed correctly when +// the second curve's segment intersects the first curve's vertical ray. +TEST(ThresholdCurveTest, SecondCurveCrossesVerticalRayOfFirstCurve) { + // ^ // + // | C2 C1 // + // | | | // + // | \| // + // | | // + // | |\ // + // | | \ // + // | \ \ // + // | \ \ // + // | \ \ // + // | \ ------- C2 // + // | -------- C1 // + // *---------------------> // + + constexpr ThresholdCurve::Point c1_left{5, 10}; + constexpr ThresholdCurve::Point c1_right{10, 5}; + const ThresholdCurve c1_curve(c1_left, c1_right); + + constexpr ThresholdCurve::Point c2_left{c1_left.x - 1, c1_left.y + 1}; + constexpr ThresholdCurve::Point c2_right{c1_right.x, c1_right.y + 1}; + const ThresholdCurve c2_curve(c2_left, c2_right); + + EXPECT_FALSE(c1_curve <= c2_curve); + EXPECT_FALSE(c2_curve <= c1_curve); +} + +// Test that the relative position of two curves is computed correctly when +// the second curve's segment intersects the first curve's horizontal ray. +TEST(ThresholdCurveTest, SecondCurveCrossesHorizontalRayOfFirstCurve) { + // ^ // + // | C1 + C2 // + // | | // + // | |\ // + // | \ \ // + // | \ \ // + // | \ \ // + // | \ \ // + // | ----------- C1 // + // | \ // + // | ------- C2 // + // *--------------------> // + + constexpr ThresholdCurve::Point c1_left{5, 10}; + constexpr ThresholdCurve::Point c1_right{10, 5}; + const ThresholdCurve c1_curve(c1_left, c1_right); + + constexpr ThresholdCurve::Point c2_left{c1_left.x, c1_left.y + 1}; + constexpr ThresholdCurve::Point c2_right{c1_right.x + 2, c1_right.y - 1}; + const ThresholdCurve c2_curve(c2_left, c2_right); + + EXPECT_FALSE(c1_curve <= c2_curve); + EXPECT_FALSE(c2_curve <= c1_curve); +} + +// Test that the relative position of two curves is computed correctly when +// the second curve's segment intersects the first curve's segment. +TEST(ThresholdCurveTest, TwoCurvesWithCrossingSegments) { + // ^ // + // | C2 C1 // + // | | | // + // | | | // + // | | \ // + // | | \ // + // | -_ \ // + // | -_ \ // + // | -_\ // + // | -_ // + // | \-_ // + // | \ ---------- C2 // + // | ----------- C1 // + // | // + // | // + // *-------------------------> // + + constexpr ThresholdCurve::Point c1_left{5, 10}; + constexpr ThresholdCurve::Point c1_right{10, 5}; + const ThresholdCurve c1_curve(c1_left, c1_right); + + constexpr ThresholdCurve::Point c2_left{4, 9}; + constexpr ThresholdCurve::Point c2_right{10, 6}; + const ThresholdCurve c2_curve(c2_left, c2_right); + + // The test is structured so that the two curves intersect at (8, 7). + RTC_CHECK(!c1_curve.IsAboveCurve({8, 7})); + RTC_CHECK(!c1_curve.IsBelowCurve({8, 7})); + RTC_CHECK(!c2_curve.IsAboveCurve({8, 7})); + RTC_CHECK(!c2_curve.IsBelowCurve({8, 7})); + + EXPECT_FALSE(c1_curve <= c2_curve); + EXPECT_FALSE(c2_curve <= c1_curve); +} + +// Test that the relative position of two curves is computed correctly when +// both curves are identical. 
+TEST(ThresholdCurveTest, IdenticalCurves) {
+  //  ^                        //
+  //  | C1 + C2                //
+  //  |  |                     //
+  //  |  |                     //
+  //  |   \                    //
+  //  |    \                   //
+  //  |     \                  //
+  //  |      ------- C1 + C2   //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point left{5, 10};
+  constexpr ThresholdCurve::Point right{10, 5};
+
+  const ThresholdCurve c1_curve(left, right);
+  const ThresholdCurve c2_curve(left, right);
+
+  EXPECT_TRUE(c1_curve <= c2_curve);
+  EXPECT_TRUE(c2_curve <= c1_curve);
+}
+
+// Test that the relative position of two curves is computed correctly when
+// they are "nearly identical" - the first curve's segment is contained within
+// the second curve's segment, but the second curve's segment extends further
+// to the left (which also produces separate vertical rays for the curves).
+TEST(ThresholdCurveTest, NearlyIdenticalCurvesSecondContinuesOnOtherLeftSide) {
+  //  ^                        //
+  //  | C2 C1                  //
+  //  |  | |                   //
+  //  |  | |                   //
+  //  |  \|                    //
+  //  |   |                    //
+  //  |   \                    //
+  //  |    \                   //
+  //  |     \                  //
+  //  |      ----- C1 + C2     //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  constexpr ThresholdCurve::Point c2_left{c1_left.x - 1, c1_left.y + 1};
+  constexpr ThresholdCurve::Point c2_right = c1_right;
+  const ThresholdCurve c2_curve(c2_left, c2_right);
+
+  EXPECT_FALSE(c1_curve <= c2_curve);
+  EXPECT_TRUE(c2_curve <= c1_curve);
+}
+
+// Test that the relative position of two curves is computed correctly when
+// they are "nearly identical" - the first curve's segment is contained within
+// the second curve's segment, but the second curve's segment extends further
+// to the right (which also produces separate horizontal rays for the curves).
+TEST(ThresholdCurveTest, NearlyIdenticalCurvesSecondContinuesOnOtherRightSide) {
+  //  ^                        //
+  //  | C1 + C2                //
+  //  |  |                     //
+  //  |  |                     //
+  //  |   \                    //
+  //  |    \                   //
+  //  |     \                  //
+  //  |      \----------- C1   //
+  //  |       \                //
+  //  |        ---------- C2   //
+  //  *--------------------->  //
+
+  constexpr ThresholdCurve::Point c1_left{5, 10};
+  constexpr ThresholdCurve::Point c1_right{10, 5};
+  const ThresholdCurve c1_curve(c1_left, c1_right);
+
+  constexpr ThresholdCurve::Point c2_left = c1_left;
+  constexpr ThresholdCurve::Point c2_right{c1_right.x + 1, c1_right.y - 1};
+  const ThresholdCurve c2_curve(c2_left, c2_right);
+
+  EXPECT_FALSE(c1_curve <= c2_curve);
+  EXPECT_TRUE(c2_curve <= c1_curve);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// The higher-left point must be given as the first point, and the lower-right
+// point must be given as the second.
+// This necessarily produces a non-positive slope.
+TEST(ThresholdCurveDeathTest, WrongOrderPoints) {
+  std::unique_ptr<ThresholdCurve> curve;
+  constexpr ThresholdCurve::Point left{5, 10};
+  constexpr ThresholdCurve::Point right{10, 5};
+  EXPECT_DEATH(curve.reset(new ThresholdCurve(right, left)), "");
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_config_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_config_gn/moz.build
new file mode 100644
index 0000000000..37cf81ad9f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_config_gn/moz.build
@@ -0,0 +1,217 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
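(Editorial aside, before the generated build files resume: the point-classification rules that the `ThresholdCurveTest` cases above pin down can be restated in a few lines. The sketch below is a hypothetical re-derivation for illustration only; `MiniPoint`, `MiniCurve`, `IsBelow` and `IsAbove` are our names, not the real `ThresholdCurve` API. It shows how a two-point curve, with a vertical ray going up from the higher-left point and a horizontal ray going right from the lower-right point, splits the plane into a below-left region, the curve itself, and an above-right region.)

```cpp
#include <cassert>

// Hypothetical miniature of the tested geometry; not upstream code.
struct MiniPoint { float x, y; };

struct MiniCurve {
  MiniPoint a, b;  // Higher-left point first, lower-right point second.

  // Strictly below-left of the curve.
  bool IsBelow(MiniPoint p) const {
    if (p.x < a.x) return true;        // Left of the vertical ray.
    if (p.x > b.x) return p.y < b.y;   // Under the horizontal ray.
    if (a.x == b.x) return p.y < b.y;  // Null segment: only x == a.x remains.
    // Under the segment: linear interpolation between a and b.
    const float t = (p.x - a.x) / (b.x - a.x);
    return p.y < a.y + t * (b.y - a.y);
  }

  // Strictly above-right of the curve.
  bool IsAbove(MiniPoint p) const {
    if (p.x <= a.x) return false;     // On or left of the vertical ray.
    if (p.x > b.x) return p.y > b.y;  // Over the horizontal ray.
    const float t = (p.x - a.x) / (b.x - a.x);
    return p.y > a.y + t * (b.y - a.y);
  }
};

int main() {
  // Mirrors PointPositionCurveWithNullSegment: a curve defined by two
  // identical points still partitions the plane the same way.
  const MiniCurve c{{100, 200}, {100, 200}};
  assert(c.IsBelow({99, 201}) && c.IsBelow({99, 199}));       // A and C.
  assert(!c.IsBelow({100, 201}) && !c.IsAbove({100, 201}));   // D is "on".
  assert(c.IsAbove({101, 201}));                              // F.
  assert(!c.IsAbove({101, 200}) && !c.IsBelow({101, 200}));   // G is "on".
}
```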
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_config.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_network_adaptor_config_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_gn/moz.build new file mode 100644 index 0000000000..1d0905ebc1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor_gn/moz.build @@ -0,0 +1,242 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/bitrate_controller.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/channel_controller.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/controller_manager.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/debug_dump_writer.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/dtx_controller.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/event_log_writer.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/fec_controller_plr_based.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller.cc", + "/third_party/libwebrtc/modules/audio_coding/audio_network_adaptor/frame_length_controller_v2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + 
DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_network_adaptor_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h b/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h new file mode 100644 index 0000000000..b7b15cdd6e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file is for backwards compatibility only! Use
+// webrtc/api/audio_codecs/audio_decoder.h instead!
+// TODO(kwiberg): Remove it.
+
+#ifndef MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
+#define MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
+
+#include "api/audio_codecs/audio_decoder.h"
+
+#endif  // MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h b/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h
new file mode 100644
index 0000000000..010ae6705f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h
@@ -0,0 +1,20 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file is for backwards compatibility only! Use
+// webrtc/api/audio_codecs/audio_encoder.h instead!
+// TODO(ossu): Remove it.
+
+#ifndef MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
+#define MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
+
+#include "api/audio_codecs/audio_encoder.h"
+
+#endif  // MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
new file mode 100644
index 0000000000..4a2b261a59
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
@@ -0,0 +1,168 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(AudioDecoderFactoryTest, CreateUnknownDecoder) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("rey", 8000, 1), absl::nullopt));
+}
+
+TEST(AudioDecoderFactoryTest, CreatePcmu) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // PCMu supports 8 kHz, and any number of channels.
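+  // ("Any number" up to AudioDecoder::kMaxNumberOfChannels; the
+  // MaxNrOfChannels test below checks the upper bound, and zero channels is
+  // rejected here.)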
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 0), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 1), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 2), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 3), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 16000, 1), absl::nullopt));
+}
+
+TEST(AudioDecoderFactoryTest, CreatePcma) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // PCMa supports 8 kHz, and any number of channels.
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 0), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 1), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 2), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 3), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("pcma", 16000, 1), absl::nullopt));
+}
+
+TEST(AudioDecoderFactoryTest, CreateIlbc) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // iLBC supports 8 kHz, 1 channel.
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 0), absl::nullopt));
+#ifdef WEBRTC_CODEC_ILBC
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 1), absl::nullopt));
+#endif
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 2), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 16000, 1), absl::nullopt));
+}
+
+TEST(AudioDecoderFactoryTest, CreateL16) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // L16 supports any clock rate and any number of channels up to 24.
+  const int clockrates[] = {8000, 16000, 32000, 48000};
+  const int num_channels[] = {1, 2, 3, 24};
+  for (int clockrate : clockrates) {
+    EXPECT_FALSE(adf->MakeAudioDecoder(SdpAudioFormat("l16", clockrate, 0),
+                                       absl::nullopt));
+    for (int channels : num_channels) {
+      EXPECT_TRUE(adf->MakeAudioDecoder(
+          SdpAudioFormat("l16", clockrate, channels), absl::nullopt));
+    }
+  }
+}
+
+// Tests that using more channels than the maximum does not work.
+TEST(AudioDecoderFactoryTest, MaxNrOfChannels) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  std::vector<std::string> codecs = {
+#ifdef WEBRTC_CODEC_OPUS
+      "opus",
+#endif
+#ifdef WEBRTC_CODEC_ILBC
+      "ilbc",
+#endif
+      "pcmu",
+      "pcma",
+      "l16",
+      "G722",
+      "G711",
+  };
+
+  for (auto codec : codecs) {
+    EXPECT_FALSE(adf->MakeAudioDecoder(
+        SdpAudioFormat(codec, 32000, AudioDecoder::kMaxNumberOfChannels + 1),
+        absl::nullopt));
+  }
+}
+
+TEST(AudioDecoderFactoryTest, CreateG722) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // g722 supports 8 kHz, 1-2 channels.
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 0), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 1), absl::nullopt));
+  EXPECT_TRUE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 2), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 3), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 16000, 1), absl::nullopt));
+  EXPECT_FALSE(
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 32000, 1), absl::nullopt));
+
+  // g722 actually uses a 16 kHz sample rate instead of the nominal 8 kHz.
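+  // (The 8000 Hz figure is the RTP clock rate, kept for historical reasons;
+  // see RFC 3551, section 4.5.2.)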
+  std::unique_ptr<AudioDecoder> dec =
+      adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 1), absl::nullopt);
+  EXPECT_EQ(16000, dec->SampleRateHz());
+}
+
+TEST(AudioDecoderFactoryTest, CreateOpus) {
+  rtc::scoped_refptr<AudioDecoderFactory> adf =
+      CreateBuiltinAudioDecoderFactory();
+  ASSERT_TRUE(adf);
+  // Opus supports 48 kHz, 2 channels, and wants a "stereo" parameter whose
+  // value is either "0" or "1".
+  for (int hz : {8000, 16000, 32000, 48000}) {
+    for (int channels : {0, 1, 2, 3}) {
+      for (std::string stereo : {"XX", "0", "1", "2"}) {
+        SdpAudioFormat::Parameters params;
+        if (stereo != "XX") {
+          params["stereo"] = stereo;
+        }
+        const bool good = (hz == 48000 && channels == 2 &&
+                           (stereo == "XX" || stereo == "0" || stereo == "1"));
+        EXPECT_EQ(good,
+                  static_cast<bool>(adf->MakeAudioDecoder(
+                      SdpAudioFormat("opus", hz, channels, std::move(params)),
+                      absl::nullopt)));
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc
new file mode 100644
index 0000000000..26ae1eda8a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc
@@ -0,0 +1,178 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+class AudioEncoderFactoryTest
+    : public ::testing::TestWithParam<rtc::scoped_refptr<AudioEncoderFactory>> {
+};
+
+TEST_P(AudioEncoderFactoryTest, SupportsAtLeastOneFormat) {
+  auto factory = GetParam();
+  auto supported_encoders = factory->GetSupportedEncoders();
+  EXPECT_FALSE(supported_encoders.empty());
+}
+
+TEST_P(AudioEncoderFactoryTest, CanQueryAllSupportedFormats) {
+  auto factory = GetParam();
+  auto supported_encoders = factory->GetSupportedEncoders();
+  for (const auto& spec : supported_encoders) {
+    auto info = factory->QueryAudioEncoder(spec.format);
+    EXPECT_TRUE(info);
+  }
+}
+
+TEST_P(AudioEncoderFactoryTest, CanConstructAllSupportedEncoders) {
+  auto factory = GetParam();
+  auto supported_encoders = factory->GetSupportedEncoders();
+  for (const auto& spec : supported_encoders) {
+    auto info = factory->QueryAudioEncoder(spec.format);
+    auto encoder = factory->MakeAudioEncoder(127, spec.format, absl::nullopt);
+    EXPECT_TRUE(encoder);
+    EXPECT_EQ(encoder->SampleRateHz(), info->sample_rate_hz);
+    EXPECT_EQ(encoder->NumChannels(), info->num_channels);
+    EXPECT_EQ(encoder->RtpTimestampRateHz(), spec.format.clockrate_hz);
+  }
+}
+
+TEST_P(AudioEncoderFactoryTest, CanRunAllSupportedEncoders) {
+  constexpr int kTestPayloadType = 127;
+  auto factory = GetParam();
+  auto supported_encoders = factory->GetSupportedEncoders();
+  for (const auto& spec : supported_encoders) {
+    auto encoder =
+        factory->MakeAudioEncoder(kTestPayloadType, spec.format, absl::nullopt);
+    EXPECT_TRUE(encoder);
+    encoder->Reset();
+    const int num_samples = rtc::checked_cast<int>(
+        encoder->SampleRateHz() * encoder->NumChannels() / 100);
+    rtc::Buffer out;
+    rtc::BufferT<int16_t> audio;
+    audio.SetData(num_samples, [](rtc::ArrayView<int16_t> audio) {
+      for (size_t i = 0; i != audio.size(); ++i) {
+        // Just put some numbers in there, ensure they're within range.
+        audio[i] =
+            static_cast<int16_t>(i & std::numeric_limits<int16_t>::max());
+      }
+      return audio.size();
+    });
+    // This is here to stop the test going forever with a broken encoder.
+    constexpr int kMaxEncodeCalls = 100;
+    int blocks = 0;
+    for (; blocks < kMaxEncodeCalls; ++blocks) {
+      AudioEncoder::EncodedInfo info = encoder->Encode(
+          blocks * encoder->RtpTimestampRateHz() / 100, audio, &out);
+      EXPECT_EQ(info.encoded_bytes, out.size());
+      if (info.encoded_bytes > 0) {
+        EXPECT_EQ(0u, info.encoded_timestamp);
+        EXPECT_EQ(kTestPayloadType, info.payload_type);
+        break;
+      }
+    }
+    ASSERT_LT(blocks, kMaxEncodeCalls);
+    const unsigned int next_timestamp =
+        blocks * encoder->RtpTimestampRateHz() / 100;
+    out.Clear();
+    for (; blocks < kMaxEncodeCalls; ++blocks) {
+      AudioEncoder::EncodedInfo info = encoder->Encode(
+          blocks * encoder->RtpTimestampRateHz() / 100, audio, &out);
+      EXPECT_EQ(info.encoded_bytes, out.size());
+      if (info.encoded_bytes > 0) {
+        EXPECT_EQ(next_timestamp, info.encoded_timestamp);
+        EXPECT_EQ(kTestPayloadType, info.payload_type);
+        break;
+      }
+    }
+    ASSERT_LT(blocks, kMaxEncodeCalls);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(BuiltinAudioEncoderFactoryTest,
+                         AudioEncoderFactoryTest,
+                         ::testing::Values(CreateBuiltinAudioEncoderFactory()));
+
+TEST(BuiltinAudioEncoderFactoryTest, SupportsTheExpectedFormats) {
+  using ::testing::ElementsAreArray;
+  // Check that we claim to support the formats we expect from build flags, and
+  // we've ordered them correctly.
+  auto factory = CreateBuiltinAudioEncoderFactory();
+  auto specs = factory->GetSupportedEncoders();
+
+  const std::vector<SdpAudioFormat> supported_formats = [&specs] {
+    std::vector<SdpAudioFormat> formats;
+    formats.reserve(specs.size());
+    for (const auto& spec : specs) {
+      formats.push_back(spec.format);
+    }
+    return formats;
+  }();
+
+  const std::vector<SdpAudioFormat> expected_formats = {
+#ifdef WEBRTC_CODEC_OPUS
+      {"opus", 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}}},
+#endif
+#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
+      {"isac", 16000, 1},
+#endif
+#ifdef WEBRTC_CODEC_ISAC
+      {"isac", 32000, 1},
+#endif
+      {"G722", 8000, 1},
+#ifdef WEBRTC_CODEC_ILBC
+      {"ilbc", 8000, 1},
+#endif
+      {"pcmu", 8000, 1},
+      {"pcma", 8000, 1}
+  };
+
+  ASSERT_THAT(supported_formats, ElementsAreArray(expected_formats));
+}
+
+// Tests that using more channels than the maximum does not work.
+TEST(BuiltinAudioEncoderFactoryTest, MaxNrOfChannels) {
+  rtc::scoped_refptr<AudioEncoderFactory> aef =
+      CreateBuiltinAudioEncoderFactory();
+  std::vector<std::string> codecs = {
+#ifdef WEBRTC_CODEC_OPUS
+      "opus",
+#endif
+#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
+      "isac",
+#endif
+#ifdef WEBRTC_CODEC_ILBC
+      "ilbc",
+#endif
+      "pcmu",
+      "pcma",
+      "l16",
+      "G722",
+      "G711",
+  };
+
+  for (auto codec : codecs) {
+    EXPECT_FALSE(aef->MakeAudioEncoder(
+        /*payload_type=*/111,
+        /*format=*/
+        SdpAudioFormat(codec, 32000, AudioEncoder::kMaxNumberOfChannels + 1),
+        /*codec_pair_id=*/absl::nullopt));
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
new file mode 100644
index 0000000000..7546ac178f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
@@ -0,0 +1,322 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/units/time_delta.h"
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+const int kMaxFrameSizeMs = 60;
+
+class AudioEncoderCng final : public AudioEncoder {
+ public:
+  explicit AudioEncoderCng(AudioEncoderCngConfig&& config);
+  ~AudioEncoderCng() override;
+
+  // Not copyable or moveable.
+  AudioEncoderCng(const AudioEncoderCng&) = delete;
+  AudioEncoderCng(AudioEncoderCng&&) = delete;
+  AudioEncoderCng& operator=(const AudioEncoderCng&) = delete;
+  AudioEncoderCng& operator=(AudioEncoderCng&&) = delete;
+
+  int SampleRateHz() const override;
+  size_t NumChannels() const override;
+  int RtpTimestampRateHz() const override;
+  size_t Num10MsFramesInNextPacket() const override;
+  size_t Max10MsFramesInAPacket() const override;
+  int GetTargetBitrate() const override;
+  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
+                         rtc::ArrayView<const int16_t> audio,
+                         rtc::Buffer* encoded) override;
+  void Reset() override;
+  bool SetFec(bool enable) override;
+  bool SetDtx(bool enable) override;
+  bool SetApplication(Application application) override;
+  void SetMaxPlaybackRate(int frequency_hz) override;
+  rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders()
+      override;
+  void OnReceivedUplinkPacketLossFraction(
+      float uplink_packet_loss_fraction) override;
+  void OnReceivedUplinkBandwidth(
+      int target_audio_bitrate_bps,
+      absl::optional<int64_t> bwe_period_ms) override;
+  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
+      const override;
+
+ private:
+  EncodedInfo EncodePassive(size_t frames_to_encode, rtc::Buffer* encoded);
+  EncodedInfo EncodeActive(size_t frames_to_encode, rtc::Buffer* encoded);
+  size_t SamplesPer10msFrame() const;
+
+  std::unique_ptr<AudioEncoder> speech_encoder_;
+  const int cng_payload_type_;
+  const int num_cng_coefficients_;
+  const int sid_frame_interval_ms_;
+  std::vector<int16_t> speech_buffer_;
+  std::vector<uint32_t> rtp_timestamps_;
+  bool last_frame_active_;
+  std::unique_ptr<Vad> vad_;
+  std::unique_ptr<ComfortNoiseEncoder> cng_encoder_;
+};
+
+AudioEncoderCng::AudioEncoderCng(AudioEncoderCngConfig&& config)
+    : speech_encoder_((static_cast<void>([&] {
+                        RTC_CHECK(config.IsOk()) << "Invalid configuration.";
+                      }()),
+                       std::move(config.speech_encoder))),
+      cng_payload_type_(config.payload_type),
+      num_cng_coefficients_(config.num_cng_coefficients),
+      sid_frame_interval_ms_(config.sid_frame_interval_ms),
+      last_frame_active_(true),
+      vad_(config.vad ?
std::unique_ptr<Vad>(config.vad)
+                      : CreateVad(config.vad_mode)),
+      cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(),
+                                           sid_frame_interval_ms_,
+                                           num_cng_coefficients_)) {}
+
+AudioEncoderCng::~AudioEncoderCng() = default;
+
+int AudioEncoderCng::SampleRateHz() const {
+  return speech_encoder_->SampleRateHz();
+}
+
+size_t AudioEncoderCng::NumChannels() const {
+  return 1;
+}
+
+int AudioEncoderCng::RtpTimestampRateHz() const {
+  return speech_encoder_->RtpTimestampRateHz();
+}
+
+size_t AudioEncoderCng::Num10MsFramesInNextPacket() const {
+  return speech_encoder_->Num10MsFramesInNextPacket();
+}
+
+size_t AudioEncoderCng::Max10MsFramesInAPacket() const {
+  return speech_encoder_->Max10MsFramesInAPacket();
+}
+
+int AudioEncoderCng::GetTargetBitrate() const {
+  return speech_encoder_->GetTargetBitrate();
+}
+
+AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl(
+    uint32_t rtp_timestamp,
+    rtc::ArrayView<const int16_t> audio,
+    rtc::Buffer* encoded) {
+  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
+  RTC_CHECK_EQ(speech_buffer_.size(),
+               rtp_timestamps_.size() * samples_per_10ms_frame);
+  rtp_timestamps_.push_back(rtp_timestamp);
+  RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size());
+  speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend());
+  const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
+  if (rtp_timestamps_.size() < frames_to_encode) {
+    return EncodedInfo();
+  }
+  RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
+      << "Frame size cannot be larger than " << kMaxFrameSizeMs
+      << " ms when using VAD/CNG.";
+
+  // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
+  // following split sizes:
+  // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
+  // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
+  size_t blocks_in_first_vad_call =
+      (frames_to_encode > 3 ? 3 : frames_to_encode);
+  if (frames_to_encode == 4)
+    blocks_in_first_vad_call = 2;
+  RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call);
+  const size_t blocks_in_second_vad_call =
+      frames_to_encode - blocks_in_first_vad_call;
+
+  // Check if all of the buffer is passive speech. Start with checking the
+  // first block.
+  Vad::Activity activity = vad_->VoiceActivity(
+      &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
+      SampleRateHz());
+  if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
+    // Only check the second block if the first was passive.
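+    // An active first block short-circuits the decision: the whole frame is
+    // then treated as active speech without a second VAD call.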
+    activity = vad_->VoiceActivity(
+        &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
+        samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz());
+  }
+
+  EncodedInfo info;
+  switch (activity) {
+    case Vad::kPassive: {
+      info = EncodePassive(frames_to_encode, encoded);
+      last_frame_active_ = false;
+      break;
+    }
+    case Vad::kActive: {
+      info = EncodeActive(frames_to_encode, encoded);
+      last_frame_active_ = true;
+      break;
+    }
+    default: {
+      RTC_CHECK_NOTREACHED();
+    }
+  }
+
+  speech_buffer_.erase(
+      speech_buffer_.begin(),
+      speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
+  rtp_timestamps_.erase(rtp_timestamps_.begin(),
+                        rtp_timestamps_.begin() + frames_to_encode);
+  return info;
+}
+
+void AudioEncoderCng::Reset() {
+  speech_encoder_->Reset();
+  speech_buffer_.clear();
+  rtp_timestamps_.clear();
+  last_frame_active_ = true;
+  vad_->Reset();
+  cng_encoder_.reset(new ComfortNoiseEncoder(
+      SampleRateHz(), sid_frame_interval_ms_, num_cng_coefficients_));
+}
+
+bool AudioEncoderCng::SetFec(bool enable) {
+  return speech_encoder_->SetFec(enable);
+}
+
+bool AudioEncoderCng::SetDtx(bool enable) {
+  return speech_encoder_->SetDtx(enable);
+}
+
+bool AudioEncoderCng::SetApplication(Application application) {
+  return speech_encoder_->SetApplication(application);
+}
+
+void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) {
+  speech_encoder_->SetMaxPlaybackRate(frequency_hz);
+}
+
+rtc::ArrayView<std::unique_ptr<AudioEncoder>>
+AudioEncoderCng::ReclaimContainedEncoders() {
+  return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
+}
+
+void AudioEncoderCng::OnReceivedUplinkPacketLossFraction(
+    float uplink_packet_loss_fraction) {
+  speech_encoder_->OnReceivedUplinkPacketLossFraction(
+      uplink_packet_loss_fraction);
+}
+
+void AudioEncoderCng::OnReceivedUplinkBandwidth(
+    int target_audio_bitrate_bps,
+    absl::optional<int64_t> bwe_period_ms) {
+  speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
+                                             bwe_period_ms);
+}
+
+absl::optional<std::pair<TimeDelta, TimeDelta>>
+AudioEncoderCng::GetFrameLengthRange() const {
+  return speech_encoder_->GetFrameLengthRange();
+}
+
+AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
+    size_t frames_to_encode,
+    rtc::Buffer* encoded) {
+  bool force_sid = last_frame_active_;
+  bool output_produced = false;
+  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
+  AudioEncoder::EncodedInfo info;
+
+  for (size_t i = 0; i < frames_to_encode; ++i) {
+    // It's important not to pass &info.encoded_bytes directly to
+    // WebRtcCng_Encode(), since later loop iterations may return zero in
+    // that value, in which case we don't want to overwrite any value from
+    // an earlier iteration.
+    size_t encoded_bytes_tmp =
+        cng_encoder_->Encode(rtc::ArrayView<const int16_t>(
+                                 &speech_buffer_[i * samples_per_10ms_frame],
+                                 samples_per_10ms_frame),
+                             force_sid, encoded);
+
+    if (encoded_bytes_tmp > 0) {
+      RTC_CHECK(!output_produced);
+      info.encoded_bytes = encoded_bytes_tmp;
+      output_produced = true;
+      force_sid = false;
+    }
+  }
+
+  info.encoded_timestamp = rtp_timestamps_.front();
+  info.payload_type = cng_payload_type_;
+  info.send_even_if_empty = true;
+  info.speech = false;
+  return info;
+}
+
+AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(size_t frames_to_encode,
+                                                        rtc::Buffer* encoded) {
+  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
+  AudioEncoder::EncodedInfo info;
+  for (size_t i = 0; i < frames_to_encode; ++i) {
+    info =
+        speech_encoder_->Encode(rtp_timestamps_.front(),
+                                rtc::ArrayView<const int16_t>(
+                                    &speech_buffer_[i * samples_per_10ms_frame],
+                                    samples_per_10ms_frame),
+                                encoded);
+    if (i + 1 == frames_to_encode) {
+      RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data.";
+    } else {
+      RTC_CHECK_EQ(info.encoded_bytes, 0)
+          << "Encoder delivered data too early.";
+    }
+  }
+  return info;
+}
+
+size_t AudioEncoderCng::SamplesPer10msFrame() const {
+  return rtc::CheckedDivExact(10 * SampleRateHz(), 1000);
+}
+
+}  // namespace
+
+AudioEncoderCngConfig::AudioEncoderCngConfig() = default;
+AudioEncoderCngConfig::AudioEncoderCngConfig(AudioEncoderCngConfig&&) = default;
+AudioEncoderCngConfig::~AudioEncoderCngConfig() = default;
+
+bool AudioEncoderCngConfig::IsOk() const {
+  if (num_channels != 1)
+    return false;
+  if (!speech_encoder)
+    return false;
+  if (num_channels != speech_encoder->NumChannels())
+    return false;
+  if (sid_frame_interval_ms <
+      static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10))
+    return false;
+  if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
+      num_cng_coefficients <= 0)
+    return false;
+  return true;
+}
+
+std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
+    AudioEncoderCngConfig&& config) {
+  return std::make_unique<AudioEncoderCng>(std::move(config));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h
new file mode 100644
index 0000000000..8a1183489f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_
+#define MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "api/audio_codecs/audio_encoder.h"
+#include "common_audio/vad/include/vad.h"
+
+namespace webrtc {
+
+struct AudioEncoderCngConfig {
+  // Moveable, not copyable.
+  AudioEncoderCngConfig();
+  AudioEncoderCngConfig(AudioEncoderCngConfig&&);
+  ~AudioEncoderCngConfig();
+
+  bool IsOk() const;
+
+  size_t num_channels = 1;
+  int payload_type = 13;
+  std::unique_ptr<AudioEncoder> speech_encoder;
+  Vad::Aggressiveness vad_mode = Vad::kVadNormal;
+  int sid_frame_interval_ms = 100;
+  int num_cng_coefficients = 8;
+  // The Vad pointer is mainly for testing.
If a NULL pointer is passed, the
+  // AudioEncoderCng creates (and destroys) a Vad object internally. If an
+  // object is passed, the AudioEncoderCng assumes ownership of the Vad
+  // object.
+  Vad* vad = nullptr;
+};
+
+std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
+    AudioEncoderCngConfig&& config);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
new file mode 100644
index 0000000000..c688004363
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
@@ -0,0 +1,520 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/vad/mock/mock_vad.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+#include "test/mock_audio_encoder.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+using ::testing::_;
+using ::testing::Eq;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::Not;
+using ::testing::Optional;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+
+namespace {
+static const size_t kMaxNumSamples = 48 * 10 * 2;  // 10 ms @ 48 kHz stereo.
+static const size_t kMockReturnEncodedBytes = 17;
+static const int kCngPayloadType = 18;
+}  // namespace
+
+class AudioEncoderCngTest : public ::testing::Test {
+ protected:
+  AudioEncoderCngTest()
+      : mock_encoder_owner_(new MockAudioEncoder),
+        mock_encoder_(mock_encoder_owner_.get()),
+        mock_vad_(new MockVad),
+        timestamp_(4711),
+        num_audio_samples_10ms_(0),
+        sample_rate_hz_(8000) {
+    memset(audio_, 0, kMaxNumSamples * 2);
+    EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(1));
+  }
+
+  AudioEncoderCngTest(const AudioEncoderCngTest&) = delete;
+  AudioEncoderCngTest& operator=(const AudioEncoderCngTest&) = delete;
+
+  void TearDown() override {
+    EXPECT_CALL(*mock_vad_, Die()).Times(1);
+    cng_.reset();
+  }
+
+  AudioEncoderCngConfig MakeCngConfig() {
+    AudioEncoderCngConfig config;
+    config.speech_encoder = std::move(mock_encoder_owner_);
+    EXPECT_TRUE(config.speech_encoder);
+
+    // Let the AudioEncoderCng object use a MockVad instead of its internally
+    // created Vad object.
+    config.vad = mock_vad_;
+    config.payload_type = kCngPayloadType;
+
+    return config;
+  }
+
+  void CreateCng(AudioEncoderCngConfig&& config) {
+    num_audio_samples_10ms_ = static_cast<size_t>(10 * sample_rate_hz_ / 1000);
+    ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples);
+    if (config.speech_encoder) {
+      EXPECT_CALL(*mock_encoder_, SampleRateHz())
+          .WillRepeatedly(Return(sample_rate_hz_));
+      // Max10MsFramesInAPacket() is just used to verify that the SID frame
+      // period is not too small. The return value does not matter that much,
+      // as long as it is smaller than 10.
+      EXPECT_CALL(*mock_encoder_, Max10MsFramesInAPacket())
+          .WillOnce(Return(1u));
+    }
+    cng_ = CreateComfortNoiseEncoder(std::move(config));
+  }
+
+  void Encode() {
+    ASSERT_TRUE(cng_) << "Must call CreateCng() first.";
+    encoded_info_ = cng_->Encode(
+        timestamp_,
+        rtc::ArrayView<const int16_t>(audio_, num_audio_samples_10ms_),
+        &encoded_);
+    timestamp_ += static_cast<uint32_t>(num_audio_samples_10ms_);
+  }
+
+  // Expect `num_calls` calls to the encoder, all successful. The last call
+  // claims to have encoded `kMockReturnEncodedBytes` bytes, and all the
+  // preceding ones 0 bytes.
+  void ExpectEncodeCalls(size_t num_calls) {
+    InSequence s;
+    AudioEncoder::EncodedInfo info;
+    for (size_t j = 0; j < num_calls - 1; ++j) {
+      EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).WillOnce(Return(info));
+    }
+    info.encoded_bytes = kMockReturnEncodedBytes;
+    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+        .WillOnce(
+            Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes)));
+  }
+
+  // Verifies that the cng_ object waits until it has collected
+  // `blocks_per_frame` blocks of audio, and then dispatches all of them to
+  // the underlying codec (speech or cng).
+  void CheckBlockGrouping(size_t blocks_per_frame, bool active_speech) {
+    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+    auto config = MakeCngConfig();
+    const int num_cng_coefficients = config.num_cng_coefficients;
+    CreateCng(std::move(config));
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+        .WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive));
+
+    // Don't expect any calls to the encoder yet.
+    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
+    for (size_t i = 0; i < blocks_per_frame - 1; ++i) {
+      Encode();
+      EXPECT_EQ(0u, encoded_info_.encoded_bytes);
+    }
+    if (active_speech)
+      ExpectEncodeCalls(blocks_per_frame);
+    Encode();
+    if (active_speech) {
+      EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);
+    } else {
+      EXPECT_EQ(static_cast<size_t>(num_cng_coefficients + 1),
+                encoded_info_.encoded_bytes);
+    }
+  }
+
+  // Verifies that the audio is partitioned into larger blocks before calling
+  // the VAD.
+  void CheckVadInputSize(int input_frame_size_ms,
+                         int expected_first_block_size_ms,
+                         int expected_second_block_size_ms) {
+    const size_t blocks_per_frame =
+        static_cast<size_t>(input_frame_size_ms / 10);
+
+    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+
+    // Expect nothing to happen before the last block is sent to cng_.
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0);
+    for (size_t i = 0; i < blocks_per_frame - 1; ++i) {
+      Encode();
+    }
+
+    // Let the VAD decision be passive, since an active decision may lead to
+    // early termination of the decision loop.
+    InSequence s;
+    EXPECT_CALL(
+        *mock_vad_,
+        VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz_ / 1000,
+                      sample_rate_hz_))
+        .WillOnce(Return(Vad::kPassive));
+    if (expected_second_block_size_ms > 0) {
+      EXPECT_CALL(*mock_vad_,
+                  VoiceActivity(
+                      _, expected_second_block_size_ms * sample_rate_hz_ / 1000,
+                      sample_rate_hz_))
+          .WillOnce(Return(Vad::kPassive));
+    }
+
+    // With this call to Encode(), `mock_vad_` should be called according to
+    // the above expectations.
+    Encode();
+  }
+
+  // Tests a frame with both active and passive speech. Returns true if the
+  // decision was active speech, false if it was passive.
+  bool CheckMixedActivePassive(Vad::Activity first_type,
+                               Vad::Activity second_type) {
+    // Set the speech encoder frame size to 60 ms, to ensure that the VAD will
+    // be called twice.
+    const size_t blocks_per_frame = 6;
+    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+    InSequence s;
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+        .WillOnce(Return(first_type));
+    if (first_type == Vad::kPassive) {
+      // Expect a second call to the VAD only if the first frame was passive.
+      EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+          .WillOnce(Return(second_type));
+    }
+    encoded_info_.payload_type = 0;
+    for (size_t i = 0; i < blocks_per_frame; ++i) {
+      Encode();
+    }
+    return encoded_info_.payload_type != kCngPayloadType;
+  }
+
+  std::unique_ptr<AudioEncoder> cng_;
+  std::unique_ptr<MockAudioEncoder> mock_encoder_owner_;
+  MockAudioEncoder* mock_encoder_;
+  MockVad* mock_vad_;  // Ownership is transferred to `cng_`.
+  uint32_t timestamp_;
+  int16_t audio_[kMaxNumSamples];
+  size_t num_audio_samples_10ms_;
+  rtc::Buffer encoded_;
+  AudioEncoder::EncodedInfo encoded_info_;
+  int sample_rate_hz_;
+};
+
+TEST_F(AudioEncoderCngTest, CreateAndDestroy) {
+  CreateCng(MakeCngConfig());
+}
+
+TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillOnce(Return(17U));
+  EXPECT_EQ(17U, cng_->Num10MsFramesInNextPacket());
+}
+
+TEST_F(AudioEncoderCngTest, CheckTargetAudioBitratePropagation) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_,
+              OnReceivedUplinkBandwidth(4711, absl::optional<int64_t>()));
+  cng_->OnReceivedUplinkBandwidth(4711, absl::nullopt);
+}
+
+TEST_F(AudioEncoderCngTest, CheckPacketLossFractionPropagation) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_, OnReceivedUplinkPacketLossFraction(0.5));
+  cng_->OnReceivedUplinkPacketLossFraction(0.5);
+}
+
+TEST_F(AudioEncoderCngTest, CheckGetFrameLengthRangePropagation) {
+  CreateCng(MakeCngConfig());
+  auto expected_range =
+      std::make_pair(TimeDelta::Millis(20), TimeDelta::Millis(20));
+  EXPECT_CALL(*mock_encoder_, GetFrameLengthRange())
+      .WillRepeatedly(Return(absl::make_optional(expected_range)));
+  EXPECT_THAT(cng_->GetFrameLengthRange(), Optional(Eq(expected_range)));
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCallsVad) {
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(1U));
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  Encode();
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) {
+  CheckBlockGrouping(1, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) {
+  CheckBlockGrouping(2, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) {
+  CheckBlockGrouping(3, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) {
+  CheckBlockGrouping(1, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) {
+  CheckBlockGrouping(2, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) {
+  CheckBlockGrouping(3, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodePassive) {
+  const size_t kBlocksPerFrame = 3;
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(kBlocksPerFrame));
+  auto config = MakeCngConfig();
+  const auto sid_frame_interval_ms = config.sid_frame_interval_ms;
+  const auto num_cng_coefficients = config.num_cng_coefficients;
CreateCng(std::move(config));
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillRepeatedly(Return(Vad::kPassive));
+  // Expect no calls at all to the speech encoder mock.
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
+  uint32_t expected_timestamp = timestamp_;
+  for (size_t i = 0; i < 100; ++i) {
+    Encode();
+    // Check if it was time to call the cng encoder. This is done once every
+    // `kBlocksPerFrame` calls.
+    if ((i + 1) % kBlocksPerFrame == 0) {
+      // Now check if a SID interval has elapsed.
+      if ((i % (sid_frame_interval_ms / 10)) < kBlocksPerFrame) {
+        // If so, verify that we got a CNG encoding.
+        EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+        EXPECT_FALSE(encoded_info_.speech);
+        EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
+                  encoded_info_.encoded_bytes);
+        EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp);
+      }
+      expected_timestamp += rtc::checked_cast<uint32_t>(
+          kBlocksPerFrame * num_audio_samples_10ms_);
+    } else {
+      // Otherwise, expect no output.
+      EXPECT_EQ(0u, encoded_info_.encoded_bytes);
+    }
+  }
+}
+
+// Verifies that the correct action is taken for frames with both active and
+// passive speech.
+TEST_F(AudioEncoderCngTest, MixedActivePassive) {
+  CreateCng(MakeCngConfig());
+
+  // All of the frame is active speech.
+  ExpectEncodeCalls(6);
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
+  EXPECT_TRUE(encoded_info_.speech);
+
+  // First half of the frame is active speech.
+  ExpectEncodeCalls(6);
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
+  EXPECT_TRUE(encoded_info_.speech);
+
+  // Second half of the frame is active speech.
+  ExpectEncodeCalls(6);
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
+  EXPECT_TRUE(encoded_info_.speech);
+
+  // All of the frame is passive speech. Expect no calls to `mock_encoder_`.
+  EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive));
+  EXPECT_FALSE(encoded_info_.speech);
+}
+
+// These tests verify that the audio is partitioned into larger blocks before
+// calling the VAD.
+// The parameters for CheckVadInputSize are:
+// CheckVadInputSize(frame_size, expected_first_block_size,
+//                   expected_second_block_size);
+TEST_F(AudioEncoderCngTest, VadInputSize10Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(10, 10, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize20Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(20, 20, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize30Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(30, 30, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize40Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(40, 20, 20);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize50Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(50, 30, 20);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize60Ms) {
+  CreateCng(MakeCngConfig());
+  CheckVadInputSize(60, 30, 30);
+}
+
+// Verifies that the correct payload type is set when CNG is encoded.
+TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1U));
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  encoded_info_.payload_type = 0;
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+}
+
+// Verifies that a SID frame is encoded immediately as the signal changes from
+// active speech to passive.
+TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) {
+  auto config = MakeCngConfig();
+  const auto num_cng_coefficients = config.num_cng_coefficients;
+  CreateCng(std::move(config));
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(1U));
+  // Start with encoding noise.
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .Times(2)
+      .WillRepeatedly(Return(Vad::kPassive));
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+  EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
+            encoded_info_.encoded_bytes);
+  // Encode again, and make sure we got no frame at all (since the SID frame
+  // period is 100 ms by default).
+  Encode();
+  EXPECT_EQ(0u, encoded_info_.encoded_bytes);
+
+  // Now encode active speech.
+  encoded_info_.payload_type = 0;
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kActive));
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(
+          Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes)));
+  Encode();
+  EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);
+
+  // Go back to noise again, and verify that a SID frame is emitted.
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+  EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
+            encoded_info_.encoded_bytes);
+}
+
+// Resetting the CNG should reset both the VAD and the encoder.
+TEST_F(AudioEncoderCngTest, Reset) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_, Reset()).Times(1);
+  EXPECT_CALL(*mock_vad_, Reset()).Times(1);
+  cng_->Reset();
+}
+
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// This test fixture tests various error conditions that make the
+// AudioEncoderCng die via CHECKs.
+class AudioEncoderCngDeathTest : public AudioEncoderCngTest {
+ protected:
+  AudioEncoderCngDeathTest() : AudioEncoderCngTest() {
+    EXPECT_CALL(*mock_vad_, Die()).Times(1);
+    delete mock_vad_;
+    mock_vad_ = nullptr;
+  }
+
+  // Override AudioEncoderCngTest::TearDown, since that one expects a call to
+  // the destructor of `mock_vad_`. In this case, that object is already
+  // deleted.
+  void TearDown() override { cng_.reset(); }
+
+  AudioEncoderCngConfig MakeCngConfig() {
+    // Don't provide a Vad mock object, since it would leak when the test dies.
+    auto config = AudioEncoderCngTest::MakeCngConfig();
+    config.vad = nullptr;
+    return config;
+  }
+
+  void TryWrongNumCoefficients(int num) {
+    RTC_EXPECT_DEATH(
+        [&] {
+          auto config = MakeCngConfig();
+          config.num_cng_coefficients = num;
+          CreateCng(std::move(config));
+        }(),
+        "Invalid configuration");
+  }
+};
+
+TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) {
+  CreateCng(MakeCngConfig());
+  num_audio_samples_10ms_ *= 2;  // 20 ms frame.
+  RTC_EXPECT_DEATH(Encode(), "");
+  num_audio_samples_10ms_ = 0;  // Zero samples.
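+  // Encode() feeds exactly `num_audio_samples_10ms_` samples per call, and
+  // the encoder CHECKs that this equals 10 ms at the configured sample rate,
+  // so the zero-length input below must die as well.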
+  RTC_EXPECT_DEATH(Encode(), "");
+}
+
+TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsA) {
+  TryWrongNumCoefficients(-1);
+}
+
+TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsB) {
+  TryWrongNumCoefficients(0);
+}
+
+TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsC) {
+  TryWrongNumCoefficients(13);
+}
+
+TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) {
+  auto config = MakeCngConfig();
+  config.speech_encoder = nullptr;
+  RTC_EXPECT_DEATH(CreateCng(std::move(config)), "");
+}
+
+TEST_F(AudioEncoderCngDeathTest, StereoEncoder) {
+  EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(2));
+  RTC_EXPECT_DEATH(CreateCng(MakeCngConfig()), "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, StereoConfig) {
+  RTC_EXPECT_DEATH(
+      [&] {
+        auto config = MakeCngConfig();
+        config.num_channels = 2;
+        CreateCng(std::move(config));
+      }(),
+      "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) {
+  CreateCng(MakeCngConfig());
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(7U));
+  for (int i = 0; i < 6; ++i)
+    Encode();
+  RTC_EXPECT_DEATH(
+      Encode(), "Frame size cannot be larger than 60 ms when using VAD/CNG.");
+}
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc
new file mode 100644
index 0000000000..0e6ab79394
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc
@@ -0,0 +1,252 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <memory>
+#include <string>
+
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+enum {
+  kSidShortIntervalUpdate = 1,
+  kSidNormalIntervalUpdate = 100,
+  kSidLongIntervalUpdate = 10000
+};
+
+enum : size_t {
+  kCNGNumParamsLow = 0,
+  kCNGNumParamsNormal = 8,
+  kCNGNumParamsHigh = WEBRTC_CNG_MAX_LPC_ORDER,
+  kCNGNumParamsTooHigh = WEBRTC_CNG_MAX_LPC_ORDER + 1
+};
+
+enum { kNoSid, kForceSid };
+
+class CngTest : public ::testing::Test {
+ protected:
+  virtual void SetUp();
+
+  void TestCngEncode(int sample_rate_hz, int quality);
+
+  int16_t speech_data_[640];  // Max size of CNG internal buffers.
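+  // (640 samples is 10 ms at 64 kHz, the highest rate exercised by the
+  // TestCngEncode() calls below.)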
+};
+
+class CngDeathTest : public CngTest {};
+
+void CngTest::SetUp() {
+  FILE* input_file;
+  const std::string file_name =
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+  input_file = fopen(file_name.c_str(), "rb");
+  ASSERT_TRUE(input_file != NULL);
+  ASSERT_EQ(640, static_cast<int32_t>(
+                     fread(speech_data_, sizeof(int16_t), 640, input_file)));
+  fclose(input_file);
+  input_file = NULL;
+}
+
+void CngTest::TestCngEncode(int sample_rate_hz, int quality) {
+  const size_t num_samples_10ms = rtc::CheckedDivExact(sample_rate_hz, 100);
+  rtc::Buffer sid_data;
+
+  ComfortNoiseEncoder cng_encoder(sample_rate_hz, kSidNormalIntervalUpdate,
+                                  quality);
+  EXPECT_EQ(0U, cng_encoder.Encode(rtc::ArrayView<const int16_t>(
+                                       speech_data_, num_samples_10ms),
+                                   kNoSid, &sid_data));
+  EXPECT_EQ(static_cast<size_t>(quality + 1),
+            cng_encoder.Encode(
+                rtc::ArrayView<const int16_t>(speech_data_, num_samples_10ms),
+                kForceSid, &sid_data));
+}
+
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Create CNG encoder, init with faulty values, free CNG encoder.
+TEST_F(CngDeathTest, CngInitFail) {
+  // Call with too few parameters.
+  EXPECT_DEATH(
+      {
+        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate, kCNGNumParamsLow);
+      },
+      "");
+  // Call with too many parameters.
+  EXPECT_DEATH(
+      {
+        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate,
+                            kCNGNumParamsTooHigh);
+      },
+      "");
+}
+
+// Encode Cng with too long input vector.
+TEST_F(CngDeathTest, CngEncodeTooLong) {
+  rtc::Buffer sid_data;
+
+  // Create encoder.
+  ComfortNoiseEncoder cng_encoder(8000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  // Run encoder with too much data.
+  EXPECT_DEATH(
+      cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 641),
+                         kNoSid, &sid_data),
+      "");
+}
+#endif  // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+TEST_F(CngTest, CngEncode8000) {
+  TestCngEncode(8000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode16000) {
+  TestCngEncode(16000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode32000) {
+  TestCngEncode(32000, kCNGNumParamsHigh);
+}
+
+TEST_F(CngTest, CngEncode48000) {
+  TestCngEncode(48000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode64000) {
+  TestCngEncode(64000, kCNGNumParamsNormal);
+}
+
+// Update SID parameters, for both normal (8) and high (12) numbers of
+// coefficients.
+TEST_F(CngTest, CngUpdateSid) {
+  rtc::Buffer sid_data;
+
+  // Create and initialize encoder and decoder.
+  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  ComfortNoiseDecoder cng_decoder;
+
+  // Run normal Encode and UpdateSid.
+  EXPECT_EQ(kCNGNumParamsNormal + 1,
+            cng_encoder.Encode(
+                rtc::ArrayView<const int16_t>(speech_data_, 160),
+                kForceSid, &sid_data));
+  cng_decoder.UpdateSid(sid_data);
+
+  // Reinit with new length.
+  cng_encoder.Reset(16000, kSidNormalIntervalUpdate, kCNGNumParamsHigh);
+  cng_decoder.Reset();
+
+  // Expect 0 because of unstable parameters after switching length.
+  EXPECT_EQ(0U,
+            cng_encoder.Encode(
+                rtc::ArrayView<const int16_t>(speech_data_, 160),
+                kForceSid, &sid_data));
+  EXPECT_EQ(
+      kCNGNumParamsHigh + 1,
+      cng_encoder.Encode(
+          rtc::ArrayView<const int16_t>(speech_data_ + 160, 160),
+          kForceSid, &sid_data));
+  cng_decoder.UpdateSid(
+      rtc::ArrayView<const uint8_t>(sid_data.data(), kCNGNumParamsNormal + 1));
+}
+
+// Update SID parameters, with wrong parameters or without calling decode.
+TEST_F(CngTest, CngUpdateSidErroneous) {
+  rtc::Buffer sid_data;
+
+  // Encode.
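+  // A valid SID frame is produced first, so that the erroneous updates below
+  // hit a decoder that has already seen well-formed data.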
+ ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + EXPECT_EQ(kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kForceSid, &sid_data)); + + // First run with valid parameters, then with too many CNG parameters. + // The function will operate correctly by only reading the maximum number of + // parameters, skipping the extra. + EXPECT_EQ(kCNGNumParamsNormal + 1, sid_data.size()); + cng_decoder.UpdateSid(sid_data); + + // Make sure the input buffer is large enough. Since Encode() appends data, we + // need to set the size manually only afterwards, or the buffer will be bigger + // than anticipated. + sid_data.SetSize(kCNGNumParamsTooHigh + 1); + cng_decoder.UpdateSid(sid_data); +} + +// Test to generate cng data, by forcing SID. Both normal and faulty condition. +TEST_F(CngTest, CngGenerate) { + rtc::Buffer sid_data; + int16_t out_data[640]; + + // Create and initialize encoder and decoder. + ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + + // Normal Encode. + EXPECT_EQ(kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kForceSid, &sid_data)); + + // Normal UpdateSid. + cng_decoder.UpdateSid(sid_data); + + // Two normal Generate, one with new_period. + EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView(out_data, 640), 1)); + EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView(out_data, 640), 0)); + + // Call Genereate with too much data. + EXPECT_FALSE(cng_decoder.Generate(rtc::ArrayView(out_data, 641), 0)); +} + +// Test automatic SID. +TEST_F(CngTest, CngAutoSid) { + rtc::Buffer sid_data; + + // Create and initialize encoder and decoder. + ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + + // Normal Encode, 100 msec, where no SID data should be generated. + for (int i = 0; i < 10; i++) { + EXPECT_EQ( + 0U, cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kNoSid, &sid_data)); + } + + // We have reached 100 msec, and SID data should be generated. + EXPECT_EQ(kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kNoSid, &sid_data)); +} + +// Test automatic SID, with very short interval. +TEST_F(CngTest, CngAutoSidShort) { + rtc::Buffer sid_data; + + // Create and initialize encoder and decoder. + ComfortNoiseEncoder cng_encoder(16000, kSidShortIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + + // First call will never generate SID, unless forced to. + EXPECT_EQ(0U, + cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kNoSid, &sid_data)); + + // Normal Encode, 100 msec, SID data should be generated all the time. + for (int i = 0; i < 10; i++) { + EXPECT_EQ( + kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView(speech_data_, 160), + kNoSid, &sid_data)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc new file mode 100644 index 0000000000..48f1b8c296 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" + +#include + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +const size_t kCngMaxOutsizeOrder = 640; + +// TODO(ossu): Rename the left-over WebRtcCng according to style guide. +void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a); + +const int32_t WebRtcCng_kDbov[94] = { + 1081109975, 858756178, 682134279, 541838517, 430397633, 341876992, + 271562548, 215709799, 171344384, 136103682, 108110997, 85875618, + 68213428, 54183852, 43039763, 34187699, 27156255, 21570980, + 17134438, 13610368, 10811100, 8587562, 6821343, 5418385, + 4303976, 3418770, 2715625, 2157098, 1713444, 1361037, + 1081110, 858756, 682134, 541839, 430398, 341877, + 271563, 215710, 171344, 136104, 108111, 85876, + 68213, 54184, 43040, 34188, 27156, 21571, + 17134, 13610, 10811, 8588, 6821, 5418, + 4304, 3419, 2716, 2157, 1713, 1361, + 1081, 859, 682, 542, 430, 342, + 272, 216, 171, 136, 108, 86, + 68, 54, 43, 34, 27, 22, + 17, 14, 11, 9, 7, 5, + 4, 3, 3, 2, 2, 1, + 1, 1, 1, 1}; + +const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = { + 32702, 32636, 32570, 32505, 32439, 32374, + 32309, 32244, 32179, 32114, 32049, 31985}; + +} // namespace + +ComfortNoiseDecoder::ComfortNoiseDecoder() { + /* Needed to get the right function pointers in SPLIB. */ + Reset(); +} + +void ComfortNoiseDecoder::Reset() { + dec_seed_ = 7777; /* For debugging only. */ + dec_target_energy_ = 0; + dec_used_energy_ = 0; + for (auto& c : dec_target_reflCoefs_) + c = 0; + for (auto& c : dec_used_reflCoefs_) + c = 0; + for (auto& c : dec_filtstate_) + c = 0; + for (auto& c : dec_filtstateLow_) + c = 0; + dec_order_ = 5; + dec_target_scale_factor_ = 0; + dec_used_scale_factor_ = 0; +} + +void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView sid) { + int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER]; + int32_t targetEnergy; + size_t length = sid.size(); + /* Throw away reflection coefficients of higher order than we can handle. */ + if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1)) + length = WEBRTC_CNG_MAX_LPC_ORDER + 1; + + dec_order_ = static_cast(length - 1); + + uint8_t sid0 = std::min(sid[0], 93); + targetEnergy = WebRtcCng_kDbov[sid0]; + /* Take down target energy to 75%. */ + targetEnergy = targetEnergy >> 1; + targetEnergy += targetEnergy >> 2; + + dec_target_energy_ = targetEnergy; + + /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */ + if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) { + for (size_t i = 0; i < (dec_order_); i++) { + refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/ + dec_target_reflCoefs_[i] = refCs[i]; + } + } else { + for (size_t i = 0; i < (dec_order_); i++) { + refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */ + dec_target_reflCoefs_[i] = refCs[i]; + } + } + + for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { + refCs[i] = 0; + dec_target_reflCoefs_[i] = refCs[i]; + } +} + +bool ComfortNoiseDecoder::Generate(rtc::ArrayView out_data, + bool new_period) { + int16_t excitation[kCngMaxOutsizeOrder]; + int16_t low[kCngMaxOutsizeOrder]; + int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t ReflBetaStd = 26214; /* 0.8 in q15. */ + int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. 
*/ + int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */ + int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */ + int16_t Beta, BetaC; /* These are in Q15. */ + int32_t targetEnergy; + int16_t En; + int16_t temp16; + const size_t num_samples = out_data.size(); + + if (num_samples > kCngMaxOutsizeOrder) { + return false; + } + + if (new_period) { + dec_used_scale_factor_ = dec_target_scale_factor_; + Beta = ReflBetaNewP; + BetaC = ReflBetaCompNewP; + } else { + Beta = ReflBetaStd; + BetaC = ReflBetaCompStd; + } + + /* Calculate new scale factor in Q13 */ + dec_used_scale_factor_ = rtc::checked_cast( + WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) + + WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13)); + + dec_used_energy_ = dec_used_energy_ >> 1; + dec_used_energy_ += dec_target_energy_ >> 1; + + /* Do the same for the reflection coeffs, albeit in Q15. */ + for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { + dec_used_reflCoefs_[i] = + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15); + dec_used_reflCoefs_[i] += + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15); + } + + /* Compute the polynomial coefficients. */ + WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly); + + targetEnergy = dec_used_energy_; + + /* Calculate scaling factor based on filter energy. */ + En = 8192; /* 1.0 in Q13. */ + for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) { + /* Floating point value for reference. + E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) * + (dec_used_reflCoefs_[i] / 32768.0); + */ + + /* Same in fixed point. */ + /* K(i).^2 in Q15. */ + temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], + dec_used_reflCoefs_[i], 15); + /* 1 - K(i).^2 in Q15. */ + temp16 = 0x7fff - temp16; + En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15); + } + + /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */ + + /* Calculate sqrt(En * target_energy / excitation energy) */ + targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_); + + En = (int16_t)WebRtcSpl_Sqrt(En) << 6; + En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */ + dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12); + + /* Generate excitation. */ + /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */ + for (size_t i = 0; i < num_samples; i++) { + excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1; + } + + /* Scale to correct energy. */ + WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_, + num_samples, 13); + + /* `lpPoly` - Coefficients in Q12. + * `excitation` - Speech samples. + * `nst->dec_filtstate` - State preservation. + * `out_data` - Filtered speech samples. */ + WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation, + num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER, + dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER, + out_data.data(), low, num_samples); + + return true; +} + +ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality) + : enc_nrOfCoefs_(quality), + enc_sampfreq_(fs), + enc_interval_(interval), + enc_msSinceSid_(0), + enc_Energy_(0), + enc_reflCoefs_{0}, + enc_corrVector_{0}, + enc_seed_(7777) /* For debugging only. 
*/ { + RTC_CHECK_GT(quality, 0); + RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER); +} + +void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) { + RTC_CHECK_GT(quality, 0); + RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER); + enc_nrOfCoefs_ = quality; + enc_sampfreq_ = fs; + enc_interval_ = interval; + enc_msSinceSid_ = 0; + enc_Energy_ = 0; + for (auto& c : enc_reflCoefs_) + c = 0; + for (auto& c : enc_corrVector_) + c = 0; + enc_seed_ = 7777; /* For debugging only. */ +} + +size_t ComfortNoiseEncoder::Encode(rtc::ArrayView speech, + bool force_sid, + rtc::Buffer* output) { + int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t hanningW[kCngMaxOutsizeOrder]; + int16_t ReflBeta = 19661; /* 0.6 in q15. */ + int16_t ReflBetaComp = 13107; /* 0.4 in q15. */ + int32_t outEnergy; + int outShifts; + size_t i; + int stab; + int acorrScale; + size_t index; + size_t ind, factor; + int32_t* bptr; + int32_t blo, bhi; + int16_t negate; + const int16_t* aptr; + int16_t speechBuf[kCngMaxOutsizeOrder]; + + const size_t num_samples = speech.size(); + RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder); + + for (i = 0; i < num_samples; i++) { + speechBuf[i] = speech[i]; + } + + factor = num_samples; + + /* Calculate energy and a coefficients. */ + outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts); + while (outShifts > 0) { + /* We can only do 5 shifts without destroying accuracy in + * division factor. */ + if (outShifts > 5) { + outEnergy <<= (outShifts - 5); + outShifts = 5; + } else { + factor /= 2; + outShifts--; + } + } + outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor); + + if (outEnergy > 1) { + /* Create Hanning Window. */ + WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2); + for (i = 0; i < (num_samples / 2); i++) + hanningW[num_samples - i - 1] = hanningW[i]; + + WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples, + 14); + + WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_, + corrVector, &acorrScale); + + if (*corrVector == 0) + *corrVector = WEBRTC_SPL_WORD16_MAX; + + /* Adds the bandwidth expansion. */ + aptr = WebRtcCng_kCorrWindow; + bptr = corrVector; + + /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */ + for (ind = 0; ind < enc_nrOfCoefs_; ind++) { + /* The below code multiplies the 16 b corrWindow values (Q15) with + * the 32 b corrvector (Q0) and shifts the result down 15 steps. */ + negate = *bptr < 0; + if (negate) + *bptr = -*bptr; + + blo = (int32_t)*aptr * (*bptr & 0xffff); + bhi = ((blo >> 16) & 0xffff) + + ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff)); + blo = (blo & 0xffff) | ((bhi & 0xffff) << 16); + + *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15); + if (negate) + *bptr = -*bptr; + bptr++; + } + /* End of bandwidth expansion. */ + + stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_); + + if (!stab) { + /* Disregard from this frame */ + return 0; + } + + } else { + for (i = 0; i < enc_nrOfCoefs_; i++) + refCs[i] = 0; + } + + if (force_sid) { + /* Read instantaneous values instead of averaged. */ + for (i = 0; i < enc_nrOfCoefs_; i++) + enc_reflCoefs_[i] = refCs[i]; + enc_Energy_ = outEnergy; + } else { + /* Average history with new values. 
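+     The smoothing below computes refl = 0.6 * old + 0.4 * new in Q15
+     (ReflBeta = 19661, ReflBetaComp = 13107), while the energy update keeps
+     3/4 of the old estimate: (out >> 2) + (old >> 1) + (old >> 2).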
*/ + for (i = 0; i < enc_nrOfCoefs_; i++) { + enc_reflCoefs_[i] = + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15); + enc_reflCoefs_[i] += + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15); + } + enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2); + } + + if (enc_Energy_ < 1) { + enc_Energy_ = 1; + } + + if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) { + /* Search for best dbov value. */ + index = 0; + for (i = 1; i < 93; i++) { + /* Always round downwards. */ + if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) { + index = i; + break; + } + } + if ((i == 93) && (index == 0)) + index = 94; + + const size_t output_coefs = enc_nrOfCoefs_ + 1; + output->AppendData(output_coefs, [&](rtc::ArrayView output) { + output[0] = (uint8_t)index; + + /* Quantize coefficients with tweak for WebRtc implementation of + * RFC3389. */ + if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) { + for (i = 0; i < enc_nrOfCoefs_; i++) { + /* Q15 to Q7 with rounding. */ + output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8); + } + } else { + for (i = 0; i < enc_nrOfCoefs_; i++) { + /* Q15 to Q7 with rounding. */ + output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8)); + } + } + + return output_coefs; + }); + + enc_msSinceSid_ = + static_cast((1000 * num_samples) / enc_sampfreq_); + return output_coefs; + } else { + enc_msSinceSid_ += + static_cast((1000 * num_samples) / enc_sampfreq_); + return 0; + } +} + +namespace { +/* Values in `k` are Q15, and `a` Q12. */ +void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) { + int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; + int16_t* aptr; + int16_t* aptr2; + int16_t* anyptr; + const int16_t* kptr; + int m, i; + + kptr = k; + *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */ + *any = *a; + a[1] = (*k + 4) >> 3; + for (m = 1; m < useOrder; m++) { + kptr++; + aptr = a; + aptr++; + aptr2 = &a[m]; + anyptr = any; + anyptr++; + + any[m + 1] = (*kptr + 4) >> 3; + for (i = 0; i < m; i++) { + *anyptr++ = + (*aptr++) + + (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15); + } + + aptr = a; + anyptr = any; + for (i = 0; i < (m + 2); i++) { + *aptr++ = *anyptr++; + } + } +} + +} // namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h new file mode 100644 index 0000000000..7afd243f81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_ +#define MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_ + +#include + +#include + +#include "api/array_view.h" +#include "rtc_base/buffer.h" + +#define WEBRTC_CNG_MAX_LPC_ORDER 12 + +namespace webrtc { + +class ComfortNoiseDecoder { + public: + ComfortNoiseDecoder(); + ~ComfortNoiseDecoder() = default; + + ComfortNoiseDecoder(const ComfortNoiseDecoder&) = delete; + ComfortNoiseDecoder& operator=(const ComfortNoiseDecoder&) = delete; + + void Reset(); + + // Updates the CN state when a new SID packet arrives. 
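+  // A SID payload is one energy byte followed by up to
+  // WEBRTC_CNG_MAX_LPC_ORDER quantized reflection coefficients; any extra
+  // bytes are ignored (UpdateSid() clamps the length).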
+ // `sid` is a view of the SID packet without the headers. + void UpdateSid(rtc::ArrayView sid); + + // Generates comfort noise. + // `out_data` will be filled with samples - its size determines the number of + // samples generated. When `new_period` is true, CNG history will be reset + // before any audio is generated. Returns `false` if outData is too large - + // currently 640 bytes (equalling 10ms at 64kHz). + // TODO(ossu): Specify better limits for the size of out_data. Either let it + // be unbounded or limit to 10ms in the current sample rate. + bool Generate(rtc::ArrayView out_data, bool new_period); + + private: + uint32_t dec_seed_; + int32_t dec_target_energy_; + int32_t dec_used_energy_; + int16_t dec_target_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t dec_used_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t dec_filtstate_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t dec_filtstateLow_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + uint16_t dec_order_; + int16_t dec_target_scale_factor_; /* Q29 */ + int16_t dec_used_scale_factor_; /* Q29 */ +}; + +class ComfortNoiseEncoder { + public: + // Creates a comfort noise encoder. + // `fs` selects sample rate: 8000 for narrowband or 16000 for wideband. + // `interval` sets the interval at which to generate SID data (in ms). + // `quality` selects the number of refl. coeffs. Maximum allowed is 12. + ComfortNoiseEncoder(int fs, int interval, int quality); + ~ComfortNoiseEncoder() = default; + + ComfortNoiseEncoder(const ComfortNoiseEncoder&) = delete; + ComfortNoiseEncoder& operator=(const ComfortNoiseEncoder&) = delete; + + // Resets the comfort noise encoder to its initial state. + // Parameters are set as during construction. + void Reset(int fs, int interval, int quality); + + // Analyzes background noise from `speech` and appends coefficients to + // `output`. Returns the number of coefficients generated. If `force_sid` is + // true, a SID frame is forced and the internal sid interval counter is reset. + // Will fail if the input size is too large (> 640 samples, see + // ComfortNoiseDecoder::Generate). + size_t Encode(rtc::ArrayView speech, + bool force_sid, + rtc::Buffer* output); + + private: + size_t enc_nrOfCoefs_; + int enc_sampfreq_; + int16_t enc_interval_; + int16_t enc_msSinceSid_; + int32_t enc_Energy_; + int16_t enc_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int32_t enc_corrVector_[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + uint32_t enc_seed_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc new file mode 100644 index 0000000000..46ac671b30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" + +#include + +#include "modules/audio_coding/codecs/g711/g711_interface.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" + +namespace webrtc { + +void AudioDecoderPcmU::Reset() {} + +std::vector AudioDecoderPcmU::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples( + this, std::move(payload), timestamp, 8 * num_channels_, 8); +} + +int AudioDecoderPcmU::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderPcmU::Channels() const { + return num_channels_; +} + +int AudioDecoderPcmU::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = + PacketDuration(encoded, encoded_len) * + Channels(); // 1 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG711_DecodeU(encoded, encoded_len_adjusted, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast(ret); +} + +int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // One encoded byte per sample per channel. + return static_cast(encoded_len / Channels()); +} + +void AudioDecoderPcmA::Reset() {} + +std::vector AudioDecoderPcmA::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples( + this, std::move(payload), timestamp, 8 * num_channels_, 8); +} + +int AudioDecoderPcmA::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderPcmA::Channels() const { + return num_channels_; +} + +int AudioDecoderPcmA::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = + PacketDuration(encoded, encoded_len) * + Channels(); // 1 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG711_DecodeA(encoded, encoded_len_adjusted, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast(ret); +} + +int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // One encoded byte per sample per channel. + return static_cast(encoded_len / Channels()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h new file mode 100644 index 0000000000..3fa42cba30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_ +#define MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_ + +#include +#include + +#include + +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class AudioDecoderPcmU final : public AudioDecoder { + public: + explicit AudioDecoderPcmU(size_t num_channels) : num_channels_(num_channels) { + RTC_DCHECK_GE(num_channels, 1); + } + + AudioDecoderPcmU(const AudioDecoderPcmU&) = delete; + AudioDecoderPcmU& operator=(const AudioDecoderPcmU&) = delete; + + void Reset() override; + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + const size_t num_channels_; +}; + +class AudioDecoderPcmA final : public AudioDecoder { + public: + explicit AudioDecoderPcmA(size_t num_channels) : num_channels_(num_channels) { + RTC_DCHECK_GE(num_channels, 1); + } + + AudioDecoderPcmA(const AudioDecoderPcmA&) = delete; + AudioDecoderPcmA& operator=(const AudioDecoderPcmA&) = delete; + + void Reset() override; + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + const size_t num_channels_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc new file mode 100644 index 0000000000..65e2da479d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" + +#include + +#include "modules/audio_coding/codecs/g711/g711_interface.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +bool AudioEncoderPcm::Config::IsOk() const { + return (frame_size_ms % 10 == 0) && (num_channels >= 1); +} + +AudioEncoderPcm::AudioEncoderPcm(const Config& config, int sample_rate_hz) + : sample_rate_hz_(sample_rate_hz), + num_channels_(config.num_channels), + payload_type_(config.payload_type), + num_10ms_frames_per_packet_( + static_cast(config.frame_size_ms / 10)), + full_frame_samples_(config.num_channels * config.frame_size_ms * + sample_rate_hz / 1000), + first_timestamp_in_buffer_(0) { + RTC_CHECK_GT(sample_rate_hz, 0) << "Sample rate must be larger than 0 Hz"; + RTC_CHECK_EQ(config.frame_size_ms % 10, 0) + << "Frame size must be an integer multiple of 10 ms."; + speech_buffer_.reserve(full_frame_samples_); +} + +AudioEncoderPcm::~AudioEncoderPcm() = default; + +int AudioEncoderPcm::SampleRateHz() const { + return sample_rate_hz_; +} + +size_t AudioEncoderPcm::NumChannels() const { + return num_channels_; +} + +size_t AudioEncoderPcm::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +size_t AudioEncoderPcm::Max10MsFramesInAPacket() const { + return num_10ms_frames_per_packet_; +} + +int AudioEncoderPcm::GetTargetBitrate() const { + return static_cast(8 * BytesPerSample() * SampleRateHz() * + NumChannels()); +} + +AudioEncoder::EncodedInfo AudioEncoderPcm::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) { + if (speech_buffer_.empty()) { + first_timestamp_in_buffer_ = rtp_timestamp; + } + speech_buffer_.insert(speech_buffer_.end(), audio.begin(), audio.end()); + if (speech_buffer_.size() < full_frame_samples_) { + return EncodedInfo(); + } + RTC_CHECK_EQ(speech_buffer_.size(), full_frame_samples_); + EncodedInfo info; + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.encoded_bytes = encoded->AppendData( + full_frame_samples_ * BytesPerSample(), + [&](rtc::ArrayView encoded) { + return EncodeCall(&speech_buffer_[0], full_frame_samples_, + encoded.data()); + }); + speech_buffer_.clear(); + info.encoder_type = GetCodecType(); + return info; +} + +void AudioEncoderPcm::Reset() { + speech_buffer_.clear(); +} + +absl::optional> +AudioEncoderPcm::GetFrameLengthRange() const { + return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10), + TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}}; +} + +size_t AudioEncoderPcmA::EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) { + return WebRtcG711_EncodeA(audio, input_len, encoded); +} + +size_t AudioEncoderPcmA::BytesPerSample() const { + return 1; +} + +AudioEncoder::CodecType AudioEncoderPcmA::GetCodecType() const { + return AudioEncoder::CodecType::kPcmA; +} + +size_t AudioEncoderPcmU::EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) { + return WebRtcG711_EncodeU(audio, input_len, encoded); +} + +size_t AudioEncoderPcmU::BytesPerSample() const { + return 1; +} + +AudioEncoder::CodecType AudioEncoderPcmU::GetCodecType() const { + return AudioEncoder::CodecType::kPcmU; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h new file mode 100644 index 0000000000..d50be4b457 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_ +#define MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/units/time_delta.h" + +namespace webrtc { + +class AudioEncoderPcm : public AudioEncoder { + public: + struct Config { + public: + bool IsOk() const; + + int frame_size_ms; + size_t num_channels; + int payload_type; + + protected: + explicit Config(int pt) + : frame_size_ms(20), num_channels(1), payload_type(pt) {} + }; + + ~AudioEncoderPcm() override; + + int SampleRateHz() const override; + size_t NumChannels() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + void Reset() override; + absl::optional> GetFrameLengthRange() + const override; + + protected: + AudioEncoderPcm(const Config& config, int sample_rate_hz); + + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) override; + + virtual size_t EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) = 0; + + virtual size_t BytesPerSample() const = 0; + + // Used to set EncodedInfoLeaf::encoder_type in + // AudioEncoderPcm::EncodeImpl + virtual AudioEncoder::CodecType GetCodecType() const = 0; + + private: + const int sample_rate_hz_; + const size_t num_channels_; + const int payload_type_; + const size_t num_10ms_frames_per_packet_; + const size_t full_frame_samples_; + std::vector speech_buffer_; + uint32_t first_timestamp_in_buffer_; +}; + +class AudioEncoderPcmA final : public AudioEncoderPcm { + public: + struct Config : public AudioEncoderPcm::Config { + Config() : AudioEncoderPcm::Config(8) {} + }; + + explicit AudioEncoderPcmA(const Config& config) + : AudioEncoderPcm(config, kSampleRateHz) {} + + AudioEncoderPcmA(const AudioEncoderPcmA&) = delete; + AudioEncoderPcmA& operator=(const AudioEncoderPcmA&) = delete; + + protected: + size_t EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) override; + + size_t BytesPerSample() const override; + + AudioEncoder::CodecType GetCodecType() const override; + + private: + static const int kSampleRateHz = 8000; +}; + +class AudioEncoderPcmU final : public AudioEncoderPcm { + public: + struct Config : public AudioEncoderPcm::Config { + Config() : AudioEncoderPcm::Config(0) {} + }; + + explicit AudioEncoderPcmU(const Config& config) + : AudioEncoderPcm(config, kSampleRateHz) {} + + AudioEncoderPcmU(const AudioEncoderPcmU&) = delete; + AudioEncoderPcmU& operator=(const AudioEncoderPcmU&) = delete; + + protected: + size_t EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) override; + + size_t BytesPerSample() const override; + + AudioEncoder::CodecType GetCodecType() const override; + + private: + static const int kSampleRateHz = 8000; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_ diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c new file mode 100644 index 0000000000..5fe1692ccb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "modules/third_party/g711/g711.h" +#include "modules/audio_coding/codecs/g711/g711_interface.h" + +size_t WebRtcG711_EncodeA(const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t n; + for (n = 0; n < len; n++) + encoded[n] = linear_to_alaw(speechIn[n]); + return len; +} + +size_t WebRtcG711_EncodeU(const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t n; + for (n = 0; n < len; n++) + encoded[n] = linear_to_ulaw(speechIn[n]); + return len; +} + +size_t WebRtcG711_DecodeA(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) { + size_t n; + for (n = 0; n < len; n++) + decoded[n] = alaw_to_linear(encoded[n]); + *speechType = 1; + return len; +} + +size_t WebRtcG711_DecodeU(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) { + size_t n; + for (n = 0; n < len; n++) + decoded[n] = ulaw_to_linear(encoded[n]); + *speechType = 1; + return len; +} + +int16_t WebRtcG711_Version(char* version, int16_t lenBytes) { + strncpy(version, "2.0.0", lenBytes); + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h new file mode 100644 index 0000000000..c92e6cc1c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_ +#define MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_ + +#include +#include + +// Comfort noise constants +#define G711_WEBRTC_SPEECH 1 +#define G711_WEBRTC_CNG 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcG711_EncodeA(...) + * + * This function encodes a G711 A-law frame and inserts it into a packet. + * Input speech length has be of any length. + * + * Input: + * - speechIn : Input speech vector + * - len : Samples in speechIn + * + * Output: + * - encoded : The encoded data vector + * + * Return value : Length (in bytes) of coded data. + * Always equal to len input parameter. + */ + +size_t WebRtcG711_EncodeA(const int16_t* speechIn, + size_t len, + uint8_t* encoded); + +/**************************************************************************** + * WebRtcG711_EncodeU(...) 
+ * + * This function encodes a G711 U-law frame and inserts it into a packet. + * Input speech length has be of any length. + * + * Input: + * - speechIn : Input speech vector + * - len : Samples in speechIn + * + * Output: + * - encoded : The encoded data vector + * + * Return value : Length (in bytes) of coded data. + * Always equal to len input parameter. + */ + +size_t WebRtcG711_EncodeU(const int16_t* speechIn, + size_t len, + uint8_t* encoded); + +/**************************************************************************** + * WebRtcG711_DecodeA(...) + * + * This function decodes a packet G711 A-law frame. + * + * Input: + * - encoded : Encoded data + * - len : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector + * - speechType : 1 normal, 2 CNG (for G711 it should + * always return 1 since G711 does not have a + * built-in DTX/CNG scheme) + * + * Return value : >0 - Samples in decoded vector + * -1 - Error + */ + +size_t WebRtcG711_DecodeA(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); + +/**************************************************************************** + * WebRtcG711_DecodeU(...) + * + * This function decodes a packet G711 U-law frame. + * + * Input: + * - encoded : Encoded data + * - len : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector + * - speechType : 1 normal, 2 CNG (for G711 it should + * always return 1 since G711 does not have a + * built-in DTX/CNG scheme) + * + * Return value : >0 - Samples in decoded vector + * -1 - Error + */ + +size_t WebRtcG711_DecodeU(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); + +/********************************************************************** + * WebRtcG711_Version(...) + * + * This function gives the version string of the G.711 codec. + * + * Input: + * - lenBytes: the size of Allocated space (in Bytes) where + * the version number is written to (in string format). + * + * Output: + * - version: Pointer to a buffer where the version number is + * written to. + * + */ + +int16_t WebRtcG711_Version(char* version, int16_t lenBytes); + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc new file mode 100644 index 0000000000..f3a42f5d79 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * testG711.cpp : Defines the entry point for the console application. 
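+ *
+ * Example invocation (file names are hypothetical), encoding 80-sample
+ * frames with A-law and also dumping the bitstream:
+ *
+ *   ./testG711 80 A speech_8khz.pcm out.pcm out.bit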
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* include API */
+#include "modules/audio_coding/codecs/g711/g711_interface.h"
+
+/* Runtime statistics */
+#include <time.h>
+#define CLOCKS_PER_SEC_G711 1000
+
+/* function for reading audio data from PCM file */
+bool readframe(int16_t* data, FILE* inp, size_t length) {
+  size_t rlen = fread(data, sizeof(int16_t), length, inp);
+  if (rlen >= length)
+    return false;
+  memset(data + rlen, 0, (length - rlen) * sizeof(int16_t));
+  return true;
+}
+
+int main(int argc, char* argv[]) {
+  char inname[80], outname[40], bitname[40];
+  FILE* inp;
+  FILE* outp;
+  FILE* bitp = NULL;
+  int framecnt;
+  bool endfile;
+
+  size_t framelength = 80;
+
+  /* Runtime statistics */
+  double starttime;
+  double runtime;
+  double length_file;
+
+  size_t stream_len = 0;
+  int16_t shortdata[480];
+  int16_t decoded[480];
+  uint8_t streamdata[1000];
+  int16_t speechType[1];
+  char law[2];
+  char versionNumber[40];
+
+  /* handling wrong input arguments in the command line */
+  if ((argc != 5) && (argc != 6)) {
+    printf("\n\nWrong number of arguments or flag values.\n\n");
+
+    printf("\n");
+    printf("\nG.711 test application\n\n");
+    printf("Usage:\n\n");
+    printf("./testG711.exe framelength law infile outfile \n\n");
+    printf("framelength: Framelength in samples.\n");
+    printf("law        : Coding law, A or u.\n");
+    printf("infile     : Normal speech input file\n");
+    printf("outfile    : Speech output file\n\n");
+    printf("outbits    : Output bitstream file [optional]\n\n");
+    exit(0);
+  }
+
+  /* Get version and print */
+  WebRtcG711_Version(versionNumber, 40);
+
+  printf("-----------------------------------\n");
+  printf("G.711 version: %s\n\n", versionNumber);
+  /* Get frame length */
+  int framelength_int = atoi(argv[1]);
+  if (framelength_int < 0) {
+    printf("  G.711: Invalid framelength %d.\n", framelength_int);
+    exit(1);
+  }
+  framelength = static_cast<size_t>(framelength_int);
+
+  /* Get compression law */
+  strcpy(law, argv[2]);
+
+  /* Get Input and Output files */
+  sscanf(argv[3], "%s", inname);
+  sscanf(argv[4], "%s", outname);
+  if (argc == 6) {
+    sscanf(argv[5], "%s", bitname);
+    if ((bitp = fopen(bitname, "wb")) == NULL) {
+      printf("  G.711: Cannot read file %s.\n", bitname);
+      exit(1);
+    }
+  }
+
+  if ((inp = fopen(inname, "rb")) == NULL) {
+    printf("  G.711: Cannot read file %s.\n", inname);
+    exit(1);
+  }
+  if ((outp = fopen(outname, "wb")) == NULL) {
+    printf("  G.711: Cannot write file %s.\n", outname);
+    exit(1);
+  }
+  printf("\nInput:  %s\nOutput: %s\n", inname, outname);
+  if (argc == 6) {
+    printf("\nBitfile: %s\n", bitname);
+  }
+
+  starttime = clock() / (double)CLOCKS_PER_SEC_G711; /* Runtime statistics */
+
+  /* Initialize encoder and decoder */
+  framecnt = 0;
+  endfile = false;
+  while (!endfile) {
+    framecnt++;
+    /* Read speech block */
+    endfile = readframe(shortdata, inp, framelength);
+
+    /* G.711 encoding */
+    if (!strcmp(law, "A")) {
+      /* A-law encoding */
+      stream_len = WebRtcG711_EncodeA(shortdata, framelength, streamdata);
+      if (argc == 6) {
+        /* Write bits to file */
+        if (fwrite(streamdata, sizeof(unsigned char), stream_len, bitp) !=
+            stream_len) {
+          return -1;
+        }
+      }
+      WebRtcG711_DecodeA(streamdata, stream_len, decoded, speechType);
+    } else if (!strcmp(law, "u")) {
+      /* u-law encoding */
+      stream_len = WebRtcG711_EncodeU(shortdata, framelength, streamdata);
+      if (argc == 6) {
+        /* Write bits to file */
+        if (fwrite(streamdata, sizeof(unsigned char), stream_len, bitp) !=
+            stream_len) {
+          return -1;
+        }
+      }
+      WebRtcG711_DecodeU(streamdata, stream_len, decoded,
speechType); + } else { + printf("Wrong law mode\n"); + exit(1); + } + /* Write coded speech to file */ + if (fwrite(decoded, sizeof(short), framelength, outp) != framelength) { + return -1; + } + } + + runtime = (double)(clock() / (double)CLOCKS_PER_SEC_G711 - starttime); + length_file = ((double)framecnt * (double)framelength / 8000); + printf("\n\nLength of speech file: %.1f s\n", length_file); + printf("Time to run G.711: %.2f s (%.2f %% of realtime)\n\n", runtime, + (100 * runtime / length_file)); + printf("---------------------END----------------------\n"); + + fclose(inp); + fclose(outp); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc new file mode 100644 index 0000000000..1ecc9bc3d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" + +#include + +#include + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioDecoderG722Impl::AudioDecoderG722Impl() { + WebRtcG722_CreateDecoder(&dec_state_); + WebRtcG722_DecoderInit(dec_state_); +} + +AudioDecoderG722Impl::~AudioDecoderG722Impl() { + WebRtcG722_FreeDecoder(dec_state_); +} + +bool AudioDecoderG722Impl::HasDecodePlc() const { + return false; +} + +int AudioDecoderG722Impl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG722_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast(ret); +} + +void AudioDecoderG722Impl::Reset() { + WebRtcG722_DecoderInit(dec_state_); +} + +std::vector AudioDecoderG722Impl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), + timestamp, 8, 16); +} + +int AudioDecoderG722Impl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // 1/2 encoded byte per sample per channel. 
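+  // For example, a 20 ms mono G.722 packet is 160 bytes and decodes to
+  // 2 * 160 = 320 samples at 16 kHz.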
+ return static_cast(2 * encoded_len / Channels()); +} + +int AudioDecoderG722Impl::SampleRateHz() const { + return 16000; +} + +size_t AudioDecoderG722Impl::Channels() const { + return 1; +} + +AudioDecoderG722StereoImpl::AudioDecoderG722StereoImpl() { + WebRtcG722_CreateDecoder(&dec_state_left_); + WebRtcG722_CreateDecoder(&dec_state_right_); + WebRtcG722_DecoderInit(dec_state_left_); + WebRtcG722_DecoderInit(dec_state_right_); +} + +AudioDecoderG722StereoImpl::~AudioDecoderG722StereoImpl() { + WebRtcG722_FreeDecoder(dec_state_left_); + WebRtcG722_FreeDecoder(dec_state_right_); +} + +int AudioDecoderG722StereoImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = PacketDuration(encoded, encoded_len) * + Channels() / + 2; // 1/2 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + // De-interleave the bit-stream into two separate payloads. + uint8_t* encoded_deinterleaved = new uint8_t[encoded_len_adjusted]; + SplitStereoPacket(encoded, encoded_len_adjusted, encoded_deinterleaved); + // Decode left and right. + size_t decoded_len = + WebRtcG722_Decode(dec_state_left_, encoded_deinterleaved, + encoded_len_adjusted / 2, decoded, &temp_type); + size_t ret = WebRtcG722_Decode( + dec_state_right_, &encoded_deinterleaved[encoded_len_adjusted / 2], + encoded_len_adjusted / 2, &decoded[decoded_len], &temp_type); + if (ret == decoded_len) { + ret += decoded_len; // Return total number of samples. + // Interleave output. + for (size_t k = ret / 2; k < ret; k++) { + int16_t temp = decoded[k]; + memmove(&decoded[2 * k - ret + 2], &decoded[2 * k - ret + 1], + (ret - k - 1) * sizeof(int16_t)); + decoded[2 * k - ret + 1] = temp; + } + } + *speech_type = ConvertSpeechType(temp_type); + delete[] encoded_deinterleaved; + return static_cast(ret); +} + +int AudioDecoderG722StereoImpl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // 1/2 encoded byte per sample per channel. Make sure the length represents + // an equal number of bytes per channel. Otherwise, we cannot de-interleave + // the encoded data later. + return static_cast(2 * (encoded_len / Channels())); +} + +int AudioDecoderG722StereoImpl::SampleRateHz() const { + return 16000; +} + +size_t AudioDecoderG722StereoImpl::Channels() const { + return 2; +} + +void AudioDecoderG722StereoImpl::Reset() { + WebRtcG722_DecoderInit(dec_state_left_); + WebRtcG722_DecoderInit(dec_state_right_); +} + +std::vector AudioDecoderG722StereoImpl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), + timestamp, 2 * 8, 16); +} + +// Split the stereo packet and place left and right channel after each other +// in the output array. +void AudioDecoderG722StereoImpl::SplitStereoPacket( + const uint8_t* encoded, + size_t encoded_len, + uint8_t* encoded_deinterleaved) { + // Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ..., + // where "lx" is 4 bits representing left sample number x, and "rx" right + // sample. Two samples fit in one byte, represented with |...|. 
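+// For example, input bytes |l1 r1| |l2 r2| (left sample in the high nibble)
+// become |l1 l2| |r1 r2| after the first pass below; the second pass then
+// moves every right-channel byte to the back half of the buffer.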
+ for (size_t i = 0; i + 1 < encoded_len; i += 2) { + uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F); + encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4); + encoded_deinterleaved[i + 1] = right_byte; + } + + // Move one byte representing right channel each loop, and place it at the + // end of the bytestream vector. After looping the data is reordered to: + // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|, + // where N is the total number of samples. + for (size_t i = 0; i < encoded_len / 2; i++) { + uint8_t right_byte = encoded_deinterleaved[i + 1]; + memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2], + encoded_len - i - 2); + encoded_deinterleaved[encoded_len - 1] = right_byte; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h new file mode 100644 index 0000000000..5872fad5de --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_ +#define MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_ + +#include "api/audio_codecs/audio_decoder.h" + +typedef struct WebRtcG722DecInst G722DecInst; + +namespace webrtc { + +class AudioDecoderG722Impl final : public AudioDecoder { + public: + AudioDecoderG722Impl(); + ~AudioDecoderG722Impl() override; + + AudioDecoderG722Impl(const AudioDecoderG722Impl&) = delete; + AudioDecoderG722Impl& operator=(const AudioDecoderG722Impl&) = delete; + + bool HasDecodePlc() const override; + void Reset() override; + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + G722DecInst* dec_state_; +}; + +class AudioDecoderG722StereoImpl final : public AudioDecoder { + public: + AudioDecoderG722StereoImpl(); + ~AudioDecoderG722StereoImpl() override; + + AudioDecoderG722StereoImpl(const AudioDecoderG722StereoImpl&) = delete; + AudioDecoderG722StereoImpl& operator=(const AudioDecoderG722StereoImpl&) = + delete; + + void Reset() override; + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int SampleRateHz() const override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + // Splits the stereo-interleaved payload in `encoded` into separate payloads + // for left and right channels. The separated payloads are written to + // `encoded_deinterleaved`, which must hold at least `encoded_len` samples. 
+ // The left channel starts at offset 0, while the right channel starts at + // offset encoded_len / 2 into `encoded_deinterleaved`. + void SplitStereoPacket(const uint8_t* encoded, + size_t encoded_len, + uint8_t* encoded_deinterleaved); + + G722DecInst* dec_state_left_; + G722DecInst* dec_state_right_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc new file mode 100644 index 0000000000..b7d34ba581 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/g722/audio_encoder_g722.h" + +#include + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +const size_t kSampleRateHz = 16000; + +} // namespace + +AudioEncoderG722Impl::AudioEncoderG722Impl(const AudioEncoderG722Config& config, + int payload_type) + : num_channels_(config.num_channels), + payload_type_(payload_type), + num_10ms_frames_per_packet_( + static_cast(config.frame_size_ms / 10)), + num_10ms_frames_buffered_(0), + first_timestamp_in_buffer_(0), + encoders_(new EncoderState[num_channels_]), + interleave_buffer_(2 * num_channels_) { + RTC_CHECK(config.IsOk()); + const size_t samples_per_channel = + kSampleRateHz / 100 * num_10ms_frames_per_packet_; + for (size_t i = 0; i < num_channels_; ++i) { + encoders_[i].speech_buffer.reset(new int16_t[samples_per_channel]); + encoders_[i].encoded_buffer.SetSize(samples_per_channel / 2); + } + Reset(); +} + +AudioEncoderG722Impl::~AudioEncoderG722Impl() = default; + +int AudioEncoderG722Impl::SampleRateHz() const { + return kSampleRateHz; +} + +size_t AudioEncoderG722Impl::NumChannels() const { + return num_channels_; +} + +int AudioEncoderG722Impl::RtpTimestampRateHz() const { + // The RTP timestamp rate for G.722 is 8000 Hz, even though it is a 16 kHz + // codec. + return kSampleRateHz / 2; +} + +size_t AudioEncoderG722Impl::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +size_t AudioEncoderG722Impl::Max10MsFramesInAPacket() const { + return num_10ms_frames_per_packet_; +} + +int AudioEncoderG722Impl::GetTargetBitrate() const { + // 4 bits/sample, 16000 samples/s/channel. 
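+  // Example: 4 bits/sample * 16000 samples/s = 64000 bits/s per channel, so
+  // an encoder with NumChannels() == 2 reports 128000.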
+ return static_cast(64000 * NumChannels()); +} + +void AudioEncoderG722Impl::Reset() { + num_10ms_frames_buffered_ = 0; + for (size_t i = 0; i < num_channels_; ++i) + RTC_CHECK_EQ(0, WebRtcG722_EncoderInit(encoders_[i].encoder)); +} + +absl::optional> +AudioEncoderG722Impl::GetFrameLengthRange() const { + return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10), + TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}}; +} + +AudioEncoder::EncodedInfo AudioEncoderG722Impl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) { + if (num_10ms_frames_buffered_ == 0) + first_timestamp_in_buffer_ = rtp_timestamp; + + // Deinterleave samples and save them in each channel's buffer. + const size_t start = kSampleRateHz / 100 * num_10ms_frames_buffered_; + for (size_t i = 0; i < kSampleRateHz / 100; ++i) + for (size_t j = 0; j < num_channels_; ++j) + encoders_[j].speech_buffer[start + i] = audio[i * num_channels_ + j]; + + // If we don't yet have enough samples for a packet, we're done for now. + if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) { + return EncodedInfo(); + } + + // Encode each channel separately. + RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_); + num_10ms_frames_buffered_ = 0; + const size_t samples_per_channel = SamplesPerChannel(); + for (size_t i = 0; i < num_channels_; ++i) { + const size_t bytes_encoded = WebRtcG722_Encode( + encoders_[i].encoder, encoders_[i].speech_buffer.get(), + samples_per_channel, encoders_[i].encoded_buffer.data()); + RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2); + } + + const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_; + EncodedInfo info; + info.encoded_bytes = encoded->AppendData( + bytes_to_encode, [&](rtc::ArrayView encoded) { + // Interleave the encoded bytes of the different channels. Each separate + // channel and the interleaved stream encodes two samples per byte, most + // significant half first. + for (size_t i = 0; i < samples_per_channel / 2; ++i) { + for (size_t j = 0; j < num_channels_; ++j) { + uint8_t two_samples = encoders_[j].encoded_buffer.data()[i]; + interleave_buffer_.data()[j] = two_samples >> 4; + interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf; + } + for (size_t j = 0; j < num_channels_; ++j) + encoded[i * num_channels_ + j] = + interleave_buffer_.data()[2 * j] << 4 | + interleave_buffer_.data()[2 * j + 1]; + } + + return bytes_to_encode; + }); + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.encoder_type = CodecType::kG722; + return info; +} + +AudioEncoderG722Impl::EncoderState::EncoderState() { + RTC_CHECK_EQ(0, WebRtcG722_CreateEncoder(&encoder)); +} + +AudioEncoderG722Impl::EncoderState::~EncoderState() { + RTC_CHECK_EQ(0, WebRtcG722_FreeEncoder(encoder)); +} + +size_t AudioEncoderG722Impl::SamplesPerChannel() const { + return kSampleRateHz / 100 * num_10ms_frames_per_packet_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h new file mode 100644 index 0000000000..a932aa8b7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_ +#define MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/g722/audio_encoder_g722_config.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class AudioEncoderG722Impl final : public AudioEncoder { + public: + AudioEncoderG722Impl(const AudioEncoderG722Config& config, int payload_type); + ~AudioEncoderG722Impl() override; + + AudioEncoderG722Impl(const AudioEncoderG722Impl&) = delete; + AudioEncoderG722Impl& operator=(const AudioEncoderG722Impl&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + int RtpTimestampRateHz() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + void Reset() override; + absl::optional> GetFrameLengthRange() + const override; + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) override; + + private: + // The encoder state for one channel. + struct EncoderState { + G722EncInst* encoder; + std::unique_ptr speech_buffer; // Queued up for encoding. + rtc::Buffer encoded_buffer; // Already encoded. + EncoderState(); + ~EncoderState(); + }; + + size_t SamplesPerChannel() const; + + const size_t num_channels_; + const int payload_type_; + const size_t num_10ms_frames_per_packet_; + size_t num_10ms_frames_buffered_; + uint32_t first_timestamp_in_buffer_; + const std::unique_ptr encoders_; + rtc::Buffer interleave_buffer_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c new file mode 100644 index 0000000000..36ee6d92be --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "modules/third_party/g722/g722_enc_dec.h" + +int16_t WebRtcG722_CreateEncoder(G722EncInst **G722enc_inst) +{ + *G722enc_inst=(G722EncInst*)malloc(sizeof(G722EncoderState)); + if (*G722enc_inst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcG722_EncoderInit(G722EncInst *G722enc_inst) +{ + // Create and/or reset the G.722 encoder + // Bitrate 64 kbps and wideband mode (2) + G722enc_inst = (G722EncInst *) WebRtc_g722_encode_init( + (G722EncoderState*) G722enc_inst, 64000, 2); + if (G722enc_inst == NULL) { + return -1; + } else { + return 0; + } +} + +int WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst) +{ + // Free encoder memory + return WebRtc_g722_encode_release((G722EncoderState*) G722enc_inst); +} + +size_t WebRtcG722_Encode(G722EncInst *G722enc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded) +{ + unsigned char *codechar = (unsigned char*) encoded; + // Encode the input speech vector + return WebRtc_g722_encode((G722EncoderState*) G722enc_inst, codechar, + speechIn, len); +} + +int16_t WebRtcG722_CreateDecoder(G722DecInst **G722dec_inst) +{ + *G722dec_inst=(G722DecInst*)malloc(sizeof(G722DecoderState)); + if (*G722dec_inst!=NULL) { + return(0); + } else { + return(-1); + } +} + +void WebRtcG722_DecoderInit(G722DecInst* inst) { + // Create and/or reset the G.722 decoder + // Bitrate 64 kbps and wideband mode (2) + WebRtc_g722_decode_init((G722DecoderState*)inst, 64000, 2); +} + +int WebRtcG722_FreeDecoder(G722DecInst *G722dec_inst) +{ + // Free encoder memory + return WebRtc_g722_decode_release((G722DecoderState*) G722dec_inst); +} + +size_t WebRtcG722_Decode(G722DecInst *G722dec_inst, + const uint8_t *encoded, + size_t len, + int16_t *decoded, + int16_t *speechType) +{ + // Decode the G.722 encoder stream + *speechType=G722_WEBRTC_SPEECH; + return WebRtc_g722_decode((G722DecoderState*) G722dec_inst, decoded, + encoded, len); +} + +int16_t WebRtcG722_Version(char *versionStr, short len) +{ + // Get version string + char version[30] = "2.0.0\n"; + if (strlen(version) < (unsigned int)len) + { + strcpy(versionStr, version); + return 0; + } + else + { + return -1; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h new file mode 100644 index 0000000000..353de4504f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ +#define MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ + +#include +#include + +/* + * Solution to support multiple instances + */ + +typedef struct WebRtcG722EncInst G722EncInst; +typedef struct WebRtcG722DecInst G722DecInst; + +/* + * Comfort noise constants + */ + +#define G722_WEBRTC_SPEECH 1 +#define G722_WEBRTC_CNG 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcG722_CreateEncoder(...) 
+ *
+ * Create memory used for G722 encoder
+ *
+ * Input:
+ *      - G722enc_inst     : G722 instance for encoder
+ *
+ * Return value           :  0 - Ok
+ *                          -1 - Error
+ */
+int16_t WebRtcG722_CreateEncoder(G722EncInst** G722enc_inst);
+
+/****************************************************************************
+ * WebRtcG722_EncoderInit(...)
+ *
+ * This function initializes a G722 instance
+ *
+ * Input:
+ *      - G722enc_inst     : the G722 encoder instance that should be
+ *                           initialized
+ *
+ * Return value           :  0 - Ok
+ *                          -1 - Error
+ */
+
+int16_t WebRtcG722_EncoderInit(G722EncInst* G722enc_inst);
+
+/****************************************************************************
+ * WebRtcG722_FreeEncoder(...)
+ *
+ * Free the memory used for G722 encoder
+ *
+ * Input:
+ *      - G722enc_inst     : G722 instance for encoder
+ *
+ * Return value           :  0 - Ok
+ *                          -1 - Error
+ */
+int WebRtcG722_FreeEncoder(G722EncInst* G722enc_inst);
+
+/****************************************************************************
+ * WebRtcG722_Encode(...)
+ *
+ * This function encodes a block of speech into G722 data
+ *
+ * Input:
+ *      - G722enc_inst     : G722 encoder instance that should encode
+ *                           a packet
+ *      - speechIn         : Input speech vector
+ *      - len              : Samples in speechIn
+ *
+ * Output:
+ *      - encoded          : The encoded data vector
+ *
+ * Return value           : Length (in bytes) of coded data
+ */
+
+size_t WebRtcG722_Encode(G722EncInst* G722enc_inst,
+                         const int16_t* speechIn,
+                         size_t len,
+                         uint8_t* encoded);
+
+/****************************************************************************
+ * WebRtcG722_CreateDecoder(...)
+ *
+ * Create memory used for G722 decoder
+ *
+ * Input:
+ *      - G722dec_inst     : G722 instance for decoder
+ *
+ * Return value           :  0 - Ok
+ *                          -1 - Error
+ */
+int16_t WebRtcG722_CreateDecoder(G722DecInst** G722dec_inst);
+
+/****************************************************************************
+ * WebRtcG722_DecoderInit(...)
+ *
+ * This function initializes a G722 instance
+ *
+ * Input:
+ *      - inst             : G722 instance
+ */
+
+void WebRtcG722_DecoderInit(G722DecInst* inst);
+
+/****************************************************************************
+ * WebRtcG722_FreeDecoder(...)
+ *
+ * Free the memory used for G722 decoder
+ *
+ * Input:
+ *      - G722dec_inst     : G722 instance for decoder
+ *
+ * Return value           :  0 - Ok
+ *                          -1 - Error
+ */
+
+int WebRtcG722_FreeDecoder(G722DecInst* G722dec_inst);
+
+/****************************************************************************
+ * WebRtcG722_Decode(...)
+ *
+ * This function decodes a packet containing G722 frame(s). The output
+ * length is two decoded samples per encoded byte.
+ *
+ * Input:
+ *      - G722dec_inst     : G722 decoder instance that should decode
+ *                           a packet
+ *      - encoded          : Encoded G722 frame(s)
+ *      - len              : Bytes in encoded vector
+ *
+ * Output:
+ *      - decoded          : The decoded vector
+ *      - speechType       : 1 normal, 2 CNG (since G722 does not have its
+ *                           own DTX/CNG scheme, it always returns 1)
+ *
+ * Return value           : Samples in decoded vector
+ */
+
+size_t WebRtcG722_Decode(G722DecInst* G722dec_inst,
+                         const uint8_t* encoded,
+                         size_t len,
+                         int16_t* decoded,
+                         int16_t* speechType);
+
+/****************************************************************************
+ * WebRtcG722_Version(...)
+ * + * Get a string with the current version of the codec + */ + +int16_t WebRtcG722_Version(char* versionStr, short len); + +#ifdef __cplusplus +} +#endif + +#endif /* MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc new file mode 100644 index 0000000000..9f2155d0f7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * testG722.cpp : Defines the entry point for the console application. + */ + +#include +#include +#include + +/* include API */ +#include "modules/audio_coding/codecs/g722/g722_interface.h" + +/* Runtime statistics */ +#include +#define CLOCKS_PER_SEC_G722 100000 + +// Forward declaration +typedef struct WebRtcG722EncInst G722EncInst; +typedef struct WebRtcG722DecInst G722DecInst; + +/* function for reading audio data from PCM file */ +bool readframe(int16_t* data, FILE* inp, size_t length) { + size_t rlen = fread(data, sizeof(int16_t), length, inp); + if (rlen >= length) + return false; + memset(data + rlen, 0, (length - rlen) * sizeof(int16_t)); + return true; +} + +int main(int argc, char* argv[]) { + char inname[60], outbit[40], outname[40]; + FILE *inp, *outbitp, *outp; + + int framecnt; + bool endfile; + size_t framelength = 160; + G722EncInst* G722enc_inst; + G722DecInst* G722dec_inst; + + /* Runtime statistics */ + double starttime; + double runtime = 0; + double length_file; + + size_t stream_len = 0; + int16_t shortdata[960]; + int16_t decoded[960]; + uint8_t streamdata[80 * 6]; + int16_t speechType[1]; + + /* handling wrong input arguments in the command line */ + if (argc != 5) { + printf("\n\nWrong number of arguments or flag values.\n\n"); + + printf("\n"); + printf("Usage:\n\n"); + printf("./testG722.exe framelength infile outbitfile outspeechfile \n\n"); + printf("with:\n"); + printf("framelength : Framelength in samples.\n\n"); + printf("infile : Normal speech input file\n\n"); + printf("outbitfile : Bitstream output file\n\n"); + printf("outspeechfile: Speech output file\n\n"); + exit(0); + } + + /* Get frame length */ + int framelength_int = atoi(argv[1]); + if (framelength_int < 0) { + printf(" G.722: Invalid framelength %d.\n", framelength_int); + exit(1); + } + framelength = static_cast(framelength_int); + + /* Get Input and Output files */ + sscanf(argv[2], "%s", inname); + sscanf(argv[3], "%s", outbit); + sscanf(argv[4], "%s", outname); + + if ((inp = fopen(inname, "rb")) == NULL) { + printf(" G.722: Cannot read file %s.\n", inname); + exit(1); + } + if ((outbitp = fopen(outbit, "wb")) == NULL) { + printf(" G.722: Cannot write file %s.\n", outbit); + exit(1); + } + if ((outp = fopen(outname, "wb")) == NULL) { + printf(" G.722: Cannot write file %s.\n", outname); + exit(1); + } + printf("\nInput:%s\nOutput bitstream:%s\nOutput:%s\n", inname, outbit, + outname); + + /* Create and init */ + WebRtcG722_CreateEncoder((G722EncInst**)&G722enc_inst); + WebRtcG722_CreateDecoder((G722DecInst**)&G722dec_inst); + 
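+  /* The interface requires the Create* calls above before the *Init calls
+     below; the matching Free* calls are made after the coding loop. */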
WebRtcG722_EncoderInit((G722EncInst*)G722enc_inst); + WebRtcG722_DecoderInit((G722DecInst*)G722dec_inst); + + /* Initialize encoder and decoder */ + framecnt = 0; + endfile = false; + while (!endfile) { + framecnt++; + + /* Read speech block */ + endfile = readframe(shortdata, inp, framelength); + + /* Start clock before call to encoder and decoder */ + starttime = clock() / (double)CLOCKS_PER_SEC_G722; + + /* G.722 encoding + decoding */ + stream_len = WebRtcG722_Encode((G722EncInst*)G722enc_inst, shortdata, + framelength, streamdata); + WebRtcG722_Decode(G722dec_inst, streamdata, stream_len, decoded, + speechType); + + /* Stop clock after call to encoder and decoder */ + runtime += (double)((clock() / (double)CLOCKS_PER_SEC_G722) - starttime); + + /* Write coded bits to file */ + if (fwrite(streamdata, sizeof(short), stream_len / 2, outbitp) != + stream_len / 2) { + return -1; + } + /* Write coded speech to file */ + if (fwrite(decoded, sizeof(short), framelength, outp) != framelength) { + return -1; + } + } + + WebRtcG722_FreeEncoder((G722EncInst*)G722enc_inst); + WebRtcG722_FreeDecoder((G722DecInst*)G722dec_inst); + + length_file = ((double)framecnt * (double)framelength / 16000); + printf("\n\nLength of speech file: %.1f s\n", length_file); + printf("Time to run G.722: %.2f s (%.2f %% of realtime)\n\n", runtime, + (100 * runtime / length_file)); + printf("---------------------END----------------------\n"); + + fclose(inp); + fclose(outbitp); + fclose(outp); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c new file mode 100644 index 0000000000..77da78ba7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuant.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/abs_quant.h" + +#include "modules/audio_coding/codecs/ilbc/abs_quant_loop.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + + +/*----------------------------------------------------------------* + * predictive noise shaping encoding of scaled start state + * (subrutine for WebRtcIlbcfix_StateSearch) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AbsQuant( + IlbcEncoder *iLBCenc_inst, + /* (i) Encoder instance */ + iLBC_bits *iLBC_encbits, /* (i/o) Encoded bits (outputs idxForMax + and idxVec, uses state_first as + input) */ + int16_t *in, /* (i) vector to encode */ + int16_t *weightDenum /* (i) denominator of synthesis filter */ + ) { + int16_t *syntOut; + size_t quantLen[2]; + + /* Stack based */ + int16_t syntOutBuf[LPC_FILTERORDER+STATE_SHORT_LEN_30MS]; + int16_t in_weightedVec[STATE_SHORT_LEN_30MS+LPC_FILTERORDER]; + int16_t *in_weighted = &in_weightedVec[LPC_FILTERORDER]; + + /* Initialize the buffers */ + WebRtcSpl_MemSetW16(syntOutBuf, 0, LPC_FILTERORDER+STATE_SHORT_LEN_30MS); + syntOut = &syntOutBuf[LPC_FILTERORDER]; + /* Start with zero state */ + WebRtcSpl_MemSetW16(in_weightedVec, 0, LPC_FILTERORDER); + + /* Perform the quantization loop in two sections of length quantLen[i], + where the perceptual weighting filter is updated at the subframe + border */ + + if (iLBC_encbits->state_first) { + quantLen[0]=SUBL; + quantLen[1]=iLBCenc_inst->state_short_len-SUBL; + } else { + quantLen[0]=iLBCenc_inst->state_short_len-SUBL; + quantLen[1]=SUBL; + } + + /* Calculate the weighted residual, switch perceptual weighting + filter at the subframe border */ + WebRtcSpl_FilterARFastQ12( + in, in_weighted, + weightDenum, LPC_FILTERORDER+1, quantLen[0]); + WebRtcSpl_FilterARFastQ12( + &in[quantLen[0]], &in_weighted[quantLen[0]], + &weightDenum[LPC_FILTERORDER+1], LPC_FILTERORDER+1, quantLen[1]); + + WebRtcIlbcfix_AbsQuantLoop( + syntOut, + in_weighted, + weightDenum, + quantLen, + iLBC_encbits->idxVec); + +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h new file mode 100644 index 0000000000..c72e29cf29 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_AbsQuant.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * predictive noise shaping encoding of scaled start state
+ * (subroutine for WebRtcIlbcfix_StateSearch)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_AbsQuant(
+    IlbcEncoder* iLBCenc_inst,
+    /* (i) Encoder instance */
+    iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits (outputs idxForMax
+                                and idxVec, uses state_first as
+                                input) */
+    int16_t* in,             /* (i) vector to encode */
+    int16_t* weightDenum     /* (i) denominator of synthesis filter */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c
new file mode 100644
index 0000000000..cf9266299d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_AbsQuantLoop.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/abs_quant_loop.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/sort_sq.h"
+
+void WebRtcIlbcfix_AbsQuantLoop(int16_t *syntOutIN, int16_t *in_weightedIN,
+                                int16_t *weightDenumIN, size_t *quantLenIN,
+                                int16_t *idxVecIN ) {
+  size_t k1, k2;
+  int16_t index;
+  int32_t toQW32;
+  int32_t toQ32;
+  int16_t tmp16a;
+  int16_t xq;
+
+  int16_t *syntOut = syntOutIN;
+  int16_t *in_weighted = in_weightedIN;
+  int16_t *weightDenum = weightDenumIN;
+  size_t *quantLen = quantLenIN;
+  int16_t *idxVec = idxVecIN;
+
+  for(k1=0;k1<2;k1++) {
+    for(k2=0;k2<quantLen[k1];k2++){
+
+      /* Filter to get the predicted value */
+      WebRtcSpl_FilterARFastQ12(
+          in_weighted, syntOut,
+          weightDenum, LPC_FILTERORDER+1, 1);
+
+      /* the quantizer */
+      toQW32 = (int32_t)(*in_weighted) - (int32_t)(*syntOut);
+
+      toQ32 = (((int32_t)toQW32)<<2);
+
+      if (toQ32 > 32767) {
+        toQ32 = (int32_t) 32767;
+      } else if (toQ32 < -32768) {
+        toQ32 = (int32_t) -32768;
+      }
+
+      /* Quantize the state */
+      if (toQW32<(-7577)) {
+        /* To prevent negative overflow */
+        index=0;
+      } else if (toQW32>8151) {
+        /* To prevent positive overflow */
+        index=7;
+      } else {
+        /* Find the best quantization index
+           (state_sq3Tbl is in Q13 and toQ is in Q11)
+        */
+        WebRtcIlbcfix_SortSq(&xq, &index,
+                             (int16_t)toQ32,
+                             WebRtcIlbcfix_kStateSq3, 8);
+      }
+
+      /* Store selected index */
+      (*idxVec++) = index;
+
+      /* Compute decoded sample and update of the prediction filter */
+      tmp16a = ((WebRtcIlbcfix_kStateSq3[index] + 2 ) >> 2);
+
+      *syntOut = (int16_t) (tmp16a + (int32_t)(*in_weighted) - toQW32);
+
+      syntOut++; in_weighted++;
+    }
+    /* Update perceptual weighting filter at subframe border */
+    weightDenum += 11;
+  }
+}
diff --git
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h new file mode 100644 index 0000000000..841d73b9fb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuantLoop.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_LOOP_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_LOOP_H_ + +#include +#include + +/*----------------------------------------------------------------* + * predictive noise shaping encoding of scaled start state + * (subrutine for WebRtcIlbcfix_StateSearch) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AbsQuantLoop(int16_t* syntOutIN, + int16_t* in_weightedIN, + int16_t* weightDenumIN, + size_t* quantLenIN, + int16_t* idxVecIN); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc new file mode 100644 index 0000000000..57b5abbe23 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" + +#include +#include + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +AudioDecoderIlbcImpl::AudioDecoderIlbcImpl() { + WebRtcIlbcfix_DecoderCreate(&dec_state_); + WebRtcIlbcfix_Decoderinit30Ms(dec_state_); +} + +AudioDecoderIlbcImpl::~AudioDecoderIlbcImpl() { + WebRtcIlbcfix_DecoderFree(dec_state_); +} + +bool AudioDecoderIlbcImpl::HasDecodePlc() const { + return true; +} + +int AudioDecoderIlbcImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz, 8000); + int16_t temp_type = 1; // Default is speech. 
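+  // By convention in these codec interfaces, a speech type of 1 means
+  // normal speech and 2 means comfort noise; ConvertSpeechType() maps
+  // this to the AudioDecoder::SpeechType enum.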
+ int ret = WebRtcIlbcfix_Decode(dec_state_, encoded, encoded_len, decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +size_t AudioDecoderIlbcImpl::DecodePlc(size_t num_frames, int16_t* decoded) { + return WebRtcIlbcfix_NetEqPlc(dec_state_, decoded, num_frames); +} + +void AudioDecoderIlbcImpl::Reset() { + WebRtcIlbcfix_Decoderinit30Ms(dec_state_); +} + +std::vector AudioDecoderIlbcImpl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + std::vector results; + size_t bytes_per_frame; + int timestamps_per_frame; + if (payload.size() >= 950) { + RTC_LOG(LS_WARNING) + << "AudioDecoderIlbcImpl::ParsePayload: Payload too large"; + return results; + } + if (payload.size() % 38 == 0) { + // 20 ms frames. + bytes_per_frame = 38; + timestamps_per_frame = 160; + } else if (payload.size() % 50 == 0) { + // 30 ms frames. + bytes_per_frame = 50; + timestamps_per_frame = 240; + } else { + RTC_LOG(LS_WARNING) + << "AudioDecoderIlbcImpl::ParsePayload: Invalid payload"; + return results; + } + + RTC_DCHECK_EQ(0, payload.size() % bytes_per_frame); + if (payload.size() == bytes_per_frame) { + std::unique_ptr frame( + new LegacyEncodedAudioFrame(this, std::move(payload))); + results.emplace_back(timestamp, 0, std::move(frame)); + } else { + size_t byte_offset; + uint32_t timestamp_offset; + for (byte_offset = 0, timestamp_offset = 0; byte_offset < payload.size(); + byte_offset += bytes_per_frame, + timestamp_offset += timestamps_per_frame) { + std::unique_ptr frame(new LegacyEncodedAudioFrame( + this, rtc::Buffer(payload.data() + byte_offset, bytes_per_frame))); + results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame)); + } + } + + return results; +} + +int AudioDecoderIlbcImpl::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderIlbcImpl::Channels() const { + return 1; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h new file mode 100644 index 0000000000..46ba755148 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ + +#include +#include + +#include + +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/buffer.h" + +typedef struct iLBC_decinst_t_ IlbcDecoderInstance; + +namespace webrtc { + +class AudioDecoderIlbcImpl final : public AudioDecoder { + public: + AudioDecoderIlbcImpl(); + ~AudioDecoderIlbcImpl() override; + + AudioDecoderIlbcImpl(const AudioDecoderIlbcImpl&) = delete; + AudioDecoderIlbcImpl& operator=(const AudioDecoderIlbcImpl&) = delete; + + bool HasDecodePlc() const override; + size_t DecodePlc(size_t num_frames, int16_t* decoded) override; + void Reset() override; + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + IlbcDecoderInstance* dec_state_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc new file mode 100644 index 0000000000..9fbf42ceeb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" + +#include +#include + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +const int kSampleRateHz = 8000; + +int GetIlbcBitrate(int ptime) { + switch (ptime) { + case 20: + case 40: + // 38 bytes per frame of 20 ms => 15200 bits/s. + return 15200; + case 30: + case 60: + // 50 bytes per frame of 30 ms => (approx) 13333 bits/s. 
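+      // That is, 50 bytes * 8 bits / 0.030 s = 13333.33... bits/s,
+      // truncated to an integer here.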
+ return 13333; + default: + RTC_CHECK_NOTREACHED(); + } +} + +} // namespace + +AudioEncoderIlbcImpl::AudioEncoderIlbcImpl(const AudioEncoderIlbcConfig& config, + int payload_type) + : frame_size_ms_(config.frame_size_ms), + payload_type_(payload_type), + num_10ms_frames_per_packet_( + static_cast(config.frame_size_ms / 10)), + encoder_(nullptr) { + RTC_CHECK(config.IsOk()); + Reset(); +} + +AudioEncoderIlbcImpl::~AudioEncoderIlbcImpl() { + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderFree(encoder_)); +} + +int AudioEncoderIlbcImpl::SampleRateHz() const { + return kSampleRateHz; +} + +size_t AudioEncoderIlbcImpl::NumChannels() const { + return 1; +} + +size_t AudioEncoderIlbcImpl::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +size_t AudioEncoderIlbcImpl::Max10MsFramesInAPacket() const { + return num_10ms_frames_per_packet_; +} + +int AudioEncoderIlbcImpl::GetTargetBitrate() const { + return GetIlbcBitrate(rtc::dchecked_cast(num_10ms_frames_per_packet_) * + 10); +} + +AudioEncoder::EncodedInfo AudioEncoderIlbcImpl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) { + // Save timestamp if starting a new packet. + if (num_10ms_frames_buffered_ == 0) + first_timestamp_in_buffer_ = rtp_timestamp; + + // Buffer input. + std::copy(audio.cbegin(), audio.cend(), + input_buffer_ + kSampleRateHz / 100 * num_10ms_frames_buffered_); + + // If we don't yet have enough buffered input for a whole packet, we're done + // for now. + if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) { + return EncodedInfo(); + } + + // Encode buffered input. + RTC_DCHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_); + num_10ms_frames_buffered_ = 0; + size_t encoded_bytes = encoded->AppendData( + RequiredOutputSizeBytes(), [&](rtc::ArrayView encoded) { + const int r = WebRtcIlbcfix_Encode( + encoder_, input_buffer_, + kSampleRateHz / 100 * num_10ms_frames_per_packet_, encoded.data()); + RTC_CHECK_GE(r, 0); + + return static_cast(r); + }); + + RTC_DCHECK_EQ(encoded_bytes, RequiredOutputSizeBytes()); + + EncodedInfo info; + info.encoded_bytes = encoded_bytes; + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.encoder_type = CodecType::kIlbc; + return info; +} + +void AudioEncoderIlbcImpl::Reset() { + if (encoder_) + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderFree(encoder_)); + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderCreate(&encoder_)); + const int encoder_frame_size_ms = + frame_size_ms_ > 30 ? frame_size_ms_ / 2 : frame_size_ms_; + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderInit(encoder_, encoder_frame_size_ms)); + num_10ms_frames_buffered_ = 0; +} + +absl::optional> +AudioEncoderIlbcImpl::GetFrameLengthRange() const { + return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10), + TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}}; +} + +size_t AudioEncoderIlbcImpl::RequiredOutputSizeBytes() const { + switch (num_10ms_frames_per_packet_) { + case 2: + return 38; + case 3: + return 50; + case 4: + return 2 * 38; + case 6: + return 2 * 50; + default: + RTC_CHECK_NOTREACHED(); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h new file mode 100644 index 0000000000..c8dfa2ca6d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ + +#include +#include + +#include + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/ilbc/audio_encoder_ilbc_config.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +namespace webrtc { + +class AudioEncoderIlbcImpl final : public AudioEncoder { + public: + AudioEncoderIlbcImpl(const AudioEncoderIlbcConfig& config, int payload_type); + ~AudioEncoderIlbcImpl() override; + + AudioEncoderIlbcImpl(const AudioEncoderIlbcImpl&) = delete; + AudioEncoderIlbcImpl& operator=(const AudioEncoderIlbcImpl&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) override; + void Reset() override; + absl::optional> GetFrameLengthRange() + const override; + + private: + size_t RequiredOutputSizeBytes() const; + + static constexpr size_t kMaxSamplesPerPacket = 480; + const int frame_size_ms_; + const int payload_type_; + const size_t num_10ms_frames_per_packet_; + size_t num_10ms_frames_buffered_; + uint32_t first_timestamp_in_buffer_; + int16_t input_buffer_[kMaxSamplesPerPacket]; + IlbcEncoderInstance* encoder_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c new file mode 100644 index 0000000000..c915a2f9f0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AugmentedCbCorr.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/augmented_cb_corr.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_AugmentedCbCorr( + int16_t *target, /* (i) Target vector */ + int16_t *buffer, /* (i) Memory buffer */ + int16_t *interpSamples, /* (i) buffer with + interpolated samples */ + int32_t *crossDot, /* (o) The cross correlation between + the target and the Augmented + vector */ + size_t low, /* (i) Lag to start from (typically + 20) */ + size_t high, /* (i) Lag to end at (typically 39) */ + int scale) /* (i) Scale factor to use for + the crossDot */ +{ + size_t lagcount; + size_t ilow; + int16_t *targetPtr; + int32_t *crossDotPtr; + int16_t *iSPtr=interpSamples; + + /* Calculate the correlation between the target and the + interpolated codebook. The correlation is calculated in + 3 sections with the interpolated part in the middle */ + crossDotPtr=crossDot; + for (lagcount=low; lagcount<=high; lagcount++) { + + ilow = lagcount - 4; + + /* Compute dot product for the first (lagcount-4) samples */ + (*crossDotPtr) = WebRtcSpl_DotProductWithScale(target, buffer-lagcount, ilow, scale); + + /* Compute dot product on the interpolated samples */ + (*crossDotPtr) += WebRtcSpl_DotProductWithScale(target+ilow, iSPtr, 4, scale); + targetPtr = target + lagcount; + iSPtr += lagcount-ilow; + + /* Compute dot product for the remaining samples */ + (*crossDotPtr) += WebRtcSpl_DotProductWithScale(targetPtr, buffer-lagcount, SUBL-lagcount, scale); + crossDotPtr++; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h new file mode 100644 index 0000000000..2e9612e51a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AugmentedCbCorr.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_AUGMENTED_CB_CORR_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_AUGMENTED_CB_CORR_H_ + +#include +#include + +/*----------------------------------------------------------------* + * Calculate correlation between target and Augmented codebooks + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AugmentedCbCorr( + int16_t* target, /* (i) Target vector */ + int16_t* buffer, /* (i) Memory buffer */ + int16_t* interpSamples, /* (i) buffer with + interpolated samples */ + int32_t* crossDot, /* (o) The cross correlation between + the target and the Augmented + vector */ + size_t low, /* (i) Lag to start from (typically + 20) */ + size_t high, /* (i) Lag to end at (typically 39 */ + int scale); /* (i) Scale factor to use for the crossDot */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c new file mode 100644 index 0000000000..1a9b882adf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_BwExpand.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * lpc bandwidth expansion + *---------------------------------------------------------------*/ + +/* The output is in the same domain as the input */ +void WebRtcIlbcfix_BwExpand( + int16_t *out, /* (o) the bandwidth expanded lpc coefficients */ + int16_t *in, /* (i) the lpc coefficients before bandwidth + expansion */ + int16_t *coef, /* (i) the bandwidth expansion factor Q15 */ + int16_t length /* (i) the length of lpc coefficient vectors */ + ) { + int i; + + out[0] = in[0]; + for (i = 1; i < length; i++) { + /* out[i] = coef[i] * in[i] with rounding. + in[] and out[] are in Q12 and coef[] is in Q15 + */ + out[i] = (int16_t)((coef[i] * in[i] + 16384) >> 15); + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h new file mode 100644 index 0000000000..ff9b0b302e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_BwExpand.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_BW_EXPAND_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_BW_EXPAND_H_ + +#include +#include + +/*----------------------------------------------------------------* + * lpc bandwidth expansion + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_BwExpand( + int16_t* out, /* (o) the bandwidth expanded lpc coefficients */ + int16_t* in, /* (i) the lpc coefficients before bandwidth + expansion */ + int16_t* coef, /* (i) the bandwidth expansion factor Q15 */ + int16_t length /* (i) the length of lpc coefficient vectors */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c new file mode 100644 index 0000000000..1e9a7040c7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbConstruct.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_construct.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/gain_dequant.h" +#include "modules/audio_coding/codecs/ilbc/get_cd_vec.h" +#include "rtc_base/sanitizer.h" + +// An arithmetic operation that is allowed to overflow. (It's still undefined +// behavior, so not a good idea; this just makes UBSan ignore the violation, so +// that our old code can continue to do what it's always been doing.) +static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow") + OverflowingAddS32S32ToS32(int32_t a, int32_t b) { + return a + b; +} + +/*----------------------------------------------------------------* + * Construct decoded vector from codebook and gains. 
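+ * (The decoded vector is the sum of three codebook-stage vectors, each
+ * scaled by a gain that is de-quantized relative to the previous stage.)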
+ *---------------------------------------------------------------*/
+
+bool WebRtcIlbcfix_CbConstruct(
+    int16_t* decvector,        /* (o) Decoded vector */
+    const int16_t* index,      /* (i) Codebook indices */
+    const int16_t* gain_index, /* (i) Gain quantization indices */
+    int16_t* mem,              /* (i) Buffer for codevector construction */
+    size_t lMem,               /* (i) Length of buffer */
+    size_t veclen) {           /* (i) Length of vector */
+  size_t j;
+  int16_t gain[CB_NSTAGES];
+  /* Stack based */
+  int16_t cbvec0[SUBL];
+  int16_t cbvec1[SUBL];
+  int16_t cbvec2[SUBL];
+  int32_t a32;
+  int16_t *gainPtr;
+
+  /* gain de-quantization */
+
+  gain[0] = WebRtcIlbcfix_GainDequant(gain_index[0], 16384, 0);
+  gain[1] = WebRtcIlbcfix_GainDequant(gain_index[1], gain[0], 1);
+  gain[2] = WebRtcIlbcfix_GainDequant(gain_index[2], gain[1], 2);
+
+  /* codebook vector construction and construction of total vector */
+
+  /* Stack based */
+  if (!WebRtcIlbcfix_GetCbVec(cbvec0, mem, (size_t)index[0], lMem, veclen))
+    return false;  // Failure.
+  if (!WebRtcIlbcfix_GetCbVec(cbvec1, mem, (size_t)index[1], lMem, veclen))
+    return false;  // Failure.
+  if (!WebRtcIlbcfix_GetCbVec(cbvec2, mem, (size_t)index[2], lMem, veclen))
+    return false;  // Failure.
+
+  gainPtr = &gain[0];
+  for (j=0;j<veclen;j++) {
+    a32 = (*gainPtr++) * cbvec0[j];
+    a32 += (*gainPtr++) * cbvec1[j];
+    a32 = OverflowingAddS32S32ToS32(a32, (*gainPtr) * cbvec2[j]);
+    gainPtr -= 2;
+
+    decvector[j] = (int16_t)((a32 + 8192) >> 14);
+  }
+
+  return true;  // Success.
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h
new file mode 100644
index 0000000000..8f7c663164
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbConstruct.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_CONSTRUCT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_CONSTRUCT_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/attributes.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * Construct decoded vector from codebook and gains.
+ *---------------------------------------------------------------*/
+
+// Returns true on success, false on failure.
+ABSL_MUST_USE_RESULT
+bool WebRtcIlbcfix_CbConstruct(
+    int16_t* decvector,        /* (o) Decoded vector */
+    const int16_t* index,      /* (i) Codebook indices */
+    const int16_t* gain_index, /* (i) Gain quantization indices */
+    int16_t* mem,              /* (i) Buffer for codevector construction */
+    size_t lMem,               /* (i) Length of buffer */
+    size_t veclen /* (i) Length of vector */
+);
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c
new file mode 100644
index 0000000000..21e4197607
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbMemEnergy.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/cb_mem_energy.h"
+
+#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * Function WebRtcIlbcfix_CbMemEnergy computes the energy of all
+ * the vectors in the codebook memory that will be used in the
+ * following search for the best match.
+ *----------------------------------------------------------------*/
+
+void WebRtcIlbcfix_CbMemEnergy(
+    size_t range,
+    int16_t *CB, /* (i) The CB memory (1st section) */
+    int16_t *filteredCB, /* (i) The filtered CB memory (2nd section) */
+    size_t lMem, /* (i) Length of the CB memory */
+    size_t lTarget, /* (i) Length of the target vector */
+    int16_t *energyW16, /* (o) Energy in the CB vectors */
+    int16_t *energyShifts, /* (o) Shift value of the energy */
+    int scale, /* (i) The scaling of all energy values */
+    size_t base_size /* (i) Index to where energy values should be stored */
+    ) {
+  int16_t *ppi, *ppo, *pp;
+  int32_t energy, tmp32;
+
+  /* Compute the energy and store it in a vector. Also the
+   * corresponding shift values are stored. The energy values
+   * are reused in all three stages. */
+
+  /* Calculate the energy in the first block of 'lTarget' samples. */
+  ppi = CB+lMem-lTarget-1;
+  ppo = CB+lMem-1;
+
+  pp=CB+lMem-lTarget;
+  energy = WebRtcSpl_DotProductWithScale( pp, pp, lTarget, scale);
+
+  /* Normalize the energy and store the number of shifts */
+  energyShifts[0] = (int16_t)WebRtcSpl_NormW32(energy);
+  tmp32 = energy << energyShifts[0];
+  energyW16[0] = (int16_t)(tmp32 >> 16);
+
+  /* Compute the energy of the rest of the cb memory
+   * by stepwise adding and subtracting the next
+   * sample and the last sample respectively. */
+  WebRtcIlbcfix_CbMemEnergyCalc(energy, range, ppi, ppo, energyW16, energyShifts, scale, 0);
+
+  /* Next, precompute the energy values for the filtered cb section */
+  energy=0;
+  pp=filteredCB+lMem-lTarget;
+
+  energy = WebRtcSpl_DotProductWithScale( pp, pp, lTarget, scale);
+
+  /* Normalize the energy and store the number of shifts */
+  energyShifts[base_size] = (int16_t)WebRtcSpl_NormW32(energy);
+  tmp32 = energy << energyShifts[base_size];
+  energyW16[base_size] = (int16_t)(tmp32 >> 16);
+
+  ppi = filteredCB + lMem - 1 - lTarget;
+  ppo = filteredCB + lMem - 1;
+
+  WebRtcIlbcfix_CbMemEnergyCalc(energy, range, ppi, ppo, energyW16, energyShifts, scale, base_size);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.h
new file mode 100644
index 0000000000..17ec337dc6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
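 */

/* CbMemEnergy above computes the energy of the first window directly
   and then lets CbMemEnergyCalc slide the window with one addition and
   one subtraction per lag. A standalone sketch of that idea; it slides
   forward without the scale shift the fixed-point code applies, and
   the helper name is illustrative. */

#include <stddef.h>
#include <stdint.h>

/* Energies of all length-`win` windows of x (there are n - win + 1).
   The first window is summed directly; each following one is updated
   in O(1) from the previous energy. */
static void SlidingEnergy(const int16_t* x, size_t n, size_t win,
                          int64_t* out) {
  int64_t energy = 0;
  size_t i;
  for (i = 0; i < win; i++)
    energy += (int32_t)x[i] * x[i];
  out[0] = energy;
  for (i = win; i < n; i++) {
    energy += (int32_t)x[i] * x[i];             /* sample entering the window */
    energy -= (int32_t)x[i - win] * x[i - win]; /* sample leaving it */
    out[i - win + 1] = energy;
  }
}

/*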
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbMemEnergy.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+void WebRtcIlbcfix_CbMemEnergy(
+    size_t range,
+    int16_t* CB, /* (i) The CB memory (1st section) */
+    int16_t* filteredCB, /* (i) The filtered CB memory (2nd section) */
+    size_t lMem, /* (i) Length of the CB memory */
+    size_t lTarget, /* (i) Length of the target vector */
+    int16_t* energyW16, /* (o) Energy in the CB vectors */
+    int16_t* energyShifts, /* (o) Shift value of the energy */
+    int scale, /* (i) The scaling of all energy values */
+    size_t base_size /* (i) Index to where energy values should be stored */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c
new file mode 100644
index 0000000000..0619bbe422
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
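 */

/* The energyW16/energyShifts pairs above store each 32-bit energy as a
   16-bit mantissa plus the left-shift count returned by
   WebRtcSpl_NormW32. A sketch of that normalize/restore round trip,
   assuming NormW32's behavior for non-negative inputs; the helper
   names are illustrative. */

#include <stdint.h>

/* Left shifts needed to bring the most significant set bit of a
   positive value up to bit 30 (0 is returned for 0). */
static int16_t NormW32(int32_t a) {
  int16_t shifts = 0;
  if (a == 0) return 0;
  while (a < (int32_t)0x40000000) {
    a <<= 1;
    shifts++;
  }
  return shifts;
}

static int16_t NormalizeEnergy(int32_t energy, int16_t* shift) {
  *shift = NormW32(energy);
  return (int16_t)((energy << *shift) >> 16); /* keep the top 16 bits */
}

static int32_t RestoreEnergy(int16_t mant, int16_t shift) {
  return ((int32_t)mant << 16) >> shift; /* low bits were lost above */
}

/*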
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergyAugmentation.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_CbMemEnergyAugmentation( + int16_t *interpSamples, /* (i) The interpolated samples */ + int16_t *CBmem, /* (i) The CB memory */ + int scale, /* (i) The scaling of all energy values */ + size_t base_size, /* (i) Index to where energy values should be stored */ + int16_t *energyW16, /* (o) Energy in the CB vectors */ + int16_t *energyShifts /* (o) Shift value of the energy */ + ){ + int32_t energy, tmp32; + int16_t *ppe, *pp, *interpSamplesPtr; + int16_t *CBmemPtr; + size_t lagcount; + int16_t *enPtr=&energyW16[base_size-20]; + int16_t *enShPtr=&energyShifts[base_size-20]; + int32_t nrjRecursive; + + CBmemPtr = CBmem+147; + interpSamplesPtr = interpSamples; + + /* Compute the energy for the first (low-5) noninterpolated samples */ + nrjRecursive = WebRtcSpl_DotProductWithScale( CBmemPtr-19, CBmemPtr-19, 15, scale); + ppe = CBmemPtr - 20; + + for (lagcount=20; lagcount<=39; lagcount++) { + + /* Update the energy recursively to save complexity */ + nrjRecursive += (*ppe * *ppe) >> scale; + ppe--; + energy = nrjRecursive; + + /* interpolation */ + energy += WebRtcSpl_DotProductWithScale(interpSamplesPtr, interpSamplesPtr, 4, scale); + interpSamplesPtr += 4; + + /* Compute energy for the remaining samples */ + pp = CBmemPtr - lagcount; + energy += WebRtcSpl_DotProductWithScale(pp, pp, SUBL-lagcount, scale); + + /* Normalize the energy and store the number of shifts */ + (*enShPtr) = (int16_t)WebRtcSpl_NormW32(energy); + tmp32 = energy << *enShPtr; + *enPtr = (int16_t)(tmp32 >> 16); + enShPtr++; + enPtr++; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h new file mode 100644 index 0000000000..d7b7a0d97e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
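 */

/* WebRtcSpl_DotProductWithScale, used throughout these files, sums
   products with each term right-shifted by `scale` so the 32-bit
   accumulator cannot overflow over a subframe. A plain C model of the
   contract these call sites rely on (a sketch, not the SPL
   implementation): */

#include <stddef.h>
#include <stdint.h>

static int32_t DotProductWithScale(const int16_t* x, const int16_t* y,
                                   size_t length, int scale) {
  int32_t sum = 0;
  size_t i;
  for (i = 0; i < length; i++)
    sum += ((int32_t)x[i] * y[i]) >> scale; /* shift each product, then add */
  return sum;
}

/*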
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbMemEnergyAugmentation.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_AUGMENTATION_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_AUGMENTATION_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+void WebRtcIlbcfix_CbMemEnergyAugmentation(
+    int16_t* interpSamples, /* (i) The interpolated samples */
+    int16_t* CBmem, /* (i) The CB memory */
+    int scale, /* (i) The scaling of all energy values */
+    size_t base_size, /* (i) Index to where energy values should be stored */
+    int16_t* energyW16, /* (o) Energy in the CB vectors */
+    int16_t* energyShifts /* (o) Shift value of the energy */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c
new file mode 100644
index 0000000000..58c0c5fe6d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbMemEnergyCalc.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/* Compute the energy of the rest of the cb memory
+ * by stepwise adding and subtracting the next
+ * sample and the last sample respectively */
+void WebRtcIlbcfix_CbMemEnergyCalc(
+    int32_t energy, /* (i) input start energy */
+    size_t range, /* (i) number of iterations */
+    int16_t *ppi, /* (i) input pointer 1 */
+    int16_t *ppo, /* (i) input pointer 2 */
+    int16_t *energyW16, /* (o) Energy in the CB vectors */
+    int16_t *energyShifts, /* (o) Shift value of the energy */
+    int scale, /* (i) The scaling of all energy values */
+    size_t base_size /* (i) Index to where energy values should be stored */
+    )
+{
+  size_t j;
+  int16_t shft;
+  int32_t tmp;
+  int16_t *eSh_ptr;
+  int16_t *eW16_ptr;
+
+
+  eSh_ptr = &energyShifts[1+base_size];
+  eW16_ptr = &energyW16[1+base_size];
+
+  for (j = 0; j + 1 < range; j++) {
+
+    /* Calculate next energy by a +/-
+       operation on the edge samples */
+    tmp = (*ppi) * (*ppi) - (*ppo) * (*ppo);
+    energy += tmp >> scale;
+    energy = WEBRTC_SPL_MAX(energy, 0);
+
+    ppi--;
+    ppo--;
+
+    /* Normalize the energy into an int16_t and store
+       the number of shifts */
+
+    shft = (int16_t)WebRtcSpl_NormW32(energy);
+    *eSh_ptr++ = shft;
+
+    tmp = energy << shft;
+    *eW16_ptr++ = (int16_t)(tmp >> 16);
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h
new file mode 100644
index 0000000000..1d1e8d62b9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbMemEnergyCalc.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_CALC_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_CALC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+void WebRtcIlbcfix_CbMemEnergyCalc(
+    int32_t energy, /* (i) input start energy */
+    size_t range, /* (i) number of iterations */
+    int16_t* ppi, /* (i) input pointer 1 */
+    int16_t* ppo, /* (i) input pointer 2 */
+    int16_t* energyW16, /* (o) Energy in the CB vectors */
+    int16_t* energyShifts, /* (o) Shift value of the energy */
+    int scale, /* (i) The scaling of all energy values */
+    size_t base_size /* (i) Index to where energy values should be stored */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c
new file mode 100644
index 0000000000..24b5292354
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbSearch.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/cb_search.h"
+
+#include "modules/audio_coding/codecs/ilbc/augmented_cb_corr.h"
+#include "modules/audio_coding/codecs/ilbc/cb_mem_energy.h"
+#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h"
+#include "modules/audio_coding/codecs/ilbc/cb_search_core.h"
+#include "modules/audio_coding/codecs/ilbc/cb_update_best_index.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/energy_inverse.h"
+#include "modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h"
+#include "modules/audio_coding/codecs/ilbc/gain_quant.h"
+#include "modules/audio_coding/codecs/ilbc/interpolate_samples.h"
+
+/*----------------------------------------------------------------*
+ * Search routine for codebook encoding and gain quantization.
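 *----------------------------------------------------------------*/

/* CbMemEnergyCalc above derives each window energy from the previous
   one (add the square of the entering sample, subtract the square of
   the leaving one) and clamps at zero, since the per-term scale shift
   rounds and could otherwise drive the running energy negative. The
   update step in isolation (illustrative helper): */

#include <stdint.h>

static int32_t UpdateWindowEnergy(int32_t energy, int16_t in, int16_t out,
                                  int scale) {
  int32_t delta = ((int32_t)in * in - (int32_t)out * out) >> scale;
  energy += delta;
  return energy < 0 ? 0 : energy; /* clamp, as the fixed-point code does */
}

/*----------------------------------------------------------------*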
+ *----------------------------------------------------------------*/ + +void WebRtcIlbcfix_CbSearch( + IlbcEncoder *iLBCenc_inst, + /* (i) the encoder state structure */ + int16_t *index, /* (o) Codebook indices */ + int16_t *gain_index, /* (o) Gain quantization indices */ + int16_t *intarget, /* (i) Target vector for encoding */ + int16_t *decResidual,/* (i) Decoded residual for codebook construction */ + size_t lMem, /* (i) Length of buffer */ + size_t lTarget, /* (i) Length of vector */ + int16_t *weightDenum,/* (i) weighting filter coefficients in Q12 */ + size_t block /* (i) the subblock number */ + ) { + size_t i, range; + int16_t ii, j, stage; + int16_t *pp; + int16_t tmp; + int scale; + int16_t bits, temp1, temp2; + size_t base_size; + int32_t codedEner, targetEner; + int16_t gains[CB_NSTAGES+1]; + int16_t *cb_vecPtr; + size_t indexOffset, sInd, eInd; + int32_t CritMax=0; + int16_t shTotMax=WEBRTC_SPL_WORD16_MIN; + size_t bestIndex=0; + int16_t bestGain=0; + size_t indexNew; + int16_t CritNewSh; + int32_t CritNew; + int32_t *cDotPtr; + size_t noOfZeros; + int16_t *gainPtr; + int32_t t32, tmpW32; + int16_t *WebRtcIlbcfix_kGainSq5_ptr; + /* Stack based */ + int16_t CBbuf[CB_MEML+LPC_FILTERORDER+CB_HALFFILTERLEN]; + int32_t cDot[128]; + int32_t Crit[128]; + int16_t targetVec[SUBL+LPC_FILTERORDER]; + int16_t cbvectors[CB_MEML + 1]; /* Adding one extra position for + Coverity warnings. */ + int16_t codedVec[SUBL]; + int16_t interpSamples[20*4]; + int16_t interpSamplesFilt[20*4]; + int16_t energyW16[CB_EXPAND*128]; + int16_t energyShifts[CB_EXPAND*128]; + int16_t *inverseEnergy=energyW16; /* Reuse memory */ + int16_t *inverseEnergyShifts=energyShifts; /* Reuse memory */ + int16_t *buf = &CBbuf[LPC_FILTERORDER]; + int16_t *target = &targetVec[LPC_FILTERORDER]; + int16_t *aug_vec = (int16_t*)cDot; /* length [SUBL], reuse memory */ + + /* Determine size of codebook sections */ + + base_size=lMem-lTarget+1; + if (lTarget==SUBL) { + base_size=lMem-19; + } + + /* weighting of the CB memory */ + noOfZeros=lMem-WebRtcIlbcfix_kFilterRange[block]; + WebRtcSpl_MemSetW16(&buf[-LPC_FILTERORDER], 0, noOfZeros+LPC_FILTERORDER); + WebRtcSpl_FilterARFastQ12( + decResidual+noOfZeros, buf+noOfZeros, + weightDenum, LPC_FILTERORDER+1, WebRtcIlbcfix_kFilterRange[block]); + + /* weighting of the target vector */ + WEBRTC_SPL_MEMCPY_W16(&target[-LPC_FILTERORDER], buf+noOfZeros+WebRtcIlbcfix_kFilterRange[block]-LPC_FILTERORDER, LPC_FILTERORDER); + WebRtcSpl_FilterARFastQ12( + intarget, target, + weightDenum, LPC_FILTERORDER+1, lTarget); + + /* Store target, towards the end codedVec is calculated as + the initial target minus the remaining target */ + WEBRTC_SPL_MEMCPY_W16(codedVec, target, lTarget); + + /* Find the highest absolute value to calculate proper + vector scale factor (so that it uses 12 bits) */ + temp1 = WebRtcSpl_MaxAbsValueW16(buf, lMem); + temp2 = WebRtcSpl_MaxAbsValueW16(target, lTarget); + + if ((temp1>0)&&(temp2>0)) { + temp1 = WEBRTC_SPL_MAX(temp1, temp2); + scale = WebRtcSpl_GetSizeInBits((uint32_t)(temp1 * temp1)); + } else { + /* temp1 or temp2 is negative (maximum was -32768) */ + scale = 30; + } + + /* Scale to so that a mul-add 40 times does not overflow */ + scale = scale - 25; + scale = WEBRTC_SPL_MAX(0, scale); + + /* Compute energy of the original target */ + targetEner = WebRtcSpl_DotProductWithScale(target, target, lTarget, scale); + + /* Prepare search over one more codebook section. This section + is created by filtering the original buffer with a filter. 
 */
+  WebRtcIlbcfix_FilteredCbVecs(cbvectors, buf, lMem, WebRtcIlbcfix_kFilterRange[block]);
+
+  range = WebRtcIlbcfix_kSearchRange[block][0];
+
+  if(lTarget == SUBL) {
+    /* Create the interpolated samples and store them for use in all stages */
+
+    /* First section, non-filtered half of the cb */
+    WebRtcIlbcfix_InterpolateSamples(interpSamples, buf, lMem);
+
+    /* Second section, filtered half of the cb */
+    WebRtcIlbcfix_InterpolateSamples(interpSamplesFilt, cbvectors, lMem);
+
+    /* Compute the CB vectors' energies for the first cb section (non-filtered) */
+    WebRtcIlbcfix_CbMemEnergyAugmentation(interpSamples, buf,
+                                          scale, 20, energyW16, energyShifts);
+
+    /* Compute the CB vectors' energies for the second cb section (filtered cb) */
+    WebRtcIlbcfix_CbMemEnergyAugmentation(interpSamplesFilt, cbvectors, scale,
+                                          base_size + 20, energyW16,
+                                          energyShifts);
+
+    /* Compute the CB vectors' energies and store them in the vector
+     * energyW16. Also the corresponding shift values are stored. The
+     * energy values are used in all three stages. */
+    WebRtcIlbcfix_CbMemEnergy(range, buf, cbvectors, lMem,
+                              lTarget, energyW16+20, energyShifts+20, scale, base_size);
+
+  } else {
+    /* Compute the CB vectors' energies and store them in the vector
+     * energyW16. Also the corresponding shift values are stored. The
+     * energy values are used in all three stages. */
+    WebRtcIlbcfix_CbMemEnergy(range, buf, cbvectors, lMem,
+                              lTarget, energyW16, energyShifts, scale, base_size);
+
+    /* Set the energy positions 58-63 and 122-127 to zero
+       (otherwise they are uninitialized) */
+    WebRtcSpl_MemSetW16(energyW16+range, 0, (base_size-range));
+    WebRtcSpl_MemSetW16(energyW16+range+base_size, 0, (base_size-range));
+  }
+
+  /* Calculate Inverse Energy (energyW16 is already normalized
+     and will contain the inverse energy in Q29 after this call) */
+  WebRtcIlbcfix_EnergyInverse(energyW16, base_size*CB_EXPAND);
+
+  /* The gain value computed in the previous stage is used
+   * as an upper limit to what the next stage gain value
+   * is allowed to be. In stage 0, 16384 (1.0 in Q14) is used as
+   * the upper limit. */
+  gains[0] = 16384;
+
+  for (stage=0; stage<CB_NSTAGES; stage++) {
+
+    /* Set up memories */
+    range = WebRtcIlbcfix_kSearchRange[block][stage];
+
+    /* initialize search measures */
+    CritMax=0;
+    shTotMax=-100;
+    bestIndex=0;
+    bestGain=0;
+
+    /* loop over lags 40+ in the first codebook section, full search */
+    cb_vecPtr = buf+lMem-lTarget;
+
+    /* Calculate all the cross correlations (augmented part of CB) */
+    if (lTarget==SUBL) {
+      WebRtcIlbcfix_AugmentedCbCorr(target, buf+lMem,
+                                    interpSamples, cDot,
+                                    20, 39, scale);
+      cDotPtr=&cDot[20];
+    } else {
+      cDotPtr=cDot;
+    }
+    /* Calculate all the cross correlations (main part of CB) */
+    WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget, range, scale, -1);
+
+    /* Adjust the search range for the augmented vectors */
+    if (lTarget==SUBL) {
+      range=WebRtcIlbcfix_kSearchRange[block][stage]+20;
+    } else {
+      range=WebRtcIlbcfix_kSearchRange[block][stage];
+    }
+
+    indexOffset=0;
+
+    /* Search for best index in this part of the vector */
+    WebRtcIlbcfix_CbSearchCore(
+        cDot, range, stage, inverseEnergy,
+        inverseEnergyShifts, Crit,
+        &indexNew, &CritNew, &CritNewSh);
+
+    /* Update the global best index and the corresponding gain */
+    WebRtcIlbcfix_CbUpdateBestIndex(
+        CritNew, CritNewSh, indexNew, cDot[indexNew],
+        inverseEnergy[indexNew], inverseEnergyShifts[indexNew],
+        &CritMax, &shTotMax, &bestIndex, &bestGain);
+    /* Narrow the search range for the filtered codebook section to
+       CB_RESRANGE indices around the best index found so far */
+    sInd = ((CB_RESRANGE >> 1) > bestIndex) ?
+        0 : (bestIndex - (CB_RESRANGE >> 1));
+    eInd=sInd+CB_RESRANGE;
+    if (eInd>=range) {
+      eInd=range-1;
+      sInd=eInd-CB_RESRANGE;
+    }
+
+    range = WebRtcIlbcfix_kSearchRange[block][stage];
+
+    if (lTarget==SUBL) {
+      i=sInd;
+      if (sInd<20) {
+        WebRtcIlbcfix_AugmentedCbCorr(target, cbvectors + lMem,
+                                      interpSamplesFilt, cDot, sInd + 20,
+                                      WEBRTC_SPL_MIN(39, (eInd + 20)), scale);
+        i=20;
+        cDotPtr = &cDot[20 - sInd];
+      } else {
+        cDotPtr = cDot;
+      }
+
+      cb_vecPtr = cbvectors+lMem-20-i;
+
+      /* Calculate the cross correlations (main part of the filtered CB) */
+      WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget,
+                                 eInd - i + 1, scale, -1);
+
+    } else {
+      cDotPtr = cDot;
+      cb_vecPtr = cbvectors+lMem-lTarget-sInd;
+
+      /* Calculate the cross correlations (main part of the filtered CB) */
+      WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget,
+                                 eInd - sInd + 1, scale, -1);
+
+    }
+
+    /* Adjust the search range for the augmented vectors */
+    indexOffset=base_size+sInd;
+
+    /* Search for best index in this part of the vector */
+    WebRtcIlbcfix_CbSearchCore(
+        cDot, eInd-sInd+1, stage, inverseEnergy+indexOffset,
+        inverseEnergyShifts+indexOffset, Crit,
+        &indexNew, &CritNew, &CritNewSh);
+
+    /* Update the global best index and the corresponding gain */
+    WebRtcIlbcfix_CbUpdateBestIndex(
+        CritNew, CritNewSh, indexNew+indexOffset, cDot[indexNew],
+        inverseEnergy[indexNew+indexOffset], inverseEnergyShifts[indexNew+indexOffset],
+        &CritMax, &shTotMax, &bestIndex, &bestGain);
+
+    index[stage] = (int16_t)bestIndex;
+
+    bestGain = WebRtcIlbcfix_GainQuant(bestGain,
+        (int16_t)WEBRTC_SPL_ABS_W16(gains[stage]), stage, &gain_index[stage]);
+
+    /* Extract the best (according to measure) codebook vector.
+       Also adjust the index so that the augmented vectors come last;
+       during the search above they came first. */
+
+    if(lTarget==(STATE_LEN-iLBCenc_inst->state_short_len)) {
+
+      if((size_t)index[stage]<base_size) {
+        pp=buf+lMem-lTarget-index[stage];
+      } else {
+        pp=cbvectors+lMem-lTarget-
+            index[stage]+base_size;
+      }
+
+    } else {
+
+      if ((size_t)index[stage]<base_size) {
+        if (index[stage]>=20) {
+          /* Adjust index and extract vector */
+          index[stage]-=20;
+          pp=buf+lMem-lTarget-index[stage];
+        } else {
+          /* Adjust index and extract vector */
+          index[stage]+=(int16_t)(base_size-20);
+
+          WebRtcIlbcfix_CreateAugmentedVec(index[stage]-base_size+40,
+                                           buf+lMem, aug_vec);
+          pp = aug_vec;
+
+        }
+      } else {
+
+        if ((index[stage] - base_size) >= 20) {
+          /* Adjust index and extract vector */
+          index[stage]-=20;
+          pp=cbvectors+lMem-lTarget-
+              index[stage]+base_size;
+        } else {
+          /* Adjust index and extract vector */
+          index[stage]+=(int16_t)(base_size-20);
+          WebRtcIlbcfix_CreateAugmentedVec(index[stage]-2*base_size+40,
+                                           cbvectors+lMem, aug_vec);
+          pp = aug_vec;
+        }
+      }
+    }
+
+    /* Subtract the best codebook vector, according
+       to measure, from the target vector */
+
+    WebRtcSpl_AddAffineVectorToVector(target, pp, (int16_t)(-bestGain),
+                                      (int32_t)8192, (int16_t)14, lTarget);
+
+    /* record quantized gain */
+    gains[stage+1] = bestGain;
+
+  } /* end of Main Loop. for (stage=0;...
 */
+
+  /* Calculate the coded vector (original target - what's left) */
+  for (i=0;i<lTarget;i++) {
+    codedVec[i]-=target[i];
+  }
+
+  /* Gain adjustment for energy matching */
+  codedEner = WebRtcSpl_DotProductWithScale(codedVec, codedVec, lTarget, scale);
+
+  j=gain_index[0];
+
+  temp1 = (int16_t)WebRtcSpl_NormW32(codedEner);
+  temp2 = (int16_t)WebRtcSpl_NormW32(targetEner);
+
+  if(temp1 < temp2) {
+    bits = 16 - temp1;
+  } else {
+    bits = 16 - temp2;
+  }
+
+  tmp = (int16_t)((gains[1] * gains[1]) >> 14);
+
+  targetEner = (int16_t)WEBRTC_SPL_SHIFT_W32(targetEner, -bits) * tmp;
+
+  tmpW32 = ((int32_t)(gains[1]-1))<<1;
+
+  /* Pointer to the table that contains
+     gain_sq5TblFIX * gain_sq5TblFIX in Q14 */
+  gainPtr=(int16_t*)WebRtcIlbcfix_kGainSq5Sq+gain_index[0];
+  temp1 = (int16_t)WEBRTC_SPL_SHIFT_W32(codedEner, -bits);
+
+  WebRtcIlbcfix_kGainSq5_ptr = (int16_t*)&WebRtcIlbcfix_kGainSq5[j];
+
+  /* targetEner and codedEner are in Q(-2*scale) */
+  for (ii=gain_index[0];ii<32;ii++) {
+
+    /* Change the index if
+       (codedEnergy*gainTbl[i]*gainTbl[i])<(targetEn*gain[0]*gain[0]) AND
+       gainTbl[i] < 2*gain[0]
+    */
+
+    t32 = temp1 * *gainPtr;
+    t32 = t32 - targetEner;
+    if (t32 < 0) {
+      if ((*WebRtcIlbcfix_kGainSq5_ptr) < tmpW32) {
+        j=ii;
+        WebRtcIlbcfix_kGainSq5_ptr = (int16_t*)&WebRtcIlbcfix_kGainSq5[ii];
+      }
+    }
+    gainPtr++;
+  }
+  gain_index[0]=j;
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.h
new file mode 100644
index 0000000000..84a52c7868
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbSearch.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+void WebRtcIlbcfix_CbSearch(
+    IlbcEncoder* iLBCenc_inst,
+    /* (i) the encoder state structure */
+    int16_t* index, /* (o) Codebook indices */
+    int16_t* gain_index, /* (o) Gain quantization indices */
+    int16_t* intarget, /* (i) Target vector for encoding */
+    int16_t* decResidual, /* (i) Decoded residual for codebook construction */
+    size_t lMem, /* (i) Length of buffer */
+    size_t lTarget, /* (i) Length of vector */
+    int16_t* weightDenum, /* (i) weighting filter coefficients in Q12 */
+    size_t block /* (i) the subblock number */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c
new file mode 100644
index 0000000000..a75e5b0ab8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
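 */

/* The loop that ends CbSearch above moves the stage-0 gain index down
   the gain table while the scaled coded energy still exceeds the
   scaled target energy, capping the candidate gain at twice the
   quantized one. The same selection rule in floating point, with a
   made-up four-entry gain table (none of these values are iLBC data): */

#include <stdio.h>

int main(void) {
  const double gainTbl[4] = {1.20, 0.90, 0.60, 0.30};
  const int start = 1;           /* index chosen by gain quantization */
  const double gain = gainTbl[start];
  const double codedEner = 0.5;  /* energy of the coded vector */
  const double targetEner = 0.2; /* energy of the original target */
  int best = start;
  int i;

  for (i = start; i < 4; i++) {
    if (codedEner * gainTbl[i] * gainTbl[i] < targetEner * gain * gain &&
        gainTbl[best] < 2.0 * gain) {
      best = i;
    }
  }
  printf("adjusted gain index: %d\n", best); /* prints 3 for these numbers */
  return 0;
}

/*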
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbSearchCore.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/cb_search_core.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+void WebRtcIlbcfix_CbSearchCore(
+    int32_t *cDot, /* (i) Cross Correlation */
+    size_t range, /* (i) Search range */
+    int16_t stage, /* (i) Stage of this search */
+    int16_t *inverseEnergy, /* (i) Inverse energy */
+    int16_t *inverseEnergyShift, /* (i) Shifts of inverse energy
+                                    with the offset 2*16-29 */
+    int32_t *Crit, /* (o) The criteria */
+    size_t *bestIndex, /* (o) Index that corresponds to
+                          maximum criteria (in this
+                          vector) */
+    int32_t *bestCrit, /* (o) Value of criteria for the
+                          chosen index */
+    int16_t *bestCritSh) /* (o) The domain of the chosen
+                            criteria */
+{
+  int32_t maxW32, tmp32;
+  int16_t max, sh, tmp16;
+  size_t i;
+  int32_t *cDotPtr;
+  int16_t cDotSqW16;
+  int16_t *inverseEnergyPtr;
+  int32_t *critPtr;
+  int16_t *inverseEnergyShiftPtr;
+
+  /* Don't allow negative values for stage 0 */
+  if (stage==0) {
+    cDotPtr=cDot;
+    for (i=0;i<range;i++) {
+      *cDotPtr=WEBRTC_SPL_MAX(0, (*cDotPtr));
+      cDotPtr++;
+    }
+  }
+
+  /* Normalize cDot to int16_t, calculate the square of cDot
+     and store the upper int16_t */
+  maxW32 = WebRtcSpl_MaxAbsValueW32(cDot, range);
+
+  sh = (int16_t)WebRtcSpl_NormW32(maxW32);
+  cDotPtr = cDot;
+  inverseEnergyPtr = inverseEnergy;
+  critPtr = Crit;
+  inverseEnergyShiftPtr=inverseEnergyShift;
+  max=WEBRTC_SPL_WORD16_MIN;
+
+  for (i=0;i<range;i++) {
+    /* Calculate cDot*cDot and put the result in an int16_t */
+    tmp32 = *cDotPtr << sh;
+    tmp16 = (int16_t)(tmp32 >> 16);
+    cDotSqW16 = (int16_t)(((int32_t)(tmp16)*(tmp16))>>16);
+
+    /* Calculate the criteria (cDot*cDot/energy) */
+    *critPtr = cDotSqW16 * *inverseEnergyPtr;
+
+    /* Extract the maximum shift value under the constraint
+       that the criteria is not zero */
+    if ((*critPtr)!=0) {
+      max = WEBRTC_SPL_MAX((*inverseEnergyShiftPtr), max);
+    }
+
+    inverseEnergyPtr++;
+    inverseEnergyShiftPtr++;
+    critPtr++;
+    cDotPtr++;
+  }
+
+  /* If max is still at its initialization value, no criterion was
+     non-zero; set the shift to zero */
+  if (max==WEBRTC_SPL_WORD16_MIN) {
+    max = 0;
+  }
+
+  /* Modify the criteria so that all of them use the same Q domain */
+  critPtr=Crit;
+  inverseEnergyShiftPtr=inverseEnergyShift;
+  for (i=0;i<range;i++) {
+    /* Guarantee that the shift value is less than 16
+       in order to simplify for DSP's (16x16->31) */
+    tmp16 = WEBRTC_SPL_MIN(16, max-(*inverseEnergyShiftPtr));
+
+    (*critPtr)=WEBRTC_SPL_SHIFT_W32((*critPtr),-tmp16);
+    critPtr++;
+    inverseEnergyShiftPtr++;
+  }
+
+  /* Find the index of the best value */
+  *bestIndex = WebRtcSpl_MaxIndexW32(Crit, range);
+  *bestCrit = Crit[*bestIndex];
+
+  /* Calculate total shifts of this criteria */
+  *bestCritSh = 32 - 2*sh + max;
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h
new file mode 100644
index 0000000000..5da70e0988
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
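 */

/* CbSearchCore above, and CbUpdateBestIndex below, compare
   block-floating-point criteria: a 32-bit value plus a shift recording
   its Q domain, where a larger shift means a larger represented value.
   A compact standalone comparison in the same style as
   CbUpdateBestIndex's normalization (illustrative helper): */

#include <stdint.h>

/* Returns 1 if valA * 2^shA > valB * 2^shB, comparing after shifting
   the pair onto a common domain (shift difference capped at 31). */
static int BlockFloatGreater(int32_t valA, int16_t shA,
                             int32_t valB, int16_t shB) {
  if (shA > shB) {
    int16_t d = shA - shB;
    return valA > (valB >> (d > 31 ? 31 : d));
  } else {
    int16_t d = shB - shA;
    return (valA >> (d > 31 ? 31 : d)) > valB;
  }
}

/*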
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbSearchCore.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_CORE_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_CORE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+void WebRtcIlbcfix_CbSearchCore(
+    int32_t* cDot, /* (i) Cross Correlation */
+    size_t range, /* (i) Search range */
+    int16_t stage, /* (i) Stage of this search */
+    int16_t* inverseEnergy, /* (i) Inverse energy */
+    int16_t* inverseEnergyShift, /* (i) Shifts of inverse energy
+                                    with the offset 2*16-29 */
+    int32_t* Crit, /* (o) The criteria */
+    size_t* bestIndex, /* (o) Index that corresponds to
+                          maximum criteria (in this
+                          vector) */
+    int32_t* bestCrit, /* (o) Value of criteria for the
+                          chosen index */
+    int16_t* bestCritSh); /* (o) The domain of the chosen
+                             criteria */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c
new file mode 100644
index 0000000000..d6fa4d93d4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbUpdateBestIndex.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/cb_update_best_index.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+void WebRtcIlbcfix_CbUpdateBestIndex(
+    int32_t CritNew, /* (i) New potentially best criteria */
+    int16_t CritNewSh, /* (i) Shift value of above criteria */
+    size_t IndexNew, /* (i) Index of new criteria */
+    int32_t cDotNew, /* (i) Cross dot of new index */
+    int16_t invEnergyNew, /* (i) Inverse energy of new index */
+    int16_t energyShiftNew, /* (i) Energy shifts of new index */
+    int32_t *CritMax, /* (i/o) Maximum criteria (so far) */
+    int16_t *shTotMax, /* (i/o) Shifts of maximum criteria */
+    size_t *bestIndex, /* (i/o) Index that corresponds to
+                          maximum criteria */
+    int16_t *bestGain) /* (i/o) Gain in Q14 that corresponds
+                          to maximum criteria */
+{
+  int16_t shOld, shNew, tmp16;
+  int16_t scaleTmp;
+  int32_t gainW32;
+
+  /* Normalize the new and old criteria to the same domain */
+  if (CritNewSh>(*shTotMax)) {
+    shOld=WEBRTC_SPL_MIN(31,CritNewSh-(*shTotMax));
+    shNew=0;
+  } else {
+    shOld=0;
+    shNew=WEBRTC_SPL_MIN(31,(*shTotMax)-CritNewSh);
+  }
+
+  /* Compare the two criteria.
If the new one is better,
+     calculate the gain and store this index as the new best one
+  */
+
+  if ((CritNew >> shNew) > (*CritMax >> shOld)) {
+
+    tmp16 = (int16_t)WebRtcSpl_NormW32(cDotNew);
+    tmp16 = 16 - tmp16;
+
+    /* Calculate the gain in Q14
+       Compensate for inverseEnergyshift in Q29 and that the energy
+       value was stored in an int16_t (shifted down 16 steps)
+       => 29-14+16 = 31 */
+
+    scaleTmp = -energyShiftNew-tmp16+31;
+    scaleTmp = WEBRTC_SPL_MIN(31, scaleTmp);
+
+    gainW32 = ((int16_t)WEBRTC_SPL_SHIFT_W32(cDotNew, -tmp16) * invEnergyNew) >>
+        scaleTmp;
+
+    /* Check that the gain satisfies the gain criterion (max 1.3);
+       if it is larger, set the gain to 1.3
+       (slightly different from the FLP version) */
+    if (gainW32>21299) {
+      *bestGain=21299;
+    } else if (gainW32<-21299) {
+      *bestGain=-21299;
+    } else {
+      *bestGain=(int16_t)gainW32;
+    }
+
+    *CritMax=CritNew;
+    *shTotMax=CritNewSh;
+    *bestIndex = IndexNew;
+  }
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.h
new file mode 100644
index 0000000000..1a95d531e9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CbUpdateBestIndex.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_UPDATE_BEST_INDEX_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_UPDATE_BEST_INDEX_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+void WebRtcIlbcfix_CbUpdateBestIndex(
+    int32_t CritNew, /* (i) New potentially best criteria */
+    int16_t CritNewSh, /* (i) Shift value of above criteria */
+    size_t IndexNew, /* (i) Index of new criteria */
+    int32_t cDotNew, /* (i) Cross dot of new index */
+    int16_t invEnergyNew, /* (i) Inverse energy of new index */
+    int16_t energyShiftNew, /* (i) Energy shifts of new index */
+    int32_t* CritMax, /* (i/o) Maximum criteria (so far) */
+    int16_t* shTotMax, /* (i/o) Shifts of maximum criteria */
+    size_t* bestIndex, /* (i/o) Index that corresponds to
+                          maximum criteria */
+    int16_t* bestGain); /* (i/o) Gain in Q14 that corresponds
+                           to maximum criteria */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c
new file mode 100644
index 0000000000..b4eee66219
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
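 */

/* The constant 21299 above is 1.3 in Q14: round(1.3 * 16384) = 21299,
   so the clamp keeps the codebook gain within +/-1.3. The conversion
   and clamp in isolation (illustrative helper): */

#include <stdint.h>
#include <stdio.h>

static int16_t ClampGainQ14(int32_t gain_q14) {
  if (gain_q14 > 21299) return 21299;   /* +1.3 in Q14 */
  if (gain_q14 < -21299) return -21299; /* -1.3 in Q14 */
  return (int16_t)gain_q14;
}

int main(void) {
  printf("%f\n", ClampGainQ14(40000) / 16384.0); /* about 1.3 */
  return 0;
}

/*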
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Chebyshev.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/chebyshev.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*------------------------------------------------------------------*
+ * Calculate the Chebyshev polynomial series
+ * F(w) = 2*exp(-j5w)*C(x)
+ *  C(x) = (T_0(x) + f(1)T_1(x) + ... + f(4)T_4(x) + f(5)/2)
+ *  T_i(x) is the i-th order Chebyshev polynomial
+ *------------------------------------------------------------------*/
+
+int16_t WebRtcIlbcfix_Chebyshev(
+    /* (o) Result of C(x) */
+    int16_t x, /* (i) Value to the Chebyshev polynomial */
+    int16_t *f /* (i) The coefficients in the polynomial */
+    ) {
+  int16_t b1_high, b1_low; /* Use the high, low format to increase the accuracy */
+  int32_t b2;
+  int32_t tmp1W32;
+  int32_t tmp2W32;
+  int i;
+
+  b2 = (int32_t)0x1000000; /* b2 = 1.0 (Q23) */
+  /* Calculate b1 = 2*x + f[1] */
+  tmp1W32 = (x << 10) + (f[1] << 14);
+
+  for (i = 2; i < 5; i++) {
+    tmp2W32 = tmp1W32;
+
+    /* Split b1 (in tmp1W32) into a high and low part */
+    b1_high = (int16_t)(tmp1W32 >> 16);
+    b1_low = (int16_t)((tmp1W32 - ((int32_t)b1_high << 16)) >> 1);
+
+    /* Calculate 2*x*b1-b2+f[i] */
+    tmp1W32 = ((b1_high * x + ((b1_low * x) >> 15)) << 2) - b2 + (f[i] << 14);
+
+    /* Update b2 for next round */
+    b2 = tmp2W32;
+  }
+
+  /* Split b1 (in tmp1W32) into a high and low part */
+  b1_high = (int16_t)(tmp1W32 >> 16);
+  b1_low = (int16_t)((tmp1W32 - ((int32_t)b1_high << 16)) >> 1);
+
+  /* tmp1W32 = x*b1 - b2 + f[i]/2 */
+  tmp1W32 = ((b1_high * x) << 1) + (((b1_low * x) >> 15) << 1) -
+      b2 + (f[i] << 13);
+
+  /* Handle overflows and set to maximum or minimum int16_t instead */
+  if (tmp1W32>((int32_t)33553408)) {
+    return(WEBRTC_SPL_WORD16_MAX);
+  } else if (tmp1W32<((int32_t)-33554432)) {
+    return(WEBRTC_SPL_WORD16_MIN);
+  } else {
+    return (int16_t)(tmp1W32 >> 10);
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.h
new file mode 100644
index 0000000000..7e7742c5cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Chebyshev.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CHEBYSHEV_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CHEBYSHEV_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*------------------------------------------------------------------*
+ * Calculate the Chebyshev polynomial series
+ * F(w) = 2*exp(-j5w)*C(x)
+ *  C(x) = (T_0(x) + f(1)T_1(x) + ...
+ f(4)T_4(x) + f(5)/2)
+ *  T_i(x) is the i-th order Chebyshev polynomial
+ *------------------------------------------------------------------*/
+
+int16_t WebRtcIlbcfix_Chebyshev(
+    /* (o) Result of C(x) */
+    int16_t x, /* (i) Value to the Chebyshev polynomial */
+    int16_t* f /* (i) The coefficients in the polynomial */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c
new file mode 100644
index 0000000000..452bc78e3b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CompCorr.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/comp_corr.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * Compute cross correlation and pitch gain for pitch prediction
+ * of last subframe at given lag.
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_CompCorr(
+    int32_t *corr, /* (o) cross correlation */
+    int32_t *ener, /* (o) energy */
+    int16_t *buffer, /* (i) signal buffer */
+    size_t lag, /* (i) pitch lag */
+    size_t bLen, /* (i) length of buffer */
+    size_t sRange, /* (i) correlation search length */
+    int16_t scale /* (i) number of rightshifts to use */
+    ){
+  int16_t *w16ptr;
+
+  w16ptr=&buffer[bLen-sRange-lag];
+
+  /* Calculate correlation and energy */
+  (*corr)=WebRtcSpl_DotProductWithScale(&buffer[bLen-sRange], w16ptr, sRange, scale);
+  (*ener)=WebRtcSpl_DotProductWithScale(w16ptr, w16ptr, sRange, scale);
+
+  /* For zero energy, set the correlation to zero and the energy to
+     one in order to avoid potential problems in upcoming divisions */
+  if (*ener == 0) {
+    *corr = 0;
+    *ener = 1;
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.h
new file mode 100644
index 0000000000..010c6a1ce5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
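 */

/* Chebyshev above evaluates C(x) with the recurrence
   b_k = 2*x*b_{k+1} - b_{k+2} + f(k), carried out in split high/low
   16-bit arithmetic. A floating-point reference of the same
   recurrence, easier to follow but without the fixed-point scaling
   (illustrative helper; f[0] is unused, matching the 1-based
   coefficient indexing above): */

static double ChebyshevSeries(double x, const double f[6]) {
  double b2 = 1.0;
  double b1 = 2.0 * x + f[1];
  int i;
  for (i = 2; i < 5; i++) {
    double b0 = 2.0 * x * b1 - b2 + f[i];
    b2 = b1;
    b1 = b0;
  }
  /* Final step uses half of f[5], as in the series definition. */
  return x * b1 - b2 + f[5] / 2.0;
}

/*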
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_CompCorr.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_COMP_CORR_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_COMP_CORR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Compute cross correlation and pitch gain for pitch prediction
+ * of last subframe at given lag.
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_CompCorr(int32_t* corr, /* (o) cross correlation */
+                            int32_t* ener, /* (o) energy */
+                            int16_t* buffer, /* (i) signal buffer */
+                            size_t lag, /* (i) pitch lag */
+                            size_t bLen, /* (i) length of buffer */
+                            size_t sRange, /* (i) correlation search length */
+                            int16_t scale /* (i) number of rightshifts to use */
+                            );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m
new file mode 100644
index 0000000000..4bda83622f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m
@@ -0,0 +1,57 @@
+% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS. All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+
+clear;
+pack;
+%
+% Enter the path to YOUR executable and remember to define the preprocessor
+% variable PRINT_MIPS to get the instructions printed to the screen.
+% +command = '!iLBCtest.exe 30 speechAndBGnoise.pcm out1.bit out1.pcm tlm10_30ms.dat'; +cout=' > st.txt'; %saves to matlab variable 'st' +eval(strcat(command,cout)); +if(length(cout)>3) + load st.txt +else + disp('No cout file to load') +end + +% initialize vector to zero +index = find(st(1:end,1)==-1); +indexnonzero = find(st(1:end,1)>0); +frames = length(index)-indexnonzero(1)+1; +start = indexnonzero(1) - 1; +functionOrder=max(st(:,2)); +new=zeros(frames,functionOrder); + +for i = 1:frames, + for j = index(start-1+i)+1:(index(start+i)-1), + new(i,st(j,2)) = new(i,st(j,2)) + st(j,1); + end +end + +result=zeros(functionOrder,3); +for i=1:functionOrder + nonzeroelements = find(new(1:end,i)>0); + result(i,1)=i; + + % Compute each function's mean complexity + % result(i,2)=(sum(new(nonzeroelements,i))/(length(nonzeroelements)*0.03))/1000000; + + % Compute each function's maximum complexity in encoding + % and decoding respectively and then add it together: + % result(i,3)=(max(new(1:end,i))/0.03)/1000000; + result(i,3)=(max(new(1:size(new,1)/2,i))/0.03)/1000000 + (max(new(size(new,1)/2+1:end,i))/0.03)/1000000; +end + +result + +% Compute maximum complexity for a single frame (enc/dec separately and together) +maxEncComplexityInAFrame = (max(sum(new(1:size(new,1)/2,:),2))/0.03)/1000000 +maxDecComplexityInAFrame = (max(sum(new(size(new,1)/2+1:end,:),2))/0.03)/1000000 +totalComplexity = maxEncComplexityInAFrame + maxDecComplexityInAFrame diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c new file mode 100644 index 0000000000..22f2acb330 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c @@ -0,0 +1,667 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + constants.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/constants.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/* HP Filters {b[0] b[1] b[2] -a[1] -a[2]} */ + +const int16_t WebRtcIlbcfix_kHpInCoefs[5] = {3798, -7596, 3798, 7807, -3733}; +const int16_t WebRtcIlbcfix_kHpOutCoefs[5] = {3849, -7699, 3849, 7918, -3833}; + +/* Window in Q11 to window the energies of the 5 choises (3 for 20ms) in the choise for + the 80 sample start state +*/ +const int16_t WebRtcIlbcfix_kStartSequenceEnrgWin[NSUB_MAX-1]= { + 1638, 1843, 2048, 1843, 1638 +}; + +/* LP Filter coeffs used for downsampling */ +const int16_t WebRtcIlbcfix_kLpFiltCoefs[FILTERORDER_DS_PLUS1]= { + -273, 512, 1297, 1696, 1297, 512, -273 +}; + +/* Constants used in the LPC calculations */ + +/* Hanning LPC window (in Q15) */ +const int16_t WebRtcIlbcfix_kLpcWin[BLOCKL_MAX] = { + 6, 22, 50, 89, 139, 200, 272, 355, 449, 554, 669, 795, + 932, 1079, 1237, 1405, 1583, 1771, 1969, 2177, 2395, 2622, 2858, 3104, + 3359, 3622, 3894, 4175, 4464, 4761, 5066, 5379, 5699, 6026, 6361, 6702, + 7050, 7404, 7764, 8130, 8502, 8879, 9262, 9649, 10040, 10436, 10836, 11240, + 11647, 12058, 12471, 12887, 13306, 13726, 14148, 14572, 14997, 15423, 15850, 16277, + 16704, 17131, 17558, 17983, 18408, 18831, 19252, 19672, 20089, 20504, 20916, 21325, + 21730, 22132, 22530, 22924, 23314, 23698, 24078, 24452, 24821, 25185, 25542, 25893, + 26238, 26575, 26906, 27230, 27547, 27855, 28156, 28450, 28734, 29011, 29279, 29538, + 29788, 30029, 30261, 30483, 30696, 30899, 31092, 31275, 31448, 31611, 31764, 31906, + 32037, 32158, 32268, 32367, 32456, 32533, 32600, 32655, 32700, 32733, 32755, 32767, + 32767, 32755, 32733, 32700, 32655, 32600, 32533, 32456, 32367, 32268, 32158, 32037, + 31906, 31764, 31611, 31448, 31275, 31092, 30899, 30696, 30483, 30261, 30029, 29788, + 29538, 29279, 29011, 28734, 28450, 28156, 27855, 27547, 27230, 26906, 26575, 26238, + 25893, 25542, 25185, 24821, 24452, 24078, 23698, 23314, 22924, 22530, 22132, 21730, + 21325, 20916, 20504, 20089, 19672, 19252, 18831, 18408, 17983, 17558, 17131, 16704, + 16277, 15850, 15423, 14997, 14572, 14148, 13726, 13306, 12887, 12471, 12058, 11647, + 11240, 10836, 10436, 10040, 9649, 9262, 8879, 8502, 8130, 7764, 7404, 7050, + 6702, 6361, 6026, 5699, 5379, 5066, 4761, 4464, 4175, 3894, 3622, 3359, + 3104, 2858, 2622, 2395, 2177, 1969, 1771, 1583, 1405, 1237, 1079, 932, + 795, 669, 554, 449, 355, 272, 200, 139, 89, 50, 22, 6 +}; + +/* Asymmetric LPC window (in Q15)*/ +const int16_t WebRtcIlbcfix_kLpcAsymWin[BLOCKL_MAX] = { + 2, 7, 15, 27, 42, 60, 81, 106, 135, 166, 201, 239, + 280, 325, 373, 424, 478, 536, 597, 661, 728, 798, 872, 949, + 1028, 1111, 1197, 1287, 1379, 1474, 1572, 1674, 1778, 1885, 1995, 2108, + 2224, 2343, 2465, 2589, 2717, 2847, 2980, 3115, 3254, 3395, 3538, 3684, + 3833, 3984, 4138, 4295, 4453, 4615, 4778, 4944, 5112, 5283, 5456, 5631, + 5808, 5987, 6169, 6352, 6538, 6725, 6915, 7106, 7300, 7495, 7692, 7891, + 8091, 8293, 8497, 8702, 8909, 9118, 9328, 9539, 9752, 9966, 10182, 10398, + 10616, 10835, 11055, 11277, 11499, 11722, 11947, 12172, 12398, 12625, 12852, 13080, + 13309, 13539, 13769, 14000, 14231, 14463, 14695, 14927, 15160, 15393, 15626, 15859, + 16092, 16326, 16559, 16792, 17026, 17259, 17492, 17725, 17957, 18189, 18421, 18653, + 18884, 19114, 19344, 19573, 19802, 20030, 20257, 20483, 
20709, 20934, 21157, 21380, + 21602, 21823, 22042, 22261, 22478, 22694, 22909, 23123, 23335, 23545, 23755, 23962, + 24168, 24373, 24576, 24777, 24977, 25175, 25371, 25565, 25758, 25948, 26137, 26323, + 26508, 26690, 26871, 27049, 27225, 27399, 27571, 27740, 27907, 28072, 28234, 28394, + 28552, 28707, 28860, 29010, 29157, 29302, 29444, 29584, 29721, 29855, 29987, 30115, + 30241, 30364, 30485, 30602, 30717, 30828, 30937, 31043, 31145, 31245, 31342, 31436, + 31526, 31614, 31699, 31780, 31858, 31933, 32005, 32074, 32140, 32202, 32261, 32317, + 32370, 32420, 32466, 32509, 32549, 32585, 32618, 32648, 32675, 32698, 32718, 32734, + 32748, 32758, 32764, 32767, 32767, 32667, 32365, 31863, 31164, 30274, 29197, 27939, + 26510, 24917, 23170, 21281, 19261, 17121, 14876, 12540, 10126, 7650, 5126, 2571 +}; + +/* Lag window for LPC (Q31) */ +const int32_t WebRtcIlbcfix_kLpcLagWin[LPC_FILTERORDER + 1]={ + 2147483647, 2144885453, 2137754373, 2125918626, 2109459810, + 2088483140, 2063130336, 2033564590, 1999977009, 1962580174, + 1921610283}; + +/* WebRtcIlbcfix_kLpcChirpSyntDenum vector in Q15 corresponding + * floating point vector {1 0.9025 0.9025^2 0.9025^3 ...} + */ +const int16_t WebRtcIlbcfix_kLpcChirpSyntDenum[LPC_FILTERORDER + 1] = { + 32767, 29573, 26690, 24087, + 21739, 19619, 17707, 15980, + 14422, 13016, 11747}; + +/* WebRtcIlbcfix_kLpcChirpWeightDenum in Q15 corresponding to + * floating point vector {1 0.4222 0.4222^2... } + */ +const int16_t WebRtcIlbcfix_kLpcChirpWeightDenum[LPC_FILTERORDER + 1] = { + 32767, 13835, 5841, 2466, 1041, 440, + 186, 78, 33, 14, 6}; + +/* LSF quantization Q13 domain */ +const int16_t WebRtcIlbcfix_kLsfCb[64 * 3 + 128 * 3 + 128 * 4] = { + 1273, 2238, 3696, + 3199, 5309, 8209, + 3606, 5671, 7829, + 2815, 5262, 8778, + 2608, 4027, 5493, + 1582, 3076, 5945, + 2983, 4181, 5396, + 2437, 4322, 6902, + 1861, 2998, 4613, + 2007, 3250, 5214, + 1388, 2459, 4262, + 2563, 3805, 5269, + 2036, 3522, 5129, + 1935, 4025, 6694, + 2744, 5121, 7338, + 2810, 4248, 5723, + 3054, 5405, 7745, + 1449, 2593, 4763, + 3411, 5128, 6596, + 2484, 4659, 7496, + 1668, 2879, 4818, + 1812, 3072, 5036, + 1638, 2649, 3900, + 2464, 3550, 4644, + 1853, 2900, 4158, + 2458, 4163, 5830, + 2556, 4036, 6254, + 2703, 4432, 6519, + 3062, 4953, 7609, + 1725, 3703, 6187, + 2221, 3877, 5427, + 2339, 3579, 5197, + 2021, 4633, 7037, + 2216, 3328, 4535, + 2961, 4739, 6667, + 2807, 3955, 5099, + 2788, 4501, 6088, + 1642, 2755, 4431, + 3341, 5282, 7333, + 2414, 3726, 5727, + 1582, 2822, 5269, + 2259, 3447, 4905, + 3117, 4986, 7054, + 1825, 3491, 5542, + 3338, 5736, 8627, + 1789, 3090, 5488, + 2566, 3720, 4923, + 2846, 4682, 7161, + 1950, 3321, 5976, + 1834, 3383, 6734, + 3238, 4769, 6094, + 2031, 3978, 5903, + 1877, 4068, 7436, + 2131, 4644, 8296, + 2764, 5010, 8013, + 2194, 3667, 6302, + 2053, 3127, 4342, + 3523, 6595, 10010, + 3134, 4457, 5748, + 3142, 5819, 9414, + 2223, 4334, 6353, + 2022, 3224, 4822, + 2186, 3458, 5544, + 2552, 4757, 6870, + 10905, 12917, 14578, + 9503, 11485, 14485, + 9518, 12494, 14052, + 6222, 7487, 9174, + 7759, 9186, 10506, + 8315, 12755, 14786, + 9609, 11486, 13866, + 8909, 12077, 13643, + 7369, 9054, 11520, + 9408, 12163, 14715, + 6436, 9911, 12843, + 7109, 9556, 11884, + 7557, 10075, 11640, + 6482, 9202, 11547, + 6463, 7914, 10980, + 8611, 10427, 12752, + 7101, 9676, 12606, + 7428, 11252, 13172, + 10197, 12955, 15842, + 7487, 10955, 12613, + 5575, 7858, 13621, + 7268, 11719, 14752, + 7476, 11744, 13795, + 7049, 8686, 11922, + 8234, 11314, 13983, + 6560, 11173, 14984, + 6405, 9211, 12337, + 
8222, 12054, 13801, + 8039, 10728, 13255, + 10066, 12733, 14389, + 6016, 7338, 10040, + 6896, 8648, 10234, + 7538, 9170, 12175, + 7327, 12608, 14983, + 10516, 12643, 15223, + 5538, 7644, 12213, + 6728, 12221, 14253, + 7563, 9377, 12948, + 8661, 11023, 13401, + 7280, 8806, 11085, + 7723, 9793, 12333, + 12225, 14648, 16709, + 8768, 13389, 15245, + 10267, 12197, 13812, + 5301, 7078, 11484, + 7100, 10280, 11906, + 8716, 12555, 14183, + 9567, 12464, 15434, + 7832, 12305, 14300, + 7608, 10556, 12121, + 8913, 11311, 12868, + 7414, 9722, 11239, + 8666, 11641, 13250, + 9079, 10752, 12300, + 8024, 11608, 13306, + 10453, 13607, 16449, + 8135, 9573, 10909, + 6375, 7741, 10125, + 10025, 12217, 14874, + 6985, 11063, 14109, + 9296, 13051, 14642, + 8613, 10975, 12542, + 6583, 10414, 13534, + 6191, 9368, 13430, + 5742, 6859, 9260, + 7723, 9813, 13679, + 8137, 11291, 12833, + 6562, 8973, 10641, + 6062, 8462, 11335, + 6928, 8784, 12647, + 7501, 8784, 10031, + 8372, 10045, 12135, + 8191, 9864, 12746, + 5917, 7487, 10979, + 5516, 6848, 10318, + 6819, 9899, 11421, + 7882, 12912, 15670, + 9558, 11230, 12753, + 7752, 9327, 11472, + 8479, 9980, 11358, + 11418, 14072, 16386, + 7968, 10330, 14423, + 8423, 10555, 12162, + 6337, 10306, 14391, + 8850, 10879, 14276, + 6750, 11885, 15710, + 7037, 8328, 9764, + 6914, 9266, 13476, + 9746, 13949, 15519, + 11032, 14444, 16925, + 8032, 10271, 11810, + 10962, 13451, 15833, + 10021, 11667, 13324, + 6273, 8226, 12936, + 8543, 10397, 13496, + 7936, 10302, 12745, + 6769, 8138, 10446, + 6081, 7786, 11719, + 8637, 11795, 14975, + 8790, 10336, 11812, + 7040, 8490, 10771, + 7338, 10381, 13153, + 6598, 7888, 9358, + 6518, 8237, 12030, + 9055, 10763, 12983, + 6490, 10009, 12007, + 9589, 12023, 13632, + 6867, 9447, 10995, + 7930, 9816, 11397, + 10241, 13300, 14939, + 5830, 8670, 12387, + 9870, 11915, 14247, + 9318, 11647, 13272, + 6721, 10836, 12929, + 6543, 8233, 9944, + 8034, 10854, 12394, + 9112, 11787, 14218, + 9302, 11114, 13400, + 9022, 11366, 13816, + 6962, 10461, 12480, + 11288, 13333, 15222, + 7249, 8974, 10547, + 10566, 12336, 14390, + 6697, 11339, 13521, + 11851, 13944, 15826, + 6847, 8381, 11349, + 7509, 9331, 10939, + 8029, 9618, 11909, + 13973, 17644, 19647, 22474, + 14722, 16522, 20035, 22134, + 16305, 18179, 21106, 23048, + 15150, 17948, 21394, 23225, + 13582, 15191, 17687, 22333, + 11778, 15546, 18458, 21753, + 16619, 18410, 20827, 23559, + 14229, 15746, 17907, 22474, + 12465, 15327, 20700, 22831, + 15085, 16799, 20182, 23410, + 13026, 16935, 19890, 22892, + 14310, 16854, 19007, 22944, + 14210, 15897, 18891, 23154, + 14633, 18059, 20132, 22899, + 15246, 17781, 19780, 22640, + 16396, 18904, 20912, 23035, + 14618, 17401, 19510, 21672, + 15473, 17497, 19813, 23439, + 18851, 20736, 22323, 23864, + 15055, 16804, 18530, 20916, + 16490, 18196, 19990, 21939, + 11711, 15223, 21154, 23312, + 13294, 15546, 19393, 21472, + 12956, 16060, 20610, 22417, + 11628, 15843, 19617, 22501, + 14106, 16872, 19839, 22689, + 15655, 18192, 20161, 22452, + 12953, 15244, 20619, 23549, + 15322, 17193, 19926, 21762, + 16873, 18676, 20444, 22359, + 14874, 17871, 20083, 21959, + 11534, 14486, 19194, 21857, + 17766, 19617, 21338, 23178, + 13404, 15284, 19080, 23136, + 15392, 17527, 19470, 21953, + 14462, 16153, 17985, 21192, + 17734, 19750, 21903, 23783, + 16973, 19096, 21675, 23815, + 16597, 18936, 21257, 23461, + 15966, 17865, 20602, 22920, + 15416, 17456, 20301, 22972, + 18335, 20093, 21732, 23497, + 15548, 17217, 20679, 23594, + 15208, 16995, 20816, 22870, + 13890, 18015, 20531, 22468, + 13211, 
15377, 19951, 22388, + 12852, 14635, 17978, 22680, + 16002, 17732, 20373, 23544, + 11373, 14134, 19534, 22707, + 17329, 19151, 21241, 23462, + 15612, 17296, 19362, 22850, + 15422, 19104, 21285, 23164, + 13792, 17111, 19349, 21370, + 15352, 17876, 20776, 22667, + 15253, 16961, 18921, 22123, + 14108, 17264, 20294, 23246, + 15785, 17897, 20010, 21822, + 17399, 19147, 20915, 22753, + 13010, 15659, 18127, 20840, + 16826, 19422, 22218, 24084, + 18108, 20641, 22695, 24237, + 18018, 20273, 22268, 23920, + 16057, 17821, 21365, 23665, + 16005, 17901, 19892, 23016, + 13232, 16683, 21107, 23221, + 13280, 16615, 19915, 21829, + 14950, 18575, 20599, 22511, + 16337, 18261, 20277, 23216, + 14306, 16477, 21203, 23158, + 12803, 17498, 20248, 22014, + 14327, 17068, 20160, 22006, + 14402, 17461, 21599, 23688, + 16968, 18834, 20896, 23055, + 15070, 17157, 20451, 22315, + 15419, 17107, 21601, 23946, + 16039, 17639, 19533, 21424, + 16326, 19261, 21745, 23673, + 16489, 18534, 21658, 23782, + 16594, 18471, 20549, 22807, + 18973, 21212, 22890, 24278, + 14264, 18674, 21123, 23071, + 15117, 16841, 19239, 23118, + 13762, 15782, 20478, 23230, + 14111, 15949, 20058, 22354, + 14990, 16738, 21139, 23492, + 13735, 16971, 19026, 22158, + 14676, 17314, 20232, 22807, + 16196, 18146, 20459, 22339, + 14747, 17258, 19315, 22437, + 14973, 17778, 20692, 23367, + 15715, 17472, 20385, 22349, + 15702, 18228, 20829, 23410, + 14428, 16188, 20541, 23630, + 16824, 19394, 21365, 23246, + 13069, 16392, 18900, 21121, + 12047, 16640, 19463, 21689, + 14757, 17433, 19659, 23125, + 15185, 16930, 19900, 22540, + 16026, 17725, 19618, 22399, + 16086, 18643, 21179, 23472, + 15462, 17248, 19102, 21196, + 17368, 20016, 22396, 24096, + 12340, 14475, 19665, 23362, + 13636, 16229, 19462, 22728, + 14096, 16211, 19591, 21635, + 12152, 14867, 19943, 22301, + 14492, 17503, 21002, 22728, + 14834, 16788, 19447, 21411, + 14650, 16433, 19326, 22308, + 14624, 16328, 19659, 23204, + 13888, 16572, 20665, 22488, + 12977, 16102, 18841, 22246, + 15523, 18431, 21757, 23738, + 14095, 16349, 18837, 20947, + 13266, 17809, 21088, 22839, + 15427, 18190, 20270, 23143, + 11859, 16753, 20935, 22486, + 12310, 17667, 21736, 23319, + 14021, 15926, 18702, 22002, + 12286, 15299, 19178, 21126, + 15703, 17491, 21039, 23151, + 12272, 14018, 18213, 22570, + 14817, 16364, 18485, 22598, + 17109, 19683, 21851, 23677, + 12657, 14903, 19039, 22061, + 14713, 16487, 20527, 22814, + 14635, 16726, 18763, 21715, + 15878, 18550, 20718, 22906 +}; + +const int16_t WebRtcIlbcfix_kLsfDimCb[LSF_NSPLIT] = {3, 3, 4}; +const int16_t WebRtcIlbcfix_kLsfSizeCb[LSF_NSPLIT] = {64,128,128}; + +const int16_t WebRtcIlbcfix_kLsfMean[LPC_FILTERORDER] = { + 2308, 3652, 5434, 7885, + 10255, 12559, 15160, 17513, + 20328, 22752}; + +const int16_t WebRtcIlbcfix_kLspMean[LPC_FILTERORDER] = { + 31476, 29565, 25819, 18725, 10276, + 1236, -9049, -17600, -25884, -30618 +}; + +/* Q14 */ +const int16_t WebRtcIlbcfix_kLsfWeight20ms[4] = {12288, 8192, 4096, 0}; +const int16_t WebRtcIlbcfix_kLsfWeight30ms[6] = {8192, 16384, 10923, 5461, 0, 0}; + +/* + cos(x) in Q15 + WebRtcIlbcfix_kCos[i] = cos(pi*i/64.0) + used in WebRtcIlbcfix_Lsp2Lsf() +*/ + +const int16_t WebRtcIlbcfix_kCos[64] = { + 32767, 32729, 32610, 32413, 32138, 31786, 31357, 30853, + 30274, 29622, 28899, 28106, 27246, 26320, 25330, 24279, + 23170, 22006, 20788, 19520, 18205, 16846, 15447, 14010, + 12540, 11039, 9512, 7962, 6393, 4808, 3212, 1608, + 0, -1608, -3212, -4808, -6393, -7962, -9512, -11039, + -12540, -14010, -15447, -16846, -18205, -19520, -20788, -22006, + 
-23170, -24279, -25330, -26320, -27246, -28106, -28899, -29622, + -30274, -30853, -31357, -31786, -32138, -32413, -32610, -32729 +}; + +/* + Derivative in Q19, used to interpolate between the + WebRtcIlbcfix_kCos[] values to get a more exact y = cos(x) +*/ +const int16_t WebRtcIlbcfix_kCosDerivative[64] = { + -632, -1893, -3150, -4399, -5638, -6863, -8072, -9261, + -10428, -11570, -12684, -13767, -14817, -15832, -16808, -17744, + -18637, -19486, -20287, -21039, -21741, -22390, -22986, -23526, + -24009, -24435, -24801, -25108, -25354, -25540, -25664, -25726, + -25726, -25664, -25540, -25354, -25108, -24801, -24435, -24009, + -23526, -22986, -22390, -21741, -21039, -20287, -19486, -18637, + -17744, -16808, -15832, -14817, -13767, -12684, -11570, -10428, + -9261, -8072, -6863, -5638, -4399, -3150, -1893, -632}; + +/* + Table in Q15, used for a2lsf conversion + WebRtcIlbcfix_kCosGrid[i] = cos((2*pi*i)/(float)(2*COS_GRID_POINTS)); +*/ + +const int16_t WebRtcIlbcfix_kCosGrid[COS_GRID_POINTS + 1] = { + 32760, 32723, 32588, 32364, 32051, 31651, 31164, 30591, + 29935, 29196, 28377, 27481, 26509, 25465, 24351, 23170, + 21926, 20621, 19260, 17846, 16384, 14876, 13327, 11743, + 10125, 8480, 6812, 5126, 3425, 1714, 0, -1714, -3425, + -5126, -6812, -8480, -10125, -11743, -13327, -14876, + -16384, -17846, -19260, -20621, -21926, -23170, -24351, + -25465, -26509, -27481, -28377, -29196, -29935, -30591, + -31164, -31651, -32051, -32364, -32588, -32723, -32760 +}; + +/* + Derivative of y = acos(x) in Q12 + used in WebRtcIlbcfix_Lsp2Lsf() +*/ + +const int16_t WebRtcIlbcfix_kAcosDerivative[64] = { + -26887, -8812, -5323, -3813, -2979, -2444, -2081, -1811, + -1608, -1450, -1322, -1219, -1132, -1059, -998, -946, + -901, -861, -827, -797, -772, -750, -730, -713, + -699, -687, -677, -668, -662, -657, -654, -652, + -652, -654, -657, -662, -668, -677, -687, -699, + -713, -730, -750, -772, -797, -827, -861, -901, + -946, -998, -1059, -1132, -1219, -1322, -1450, -1608, + -1811, -2081, -2444, -2979, -3813, -5323, -8812, -26887 +}; + + +/* Tables for quantization of start state */ + +/* State quantization tables */ +const int16_t WebRtcIlbcfix_kStateSq3[8] = { /* Values in Q13 */ + -30473, -17838, -9257, -2537, + 3639, 10893, 19958, 32636 +}; + +/* This table defines the limits for the selection of the freqg + less or equal than value 0 => index = 0 + less or equal than value k => index = k +*/ +const int32_t WebRtcIlbcfix_kChooseFrgQuant[64] = { + 118, 163, 222, 305, 425, 604, + 851, 1174, 1617, 2222, 3080, 4191, + 5525, 7215, 9193, 11540, 14397, 17604, + 21204, 25209, 29863, 35720, 42531, 50375, + 59162, 68845, 80108, 93754, 110326, 129488, + 150654, 174328, 201962, 233195, 267843, 308239, + 354503, 405988, 464251, 531550, 608652, 697516, + 802526, 928793, 1080145, 1258120, 1481106, 1760881, + 2111111, 2546619, 3078825, 3748642, 4563142, 5573115, + 6887601, 8582108, 10797296, 14014513, 18625760, 25529599, + 37302935, 58819185, 109782723, WEBRTC_SPL_WORD32_MAX +}; + +const int16_t WebRtcIlbcfix_kScale[64] = { + /* Values in Q16 */ + 29485, 25003, 21345, 18316, 15578, 13128, 10973, 9310, 7955, + 6762, 5789, 4877, 4255, 3699, 3258, 2904, 2595, 2328, + 2123, 1932, 1785, 1631, 1493, 1370, 1260, 1167, 1083, + /* Values in Q21 */ + 32081, 29611, 27262, 25229, 23432, 21803, 20226, 18883, 17609, + 16408, 15311, 14327, 13390, 12513, 11693, 10919, 10163, 9435, + 8739, 8100, 7424, 6813, 6192, 5648, 5122, 4639, 4207, 3798, + 3404, 3048, 2706, 2348, 2036, 1713, 1393, 1087, 747 +}; + +/*frgq in fixpoint, but already computed like 
this: + for(i=0; i<64; i++){ + a = (pow(10,frgq[i])/4.5); + WebRtcIlbcfix_kFrgQuantMod[i] = round(a); + } + + Value 0 :36 in Q8 + 37:58 in Q5 + 59:63 in Q3 +*/ +const int16_t WebRtcIlbcfix_kFrgQuantMod[64] = { + /* First 37 values in Q8 */ + 569, 671, 786, 916, 1077, 1278, + 1529, 1802, 2109, 2481, 2898, 3440, + 3943, 4535, 5149, 5778, 6464, 7208, + 7904, 8682, 9397, 10285, 11240, 12246, + 13313, 14382, 15492, 16735, 18131, 19693, + 21280, 22912, 24624, 26544, 28432, 30488, + 32720, + /* 22 values in Q5 */ + 4383, 4684, 5012, 5363, 5739, 6146, + 6603, 7113, 7679, 8285, 9040, 9850, + 10838, 11882, 13103, 14467, 15950, 17669, + 19712, 22016, 24800, 28576, + /* 5 values in Q3 */ + 8240, 9792, 12040, 15440, 22472 +}; + +/* Constants for codebook search and creation */ + +/* Expansion filter to get additional cb section. + * Q12 and reversed compared to flp + */ +const int16_t WebRtcIlbcfix_kCbFiltersRev[CB_FILTERLEN]={ + -140, 446, -755, 3302, 2922, -590, 343, -138}; + +/* Weighting coefficients for short lags. + * [0.2 0.4 0.6 0.8] in Q15 */ +const int16_t WebRtcIlbcfix_kAlpha[4]={ + 6554, 13107, 19661, 26214}; + +/* Ranges for search and filters at different subframes */ + +const size_t WebRtcIlbcfix_kSearchRange[5][CB_NSTAGES]={ + {58,58,58}, {108,44,44}, {108,108,108}, {108,108,108}, {108,108,108}}; + +const size_t WebRtcIlbcfix_kFilterRange[5]={63, 85, 125, 147, 147}; + +/* Gain Quantization for the codebook gains of the 3 stages */ + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq3[9]={ + -16384, -10813, -5407, 0, 4096, 8192, + 12288, 16384, 32767}; + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq4[17]={ + -17203, -14746, -12288, -9830, -7373, -4915, + -2458, 0, 2458, 4915, 7373, 9830, + 12288, 14746, 17203, 19661, 32767}; + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq5[33]={ + 614, 1229, 1843, 2458, 3072, 3686, + 4301, 4915, 5530, 6144, 6758, 7373, + 7987, 8602, 9216, 9830, 10445, 11059, + 11674, 12288, 12902, 13517, 14131, 14746, + 15360, 15974, 16589, 17203, 17818, 18432, + 19046, 19661, 32767}; + +/* Q14 gain_sq5Tbl squared in Q14 */ +const int16_t WebRtcIlbcfix_kGainSq5Sq[32] = { + 23, 92, 207, 368, 576, 829, + 1129, 1474, 1866, 2304, 2787, 3317, + 3893, 4516, 5184, 5897, 6658, 7464, + 8318, 9216, 10160, 11151, 12187, 13271, + 14400, 15574, 16796, 18062, 19377, 20736, + 22140, 23593 +}; + +const int16_t* const WebRtcIlbcfix_kGain[3] = +{WebRtcIlbcfix_kGainSq5, WebRtcIlbcfix_kGainSq4, WebRtcIlbcfix_kGainSq3}; + + +/* Tables for the Enhancer, using upsamling factor 4 (ENH_UPS0 = 4) */ + +const int16_t WebRtcIlbcfix_kEnhPolyPhaser[ENH_UPS0][ENH_FLO_MULT2_PLUS1]={ + {0, 0, 0, 4096, 0, 0, 0}, + {64, -315, 1181, 3531, -436, 77, -64}, + {97, -509, 2464, 2464, -509, 97, -97}, + {77, -436, 3531, 1181, -315, 64, -77} +}; + +const int16_t WebRtcIlbcfix_kEnhWt[3] = { + 4800, 16384, 27968 /* Q16 */ +}; + +const size_t WebRtcIlbcfix_kEnhPlocs[ENH_NBLOCKS_TOT] = { + 160, 480, 800, 1120, 1440, 1760, 2080, 2400 /* Q(-2) */ +}; + +/* PLC table */ + +const int16_t WebRtcIlbcfix_kPlcPerSqr[6] = { /* Grid points for square of periodiciy in Q15 */ + 839, 1343, 2048, 2998, 4247, 5849 +}; + +const int16_t WebRtcIlbcfix_kPlcPitchFact[6] = { /* Value of y=(x^4-0.4)/(0.7-0.4) in grid points in Q15 */ + 0, 5462, 10922, 16384, 21846, 27306 +}; + +const int16_t WebRtcIlbcfix_kPlcPfSlope[6] = { /* Slope of y=(x^4-0.4)/(0.7-0.4) in Q11 
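(used by the packet loss concealment in do_plc.c to interpolate WebRtcIlbcfix_kPlcPitchFact between the WebRtcIlbcfix_kPlcPerSqr grid points)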
*/ + 26667, 18729, 13653, 10258, 7901, 6214 +}; diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h new file mode 100644 index 0000000000..a8645c00db --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + constants.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CONSTANTS_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CONSTANTS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/* high pass filters */ + +extern const int16_t WebRtcIlbcfix_kHpInCoefs[]; +extern const int16_t WebRtcIlbcfix_kHpOutCoefs[]; + +/* Window for start state decision */ +extern const int16_t WebRtcIlbcfix_kStartSequenceEnrgWin[]; + +/* low pass filter used for downsampling */ +extern const int16_t WebRtcIlbcfix_kLpFiltCoefs[]; + +/* LPC analysis and quantization */ + +extern const int16_t WebRtcIlbcfix_kLpcWin[]; +extern const int16_t WebRtcIlbcfix_kLpcAsymWin[]; +extern const int32_t WebRtcIlbcfix_kLpcLagWin[]; +extern const int16_t WebRtcIlbcfix_kLpcChirpSyntDenum[]; +extern const int16_t WebRtcIlbcfix_kLpcChirpWeightDenum[]; +extern const int16_t WebRtcIlbcfix_kLsfDimCb[]; +extern const int16_t WebRtcIlbcfix_kLsfSizeCb[]; +extern const int16_t WebRtcIlbcfix_kLsfCb[]; +extern const int16_t WebRtcIlbcfix_kLsfWeight20ms[]; +extern const int16_t WebRtcIlbcfix_kLsfWeight30ms[]; +extern const int16_t WebRtcIlbcfix_kLsfMean[]; +extern const int16_t WebRtcIlbcfix_kLspMean[]; +extern const int16_t WebRtcIlbcfix_kCos[]; +extern const int16_t WebRtcIlbcfix_kCosDerivative[]; +extern const int16_t WebRtcIlbcfix_kCosGrid[]; +extern const int16_t WebRtcIlbcfix_kAcosDerivative[]; + +/* state quantization tables */ + +extern const int16_t WebRtcIlbcfix_kStateSq3[]; +extern const int32_t WebRtcIlbcfix_kChooseFrgQuant[]; +extern const int16_t WebRtcIlbcfix_kScale[]; +extern const int16_t WebRtcIlbcfix_kFrgQuantMod[]; + +/* Ranges for search and filters at different subframes */ + +extern const size_t WebRtcIlbcfix_kSearchRange[5][CB_NSTAGES]; +extern const size_t WebRtcIlbcfix_kFilterRange[]; + +/* gain quantization tables */ + +extern const int16_t WebRtcIlbcfix_kGainSq3[]; +extern const int16_t WebRtcIlbcfix_kGainSq4[]; +extern const int16_t WebRtcIlbcfix_kGainSq5[]; +extern const int16_t WebRtcIlbcfix_kGainSq5Sq[]; +extern const int16_t* const WebRtcIlbcfix_kGain[]; + +/* adaptive codebook definitions */ + +extern const int16_t WebRtcIlbcfix_kCbFiltersRev[]; +extern const int16_t WebRtcIlbcfix_kAlpha[]; + +/* enhancer definitions */ + +extern const int16_t WebRtcIlbcfix_kEnhPolyPhaser[ENH_UPS0] + [ENH_FLO_MULT2_PLUS1]; +extern const int16_t WebRtcIlbcfix_kEnhWt[]; +extern const size_t WebRtcIlbcfix_kEnhPlocs[]; + +/* PLC tables */ + +extern const int16_t WebRtcIlbcfix_kPlcPerSqr[]; +extern const int16_t WebRtcIlbcfix_kPlcPitchFact[]; +extern
const int16_t WebRtcIlbcfix_kPlcPfSlope[]; + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c new file mode 100644 index 0000000000..7e21faee6c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CreateAugmentedVec.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "rtc_base/sanitizer.h" + +/*----------------------------------------------------------------* + * Recreate a specific codebook vector from the augmented part. + * + *----------------------------------------------------------------*/ + +void WebRtcIlbcfix_CreateAugmentedVec( + size_t index, /* (i) Index for the augmented vector to be + created */ + const int16_t* buffer, /* (i) Pointer to the end of the codebook memory + that is used for creation of the augmented + codebook */ + int16_t* cbVec) { /* (o) The constructed codebook vector */ + size_t ilow; + const int16_t *ppo, *ppi; + int16_t cbVecTmp[4]; + /* Interpolation starts 4 elements before cbVec+index, but must not start + outside `cbVec`; clamping interp_len to stay within `cbVec`. + */ + size_t interp_len = WEBRTC_SPL_MIN(index, 4); + + rtc_MsanCheckInitialized(buffer - index - interp_len, sizeof(buffer[0]), + index + interp_len); + + ilow = index - interp_len; + + /* copy the first noninterpolated part */ + ppo = buffer-index; + WEBRTC_SPL_MEMCPY_W16(cbVec, ppo, index); + + /* interpolation */ + ppo = buffer - interp_len; + ppi = buffer - index - interp_len; + + /* perform cbVec[ilow+k] = ((ppi[k]*alphaTbl[k])>>15) + + ((ppo[k]*alphaTbl[interp_len-1-k])>>15); + for k = 0..interp_len-1 + */ + WebRtcSpl_ElementwiseVectorMult(&cbVec[ilow], ppi, WebRtcIlbcfix_kAlpha, + interp_len, 15); + WebRtcSpl_ReverseOrderMultArrayElements( + cbVecTmp, ppo, &WebRtcIlbcfix_kAlpha[interp_len - 1], interp_len, 15); + WebRtcSpl_AddVectorsAndShift(&cbVec[ilow], &cbVec[ilow], cbVecTmp, interp_len, + 0); + + /* copy the second noninterpolated part */ + ppo = buffer - index; + /* `tempbuff2` is declared in WebRtcIlbcfix_GetCbVec and is SUBL+5 elements + long. `buffer` points one element past the end of that vector, i.e., at + tempbuff2+SUBL+5. Since ppo=buffer-index, we cannot read any more than + `index` elements from `ppo`. + + `cbVec` is declared to be SUBL elements long in WebRtcIlbcfix_CbConstruct. + Therefore, we can only write SUBL-index elements to cbVec+index. + + These two conditions limit the number of elements to copy. 
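+ Taken together, the copy below moves WEBRTC_SPL_MIN(SUBL-index, index) elements.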
+ */ + WEBRTC_SPL_MEMCPY_W16(cbVec+index, ppo, WEBRTC_SPL_MIN(SUBL-index, index)); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h new file mode 100644 index 0000000000..d7e5be1c2f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CreateAugmentedVec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CREATE_AUGMENTED_VEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CREATE_AUGMENTED_VEC_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Recreate a specific codebook vector from the augmented part. + * + *----------------------------------------------------------------*/ + +void WebRtcIlbcfix_CreateAugmentedVec( + size_t index, /* (i) Index for the augmented vector to be + created */ + const int16_t* buffer, /* (i) Pointer to the end of the codebook memory + that is used for creation of the augmented + codebook */ + int16_t* cbVec); /* (o) The constructed codebook vector */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c new file mode 100644 index 0000000000..d7621d5b65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Decode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/decode_residual.h" +#include "modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/do_plc.h" +#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h" +#include "modules/audio_coding/codecs/ilbc/hp_output.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" +#include "modules/audio_coding/codecs/ilbc/init_decode.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h" +#include "modules/audio_coding/codecs/ilbc/unpack_bits.h" +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" +#include "rtc_base/system/arch.h" + +#ifndef WEBRTC_ARCH_BIG_ENDIAN +#include "modules/audio_coding/codecs/ilbc/swap_bytes.h" +#endif + +/*----------------------------------------------------------------* + * main decoder function + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_DecodeImpl( + int16_t *decblock, /* (o) decoded signal block */ + const uint16_t *bytes, /* (i) encoded signal bits */ + IlbcDecoder *iLBCdec_inst, /* (i/o) the decoder state + structure */ + int16_t mode /* (i) 0: bad packet, PLC, + 1: normal */ + ) { + const int old_mode = iLBCdec_inst->mode; + const int old_use_enhancer = iLBCdec_inst->use_enhancer; + + size_t i; + int16_t order_plus_one; + + int16_t last_bit; + int16_t *data; + /* Stack based */ + int16_t decresidual[BLOCKL_MAX]; + int16_t PLCresidual[BLOCKL_MAX + LPC_FILTERORDER]; + int16_t syntdenum[NSUB_MAX*(LPC_FILTERORDER+1)]; + int16_t PLClpc[LPC_FILTERORDER + 1]; +#ifndef WEBRTC_ARCH_BIG_ENDIAN + uint16_t swapped[NO_OF_WORDS_30MS]; +#endif + iLBC_bits *iLBCbits_inst = (iLBC_bits*)PLCresidual; + + /* Reuse some buffers that are non-overlapping in order to save stack memory */ + data = &PLCresidual[LPC_FILTERORDER]; + + if (mode) { /* the data are good */ + + /* decode data */ + + /* Unpacketize bits into parameters */ + +#ifndef WEBRTC_ARCH_BIG_ENDIAN + WebRtcIlbcfix_SwapBytes(bytes, iLBCdec_inst->no_of_words, swapped); + last_bit = WebRtcIlbcfix_UnpackBits(swapped, iLBCbits_inst, iLBCdec_inst->mode); +#else + last_bit = WebRtcIlbcfix_UnpackBits(bytes, iLBCbits_inst, iLBCdec_inst->mode); +#endif + + /* Check for bit errors */ + if (iLBCbits_inst->startIdx<1) + mode = 0; + if ((iLBCdec_inst->mode==20) && (iLBCbits_inst->startIdx>3)) + mode = 0; + if ((iLBCdec_inst->mode==30) && (iLBCbits_inst->startIdx>5)) + mode = 0; + if (last_bit==1) + mode = 0; + + if (mode) { /* No bit errors were detected, continue decoding */ + /* Stack based */ + int16_t lsfdeq[LPC_FILTERORDER*LPC_N_MAX]; + int16_t weightdenum[(LPC_FILTERORDER + 1)*NSUB_MAX]; + + /* adjust index */ + WebRtcIlbcfix_IndexConvDec(iLBCbits_inst->cb_index); + + /* decode the lsf */ + WebRtcIlbcfix_SimpleLsfDeQ(lsfdeq, (int16_t*)(iLBCbits_inst->lsf), iLBCdec_inst->lpc_n); + WebRtcIlbcfix_LsfCheck(lsfdeq, LPC_FILTERORDER, iLBCdec_inst->lpc_n); + WebRtcIlbcfix_DecoderInterpolateLsp(syntdenum, weightdenum, + lsfdeq, LPC_FILTERORDER, iLBCdec_inst); + + /* Decode the residual using the cb and gain indexes */ + if
(!WebRtcIlbcfix_DecodeResidual(iLBCdec_inst, iLBCbits_inst, + decresidual, syntdenum)) + goto error; + + /* preparing the plc for a future loss! */ + WebRtcIlbcfix_DoThePlc( + PLCresidual, PLClpc, 0, decresidual, + syntdenum + (LPC_FILTERORDER + 1) * (iLBCdec_inst->nsub - 1), + iLBCdec_inst->last_lag, iLBCdec_inst); + + /* Use the output from doThePLC */ + WEBRTC_SPL_MEMCPY_W16(decresidual, PLCresidual, iLBCdec_inst->blockl); + } + + } + + if (mode == 0) { + /* the data is bad (either a PLC call + * was made or a bit error was detected) + */ + + /* packet loss conceal */ + + WebRtcIlbcfix_DoThePlc(PLCresidual, PLClpc, 1, decresidual, syntdenum, + iLBCdec_inst->last_lag, iLBCdec_inst); + + WEBRTC_SPL_MEMCPY_W16(decresidual, PLCresidual, iLBCdec_inst->blockl); + + order_plus_one = LPC_FILTERORDER + 1; + + for (i = 0; i < iLBCdec_inst->nsub; i++) { + WEBRTC_SPL_MEMCPY_W16(syntdenum+(i*order_plus_one), + PLClpc, order_plus_one); + } + } + + if ((*iLBCdec_inst).use_enhancer == 1) { /* Enhancer activated */ + + /* Update the filter and filter coefficients if there was a packet loss */ + if (iLBCdec_inst->prev_enh_pl==2) { + for (i=0;i<iLBCdec_inst->nsub;i++) { + WEBRTC_SPL_MEMCPY_W16(&(iLBCdec_inst->old_syntdenum[i*(LPC_FILTERORDER+1)]), + syntdenum, (LPC_FILTERORDER+1)); + } + } + + /* post filtering */ + (*iLBCdec_inst).last_lag = + WebRtcIlbcfix_EnhancerInterface(data, decresidual, iLBCdec_inst); + + /* synthesis filtering */ + + /* Set up the filter state */ + WEBRTC_SPL_MEMCPY_W16(&data[-LPC_FILTERORDER], iLBCdec_inst->syntMem, LPC_FILTERORDER); + + if (iLBCdec_inst->mode==20) { + /* Enhancer has 40 samples delay */ + i=0; + WebRtcSpl_FilterARFastQ12( + data, data, + iLBCdec_inst->old_syntdenum + (i+iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + + for (i=1; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum+(i-1)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + + } else if (iLBCdec_inst->mode==30) { + /* Enhancer has 80 samples delay */ + for (i=0; i < 2; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + iLBCdec_inst->old_syntdenum + (i+4)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + for (i=2; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum+(i-2)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + } + + /* Save the filter state */ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &data[iLBCdec_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER); + + } else { /* Enhancer not activated */ + size_t lag; + + /* Find last lag (since the enhancer is not called to give this info) */ + lag = 20; + if (iLBCdec_inst->mode==20) { + lag = WebRtcIlbcfix_XcorrCoef( + &decresidual[iLBCdec_inst->blockl-60], + &decresidual[iLBCdec_inst->blockl-60-lag], + 60, + 80, lag, -1); + } else { + lag = WebRtcIlbcfix_XcorrCoef( + &decresidual[iLBCdec_inst->blockl-ENH_BLOCKL], + &decresidual[iLBCdec_inst->blockl-ENH_BLOCKL-lag], + ENH_BLOCKL, + 100, lag, -1); + } + + /* Store lag (it is needed if next packet is lost) */ + (*iLBCdec_inst).last_lag = lag; + + /* copy data and run synthesis filter */ + WEBRTC_SPL_MEMCPY_W16(data, decresidual, iLBCdec_inst->blockl); + + /* Set up the filter state */ + WEBRTC_SPL_MEMCPY_W16(&data[-LPC_FILTERORDER], iLBCdec_inst->syntMem, LPC_FILTERORDER); + + for (i=0; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum + i*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + + /* Save the filter state
*/ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &data[iLBCdec_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER); + } + + WEBRTC_SPL_MEMCPY_W16(decblock,data,iLBCdec_inst->blockl); + + /* High pass filter the signal (with upscaling a factor 2 and saturation) */ + WebRtcIlbcfix_HpOutput(decblock, (int16_t*)WebRtcIlbcfix_kHpOutCoefs, + iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx, + iLBCdec_inst->blockl); + + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->old_syntdenum, + syntdenum, iLBCdec_inst->nsub*(LPC_FILTERORDER+1)); + + iLBCdec_inst->prev_enh_pl=0; + + if (mode==0) { /* PLC was used */ + iLBCdec_inst->prev_enh_pl=1; + } + + return 0; // Success. + +error: + // The decoder got sick from eating that data. Reset it and return. + WebRtcIlbcfix_InitDecode(iLBCdec_inst, old_mode, old_use_enhancer); + return -1; // Error. +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h new file mode 100644 index 0000000000..a7d2910115 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Decode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_H_ + +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * main decoder function + *---------------------------------------------------------------*/ + +// Returns 0 on success, -1 on error. +ABSL_MUST_USE_RESULT +int WebRtcIlbcfix_DecodeImpl( + int16_t* decblock, /* (o) decoded signal block */ + const uint16_t* bytes, /* (i) encoded signal bits */ + IlbcDecoder* iLBCdec_inst, /* (i/o) the decoder state + structure */ + int16_t mode /* (i) 0: bad packet, PLC, + 1: normal */ +); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c new file mode 100644 index 0000000000..a9668e2889 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecodeResidual.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decode_residual.h" + +#include <string.h> + +#include "modules/audio_coding/codecs/ilbc/cb_construct.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/do_plc.h" +#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/state_construct.h" +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" + +/*----------------------------------------------------------------* + * frame residual decoder function (subroutine to iLBC_decode) + *---------------------------------------------------------------*/ + +bool WebRtcIlbcfix_DecodeResidual( + IlbcDecoder *iLBCdec_inst, + /* (i/o) the decoder state structure */ + iLBC_bits *iLBC_encbits, /* (i/o) Encoded bits, which are used + for the decoding */ + int16_t *decresidual, /* (o) decoded residual frame */ + int16_t *syntdenum /* (i) the decoded synthesis filter + coefficients */ + ) { + size_t meml_gotten, diff, start_pos; + size_t subcount, subframe; + int16_t *reverseDecresidual = iLBCdec_inst->enh_buf; /* Reversed decoded data, used for decoding backwards in time (reuse memory in state) */ + int16_t *memVec = iLBCdec_inst->prevResidual; /* Memory for codebook and filter state (reuse memory in state) */ + int16_t *mem = &memVec[CB_HALFFILTERLEN]; /* Memory for codebook */ + + diff = STATE_LEN - iLBCdec_inst->state_short_len; + + if (iLBC_encbits->state_first == 1) { + start_pos = (iLBC_encbits->startIdx-1)*SUBL; + } else { + start_pos = (iLBC_encbits->startIdx-1)*SUBL + diff; + } + + /* decode scalar part of start state */ + + WebRtcIlbcfix_StateConstruct(iLBC_encbits->idxForMax, + iLBC_encbits->idxVec, &syntdenum[(iLBC_encbits->startIdx-1)*(LPC_FILTERORDER+1)], + &decresidual[start_pos], iLBCdec_inst->state_short_len + ); + + if (iLBC_encbits->state_first) { /* put adaptive part in the end */ + + /* setup memory */ + + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCdec_inst->state_short_len); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-iLBCdec_inst->state_short_len, decresidual+start_pos, + iLBCdec_inst->state_short_len); + + /* construct decoded vector */ + + if (!WebRtcIlbcfix_CbConstruct( + &decresidual[start_pos + iLBCdec_inst->state_short_len], + iLBC_encbits->cb_index, iLBC_encbits->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff)) + return false; // Error. + + } + else {/* put adaptive part in the beginning */ + + /* setup memory */ + + meml_gotten = iLBCdec_inst->state_short_len; + WebRtcSpl_MemCpyReversedOrder(mem+CB_MEML-1, + decresidual+start_pos, meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten); + + /* construct decoded vector */ + + if (!WebRtcIlbcfix_CbConstruct(reverseDecresidual, iLBC_encbits->cb_index, + iLBC_encbits->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, + diff)) + return false; // Error.
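+ + /* The adaptive part was constructed on a time-reversed target, so reverseDecresidual holds the samples backwards; the reversed copy below flips them into decresidual. */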
+ + /* get decoded residual from reversed vector */ + + WebRtcSpl_MemCpyReversedOrder(&decresidual[start_pos-1], + reverseDecresidual, diff); + } + + /* counter for predicted subframes */ + + subcount=1; + + /* forward prediction of subframes */ + + if (iLBCdec_inst->nsub > iLBC_encbits->startIdx + 1) { + + /* setup memory */ + WebRtcSpl_MemSetW16(mem, 0, CB_MEML-STATE_LEN); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-STATE_LEN, + decresidual+(iLBC_encbits->startIdx-1)*SUBL, STATE_LEN); + + /* loop over subframes to decode */ + + size_t Nfor = iLBCdec_inst->nsub - iLBC_encbits->startIdx - 1; + for (subframe=0; subframe<Nfor; subframe++) { + + /* construct decoded vector */ + if (!WebRtcIlbcfix_CbConstruct( + &decresidual[(iLBC_encbits->startIdx + 1 + subframe) * SUBL], + iLBC_encbits->cb_index + subcount * CB_NSTAGES, + iLBC_encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)) + return false; // Error. + + /* update memory */ + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &decresidual[(iLBC_encbits->startIdx+1+subframe)*SUBL], SUBL); + + subcount++; + } + + } + + /* backward prediction of subframes */ + + if (iLBC_encbits->startIdx > 1) { + + /* setup memory */ + + meml_gotten = SUBL*(iLBCdec_inst->nsub+1-iLBC_encbits->startIdx); + if( meml_gotten > CB_MEML ) { + meml_gotten=CB_MEML; + } + + WebRtcSpl_MemCpyReversedOrder(mem+CB_MEML-1, + decresidual+(iLBC_encbits->startIdx-1)*SUBL, meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten); + + /* loop over subframes to decode */ + + size_t Nback = iLBC_encbits->startIdx - 1; + for (subframe=0; subframe<Nback; subframe++) { + + /* construct decoded vector */ + if (!WebRtcIlbcfix_CbConstruct(&reverseDecresidual[subframe * SUBL], + iLBC_encbits->cb_index + subcount * CB_NSTAGES, + iLBC_encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)) + return false; // Error. + + /* update memory */ + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &reverseDecresidual[subframe*SUBL], SUBL); + + subcount++; + } + + /* get decoded residual from reversed vector */ + WebRtcSpl_MemCpyReversedOrder(decresidual+SUBL*Nback-1, + reverseDecresidual, SUBL*Nback); + } + + return true; // Success. +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h new file mode 100644 index 0000000000..d079577661 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecodeResidual.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_RESIDUAL_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_RESIDUAL_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * frame residual decoder function (subroutine to iLBC_decode) + *---------------------------------------------------------------*/ + +// Returns true on success, false on failure.
In case of failure, the decoder +// state may be corrupted and needs resetting. +ABSL_MUST_USE_RESULT +bool WebRtcIlbcfix_DecodeResidual( + IlbcDecoder* iLBCdec_inst, /* (i/o) the decoder state structure */ + iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits, which are used + for the decoding */ + int16_t* decresidual, /* (o) decoded residual frame */ + int16_t* syntdenum /* (i) the decoded synthesis filter + coefficients */ +); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c new file mode 100644 index 0000000000..d96bb9b2e9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecoderInterpolateLsp.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h" + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h" + +/*----------------------------------------------------------------* + * obtain synthesis and weighting filters from lsf coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_DecoderInterpolateLsp( + int16_t *syntdenum, /* (o) synthesis filter coefficients */ + int16_t *weightdenum, /* (o) weighting denominator + coefficients */ + int16_t *lsfdeq, /* (i) dequantized lsf coefficients */ + int16_t length, /* (i) length of lsf coefficient vector */ + IlbcDecoder *iLBCdec_inst + /* (i) the decoder state structure */ + ){ + size_t i; + int pos, lp_length; + int16_t lp[LPC_FILTERORDER + 1], *lsfdeq2; + + lsfdeq2 = lsfdeq + length; + lp_length = length + 1; + + if (iLBCdec_inst->mode==30) { + /* subframe 1: Interpolation between old and first LSF */ + + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, (*iLBCdec_inst).lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight30ms[0], length); + WEBRTC_SPL_MEMCPY_W16(syntdenum,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum, lp, (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + + /* subframes 2 to 6: interpolation between first and last LSF */ + + pos = lp_length; + for (i = 1; i < 6; i++) { + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, lsfdeq, lsfdeq2, + WebRtcIlbcfix_kLsfWeight30ms[i], length); + WEBRTC_SPL_MEMCPY_W16(syntdenum + pos,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum + pos, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + pos += lp_length; + } + } else { /* iLBCdec_inst->mode=20 */ + /* subframes 1 to 4: interpolation between old and new LSF */ + pos = 0; + for (i = 0; i < iLBCdec_inst->nsub; i++) { + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, iLBCdec_inst->lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight20ms[i], length); +
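/* lp now holds the interpolated LPC polynomial for this subframe; it is stored into syntdenum and bandwidth-expanded (chirped) into the weighting filter below. */ +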
WEBRTC_SPL_MEMCPY_W16(syntdenum+pos,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum+pos, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + pos += lp_length; + } + } + + /* update memory */ + + if (iLBCdec_inst->mode==30) { + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, lsfdeq2, length); + } else { + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, lsfdeq, length); + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h new file mode 100644 index 0000000000..8b08114467 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecoderInterpolateLsp.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODER_INTERPOLATE_LSF_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODER_INTERPOLATE_LSF_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * obtain synthesis and weighting filters from lsf coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_DecoderInterpolateLsp( + int16_t* syntdenum, /* (o) synthesis filter coefficients */ + int16_t* weightdenum, /* (o) weighting denominator + coefficients */ + int16_t* lsfdeq, /* (i) dequantized lsf coefficients */ + int16_t length, /* (i) length of lsf coefficient vector */ + IlbcDecoder* iLBCdec_inst + /* (i) the decoder state structure */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/defines.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/defines.h new file mode 100644 index 0000000000..64135c4887 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/defines.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + defines.h + +******************************************************************/ +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DEFINES_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DEFINES_H_ + +#include <stdint.h> +#include <string.h> + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* general codec settings */ + +#define FS 8000 +#define BLOCKL_20MS 160 +#define BLOCKL_30MS 240 +#define BLOCKL_MAX 240 +#define NSUB_20MS 4 +#define NSUB_30MS 6 +#define NSUB_MAX 6 +#define NASUB_20MS 2 +#define NASUB_30MS 4 +#define NASUB_MAX 4 +#define SUBL 40 +#define STATE_LEN 80 +#define STATE_SHORT_LEN_30MS 58 +#define STATE_SHORT_LEN_20MS 57 + +/* LPC settings */ + +#define LPC_FILTERORDER 10 +#define LPC_LOOKBACK 60 +#define LPC_N_20MS 1 +#define LPC_N_30MS 2 +#define LPC_N_MAX 2 +#define LPC_ASYMDIFF 20 +#define LSF_NSPLIT 3 +#define LSF_NUMBER_OF_STEPS 4 +#define LPC_HALFORDER 5 +#define COS_GRID_POINTS 60 + +/* cb settings */ + +#define CB_NSTAGES 3 +#define CB_EXPAND 2 +#define CB_MEML 147 +#define CB_FILTERLEN (2 * 4) +#define CB_HALFFILTERLEN 4 +#define CB_RESRANGE 34 +#define CB_MAXGAIN_FIXQ6 83 /* error = -0.24% */ +#define CB_MAXGAIN_FIXQ14 21299 + +/* enhancer */ + +#define ENH_BLOCKL 80 /* block length */ +#define ENH_BLOCKL_HALF (ENH_BLOCKL / 2) +#define ENH_HL \ + 3 /* 2*ENH_HL+1 is number blocks \ + in said second \ + sequence */ +#define ENH_SLOP \ + 2 /* max difference estimated and \ + correct pitch period */ +#define ENH_PLOCSL \ + 8 /* pitch-estimates and \ + pitch-locations buffer \ + length */ +#define ENH_OVERHANG 2 +#define ENH_UPS0 4 /* upsampling rate */ +#define ENH_FL0 3 /* 2*FLO+1 is the length of each filter */ +#define ENH_FLO_MULT2_PLUS1 7 +#define ENH_VECTL (ENH_BLOCKL + 2 * ENH_FL0) +#define ENH_CORRDIM (2 * ENH_SLOP + 1) +#define ENH_NBLOCKS (BLOCKL / ENH_BLOCKL) +#define ENH_NBLOCKS_EXTRA 5 +#define ENH_NBLOCKS_TOT 8 /* ENH_NBLOCKS+ENH_NBLOCKS_EXTRA */ +#define ENH_BUFL (ENH_NBLOCKS_TOT) * ENH_BLOCKL +#define ENH_BUFL_FILTEROVERHEAD 3 +#define ENH_A0 819 /* Q14 */ +#define ENH_A0_MINUS_A0A0DIV4 848256041 /* Q34 */ +#define ENH_A0DIV2 26843546 /* Q30 */ + +/* PLC */ + +/* Down sampling */ + +#define FILTERORDER_DS_PLUS1 7 +#define DELAY_DS 3 +#define FACTOR_DS 2 + +/* bit stream defs */ + +#define NO_OF_BYTES_20MS 38 +#define NO_OF_BYTES_30MS 50 +#define NO_OF_WORDS_20MS 19 +#define NO_OF_WORDS_30MS 25 +#define STATE_BITS 3 +#define BYTE_LEN 8 +#define ULP_CLASSES 3 + +/* help parameters */ + +#define TWO_PI_FIX 25736 /* Q12 */ + +/* Constants for codebook search and creation */ + +#define ST_MEM_L_TBL 85 +#define MEM_LF_TBL 147 + +/* Struct for the bits */ +typedef struct iLBC_bits_t_ { + int16_t lsf[LSF_NSPLIT * LPC_N_MAX]; + int16_t cb_index[CB_NSTAGES * (NASUB_MAX + 1)]; /* First CB_NSTAGES values + contain extra CB index */ + int16_t gain_index[CB_NSTAGES * (NASUB_MAX + 1)]; /* First CB_NSTAGES values + contain extra CB gain */ + size_t idxForMax; + int16_t state_first; + int16_t idxVec[STATE_SHORT_LEN_30MS]; + int16_t firstbits; + size_t startIdx; +} iLBC_bits; + +/* type definition encoder instance */ +typedef struct IlbcEncoder_ { + /* flag for frame size mode */ + int16_t mode; + + /* basic parameters for different frame sizes */ + size_t blockl; + size_t nsub; + int16_t nasub; + size_t no_of_bytes, no_of_words; + int16_t lpc_n; + size_t state_short_len; + + /* analysis filter state */ + int16_t
anaMem[LPC_FILTERORDER]; + + /* Fix-point old lsf parameters for interpolation */ + int16_t lsfold[LPC_FILTERORDER]; + int16_t lsfdeqold[LPC_FILTERORDER]; + + /* signal buffer for LP analysis */ + int16_t lpc_buffer[LPC_LOOKBACK + BLOCKL_MAX]; + + /* state of input HP filter */ + int16_t hpimemx[2]; + int16_t hpimemy[4]; + +#ifdef SPLIT_10MS + int16_t weightdenumbuf[66]; + int16_t past_samples[160]; + uint16_t bytes[25]; + int16_t section; + int16_t Nfor_flag; + int16_t Nback_flag; + int16_t start_pos; + size_t diff; +#endif + +} IlbcEncoder; + +/* type definition decoder instance */ +typedef struct IlbcDecoder_ { + /* flag for frame size mode */ + int16_t mode; + + /* basic parameters for different frame sizes */ + size_t blockl; + size_t nsub; + int16_t nasub; + size_t no_of_bytes, no_of_words; + int16_t lpc_n; + size_t state_short_len; + + /* synthesis filter state */ + int16_t syntMem[LPC_FILTERORDER]; + + /* old LSF for interpolation */ + int16_t lsfdeqold[LPC_FILTERORDER]; + + /* pitch lag estimated in enhancer and used in PLC */ + size_t last_lag; + + /* PLC state information */ + int consPLICount, prev_enh_pl; + int16_t perSquare; + + int16_t prevScale, prevPLI; + size_t prevLag; + int16_t prevLpc[LPC_FILTERORDER + 1]; + int16_t prevResidual[NSUB_MAX * SUBL]; + int16_t seed; + + /* previous synthesis filter parameters */ + + int16_t old_syntdenum[(LPC_FILTERORDER + 1) * NSUB_MAX]; + + /* state of output HP filter */ + int16_t hpimemx[2]; + int16_t hpimemy[4]; + + /* enhancer state information */ + int use_enhancer; + int16_t enh_buf[ENH_BUFL + ENH_BUFL_FILTEROVERHEAD]; + size_t enh_period[ENH_NBLOCKS_TOT]; + +} IlbcDecoder; + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c new file mode 100644 index 0000000000..9ca6ca48e9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DoThePlc.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/do_plc.h" + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" +#include "modules/audio_coding/codecs/ilbc/comp_corr.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Packet loss concealment routine. Conceals a residual signal + * and LP parameters. If no packet loss, update state. 
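+ * The concealed residual mixes a pitch-periodic repetition of the previous + * frame's residual with a pseudo-random noise component, weighted by the + * estimated periodicity.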
+ *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_DoThePlc( + int16_t *PLCresidual, /* (o) concealed residual */ + int16_t *PLClpc, /* (o) concealed LP parameters */ + int16_t PLI, /* (i) packet loss indicator + 0 - no PL, 1 = PL */ + int16_t *decresidual, /* (i) decoded residual */ + int16_t *lpc, /* (i) decoded LPC (only used for no PL) */ + size_t inlag, /* (i) pitch lag */ + IlbcDecoder *iLBCdec_inst + /* (i/o) decoder instance */ + ){ + size_t i; + int32_t cross, ener, cross_comp, ener_comp = 0; + int32_t measure, maxMeasure, energy; + int32_t noise_energy_threshold_30dB; + int16_t max, crossSquareMax, crossSquare; + size_t j, lag, randlag; + int16_t tmp1, tmp2; + int16_t shift1, shift2, shift3, shiftMax; + int16_t scale3; + size_t corrLen; + int32_t tmpW32, tmp2W32; + int16_t use_gain; + int16_t tot_gain; + int16_t max_perSquare; + int16_t scale1, scale2; + int16_t totscale; + int32_t nom; + int16_t denom; + int16_t pitchfact; + size_t use_lag; + int ind; + int16_t randvec[BLOCKL_MAX]; + + /* Packet Loss */ + if (PLI == 1) { + + (*iLBCdec_inst).consPLICount += 1; + + /* if previous frame not lost, + determine pitch pred. gain */ + + if (iLBCdec_inst->prevPLI != 1) { + + /* Maximum 60 samples are correlated, preserve as high accuracy + as possible without getting overflow */ + max = WebRtcSpl_MaxAbsValueW16((*iLBCdec_inst).prevResidual, + iLBCdec_inst->blockl); + scale3 = (WebRtcSpl_GetSizeInBits(max)<<1) - 25; + if (scale3 < 0) { + scale3 = 0; + } + + /* Store scale for use when interpolating between the + * concealment and the received packet */ + iLBCdec_inst->prevScale = scale3; + + /* Search around the previous lag +/-3 to find the + best pitch period */ + lag = inlag - 3; + + /* Guard against getting outside the frame */ + corrLen = (size_t)WEBRTC_SPL_MIN(60, iLBCdec_inst->blockl-(inlag+3)); + + WebRtcIlbcfix_CompCorr( &cross, &ener, + iLBCdec_inst->prevResidual, lag, iLBCdec_inst->blockl, corrLen, scale3); + + /* Normalize and store cross^2 and the number of shifts */ + shiftMax = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(cross))-15; + crossSquareMax = (int16_t)(( + (int16_t)WEBRTC_SPL_SHIFT_W32(cross, -shiftMax) * + (int16_t)WEBRTC_SPL_SHIFT_W32(cross, -shiftMax)) >> 15); + + for (j=inlag-2;j<=inlag+3;j++) { + WebRtcIlbcfix_CompCorr( &cross_comp, &ener_comp, + iLBCdec_inst->prevResidual, j, iLBCdec_inst->blockl, corrLen, scale3); + + /* Use the criteria (corr*corr)/energy to compare if + this lag is better or not. 
To avoid the division, + do a cross multiplication */ + shift1 = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(cross_comp))-15; + crossSquare = (int16_t)(( + (int16_t)WEBRTC_SPL_SHIFT_W32(cross_comp, -shift1) * + (int16_t)WEBRTC_SPL_SHIFT_W32(cross_comp, -shift1)) >> 15); + + shift2 = WebRtcSpl_GetSizeInBits(ener)-15; + measure = (int16_t)WEBRTC_SPL_SHIFT_W32(ener, -shift2) * crossSquare; + + shift3 = WebRtcSpl_GetSizeInBits(ener_comp)-15; + maxMeasure = (int16_t)WEBRTC_SPL_SHIFT_W32(ener_comp, -shift3) * + crossSquareMax; + + /* Calculate shift value, so that the two measures can + be put in the same Q domain */ + if(2 * shiftMax + shift3 > 2 * shift1 + shift2) { + tmp1 = + WEBRTC_SPL_MIN(31, 2 * shiftMax + shift3 - 2 * shift1 - shift2); + tmp2 = 0; + } else { + tmp1 = 0; + tmp2 = + WEBRTC_SPL_MIN(31, 2 * shift1 + shift2 - 2 * shiftMax - shift3); + } + + if ((measure>>tmp1) > (maxMeasure>>tmp2)) { + /* New lag is better => record lag, measure and domain */ + lag = j; + crossSquareMax = crossSquare; + cross = cross_comp; + shiftMax = shift1; + ener = ener_comp; + } + } + + /* Calculate the periodicity for the lag with the maximum correlation. + + Definition of the periodicity: + abs(corr(vec1, vec2))/(sqrt(energy(vec1))*sqrt(energy(vec2))) + + Work in the Square domain to simplify the calculations + max_perSquare is less than 1 (in Q15) + */ + tmp2W32=WebRtcSpl_DotProductWithScale(&iLBCdec_inst->prevResidual[iLBCdec_inst->blockl-corrLen], + &iLBCdec_inst->prevResidual[iLBCdec_inst->blockl-corrLen], + corrLen, scale3); + + if ((tmp2W32>0)&&(ener_comp>0)) { + /* norm energies to int16_t, compute the product of the energies and + use the upper int16_t as the denominator */ + + scale1=(int16_t)WebRtcSpl_NormW32(tmp2W32)-16; + tmp1=(int16_t)WEBRTC_SPL_SHIFT_W32(tmp2W32, scale1); + + scale2=(int16_t)WebRtcSpl_NormW32(ener)-16; + tmp2=(int16_t)WEBRTC_SPL_SHIFT_W32(ener, scale2); + denom = (int16_t)((tmp1 * tmp2) >> 16); /* in Q(scale1+scale2-16) */ + + /* Square the cross correlation and norm it such that max_perSquare + will be in Q15 after the division */ + + totscale = scale1+scale2-1; + tmp1 = (int16_t)WEBRTC_SPL_SHIFT_W32(cross, (totscale>>1)); + tmp2 = (int16_t)WEBRTC_SPL_SHIFT_W32(cross, totscale-(totscale>>1)); + + nom = tmp1 * tmp2; + max_perSquare = (int16_t)WebRtcSpl_DivW32W16(nom, denom); + + } else { + max_perSquare = 0; + } + } + + /* previous frame lost, use recorded lag and gain */ + + else { + lag = iLBCdec_inst->prevLag; + max_perSquare = iLBCdec_inst->perSquare; + } + + /* Attenuate signal and scale down pitch pred gain if + several frames lost consecutively */ + + use_gain = 32767; /* 1.0 in Q15 */ + + if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>320) { + use_gain = 29491; /* 0.9 in Q15 */ + } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>640) { + use_gain = 22938; /* 0.7 in Q15 */ + } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>960) { + use_gain = 16384; /* 0.5 in Q15 */ + } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>1280) { + use_gain = 0; /* 0.0 in Q15 */ + } + + /* Compute mixing factor of pitch repetition and noise: + for max_per>0.7 set periodicity to 1.0 + 0.4<max_per<0.7 set periodicity to (max_per-0.4)/(0.7-0.4) + max_per<0.4 set periodicity to 0.0 + */ + + if (max_perSquare>7868) { /* periodicity > 0.7 (0.7^4=0.2401 in Q15) */ + pitchfact = 32767; + } else if (max_perSquare>839) { /* 0.4 < periodicity < 0.7 (0.4^4=0.0256 in Q15) */ + /* find best index and interpolate from that */ + ind = 5; + while ((max_perSquare<WebRtcIlbcfix_kPlcPerSqr[ind])&&(ind>0)) { + ind--; + } + /* pitch fact is approximated by first order */ + tmpW32 = (int32_t)WebRtcIlbcfix_kPlcPitchFact[ind] + +
((WebRtcIlbcfix_kPlcPfSlope[ind] * + (max_perSquare - WebRtcIlbcfix_kPlcPerSqr[ind])) >> 11); + + pitchfact = (int16_t)WEBRTC_SPL_MIN(tmpW32, 32767); /* guard against overflow */ + + } else { /* periodicity < 0.4 */ + pitchfact = 0; + } + + /* avoid repetition of same pitch cycle (buzziness) */ + use_lag = lag; + if (lag<80) { + use_lag = 2*lag; + } + + /* compute concealed residual */ + noise_energy_threshold_30dB = (int32_t)iLBCdec_inst->blockl * 900; + energy = 0; + for (i=0; i<iLBCdec_inst->blockl; i++) { + + /* noise component - 52 < randlagFIX < 117 */ + iLBCdec_inst->seed = (int16_t)(iLBCdec_inst->seed * 31821 + 13849); + randlag = 53 + (iLBCdec_inst->seed & 63); + if (randlag > i) { + randvec[i] = + iLBCdec_inst->prevResidual[iLBCdec_inst->blockl + i - randlag]; + } else { + randvec[i] = iLBCdec_inst->prevResidual[i - randlag]; + } + + /* pitch repetition component */ + if (use_lag > i) { + PLCresidual[i] = + iLBCdec_inst->prevResidual[iLBCdec_inst->blockl + i - use_lag]; + } else { + PLCresidual[i] = PLCresidual[i - use_lag]; + } + + /* Attenuate total gain for each 10 ms */ + if (i<80) { + tot_gain=use_gain; + } else if (i<160) { + tot_gain = (int16_t)((31130 * use_gain) >> 15); /* 0.95*use_gain */ + } else { + tot_gain = (int16_t)((29491 * use_gain) >> 15); /* 0.9*use_gain */ + } + + + /* mix noise and pitch repetition */ + PLCresidual[i] = (int16_t)((tot_gain * + ((pitchfact * PLCresidual[i] + (32767 - pitchfact) * randvec[i] + + 16384) >> 15)) >> 15); + + /* Compute energy until threshold for noise energy is reached */ + if (energy < noise_energy_threshold_30dB) { + energy += PLCresidual[i] * PLCresidual[i]; + } + } + + /* less than 30 dB, use only noise */ + if (energy < noise_energy_threshold_30dB) { + for (i=0; i<iLBCdec_inst->blockl; i++) { + PLCresidual[i] = randvec[i]; + } + } + + /* use the old LPC */ + WEBRTC_SPL_MEMCPY_W16(PLClpc, (*iLBCdec_inst).prevLpc, LPC_FILTERORDER+1); + + /* Update state in case there are multiple frame losses */ + iLBCdec_inst->prevLag = lag; + iLBCdec_inst->perSquare = max_perSquare; + } + + /* no packet loss, copy input */ + + else { + WEBRTC_SPL_MEMCPY_W16(PLCresidual, decresidual, iLBCdec_inst->blockl); + WEBRTC_SPL_MEMCPY_W16(PLClpc, lpc, (LPC_FILTERORDER+1)); + iLBCdec_inst->consPLICount = 0; + } + + /* update state */ + iLBCdec_inst->prevPLI = PLI; + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->prevLpc, PLClpc, (LPC_FILTERORDER+1)); + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->prevResidual, PLCresidual, iLBCdec_inst->blockl); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h new file mode 100644 index 0000000000..c19c4eca32 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h
new file mode 100644
index 0000000000..c19c4eca32
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_DoThePlc.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DO_PLC_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DO_PLC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * Packet loss concealment routine. Conceals a residual signal
+ * and LP parameters. If no packet loss, update state.
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_DoThePlc(
+    int16_t* PLCresidual, /* (o) concealed residual */
+    int16_t* PLClpc,      /* (o) concealed LP parameters */
+    int16_t PLI,          /* (i) packet loss indicator
+                                 0 - no PL, 1 = PL */
+    int16_t* decresidual, /* (i) decoded residual */
+    int16_t* lpc,         /* (i) decoded LPC (only used for no PL) */
+    size_t inlag,         /* (i) pitch lag */
+    IlbcDecoder* iLBCdec_inst
+    /* (i/o) decoder instance */
+    );
+
+#endif
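For reference, the concealment loop in do_plc.c above blends the pitch-repeated sample with the pseudo-random noise sample under a Q15 weight and then applies the Q15 attenuation gain. A minimal self-contained sketch of that arithmetic (the helper name is invented for illustration):

#include <stdint.h>

/* pitchfact and tot_gain are Q15 weights (32767 ~ 1.0); 16384 is the
   rounding term for the >> 15. With |samples| <= 32767 and weights that
   sum to 32767, the intermediate sum stays within int32_t, exactly as in
   the PLCresidual[i] update above. */
static int16_t mix_pitch_and_noise_q15(int16_t pitch_sample,
                                       int16_t noise_sample,
                                       int16_t pitchfact,
                                       int16_t tot_gain) {
  int32_t mixed = ((int32_t)pitchfact * pitch_sample +
                   (int32_t)(32767 - pitchfact) * noise_sample + 16384) >> 15;
  return (int16_t)((tot_gain * mixed) >> 15);
}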
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.c
new file mode 100644
index 0000000000..8e536221cd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.c
@@ -0,0 +1,517 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Encode.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/encode.h"
+
+#include <string.h>
+
+#include "modules/audio_coding/codecs/ilbc/cb_construct.h"
+#include "modules/audio_coding/codecs/ilbc/cb_search.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/frame_classify.h"
+#include "modules/audio_coding/codecs/ilbc/hp_input.h"
+#include "modules/audio_coding/codecs/ilbc/index_conv_enc.h"
+#include "modules/audio_coding/codecs/ilbc/lpc_encode.h"
+#include "modules/audio_coding/codecs/ilbc/pack_bits.h"
+#include "modules/audio_coding/codecs/ilbc/state_construct.h"
+#include "modules/audio_coding/codecs/ilbc/state_search.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/system/arch.h"
+
+#ifdef SPLIT_10MS
+#include "modules/audio_coding/codecs/ilbc/unpack_bits.h"
+#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h"
+#endif
+
+#ifndef WEBRTC_ARCH_BIG_ENDIAN
+#include "modules/audio_coding/codecs/ilbc/swap_bytes.h"
+#endif
+
+/*----------------------------------------------------------------*
+ *  main encoder function
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_EncodeImpl(
+    uint16_t *bytes,          /* (o) encoded data bits iLBC */
+    const int16_t *block,     /* (i) speech vector to encode */
+    IlbcEncoder *iLBCenc_inst /* (i/o) the general encoder
+                                 state */
+                          ){
+  size_t n, meml_gotten, Nfor;
+  size_t diff, start_pos;
+  size_t index;
+  size_t subcount, subframe;
+  size_t start_count, end_count;
+  int16_t *residual;
+  int32_t en1, en2;
+  int16_t scale, max;
+  int16_t *syntdenum;
+  int16_t *decresidual;
+  int16_t *reverseResidual;
+  int16_t *reverseDecresidual;
+  /* Stack based */
+  int16_t weightdenum[(LPC_FILTERORDER + 1)*NSUB_MAX];
+  int16_t dataVec[BLOCKL_MAX + LPC_FILTERORDER];
+  int16_t memVec[CB_MEML+CB_FILTERLEN];
+  int16_t bitsMemory[sizeof(iLBC_bits)/sizeof(int16_t)];
+  iLBC_bits *iLBCbits_inst = (iLBC_bits*)bitsMemory;
+
+
+#ifdef SPLIT_10MS
+  int16_t *weightdenumbuf = iLBCenc_inst->weightdenumbuf;
+  int16_t last_bit;
+#endif
+
+  int16_t *data = &dataVec[LPC_FILTERORDER];
+  int16_t *mem = &memVec[CB_HALFFILTERLEN];
+
+  /* Reuse some buffers to save stack memory */
+  residual = &iLBCenc_inst->lpc_buffer[LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl];
+  syntdenum = mem;      /* syntdenum[(LPC_FILTERORDER + 1)*NSUB_MAX] and mem are used non overlapping in the code */
+  decresidual = residual;     /* Already encoded residual is overwritten by the decoded version */
+  reverseResidual = data;     /* data and reverseResidual are used non overlapping in the code */
+  reverseDecresidual = reverseResidual;     /* Already encoded residual is overwritten by the decoded version */
+
+#ifdef SPLIT_10MS
+
+  WebRtcSpl_MemSetW16 (  (int16_t *) iLBCbits_inst, 0,
+                         sizeof(iLBC_bits) / sizeof(int16_t)  );
+
+  start_pos = iLBCenc_inst->start_pos;
+  diff = iLBCenc_inst->diff;
+
+  if (iLBCenc_inst->section != 0){
+    WEBRTC_SPL_MEMCPY_W16 (weightdenum, weightdenumbuf,
+                           SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM);
+    /* Un-Packetize the frame into parameters */
+    last_bit = WebRtcIlbcfix_UnpackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode);
+    if (last_bit)
+      return;
+    /* adjust index */
+    WebRtcIlbcfix_IndexConvDec (iLBCbits_inst->cb_index);
+
+    if (iLBCenc_inst->section == 1){
+      /* Save first 80 samples of a 160/240 sample frame for 20/30msec */
+      WEBRTC_SPL_MEMCPY_W16 (iLBCenc_inst->past_samples, block, 80);
+    }
+    else{ // iLBCenc_inst->section == 2 AND mode = 30ms
+      /* Save second 80 samples of a 240 sample frame for 30msec */
+      WEBRTC_SPL_MEMCPY_W16 (iLBCenc_inst->past_samples + 80, block, 80);
+    }
+  }
+  else{ // iLBCenc_inst->section == 0
+    /* form a complete frame of 160/240 for 20msec/30msec mode */
+    WEBRTC_SPL_MEMCPY_W16 (data + (iLBCenc_inst->mode * 8) - 80, block, 80);
+    WEBRTC_SPL_MEMCPY_W16 (data, iLBCenc_inst->past_samples,
+                           (iLBCenc_inst->mode * 8) - 80);
+    iLBCenc_inst->Nfor_flag = 0;
+    iLBCenc_inst->Nback_flag = 0;
+#else
+  /* copy input block to data*/
+  WEBRTC_SPL_MEMCPY_W16(data,block,iLBCenc_inst->blockl);
+#endif
+
+  /* high pass filtering of input signal and scale down the residual (*0.5) */
+  WebRtcIlbcfix_HpInput(data, (int16_t*)WebRtcIlbcfix_kHpInCoefs,
+                        iLBCenc_inst->hpimemy, iLBCenc_inst->hpimemx,
+                        iLBCenc_inst->blockl);
+
+  /* LPC of hp filtered input data */
+  WebRtcIlbcfix_LpcEncode(syntdenum, weightdenum, iLBCbits_inst->lsf, data,
+                          iLBCenc_inst);
+
+  /* Set up state */
+  WEBRTC_SPL_MEMCPY_W16(dataVec, iLBCenc_inst->anaMem, LPC_FILTERORDER);
+
+  /* inverse filter to get residual */
+  for (n=0; n<iLBCenc_inst->nsub; n++ ) {
+    WebRtcSpl_FilterMAFastQ12(
+        &data[n*SUBL], &residual[n*SUBL],
+        &syntdenum[n*(LPC_FILTERORDER+1)],
+        LPC_FILTERORDER+1, SUBL);
+  }
+
+  /* Copy the state for next frame */
+  WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->anaMem, &data[iLBCenc_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER);
+
+  /* find state location */
+
+  iLBCbits_inst->startIdx = WebRtcIlbcfix_FrameClassify(iLBCenc_inst,residual);
+
+  /* check if state should be in first or last part of the
+     two subframes */
+
+  index = (iLBCbits_inst->startIdx-1)*SUBL;
+  max=WebRtcSpl_MaxAbsValueW16(&residual[index], 2*SUBL);
+  scale = WebRtcSpl_GetSizeInBits((uint32_t)(max * max));
+
+  /* Scale to maximum 25 bits so that the MAC won't cause overflow */
+  scale = scale - 25;
+  if(scale < 0) {
+    scale = 0;
+  }
+
+  diff = STATE_LEN - iLBCenc_inst->state_short_len;
+  en1=WebRtcSpl_DotProductWithScale(&residual[index], &residual[index],
+                                    iLBCenc_inst->state_short_len, scale);
+  index += diff;
+  en2=WebRtcSpl_DotProductWithScale(&residual[index], &residual[index],
+                                    iLBCenc_inst->state_short_len, scale);
+  if (en1 > en2) {
+    iLBCbits_inst->state_first = 1;
+    start_pos = (iLBCbits_inst->startIdx-1)*SUBL;
+  } else {
+    iLBCbits_inst->state_first = 0;
+    start_pos = (iLBCbits_inst->startIdx-1)*SUBL + diff;
+  }
+
+  /* scalar quantization of state */
+
+  WebRtcIlbcfix_StateSearch(iLBCenc_inst, iLBCbits_inst, &residual[start_pos],
+                            &syntdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)],
+                            &weightdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)]);
+
+  WebRtcIlbcfix_StateConstruct(iLBCbits_inst->idxForMax, iLBCbits_inst->idxVec,
+                               &syntdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)],
+                               &decresidual[start_pos], iLBCenc_inst->state_short_len
+                               );
+
+  /* predictive quantization in state */
+
+  if (iLBCbits_inst->state_first) { /* put adaptive part in the end */
+
+    /* setup memory */
+
+    WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCenc_inst->state_short_len);
+    WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-iLBCenc_inst->state_short_len,
+                          decresidual+start_pos, iLBCenc_inst->state_short_len);
+
+    /* encode subframes */
+
+    WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index, iLBCbits_inst->gain_index,
+                           &residual[start_pos+iLBCenc_inst->state_short_len],
+ mem+CB_MEML-ST_MEM_L_TBL, ST_MEM_L_TBL, diff, + &weightdenum[iLBCbits_inst->startIdx*(LPC_FILTERORDER+1)], 0); + + /* construct decoded vector */ + + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + &decresidual[start_pos + iLBCenc_inst->state_short_len], + iLBCbits_inst->cb_index, iLBCbits_inst->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff)); + + } + else { /* put adaptive part in the beginning */ + + /* create reversed vectors for prediction */ + + WebRtcSpl_MemCpyReversedOrder(&reverseResidual[diff-1], + &residual[(iLBCbits_inst->startIdx+1)*SUBL-STATE_LEN], diff); + + /* setup memory */ + + meml_gotten = iLBCenc_inst->state_short_len; + WebRtcSpl_MemCpyReversedOrder(&mem[CB_MEML-1], &decresidual[start_pos], meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCenc_inst->state_short_len); + + /* encode subframes */ + WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index, iLBCbits_inst->gain_index, + reverseResidual, mem+CB_MEML-ST_MEM_L_TBL, ST_MEM_L_TBL, diff, + &weightdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)], + 0); + + /* construct decoded vector */ + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + reverseDecresidual, iLBCbits_inst->cb_index, + iLBCbits_inst->gain_index, mem + CB_MEML - ST_MEM_L_TBL, + ST_MEM_L_TBL, diff)); + + /* get decoded residual from reversed vector */ + + WebRtcSpl_MemCpyReversedOrder(&decresidual[start_pos-1], reverseDecresidual, diff); + } + +#ifdef SPLIT_10MS + iLBCenc_inst->start_pos = start_pos; + iLBCenc_inst->diff = diff; + iLBCenc_inst->section++; + /* adjust index */ + WebRtcIlbcfix_IndexConvEnc (iLBCbits_inst->cb_index); + /* Packetize the parameters into the frame */ + WebRtcIlbcfix_PackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode); + WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum, + SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM); + return; + } +#endif + + /* forward prediction of subframes */ + + Nfor = iLBCenc_inst->nsub-iLBCbits_inst->startIdx-1; + + /* counter for predicted subframes */ +#ifdef SPLIT_10MS + if (iLBCenc_inst->mode == 20) + { + subcount = 1; + } + if (iLBCenc_inst->mode == 30) + { + if (iLBCenc_inst->section == 1) + { + subcount = 1; + } + if (iLBCenc_inst->section == 2) + { + subcount = 3; + } + } +#else + subcount=1; +#endif + + if( Nfor > 0 ){ + + /* setup memory */ + + WebRtcSpl_MemSetW16(mem, 0, CB_MEML-STATE_LEN); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-STATE_LEN, + decresidual+(iLBCbits_inst->startIdx-1)*SUBL, STATE_LEN); + +#ifdef SPLIT_10MS + if (iLBCenc_inst->Nfor_flag > 0) + { + for (subframe = 0; subframe < WEBRTC_SPL_MIN (Nfor, 2); subframe++) + { + /* update memory */ + WEBRTC_SPL_MEMCPY_W16 (mem, mem + SUBL, (CB_MEML - SUBL)); + WEBRTC_SPL_MEMCPY_W16 (mem + CB_MEML - SUBL, + &decresidual[(iLBCbits_inst->startIdx + 1 + + subframe) * SUBL], SUBL); + } + } + + iLBCenc_inst->Nfor_flag++; + + if (iLBCenc_inst->mode == 20) + { + start_count = 0; + end_count = Nfor; + } + if (iLBCenc_inst->mode == 30) + { + if (iLBCenc_inst->section == 1) + { + start_count = 0; + end_count = WEBRTC_SPL_MIN (Nfor, (size_t)2); + } + if (iLBCenc_inst->section == 2) + { + start_count = WEBRTC_SPL_MIN (Nfor, (size_t)2); + end_count = Nfor; + } + } +#else + start_count = 0; + end_count = Nfor; +#endif + + /* loop over subframes to encode */ + + for (subframe = start_count; subframe < end_count; subframe++){ + + /* encode subframe */ + + WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index+subcount*CB_NSTAGES, + iLBCbits_inst->gain_index+subcount*CB_NSTAGES, + 
&residual[(iLBCbits_inst->startIdx+1+subframe)*SUBL],
+                             mem, MEM_LF_TBL, SUBL,
+                             &weightdenum[(iLBCbits_inst->startIdx+1+subframe)*(LPC_FILTERORDER+1)],
+                             subcount);
+
+      /* construct decoded vector */
+      RTC_CHECK(WebRtcIlbcfix_CbConstruct(
+          &decresidual[(iLBCbits_inst->startIdx + 1 + subframe) * SUBL],
+          iLBCbits_inst->cb_index + subcount * CB_NSTAGES,
+          iLBCbits_inst->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL,
+          SUBL));
+
+      /* update memory */
+
+      memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
+      WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL,
+                            &decresidual[(iLBCbits_inst->startIdx+1+subframe)*SUBL], SUBL);
+
+      subcount++;
+    }
+  }
+
+#ifdef SPLIT_10MS
+  if ((iLBCenc_inst->section == 1) &&
+      (iLBCenc_inst->mode == 30) && (Nfor > 0) && (end_count == 2))
+  {
+    iLBCenc_inst->section++;
+    /* adjust index */
+    WebRtcIlbcfix_IndexConvEnc (iLBCbits_inst->cb_index);
+    /* Packetize the parameters into the frame */
+    WebRtcIlbcfix_PackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode);
+    WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum,
+                           SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM);
+    return;
+  }
+#endif
+
+  /* backward prediction of subframes */
+
+  if (iLBCbits_inst->startIdx > 1) {
+
+    /* create reverse order vectors
+       (The decresidual does not need to be copied since it is
+       contained in the same vector as the residual)
+    */
+
+    size_t Nback = iLBCbits_inst->startIdx - 1;
+    WebRtcSpl_MemCpyReversedOrder(&reverseResidual[Nback*SUBL-1], residual, Nback*SUBL);
+
+    /* setup memory */
+
+    meml_gotten = SUBL*(iLBCenc_inst->nsub+1-iLBCbits_inst->startIdx);
+    if( meml_gotten > CB_MEML ) {
+      meml_gotten=CB_MEML;
+    }
+
+    WebRtcSpl_MemCpyReversedOrder(&mem[CB_MEML-1], &decresidual[Nback*SUBL], meml_gotten);
+    WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten);
+
+#ifdef SPLIT_10MS
+    if (iLBCenc_inst->Nback_flag > 0)
+    {
+      for (subframe = 0; subframe < WEBRTC_SPL_MAX (2 - Nfor, 0); subframe++)
+      {
+        /* update memory */
+        WEBRTC_SPL_MEMCPY_W16 (mem, mem + SUBL, (CB_MEML - SUBL));
+        WEBRTC_SPL_MEMCPY_W16 (mem + CB_MEML - SUBL,
+                               &reverseDecresidual[subframe * SUBL], SUBL);
+      }
+    }
+
+    iLBCenc_inst->Nback_flag++;
+
+
+    if (iLBCenc_inst->mode == 20)
+    {
+      start_count = 0;
+      end_count = Nback;
+    }
+    if (iLBCenc_inst->mode == 30)
+    {
+      if (iLBCenc_inst->section == 1)
+      {
+        start_count = 0;
+        end_count = (Nfor >= 2) ? 0 : (2 - Nfor);
+      }
+      if (iLBCenc_inst->section == 2)
+      {
+        start_count = (Nfor >= 2) ? 0 : (2 - Nfor);
+        end_count = Nback;
+      }
+    }
+#else
+    start_count = 0;
+    end_count = Nback;
+#endif
+
+    /* loop over subframes to encode */
+
+    for (subframe = start_count; subframe < end_count; subframe++){
+
+      /* encode subframe */
+
+      WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index+subcount*CB_NSTAGES,
+                             iLBCbits_inst->gain_index+subcount*CB_NSTAGES, &reverseResidual[subframe*SUBL],
+                             mem, MEM_LF_TBL, SUBL,
+                             &weightdenum[(iLBCbits_inst->startIdx-2-subframe)*(LPC_FILTERORDER+1)],
+                             subcount);
+
+      /* construct decoded vector */
+      RTC_CHECK(WebRtcIlbcfix_CbConstruct(
+          &reverseDecresidual[subframe * SUBL],
+          iLBCbits_inst->cb_index + subcount * CB_NSTAGES,
+          iLBCbits_inst->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL,
+          SUBL));
+
+      /* update memory */
+      memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
+      WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL,
+                            &reverseDecresidual[subframe*SUBL], SUBL);
+
+      subcount++;
+
+    }
+
+    /* get decoded residual from reversed vector */
+
+    WebRtcSpl_MemCpyReversedOrder(&decresidual[SUBL*Nback-1], reverseDecresidual, SUBL*Nback);
+  }
+  /* end encoding part */
+
+  /* adjust index */
+
+  WebRtcIlbcfix_IndexConvEnc(iLBCbits_inst->cb_index);
+
+  /* Packetize the parameters into the frame */
+
+#ifdef SPLIT_10MS
+  if( (iLBCenc_inst->mode==30) && (iLBCenc_inst->section==1) ){
+    WebRtcIlbcfix_PackBits(iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode);
+  }
+  else{
+    WebRtcIlbcfix_PackBits(bytes, iLBCbits_inst, iLBCenc_inst->mode);
+  }
+#else
+  WebRtcIlbcfix_PackBits(bytes, iLBCbits_inst, iLBCenc_inst->mode);
+#endif
+
+#ifndef WEBRTC_ARCH_BIG_ENDIAN
+  /* Swap bytes for LITTLE ENDIAN since the packbits()
+     function assumes BIG_ENDIAN machine */
+#ifdef SPLIT_10MS
+  if (( (iLBCenc_inst->section == 1) && (iLBCenc_inst->mode == 20) ) ||
+      ( (iLBCenc_inst->section == 2) && (iLBCenc_inst->mode == 30) )){
+    WebRtcIlbcfix_SwapBytes(bytes, iLBCenc_inst->no_of_words, bytes);
+  }
+#else
+  WebRtcIlbcfix_SwapBytes(bytes, iLBCenc_inst->no_of_words, bytes);
+#endif
+#endif
+
+#ifdef SPLIT_10MS
+  if (subcount == (iLBCenc_inst->nsub - 1))
+  {
+    iLBCenc_inst->section = 0;
+  }
+  else
+  {
+    iLBCenc_inst->section++;
+    WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum,
+                           SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM);
+  }
+#endif
+
+}
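The tail of WebRtcIlbcfix_EncodeImpl above swaps bytes on little-endian hosts because the bit packer writes the stream in big-endian 16-bit words. A standalone sketch of such a word-wise swap; this is a hypothetical helper, not the upstream WebRtcIlbcfix_SwapBytes, though like it the sketch permits in-place operation (in == out):

#include <stddef.h>
#include <stdint.h>

/* Swap the two bytes of every 16-bit word so a little-endian host emits
   the big-endian bitstream that the packer assumes. */
static void swap_words(const uint16_t* in, size_t n_words, uint16_t* out) {
  for (size_t i = 0; i < n_words; i++) {
    out[i] = (uint16_t)((in[i] >> 8) | (in[i] << 8));
  }
}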
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h
new file mode 100644
index 0000000000..bc3e187d92
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Encode.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENCODE_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENCODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  main encoder function
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_EncodeImpl(
+    uint16_t* bytes,          /* (o) encoded data bits iLBC */
+    const int16_t* block,     /* (i) speech vector to encode */
+    IlbcEncoder* iLBCenc_inst /* (i/o) the general encoder
+                                 state */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c
new file mode 100644
index 0000000000..7f00254aea
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_EnergyInverse.c
+
+******************************************************************/
+
+/* Inverses the in vector in into Q29 domain */
+
+#include "modules/audio_coding/codecs/ilbc/energy_inverse.h"
+
+void WebRtcIlbcfix_EnergyInverse(
+    int16_t *energy,    /* (i/o) Energy and inverse
+                                 energy (in Q29) */
+    size_t noOfEnergies)  /* (i)   The length of the energy
+                                   vector */
+{
+  int32_t Nom=(int32_t)0x1FFFFFFF;
+  int16_t *energyPtr;
+  size_t i;
+
+  /* Set the minimum energy value to 16384 to avoid overflow */
+  energyPtr=energy;
+  for (i=0; i<noOfEnergies; i++) {
+    *energyPtr=WEBRTC_SPL_MAX(*energyPtr,16384);
+    energyPtr++;
+  }
+
+  /* Calculate inverse energy in Q29 */
+  energyPtr=energy;
+  for (i=0; i<noOfEnergies; i++) {
+    (*energyPtr) = (int16_t)WebRtcSpl_DivW32W16(Nom, (*energyPtr));
+    energyPtr++;
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.h
new file mode 100644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.h
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_EnergyInverse.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENERGY_INVERSE_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENERGY_INVERSE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/* Inverses the in vector in into Q29 domain */
+
+void WebRtcIlbcfix_EnergyInverse(
+    int16_t*
+        energy,           /* (i/o) Energy and inverse
+                                   energy (in Q29) */
+    size_t noOfEnergies); /* (i)   The length of the energy
+                                   vector */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c
new file mode 100644
index 0000000000..cd3d0a4db1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c
@@ -0,0 +1,112 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_EnhUpsample.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/enh_upsample.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * upsample finite array assuming zeros outside bounds
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_EnhUpsample(
+    int32_t *useq1,   /* (o) upsampled output sequence */
+    int16_t *seq1     /* (i) unupsampled sequence */
+                               ){
+  int j;
+  int32_t *pu1, *pu11;
+  int16_t *ps, *w16tmp;
+  const int16_t *pp;
+
+  /* filtering: filter overhangs left side of sequence */
+  pu1=useq1;
+  for (j=0;j
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * perform enhancement on idata+centerStartPos through
+ * idata+centerStartPos+ENH_BLOCKL-1
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Enhancer(
+    int16_t* odata,         /* (o) smoothed block, dimension blockl */
+    int16_t* idata,         /* (i) data buffer used for enhancing */
+    size_t idatal,          /* (i) dimension idata */
+    size_t centerStartPos,  /* (i) first sample current block within idata */
+    size_t* period,         /* (i) pitch period array (pitch bward-in time) */
+    const size_t* plocs,    /* (i) locations where period array values valid */
+    size_t periodl          /* (i) dimension of period and plocs */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c
new file mode 100644
index 0000000000..ca23e19ae3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c
@@ -0,0 +1,382 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_EnhancerInterface.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/enhancer.h"
+#include "modules/audio_coding/codecs/ilbc/hp_output.h"
+#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h"
+
+
+
+/*----------------------------------------------------------------*
+ * interface for enhancer
+ *---------------------------------------------------------------*/
+
+size_t  // (o) Estimated lag in end of in[]
+    WebRtcIlbcfix_EnhancerInterface(
+        int16_t* out,               // (o) enhanced signal
+        const int16_t* in,          // (i) unenhanced signal
+        IlbcDecoder* iLBCdec_inst) {  // (i) buffers etc
+  size_t iblock;
+  size_t lag=20, tlag=20;
+  size_t inLen=iLBCdec_inst->blockl+120;
+  int16_t scale, scale1;
+  size_t plc_blockl;
+  int16_t *enh_buf;
+  size_t *enh_period;
+  int32_t tmp1, tmp2, max;
+  size_t new_blocks;
+  int16_t *enh_bufPtr1;
+  size_t i;
+  size_t k;
+  int16_t EnChange;
+  int16_t SqrtEnChange;
+  int16_t inc;
+  int16_t win;
+  int16_t *tmpW16ptr;
+  size_t startPos;
+  int16_t *plc_pred;
+  const int16_t *target, *regressor;
+  int16_t max16;
+  int shifts;
+  int32_t ener;
+  int16_t enerSh;
+  int16_t corrSh;
+  size_t ind;
+  int16_t sh;
+  size_t start, stop;
+  /* Stack based */
+  int16_t totsh[3];
+  int16_t downsampled[(BLOCKL_MAX+120)>>1]; /* length 180 */
+  int32_t corr32[50];
+  int32_t corrmax[3];
+  int16_t corr16[3];
+  int16_t en16[3];
+  size_t lagmax[3];
+
+  plc_pred = downsampled; /* Reuse memory since plc_pred[ENH_BLOCKL] and
+                             downsampled are non overlapping */
+  enh_buf=iLBCdec_inst->enh_buf;
+  enh_period=iLBCdec_inst->enh_period;
+
+  /* Copy in the new data into the enhancer buffer */
+  memmove(enh_buf, &enh_buf[iLBCdec_inst->blockl],
+          (ENH_BUFL - iLBCdec_inst->blockl) * sizeof(*enh_buf));
+
+  WEBRTC_SPL_MEMCPY_W16(&enh_buf[ENH_BUFL-iLBCdec_inst->blockl], in,
+                        iLBCdec_inst->blockl);
+
+  /* Set variables that are dependent on frame size */
+  if (iLBCdec_inst->mode==30) {
+    plc_blockl=ENH_BLOCKL;
+    new_blocks=3;
+    startPos=320;  /* Start position for enhancement
+                      (640-new_blocks*ENH_BLOCKL-80) */
+  } else {
+    plc_blockl=40;
+    new_blocks=2;
+    startPos=440;  /* Start position for enhancement
+                      (640-new_blocks*ENH_BLOCKL-40) */
+  }
+
+  /* Update the pitch prediction for each enhancer block, move the old ones */
+  memmove(enh_period, &enh_period[new_blocks],
+          (ENH_NBLOCKS_TOT - new_blocks) * sizeof(*enh_period));
+
+  WebRtcSpl_DownsampleFast(
+      enh_buf+ENH_BUFL-inLen,    /* Input samples */
+      inLen + ENH_BUFL_FILTEROVERHEAD,
+      downsampled,
+      inLen / 2,
+      (int16_t*)WebRtcIlbcfix_kLpFiltCoefs,  /* Coefficients in Q12 */
+      FILTERORDER_DS_PLUS1,    /* Length of filter (order-1) */
+      FACTOR_DS,
+      DELAY_DS);
+
+  /* Estimate the pitch in the down sampled domain. */
+  for(iblock = 0; iblock<new_blocks; iblock++) {
+
+    /* references */
+    target=downsampled+60;
+    target+=iblock*ENH_BLOCKL_HALF;
+    regressor=target-10;
+
+    /* scaling */
+    max16=WebRtcSpl_MaxAbsValueW16(&regressor[-50],
+                                   ENH_BLOCKL_HALF+50-1);
+    shifts = WebRtcSpl_GetSizeInBits((uint32_t)(max16 * max16)) - 25;
+    shifts = WEBRTC_SPL_MAX(0, shifts);
+
+    /* compute cross correlation */
+    WebRtcSpl_CrossCorrelation(corr32, target, regressor, ENH_BLOCKL_HALF, 50,
+                               shifts, -1);
+
+    /* Find 3 highest correlations that should be compared for the
+       highest (corr^2)/ener */
+
+    for (i=0;i<2;i++) {
+      lagmax[i] = WebRtcSpl_MaxIndexW32(corr32, 50);
+      corrmax[i] = corr32[lagmax[i]];
+      corr32[lagmax[i]] = -1;
+    }
+    corr32[lagmax[0]] = corrmax[0];
+    corr32[lagmax[1]] = corrmax[1];
+    lagmax[2] = WebRtcSpl_MaxIndexW32(corr32, 50);
+    corrmax[2] = corr32[lagmax[2]];
+
+    /* Calculate normalized corr^2 and ener */
+    for (i=0;i<3;i++) {
+      corrSh = 15-WebRtcSpl_GetSizeInBits(corrmax[i]);
+      ener = WebRtcSpl_DotProductWithScale(regressor-lagmax[i],
+                                           regressor-lagmax[i],
+                                           ENH_BLOCKL_HALF, shifts);
+      enerSh = 15-WebRtcSpl_GetSizeInBits(ener);
+      corr16[i] = (int16_t)WEBRTC_SPL_SHIFT_W32(corrmax[i], corrSh);
+      corr16[i] = (int16_t)((corr16[i] * corr16[i]) >> 16);
+      en16[i] = (int16_t)WEBRTC_SPL_SHIFT_W32(ener, enerSh);
+      totsh[i] = enerSh - 2 * corrSh;
+    }
+
+    /* Compare lagmax[0..3] for the (corr^2)/ener criteria */
+    ind = 0;
+    for (i=1; i<3; i++) {
+      if (totsh[ind] > totsh[i]) {
+        sh = WEBRTC_SPL_MIN(31, totsh[ind]-totsh[i]);
+        if (corr16[ind] * en16[i] < (corr16[i] * en16[ind]) >> sh) {
+          ind = i;
+        }
+      } else {
+        sh = WEBRTC_SPL_MIN(31, totsh[i]-totsh[ind]);
+        if ((corr16[ind] * en16[i]) >> sh < corr16[i] * en16[ind]) {
+          ind = i;
+        }
+      }
+    }
+
+    lag = lagmax[ind] + 10;
+
+    /* Store the estimated lag in the non-downsampled domain */
+    enh_period[ENH_NBLOCKS_TOT - new_blocks + iblock] = lag * 8;
+
+    /* Store the estimated lag for backward PLC */
+    if (iLBCdec_inst->prev_enh_pl==1) {
+      if (!iblock) {
+        tlag = lag * 2;
+      }
+    } else {
+      if (iblock==1) {
+        tlag = lag * 2;
+      }
+    }
+
+    lag *= 2;
+  }
+
+  if ((iLBCdec_inst->prev_enh_pl==1)||(iLBCdec_inst->prev_enh_pl==2)) {
+
+    /* Calculate the best lag of the new frame
+       This is used to interpolate backwards and mix with the PLC'd data
+    */
+
+    /* references */
+    target=in;
+    regressor=in+tlag-1;
+
+    /* scaling */
+    // Note that this is not abs-max, so we will take the absolute value below.
+    max16 = WebRtcSpl_MaxAbsElementW16(regressor, plc_blockl + 3 - 1);
+    const int16_t max_target =
+        WebRtcSpl_MaxAbsElementW16(target, plc_blockl + 3 - 1);
+    const int64_t max_val = plc_blockl * abs(max16 * max_target);
+    const int32_t factor = max_val >> 31;
+    shifts = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
+
+    /* compute cross correlation */
+    WebRtcSpl_CrossCorrelation(corr32, target, regressor, plc_blockl, 3, shifts,
+                               1);
+
+    /* find lag */
+    lag=WebRtcSpl_MaxIndexW32(corr32, 3);
+    lag+=tlag-1;
+
+    /* Copy the backward PLC to plc_pred */
+
+    if (iLBCdec_inst->prev_enh_pl==1) {
+      if (lag>plc_blockl) {
+        WEBRTC_SPL_MEMCPY_W16(plc_pred, &in[lag-plc_blockl], plc_blockl);
+      } else {
+        WEBRTC_SPL_MEMCPY_W16(&plc_pred[plc_blockl-lag], in, lag);
+        WEBRTC_SPL_MEMCPY_W16(
+            plc_pred, &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl+lag],
+            (plc_blockl-lag));
+      }
+    } else {
+      size_t pos;
+
+      pos = plc_blockl;
+
+      while (lag<pos) {
+        WEBRTC_SPL_MEMCPY_W16(&plc_pred[pos-lag], in, lag);
+        pos = pos - lag;
+      }
+      WEBRTC_SPL_MEMCPY_W16(plc_pred, &in[lag-pos], pos);
+
+    }
+
+    if (iLBCdec_inst->prev_enh_pl==1) {
+      /* limit energy change
+         if energy in backward PLC is more than 4 times higher than the forward
+         PLC, then reduce the energy in the backward PLC vector:
+         sample 1...len-16: set the energy of the vector to 4 times the
+         forward PLC energy
+         sample len-15..len: interpolate between 4 times fw PLC and bw PLC
+         energy
+
+         Note: Compared to floating point code there is a slight change,
+         the window is 16 samples long instead of 10 samples to simplify the
+         calculations
+      */
+
+      max=WebRtcSpl_MaxAbsValueW16(
+          &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl], plc_blockl);
+      max16=WebRtcSpl_MaxAbsValueW16(plc_pred, plc_blockl);
+      max = WEBRTC_SPL_MAX(max, max16);
+      scale=22-(int16_t)WebRtcSpl_NormW32(max);
+      scale=WEBRTC_SPL_MAX(scale,0);
+
+      tmp2 = WebRtcSpl_DotProductWithScale(
+          &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl],
+          &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl],
+          plc_blockl, scale);
+      tmp1 = WebRtcSpl_DotProductWithScale(plc_pred, plc_pred,
+                                           plc_blockl, scale);
+
+      /* Check the energy difference */
+      if ((tmp1>0)&&((tmp1>>2)>tmp2)) {
+        /* EnChange is now guaranteed to be <0.5
+           Calculate EnChange=tmp2/tmp1 in Q16
+        */
+
+        scale1=(int16_t)WebRtcSpl_NormW32(tmp1);
+        tmp1=WEBRTC_SPL_SHIFT_W32(tmp1, (scale1-16)); /* using 15 bits */
+
+        tmp2=WEBRTC_SPL_SHIFT_W32(tmp2, (scale1));
+        EnChange = (int16_t)WebRtcSpl_DivW32W16(tmp2,
+                                                (int16_t)tmp1);
+
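/* Editorial aside on the Q-format bookkeeping above (illustration only,
   not part of the upstream file): tmp1 is normalized so that after
   WEBRTC_SPL_SHIFT_W32(tmp1, scale1-16) it is a 15-bit value in
   Q(scale+scale1-16), while tmp2 is shifted all the way to
   Q(scale+scale1); the division tmp2/tmp1 therefore lands in Q16.
   Worked example: if the forward-PLC energy is one quarter of the
   backward-PLC energy, EnChange = 0.25 * 2^16 = 16384. EnChange << 14
   is then Q30, and WebRtcSpl_SqrtFloor of a Q30 value yields Q15:
   sqrt(0.25) = 0.5 -> 16384 in Q15. Scaling the vector by SqrtEnChange
   with a final >> 14 multiplies by 2*sqrt(EnChange), which is what the
   "2*SqrtEnChange" comment below refers to. */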
+        /* Calculate the Sqrt of the energy in Q15 ((14+16)/2) */
+        SqrtEnChange = (int16_t)WebRtcSpl_SqrtFloor(EnChange << 14);
+
+
+        /* Multiply first part of vector with 2*SqrtEnChange */
+        WebRtcSpl_ScaleVector(plc_pred, plc_pred, SqrtEnChange, plc_blockl-16,
+                              14);
+
+        /* Calculate increase parameter for window part (16 last samples) */
+        /* (1-2*SqrtEnChange)/16 in Q15 */
+        inc = 2048 - (SqrtEnChange >> 3);
+
+        win=0;
+        tmpW16ptr=&plc_pred[plc_blockl-16];
+
+        for (i=16;i>0;i--) {
+          *tmpW16ptr = (int16_t)(
+              (*tmpW16ptr * (SqrtEnChange + (win >> 1))) >> 14);
+          /* multiply by (2.0*SqrtEnChange+win) */
+
+          win += inc;
+          tmpW16ptr++;
+        }
+      }
+
+      /* Make the linear interpolation between the forward PLC'd data
+         and the backward PLC'd data (from the new frame)
+      */
+
+      if (plc_blockl==40) {
+        inc=400; /* 1/41 in Q14 */
+      } else { /* plc_blockl==80 */
+        inc=202; /* 1/81 in Q14 */
+      }
+      win=0;
+      enh_bufPtr1=&enh_buf[ENH_BUFL-1-iLBCdec_inst->blockl];
+      for (i=0; i<plc_blockl; i++) {
+        win+=inc;
+        *enh_bufPtr1 = (int16_t)((*enh_bufPtr1 * win) >> 14);
+        *enh_bufPtr1 += (int16_t)(
+            ((16384 - win) * plc_pred[plc_blockl - 1 - i]) >> 14);
+        enh_bufPtr1--;
+      }
+    } else {
+      int16_t *synt = &downsampled[LPC_FILTERORDER];
+
+      enh_bufPtr1=&enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl];
+      WEBRTC_SPL_MEMCPY_W16(enh_bufPtr1, plc_pred, plc_blockl);
+
+      /* Clear filter memory */
+      WebRtcSpl_MemSetW16(iLBCdec_inst->syntMem, 0, LPC_FILTERORDER);
+      WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemy, 0, 4);
+      WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemx, 0, 2);
+
+      /* Initialize filter memory by filtering through 2 lags */
+      WEBRTC_SPL_MEMCPY_W16(&synt[-LPC_FILTERORDER], iLBCdec_inst->syntMem,
+                            LPC_FILTERORDER);
+      WebRtcSpl_FilterARFastQ12(
+          enh_bufPtr1,
+          synt,
+          &iLBCdec_inst->old_syntdenum[
+              (iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1)],
+          LPC_FILTERORDER+1, lag);
+
+      WEBRTC_SPL_MEMCPY_W16(&synt[-LPC_FILTERORDER], &synt[lag-LPC_FILTERORDER],
+                            LPC_FILTERORDER);
+      WebRtcIlbcfix_HpOutput(synt, (int16_t*)WebRtcIlbcfix_kHpOutCoefs,
+                             iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx,
+                             lag);
+      WebRtcSpl_FilterARFastQ12(
+          enh_bufPtr1, synt,
+          &iLBCdec_inst->old_syntdenum[
+              (iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1)],
+          LPC_FILTERORDER+1, lag);
+
+      WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &synt[lag-LPC_FILTERORDER],
+                            LPC_FILTERORDER);
+      WebRtcIlbcfix_HpOutput(synt, (int16_t*)WebRtcIlbcfix_kHpOutCoefs,
+                             iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx,
+                             lag);
+    }
+  }
+
+
+  /* Perform enhancement block by block */
+
+  for (iblock = 0; iblock<new_blocks; iblock++) {
+    WebRtcIlbcfix_Enhancer(out + iblock * ENH_BLOCKL,
+                           enh_buf,
+                           ENH_BUFL,
+                           iblock * ENH_BLOCKL + startPos,
+                           enh_period,
+                           WebRtcIlbcfix_kEnhPlocs, ENH_NBLOCKS_TOT);
+  }
+
+  return (lag);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h
new file mode 100644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_EnhancerInterface.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_INTERFACE_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_INTERFACE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * interface for enhancer
+ *---------------------------------------------------------------*/
+
+size_t  // (o) Estimated lag in end of in[]
+WebRtcIlbcfix_EnhancerInterface(int16_t* out,       // (o) enhanced signal
+                                const int16_t* in,  // (i) unenhanced signal
+                                IlbcDecoder* iLBCdec_inst);  // (i) buffers etc
+
+#endif
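The interpolation loop in enhancer_interface.c above cross-fades the backward-PLC samples into the enhancer buffer with a Q14 ramp (inc is 1/41 or 1/81 in Q14). A compact sketch of the same idea; note that the upstream loop walks the buffer backwards and applies the two Q14 products as separately rounded terms, so this combined form differs slightly in rounding, and the helper name is illustrative:

#include <stddef.h>
#include <stdint.h>

/* Fade signal a out while fading signal b in, with a Q14 ramp:
   win goes from ~0 up toward 16384 (1.0 in Q14) in steps of inc. */
static void crossfade_q14(int16_t* a, const int16_t* b, size_t len,
                          int16_t inc) {
  int16_t win = 0;
  for (size_t i = 0; i < len; i++) {
    win += inc;
    a[i] = (int16_t)(((int32_t)a[i] * win +
                      (int32_t)b[i] * (16384 - win)) >> 14);
  }
}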
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c
new file mode 100644
index 0000000000..6b4f30c96b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_FilteredCbVecs.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  Construct an additional codebook vector by filtering the
+ *  initial codebook buffer. This vector is then used to expand
+ *  the codebook with an additional section.
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_FilteredCbVecs(
+    int16_t *cbvectors, /* (o) Codebook vector for the higher section */
+    int16_t *CBmem,     /* (i) Codebook memory that is filtered to create a
+                               second CB section */
+    size_t lMem,        /* (i) Length of codebook memory */
+    size_t samples      /* (i) Number of samples to filter */
+    ) {
+
+  /* Set up the memory, start with zero state */
+  WebRtcSpl_MemSetW16(CBmem+lMem, 0, CB_HALFFILTERLEN);
+  WebRtcSpl_MemSetW16(CBmem-CB_HALFFILTERLEN, 0, CB_HALFFILTERLEN);
+  WebRtcSpl_MemSetW16(cbvectors, 0, lMem-samples);
+
+  /* Filter to obtain the filtered CB memory */
+
+  WebRtcSpl_FilterMAFastQ12(
+      CBmem+CB_HALFFILTERLEN+lMem-samples, cbvectors+lMem-samples,
+      (int16_t*)WebRtcIlbcfix_kCbFiltersRev, CB_FILTERLEN, samples);
+
+  return;
+}
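WebRtcSpl_FilterMAFastQ12, used above to derive the second codebook section, is an MA (FIR) filter with Q12 taps. A plain-C sketch of that operation, under the assumption that the taps are stored reversed, as the WebRtcIlbcfix_kCbFiltersRev name suggests; the helper name and the rounding constant are illustrative, and saturation is omitted:

#include <stddef.h>
#include <stdint.h>

/* y[n] = (sum_k taps_rev[n_taps-1-k] * x[n+k] + 2048) >> 12, i.e. a Q12
   dot product of the filter taps with a sliding window over x. The caller
   provides x with n_taps-1 leading history samples, as the zero-stuffed
   codebook memory above does. */
static void fir_q12(const int16_t* x, const int16_t* taps_rev, size_t n_taps,
                    int16_t* y, size_t n_out) {
  for (size_t n = 0; n < n_out; n++) {
    int32_t acc = 2048; /* rounding term for the >> 12 */
    for (size_t k = 0; k < n_taps; k++) {
      acc += (int32_t)x[n + k] * taps_rev[n_taps - 1 - k];
    }
    y[n] = (int16_t)(acc >> 12);
  }
}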
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h
new file mode 100644
index 0000000000..661262e42e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_FilteredCbVecs.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FILTERED_CB_VECS_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FILTERED_CB_VECS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ *  Construct an additional codebook vector by filtering the
+ *  initial codebook buffer. This vector is then used to expand
+ *  the codebook with an additional section.
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_FilteredCbVecs(
+    int16_t* cbvectors, /* (o) Codebook vector for the higher section */
+    int16_t* CBmem,     /* (i) Codebook memory that is filtered to create a
+                               second CB section */
+    size_t lMem,        /* (i) Length of codebook memory */
+    size_t samples      /* (i) Number of samples to filter */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.c
new file mode 100644
index 0000000000..c1084b1645
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.c
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_FrameClassify.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/frame_classify.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  Classification of subframes to localize start state
+ *---------------------------------------------------------------*/
+
+size_t WebRtcIlbcfix_FrameClassify(
+    /* (o) Index to the max-energy sub frame */
+    IlbcEncoder *iLBCenc_inst,
+    /* (i/o) the encoder state structure */
+    int16_t *residualFIX /* (i) lpc residual signal */
+                                   ){
+  int16_t max, scale;
+  int32_t ssqEn[NSUB_MAX-1];
+  int16_t *ssqPtr;
+  int32_t *seqEnPtr;
+  int32_t maxW32;
+  int16_t scale1;
+  size_t pos;
+  size_t n;
+
+  /*
+    Calculate the energy of each of the 80 sample blocks
+    in the draft the first 4 and last 4 samples are windowed with 1/5...4/5
+    and 4/5...1/5 respectively.
+    To simplify for the fixpoint we have changed this to 0 0 1 1 and 1 1 0 0
+  */
+
+  max = WebRtcSpl_MaxAbsValueW16(residualFIX, iLBCenc_inst->blockl);
+  scale = WebRtcSpl_GetSizeInBits((uint32_t)(max * max));
+
+  /* Scale to maximum 24 bits so that it won't overflow for 76 samples */
+  scale = scale-24;
+  scale1 = WEBRTC_SPL_MAX(0, scale);
+
+  /* Calculate energies */
+  ssqPtr=residualFIX + 2;
+  seqEnPtr=ssqEn;
+  for (n=(iLBCenc_inst->nsub-1); n>0; n--) {
+    (*seqEnPtr) = WebRtcSpl_DotProductWithScale(ssqPtr, ssqPtr, 76, scale1);
+    ssqPtr += 40;
+    seqEnPtr++;
+  }
+
+  /* Scale to maximum 20 bits in order to allow for the 11 bit window */
+  maxW32 = WebRtcSpl_MaxValueW32(ssqEn, iLBCenc_inst->nsub - 1);
+  scale = WebRtcSpl_GetSizeInBits(maxW32) - 20;
+  scale1 = WEBRTC_SPL_MAX(0, scale);
+
+  /* Window each 80 block with the ssqEn_winTbl window to give higher probability for
+     the blocks in the middle
+  */
+  seqEnPtr=ssqEn;
+  if (iLBCenc_inst->mode==20) {
+    ssqPtr=(int16_t*)WebRtcIlbcfix_kStartSequenceEnrgWin+1;
+  } else {
+    ssqPtr=(int16_t*)WebRtcIlbcfix_kStartSequenceEnrgWin;
+  }
+  for (n=(iLBCenc_inst->nsub-1); n>0; n--) {
+    (*seqEnPtr)=WEBRTC_SPL_MUL(((*seqEnPtr)>>scale1), (*ssqPtr));
+    seqEnPtr++;
+    ssqPtr++;
+  }
+
+  /* Extract the best choice of start state */
+  pos = WebRtcSpl_MaxIndexW32(ssqEn, iLBCenc_inst->nsub - 1) + 1;
+
+  return(pos);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h
new file mode 100644
index 0000000000..7615106d70
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_FrameClassify.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FRAME_CLASSIFY_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FRAME_CLASSIFY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+size_t WebRtcIlbcfix_FrameClassify(
+    /* (o) Index to the max-energy sub frame */
+    IlbcEncoder* iLBCenc_inst,
+    /* (i/o) the encoder state structure */
+    int16_t* residualFIX /* (i) lpc residual signal */
+    );
+
+#endif
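The classifier above scores each 76-sample block (in hops of 40 samples) by energy and picks the maximum after an 11-bit window weighting. A rough standalone sketch of the same selection using plain 64-bit arithmetic in place of the GetSizeInBits scale/shift bookkeeping; the helper name and the Q11 window argument are illustrative assumptions:

#include <stddef.h>
#include <stdint.h>

/* Return the 1-based index of the windowed-max-energy block, as the
   start-state index above is 1-based. win holds Q11 weights. */
static size_t max_energy_index(const int16_t* res, size_t n_sub,
                               const int16_t* win) {
  size_t best = 0;
  int64_t best_en = -1;
  for (size_t n = 0; n + 1 < n_sub; n++) {  /* n_sub - 1 candidate blocks */
    const int16_t* p = res + 2 + 40 * n;    /* 76 samples per block */
    int64_t en = 0;
    for (size_t i = 0; i < 76; i++) {
      en += (int32_t)p[i] * p[i];
    }
    en = (en >> 11) * win[n];               /* apply Q11 window weight */
    if (en > best_en) {
      best_en = en;
      best = n;
    }
  }
  return best + 1;
}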
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c
new file mode 100644
index 0000000000..1357dece33
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GainDequant.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/gain_dequant.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  decoder for quantized gains in the gain-shape coding of
+ *  residual
+ *---------------------------------------------------------------*/
+
+int16_t WebRtcIlbcfix_GainDequant(
+    /* (o) quantized gain value (Q14) */
+    int16_t index, /* (i) quantization index */
+    int16_t maxIn, /* (i) maximum of unquantized gain (Q14) */
+    int16_t stage  /* (i) The stage of the search */
+                                  ){
+  int16_t scale;
+  const int16_t *gain;
+
+  /* obtain correct scale factor */
+
+  scale=WEBRTC_SPL_ABS_W16(maxIn);
+  scale = WEBRTC_SPL_MAX(1638, scale);  /* if lower than 0.1, set it to 0.1 */
+
+  /* select the quantization table and return the decoded value */
+  gain = WebRtcIlbcfix_kGain[stage];
+
+  return (int16_t)((scale * gain[index] + 8192) >> 14);
}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h
new file mode 100644
index 0000000000..2b97550b6c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GainDequant.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GAIN_DEQUANT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GAIN_DEQUANT_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ *  decoder for quantized gains in the gain-shape coding of
+ *  residual
+ *---------------------------------------------------------------*/
+
+int16_t WebRtcIlbcfix_GainDequant(
+    /* (o) quantized gain value (Q14) */
+    int16_t index, /* (i) quantization index */
+    int16_t maxIn, /* (i) maximum of unquantized gain (Q14) */
+    int16_t stage  /* (i) The stage of the search */
+    );
+
+#endif
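The dequantizer above reconstructs the gain as scale*cb[index] in Q14 with a +8192 rounding term before the shift. A one-line worked sketch: with scale = 16384 (1.0 in Q14) and a table value of 8192 (0.5 in Q14), the result is (16384*8192 + 8192) >> 14 = 8192, i.e. 0.5 in Q14. The helper name below is illustrative:

#include <stdint.h>

/* Q14 * Q14 -> Q14 multiply with rounding, as used by the gain decoder. */
static int16_t dequant_q14(int16_t scale, int16_t cb_val) {
  return (int16_t)(((int32_t)scale * cb_val + 8192) >> 14);
}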
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.c
new file mode 100644
index 0000000000..9a6d49d51a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.c
@@ -0,0 +1,105 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GainQuant.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/gain_quant.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  quantizer for the gain in the gain-shape coding of residual
+ *---------------------------------------------------------------*/
+
+int16_t WebRtcIlbcfix_GainQuant( /* (o) quantized gain value */
+    int16_t gain,   /* (i) gain value Q14 */
+    int16_t maxIn,  /* (i) maximum of gain value Q14 */
+    int16_t stage,  /* (i) The stage of the search */
+    int16_t *index  /* (o) quantization index */
+                                 ) {
+
+  int16_t scale, cblen;
+  int32_t gainW32, measure1, measure2;
+  const int16_t *cbPtr, *cb;
+  int loc, noMoves, noChecks, i;
+
+  /* ensure a lower bound (0.1) on the scaling factor */
+
+  scale = WEBRTC_SPL_MAX(1638, maxIn);
+
+  /* select the quantization table and calculate
+     the length of the table and the number of
+     steps in the binary search that are needed */
+  cb = WebRtcIlbcfix_kGain[stage];
+  cblen = 32>>stage;
+  noChecks = 4-stage;
+
+  /* Multiply the gain with 2^14 to make the comparison
+     easier and with higher precision */
+  gainW32 = gain << 14;
+
+  /* Do a binary search, starting in the middle of the CB
+     loc  - defines the current position in the table
+     noMoves - defines the number of steps to move in the CB in order
+     to get next CB location
+  */
+
+  loc = cblen>>1;
+  noMoves = loc;
+  cbPtr = cb + loc; /* Centre of CB */
+
+  for (i=noChecks;i>0;i--) {
+    noMoves>>=1;
+    measure1 = scale * *cbPtr;
+
+    /* Move up if gain is larger, otherwise move down in table */
+    measure1 = measure1 - gainW32;
+
+    if (0>measure1) {
+      cbPtr+=noMoves;
+      loc+=noMoves;
+    } else {
+      cbPtr-=noMoves;
+      loc-=noMoves;
+    }
+  }
+
+  /* Check which value is the closest one: loc-1, loc or loc+1 */
+
+  measure1 = scale * *cbPtr;
+  if (gainW32>measure1) {
+    /* Check against value above loc */
+    measure2 = scale * cbPtr[1];
+    if ((measure2-gainW32)<(gainW32-measure1)) {
+      loc+=1;
+    }
+  } else {
+    /* Check against value below loc */
+    measure2 = scale * cbPtr[-1];
+    if ((gainW32-measure2)<=(measure1-gainW32)) {
+      loc-=1;
+    }
+  }
+
+  /* Guard against getting outside the table. The calculation above can give a location
+     which is one above the maximum value (in very rare cases) */
+  loc=WEBRTC_SPL_MIN(loc, (cblen-1));
+  *index=loc;
+
+  /* Calculate and return the quantized gain value (in Q14) */
+  return (int16_t)((scale * cb[loc] + 8192) >> 14);
+}
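The quantizer above implements a stage-dependent binary search over a sorted Q14 gain table (lengths 32, 16, 8), followed by a neighbour check. For comparison, the same selection written as a conventional lower-bound binary search; this assumes an ascending table, which the "move up if gain is larger" stepping above implies, and the helper name is illustrative:

#include <stdint.h>

/* Return the index of the table entry whose scaled value is closest to
   target (= gain << 14 in the code above). */
static int nearest_index(const int16_t* cb, int len,
                         int32_t scale, int32_t target) {
  int lo = 0, hi = len - 1;
  while (lo < hi) {
    int mid = (lo + hi) / 2;
    if ((int64_t)scale * cb[mid] < target) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  /* lo is the first entry with scale*cb[lo] >= target (or the last entry);
     check whether the neighbour below is closer. */
  if (lo > 0 && target - (int64_t)scale * cb[lo - 1] <=
                    (int64_t)scale * cb[lo] - target) {
    lo--;
  }
  return lo;
}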
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h
new file mode 100644
index 0000000000..761f7d2f79
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GainQuant.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GAIN_QUANT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GAIN_QUANT_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ *  quantizer for the gain in the gain-shape coding of residual
+ *---------------------------------------------------------------*/
+
+int16_t
+WebRtcIlbcfix_GainQuant( /* (o) quantized gain value */
+                        int16_t gain,   /* (i) gain value Q14 */
+                        int16_t maxIn,  /* (i) maximum of gain value Q14 */
+                        int16_t stage,  /* (i) The stage of the search */
+                        int16_t* index  /* (o) quantization index */
+                        );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.c
new file mode 100644
index 0000000000..e9cd2008e0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.c
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GetCbVec.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/get_cd_vec.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ *  Construct codebook vector for given index.
+ *---------------------------------------------------------------*/
+
+bool WebRtcIlbcfix_GetCbVec(
+    int16_t *cbvec,   /* (o) Constructed codebook vector */
+    int16_t *mem,     /* (i) Codebook buffer */
+    size_t index,     /* (i) Codebook index */
+    size_t lMem,      /* (i) Length of codebook buffer */
+    size_t cbveclen   /* (i) Codebook vector length */
+                            ){
+  size_t k, base_size;
+  size_t lag;
+  /* Stack based */
+  int16_t tempbuff2[SUBL+5];
+
+  /* Determine size of codebook sections */
+
+  base_size=lMem-cbveclen+1;
+
+  if (cbveclen==SUBL) {
+    base_size += cbveclen / 2;
+  }
+
+  /* No filter -> First codebook section */
+
+  if (index
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/attributes.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+// Returns true on success, false on failure. In case of failure, the decoder
+// state may be corrupted and needs resetting.
+ABSL_MUST_USE_RESULT
+bool WebRtcIlbcfix_GetCbVec(
+    int16_t* cbvec,   /* (o) Constructed codebook vector */
+    int16_t* mem,     /* (i) Codebook buffer */
+    size_t index,     /* (i) Codebook index */
+    size_t lMem,      /* (i) Length of codebook buffer */
+    size_t cbveclen   /* (i) Codebook vector length */
+);
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c
new file mode 100644
index 0000000000..e0fb21caf0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c
@@ -0,0 +1,84 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GetLspPoly.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/get_lsp_poly.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * Construct the polynomials F1(z) and F2(z) from the LSP
+ * (Computations are done in Q24)
+ *
+ * The expansion is performed using the following recursion:
+ *
+ * f[0] = 1;
+ * tmp = -2.0 * lsp[0];
+ * f[1] = tmp;
+ * for (i=2; i<=5; i++) {
+ *   b = -2.0 * lsp[2*i-2];
+ *   f[i] = b*f[i-1] + 2.0*f[i-2];
+ *   for (j=i; j>=2; j--) {
+ *     f[j] = f[j] + b*f[j-1] + f[j-2];
+ *   }
+ *   f[i] = f[i] + b;
+ * }
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_GetLspPoly(
+    int16_t *lsp, /* (i) LSP in Q15 */
+    int32_t *f)   /* (o) polynomial in Q24 */
+{
+  int32_t tmpW32;
+  int i, j;
+  int16_t high, low;
+  int16_t *lspPtr;
+  int32_t *fPtr;
+
+  lspPtr = lsp;
+  fPtr = f;
+  /* f[0] = 1.0 (Q24) */
+  (*fPtr) = (int32_t)16777216;
+  fPtr++;
+
+  (*fPtr) = WEBRTC_SPL_MUL((*lspPtr), -1024);
+  fPtr++;
+  lspPtr+=2;
+
+  for(i=2; i<=5; i++)
+  {
+    (*fPtr) = fPtr[-2];
+
+    for(j=i; j>1; j--)
+    {
+      /* Compute f[j] = f[j] + b*f[j-1] + f[j-2]; */
+      high = (int16_t)(fPtr[-1] >> 16);
+      low = (int16_t)((fPtr[-1] & 0xffff) >> 1);
+
+      tmpW32 = 4 * high * *lspPtr + 4 * ((low * *lspPtr) >> 15);
+
+      (*fPtr) += fPtr[-2];
+      (*fPtr) -= tmpW32;
+      fPtr--;
+    }
+    *fPtr -= *lspPtr * (1 << 10);
+
+    fPtr+=i;
+    lspPtr+=2;
+  }
+  return;
+}
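The recursion documented above expands the product of second-order LSP factors (1 - 2*cos(w_k)*z^-1 + z^-2) into polynomial coefficients. A floating-point reference implementation of the standard form of that recursion, useful for sanity-checking the Q24 pointer-walking loop; note the upstream comment abbreviates the final update (it is f[1] that accumulates b), which this version spells out:

/* c[k] = cos(w_k) for the five lines used by one polynomial;
   f receives the six coefficients f[0..5]. Illustrative only. */
static void lsp_to_poly_ref(const double* c, double* f) {
  f[0] = 1.0;
  f[1] = -2.0 * c[0];
  for (int i = 2; i <= 5; i++) {
    double b = -2.0 * c[i - 1];
    f[i] = b * f[i - 1] + 2.0 * f[i - 2];
    for (int j = i - 1; j >= 2; j--) {
      f[j] += b * f[j - 1] + f[j - 2];
    }
    f[1] += b;
  }
}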
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.h
new file mode 100644
index 0000000000..70c9c4d4b4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GetLspPoly.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_LSP_POLY_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_LSP_POLY_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Construct the polynomials F1(z) and F2(z) from the LSP
+ * (Computations are done in Q24)
+ *
+ * The expansion is performed using the following recursion:
+ *
+ * f[0] = 1;
+ * tmp = -2.0 * lsp[0];
+ * f[1] = tmp;
+ * for (i=2; i<=5; i++) {
+ *   b = -2.0 * lsp[2*i-2];
+ *   f[i] = b*f[i-1] + 2.0*f[i-2];
+ *   for (j=i; j>=2; j--) {
+ *     f[j] = f[j] + b*f[j-1] + f[j-2];
+ *   }
+ *   f[i] = f[i] + b;
+ * }
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_GetLspPoly(int16_t* lsp, /* (i) LSP in Q15 */
+                              int32_t* f);  /* (o) polynomial in Q24 */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.c
new file mode 100644
index 0000000000..68a569a40a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.c
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_GetSyncSeq.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/get_sync_seq.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/nearest_neighbor.h"
+#include "modules/audio_coding/codecs/ilbc/refiner.h"
+
+/*----------------------------------------------------------------*
+ * get the pitch-synchronous sample sequence
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_GetSyncSeq(
+    int16_t *idata,         /* (i) original data */
+    size_t idatal,          /* (i) dimension of data */
+    size_t centerStartPos,  /* (i) where current block starts */
+    size_t *period,         /* (i) rough-pitch-period array (Q-2) */
+    const size_t *plocs,    /* (i) where periods of period array are taken (Q-2) */
+    size_t periodl,         /* (i) dimension period array */
+    size_t hl,              /* (i) 2*hl+1 is the number of sequences */
+    int16_t *surround       /* (i/o) The contribution from this sequence
+                               summed with earlier contributions */
+                              ){
+  size_t i, centerEndPos, q;
+  /* Stack based */
+  size_t lagBlock[2 * ENH_HL + 1];
+  size_t blockStartPos[2 * ENH_HL + 1]; /* The position to search around (Q2) */
+  size_t plocs2[ENH_PLOCSL];
+
+  centerEndPos = centerStartPos + ENH_BLOCKL - 1;
+
+  /* present (find predicted lag from this position) */
+
+  WebRtcIlbcfix_NearestNeighbor(lagBlock + hl,
+                                plocs,
+                                2 * (centerStartPos + centerEndPos),
+                                periodl);
+
+  blockStartPos[hl] = 4 * centerStartPos;
+
+  /* past (find predicted position and perform a refined
+     search to find the best
sequence) */ + + for (q = hl; q > 0; q--) { + size_t qq = q - 1; + size_t period_q = period[lagBlock[q]]; + /* Stop if this sequence would be outside the buffer; that means all + further-past sequences would also be outside the buffer. */ + if (blockStartPos[q] < period_q + (4 * ENH_OVERHANG)) + break; + blockStartPos[qq] = blockStartPos[q] - period_q; + + size_t value = blockStartPos[qq] + 4 * ENH_BLOCKL_HALF; + value = (value > period_q) ? (value - period_q) : 0; + WebRtcIlbcfix_NearestNeighbor(lagBlock + qq, plocs, value, periodl); + + /* Find the best possible sequence in the 4 times upsampled + domain around blockStartPos+q */ + WebRtcIlbcfix_Refiner(blockStartPos + qq, idata, idatal, centerStartPos, + blockStartPos[qq], surround, + WebRtcIlbcfix_kEnhWt[qq]); + } + + /* future (find predicted position and perform a refined + search to find the best sequence) */ + + for (i = 0; i < periodl; i++) { + plocs2[i] = plocs[i] - period[i]; + } + + for (q = hl + 1; q <= (2 * hl); q++) { + + WebRtcIlbcfix_NearestNeighbor( + lagBlock + q, + plocs2, + blockStartPos[q - 1] + 4 * ENH_BLOCKL_HALF, + periodl); + + blockStartPos[q]=blockStartPos[q-1]+period[lagBlock[q]]; + + if (blockStartPos[q] + 4 * (ENH_BLOCKL + ENH_OVERHANG) < 4 * idatal) { + + /* Find the best possible sequence in the 4 times upsampled + domain around blockStartPos+q */ + WebRtcIlbcfix_Refiner(blockStartPos + q, idata, idatal, centerStartPos, + blockStartPos[q], surround, + WebRtcIlbcfix_kEnhWt[2 * hl - q]); + + } else { + /* Don't add anything since this sequence would + be outside the buffer */ + } + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h new file mode 100644 index 0000000000..90962fa063 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GetSyncSeq.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_SYNC_SEQ_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_SYNC_SEQ_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * get the pitch-synchronous sample sequence + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_GetSyncSeq( + int16_t* idata, /* (i) original data */ + size_t idatal, /* (i) dimension of data */ + size_t centerStartPos, /* (i) where current block starts */ + size_t* period, /* (i) rough-pitch-period array (Q-2) */ + const size_t* plocs, /* (i) where periods of period array are taken (Q-2) */ + size_t periodl, /* (i) dimension period array */ + size_t hl, /* (i) 2*hl+1 is the number of sequences */ + int16_t* surround /* (i/o) The contribution from this sequence + summed with earlier contributions */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.c new file mode 100644 index 0000000000..be582f2e23 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpInput.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/hp_input.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * high-pass filter of input with *0.5 and saturation + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_HpInput( + int16_t *signal, /* (i/o) signal vector */ + int16_t *ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} a[0] + is assumed to be 1.0 */ + int16_t *y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t *x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len) /* (i) Number of samples to filter */ +{ + size_t i; + int32_t tmpW32; + int32_t tmpW32b; + + for (i=0; i<len; i++) { + + /* + y[i] = b[0]*x[i] + b[1]*x[i-1] + b[2]*x[i-2] + + (-a[1])*y[i-1] + (-a[2])*y[i-2]; + */ + + tmpW32 = y[1] * ba[3]; /* (-a[1])*y[i-1] (low part) */ + tmpW32 += y[3] * ba[4]; /* (-a[2])*y[i-2] (low part) */ + tmpW32 = (tmpW32>>15); + tmpW32 += y[0] * ba[3]; /* (-a[1])*y[i-1] (high part) */ + tmpW32 += y[2] * ba[4]; /* (-a[2])*y[i-2] (high part) */ + tmpW32 = (tmpW32<<1); + + tmpW32 += signal[i] * ba[0]; /* b[0]*x[0] */ + tmpW32 += x[0] * ba[1]; /* b[1]*x[i-1] */ + tmpW32 += x[1] * ba[2]; /* b[2]*x[i-2] */ + + /* Update state (input part) */ + x[1] = x[0]; + x[0] = signal[i]; + + /* Rounding in Q(12+1), i.e.
add 2^12 */ + tmpW32b = tmpW32 + 4096; + + /* Saturate (to 2^28) so that the HP filtered signal does not overflow */ + tmpW32b = WEBRTC_SPL_SAT((int32_t)268435455, tmpW32b, (int32_t)-268435456); + + /* Convert back to Q0 and multiply with 0.5 */ + signal[i] = (int16_t)(tmpW32b >> 13); + + /* Update state (filtered part) */ + y[2] = y[0]; + y[3] = y[1]; + + /* upshift tmpW32 by 3 with saturation */ + if (tmpW32>268435455) { + tmpW32 = WEBRTC_SPL_WORD32_MAX; + } else if (tmpW32<-268435456) { + tmpW32 = WEBRTC_SPL_WORD32_MIN; + } else { + tmpW32 <<= 3; + } + + y[0] = (int16_t)(tmpW32 >> 16); + y[1] = (int16_t)((tmpW32 - (y[0] << 16)) >> 1); + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.h new file mode 100644 index 0000000000..9143d8efed --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpInput.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_INPUT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_INPUT_H_ + +#include <stddef.h> +#include <stdint.h> + +// clang-format off +// Bad job here. https://bugs.llvm.org/show_bug.cgi?id=34274 +void WebRtcIlbcfix_HpInput( + int16_t* signal, /* (i/o) signal vector */ + int16_t* ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} + a[0] is assumed to be 1.0 */ + int16_t* y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t* x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len); /* (i) Number of samples to filter */ +// clang-format on + +#endif
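[Editor's note: illustrative, not part of the upstream patch.] Both this input filter (output scaled by 0.5) and the matching output filter in hp_output.c below (output scaled by 2) are direct-form-I biquads evaluated in Q12, with the y state kept as 16-bit high/low pairs for extra precision. A floating-point sketch of what the input variant computes (names are ours):

#include <stddef.h>
#include <stdint.h>

/* ba = {b0, b1, b2, -a1, -a2} as in the fixed-point code, but as doubles.
   x_state = {x[n-1], x[n-2]}, y_state = {y[n-1], y[n-2]}. */
static void hp_input_float(const double ba[5], double x_state[2],
                           double y_state[2], int16_t* signal, size_t len) {
  size_t i;
  for (i = 0; i < len; i++) {
    double yn = ba[0] * signal[i] + ba[1] * x_state[0] + ba[2] * x_state[1] +
                ba[3] * y_state[0] + ba[4] * y_state[1];
    x_state[1] = x_state[0];
    x_state[0] = signal[i];
    y_state[1] = y_state[0];
    y_state[0] = yn;
    double out = 0.5 * yn; /* the ">> 13 instead of >> 12" in the Q12 code */
    if (out > 32767.0) out = 32767.0; /* saturation */
    if (out < -32768.0) out = -32768.0;
    signal[i] = (int16_t)out;
  }
}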
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c new file mode 100644 index 0000000000..cc5f6dcd37 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpOutput.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/hp_output.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * high-pass filter of output and *2 with saturation + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_HpOutput( + int16_t *signal, /* (i/o) signal vector */ + int16_t *ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} a[0] + is assumed to be 1.0 */ + int16_t *y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t *x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len) /* (i) Number of samples to filter */ +{ + size_t i; + int32_t tmpW32; + int32_t tmpW32b; + + for (i=0; i<len; i++) { + + /* + y[i] = b[0]*x[i] + b[1]*x[i-1] + b[2]*x[i-2] + + (-a[1])*y[i-1] + (-a[2])*y[i-2]; + */ + + tmpW32 = y[1] * ba[3]; /* (-a[1])*y[i-1] (low part) */ + tmpW32 += y[3] * ba[4]; /* (-a[2])*y[i-2] (low part) */ + tmpW32 = (tmpW32>>15); + tmpW32 += y[0] * ba[3]; /* (-a[1])*y[i-1] (high part) */ + tmpW32 += y[2] * ba[4]; /* (-a[2])*y[i-2] (high part) */ + tmpW32 *= 2; + + tmpW32 += signal[i] * ba[0]; /* b[0]*x[0] */ + tmpW32 += x[0] * ba[1]; /* b[1]*x[i-1] */ + tmpW32 += x[1] * ba[2]; /* b[2]*x[i-2] */ + + /* Update state (input part) */ + x[1] = x[0]; + x[0] = signal[i]; + + /* Rounding in Q(12-1), i.e. add 2^10 */ + tmpW32b = tmpW32 + 1024; + + /* Saturate (to 2^26) so that the HP filtered signal does not overflow */ + tmpW32b = WEBRTC_SPL_SAT((int32_t)67108863, tmpW32b, (int32_t)-67108864); + + /* Convert back to Q0 and multiply with 2 */ + signal[i] = (int16_t)(tmpW32b >> 11); + + /* Update state (filtered part) */ + y[2] = y[0]; + y[3] = y[1]; + + /* upshift tmpW32 by 3 with saturation */ + if (tmpW32>268435455) { + tmpW32 = WEBRTC_SPL_WORD32_MAX; + } else if (tmpW32<-268435456) { + tmpW32 = WEBRTC_SPL_WORD32_MIN; + } else { + tmpW32 *= 8; + } + + y[0] = (int16_t)(tmpW32 >> 16); + y[1] = (int16_t)((tmpW32 & 0xffff) >> 1); + + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h new file mode 100644 index 0000000000..6d1bd3cd88 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpOutput.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_OUTPUT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_OUTPUT_H_ + +#include <stddef.h> +#include <stdint.h> + +// clang-format off +// Bad job here.
https://bugs.llvm.org/show_bug.cgi?id=34274 +void WebRtcIlbcfix_HpOutput( + int16_t* signal, /* (i/o) signal vector */ + int16_t* ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} a[0] + is assumed to be 1.0 */ + int16_t* y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t* x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len); /* (i) Number of samples to filter */ +// clang-format on + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c new file mode 100644 index 0000000000..ba6c3e46c3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + iLBCInterface.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +#include <stdlib.h> + +#include "modules/audio_coding/codecs/ilbc/decode.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/encode.h" +#include "modules/audio_coding/codecs/ilbc/init_decode.h" +#include "modules/audio_coding/codecs/ilbc/init_encode.h" +#include "rtc_base/checks.h" + +int16_t WebRtcIlbcfix_EncoderAssign(IlbcEncoderInstance** iLBC_encinst, + int16_t* ILBCENC_inst_Addr, + int16_t* size) { + *iLBC_encinst=(IlbcEncoderInstance*)ILBCENC_inst_Addr; + *size=sizeof(IlbcEncoder)/sizeof(int16_t); + if (*iLBC_encinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_DecoderAssign(IlbcDecoderInstance** iLBC_decinst, + int16_t* ILBCDEC_inst_Addr, + int16_t* size) { + *iLBC_decinst=(IlbcDecoderInstance*)ILBCDEC_inst_Addr; + *size=sizeof(IlbcDecoder)/sizeof(int16_t); + if (*iLBC_decinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_EncoderCreate(IlbcEncoderInstance **iLBC_encinst) { + *iLBC_encinst=(IlbcEncoderInstance*)malloc(sizeof(IlbcEncoder)); + if (*iLBC_encinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_DecoderCreate(IlbcDecoderInstance **iLBC_decinst) { + *iLBC_decinst=(IlbcDecoderInstance*)malloc(sizeof(IlbcDecoder)); + if (*iLBC_decinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_EncoderFree(IlbcEncoderInstance *iLBC_encinst) { + free(iLBC_encinst); + return(0); +} + +int16_t WebRtcIlbcfix_DecoderFree(IlbcDecoderInstance *iLBC_decinst) { + free(iLBC_decinst); + return(0); +} + +int16_t WebRtcIlbcfix_EncoderInit(IlbcEncoderInstance* iLBCenc_inst, + int16_t mode) { + if ((mode==20)||(mode==30)) { + WebRtcIlbcfix_InitEncode((IlbcEncoder*) iLBCenc_inst, mode); + return(0); + } else { + return(-1); + } +} + +int WebRtcIlbcfix_Encode(IlbcEncoderInstance* iLBCenc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t pos = 0; + size_t encpos = 0; + + if ((len != ((IlbcEncoder*)iLBCenc_inst)->blockl) && +#ifdef SPLIT_10MS + (len != 80) && +#endif + (len != 2*((IlbcEncoder*)iLBCenc_inst)->blockl) && + (len
!= 3*((IlbcEncoder*)iLBCenc_inst)->blockl)) + { + /* A maximum of 3 frames/packet is allowed */ + return(-1); + } else { + + /* call encoder */ + while (pos<len) { + WebRtcIlbcfix_EncodeImpl((uint16_t*)&encoded[2 * encpos], &speechIn[pos], + (IlbcEncoder*)iLBCenc_inst); +#ifdef SPLIT_10MS + pos += 80; + if (((IlbcEncoder*)iLBCenc_inst)->section == 0) +#else + pos += ((IlbcEncoder*)iLBCenc_inst)->blockl; +#endif + encpos += ((IlbcEncoder*)iLBCenc_inst)->no_of_words; + } + return (int)(encpos*2); + } +} + +int16_t WebRtcIlbcfix_DecoderInit(IlbcDecoderInstance* iLBCdec_inst, + int16_t mode) { + if ((mode==20)||(mode==30)) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, mode, 1); + return(0); + } else { + return(-1); + } +} +void WebRtcIlbcfix_DecoderInit20Ms(IlbcDecoderInstance* iLBCdec_inst) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, 20, 1); +} +void WebRtcIlbcfix_Decoderinit30Ms(IlbcDecoderInstance* iLBCdec_inst) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, 30, 1); +} + + +int WebRtcIlbcfix_Decode(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + /* Allow for automatic switching between the frame sizes + (although you do get some discontinuity) */ + if ((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + /* Test if the mode has changed */ + if (((IlbcDecoder*)iLBCdec_inst)->mode==20) { + if ((len==NO_OF_BYTES_30MS)|| + (len==2*NO_OF_BYTES_30MS)|| + (len==3*NO_OF_BYTES_30MS)) { + WebRtcIlbcfix_InitDecode( + ((IlbcDecoder*)iLBCdec_inst), 30, + ((IlbcDecoder*)iLBCdec_inst)->use_enhancer); + } else { + /* Unsupported frame length */ + return(-1); + } + } else { + if ((len==NO_OF_BYTES_20MS)|| + (len==2*NO_OF_BYTES_20MS)|| + (len==3*NO_OF_BYTES_20MS)) { + WebRtcIlbcfix_InitDecode( + ((IlbcDecoder*)iLBCdec_inst), 20, + ((IlbcDecoder*)iLBCdec_inst)->use_enhancer); + } else { + /* Unsupported frame length */ + return(-1); + } + } + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1) == -1) + return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +int WebRtcIlbcfix_Decode20Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + if ((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + return(-1); + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (!WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1)) + return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +int WebRtcIlbcfix_Decode30Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + if ((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + return(-1); + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (!WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1))
+ return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +size_t WebRtcIlbcfix_DecodePlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames) { + size_t i; + uint16_t dummy; + + for (i=0;i<noOfLostFrames;i++) { + /* call decoder */ + const int result = WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], &dummy, + (IlbcDecoder*)iLBCdec_inst, 0); + RTC_CHECK_EQ(result, 0); + } + return (noOfLostFrames*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +size_t WebRtcIlbcfix_NetEqPlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames) { + /* Two input parameters not used, but needed for function pointers in NetEQ */ + (void)(decoded = NULL); + (void)(noOfLostFrames = 0); + + WebRtcSpl_MemSetW16(((IlbcDecoder*)iLBCdec_inst)->enh_buf, 0, ENH_BUFL); + ((IlbcDecoder*)iLBCdec_inst)->prev_enh_pl = 2; + + return (0); +} + +void WebRtcIlbcfix_version(char *version) +{ + strcpy((char*)version, "1.1.1"); +}
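[Editor's note: illustrative, not part of the upstream patch.] WebRtcIlbcfix_Decode above infers the frame-size mode from the payload length alone: a packet carries 1-3 frames of 38 bytes each in 20 ms mode (NO_OF_BYTES_20MS) or 50 bytes each in 30 ms mode (NO_OF_BYTES_30MS). A compact restatement of that dispatch (helper name is ours):

#include <stddef.h>

/* Returns 20, 30, or -1 for an unsupported payload length. */
static int ilbc_mode_from_payload_len(size_t len) {
  if (len == 38 || len == 2 * 38 || len == 3 * 38) return 20;
  if (len == 50 || len == 2 * 50 || len == 3 * 50) return 30;
  return -1;
}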
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h new file mode 100644 index 0000000000..de8cfde111 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * ilbc.h + * + * This header file contains all of the APIs for iLBC. + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_ + +#include <stddef.h> +#include <stdint.h> + +/* + * Solution to support multiple instances + * Customer has to cast instance to proper type + */ + +typedef struct iLBC_encinst_t_ IlbcEncoderInstance; + +typedef struct iLBC_decinst_t_ IlbcDecoderInstance; + +/* + * Comfort noise constants + */ + +#define ILBC_SPEECH 1 +#define ILBC_CNG 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcIlbcfix_XxxAssign(...) + * + * These functions assign the encoder/decoder instance to the specified + * memory location + * + * Input: + * - XXX_xxxinst : Pointer to created instance that should be + * assigned + * - ILBCXXX_inst_Addr : Pointer to the desired memory space + * - size : The size that this structure occupies (in Word16) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderAssign(IlbcEncoderInstance** iLBC_encinst, + int16_t* ILBCENC_inst_Addr, + int16_t* size); +int16_t WebRtcIlbcfix_DecoderAssign(IlbcDecoderInstance** iLBC_decinst, + int16_t* ILBCDEC_inst_Addr, + int16_t* size); + +/**************************************************************************** + * WebRtcIlbcfix_XxxCreate(...) + * + * These functions create an instance of the specified structure + * + * Input: + * - XXX_inst : Pointer to the instance that should be created + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderCreate(IlbcEncoderInstance** iLBC_encinst); +int16_t WebRtcIlbcfix_DecoderCreate(IlbcDecoderInstance** iLBC_decinst); + +/**************************************************************************** + * WebRtcIlbcfix_XxxFree(...) + * + * These functions free the dynamic memory of a specified instance + * + * Input: + * - XXX_inst : Pointer to created instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderFree(IlbcEncoderInstance* iLBC_encinst); +int16_t WebRtcIlbcfix_DecoderFree(IlbcDecoderInstance* iLBC_decinst); + +/**************************************************************************** + * WebRtcIlbcfix_EncoderInit(...) + * + * This function initializes an iLBC instance + * + * Input: + * - iLBCenc_inst : iLBC instance, i.e. the instance that should + * be initialized + * - frameLen : The frame length of the codec 20/30 (ms) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderInit(IlbcEncoderInstance* iLBCenc_inst, + int16_t frameLen); + +/**************************************************************************** + * WebRtcIlbcfix_Encode(...) + * + * This function encodes one iLBC frame. Input speech length has to be a + * multiple of the frame length. + * + * Input: + * - iLBCenc_inst : iLBC instance, i.e. the user that should encode + * a package + * - speechIn : Input speech vector + * - len : Samples in speechIn (160, 240, 320 or 480) + * + * Output: + * - encoded : The encoded data vector + * + * Return value : >0 - Length (in bytes) of coded data + * -1 - Error + */ + +int WebRtcIlbcfix_Encode(IlbcEncoderInstance* iLBCenc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded); + +/**************************************************************************** + * WebRtcIlbcfix_DecoderInit(...) + * + * This function initializes an iLBC instance with either 20 or 30 ms frames + * Alternatively the WebRtcIlbcfix_DecoderInit_XXms functions can be used; + * then the frame length does not need to be specified with a variable. + * + * Input: + * - IlbcDecoderInstance : iLBC decoder instance + * - frameLen : The frame length of the codec 20/30 (ms) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_DecoderInit(IlbcDecoderInstance* iLBCdec_inst, + int16_t frameLen); +void WebRtcIlbcfix_DecoderInit20Ms(IlbcDecoderInstance* iLBCdec_inst); +void WebRtcIlbcfix_Decoderinit30Ms(IlbcDecoderInstance* iLBCdec_inst); + +/**************************************************************************** + * WebRtcIlbcfix_Decode(...) + * + * This function decodes a packet with iLBC frame(s). Output speech length + * will be a multiple of 160 or 240 samples ((160 or 240)*frames/packet). + * + * Input: + * - iLBCdec_inst : iLBC instance, i.e. the user that should decode + * a packet + * - encoded : Encoded iLBC frame(s) + * - len : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector + * - speechType : 1 normal, 2 CNG + * + * Return value : >0 - Samples in decoded vector + * -1 - Error + */ + +int WebRtcIlbcfix_Decode(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); +int WebRtcIlbcfix_Decode20Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); +int WebRtcIlbcfix_Decode30Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); + +/**************************************************************************** + * WebRtcIlbcfix_DecodePlc(...) + * + * This function conducts PLC for iLBC frame(s). Output speech length + * will be a multiple of 160 or 240 samples.
+ * + * Input: + * - iLBCdec_inst : iLBC instance, i.e. the instance that should + * perform the PLC + * - noOfLostFrames : Number of PLC frames to produce + * + * Output: + * - decoded : The "decoded" vector + * + * Return value : Samples in decoded PLC vector + */ + +size_t WebRtcIlbcfix_DecodePlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames); + +/**************************************************************************** + * WebRtcIlbcfix_NetEqPlc(...) + * + * This function updates the decoder when a packet loss has occurred, but it + * does not produce any PLC data. The function can be used if another PLC + * method is used (i.e. NetEq). + * + * Input: + * - iLBCdec_inst : iLBC instance that should be updated + * - noOfLostFrames : Number of lost frames + * + * Output: + * - decoded : The "decoded" vector (nothing in this case) + * + * Return value : Samples in decoded PLC vector + */ + +size_t WebRtcIlbcfix_NetEqPlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames); + +/**************************************************************************** + * WebRtcIlbcfix_version(...) + * + * This function returns the version number of iLBC + * + * Output: + * - version : Version number of iLBC (maximum 20 char) + */ + +void WebRtcIlbcfix_version(char* version); + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_
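[Editor's note: illustrative, not part of the upstream patch.] A minimal round trip through the C API declared above, assuming 8 kHz input and the 20 ms mode (160 samples in, 38 bytes out); error handling is abbreviated and the function name is ours:

#include <stddef.h>
#include <stdint.h>

#include "modules/audio_coding/codecs/ilbc/ilbc.h"

int ilbc_round_trip_20ms(const int16_t speech[160], int16_t out[160]) {
  IlbcEncoderInstance* enc = NULL;
  IlbcDecoderInstance* dec = NULL;
  uint8_t payload[38];
  int16_t speech_type = 0;
  int ok = -1;

  if (WebRtcIlbcfix_EncoderCreate(&enc) == 0 &&
      WebRtcIlbcfix_DecoderCreate(&dec) == 0 &&
      WebRtcIlbcfix_EncoderInit(enc, 20) == 0 &&
      WebRtcIlbcfix_DecoderInit(dec, 20) == 0) {
    int bytes = WebRtcIlbcfix_Encode(enc, speech, 160, payload);
    if (bytes > 0) {
      int samples =
          WebRtcIlbcfix_Decode(dec, payload, (size_t)bytes, out, &speech_type);
      if (samples == 160) ok = 0; /* speech_type == ILBC_SPEECH here */
    }
  }
  if (enc != NULL) WebRtcIlbcfix_EncoderFree(enc);
  if (dec != NULL) WebRtcIlbcfix_DecoderFree(dec);
  return ok;
}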
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc new file mode 100644 index 0000000000..689292f131 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(IlbcTest, BadPacket) { + // Get a good packet. + AudioEncoderIlbcConfig config; + config.frame_size_ms = 20; // We need 20 ms rather than the default 30 ms; + // otherwise, all possible values of cb_index[2] + // are valid. + AudioEncoderIlbcImpl encoder(config, 102); + std::vector<int16_t> samples(encoder.SampleRateHz() / 100, 4711); + rtc::Buffer packet; + int num_10ms_chunks = 0; + while (packet.size() == 0) { + encoder.Encode(0, samples, &packet); + num_10ms_chunks += 1; + } + + // Break the packet by setting all bits of the unsigned 7-bit number + // cb_index[2] to 1, giving it a value of 127. For a 20 ms packet, this is + // too large. + EXPECT_EQ(38u, packet.size()); + rtc::Buffer bad_packet(packet.data(), packet.size()); + bad_packet[29] |= 0x3f; // Bits 1-6. + bad_packet[30] |= 0x80; // Bit 0. + + // Decode the bad packet. We expect the decoder to respond by returning -1. + AudioDecoderIlbcImpl decoder; + std::vector<int16_t> decoded_samples(num_10ms_chunks * samples.size()); + AudioDecoder::SpeechType speech_type; + EXPECT_EQ(-1, decoder.Decode(bad_packet.data(), bad_packet.size(), + encoder.SampleRateHz(), + sizeof(int16_t) * decoded_samples.size(), + decoded_samples.data(), &speech_type)); + + // Decode the good packet. This should work, because the failed decoding + // should not have left the decoder in a broken state. + EXPECT_EQ(static_cast<int>(decoded_samples.size()), + decoder.Decode(packet.data(), packet.size(), encoder.SampleRateHz(), + sizeof(int16_t) * decoded_samples.size(), + decoded_samples.data(), &speech_type)); +} + +class SplitIlbcTest : public ::testing::TestWithParam<std::pair<int, int> > { + protected: + virtual void SetUp() { + const std::pair<int, int> parameters = GetParam(); + num_frames_ = parameters.first; + frame_length_ms_ = parameters.second; + frame_length_bytes_ = (frame_length_ms_ == 20) ? 38 : 50; + } + size_t num_frames_; + int frame_length_ms_; + size_t frame_length_bytes_; +}; + +TEST_P(SplitIlbcTest, NumFrames) { + AudioDecoderIlbcImpl decoder; + const size_t frame_length_samples = frame_length_ms_ * 8; + const auto generate_payload = [](size_t payload_length_bytes) { + rtc::Buffer payload(payload_length_bytes); + // Fill payload with increasing integers {0, 1, 2, ...}. + for (size_t i = 0; i < payload.size(); ++i) { + payload[i] = static_cast<uint8_t>(i); + } + return payload; + }; + + const auto results = decoder.ParsePayload( + generate_payload(frame_length_bytes_ * num_frames_), 0); + EXPECT_EQ(num_frames_, results.size()); + + size_t frame_num = 0; + uint8_t payload_value = 0; + for (const auto& result : results) { + EXPECT_EQ(frame_length_samples * frame_num, result.timestamp); + const LegacyEncodedAudioFrame* frame = + static_cast<const LegacyEncodedAudioFrame*>(result.frame.get()); + const rtc::Buffer& payload = frame->payload(); + EXPECT_EQ(frame_length_bytes_, payload.size()); + for (size_t i = 0; i < payload.size(); ++i, ++payload_value) { + EXPECT_EQ(payload_value, payload[i]); + } + ++frame_num; + } +} + +// Test 1 through 5 frames of 20 and 30 ms size. +// Also test the maximum number of frames in one packet for 20 and 30 ms. +// The maximum is defined by the largest payload length that can be uniquely +// resolved to a frame size of either 38 bytes (20 ms) or 50 bytes (30 ms). +INSTANTIATE_TEST_SUITE_P( + IlbcTest, + SplitIlbcTest, + ::testing::Values(std::pair<int, int>(1, 20), // 1 frame, 20 ms. + std::pair<int, int>(2, 20), // 2 frames, 20 ms. + std::pair<int, int>(3, 20), // And so on. + std::pair<int, int>(4, 20), + std::pair<int, int>(5, 20), + std::pair<int, int>(24, 20), + std::pair<int, int>(1, 30), + std::pair<int, int>(2, 30), + std::pair<int, int>(3, 30), + std::pair<int, int>(4, 30), + std::pair<int, int>(5, 30), + std::pair<int, int>(18, 30))); + +// Test too large payload size. +TEST(IlbcTest, SplitTooLargePayload) { + AudioDecoderIlbcImpl decoder; + constexpr size_t kPayloadLengthBytes = 950; + const auto results = + decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0); + EXPECT_TRUE(results.empty()); +} + +// Payload not an integer number of frames. +TEST(IlbcTest, SplitUnevenPayload) { + AudioDecoderIlbcImpl decoder; + constexpr size_t kPayloadLengthBytes = 39; // Not an even number of frames.
+ const auto results = + decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0); + EXPECT_TRUE(results.empty()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c new file mode 100644 index 0000000000..d78f81a897 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvDec.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_IndexConvDec( + int16_t *index /* (i/o) Codebook indexes */ + ){ + int k; + + for (k=4;k<6;k++) { + /* Readjust the second and third codebook index for the first 40 samples + so that they look the same as the first (in terms of lag) + */ + if ((index[k]>=44)&&(index[k]<108)) { + index[k]+=64; + } else if ((index[k]>=108)&&(index[k]<128)) { + index[k]+=128; + } else { + /* ERROR */ + } + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.h new file mode 100644 index 0000000000..4f08ce04df --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvDec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_DEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_DEC_H_ + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_IndexConvDec(int16_t* index /* (i/o) Codebook indexes */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c new file mode 100644 index 0000000000..83144150b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvEnc.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/index_conv_enc.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Convert the codebook indexes to make the search easier + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_IndexConvEnc( + int16_t *index /* (i/o) Codebook indexes */ + ){ + int k; + + for (k=4;k<6;k++) { + /* Readjust the second and third codebook index so that it is + packetized into 7 bits (before it was put in lag-wise the same + way as for the first codebook which uses 8 bits) + */ + if ((index[k]>=108)&&(index[k]<172)) { + index[k]-=64; + } else if (index[k]>=236) { + index[k]-=128; + } else { + /* ERROR */ + } + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.h new file mode 100644 index 0000000000..4fbf98084e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvEnc.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_ENC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_ENC_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * Convert the codebook indexes to make the search easier + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_IndexConvEnc(int16_t* index /* (i/o) Codebook indexes */ + ); + +#endif
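[Editor's note: illustrative, not part of the upstream patch.] The encoder-side remap above and the decoder-side remap in index_conv_dec.c are inverses on the ranges the codec produces for indexes 4 and 5, which is what lets those indexes be packetized in 7 bits. A hypothetical self-check (function name is ours):

#include <assert.h>
#include <stdint.h>

#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h"
#include "modules/audio_coding/codecs/ilbc/index_conv_enc.h"

static void check_index_conv_round_trip(void) {
  int v;
  for (v = 108; v < 172; v++) { /* remapped by -64, restored by +64 */
    int16_t idx[6] = {0, 0, 0, 0, (int16_t)v, (int16_t)v};
    WebRtcIlbcfix_IndexConvEnc(idx);
    assert(idx[4] >= 44 && idx[4] < 128); /* fits in 7 bits */
    WebRtcIlbcfix_IndexConvDec(idx);
    assert(idx[4] == v && idx[5] == v);
  }
  for (v = 236; v < 256; v++) { /* remapped by -128, restored by +128 */
    int16_t idx[6] = {0, 0, 0, 0, (int16_t)v, (int16_t)v};
    WebRtcIlbcfix_IndexConvEnc(idx);
    WebRtcIlbcfix_IndexConvDec(idx);
    assert(idx[4] == v && idx[5] == v);
  }
}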
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c new file mode 100644 index 0000000000..3eb41e33b0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitDecode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/init_decode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of decoder instance. + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitDecode( /* (o) Number of decoded samples */ + IlbcDecoder *iLBCdec_inst, /* (i/o) Decoder instance */ + int16_t mode, /* (i) frame size mode */ + int use_enhancer) { /* (i) 1: use enhancer, 0: no enhancer */ + int i; + + iLBCdec_inst->mode = mode; + + /* Set all the variables that are dependent on the frame size mode */ + if (mode==30) { + iLBCdec_inst->blockl = BLOCKL_30MS; + iLBCdec_inst->nsub = NSUB_30MS; + iLBCdec_inst->nasub = NASUB_30MS; + iLBCdec_inst->lpc_n = LPC_N_30MS; + iLBCdec_inst->no_of_bytes = NO_OF_BYTES_30MS; + iLBCdec_inst->no_of_words = NO_OF_WORDS_30MS; + iLBCdec_inst->state_short_len=STATE_SHORT_LEN_30MS; + } + else if (mode==20) { + iLBCdec_inst->blockl = BLOCKL_20MS; + iLBCdec_inst->nsub = NSUB_20MS; + iLBCdec_inst->nasub = NASUB_20MS; + iLBCdec_inst->lpc_n = LPC_N_20MS; + iLBCdec_inst->no_of_bytes = NO_OF_BYTES_20MS; + iLBCdec_inst->no_of_words = NO_OF_WORDS_20MS; + iLBCdec_inst->state_short_len=STATE_SHORT_LEN_20MS; + } + else { + return(-1); + } + + /* Reset all the previous LSF to mean LSF */ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + + /* Clear the synthesis filter memory */ + WebRtcSpl_MemSetW16(iLBCdec_inst->syntMem, 0, LPC_FILTERORDER); + + /* Set the old synthesis filter to {1.0 0.0 ... 0.0} */ + WebRtcSpl_MemSetW16(iLBCdec_inst->old_syntdenum, 0, ((LPC_FILTERORDER + 1)*NSUB_MAX)); + for (i=0; i<NSUB_MAX; i++) { + iLBCdec_inst->old_syntdenum[i*(LPC_FILTERORDER+1)] = 4096; + } + + /* Clear the variables that are used for the PLC */ + iLBCdec_inst->last_lag = 20; + iLBCdec_inst->consPLICount = 0; + iLBCdec_inst->prevPLI = 0; + iLBCdec_inst->perSquare = 0; + iLBCdec_inst->prevLag = 120; + iLBCdec_inst->prevLpc[0] = 4096; + WebRtcSpl_MemSetW16(iLBCdec_inst->prevLpc+1, 0, LPC_FILTERORDER); + WebRtcSpl_MemSetW16(iLBCdec_inst->prevResidual, 0, BLOCKL_MAX); + + /* Initialize the seed for the random number generator */ + iLBCdec_inst->seed = 777; + + /* Set the filter state of the HP filter to 0 */ + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemx, 0, 2); + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemy, 0, 4); + + /* Set the variables that are used in the enhancer */ + iLBCdec_inst->use_enhancer = use_enhancer; + WebRtcSpl_MemSetW16(iLBCdec_inst->enh_buf, 0, (ENH_BUFL+ENH_BUFL_FILTEROVERHEAD)); + for (i=0;i<ENH_NBLOCKS_TOT;i++) { + iLBCdec_inst->enh_period[i]=160; /* Q(-4) */ + } + + iLBCdec_inst->prev_enh_pl = 0; + + return (int)(iLBCdec_inst->blockl); +}
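[Editor's note: illustrative, not part of the upstream patch.] The mode-dependent sizes selected above come from defines.h; the two supported configurations at 8 kHz are worth keeping in mind when reading the rest of the codec (check function is ours):

#include <assert.h>

#include "modules/audio_coding/codecs/ilbc/defines.h"

static void check_ilbc_mode_constants(void) {
  /* 20 ms mode: 160 samples per block, 38-byte packets */
  assert(BLOCKL_20MS == 160);
  assert(NO_OF_BYTES_20MS == 38);
  /* 30 ms mode: 240 samples per block, 50-byte packets */
  assert(BLOCKL_30MS == 240);
  assert(NO_OF_BYTES_30MS == 50);
}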
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h new file mode 100644 index 0000000000..a2b7b91287 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitDecode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_DECODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_DECODE_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of decoder instance. + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitDecode(/* (o) Number of decoded samples */ + IlbcDecoder* + iLBCdec_inst, /* (i/o) Decoder instance */ + int16_t mode, /* (i) frame size mode */ + int use_enhancer /* (i) 1 to use enhancer + 0 to run without enhancer */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c new file mode 100644 index 0000000000..aa858e94bb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitEncode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/init_encode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of encoder instance.
+ *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */ + IlbcEncoder *iLBCenc_inst, /* (i/o) Encoder instance */ + int16_t mode) { /* (i) frame size mode */ + iLBCenc_inst->mode = mode; + + /* Set all the variables that are dependent on the frame size mode */ + if (mode==30) { + iLBCenc_inst->blockl = BLOCKL_30MS; + iLBCenc_inst->nsub = NSUB_30MS; + iLBCenc_inst->nasub = NASUB_30MS; + iLBCenc_inst->lpc_n = LPC_N_30MS; + iLBCenc_inst->no_of_bytes = NO_OF_BYTES_30MS; + iLBCenc_inst->no_of_words = NO_OF_WORDS_30MS; + iLBCenc_inst->state_short_len=STATE_SHORT_LEN_30MS; + } + else if (mode==20) { + iLBCenc_inst->blockl = BLOCKL_20MS; + iLBCenc_inst->nsub = NSUB_20MS; + iLBCenc_inst->nasub = NASUB_20MS; + iLBCenc_inst->lpc_n = LPC_N_20MS; + iLBCenc_inst->no_of_bytes = NO_OF_BYTES_20MS; + iLBCenc_inst->no_of_words = NO_OF_WORDS_20MS; + iLBCenc_inst->state_short_len=STATE_SHORT_LEN_20MS; + } + else { + return(-1); + } + + /* Clear the buffers and set the previous LSF and LSP to the mean value */ + WebRtcSpl_MemSetW16(iLBCenc_inst->anaMem, 0, LPC_FILTERORDER); + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lsfold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lsfdeqold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + WebRtcSpl_MemSetW16(iLBCenc_inst->lpc_buffer, 0, LPC_LOOKBACK + BLOCKL_MAX); + + /* Set the filter state of the HP filter to 0 */ + WebRtcSpl_MemSetW16(iLBCenc_inst->hpimemx, 0, 2); + WebRtcSpl_MemSetW16(iLBCenc_inst->hpimemy, 0, 4); + +#ifdef SPLIT_10MS + /*Zeroing the past samples for 10msec Split*/ + WebRtcSpl_MemSetW16(iLBCenc_inst->past_samples,0,160); + iLBCenc_inst->section = 0; +#endif + + return (int)(iLBCenc_inst->no_of_bytes); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h new file mode 100644 index 0000000000..4ada6a30c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitEncode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_ENCODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_ENCODE_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of encoder instance.
+ *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitEncode(/* (o) Number of bytes encoded */ + IlbcEncoder* + iLBCenc_inst, /* (i/o) Encoder instance */ + int16_t mode /* (i) frame size mode */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.c new file mode 100644 index 0000000000..17ed244bd4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Interpolate.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/interpolate.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * interpolation between vectors + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Interpolate( + int16_t *out, /* (o) output vector */ + int16_t *in1, /* (i) first input vector */ + int16_t *in2, /* (i) second input vector */ + int16_t coef, /* (i) weight coefficient in Q14 */ + int16_t length) /* (i) number of samples in vectors */ +{ + int i; + int16_t invcoef; + + /* + Performs the operation out[i] = in1[i]*coef + (1-coef)*in2[i] (with rounding) + */ + + invcoef = 16384 - coef; /* 16384 = 1.0 (Q14)*/ + for (i = 0; i < length; i++) { + out[i] = (int16_t)((coef * in1[i] + invcoef * in2[i] + 8192) >> 14); + } + + return; +}
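[Editor's note: illustrative, not part of the upstream patch.] In the Q14 weighting above, coef = 16384 represents 1.0, invcoef = 16384 - coef represents (1 - coef), and the +8192 term rounds the >>14 back to Q0; for instance coef = 8192 (0.5) yields the rounded average of the two inputs. A float equivalent for one sample (name is ours):

#include <stdint.h>

/* w is coef/16384.0, i.e. the weight on the first input. */
static int16_t interpolate_one_float(int16_t in1, int16_t in2, double w) {
  double v = w * in1 + (1.0 - w) * in2;
  return (int16_t)(v >= 0.0 ? v + 0.5 : v - 0.5); /* round to nearest */
}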
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.h new file mode 100644 index 0000000000..892082b75c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Interpolate.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * interpolation between vectors + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Interpolate( + int16_t* out, /* (o) output vector */ + int16_t* in1, /* (i) first input vector */ + int16_t* in2, /* (i) second input vector */ + int16_t coef, /* (i) weight coefficient in Q14 */ + int16_t length); /* (i) number of samples in vectors */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.c new file mode 100644 index 0000000000..6dddd6fb86 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InterpolateSamples.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/interpolate_samples.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_InterpolateSamples( + int16_t *interpSamples, /* (o) The interpolated samples */ + int16_t *CBmem, /* (i) The CB memory */ + size_t lMem /* (i) Length of the CB memory */ + ) { + int16_t *ppi, *ppo, i, j, temp1, temp2; + int16_t *tmpPtr; + + /* Calculate the 20 vectors of interpolated samples (4 samples each) + that are used in the codebooks for lag 20 to 39 */ + tmpPtr = interpSamples; + for (j=0; j<20; j++) { + temp1 = 0; + temp2 = 3; + ppo = CBmem+lMem-4; + ppi = CBmem+lMem-j-24; + for (i=0; i<4; i++) { + + *tmpPtr++ = (int16_t)((WebRtcIlbcfix_kAlpha[temp2] * *ppo) >> 15) + + (int16_t)((WebRtcIlbcfix_kAlpha[temp1] * *ppi) >> 15); + + ppo++; + ppi++; + temp1++; + temp2--; + } + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h new file mode 100644 index 0000000000..bc665d7854 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InterpolateSamples.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_SAMPLES_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_SAMPLES_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Construct the interpolated samples for the Augmented CB + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_InterpolateSamples( + int16_t* interpSamples, /* (o) The interpolated samples */ + int16_t* CBmem, /* (i) The CB memory */ + size_t lMem /* (i) Length of the CB memory */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c new file mode 100644 index 0000000000..89f6d29724 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LpcEncode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lpc_encode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h" +#include "modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h" +#include "modules/audio_coding/codecs/ilbc/simple_lsf_quant.h" + +/*----------------------------------------------------------------* + * lpc encoder + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LpcEncode( + int16_t *syntdenum, /* (i/o) synthesis filter coefficients + before/after encoding */ + int16_t *weightdenum, /* (i/o) weighting denumerator coefficients + before/after encoding */ + int16_t *lsf_index, /* (o) lsf quantization index */ + int16_t *data, /* (i) Speech to do LPC analysis on */ + IlbcEncoder *iLBCenc_inst + /* (i/o) the encoder state structure */ + ) { + /* Stack based */ + int16_t lsf[LPC_FILTERORDER * LPC_N_MAX]; + int16_t lsfdeq[LPC_FILTERORDER * LPC_N_MAX]; + + /* Calculate LSF's from the input speech */ + WebRtcIlbcfix_SimpleLpcAnalysis(lsf, data, iLBCenc_inst); + + /* Quantize the LSF's */ + WebRtcIlbcfix_SimpleLsfQ(lsfdeq, lsf_index, lsf, iLBCenc_inst->lpc_n); + + /* Stabilize the LSF's if needed */ + WebRtcIlbcfix_LsfCheck(lsfdeq, LPC_FILTERORDER, iLBCenc_inst->lpc_n); + + /* Calculate the synthesis and weighting filter coefficients from + the optimal LSF and the dequantized LSF */ + WebRtcIlbcfix_SimpleInterpolateLsf(syntdenum, weightdenum, + lsf, lsfdeq, iLBCenc_inst->lsfold, + iLBCenc_inst->lsfdeqold, LPC_FILTERORDER, iLBCenc_inst); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h
+
+  /* Calculate the synthesis and weighting filter coefficients from
+     the optimal LSF and the dequantized LSF */
+  WebRtcIlbcfix_SimpleInterpolateLsf(syntdenum, weightdenum,
+                                     lsf, lsfdeq, iLBCenc_inst->lsfold,
+                                     iLBCenc_inst->lsfdeqold, LPC_FILTERORDER, iLBCenc_inst);
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h
new file mode 100644
index 0000000000..a67b77acbf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LpcEncode.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LPC_ENCODE_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LPC_ENCODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * lpc encoder
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_LpcEncode(
+    int16_t* syntdenum, /* (i/o) synthesis filter coefficients
+                           before/after encoding */
+    int16_t* weightdenum, /* (i/o) weighting denumerator coefficients
+                             before/after encoding */
+    int16_t* lsf_index, /* (o) lsf quantization index */
+    int16_t* data, /* (i) Speech to do LPC analysis on */
+    IlbcEncoder* iLBCenc_inst
+    /* (i/o) the encoder state structure */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c
new file mode 100644
index 0000000000..9f0e19a2d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LsfCheck.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsf_check.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * check for stability of lsf coefficients
+ *---------------------------------------------------------------*/
+
+int WebRtcIlbcfix_LsfCheck(
+    int16_t *lsf, /* LSF parameters */
+    int dim, /* dimension of LSF */
+    int NoAn) /* No of analysis per frame */
+{
+  int k,n,m, Nit=2, change=0,pos;
+  const int16_t eps=319; /* 0.039 in Q13 (50 Hz)*/
+  const int16_t eps2=160; /* eps/2.0 in Q13;*/
+  const int16_t maxlsf=25723; /* 3.14; (4000 Hz)*/
+  const int16_t minlsf=82; /* 0.01; (0 Hz)*/
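+
+  /* Q13 scale: an LSF value x represents x/8192 radians. A 50 Hz
+     margin at 8 kHz sampling is 2*pi*50/8000 ~= 0.039 rad, and
+     0.039 * 8192 gives roughly the eps value used above. */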
+
+  /* LSF separation check */
+  for (n=0;n<Nit;n++) { /* Run through a couple of times */
+    for (m=0;m<NoAn;m++) { /* Number of analyses per frame */
+      for (k=0;k<(dim-1);k++) { /* Number of lsfs */
+        pos=m*dim+k;
+
+        /* Separate coefficients with a safety margin of 50 Hz */
+        if ((lsf[pos+1]-lsf[pos])<eps) {
+
+          if (lsf[pos+1]<lsf[pos]) {
+            lsf[pos+1]= lsf[pos]+eps2;
+            lsf[pos]= lsf[pos+1]-eps2;
+          } else {
+            lsf[pos]-=eps2;
+            lsf[pos+1]+=eps2;
+          }
+          change=1;
+        }
+
+        /* Limit minimum and maximum LSF */
+        if (lsf[pos]<minlsf) {
+          lsf[pos]=minlsf;
+          change=1;
+        }
+
+        if (lsf[pos]>maxlsf) {
+          lsf[pos]=maxlsf;
+          change=1;
+        }
+      }
+    }
+  }
+
+  return change;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.h
new file mode 100644
index 0000000000..9ba90a31e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LsfCheck.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_CHECK_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_CHECK_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * check for stability of lsf coefficients
+ *---------------------------------------------------------------*/
+
+int WebRtcIlbcfix_LsfCheck(int16_t* lsf, /* LSF parameters */
+                           int dim, /* dimension of LSF */
+                           int NoAn); /* No of analysis per frame */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.c
new file mode 100644
index 0000000000..04de5e7e6c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LspInterpolate2PolyDec.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/interpolate.h"
+#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h"
+
+/*----------------------------------------------------------------*
+ * interpolation of lsf coefficients for the decoder
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_LspInterpolate2PolyDec(
+    int16_t *a, /* (o) lpc coefficients Q12 */
+    int16_t *lsf1, /* (i) first set of lsf coefficients Q13 */
+    int16_t *lsf2, /* (i) second set of lsf coefficients Q13 */
+    int16_t coef, /* (i) weighting coefficient to use between
+                     lsf1 and lsf2 Q14 */
+    int16_t length /* (i) length of coefficient vectors */
+    ){
+  int16_t lsftmp[LPC_FILTERORDER];
+
+  /* interpolate LSF */
+  WebRtcIlbcfix_Interpolate(lsftmp, lsf1, lsf2, coef, length);
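+
+  /* coef is Q14, so 16384 corresponds to 1.0: the result is a weighted
+     mix of lsf1 and lsf2, converted back to direct-form LPC below. */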
+
+  /* Compute the filter coefficients from the LSF */
+  WebRtcIlbcfix_Lsf2Poly(a, lsftmp);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h
new file mode 100644
index 0000000000..6cc9d9746d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LspInterpolate2PolyDec.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_DEC_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_DEC_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * interpolation of lsf coefficients for the decoder
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_LspInterpolate2PolyDec(
+    int16_t* a, /* (o) lpc coefficients Q12 */
+    int16_t* lsf1, /* (i) first set of lsf coefficients Q13 */
+    int16_t* lsf2, /* (i) second set of lsf coefficients Q13 */
+    int16_t coef, /* (i) weighting coefficient to use between
+                     lsf1 and lsf2 Q14 */
+    int16_t length /* (i) length of coefficient vectors */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c
new file mode 100644
index 0000000000..618821216c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LsfInterpolate2PloyEnc.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/interpolate.h"
+#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h"
+
+/*----------------------------------------------------------------*
+ * lsf interpolator and conversion from lsf to a coefficients
+ * (subroutine to SimpleInterpolateLSF)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_LsfInterpolate2PloyEnc(
+    int16_t *a, /* (o) lpc coefficients Q12 */
+    int16_t *lsf1, /* (i) first set of lsf coefficients Q13 */
+    int16_t *lsf2, /* (i) second set of lsf coefficients Q13 */
+    int16_t coef, /* (i) weighting coefficient to use between
+                     lsf1 and lsf2 Q14 */
+    int16_t length /* (i) length of coefficient vectors */
+    ) {
+  /* Stack based */
+  int16_t lsftmp[LPC_FILTERORDER];
+
+  /* interpolate LSF */
+  WebRtcIlbcfix_Interpolate(lsftmp, lsf1, lsf2, coef, length);
+
+  /* Compute the filter coefficients from the LSF */
+  WebRtcIlbcfix_Lsf2Poly(a, lsftmp);
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h
new file mode 100644
index 0000000000..b278a10f4b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_LsfInterpolate2PloyEnc.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_ENC_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_ENC_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * lsf interpolator and conversion from lsf to a coefficients
+ * (subroutine to SimpleInterpolateLSF)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_LsfInterpolate2PloyEnc(
+    int16_t* a, /* (o) lpc coefficients Q12 */
+    int16_t* lsf1, /* (i) first set of lsf coefficients Q13 */
+    int16_t* lsf2, /* (i) second set of lsf coefficients Q13 */
+    int16_t coef, /* (i) weighting coefficient to use between
+                     lsf1 and lsf2 Q14 */
+    int16_t length /* (i) length of coefficient vectors */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.c
new file mode 100644
index 0000000000..ee8292f394
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsf2Lsp.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsf_to_lsp.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * conversion from lsf to lsp coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Lsf2Lsp(
+    int16_t *lsf, /* (i) lsf in Q13 values between 0 and pi */
+    int16_t *lsp, /* (o) lsp in Q15 values between -1 and 1 */
+    int16_t m /* (i) number of coefficients */
+    ) {
+  int16_t i, k;
+  int16_t diff; /* difference, which is used for the
+                   linear approximation (Q8) */
+  int16_t freq; /* normalized frequency in Q15 (0..1) */
+  int32_t tmpW32;
+
+  for(i=0; i<m; i++)
+  {
+    freq = (int16_t)((lsf[i] * 20861) >> 15);
+    /* 20861: 1.0/(2.0*PI) in Q17 */
+    /*
+       Upper 8 bits give the index k and
+       Lower 8 bits give the difference, which needs
+       to be approximated linearly
+    */
+    k = freq >> 8;
+    diff = (freq&0x00ff);
+
+    /* Guard against getting outside table */
+
+    if (k>63) {
+      k = 63;
+    }
+
+    /* Calculate linear approximation */
+    tmpW32 = WebRtcIlbcfix_kCosDerivative[k] * diff;
+    lsp[i] = WebRtcIlbcfix_kCos[k] + (int16_t)(tmpW32 >> 12);
+  }
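+
+  /* lsp[i] = cos(lsf[i]) via table lookup: freq = lsf/(2*pi) in Q15,
+     the top bits select one of the 64 kCos entries and the low 8 bits
+     drive a first-order correction using kCosDerivative. */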
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h
new file mode 100644
index 0000000000..6bc6c44dbd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsf2Lsp.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_LSP_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_LSP_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * conversion from lsf to lsp coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Lsf2Lsp(
+    int16_t* lsf, /* (i) lsf in Q13 values between 0 and pi */
+    int16_t* lsp, /* (o) lsp in Q15 values between -1 and 1 */
+    int16_t m /* (i) number of coefficients */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.c
new file mode 100644
index 0000000000..8ca91d82f8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsf2Poly.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/get_lsp_poly.h"
+#include "modules/audio_coding/codecs/ilbc/lsf_to_lsp.h"
+
+void WebRtcIlbcfix_Lsf2Poly(
+    int16_t *a, /* (o) predictor coefficients (order = 10) in Q12 */
+    int16_t *lsf /* (i) line spectral frequencies in Q13 */
+    ) {
+  int32_t f[2][6]; /* f[0][] and f[1][] corresponds to
+                      F1(z) and F2(z) respectively */
+  int32_t *f1ptr, *f2ptr;
+  int16_t *a1ptr, *a2ptr;
+  int32_t tmpW32;
+  int16_t lsp[10];
+  int i;
+
+  /* Convert lsf to lsp */
+  WebRtcIlbcfix_Lsf2Lsp(lsf, lsp, LPC_FILTERORDER);
+
+  /* Get F1(z) and F2(z) from the lsp */
+  f1ptr=f[0];
+  f2ptr=f[1];
+  WebRtcIlbcfix_GetLspPoly(&lsp[0],f1ptr);
+  WebRtcIlbcfix_GetLspPoly(&lsp[1],f2ptr);
+
+  /* for i = 5 down to 1
+     Compute f1[i] += f1[i-1];
+     and     f2[i] -= f2[i-1];
+  */
+  f1ptr=&f[0][5];
+  f2ptr=&f[1][5];
+  for (i=5; i>0; i--)
+  {
+    (*f1ptr) += (*(f1ptr-1));
+    (*f2ptr) -= (*(f2ptr-1));
+    f1ptr--;
+    f2ptr--;
+  }
+
+  /* Get the A(z) coefficients
+     a[0] = 1.0
+     for i = 1 to 5
+       a[i] = (f1[i] + f2[i] + round)>>13;
+     for i = 1 to 5
+       a[11-i] = (f1[i] - f2[i] + round)>>13;
+  */
+  a[0]=4096;
+  a1ptr=&a[1];
+  a2ptr=&a[10];
+  f1ptr=&f[0][1];
+  f2ptr=&f[1][1];
+  for (i=5; i>0; i--)
+  {
+    tmpW32 = (*f1ptr) + (*f2ptr);
+    *a1ptr = (int16_t)((tmpW32 + 4096) >> 13);
+
+    tmpW32 = (*f1ptr) - (*f2ptr);
+    *a2ptr = (int16_t)((tmpW32 + 4096) >> 13);
+
+    a1ptr++;
+    a2ptr--;
+    f1ptr++;
+    f2ptr++;
+  }
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h
new file mode 100644
index 0000000000..f26d3a8d2d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsf2Poly.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_POLY_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_POLY_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Convert from LSF coefficients to A coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Lsf2Poly(
+    int16_t* a, /* (o) predictor coefficients (order = 10) in Q12 */
+    int16_t* lsf /* (i) line spectral frequencies in Q13 */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.c
new file mode 100644
index 0000000000..227f4d45b4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsp2Lsf.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/lsp_to_lsf.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * conversion from LSP coefficients to LSF coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Lsp2Lsf(
+    int16_t *lsp, /* (i) lsp vector -1...+1 in Q15 */
+    int16_t *lsf, /* (o) Lsf vector 0...Pi in Q13
+                     (ordered, so that lsf[i]<lsf[i+1]) */
+    int16_t m /* (i) number of coefficients */
+    )
+{
+  int16_t i, k;
+  int16_t diff; /* difference between table value and desired value (Q15) */
+  int16_t freq; /* lsf/(2*pi) (Q16) */
+  int16_t *lspPtr, *lsfPtr, *cosTblPtr;
+  int16_t tmp;
+
+  /* set the index to maximum index value in WebRtcIlbcfix_kCos */
+  k = 63;
+
+  /*
+     Start with the highest LSP and then work the way down.
+     For each LSP the lsf is calculated by first order approximation
+     of the acos(x) function
+  */
+  lspPtr = &lsp[9];
+  lsfPtr = &lsf[9];
+  cosTblPtr = (int16_t*)&WebRtcIlbcfix_kCos[k];
+
+  for(i=m-1; i>=0; i--)
+  {
+    /*
+       locate value in the table, which is just above lsp[i],
+       basically an approximation to acos(x)
+    */
+    while( (((int32_t)(*cosTblPtr)-(*lspPtr)) < 0)&&(k>0) )
+    {
+      k-=1;
+      cosTblPtr--;
+    }
+
+    /* Calculate diff, which is used in the linear approximation of acos(x) */
+    diff = (*lspPtr)-(*cosTblPtr);
+
+    /*
+       The linear approximation of acos(lsp[i]) :
+       acos(lsp[i])= k*512 + (WebRtcIlbcfix_kAcosDerivative[ind]*offset >> 11)
+    */
+
+    /* tmp (linear offset) in Q16 */
+    tmp = (int16_t)((WebRtcIlbcfix_kAcosDerivative[k] * diff) >> 11);
+
+    /* freq in Q16 */
+    freq = (k << 9) + tmp;
+
+    /* lsf = freq*2*pi */
+    (*lsfPtr) = (int16_t)(((int32_t)freq*25736)>>15);
+
+    lsfPtr--;
+    lspPtr--;
+  }
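+
+  /* Inverse of Lsf2Lsp: walk the 64-entry cosine table downwards to
+     bracket each lsp value, apply a first-order acos() correction, then
+     scale by 2*pi (25736 ~= 2*pi in Q12) to obtain a Q13 lsf. */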
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h
new file mode 100644
index 0000000000..c2f4b7692d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Lsp2Lsf.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSP_TO_LSF_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSP_TO_LSF_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * conversion from LSP coefficients to LSF coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Lsp2Lsf(
+    int16_t* lsp, /* (i) lsp vector -1...+1 in Q15 */
+    int16_t* lsf, /* (o) Lsf vector 0...Pi in Q13
+                     (ordered, so that lsf[i]<lsf[i+1]) */
+    int16_t m /* (i) number of coefficients */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.h
new file mode 100644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.h
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_MyCorr.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_MY_CORR_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_MY_CORR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * compute cross correlation between sequences
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_MyCorr(int32_t* corr, /* (o) correlation of seq1 and seq2 */
+                          const int16_t* seq1, /* (i) first sequence */
+                          size_t dim1, /* (i) dimension first seq1 */
+                          const int16_t* seq2, /* (i) second sequence */
+                          size_t dim2 /* (i) dimension seq2 */
+                          );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.c
new file mode 100644
index 0000000000..1ecdd96d5a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_NearestNeighbor.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/nearest_neighbor.h"
+
+void WebRtcIlbcfix_NearestNeighbor(size_t* index,
+                                   const size_t* array,
+                                   size_t value,
+                                   size_t arlength) {
+  size_t i;
+  size_t min_diff = (size_t)-1;
+  for (i = 0; i < arlength; i++) {
+    const size_t diff =
+        (array[i] < value) ? (value - array[i]) : (array[i] - value);
+    if (diff < min_diff) {
+      *index = i;
+      min_diff = diff;
+    }
+  }
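+
+  /* Plain linear scan: (size_t)-1 (SIZE_MAX) seeds the running minimum,
+     and ties keep the first (lowest) index found. */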
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.h
new file mode 100644
index 0000000000..704cf2a37d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_NearestNeighbor.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_NEAREST_NEIGHBOR_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_NEAREST_NEIGHBOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Find index in array such that the array element with said
+ * index is the element of said array closest to "value"
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_NearestNeighbor(
+    size_t* index, /* (o) index of array element closest to value */
+    const size_t* array, /* (i) data array (Q2) */
+    size_t value, /* (i) value (Q2) */
+    size_t arlength /* (i) dimension of data array (==ENH_NBLOCKS_TOT) */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.c
new file mode 100644
index 0000000000..dd44eb8fb6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_PackBits.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/pack_bits.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * packing of bits into bitstream, i.e., vector of bytes
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_PackBits(
+    uint16_t *bitstream, /* (o) The packetized bitstream */
+    iLBC_bits *enc_bits, /* (i) Encoded bits */
+    int16_t mode /* (i) Codec mode (20 or 30) */
+    ){
+  uint16_t *bitstreamPtr;
+  int i, k;
+  int16_t *tmpPtr;
+
+  bitstreamPtr=bitstream;
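+
+  /* The iLBC payload is written MSB-first into 16-bit words, ordered by
+     ULP (unequal level protection) class: class 1 bits first, then
+     class 2, then class 3, so the perceptually most important bits sit
+     at the front of the packet. The "Bit n" comments below count from
+     the MSB (bit 0) of each word; e.g. the 6-bit lsf[0] index occupies
+     bits 0..5 of the first word. */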
+
+  /* Class 1 bits of ULP */
+  /* First int16_t */
+  (*bitstreamPtr) = ((uint16_t)enc_bits->lsf[0])<<10; /* Bit 0..5 */
+  (*bitstreamPtr) |= (enc_bits->lsf[1])<<3; /* Bit 6..12 */
+  (*bitstreamPtr) |= (enc_bits->lsf[2]&0x70)>>4; /* Bit 13..15 */
+  bitstreamPtr++;
+  /* Second int16_t */
+  (*bitstreamPtr) = ((uint16_t)enc_bits->lsf[2]&0xF)<<12; /* Bit 0..3 */
+
+  if (mode==20) {
+    (*bitstreamPtr) |= (enc_bits->startIdx)<<10; /* Bit 4..5 */
+    (*bitstreamPtr) |= (enc_bits->state_first)<<9; /* Bit 6 */
+    (*bitstreamPtr) |= (enc_bits->idxForMax)<<3; /* Bit 7..12 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[0])&0x70)>>4; /* Bit 13..15 */
+    bitstreamPtr++;
+    /* Third int16_t */
+    (*bitstreamPtr) = ((enc_bits->cb_index[0])&0xE)<<12; /* Bit 0..2 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x18)<<8; /* Bit 3..4 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x8)<<7; /* Bit 5 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0xFE)<<2; /* Bit 6..12 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[3])&0x10)>>2; /* Bit 13 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x8)>>2; /* Bit 14 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x10)>>4; /* Bit 15 */
+  } else { /* mode==30 */
+    (*bitstreamPtr) |= (enc_bits->lsf[3])<<6; /* Bit 4..9 */
+    (*bitstreamPtr) |= (enc_bits->lsf[4]&0x7E)>>1; /* Bit 10..15 */
+    bitstreamPtr++;
+    /* Third int16_t */
+    (*bitstreamPtr) = ((uint16_t)enc_bits->lsf[4]&0x1)<<15; /* Bit 0 */
+    (*bitstreamPtr) |= (enc_bits->lsf[5])<<8; /* Bit 1..7 */
+    (*bitstreamPtr) |= (enc_bits->startIdx)<<5; /* Bit 8..10 */
+    (*bitstreamPtr) |= (enc_bits->state_first)<<4; /* Bit 11 */
+    (*bitstreamPtr) |= ((enc_bits->idxForMax)&0x3C)>>2; /* Bit 12..15 */
+    bitstreamPtr++;
+    /* 4:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)enc_bits->idxForMax&0x3)<<14; /* Bit 0..1 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[0]&0x78)<<7; /* Bit 2..5 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[0]&0x10)<<5; /* Bit 6 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x8)<<5; /* Bit 7 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[3]&0xFC); /* Bit 8..13 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0x10)>>3; /* Bit 14 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x8)>>3; /* Bit 15 */
+  }
+  /* Class 2 bits of ULP */
+  /* 4:th to 6:th int16_t for 20 ms case
+     5:th to 7:th int16_t for 30 ms case */
+  bitstreamPtr++;
+  tmpPtr=enc_bits->idxVec;
+  for (k=0; k<3; k++) {
+    (*bitstreamPtr) = 0;
+    for (i=15; i>=0; i--) {
+      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
+      /* Bit 15-i */
+      tmpPtr++;
+    }
+    bitstreamPtr++;
+  }
+
+  if (mode==20) {
+    /* 7:th int16_t */
+    (*bitstreamPtr) = 0;
+    for (i=15; i>6; i--) {
+      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
+      /* Bit 15-i */
+      tmpPtr++;
+    }
+    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x4)<<4; /* Bit 9 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0xC)<<2; /* Bit 10..11 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x4)<<1; /* Bit 12 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[6]&0x8)>>1; /* Bit 13 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[7]&0xC)>>2; /* Bit 14..15 */
+
+  } else { /* mode==30 */
+    /* 8:th int16_t */
+    (*bitstreamPtr) = 0;
+    for (i=15; i>5; i--) {
+      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
+      /* Bit 15-i */
+      tmpPtr++;
+    }
+    (*bitstreamPtr) |= (enc_bits->cb_index[0]&0x6)<<3; /* Bit 10..11 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[0]&0x8); /* Bit 12 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x4); /* Bit 13 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[3]&0x2); /* Bit 14 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[6]&0x80)>>7; /* Bit 15 */
+    bitstreamPtr++;
+    /* 9:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)enc_bits->cb_index[6]&0x7E)<<9;/* Bit 0..5 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[9]&0xFE)<<2; /* Bit 6..12 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[12]&0xE0)>>5; /* Bit 13..15 */
+    bitstreamPtr++;
+    /* 10:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)enc_bits->cb_index[12]&0x1E)<<11;/* Bit 0..3 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0xC)<<8; /* Bit 4..5 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x6)<<7; /* Bit 6..7 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[6]&0x18)<<3; /* Bit 8..9 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[7]&0xC)<<2; /* Bit 10..11 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[9]&0x10)>>1; /* Bit 12 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[10]&0x8)>>1; /* Bit 13 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[12]&0x10)>>3; /* Bit 14 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[13]&0x8)>>3; /* Bit 15 */
+  }
+  bitstreamPtr++;
+  /* Class 3 bits of ULP */
+  /* 8:th to 14:th int16_t for 20 ms case
+     11:th to 17:th int16_t for 30 ms case */
+  tmpPtr=enc_bits->idxVec;
+  for (k=0; k<7; k++) {
+    (*bitstreamPtr) = 0;
+    for (i=14; i>=0; i-=2) {
+      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x3))<<i;
+      /* Bit 15-i..14-i */
+      tmpPtr++;
+    }
+    bitstreamPtr++;
+  }
+
+  if (mode==20) {
+    /* 15:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)((enc_bits->idxVec[56])&0x3))<<14;/* Bit 0..1 */
+    (*bitstreamPtr) |= (((enc_bits->cb_index[0])&1))<<13; /* Bit 2 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[1]))<<6; /* Bit 3..9 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[2])&0x7E)>>1; /* Bit 10..15 */
+    bitstreamPtr++;
+    /* 16:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)((enc_bits->cb_index[2])&0x1))<<15;
+    /* Bit 0 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x7)<<12; /* Bit 1..3 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x3)<<10; /* Bit 4..5 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[2]))<<7; /* Bit 6..8 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0x1)<<6; /* Bit 9 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[4])&0x7E)>>1; /* Bit 10..15 */
+    bitstreamPtr++;
+    /* 17:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)((enc_bits->cb_index[4])&0x1))<<15;
+    /* Bit 0 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[5])<<8; /* Bit 1..7 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[6]); /* Bit 8..15 */
+    bitstreamPtr++;
+    /* 18:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[7]))<<8; /* Bit 0..7 */
+    (*bitstreamPtr) |= (enc_bits->cb_index[8]); /* Bit 8..15 */
+    bitstreamPtr++;
+    /* 19:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)((enc_bits->gain_index[3])&0x3))<<14;
+    /* Bit 0..1 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x3)<<12; /* Bit 2..3 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[5]))<<9; /* Bit 4..6 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x7)<<6; /* Bit 7..9 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[7])&0x3)<<4; /* Bit 10..11 */
+    (*bitstreamPtr) |= (enc_bits->gain_index[8])<<1; /* Bit 12..14 */
+  } else { /* mode==30 */
+    /* 18:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)((enc_bits->idxVec[56])&0x3))<<14;/* Bit 0..1 */
+    (*bitstreamPtr) |= (((enc_bits->idxVec[57])&0x3))<<12; /* Bit 2..3 */
+    (*bitstreamPtr) |= (((enc_bits->cb_index[0])&1))<<11; /* Bit 4 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[1]))<<4; /* Bit 5..11 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[2])&0x78)>>3; /* Bit 12..15 */
+    bitstreamPtr++;
+    /* 19:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[2])&0x7)<<13;
+    /* Bit 0..2 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x7)<<10; /* Bit 3..5 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x3)<<8; /* Bit 6..7 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[2])&0x7)<<5; /* Bit 8..10 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0x1)<<4; /* Bit 11 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[4])&0x78)>>3; /* Bit 12..15 */
+    bitstreamPtr++;
+    /* 20:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[4])&0x7)<<13;
+    /* Bit 0..2 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[5]))<<6; /* Bit 3..9 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[6])&0x1)<<5; /* Bit 10 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[7])&0xF8)>>3; /* Bit 11..15 */
+    bitstreamPtr++;
+    /* 21:st int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[7])&0x7)<<13;
+    /* Bit 0..2 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[8]))<<5; /* Bit 3..10 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[9])&0x1)<<4; /* Bit 11 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[10])&0xF0)>>4; /* Bit 12..15 */
+    bitstreamPtr++;
+    /* 22:nd int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[10])&0xF)<<12;
+    /* Bit 0..3 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[11]))<<4; /* Bit 4..11 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[12])&0x1)<<3; /* Bit 12 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[13])&0xE0)>>5; /* Bit 13..15 */
+    bitstreamPtr++;
+    /* 23:rd int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[13])&0x1F)<<11;
+    /* Bit 0..4 */
+    (*bitstreamPtr) |= ((enc_bits->cb_index[14]))<<3; /* Bit 5..12 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[3])&0x3)<<1; /* Bit 13..14 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x1); /* Bit 15 */
+    bitstreamPtr++;
+    /* 24:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->gain_index[5]))<<13;
+    /* Bit 0..2 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x7)<<10; /* Bit 3..5 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[7])&0x3)<<8; /* Bit 6..7 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[8]))<<5; /* Bit 8..10 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[9])&0xF)<<1; /* Bit 11..14 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[10])&0x4)>>2; /* Bit 15 */
+    bitstreamPtr++;
+    /* 25:th int16_t */
+    (*bitstreamPtr) = ((uint16_t)(enc_bits->gain_index[10])&0x3)<<14;
+    /* Bit 0..1 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[11]))<<11; /* Bit 2..4 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[12])&0xF)<<7; /* Bit 5..8 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[13])&0x7)<<4; /* Bit 9..11 */
+    (*bitstreamPtr) |= ((enc_bits->gain_index[14]))<<1; /* Bit 12..14 */
+  }
+  /* Last bit is automatically zero */
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.h
new file mode 100644
index 0000000000..8dcf41ce08
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_PackBits.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_PACK_BITS_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_PACK_BITS_H_
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * packing of bits into bitstream, i.e., vector of bytes
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_PackBits(
+    uint16_t* bitstream, /* (o) The packetized bitstream */
+    iLBC_bits* enc_bits, /* (i) Encoded bits */
+    int16_t mode /* (i) Codec mode (20 or 30) */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c
new file mode 100644
index 0000000000..7192eaab49
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Poly2Lsf.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/poly_to_lsf.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/lsp_to_lsf.h"
+#include "modules/audio_coding/codecs/ilbc/poly_to_lsp.h"
+
+void WebRtcIlbcfix_Poly2Lsf(
+    int16_t *lsf, /* (o) lsf coefficients (Q13) */
+    int16_t *a /* (i) A coefficients (Q12) */
+    ) {
+  int16_t lsp[10];
+  WebRtcIlbcfix_Poly2Lsp(a, lsp, (int16_t*)WebRtcIlbcfix_kLspMean);
+  WebRtcIlbcfix_Lsp2Lsf(lsp, lsf, 10);
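+
+  /* Two-step conversion: A(z) -> LSP (falling back to kLspMean if the
+     root search fails) and then LSP -> LSF, yielding Q13 frequencies. */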
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h
new file mode 100644
index 0000000000..363e392bb2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Poly2Lsf.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSF_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSF_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * conversion from lpc coefficients to lsf coefficients
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Poly2Lsf(int16_t* lsf, /* (o) lsf coefficients (Q13) */
+                            int16_t* a /* (i) A coefficients (Q12) */
+                            );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c
new file mode 100644
index 0000000000..ad0ecd70ab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Poly2Lsp.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/poly_to_lsp.h"
+
+#include "modules/audio_coding/codecs/ilbc/chebyshev.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+
+/*----------------------------------------------------------------*
+ * conversion from lpc coefficients to lsp coefficients
+ * function is only for 10:th order LPC
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Poly2Lsp(
+    int16_t *a, /* (i) A coefficients in Q12 */
+    int16_t *lsp, /* (o) LSP coefficients in Q15 */
+    int16_t *old_lsp /* (i) old LSP coefficients that are used if the new
+                        coefficients turn out to be unstable */
+    ) {
+  int16_t f[2][6]; /* f[0][] represents f1 and f[1][] represents f2 */
+  int16_t *a_i_ptr, *a_10mi_ptr;
+  int16_t *f1ptr, *f2ptr;
+  int32_t tmpW32;
+  int16_t x, y, xlow, ylow, xmid, ymid, xhigh, yhigh, xint;
+  int16_t shifts, sign;
+  int i, j;
+  int foundFreqs;
+  int fi_select;
+
+  /*
+     Calculate the two polynomials f1(z) and f2(z)
+     (the sum and the diff polynomial)
+     f1[0] = f2[0] = 1.0;
+     f1[i+1] = a[i+1] + a[10-i] - f1[i];
+     f2[i+1] = a[i+1] - a[10-i] + f2[i];
+  */
+
+  a_i_ptr = a + 1;
+  a_10mi_ptr = a + 10;
+  f1ptr = f[0];
+  f2ptr = f[1];
+  (*f1ptr) = 1024; /* 1.0 in Q10 */
+  (*f2ptr) = 1024; /* 1.0 in Q10 */
+  for (i = 0; i < 5; i++) {
+    *(f1ptr + 1) =
+        (int16_t)((((int32_t)(*a_i_ptr) + *a_10mi_ptr) >> 2) - *f1ptr);
+    *(f2ptr + 1) =
+        (int16_t)((((int32_t)(*a_i_ptr) - *a_10mi_ptr) >> 2) + *f2ptr);
+    a_i_ptr++;
+    a_10mi_ptr--;
+    f1ptr++;
+    f2ptr++;
+  }
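+
+  /* The roots of the symmetric sum polynomial F1(z) and the
+     antisymmetric difference polynomial F2(z) interlace on the unit
+     circle; their angles are the line spectral pairs found below. */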
+
+  /*
+    find the LSPs using the Chebyshev polynomial evaluation
+  */
+
+  fi_select = 0; /* selector between f1 and f2, start with f1 */
+
+  foundFreqs = 0;
+
+  xlow = WebRtcIlbcfix_kCosGrid[0];
+  ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);
+
+  /*
+    Iterate until all the 10 LSP's have been found or
+    all the grid points have been tried. If the 10 LSP's can
+    not be found, set the LSP vector to the previous LSP
+  */
+
+  for (j = 1; j < COS_GRID_POINTS && foundFreqs < 10; j++) {
+    xhigh = xlow;
+    yhigh = ylow;
+    xlow = WebRtcIlbcfix_kCosGrid[j];
+    ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);
+
+    if (ylow * yhigh <= 0) {
+      /* Run 4 times to reduce the interval */
+      for (i = 0; i < 4; i++) {
+        /* xmid = (xlow + xhigh)/2 */
+        xmid = (xlow >> 1) + (xhigh >> 1);
+        ymid = WebRtcIlbcfix_Chebyshev(xmid, f[fi_select]);
+
+        if (ylow * ymid <= 0) {
+          yhigh = ymid;
+          xhigh = xmid;
+        } else {
+          ylow = ymid;
+          xlow = xmid;
+        }
+      }
+
+      /*
+        Calculate xint by linear interpolation:
+        xint = xlow - ylow*(xhigh-xlow)/(yhigh-ylow);
+      */
+
+      x = xhigh - xlow;
+      y = yhigh - ylow;
+
+      if (y == 0) {
+        xint = xlow;
+      } else {
+        sign = y;
+        y = WEBRTC_SPL_ABS_W16(y);
+        shifts = (int16_t)WebRtcSpl_NormW32(y)-16;
+        y <<= shifts;
+        y = (int16_t)WebRtcSpl_DivW32W16(536838144, y); /* 1/(yhigh-ylow) */
+
+        tmpW32 = (x * y) >> (19 - shifts);
+
+        /* y=(xhigh-xlow)/(yhigh-ylow) */
+        y = (int16_t)(tmpW32&0xFFFF);
+
+        if (sign < 0) {
+          y = -y;
+        }
+        /* tmpW32 = ylow*(xhigh-xlow)/(yhigh-ylow) */
+        tmpW32 = (ylow * y) >> 10;
+        xint = xlow-(int16_t)(tmpW32&0xFFFF);
+      }
+
+      /* Store the calculated lsp */
+      lsp[foundFreqs] = (int16_t)xint;
+      foundFreqs++;
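+
+      /* Roots of F1 and F2 alternate along the grid, so the search
+         swaps fi_select after every hit instead of scanning both
+         polynomials over the whole grid. */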
+
+      /* if needed, set xlow and ylow for next recursion */
+      if (foundFreqs<10) {
+        xlow = xint;
+        /* Swap between f1 and f2 (f[0][] and f[1][]) */
+        fi_select = ((fi_select+1)&0x1);
+
+        ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);
+      }
+    }
+  }
+
+  /* Check if M roots found, if not then use the old LSP */
+  if (foundFreqs < 10) {
+    WEBRTC_SPL_MEMCPY_W16(lsp, old_lsp, 10);
+  }
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h
new file mode 100644
index 0000000000..928ee4efdb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Poly2Lsp.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSP_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSP_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * conversion from lpc coefficients to lsp coefficients
+ * function is only for 10:th order LPC
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Poly2Lsp(
+    int16_t* a, /* (i) A coefficients in Q12 */
+    int16_t* lsp, /* (o) LSP coefficients in Q15 */
+    int16_t* old_lsp /* (i) old LSP coefficients that are used if the new
+                        coefficients turn out to be unstable */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c
new file mode 100644
index 0000000000..5bdab7a4b0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Refiner.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/refiner.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/enh_upsample.h"
+#include "modules/audio_coding/codecs/ilbc/my_corr.h"
+
+/*----------------------------------------------------------------*
+ * find segment starting near idata+estSegPos that has highest
+ * correlation with idata+centerStartPos through
+ * idata+centerStartPos+ENH_BLOCKL-1; the segment is found at a
+ * resolution of ENH_UPS0 times the original sampling rate
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Refiner(
+    size_t *updStartPos, /* (o) updated start point (Q-2) */
+    int16_t *idata, /* (i) original data buffer */
+    size_t idatal, /* (i) dimension of idata */
+    size_t centerStartPos, /* (i) beginning center segment */
+    size_t estSegPos, /* (i) estimated beginning other segment (Q-2) */
+    int16_t *surround, /* (i/o) The contribution from this sequence
+                          summed with earlier contributions */
+    int16_t gain /* (i) Gain to use for this sequence */
+    ){
+  size_t estSegPosRounded, searchSegStartPos, searchSegEndPos, corrdim;
+  size_t tloc, tloc2, i;
+
+  int32_t maxtemp, scalefact;
+  int16_t *filtStatePtr, *polyPtr;
+  /* Stack based */
+  int16_t filt[7];
+  int32_t corrVecUps[ENH_CORRDIM*ENH_UPS0];
+  int32_t corrVecTemp[ENH_CORRDIM];
+  int16_t vect[ENH_VECTL];
+  int16_t corrVec[ENH_CORRDIM];
+
+  /* defining array bounds */
+
+  estSegPosRounded = (estSegPos - 2) >> 2;
+
+  searchSegStartPos =
+      (estSegPosRounded < ENH_SLOP) ? 0 : (estSegPosRounded - ENH_SLOP);
+
+  searchSegEndPos = estSegPosRounded + ENH_SLOP;
+  if ((searchSegEndPos + ENH_BLOCKL) >= idatal) {
+    searchSegEndPos = idatal - ENH_BLOCKL - 1;
+  }
+
+  corrdim = searchSegEndPos + 1 - searchSegStartPos;
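+
+  /* The search window is +/- ENH_SLOP samples around the rounded
+     estimate, clamped so the ENH_BLOCKL-long segment always stays
+     inside the idata buffer. */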
+
+  /* compute upsampled correlation and find
+     location of max */
+
+  WebRtcIlbcfix_MyCorr(corrVecTemp, idata + searchSegStartPos,
+                       corrdim + ENH_BLOCKL - 1, idata + centerStartPos,
+                       ENH_BLOCKL);
+
+  /* Calculate the rescaling factor for the correlation in order to
+     put the correlation in an int16_t vector instead */
+  maxtemp = WebRtcSpl_MaxAbsValueW32(corrVecTemp, corrdim);
+
+  scalefact = WebRtcSpl_GetSizeInBits(maxtemp) - 15;
+
+  if (scalefact > 0) {
+    for (i = 0; i < corrdim; i++) {
+      corrVec[i] = (int16_t)(corrVecTemp[i] >> scalefact);
+    }
+  } else {
+    for (i = 0; i < corrdim; i++) {
+      corrVec[i] = (int16_t)corrVecTemp[i];
+    }
+  }
+  /* In order to guarantee that all values are initialized */
+  for (i = corrdim; i < ENH_CORRDIM; i++) {
+    corrVec[i] = 0;
+  }
+
+  /* Upsample the correlation */
+  WebRtcIlbcfix_EnhUpsample(corrVecUps, corrVec);
+
+  /* Find maximum */
+  tloc = WebRtcSpl_MaxIndexW32(corrVecUps, ENH_UPS0 * corrdim);
+
+  /* make sure the vector can be upsampled without ever running outside
+     bounds */
+  *updStartPos = searchSegStartPos * 4 + tloc + 4;
+
+  tloc2 = (tloc + 3) >> 2;
+
+  /* initialize the vector to be filtered, stuff with zeros
+     when data is outside idata buffer */
+  if (ENH_FL0 > (searchSegStartPos + tloc2)) {
+    const size_t st = ENH_FL0 - searchSegStartPos - tloc2;
+    WebRtcSpl_MemSetW16(vect, 0, st);
+    WEBRTC_SPL_MEMCPY_W16(&vect[st], idata, ENH_VECTL - st);
+  } else {
+    const size_t st = searchSegStartPos + tloc2 - ENH_FL0;
+    if ((st + ENH_VECTL) > idatal) {
+      const size_t en = st + ENH_VECTL - idatal;
+      WEBRTC_SPL_MEMCPY_W16(vect, &idata[st], ENH_VECTL - en);
+      WebRtcSpl_MemSetW16(&vect[ENH_VECTL - en], 0, en);
+    } else {
+      WEBRTC_SPL_MEMCPY_W16(vect, &idata[st], ENH_VECTL);
+    }
+  }
+
+  /* compute the segment (this is actually a convolution) */
+  filtStatePtr = filt + 6;
+  polyPtr = (int16_t*)WebRtcIlbcfix_kEnhPolyPhaser[tloc2 * ENH_UPS0 - tloc];
+  for (i = 0; i < 7; i++) {
+    *filtStatePtr-- = *polyPtr++;
+  }
+
+  WebRtcSpl_FilterMAFastQ12(&vect[6], vect, filt, ENH_FLO_MULT2_PLUS1,
+                            ENH_BLOCKL);
+
+  /* Add the contribution from this vector (scaled with gain) to the total
+     surround vector */
+  WebRtcSpl_AddAffineVectorToVector(surround, vect, gain, 32768, 16,
+                                    ENH_BLOCKL);
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.h
new file mode 100644
index 0000000000..564c9d96e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_Refiner.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_REFINER_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_REFINER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * find segment starting near idata+estSegPos that has highest
+ * correlation with idata+centerStartPos through
+ * idata+centerStartPos+ENH_BLOCKL-1; the segment is found at a
+ * resolution of ENH_UPS0 times the original sampling rate
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Refiner(
+    size_t* updStartPos, /* (o) updated start point (Q-2) */
+    int16_t* idata, /* (i) original data buffer */
+    size_t idatal, /* (i) dimension of idata */
+    size_t centerStartPos, /* (i) beginning center segment */
+    size_t estSegPos, /* (i) estimated beginning other segment (Q-2) */
+    int16_t* surround, /* (i/o) The contribution from this sequence
+                          summed with earlier contributions */
+    int16_t gain /* (i) Gain to use for this sequence */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c
new file mode 100644
index 0000000000..7343530a5e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+    iLBC Speech Coder ANSI-C Source Code
+
+    WebRtcIlbcfix_SimpleInterpolateLsf.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h"
+
+#include "modules/audio_coding/codecs/ilbc/bw_expand.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h"
+
+/*----------------------------------------------------------------*
+ * lsf interpolator (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleInterpolateLsf(
+    int16_t *syntdenum, /* (o) the synthesis filter denominator
+                           resulting from the quantized
+                           interpolated lsf Q12 */
+    int16_t *weightdenum, /* (o) the weighting filter denominator
+                             resulting from the unquantized
+                             interpolated lsf Q12 */
+    int16_t *lsf, /* (i) the unquantized lsf coefficients Q13 */
+    int16_t *lsfdeq, /* (i) the dequantized lsf coefficients Q13 */
+    int16_t *lsfold, /* (i) the unquantized lsf coefficients of
+                        the previous signal frame Q13 */
+    int16_t *lsfdeqold, /* (i) the dequantized lsf coefficients of the
+                           previous signal frame Q13 */
+    int16_t length, /* (i) should equal LPC_FILTERORDER */
+    IlbcEncoder *iLBCenc_inst
+    /* (i/o) the encoder state structure */
+    ) {
+  size_t i;
+  int pos, lp_length;
+
+  int16_t *lsf2, *lsfdeq2;
+  /* Stack based */
+  int16_t lp[LPC_FILTERORDER + 1];
+
+  lsf2 = lsf + length;
+  lsfdeq2 = lsfdeq + length;
+  lp_length = length + 1;
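+
+  /* lsf/lsfdeq hold up to two sets of LPC_FILTERORDER coefficients;
+     lsf2/lsfdeq2 point at the second set, used from subframe 2 onwards
+     in the 30 ms mode. The quantized sets build the synthesis filter,
+     the unquantized sets the (chirped) weighting filter. */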
+
+  if (iLBCenc_inst->mode==30) {
+    /* subframe 1: Interpolation between old and first set of
+       lsf coefficients */
+
+    /* Calculate Analysis/Synthesis filter from quantized LSF */
+    WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfdeqold, lsfdeq,
+                                         WebRtcIlbcfix_kLsfWeight30ms[0],
+                                         length);
+    WEBRTC_SPL_MEMCPY_W16(syntdenum, lp, lp_length);
+
+    /* Calculate Weighting filter from unquantized LSF */
+    WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfold, lsf,
+                                         WebRtcIlbcfix_kLsfWeight30ms[0],
+                                         length);
+    WebRtcIlbcfix_BwExpand(weightdenum, lp,
+                           (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum,
+                           (int16_t)lp_length);
+
+    /* subframe 2 to 6: Interpolation between first and second
+       set of lsf coefficients */
+
+    pos = lp_length;
+    for (i = 1; i < iLBCenc_inst->nsub; i++) {
+
+      /* Calculate Analysis/Synthesis filter from quantized LSF */
+      WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfdeq, lsfdeq2,
+                                           WebRtcIlbcfix_kLsfWeight30ms[i],
+                                           length);
+      WEBRTC_SPL_MEMCPY_W16(syntdenum + pos, lp, lp_length);
+
+      /* Calculate Weighting filter from unquantized LSF */
+      WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsf, lsf2,
+                                           WebRtcIlbcfix_kLsfWeight30ms[i],
+                                           length);
+      WebRtcIlbcfix_BwExpand(weightdenum + pos, lp,
+                             (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum,
+                             (int16_t)lp_length);
+
+      pos += lp_length;
+    }
+
+    /* update memory */
+
+    WEBRTC_SPL_MEMCPY_W16(lsfold, lsf2, length);
+    WEBRTC_SPL_MEMCPY_W16(lsfdeqold, lsfdeq2, length);
+
+      WebRtcIlbcfix_BwExpand(weightdenum+pos, lp,
+                             (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum,
+                             (int16_t)lp_length);
+
+      pos += lp_length;
+    }
+
+    /* update memory */
+
+    WEBRTC_SPL_MEMCPY_W16(lsfold, lsf, length);
+    WEBRTC_SPL_MEMCPY_W16(lsfdeqold, lsfdeq, length);
+
+  }
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h
new file mode 100644
index 0000000000..ee53e4bd08
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleInterpolateLsf.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_INTERPOLATE_LSF_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_INTERPOLATE_LSF_H_
+
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * lsf interpolator (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleInterpolateLsf(
+    int16_t* syntdenum,   /* (o) the synthesis filter denominator
+                                 resulting from the quantized
+                                 interpolated lsf Q12 */
+    int16_t* weightdenum, /* (o) the weighting filter denominator
+                                 resulting from the unquantized
+                                 interpolated lsf Q12 */
+    int16_t* lsf,         /* (i) the unquantized lsf coefficients Q13 */
+    int16_t* lsfdeq,      /* (i) the dequantized lsf coefficients Q13 */
+    int16_t* lsfold,      /* (i) the unquantized lsf coefficients of
+                                 the previous signal frame Q13 */
+    int16_t* lsfdeqold,   /* (i) the dequantized lsf coefficients of the
+                                 previous signal frame Q13 */
+    int16_t length,       /* (i) should equal LPC_FILTERORDER */
+    IlbcEncoder* iLBCenc_inst
+    /* (i/o) the encoder state structure */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c
new file mode 100644
index 0000000000..fdc4553d95
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLpcAnalysis.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h"
+
+#include "modules/audio_coding/codecs/ilbc/bw_expand.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/poly_to_lsf.h"
+#include "modules/audio_coding/codecs/ilbc/window32_w32.h"
+
+/*----------------------------------------------------------------*
+ * lpc analysis (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLpcAnalysis(
+    int16_t *lsf,   /* (o) lsf coefficients */
+    int16_t *data,  /* (i) new block of speech */
+    IlbcEncoder *iLBCenc_inst
+    /* (i/o) the encoder state structure */
+                                     ) {
+  int k;
+  int scale;
+  size_t is;
+  int16_t stability;
+  /* Stack based */
+  int16_t A[LPC_FILTERORDER + 1];
+  int32_t R[LPC_FILTERORDER + 1];
+  int16_t windowedData[BLOCKL_MAX];
+  int16_t rc[LPC_FILTERORDER];
+
+  is=LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl;
+  WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lpc_buffer+is,data,iLBCenc_inst->blockl);
+
+  /* No lookahead, last window is asymmetric */
+
+  for (k = 0; k < iLBCenc_inst->lpc_n; k++) {
+
+    is = LPC_LOOKBACK;
+
+    if (k < (iLBCenc_inst->lpc_n - 1)) {
+
+      /* Hanning table WebRtcIlbcfix_kLpcWin[] is in Q15-domain so the output is right-shifted 15 */
+      WebRtcSpl_ElementwiseVectorMult(windowedData, iLBCenc_inst->lpc_buffer, WebRtcIlbcfix_kLpcWin, BLOCKL_MAX, 15);
+    } else {
+
+      /* Hanning table WebRtcIlbcfix_kLpcAsymWin[] is in Q15-domain so the output is right-shifted 15 */
+      WebRtcSpl_ElementwiseVectorMult(windowedData, iLBCenc_inst->lpc_buffer+is, WebRtcIlbcfix_kLpcAsymWin, BLOCKL_MAX, 15);
+    }
+
+    /* Compute autocorrelation */
+    WebRtcSpl_AutoCorrelation(windowedData, BLOCKL_MAX, LPC_FILTERORDER, R, &scale);
+
+    /* Window autocorrelation vector */
+    WebRtcIlbcfix_Window32W32(R, R, WebRtcIlbcfix_kLpcLagWin, LPC_FILTERORDER + 1 );
+
+    /* Calculate the A coefficients from the Autocorrelation using Levinson Durbin algorithm */
+    stability=WebRtcSpl_LevinsonDurbin(R, A, rc, LPC_FILTERORDER);
+
+    /*
+       Set the filter to {1.0, 0.0, 0.0,...} if filter from Levinson Durbin algorithm is unstable
+       This should basically never happen...
+    */
+    if (stability!=1) {
+      A[0]=4096;
+      WebRtcSpl_MemSetW16(&A[1], 0, LPC_FILTERORDER);
+    }
+
+    /* Bandwidth expand the filter coefficients */
+    WebRtcIlbcfix_BwExpand(A, A, (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, LPC_FILTERORDER+1);
+
+    /* Convert from A to LSF representation */
+    WebRtcIlbcfix_Poly2Lsf(lsf + k*LPC_FILTERORDER, A);
+  }
+
+  is=LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl;
+  WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lpc_buffer,
+                        iLBCenc_inst->lpc_buffer+LPC_LOOKBACK+BLOCKL_MAX-is, is);
+
+  return;
+}
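The WebRtcIlbcfix_BwExpand step above scales LPC coefficient k by chirp^k to widen formant bandwidths and stabilize the filter. A self-contained sketch of that operation (the chirp value and coefficients here are illustrative, not the real kLpcChirpSyntDenum table):

#include <stdint.h>
#include <stdio.h>

/* Multiply LPC coefficient k by chirp^k (chirp in Q15): the same
   "bandwidth expansion" idea used above, on a toy 3-tap filter. */
static void bw_expand_q15(int16_t* out, const int16_t* a,
                          int16_t chirp_q15, int len) {
  int32_t f = 32768; /* chirp^0 == 1.0 in Q15 */
  for (int k = 0; k < len; k++) {
    out[k] = (int16_t)((a[k] * f) >> 15);
    f = (f * chirp_q15) >> 15; /* next power of the chirp factor */
  }
}

int main(void) {
  int16_t a[3] = {4096 /* 1.0 in Q12 */, -2048, 1024};
  int16_t out[3];
  bw_expand_q15(out, a, 29491 /* ~0.9 in Q15 */, 3);
  printf("%d %d %d\n", out[0], out[1], out[2]);
  return 0;
}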
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h
new file mode 100644
index 0000000000..b5c839ba2a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLpcAnalysis.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LPC_ANALYSIS_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LPC_ANALYSIS_H_
+
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * lpc analysis (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLpcAnalysis(
+    int16_t* lsf,   /* (o) lsf coefficients */
+    int16_t* data,  /* (i) new block of speech */
+    IlbcEncoder* iLBCenc_inst
+    /* (i/o) the encoder state structure */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c
new file mode 100644
index 0000000000..e7494ceb59
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLsfDeQ.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * obtain dequantized lsf coefficients from quantization index
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLsfDeQ(
+    int16_t *lsfdeq, /* (o) dequantized lsf coefficients */
+    int16_t *index,  /* (i) quantization index */
+    int16_t lpc_n    /* (i) number of LPCs */
+                                ){
+  int i, j, pos, cb_pos;
+
+  /* decode first LSF */
+
+  pos = 0;
+  cb_pos = 0;
+  for (i = 0; i < LSF_NSPLIT; i++) {
+    for (j = 0; j < WebRtcIlbcfix_kLsfDimCb[i]; j++) {
+      lsfdeq[pos + j] = WebRtcIlbcfix_kLsfCb[cb_pos + j + index[i] *
+                                             WebRtcIlbcfix_kLsfDimCb[i]];
+    }
+    pos += WebRtcIlbcfix_kLsfDimCb[i];
+    cb_pos += WebRtcIlbcfix_kLsfSizeCb[i] * WebRtcIlbcfix_kLsfDimCb[i];
+  }
+
+  if (lpc_n>1) {
+    /* decode last LSF */
+    pos = 0;
+    cb_pos = 0;
+    for (i = 0; i < LSF_NSPLIT; i++) {
+      for (j = 0; j < WebRtcIlbcfix_kLsfDimCb[i]; j++) {
+        lsfdeq[LPC_FILTERORDER + pos + j] = WebRtcIlbcfix_kLsfCb[
+            cb_pos + index[LSF_NSPLIT + i] * WebRtcIlbcfix_kLsfDimCb[i] + j];
+      }
+      pos += WebRtcIlbcfix_kLsfDimCb[i];
+      cb_pos += WebRtcIlbcfix_kLsfSizeCb[i] * WebRtcIlbcfix_kLsfDimCb[i];
+    }
+  }
+  return;
+}
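Each split's index above selects one row of a [size x dim] sub-codebook stored flat, so the element offset is cb_pos + index*dim + j. The same indexing arithmetic on a toy table (all values hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Toy split codebook: 2 sub-vectors, dims {2,3}, sizes {2,2},
     stored back to back just like WebRtcIlbcfix_kLsfCb. */
  const int16_t cb[] = {10,11,  20,21,       /* split 0: 2 entries, dim 2 */
                        30,31,32, 40,41,42}; /* split 1: 2 entries, dim 3 */
  const int16_t dim[2] = {2, 3}, size[2] = {2, 2};
  const int16_t index[2] = {1, 0}; /* chosen entry per split */
  int16_t out[5];
  int pos = 0, cb_pos = 0;
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < dim[i]; j++)
      out[pos + j] = cb[cb_pos + index[i] * dim[i] + j];
    pos += dim[i];       /* advance in the output vector */
    cb_pos += size[i] * dim[i]; /* advance to the next sub-codebook */
  }
  for (int k = 0; k < 5; k++) printf("%d ", out[k]); /* 20 21 30 31 32 */
  return 0;
}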
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h
new file mode 100644
index 0000000000..6d97d3df33
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLsfDeQ.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_DEQUANT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_DEQUANT_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * obtain dequantized lsf coefficients from quantization index
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLsfDeQ(
+    int16_t* lsfdeq, /* (o) dequantized lsf coefficients */
+    int16_t* index,  /* (i) quantization index */
+    int16_t lpc_n    /* (i) number of LPCs */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c
new file mode 100644
index 0000000000..1291d1442e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLsfQ.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/simple_lsf_quant.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/split_vq.h"
+
+/*----------------------------------------------------------------*
+ * lsf quantizer (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLsfQ(
+    int16_t *lsfdeq, /* (o) dequantized lsf coefficients
+                            (dimension FILTERORDER) Q13 */
+    int16_t *index,  /* (o) quantization index */
+    int16_t *lsf,    /* (i) the lsf coefficient vector to be
+                            quantized (dimension FILTERORDER) Q13 */
+    int16_t lpc_n    /* (i) number of lsf sets to quantize */
+                              ){
+
+  /* Quantize first LSF with memoryless split VQ */
+  WebRtcIlbcfix_SplitVq( lsfdeq, index, lsf,
+                         (int16_t*)WebRtcIlbcfix_kLsfCb, (int16_t*)WebRtcIlbcfix_kLsfDimCb, (int16_t*)WebRtcIlbcfix_kLsfSizeCb);
+
+  if (lpc_n==2) {
+    /* Quantize second LSF with memoryless split VQ */
+    WebRtcIlbcfix_SplitVq( lsfdeq + LPC_FILTERORDER, index + LSF_NSPLIT,
+                           lsf + LPC_FILTERORDER, (int16_t*)WebRtcIlbcfix_kLsfCb,
+                           (int16_t*)WebRtcIlbcfix_kLsfDimCb, (int16_t*)WebRtcIlbcfix_kLsfSizeCb);
+  }
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h
new file mode 100644
index 0000000000..66b553213a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SimpleLsfQ.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_QUANT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_QUANT_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * lsf quantizer (subroutine to LPCencode)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SimpleLsfQ(
+    int16_t* lsfdeq, /* (o) dequantized lsf coefficients
+                            (dimension FILTERORDER) Q13 */
+    int16_t* index,  /* (o) quantization index */
+    int16_t* lsf,    /* (i) the lsf coefficient vector to be
+                            quantized (dimension FILTERORDER) Q13 */
+    int16_t lpc_n    /* (i) number of lsf sets to quantize */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c
new file mode 100644
index 0000000000..631b2f432a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Smooth.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/smooth.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/smooth_out_data.h" + +/*----------------------------------------------------------------* + * find the smoothed output data + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Smooth( + int16_t *odata, /* (o) smoothed output */ + int16_t *current, /* (i) the un enhanced residual for + this block */ + int16_t *surround /* (i) The approximation from the + surrounding sequences */ + ) { + int16_t scale, scale1, scale2; + int16_t A, B, C, denomW16; + int32_t B_W32, denom, num; + int32_t errs; + int32_t w00,w10,w11, endiff, crit; + int32_t w00prim, w10prim, w11_div_w00; + int16_t w11prim; + int16_t bitsw00, bitsw10, bitsw11; + int32_t w11w00, w10w10, w00w00; + uint32_t max1, max2, max12; + + /* compute some inner products (ensure no overflow by first calculating proper scale factor) */ + + w00 = w10 = w11 = 0; + + // Calculate a right shift that will let us sum ENH_BLOCKL pairwise products + // of values from the two sequences without overflowing an int32_t. (The +1 + // in max1 and max2 are because WebRtcSpl_MaxAbsValueW16 will return 2**15 - + // 1 if the input array contains -2**15.) 
+ max1 = WebRtcSpl_MaxAbsValueW16(current, ENH_BLOCKL) + 1; + max2 = WebRtcSpl_MaxAbsValueW16(surround, ENH_BLOCKL) + 1; + max12 = WEBRTC_SPL_MAX(max1, max2); + scale = (64 - 31) - + WebRtcSpl_CountLeadingZeros64((max12 * max12) * (uint64_t)ENH_BLOCKL); + scale=WEBRTC_SPL_MAX(0, scale); + + w00=WebRtcSpl_DotProductWithScale(current,current,ENH_BLOCKL,scale); + w11=WebRtcSpl_DotProductWithScale(surround,surround,ENH_BLOCKL,scale); + w10=WebRtcSpl_DotProductWithScale(surround,current,ENH_BLOCKL,scale); + + if (w00<0) w00 = WEBRTC_SPL_WORD32_MAX; + if (w11<0) w11 = WEBRTC_SPL_WORD32_MAX; + + /* Rescale w00 and w11 to w00prim and w11prim, so that w00prim/w11prim + is in Q16 */ + + bitsw00 = WebRtcSpl_GetSizeInBits(w00); + bitsw11 = WebRtcSpl_GetSizeInBits(w11); + bitsw10 = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(w10)); + scale1 = 31 - bitsw00; + scale2 = 15 - bitsw11; + + if (scale2>(scale1-16)) { + scale2 = scale1 - 16; + } else { + scale1 = scale2 + 16; + } + + w00prim = w00 << scale1; + w11prim = (int16_t) WEBRTC_SPL_SHIFT_W32(w11, scale2); + + /* Perform C = sqrt(w11/w00) (C is in Q11 since (16+6)/2=11) */ + if (w11prim>64) { + endiff = WebRtcSpl_DivW32W16(w00prim, w11prim) << 6; + C = (int16_t)WebRtcSpl_SqrtFloor(endiff); /* C is in Q11 */ + } else { + C = 1; + } + + /* first try enhancement without power-constraint */ + + errs = WebRtcIlbcfix_Smooth_odata(odata, current, surround, C); + + + + /* if constraint violated by first try, add constraint */ + + if ( (6-scale+scale1) > 31) { + crit=0; + } else { + /* crit = 0.05 * w00 (Result in Q-6) */ + crit = WEBRTC_SPL_SHIFT_W32( + WEBRTC_SPL_MUL(ENH_A0, w00prim >> 14), + -(6-scale+scale1)); + } + + if (errs > crit) { + + if( w00 < 1) { + w00=1; + } + + /* Calculate w11*w00, w10*w10 and w00*w00 in the same Q domain */ + + scale1 = bitsw00-15; + scale2 = bitsw11-15; + + if (scale2>scale1) { + scale = scale2; + } else { + scale = scale1; + } + + w11w00 = (int16_t)WEBRTC_SPL_SHIFT_W32(w11, -scale) * + (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale); + + w10w10 = (int16_t)WEBRTC_SPL_SHIFT_W32(w10, -scale) * + (int16_t)WEBRTC_SPL_SHIFT_W32(w10, -scale); + + w00w00 = (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale) * + (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale); + + /* Calculate (w11*w00-w10*w10)/(w00*w00) in Q16 */ + if (w00w00>65536) { + endiff = (w11w00-w10w10); + endiff = WEBRTC_SPL_MAX(0, endiff); + /* denom is in Q16 */ + denom = WebRtcSpl_DivW32W16(endiff, (int16_t)(w00w00 >> 16)); + } else { + denom = 65536; + } + + if( denom > 7){ /* eliminates numerical problems + for if smooth */ + + scale=WebRtcSpl_GetSizeInBits(denom)-15; + + if (scale>0) { + /* denomW16 is in Q(16+scale) */ + denomW16 = (int16_t)(denom >> scale); + + /* num in Q(34-scale) */ + num = ENH_A0_MINUS_A0A0DIV4 >> scale; + } else { + /* denomW16 is in Q16 */ + denomW16=(int16_t)denom; + + /* num in Q34 */ + num=ENH_A0_MINUS_A0A0DIV4; + } + + /* A sqrt( (ENH_A0-(ENH_A0^2)/4)*(w00*w00)/(w11*w00 + w10*w10) ) in Q9 */ + A = (int16_t)WebRtcSpl_SqrtFloor(WebRtcSpl_DivW32W16(num, denomW16)); + + /* B_W32 is in Q30 ( B = 1 - ENH_A0/2 - A * w10/w00 ) */ + scale1 = 31-bitsw10; + scale2 = 21-scale1; + w10prim = w10 == 0 ? 
0 : w10 * (1 << scale1);
+          w00prim = WEBRTC_SPL_SHIFT_W32(w00, -scale2);
+          scale = bitsw00-scale2-15;
+
+          if (scale>0) {
+            w10prim >>= scale;
+            w00prim >>= scale;
+          }
+
+          if ((w00prim>0)&&(w10prim>0)) {
+            w11_div_w00=WebRtcSpl_DivW32W16(w10prim, (int16_t)w00prim);
+
+            if (WebRtcSpl_GetSizeInBits(w11_div_w00)+WebRtcSpl_GetSizeInBits(A)>31) {
+              B_W32 = 0;
+            } else {
+              B_W32 = (int32_t)1073741824 - (int32_t)ENH_A0DIV2 -
+                  WEBRTC_SPL_MUL(A, w11_div_w00);
+            }
+            B = (int16_t)(B_W32 >> 16);  /* B in Q14. */
+          } else {
+            /* No smoothing */
+            A = 0;
+            B = 16384; /* 1 in Q14 */
+          }
+        }
+        else{ /* essentially no difference between cycles;
+                 smoothing not needed */
+
+          A = 0;
+          B = 16384; /* 1 in Q14 */
+        }
+
+        /* create smoothed sequence */
+
+        WebRtcSpl_ScaleAndAddVectors(surround, A, 9,
+                                     current, B, 14,
+                                     odata, ENH_BLOCKL);
+  }
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.h
new file mode 100644
index 0000000000..c8752be64f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Smooth.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * find the smoothed output data
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Smooth(int16_t* odata,   /* (o) smoothed output */
+                          int16_t* current, /* (i) the un enhanced residual for
+                                                   this block */
+                          int16_t* surround /* (i) The approximation from the
+                                                   surrounding sequences */
+                          );
+
+#endif
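The smoother's w00/w10/w11 correlations are computed with a common right shift derived from the largest absolute sample, so that ENH_BLOCKL partial products can be accumulated in 32 bits. A standalone sketch of that guard (toy length four; the real scale derivation is in smooth.c above):

#include <stdint.h>
#include <stdio.h>

/* Dot product with every partial product right-shifted by 'scale':
   the same overflow guard used for w00, w10 and w11. */
static int32_t dot_with_scale(const int16_t* a, const int16_t* b,
                              int n, int scale) {
  int32_t sum = 0;
  for (int i = 0; i < n; i++)
    sum += ((int32_t)a[i] * b[i]) >> scale;
  return sum;
}

int main(void) {
  int16_t cur[4] = {30000, -30000, 30000, -30000};
  int16_t sur[4] = {29000, -29000, 29000, -29000};
  /* Four products of ~8.7e8 would overflow int32_t if summed un-shifted;
     shifting each by 2 keeps the running sum safely in range. */
  printf("w10 = %d\n", (int)dot_with_scale(cur, sur, 4, 2));
  return 0;
}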
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c
new file mode 100644
index 0000000000..9f952bfb93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Smooth_odata.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/smooth_out_data.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "rtc_base/sanitizer.h"
+
+// An s32 + s32 -> s32 addition that's allowed to overflow. (It's still
+// undefined behavior, so not a good idea; this just makes UBSan ignore the
+// violation, so that our old code can continue to do what it's always been
+// doing.)
+static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
+    OverflowingAdd_S32_S32_To_S32(int32_t a, int32_t b) {
+  return a + b;
+}
+
+int32_t WebRtcIlbcfix_Smooth_odata(
+    int16_t *odata,
+    int16_t *psseq,
+    int16_t *surround,
+    int16_t C)
+{
+  int i;
+
+  int16_t err;
+  int32_t errs;
+
+  for(i=0;i<80;i++) {
+    odata[i]= (int16_t)((C * surround[i] + 1024) >> 11);
+  }
+
+  errs=0;
+  for(i=0;i<80;i++) {
+    err = (psseq[i] - odata[i]) >> 3;
+    errs = OverflowingAdd_S32_S32_To_S32(errs, err * err);  // errs in Q-6
+  }
+
+  return errs;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.h
new file mode 100644
index 0000000000..318e7b04a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Smooth_odata.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_OUT_DATA_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_OUT_DATA_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * help function to WebRtcIlbcfix_Smooth()
+ *---------------------------------------------------------------*/
+
+int32_t WebRtcIlbcfix_Smooth_odata(int16_t* odata,
+                                   int16_t* psseq,
+                                   int16_t* surround,
+                                   int16_t C);
+
+#endif
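The `(C * surround[i] + 1024) >> 11` step above is a round-to-nearest conversion out of the Q11 product domain; 1024 is 0.5 in Q11. One value worked through (gain and sample are illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int16_t C = 1536; /* gain of 0.75 in Q11 */
  int16_t x = 100;  /* one surround sample */
  /* product is in Q11; adding 1024 (= 0.5) before >>11 rounds to nearest */
  int16_t y = (int16_t)((C * x + 1024) >> 11);
  printf("%d\n", y); /* 75, i.e. 0.75 * 100 */
  return 0;
}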
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c
new file mode 100644
index 0000000000..c3a24750f0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SortSq.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/sort_sq.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * scalar quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SortSq(
+    int16_t *xq,       /* (o) the quantized value */
+    int16_t *index,    /* (o) the quantization index */
+    int16_t x,         /* (i) the value to quantize */
+    const int16_t *cb, /* (i) the quantization codebook */
+    int16_t cb_size    /* (i) the size of the quantization codebook */
+                          ){
+  int i;
+
+  if (x <= cb[0]) {
+    *index = 0;
+    *xq = cb[0];
+  } else {
+    i = 0;
+    while ((x > cb[i]) && (i < (cb_size-1))) {
+      i++;
+    }
+
+    if (x > (((int32_t)cb[i] + cb[i - 1] + 1) >> 1)) {
+      *index = i;
+      *xq = cb[i];
+    } else {
+      *index = i - 1;
+      *xq = cb[i - 1];
+    }
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.h
new file mode 100644
index 0000000000..02028dae93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SortSq.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SORT_SQ_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SORT_SQ_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * scalar quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SortSq(
+    int16_t* xq,       /* (o) the quantized value */
+    int16_t* index,    /* (o) the quantization index */
+    int16_t x,         /* (i) the value to quantize */
+    const int16_t* cb, /* (i) the quantization codebook */
+    int16_t cb_size    /* (i) the size of the quantization codebook */
+    );
+
+#endif
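WebRtcIlbcfix_SortSq above walks a sorted codebook until the input is bracketed, then resolves the final choice by comparing against the rounded midpoint of the two straddling entries. The same decision in miniature, with a made-up four-entry codebook:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int16_t cb[4] = {-300, -100, 100, 300}; /* sorted toy codebook */
  int16_t x = 150, xq, index;
  int i = 0;
  if (x <= cb[0]) {
    index = 0; xq = cb[0];
  } else {
    while ((x > cb[i]) && (i < 3)) i++;
    /* x now lies at or below cb[i]; pick the nearer of cb[i-1], cb[i] */
    if (x > (int16_t)(((int32_t)cb[i] + cb[i - 1] + 1) >> 1)) {
      index = (int16_t)i;       xq = cb[i];
    } else {
      index = (int16_t)(i - 1); xq = cb[i - 1];
    }
  }
  printf("index=%d xq=%d\n", index, xq); /* index=2 xq=100 (nearest to 150) */
  return 0;
}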
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c
new file mode 100644
index 0000000000..c1f04d2287
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SplitVq.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/split_vq.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/vq3.h"
+#include "modules/audio_coding/codecs/ilbc/vq4.h"
+
+/*----------------------------------------------------------------*
+ * split vector quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SplitVq(
+    int16_t *qX,     /* (o) the quantized vector in Q13 */
+    int16_t *index,  /* (o) a vector of indexes for all vector
+                            codebooks in the split */
+    int16_t *X,      /* (i) the vector to quantize */
+    int16_t *CB,     /* (i) the quantizer codebook in Q13 */
+    int16_t *dim,    /* (i) the dimension of X and qX */
+    int16_t *cbsize  /* (i) the number of vectors in the codebook */
+                           ) {
+
+  int16_t *qXPtr, *indexPtr, *CBPtr, *XPtr;
+
+  /* Quantize X with the 3 vector quantization tables */
+
+  qXPtr=qX;
+  indexPtr=index;
+  CBPtr=CB;
+  XPtr=X;
+  WebRtcIlbcfix_Vq3(qXPtr, indexPtr, CBPtr, XPtr, cbsize[0]);
+
+  qXPtr+=3;
+  indexPtr+=1;
+  CBPtr+=(dim[0]*cbsize[0]);
+  XPtr+=3;
+  WebRtcIlbcfix_Vq3(qXPtr, indexPtr, CBPtr, XPtr, cbsize[1]);
+
+  qXPtr+=3;
+  indexPtr+=1;
+  CBPtr+=(dim[1]*cbsize[1]);
+  XPtr+=3;
+  WebRtcIlbcfix_Vq4(qXPtr, indexPtr, CBPtr, XPtr, cbsize[2]);
+
+  return;
+}
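The 10-dimensional LSF vector is quantized above as three independent sub-vectors (dimensions 3, 3 and 4), so the search and bit cost are the sum of three small codebook lookups rather than one enormous one. A sketch of the bit arithmetic, using the split sizes given in RFC 3951 for iLBC (64, 128 and 128 entries):

#include <stdio.h>

int main(void) {
  const int dim[3] = {3, 3, 4};          /* sub-vector dimensions */
  const int entries[3] = {64, 128, 128}; /* codebook sizes per RFC 3951 */
  int bits = 0, d = 0;
  for (int i = 0; i < 3; i++) {
    int b = 0;
    while ((1 << b) < entries[i]) b++; /* log2 of the codebook size */
    bits += b;
    d += dim[i];
  }
  printf("%d-dim LSF vector -> %d bits per set\n", d, bits); /* 10 -> 20 */
  return 0;
}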
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.h
new file mode 100644
index 0000000000..e4b02a2bc2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SplitVq.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SPLIT_VQ_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SPLIT_VQ_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * split vector quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SplitVq(
+    int16_t* qX,     /* (o) the quantized vector in Q13 */
+    int16_t* index,  /* (o) a vector of indexes for all vector
+                            codebooks in the split */
+    int16_t* X,      /* (i) the vector to quantize */
+    int16_t* CB,     /* (i) the quantizer codebook in Q13 */
+    int16_t* dim,    /* (i) the dimension of X and qX */
+    int16_t* cbsize  /* (i) the number of vectors in the codebook */
+    );
+
+#endif
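state_construct.c below builds its filter numerator by pairing the synthesis denominator A(z) with its own coefficients in reverse order, which yields an all-pass response (z^-N A(1/z) over A(z)): the amplitude spectrum is preserved while the circular convolution recovers the time-domain start state. The reversal in miniature (coefficient values are illustrative Q12 numbers, not a real filter):

#include <stdint.h>
#include <stdio.h>

#define ORDER 10

int main(void) {
  /* Assumed Q12 synthesis denominator a[0..ORDER]; a[0] = 1.0 in Q12. */
  int16_t a[ORDER + 1] = {4096, -2000, 900, -300, 120, -50, 25, -12, 6, -3, 1};
  int16_t num[ORDER + 1];
  /* numerator[k] = a[ORDER - k]: same taps, reversed, as in the file below */
  for (int k = 0; k < ORDER; k++) num[k] = a[ORDER - k];
  num[ORDER] = a[0];
  for (int k = 0; k <= ORDER; k++) printf("%d ", num[k]);
  printf("\n");
  return 0;
}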
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c
new file mode 100644
index 0000000000..c58086c03b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_StateConstruct.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/state_construct.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * decoding of the start state
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_StateConstruct(
+    size_t idxForMax,   /* (i) 6-bit index for the quantization of
+                               max amplitude */
+    int16_t *idxVec,    /* (i) vector of quantization indexes */
+    int16_t *syntDenum, /* (i) synthesis filter denominator */
+    int16_t *Out_fix,   /* (o) the decoded state vector */
+    size_t len          /* (i) length of a state vector */
+                                  ) {
+  size_t k;
+  int16_t maxVal;
+  int16_t *tmp1, *tmp2, *tmp3;
+  /* Stack based */
+  int16_t numerator[1+LPC_FILTERORDER];
+  int16_t sampleValVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER];
+  int16_t sampleMaVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER];
+  int16_t *sampleVal = &sampleValVec[LPC_FILTERORDER];
+  int16_t *sampleMa = &sampleMaVec[LPC_FILTERORDER];
+  int16_t *sampleAr = &sampleValVec[LPC_FILTERORDER];
+
+  /* initialization of coefficients */
+
+  for (k=0; k<LPC_FILTERORDER; k++) {
+    numerator[k] = syntDenum[LPC_FILTERORDER-k];
+  }
+  numerator[LPC_FILTERORDER] = syntDenum[0];
+
+  /* decoding of the maximum value */
+
+  maxVal = WebRtcIlbcfix_kFrgQuantMod[idxForMax];
+
+  /* decoding of the sample values */
+  tmp1 = sampleVal;
+  tmp2 = &idxVec[len-1];
+
+  if (idxForMax<37) {
+    for(k=0; k<len; k++){
+      /* maxVal is in Q8 and the Q13 sample codebook gives a Q21 product;
+         adding 2097152 (= 0.5 in Q22) and shifting by 22 rounds the
+         result into Q(-1) */
+      (*tmp1) = (int16_t)((maxVal*WebRtcIlbcfix_kStateSq3[*tmp2] + 2097152) >>
+          22);
+      tmp1++;
+      tmp2--;
+    }
+  } else if (idxForMax<59) {
+    for(k=0; k<len; k++){
+      /* maxVal is in Q5; adding 262144 (= 0.5 in Q19) and shifting by 19
+         rounds the result into Q(-1) */
+      (*tmp1) = (int16_t)((maxVal*WebRtcIlbcfix_kStateSq3[*tmp2] + 262144) >>
+          19);
+      tmp1++;
+      tmp2--;
+    }
+  } else {
+    for(k=0; k<len; k++){
+      /* maxVal is in Q3; adding 65536 (= 0.5 in Q17) and shifting by 17
+         rounds the result into Q(-1) */
+      (*tmp1) = (int16_t)((maxVal*WebRtcIlbcfix_kStateSq3[*tmp2] + 65536) >>
+          17);
+      tmp1++;
+      tmp2--;
+    }
+  }
+
+  /* Set the rest of the data to zero */
+  WebRtcSpl_MemSetW16(&sampleVal[len], 0, len);
+
+  /* circular convolution with all-pass filter */
+
+  /* Set the state to zero */
+  WebRtcSpl_MemSetW16(sampleValVec, 0, (LPC_FILTERORDER));
+
+  /* Run MA filter + AR filter */
+  WebRtcSpl_FilterMAFastQ12(
+      sampleVal, sampleMa,
+      numerator, LPC_FILTERORDER+1, len + LPC_FILTERORDER);
+  WebRtcSpl_MemSetW16(&sampleMa[len + LPC_FILTERORDER], 0, (len - LPC_FILTERORDER));
+  WebRtcSpl_FilterARFastQ12(
+      sampleMa, sampleAr,
+      syntDenum, LPC_FILTERORDER+1, 2 * len);
+
+  tmp1 = &sampleAr[len-1];
+  tmp2 = &sampleAr[2*len-1];
+  tmp3 = Out_fix;
+  for(k=0; k<len; k++){
+    (*tmp3) = (*tmp1) + (*tmp2);
+    tmp1--;
+    tmp2--;
+    tmp3++;
+  }
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h
new file mode 100644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_StateConstruct.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_CONSTRUCT_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_CONSTRUCT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Generate the start state from the quantized indexes
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_StateConstruct(
+    size_t idxForMax,   /* (i) 6-bit index for the quantization of
+                               max amplitude */
+    int16_t* idxVec,    /* (i) vector of quantization indexes */
+    int16_t* syntDenum, /* (i) synthesis filter denominator */
+    int16_t* Out_fix,   /* (o) the decoded state vector */
+    size_t len          /* (i) length of a state vector */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c
new file mode 100644
index 0000000000..7227ac9d45
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_StateSearch.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/state_search.h"
+
+#include "modules/audio_coding/codecs/ilbc/abs_quant.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * encoding of start state
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_StateSearch(
+    IlbcEncoder *iLBCenc_inst,
+    /* (i) Encoder instance */
+    iLBC_bits *iLBC_encbits,/* (i/o) Encoded bits (output idxForMax
+                                     and idxVec, input state_first) */
+    int16_t *residual,   /* (i) target residual vector */
+    int16_t *syntDenum,  /* (i) lpc synthesis filter */
+    int16_t *weightDenum /* (i) weighting filter denominator */
+                               ) {
+  size_t k, index;
+  int16_t maxVal;
+  int16_t scale, shift;
+  int32_t maxValsq;
+  int16_t scaleRes;
+  int16_t max;
+  int i;
+  /* Stack based */
+  int16_t numerator[1+LPC_FILTERORDER];
+  int16_t residualLongVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER];
+  int16_t sampleMa[2*STATE_SHORT_LEN_30MS];
+  int16_t *residualLong = &residualLongVec[LPC_FILTERORDER];
+  int16_t *sampleAr = residualLong;
+
+  /* Scale to maximum 12 bits to avoid saturation in circular convolution filter */
+  max = WebRtcSpl_MaxAbsValueW16(residual, iLBCenc_inst->state_short_len);
+  scaleRes = WebRtcSpl_GetSizeInBits(max)-12;
+  scaleRes = WEBRTC_SPL_MAX(0, scaleRes);
+  /* Set up the filter coefficients for the circular convolution */
+  for (i=0; i<LPC_FILTERORDER+1; i++) {
+    numerator[i] = (syntDenum[LPC_FILTERORDER-i]>>scaleRes);
+  }
+
+  /* Copy the residual to a temporary buffer that we can filter
+   * and set the remaining samples to zero.
+   */
+  WEBRTC_SPL_MEMCPY_W16(residualLong, residual, iLBCenc_inst->state_short_len);
+  WebRtcSpl_MemSetW16(residualLong + iLBCenc_inst->state_short_len, 0, iLBCenc_inst->state_short_len);
+
+  /* Run the Zero-Pole filter (circular convolution) */
+  WebRtcSpl_MemSetW16(residualLongVec, 0, LPC_FILTERORDER);
+  WebRtcSpl_FilterMAFastQ12(residualLong, sampleMa, numerator,
+                            LPC_FILTERORDER + 1,
+                            iLBCenc_inst->state_short_len + LPC_FILTERORDER);
+  WebRtcSpl_MemSetW16(&sampleMa[iLBCenc_inst->state_short_len + LPC_FILTERORDER], 0, iLBCenc_inst->state_short_len - LPC_FILTERORDER);
+
+  WebRtcSpl_FilterARFastQ12(
+      sampleMa, sampleAr,
+      syntDenum, LPC_FILTERORDER+1, 2 * iLBCenc_inst->state_short_len);
+
+  for(k=0;k<iLBCenc_inst->state_short_len;k++){
+    sampleAr[k] += sampleAr[k+iLBCenc_inst->state_short_len];
+  }
+
+  /* Find maximum absolute value in the vector */
+  maxVal=WebRtcSpl_MaxAbsValueW16(sampleAr, iLBCenc_inst->state_short_len);
+
+  /* Find the best index */
+
+  if ((((int32_t)maxVal)<<scaleRes)<23170) {
+    maxValsq=((int32_t)maxVal*maxVal)<<(2+2*scaleRes);
+  } else {
+    maxValsq=(int32_t)WEBRTC_SPL_WORD32_MAX;
+  }
+
+  index=0;
+  for (i=0;i<63;i++) {
+
+    if (maxValsq>=WebRtcIlbcfix_kChooseFrgQuant[i]) {
+      index=i+1;
+    } else {
+      i=63;
+    }
+  }
+  iLBC_encbits->idxForMax=index;
+
+  /* Rescale the vector before quantization */
+  scale=WebRtcIlbcfix_kScale[index];
+
+  if (index<27) { /* scale table is in Q16, fout[] is in Q(-1) and we want the result to be in Q11 */
+    shift=4;
+  } else { /* scale table is in Q21, fout[] is in Q(-1) and we want the result to be in Q11 */
+    shift=9;
+  }
+
+  /* Set up vectors for AbsQuant and rescale it with the scale factor */
+  WebRtcSpl_ScaleVectorWithSat(sampleAr, sampleAr, scale,
+                               iLBCenc_inst->state_short_len, (int16_t)(shift-scaleRes));
+
+  /* Quantize the values in fout[] */
+  WebRtcIlbcfix_AbsQuant(iLBCenc_inst, iLBC_encbits, sampleAr, weightDenum);
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h
new file mode 100644
index 0000000000..6469138a0e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_StateSearch.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_SEARCH_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_SEARCH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * encoding of start state
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_StateSearch(
+    IlbcEncoder* iLBCenc_inst,
+    /* (i) Encoder instance */
+    iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits (output idxForMax
+                                      and idxVec, input state_first) */
+    int16_t* residual,   /* (i) target residual vector */
+    int16_t* syntDenum,  /* (i) lpc synthesis filter */
+    int16_t* weightDenum /* (i) weighting filter denominator */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c
new file mode 100644
index 0000000000..bbafc1a2ed
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SwapBytes.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/swap_bytes.h"
+
+/*----------------------------------------------------------------*
+ * Swap bytes (to simplify operations on Little Endian machines)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SwapBytes(
+    const uint16_t* input, /* (i) the sequence to swap */
+    size_t wordLength,     /* (i) number of uint16_t to swap */
+    uint16_t* output       /* (o) the swapped sequence */
+    ) {
+  size_t k;
+  for (k = wordLength; k > 0; k--) {
+    *output++ = (*input >> 8)|(*input << 8);
+    input++;
+  }
+}
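Each 16-bit word above is swapped with a single rotate-by-8, which makes the packed bitstream byte order independent of the host's endianness. For example:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint16_t in[2] = {0x1234, 0xABCD}, out[2];
  for (int k = 0; k < 2; k++)
    out[k] = (uint16_t)((in[k] >> 8) | (in[k] << 8)); /* same as SwapBytes */
  printf("%04X %04X\n", out[0], out[1]); /* 3412 CDAB */
  return 0;
}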
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.h
new file mode 100644
index 0000000000..c59bf3068a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_SwapBytes.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SWAP_BYTES_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SWAP_BYTES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Swap bytes (to simplify operations on Little Endian machines)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_SwapBytes(
+    const uint16_t* input, /* (i) the sequence to swap */
+    size_t wordLength,     /* (i) number of uint16_t to swap */
+    uint16_t* output       /* (o) the swapped sequence */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c
new file mode 100644
index 0000000000..e0ca075eda
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ iLBC_test.c
+
+******************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "modules/audio_coding/codecs/ilbc/ilbc.h"
+
+/*---------------------------------------------------------------*
+ *  Main program to test iLBC encoding and decoding
+ *
+ *  Usage:
+ *    exefile_name.exe <infile> <bytefile> <outfile> <channel>
+ *
+ *    <infile>   : Input file, speech for encoder (16-bit pcm file)
+ *    <bytefile> : Bit stream output from the encoder
+ *    <outfile>  : Output file, decoded speech (16-bit pcm file)
+ *    <channel>  : Bit error file, optional (16-bit)
+ *                     1 - Packet received correctly
+ *                     0 - Packet Lost
+ *
+ *--------------------------------------------------------------*/
+
+#define BLOCKL_MAX 240
+#define ILBCNOOFWORDS_MAX 25
+
+
+int main(int argc, char* argv[])
+{
+
+  FILE *ifileid,*efileid,*ofileid, *cfileid;
+  int16_t data[BLOCKL_MAX];
+  uint8_t encoded_data[2 * ILBCNOOFWORDS_MAX];
+  int16_t decoded_data[BLOCKL_MAX];
+  int len_int, mode;
+  short pli;
+  int blockcount = 0;
+  size_t frameLen, len, len_i16s;
+  int16_t speechType;
+  IlbcEncoderInstance *Enc_Inst;
+  IlbcDecoderInstance *Dec_Inst;
+
+#ifdef __ILBC_WITH_40BITACC
+  /* Doublecheck that long long exists */
+  if (sizeof(long)>=sizeof(long long)) {
+    fprintf(stderr, "40-bit simulation is not supported on this platform\n");
+    exit(0);
+  }
+#endif
+
+  /* get arguments and open files */
+
+  if ((argc!=5) && (argc!=6)) {
+    fprintf(stderr,
+            "\n*-----------------------------------------------*\n");
+    fprintf(stderr,
+            "   %s <20,30> input encoded decoded (channel)\n\n",
+            argv[0]);
+    fprintf(stderr,
+            "   mode    : Frame size for the encoding/decoding\n");
+    fprintf(stderr,
+            "                 20 - 20 ms\n");
+    fprintf(stderr,
+            "                 30 - 30 ms\n");
+    fprintf(stderr,
+            "   input   : Speech for encoder (16-bit pcm file)\n");
+    fprintf(stderr,
+            "   encoded : Encoded bit stream\n");
+    fprintf(stderr,
+            "   decoded : Decoded speech (16-bit pcm file)\n");
+    fprintf(stderr,
+            "   channel : Packet loss pattern, optional (16-bit)\n");
+    fprintf(stderr,
+            "                 1 - Packet received correctly\n");
+    fprintf(stderr,
+            "                 0 - Packet Lost\n");
+    fprintf(stderr,
+            "*-----------------------------------------------*\n\n");
+    exit(1);
+  }
+  mode=atoi(argv[1]);
+  if (mode != 20 && mode != 30) {
+    fprintf(stderr,"Wrong mode %s, must be 20, or 30\n",
+            argv[1]);
+    exit(2);
+  }
+  if ( (ifileid=fopen(argv[2],"rb")) == NULL) {
+    fprintf(stderr,"Cannot open input file %s\n", argv[2]);
+    exit(2);}
+  if ( (efileid=fopen(argv[3],"wb")) == NULL) {
+    fprintf(stderr, "Cannot open encoded file %s\n",
+            argv[3]); exit(1);}
+  if ( (ofileid=fopen(argv[4],"wb")) == NULL) {
+    fprintf(stderr, "Cannot open decoded file %s\n",
+            argv[4]); exit(1);}
+  if (argc==6) {
+    if( (cfileid=fopen(argv[5],"rb")) == NULL) {
+      fprintf(stderr, "Cannot open channel file %s\n",
+              argv[5]);
+      exit(1);
+    }
+  } else {
+    cfileid=NULL;
+  }
+
+  /* print info */
+
+  fprintf(stderr, "\n");
+  fprintf(stderr,
+          "*---------------------------------------------------*\n");
+  fprintf(stderr,
+          "*                                                   *\n");
+  fprintf(stderr,
+          "*      iLBC test program                            *\n");
+  fprintf(stderr,
+          "*                                                   *\n");
+  fprintf(stderr,
+          "*                                                   *\n");
+  fprintf(stderr,
+          "*---------------------------------------------------*\n");
+  fprintf(stderr,"\nMode           : %2d ms\n", mode);
+  fprintf(stderr,"Input file     : %s\n", argv[2]);
+  fprintf(stderr,"Encoded file   : %s\n", argv[3]);
+  fprintf(stderr,"Output file    : %s\n", argv[4]);
+  if (argc==6) {
+    fprintf(stderr,"Channel file   : %s\n", argv[5]);
+  }
+  fprintf(stderr,"\n");
+
+  /* Create structs */
+  WebRtcIlbcfix_EncoderCreate(&Enc_Inst);
+  WebRtcIlbcfix_DecoderCreate(&Dec_Inst);
+
+
+  /* Initialization */
+
+  WebRtcIlbcfix_EncoderInit(Enc_Inst, mode);
+  WebRtcIlbcfix_DecoderInit(Dec_Inst, mode);
+  frameLen = (size_t)(mode*8);
+
+  /* loop over input blocks */
+
+  while (fread(data,sizeof(int16_t),frameLen,ifileid) == frameLen) {
+
+    blockcount++;
+
+    /* encoding */
+
+    fprintf(stderr, "--- Encoding block %i --- ",blockcount);
+    len_int = WebRtcIlbcfix_Encode(Enc_Inst, data, frameLen, encoded_data);
+    if (len_int < 0) {
+      fprintf(stderr, "Error encoding\n");
+      exit(0);
+    }
+    len = (size_t)len_int;
+    fprintf(stderr, "\r");
+
+    /* write byte file */
+
+    len_i16s = (len + 1) / sizeof(int16_t);
+    if (fwrite(encoded_data, sizeof(int16_t), len_i16s, efileid) != len_i16s) {
+      return -1;
+    }
+
+    /* get channel data if provided */
+    if (argc==6) {
+      if (fread(&pli, sizeof(int16_t), 1, cfileid)) {
+        if ((pli!=0)&&(pli!=1)) {
+          fprintf(stderr, "Error in channel file\n");
+          exit(0);
+        }
+        if (pli==0) {
+          /* Packet loss -> remove info from frame */
+          memset(encoded_data, 0,
+                 sizeof(int16_t)*ILBCNOOFWORDS_MAX);
+        }
+      } else {
+        fprintf(stderr, "Error. Channel file too short\n");
+        exit(0);
+      }
+    } else {
+      pli=1;
+    }
+
+    /* decoding */
+
+    fprintf(stderr, "--- Decoding block %i --- ",blockcount);
+    if (pli==1) {
+      len_int=WebRtcIlbcfix_Decode(Dec_Inst, encoded_data,
+                                   len, decoded_data,&speechType);
+      if (len_int < 0) {
+        fprintf(stderr, "Error decoding\n");
+        exit(0);
+      }
+      len = (size_t)len_int;
+    } else {
+      len=WebRtcIlbcfix_DecodePlc(Dec_Inst, decoded_data, 1);
+    }
+    fprintf(stderr, "\r");
+
+    /* write output file */
+
+    if (fwrite(decoded_data, sizeof(int16_t), len, ofileid) != len) {
+      return -1;
+    }
+  }
+
+  /* close files */
+
+  fclose(ifileid); fclose(efileid); fclose(ofileid);
+  if (argc==6) {
+    fclose(cfileid);
+  }
+
+  /* Free structs */
+  WebRtcIlbcfix_EncoderFree(Enc_Inst);
+  WebRtcIlbcfix_DecoderFree(Dec_Inst);
+
+
+  printf("\nDone with simulation\n\n");
+
+  return(0);
+}
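The `len_i16s = (len + 1) / sizeof(int16_t)` line above rounds the byte count up to whole int16 words so a trailing odd byte would not be dropped; iLBC frames are in fact 38 or 50 bytes (both even), so the rounding is defensive. Quick check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  for (size_t len = 37; len <= 40; len++) {
    size_t words = (len + 1) / sizeof(int16_t); /* round byte count up */
    printf("%zu bytes -> %zu int16 words\n", len, words);
  }
  return 0;
}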
fprintf(stderr,"Cannot open channel file file %s\n", argv[5]); + exit(2); + } + /* print info */ + fprintf(stderr, "\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* iLBCtest *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); +#ifdef SPLIT_10MS + fprintf(stderr,"\n10ms split with raw mode: %2d ms\n", mode); +#else + fprintf(stderr,"\nMode : %2d ms\n", mode); +#endif + fprintf(stderr,"\nInput file : %s\n", argv[2]); + fprintf(stderr,"Coded file : %s\n", argv[3]); + fprintf(stderr,"Output file : %s\n\n", argv[4]); + fprintf(stderr,"Channel file : %s\n\n", argv[5]); + +#ifdef JUNK_DATA + srand(random_seed); + + if ( (seedfile = fopen(SEED_FILE, "a+t") ) == NULL ) { + fprintf(stderr, "Error: Could not open file %s\n", SEED_FILE); + } + else { + fprintf(seedfile, "%u\n", random_seed); + fclose(seedfile); + } +#endif + + /* Initialization */ + WebRtcIlbcfix_EncoderInit(Enc_Inst, mode); + WebRtcIlbcfix_DecoderInit(Dec_Inst, mode); + + /* loop over input blocks */ +#ifdef SPLIT_10MS + readlen = 80; +#else + readlen = (size_t)(mode << 3); +#endif + while(fread(data, sizeof(short), readlen, ifileid) == readlen) { + blockcount++; + + /* encoding */ + fprintf(stderr, "--- Encoding block %i --- ",blockcount); + len_int=WebRtcIlbcfix_Encode(Enc_Inst, data, readlen, encoded_data); + if (len_int < 0) { + fprintf(stderr, "Error encoding\n"); + exit(0); + } + len = (size_t)len_int; + fprintf(stderr, "\r"); + +#ifdef JUNK_DATA + for ( i = 0; i < len; i++) { + encoded_data[i] = (short) (encoded_data[i] + (short) rand()); + } +#endif + /* write byte file */ + if(len != 0){ //len may be 0 in 10ms split case + fwrite(encoded_data,1,len,efileid); + + /* get channel data if provided */ + if (argc==6) { + if (fread(&pli, sizeof(int16_t), 1, chfileid)) { + if ((pli!=0)&&(pli!=1)) { + fprintf(stderr, "Error in channel file\n"); + exit(0); + } + if (pli==0) { + /* Packet loss -> remove info from frame */ + memset(encoded_data, 0, sizeof(int16_t)*25); + } + } else { + fprintf(stderr, "Error. Channel file too short\n"); + exit(0); + } + } else { + pli=1; + } + + /* decoding */ + fprintf(stderr, "--- Decoding block %i --- ",blockcount); + if (pli==1) { + len_int = WebRtcIlbcfix_Decode(Dec_Inst, encoded_data, len, data, + &speechType); + if (len_int < 0) { + fprintf(stderr, "Error decoding\n"); + exit(0); + } + len = (size_t)len_int; + } else { + len=WebRtcIlbcfix_DecodePlc(Dec_Inst, data, 1); + } + fprintf(stderr, "\r"); + + /* write output file */ + fwrite(data,sizeof(short),len,ofileid); + } + } + +#ifdef JUNK_DATA + if ( (seedfile = fopen(SEED_FILE, "a+t") ) == NULL ) { + fprintf(stderr, "Error: Could not open file %s\n", SEED_FILE); + } + else { + fprintf(seedfile, "ok\n\n"); + fclose(seedfile); + } +#endif + + /* free structs */ + WebRtcIlbcfix_EncoderFree(Enc_Inst); + WebRtcIlbcfix_DecoderFree(Dec_Inst); + + /* close files */ + fclose(ifileid); + fclose(efileid); + fclose(ofileid); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c new file mode 100644 index 0000000000..a62a42edf6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ iLBC_test.c
+
+******************************************************************/
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+#include "modules/audio_coding/codecs/ilbc/init_encode.h"
+#include "modules/audio_coding/codecs/ilbc/encode.h"
+#include "modules/audio_coding/codecs/ilbc/init_decode.h"
+#include "modules/audio_coding/codecs/ilbc/decode.h"
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+#include "modules/audio_coding/codecs/ilbc/ilbc.h"
+
+#define ILBCNOOFWORDS_MAX (NO_OF_BYTES_30MS)/2
+
+/* Runtime statistics */
+#include <time.h>
+/* #define CLOCKS_PER_SEC 1000 */
+
+/*----------------------------------------------------------------*
+ * Encoder interface function
+ *---------------------------------------------------------------*/
+
+short encode(                         /* (o) Number of bytes encoded */
+    IlbcEncoder *iLBCenc_inst,        /* (i/o) Encoder instance */
+    int16_t *encoded_data,            /* (o) The encoded bytes */
+    int16_t *data                     /* (i) The signal block to encode */
+             ){
+
+  /* do the actual encoding */
+  WebRtcIlbcfix_Encode((uint16_t *)encoded_data, data, iLBCenc_inst);
+
+  return (iLBCenc_inst->no_of_bytes);
+}
+
+/*----------------------------------------------------------------*
+ * Decoder interface function
+ *---------------------------------------------------------------*/
+
+short decode(                       /* (o) Number of decoded samples */
+    IlbcDecoder *iLBCdec_inst,      /* (i/o) Decoder instance */
+    short *decoded_data,            /* (o) Decoded signal block */
+    short *encoded_data,            /* (i) Encoded bytes */
+    short mode                      /* (i) 0=PL, 1=Normal */
+             ){
+
+  /* check if mode is valid */
+
+  if (mode<0 || mode>1) {
+    printf("\nERROR - Wrong mode - 0, 1 allowed\n"); exit(3);}
+
+  /* do actual decoding of block */
+
+  WebRtcIlbcfix_Decode(decoded_data, (uint16_t *)encoded_data,
+                       iLBCdec_inst, mode);
+
+  return (iLBCdec_inst->blockl);
+}
+
+/*----------------------------------------------------------------*
+ * Main program to test iLBC encoding and decoding
+ *
+ * Usage:
+ * exefile_name.exe
+ *
+ *---------------------------------------------------------------*/
+
+#define MAXFRAMES 10000
+#define MAXFILELEN (BLOCKL_MAX*MAXFRAMES)
+
+int main(int argc, char* argv[])
+{
+
+  /* Runtime statistics */
+
+  float starttime1, starttime2;
+  float runtime1, runtime2;
+  float outtime;
+
+  FILE *ifileid,*efileid,*ofileid, *chfileid;
+  short *inputdata, *encodeddata, *decodeddata;
+  short *channeldata;
+  int blockcount = 0, noOfBlocks=0, i, noOfLostBlocks=0;
+  short mode;
+  IlbcEncoder Enc_Inst;
+  IlbcDecoder Dec_Inst;
+
+  short frameLen;
+  short count;
+#ifdef SPLIT_10MS
+  short size;
+#endif
+
+  inputdata=(short*) malloc(MAXFILELEN*sizeof(short));
+  if (inputdata==NULL) {
+    fprintf(stderr,"Could not allocate memory for vector\n");
+    exit(0);
+  }
+  encodeddata=(short*) malloc(ILBCNOOFWORDS_MAX*MAXFRAMES*sizeof(short));
+  if (encodeddata==NULL) {
+    fprintf(stderr,"Could not allocate memory for vector\n");
+    free(inputdata);
+    exit(0);
+  }
+  decodeddata=(short*) malloc(MAXFILELEN*sizeof(short));
+  if (decodeddata==NULL) {
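+    /* The encode()/decode() wrappers above hide the instance handling:
+       encode() returns the number of bytes produced for one speech block
+       and decode() the number of reconstructed samples (mode 0 = packet
+       loss, 1 = normal). A minimal round trip in the spirit of this test
+       program -- a sketch only, with the input block assumed to be filled
+       elsewhere:
+
+         IlbcEncoder enc;
+         IlbcDecoder dec;
+         short pcm[BLOCKL_MAX];               // one block of input samples
+         short bytes[ILBCNOOFWORDS_MAX];      // encoded words for one block
+         short out[BLOCKL_MAX];               // decoded samples
+         short nbytes, nsamples;
+
+         WebRtcIlbcfix_EncoderInit(&enc, 30);     // 30 ms mode
+         WebRtcIlbcfix_DecoderInit(&dec, 30, 1);
+         nbytes = encode(&enc, bytes, pcm);
+         nsamples = decode(&dec, out, bytes, 1);  // 1 = normal decoding
+    */
+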
fprintf(stderr,"Could not allocate memory for vector\n"); + free(inputdata); + free(encodeddata); + exit(0); + } + channeldata=(short*) malloc(MAXFRAMES*sizeof(short)); + if (channeldata==NULL) { + fprintf(stderr,"Could not allocate memory for vector\n"); + free(inputdata); + free(encodeddata); + free(decodeddata); + exit(0); + } + + /* get arguments and open files */ + + if (argc != 6 ) { + fprintf(stderr, "%s mode inputfile bytefile outputfile channelfile\n", + argv[0]); + fprintf(stderr, "Example:\n"); + fprintf(stderr, "%s <30,20> in.pcm byte.dat out.pcm T30.0.dat\n", argv[0]); + exit(1); + } + mode=atoi(argv[1]); + if (mode != 20 && mode != 30) { + fprintf(stderr,"Wrong mode %s, must be 20, or 30\n", argv[1]); + exit(2); + } + if ( (ifileid=fopen(argv[2],"rb")) == NULL) { + fprintf(stderr,"Cannot open input file %s\n", argv[2]); + exit(2);} + if ( (efileid=fopen(argv[3],"wb")) == NULL) { + fprintf(stderr, "Cannot open channelfile file %s\n", + argv[3]); exit(3);} + if( (ofileid=fopen(argv[4],"wb")) == NULL) { + fprintf(stderr, "Cannot open output file %s\n", + argv[4]); exit(3);} + if ( (chfileid=fopen(argv[5],"rb")) == NULL) { + fprintf(stderr,"Cannot open channel file file %s\n", argv[5]); + exit(2);} + + + /* print info */ +#ifndef PRINT_MIPS + fprintf(stderr, "\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* iLBCtest *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); +#ifdef SPLIT_10MS + fprintf(stderr,"\n10ms split with raw mode: %2d ms\n", mode); +#else + fprintf(stderr,"\nMode : %2d ms\n", mode); +#endif + fprintf(stderr,"\nInput file : %s\n", argv[2]); + fprintf(stderr,"Coded file : %s\n", argv[3]); + fprintf(stderr,"Output file : %s\n\n", argv[4]); + fprintf(stderr,"Channel file : %s\n\n", argv[5]); +#endif + + /* Initialization */ + + WebRtcIlbcfix_EncoderInit(&Enc_Inst, mode); + WebRtcIlbcfix_DecoderInit(&Dec_Inst, mode, 1); + + /* extract the input file and channel file */ + +#ifdef SPLIT_10MS + frameLen = (mode==20)? 80:160; + fread(Enc_Inst.past_samples, sizeof(short), frameLen, ifileid); + Enc_Inst.section = 0; + + while( fread(&inputdata[noOfBlocks*80], sizeof(short), + 80, ifileid) == 80 ) { + noOfBlocks++; + } + + noOfBlocks += frameLen/80; + frameLen = 80; +#else + frameLen = Enc_Inst.blockl; + + while( fread(&inputdata[noOfBlocks*Enc_Inst.blockl],sizeof(short), + Enc_Inst.blockl,ifileid)==(uint16_t)Enc_Inst.blockl){ + noOfBlocks++; + } +#endif + + + while ((fread(&channeldata[blockcount],sizeof(short), 1,chfileid)==1) + && ( blockcount < noOfBlocks/(Enc_Inst.blockl/frameLen) )) { + blockcount++; + } + + if ( blockcount < noOfBlocks/(Enc_Inst.blockl/frameLen) ) { + fprintf(stderr,"Channel file %s is too short\n", argv[4]); + free(inputdata); + free(encodeddata); + free(decodeddata); + free(channeldata); + exit(0); + } + + count=0; + + /* Runtime statistics */ + + starttime1 = clock()/(float)CLOCKS_PER_SEC; + + /* Encoding loop */ +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + +#ifdef SPLIT_10MS + /* "Enc_Inst.section != 0" is to make sure we run through full + lengths of all vectors for 10ms split mode. 
+ */ + // while( (count < noOfBlocks) || (Enc_Inst.section != 0) ) { + while( count < blockcount * (Enc_Inst.blockl/frameLen) ) { + + encode(&Enc_Inst, &encodeddata[Enc_Inst.no_of_words * + (count/(Enc_Inst.nsub/2))], + &inputdata[frameLen * count] ); +#else + while (count < noOfBlocks) { + encode( &Enc_Inst, &encodeddata[Enc_Inst.no_of_words * count], + &inputdata[frameLen * count] ); +#endif + +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + + count++; + } + + count=0; + + /* Runtime statistics */ + + starttime2=clock()/(float)CLOCKS_PER_SEC; + runtime1 = (float)(starttime2-starttime1); + + /* Decoding loop */ + + while (count < blockcount) { + if (channeldata[count]==1) { + /* Normal decoding */ + decode(&Dec_Inst, &decodeddata[count * Dec_Inst.blockl], + &encodeddata[Dec_Inst.no_of_words * count], 1); + } else if (channeldata[count]==0) { + /* PLC */ + short emptydata[ILBCNOOFWORDS_MAX]; + memset(emptydata, 0, Dec_Inst.no_of_words*sizeof(short)); + decode(&Dec_Inst, &decodeddata[count*Dec_Inst.blockl], + emptydata, 0); + noOfLostBlocks++; + } else { + printf("Error in channel file (values have to be either 1 or 0)\n"); + exit(0); + } +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + + count++; + } + + /* Runtime statistics */ + + runtime2 = (float)(clock()/(float)CLOCKS_PER_SEC-starttime2); + + outtime = (float)((float)blockcount* + (float)mode/1000.0); + +#ifndef PRINT_MIPS + printf("\nLength of speech file: %.1f s\n", outtime); + printf("Lost frames : %.1f%%\n\n", 100*(float)noOfLostBlocks/(float)blockcount); + + printf("Time to run iLBC_encode+iLBC_decode:"); + printf(" %.1f s (%.1f%% of realtime)\n", runtime1+runtime2, + (100*(runtime1+runtime2)/outtime)); + + printf("Time in iLBC_encode :"); + printf(" %.1f s (%.1f%% of total runtime)\n", + runtime1, 100.0*runtime1/(runtime1+runtime2)); + + printf("Time in iLBC_decode :"); + printf(" %.1f s (%.1f%% of total runtime)\n\n", + runtime2, 100.0*runtime2/(runtime1+runtime2)); +#endif + + /* Write data to files */ + for (i=0; ilsf[0] = (*bitstreamPtr)>>10; /* Bit 0..5 */ + enc_bits->lsf[1] = ((*bitstreamPtr)>>3)&0x7F; /* Bit 6..12 */ + enc_bits->lsf[2] = ((*bitstreamPtr)&0x7)<<4; /* Bit 13..15 */ + bitstreamPtr++; + /* Second int16_t */ + enc_bits->lsf[2] |= ((*bitstreamPtr)>>12)&0xF; /* Bit 0..3 */ + + if (mode==20) { + enc_bits->startIdx = ((*bitstreamPtr)>>10)&0x3; /* Bit 4..5 */ + enc_bits->state_first = ((*bitstreamPtr)>>9)&0x1; /* Bit 6 */ + enc_bits->idxForMax = ((*bitstreamPtr)>>3)&0x3F; /* Bit 7..12 */ + enc_bits->cb_index[0] = ((*bitstreamPtr)&0x7)<<4; /* Bit 13..15 */ + bitstreamPtr++; + /* Third int16_t */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>12)&0xE; /* Bit 0..2 */ + enc_bits->gain_index[0] = ((*bitstreamPtr)>>8)&0x18; /* Bit 3..4 */ + enc_bits->gain_index[1] = ((*bitstreamPtr)>>7)&0x8; /* Bit 5 */ + enc_bits->cb_index[3] = ((*bitstreamPtr)>>2)&0xFE; /* Bit 6..12 */ + enc_bits->gain_index[3] = ((*bitstreamPtr)<<2)&0x10; /* Bit 13 */ + enc_bits->gain_index[4] = ((*bitstreamPtr)<<2)&0x8; /* Bit 14 */ + enc_bits->gain_index[6] = ((*bitstreamPtr)<<4)&0x10; /* Bit 15 */ + } else { /* mode==30 */ + enc_bits->lsf[3] = ((*bitstreamPtr)>>6)&0x3F; /* Bit 4..9 */ + enc_bits->lsf[4] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* Third int16_t */ + enc_bits->lsf[4] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->lsf[5] = ((*bitstreamPtr)>>8)&0x7F; /* Bit 1..7 */ + enc_bits->startIdx = ((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */ + enc_bits->state_first = ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + 
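/* idxForMax (6 bits) is an example of a field that straddles a word
+     boundary: the comments number bits MSB-first (Bit 0 is the top bit),
+     so its four high bits live in Bit 12..15 of this word and its two
+     low bits in Bit 0..1 of the next. Worked example with illustrative
+     values: if this word ends in binary 1011 and the next starts 01, then
+       ((0x000B)<<2)&0x3C = 0x2C   (the four high bits in place)
+       ((0x4000)>>14)&0x3 = 0x01   (the two low bits)
+     and OR-ing the two gives idxForMax = 0x2D (binary 101101). */
+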
enc_bits->idxForMax = ((*bitstreamPtr)<<2)&0x3C; /* Bit 12..15 */ + bitstreamPtr++; + /* 4:th int16_t */ + enc_bits->idxForMax |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->cb_index[0] = ((*bitstreamPtr)>>7)&0x78; /* Bit 2..5 */ + enc_bits->gain_index[0] = ((*bitstreamPtr)>>5)&0x10; /* Bit 6 */ + enc_bits->gain_index[1] = ((*bitstreamPtr)>>5)&0x8; /* Bit 7 */ + enc_bits->cb_index[3] = ((*bitstreamPtr))&0xFC; /* Bit 8..13 */ + enc_bits->gain_index[3] = ((*bitstreamPtr)<<3)&0x10; /* Bit 14 */ + enc_bits->gain_index[4] = ((*bitstreamPtr)<<3)&0x8; /* Bit 15 */ + } + /* Class 2 bits of ULP */ + /* 4:th to 6:th int16_t for 20 ms case + 5:th to 7:th int16_t for 30 ms case */ + bitstreamPtr++; + tmpPtr=enc_bits->idxVec; + for (k=0; k<3; k++) { + for (i=15; i>=0; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + bitstreamPtr++; + } + + if (mode==20) { + /* 7:th int16_t */ + for (i=15; i>6; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>4)&0x4; /* Bit 9 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>2)&0xC; /* Bit 10..11 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>1)&0x4; /* Bit 12 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)<<1)&0x8; /* Bit 13 */ + enc_bits->gain_index[7] = ((*bitstreamPtr)<<2)&0xC; /* Bit 14..15 */ + + } else { /* mode==30 */ + /* 8:th int16_t */ + for (i=15; i>5; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>3)&0x6; /* Bit 10..11 */ + enc_bits->gain_index[0] |= ((*bitstreamPtr))&0x8; /* Bit 12 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr))&0x4; /* Bit 13 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr))&0x2; /* Bit 14 */ + enc_bits->cb_index[6] = ((*bitstreamPtr)<<7)&0x80; /* Bit 15 */ + bitstreamPtr++; + /* 9:th int16_t */ + enc_bits->cb_index[6] |= ((*bitstreamPtr)>>9)&0x7E; /* Bit 0..5 */ + enc_bits->cb_index[9] = ((*bitstreamPtr)>>2)&0xFE; /* Bit 6..12 */ + enc_bits->cb_index[12] = ((*bitstreamPtr)<<5)&0xE0; /* Bit 13..15 */ + bitstreamPtr++; + /* 10:th int16_t */ + enc_bits->cb_index[12] |= ((*bitstreamPtr)>>11)&0x1E;/* Bit 0..3 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>8)&0xC; /* Bit 4..5 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>7)&0x6; /* Bit 6..7 */ + enc_bits->gain_index[6] = ((*bitstreamPtr)>>3)&0x18; /* Bit 8..9 */ + enc_bits->gain_index[7] = ((*bitstreamPtr)>>2)&0xC; /* Bit 10..11 */ + enc_bits->gain_index[9] = ((*bitstreamPtr)<<1)&0x10; /* Bit 12 */ + enc_bits->gain_index[10] = ((*bitstreamPtr)<<1)&0x8; /* Bit 13 */ + enc_bits->gain_index[12] = ((*bitstreamPtr)<<3)&0x10; /* Bit 14 */ + enc_bits->gain_index[13] = ((*bitstreamPtr)<<3)&0x8; /* Bit 15 */ + } + bitstreamPtr++; + /* Class 3 bits of ULP */ + /* 8:th to 14:th int16_t for 20 ms case + 11:th to 17:th int16_t for 30 ms case */ + tmpPtr=enc_bits->idxVec; + for (k=0; k<7; k++) { + for (i=14; i>=0; i-=2) { + (*tmpPtr) |= ((*bitstreamPtr)>>i)&0x3; /* Bit 15-i..14-i*/ + tmpPtr++; + } + bitstreamPtr++; + } + + if (mode==20) { + /* 15:th int16_t */ + enc_bits->idxVec[56] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>13)&0x1; /* Bit 2 */ + enc_bits->cb_index[1] = ((*bitstreamPtr)>>6)&0x7F; /* Bit 3..9 */ + enc_bits->cb_index[2] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* 16:th int16_t */ + enc_bits->cb_index[2] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->gain_index[0] |= ((*bitstreamPtr)>>12)&0x7; 
/* Bit 1..3 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>10)&0x3; /* Bit 4..5 */ + enc_bits->gain_index[2] = ((*bitstreamPtr)>>7)&0x7; /* Bit 6..8 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr)>>6)&0x1; /* Bit 9 */ + enc_bits->cb_index[4] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* 17:th int16_t */ + enc_bits->cb_index[4] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->cb_index[5] = ((*bitstreamPtr)>>8)&0x7F; /* Bit 1..7 */ + enc_bits->cb_index[6] = ((*bitstreamPtr))&0xFF; /* Bit 8..15 */ + bitstreamPtr++; + /* 18:th int16_t */ + enc_bits->cb_index[7] = (*bitstreamPtr)>>8; /* Bit 0..7 */ + enc_bits->cb_index[8] = (*bitstreamPtr)&0xFF; /* Bit 8..15 */ + bitstreamPtr++; + /* 19:th int16_t */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>12)&0x3; /* Bit 2..3 */ + enc_bits->gain_index[5] = ((*bitstreamPtr)>>9)&0x7; /* Bit 4..6 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)>>6)&0x7; /* Bit 7..9 */ + enc_bits->gain_index[7] |= ((*bitstreamPtr)>>4)&0x3; /* Bit 10..11 */ + enc_bits->gain_index[8] = ((*bitstreamPtr)>>1)&0x7; /* Bit 12..14 */ + } else { /* mode==30 */ + /* 18:th int16_t */ + enc_bits->idxVec[56] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->idxVec[57] |= ((*bitstreamPtr)>>12)&0x3; /* Bit 2..3 */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>11)&1; /* Bit 4 */ + enc_bits->cb_index[1] = ((*bitstreamPtr)>>4)&0x7F; /* Bit 5..11 */ + enc_bits->cb_index[2] = ((*bitstreamPtr)<<3)&0x78; /* Bit 12..15 */ + bitstreamPtr++; + /* 19:th int16_t */ + enc_bits->cb_index[2] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->gain_index[0] |= ((*bitstreamPtr)>>10)&0x7; /* Bit 3..5 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>8)&0x3; /* Bit 6..7 */ + enc_bits->gain_index[2] = ((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + enc_bits->cb_index[4] = ((*bitstreamPtr)<<3)&0x78; /* Bit 12..15 */ + bitstreamPtr++; + /* 20:th int16_t */ + enc_bits->cb_index[4] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->cb_index[5] = ((*bitstreamPtr)>>6)&0x7F; /* Bit 3..9 */ + enc_bits->cb_index[6] |= ((*bitstreamPtr)>>5)&0x1; /* Bit 10 */ + enc_bits->cb_index[7] = ((*bitstreamPtr)<<3)&0xF8; /* Bit 11..15 */ + bitstreamPtr++; + /* 21:st int16_t */ + enc_bits->cb_index[7] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->cb_index[8] = ((*bitstreamPtr)>>5)&0xFF; /* Bit 3..10 */ + enc_bits->cb_index[9] |= ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + enc_bits->cb_index[10] = ((*bitstreamPtr)<<4)&0xF0; /* Bit 12..15 */ + bitstreamPtr++; + /* 22:nd int16_t */ + enc_bits->cb_index[10] |= ((*bitstreamPtr)>>12)&0xF; /* Bit 0..3 */ + enc_bits->cb_index[11] = ((*bitstreamPtr)>>4)&0xFF; /* Bit 4..11 */ + enc_bits->cb_index[12] |= ((*bitstreamPtr)>>3)&0x1; /* Bit 12 */ + enc_bits->cb_index[13] = ((*bitstreamPtr)<<5)&0xE0; /* Bit 13..15 */ + bitstreamPtr++; + /* 23:rd int16_t */ + enc_bits->cb_index[13] |= ((*bitstreamPtr)>>11)&0x1F;/* Bit 0..4 */ + enc_bits->cb_index[14] = ((*bitstreamPtr)>>3)&0xFF; /* Bit 5..12 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>1)&0x3; /* Bit 13..14 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)&0x1); /* Bit 15 */ + bitstreamPtr++; + /* 24:rd int16_t */ + enc_bits->gain_index[5] = ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)>>10)&0x7; /* Bit 3..5 */ + enc_bits->gain_index[7] |= ((*bitstreamPtr)>>8)&0x3; /* Bit 6..7 */ + enc_bits->gain_index[8] = 
((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */
+    enc_bits->gain_index[9] |= ((*bitstreamPtr)>>1)&0xF; /* Bit 11..14 */
+    enc_bits->gain_index[10] |= ((*bitstreamPtr)<<2)&0x4; /* Bit 15 */
+    bitstreamPtr++;
+    /* 25:th int16_t */
+    enc_bits->gain_index[10] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */
+    enc_bits->gain_index[11] = ((*bitstreamPtr)>>11)&0x7; /* Bit 2..4 */
+    enc_bits->gain_index[12] |= ((*bitstreamPtr)>>7)&0xF; /* Bit 5..8 */
+    enc_bits->gain_index[13] |= ((*bitstreamPtr)>>4)&0x7; /* Bit 9..11 */
+    enc_bits->gain_index[14] = ((*bitstreamPtr)>>1)&0x7; /* Bit 12..14 */
+  }
+  /* Last bit should be zero, otherwise it's an "empty" frame */
+  if (((*bitstreamPtr)&0x1) == 1) {
+    return(1);
+  } else {
+    return(0);
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h
new file mode 100644
index 0000000000..1a63280e6b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_UnpackBits.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_UNPACK_BITS_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_UNPACK_BITS_H_
+
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * unpacking of bits from bitstream, i.e., vector of bytes
+ *---------------------------------------------------------------*/
+
+int16_t
+WebRtcIlbcfix_UnpackBits(/* (o) "Empty" frame indicator */
+                         const uint16_t*
+                             bitstream, /* (i) The packetized bitstream */
+                         iLBC_bits*
+                             enc_bits, /* (o) Parameters from bitstream */
+                         int16_t mode /* (i) Codec mode (20 or 30) */
+                         );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.c
new file mode 100644
index 0000000000..d9375fb995
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Vq3.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/vq3.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+
+/*----------------------------------------------------------------*
+ * vector quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Vq3(
+    int16_t *Xq, /* quantized vector (Q13) */
+    int16_t *index,
+    int16_t *CB, /* codebook in Q13 */
+    int16_t *X,  /* vector to quantize (Q13) */
+    int16_t n_cb
+                       ){
+  int16_t i, j;
+  int16_t pos, minindex=0;
+  int16_t tmp;
+  int32_t dist, mindist;
+
+  pos = 0;
+  mindist = WEBRTC_SPL_WORD32_MAX; /* start value */
+
+  /* Find the codebook vector with the lowest squared distance */
+  for (j = 0; j < n_cb; j++) {
+    tmp = X[0] - CB[pos];
+    dist = tmp * tmp;
+    for (i = 1; i < 3; i++) {
+      tmp = X[i] - CB[pos + i];
+      dist += tmp * tmp;
+    }
+
+    if (dist < mindist) {
+      mindist = dist;
+      minindex = j;
+    }
+    pos += 3;
+  }
+
+  /* Store the quantized codebook vector and the index */
+  for (i = 0; i < 3; i++) {
+    Xq[i] = CB[minindex*3 + i];
+  }
+  *index = minindex;
+
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.h
new file mode 100644
index 0000000000..c946478a1a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Vq3.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ3_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ3_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Vector quantization of order 3 (based on MSE)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Vq3(
+    int16_t* Xq,    /* (o) the quantized vector (Q13) */
+    int16_t* index, /* (o) the quantization index */
+    int16_t* CB,    /* (i) the vector quantization codebook (Q13) */
+    int16_t* X,     /* (i) the vector to quantize (Q13) */
+    int16_t n_cb    /* (i) the number of vectors in the codebook */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.c
new file mode 100644
index 0000000000..c9a65aec2a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Vq4.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/vq4.h"
+
+#include "modules/audio_coding/codecs/ilbc/constants.h"
+
+/*----------------------------------------------------------------*
+ * vector quantization
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Vq4(
+    int16_t *Xq, /* quantized vector (Q13) */
+    int16_t *index,
+    int16_t *CB, /* codebook in Q13 */
+    int16_t *X,  /* vector to quantize (Q13) */
+    int16_t n_cb
+                       ){
+  int16_t i, j;
+  int16_t pos, minindex=0;
+  int16_t tmp;
+  int32_t dist, mindist;
+
+  pos = 0;
+  mindist = WEBRTC_SPL_WORD32_MAX; /* start value */
+
+  /* Find the codebook vector with the lowest squared distance */
+  for (j = 0; j < n_cb; j++) {
+    tmp = X[0] - CB[pos];
+    dist = tmp * tmp;
+    for (i = 1; i < 4; i++) {
+      tmp = X[i] - CB[pos + i];
+      dist += tmp * tmp;
+    }
+
+    if (dist < mindist) {
+      mindist = dist;
+      minindex = j;
+    }
+    pos += 4;
+  }
+
+  /* Store the quantized codebook vector and the index */
+  for (i = 0; i < 4; i++) {
+    Xq[i] = CB[minindex*4 + i];
+  }
+  *index = minindex;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.h
new file mode 100644
index 0000000000..6d14830c03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Vq4.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ4_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ4_H_
+
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * Vector quantization of order 4 (based on MSE)
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Vq4(
+    int16_t* Xq,    /* (o) the quantized vector (Q13) */
+    int16_t* index, /* (o) the quantization index */
+    int16_t* CB,    /* (i) the vector quantization codebook (Q13) */
+    int16_t* X,     /* (i) the vector to quantize (Q13) */
+    int16_t n_cb    /* (i) the number of vectors in the codebook */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.c
new file mode 100644
index 0000000000..e82d167220
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Window32W32.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/window32_w32.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * window multiplication
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Window32W32(
+    int32_t *z,       /* Output */
+    int32_t *x,       /* Input (same domain as Output)*/
+    const int32_t *y, /* Q31 Window */
+    size_t N          /* length to process */
+                               ) {
+  size_t i;
+  int16_t x_low, x_hi, y_low, y_hi;
+  int16_t left_shifts;
+  int32_t temp;
+
+  left_shifts = (int16_t)WebRtcSpl_NormW32(x[0]);
+  WebRtcSpl_VectorBitShiftW32(x, N, x, (int16_t)(-left_shifts));
+
+
+  /* The double precision numbers use a special representation:
+   * w32 = hi<<16 + lo<<1
+   */
+  for (i = 0; i < N; i++) {
+    /* Extract the high 16 bits */
+    x_hi = (int16_t)(x[i] >> 16);
+    y_hi = (int16_t)(y[i] >> 16);
+
+    /* Extract the low bits, defined as (w32 - (hi<<16))>>1 */
+    x_low = (int16_t)((x[i] - (x_hi << 16)) >> 1);
+
+    y_low = (int16_t)((y[i] - (y_hi << 16)) >> 1);
+
+    /* Calculate z by a 32 bit multiplication using both low and high from x and y */
+    temp = ((x_hi * y_hi) << 1) + ((x_hi * y_low) >> 14);
+
+    z[i] = temp + ((x_low * y_hi) >> 14);
+  }
+
+  WebRtcSpl_VectorBitShiftW32(z, N, z, left_shifts);
+
+  return;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h
new file mode 100644
index 0000000000..15d72c5ba2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_Window32W32.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_WINDOW32_W32_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_WINDOW32_W32_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * window multiplication
+ *---------------------------------------------------------------*/
+
+void WebRtcIlbcfix_Window32W32(int32_t* z, /* Output */
+                               int32_t* x, /* Input (same domain as Output)*/
+                               const int32_t* y, /* Q31 Window */
+                               size_t N /* length to process */
+                               );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.c
new file mode 100644
index 0000000000..9dc880b37e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_XcorrCoef.c
+
+******************************************************************/
+
+#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h"
+
+#include "modules/audio_coding/codecs/ilbc/defines.h"
+
+/*----------------------------------------------------------------*
+ * cross correlation which finds the optimal lag for the
+ * crossCorr*crossCorr/(energy) criteria
+ *---------------------------------------------------------------*/
+
+size_t WebRtcIlbcfix_XcorrCoef(
+    int16_t *target,    /* (i) first array */
+    int16_t *regressor, /* (i) second array */
+    size_t subl,        /* (i) dimension arrays */
+    size_t searchLen,   /* (i) the search length */
+    size_t offset,      /* (i) samples offset between arrays */
+    int16_t step        /* (i) +1 or -1 */
+                               ){
+  size_t k;
+  size_t maxlag;
+  int16_t pos;
+  int16_t max;
+  int16_t crossCorrScale, Energyscale;
+  int16_t crossCorrSqMod, crossCorrSqMod_Max;
+  int32_t crossCorr, Energy;
+  int16_t crossCorrmod, EnergyMod, EnergyMod_Max;
+  int16_t *tp, *rp;
+  int16_t *rp_beg, *rp_end;
+  int16_t totscale, totscale_max;
+  int16_t scalediff;
+  int32_t newCrit, maxCrit;
+  int shifts;
+
+  /* Initializations, to make sure that the first one is selected */
+  crossCorrSqMod_Max=0;
+  EnergyMod_Max=WEBRTC_SPL_WORD16_MAX;
+  totscale_max=-500;
+  maxlag=0;
+  pos=0;
+
+  /* Find scale value and start position */
+  if (step==1) {
+    max=WebRtcSpl_MaxAbsValueW16(regressor, subl + searchLen - 1);
+    rp_beg = regressor;
+    rp_end = regressor + subl;
+  } else { /* step==-1 */
+    max = WebRtcSpl_MaxAbsValueW16(regressor - searchLen, subl + searchLen - 1);
+    rp_beg = regressor - 1;
+    rp_end = regressor + subl - 1;
+  }
+
+  /* Introduce a scale factor on the Energy in int32_t in
+     order to make sure that the calculation does not
+     overflow */
+
+  if (max>5000) {
+    shifts=2;
+  } else {
+    shifts=0;
+  }
+
+  /* Calculate the first energy, then do a +/- to get the other energies */
+  Energy=WebRtcSpl_DotProductWithScale(regressor, regressor, subl, shifts);
+
+  for (k=0;k<searchLen;k++) {
+    /* Calculate the cross correlation for this lag */
+    tp = target;
+    rp = &regressor[pos];
+    crossCorr = WebRtcSpl_DotProductWithScale(tp, rp, subl, shifts);
+
+    /* Evaluate the criteria only when both terms are positive */
+    if ((Energy>0)&&(crossCorr>0)) {
+
+      /* Put cross correlation and energy on 16 bit word */
+      crossCorrScale=(int16_t)WebRtcSpl_NormW32(crossCorr)-16;
+      crossCorrmod=(int16_t)WEBRTC_SPL_SHIFT_W32(crossCorr, crossCorrScale);
+      Energyscale=(int16_t)WebRtcSpl_NormW32(Energy)-16;
+      EnergyMod=(int16_t)WEBRTC_SPL_SHIFT_W32(Energy, Energyscale);
+
+      /* Square cross correlation and store upper int16_t */
+      crossCorrSqMod = (int16_t)((crossCorrmod * crossCorrmod) >> 16);
+
+      /* Calculate the total number of (dynamic) right shifts that have
+         been performed on (crossCorr*crossCorr)/energy
+      */
+      totscale=Energyscale-(crossCorrScale<<1);
+
+      /* Calculate the shift difference in order to be able to compare the two
+         (crossCorr*crossCorr)/energy in the same domain
+      */
+      scalediff=totscale-totscale_max;
+      scalediff=WEBRTC_SPL_MIN(scalediff,31);
+      scalediff=WEBRTC_SPL_MAX(scalediff,-31);
+
+      /* Compute the cross multiplication between the old best criteria
+         and the new one to be able to compare them without using a
+         division */
+
+      if (scalediff<0) {
+        newCrit = ((int32_t)crossCorrSqMod*EnergyMod_Max)>>(-scalediff);
+        maxCrit = ((int32_t)crossCorrSqMod_Max*EnergyMod);
+      } else {
+        newCrit = ((int32_t)crossCorrSqMod*EnergyMod_Max);
+        maxCrit =
((int32_t)crossCorrSqMod_Max*EnergyMod)>>scalediff;
+      }
+
+      /* Store the new lag value if the new criterion is larger
+         than the previous largest criterion */
+
+      if (newCrit > maxCrit) {
+        crossCorrSqMod_Max = crossCorrSqMod;
+        EnergyMod_Max = EnergyMod;
+        totscale_max = totscale;
+        maxlag = k;
+      }
+    }
+    pos+=step;
+
+    /* Do a +/- to get the next energy */
+    Energy += step * ((*rp_end * *rp_end - *rp_beg * *rp_beg) >> shifts);
+    rp_beg+=step;
+    rp_end+=step;
+  }
+
+  return(maxlag+offset);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.h
new file mode 100644
index 0000000000..3be5a296b5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/******************************************************************
+
+ iLBC Speech Coder ANSI-C Source Code
+
+ WebRtcIlbcfix_XcorrCoef.h
+
+******************************************************************/
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_XCORR_COEF_H_
+#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_XCORR_COEF_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*----------------------------------------------------------------*
+ * cross correlation which finds the optimal lag for the
+ * crossCorr*crossCorr/(energy) criteria
+ *---------------------------------------------------------------*/
+
+size_t WebRtcIlbcfix_XcorrCoef(
+    int16_t* target,    /* (i) first array */
+    int16_t* regressor, /* (i) second array */
+    size_t subl,        /* (i) dimension arrays */
+    size_t searchLen,   /* (i) the search length */
+    size_t offset,      /* (i) samples offset between arrays */
+    int16_t step        /* (i) +1 or -1 */
+    );
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/bandwidth_info.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/bandwidth_info.h
new file mode 100644
index 0000000000..c3830a5f7c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/bandwidth_info.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_ + +#include + +typedef struct { + int in_use; + int32_t send_bw_avg; + int32_t send_max_delay_avg; + int16_t bottleneck_idx; + int16_t jitter_info; +} IsacBandwidthInfo; + +#endif // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.c new file mode 100644 index 0000000000..a4f297c5a1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#ifdef WEBRTC_ANDROID +#include +#endif + +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" + +static void WebRtcIsac_AllPoleFilter(double* InOut, + double* Coef, + size_t lengthInOut, + int orderCoef) { + /* the state of filter is assumed to be in InOut[-1] to InOut[-orderCoef] */ + double scal; + double sum; + size_t n; + int k; + + //if (fabs(Coef[0]-1.0)<0.001) { + if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) ) + { + for(n = 0; n < lengthInOut; n++) + { + sum = Coef[1] * InOut[-1]; + for(k = 2; k <= orderCoef; k++){ + sum += Coef[k] * InOut[-k]; + } + *InOut++ -= sum; + } + } + else + { + scal = 1.0 / Coef[0]; + for(n=0;nbuffer, sizeof(double) * PITCH_WLPCBUFLEN); + memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN); + memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN); + + dp=weoutbuf; + dp2=whoutbuf; + for (k=0;kweostate[k]; + *dp2++ = wfdata->whostate[k]; + opol[k]=0.0; + } + opol[0]=1.0; + opol[PITCH_WLPCORDER]=0.0; + weo=dp; + who=dp2; + + endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN; + inp=tmpbuffer + PITCH_WLPCBUFLEN; + + for (n=0; nwindow[k]*tmpbuffer[start+k]; + } + + /* Get LPC polynomial */ + WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER); + corr[0]=1.01*corr[0]+1.0; /* White noise correction */ + WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER); + WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1); + + /* Filtering */ + WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo); + WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who); + + inp+=PITCH_SUBFRAME_LEN; + endpos+=PITCH_SUBFRAME_LEN; + weo+=PITCH_SUBFRAME_LEN; + who+=PITCH_SUBFRAME_LEN; + } + + /* Export filter states */ + for (k=0;kweostate[k]=weoutbuf[PITCH_FRAME_LEN+k]; + wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k]; + } + + /* Export output data */ + memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); + memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h new file mode 100644 index 0000000000..a747a7f549 
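+/* For reference, the recursion implemented by WebRtcIsac_AllPoleFilter
+   above is, per sample,
+       y[n] = (x[n] - sum_{k=1..orderCoef} Coef[k]*y[n-k]) / Coef[0],
+   with the filter history supplied in InOut[-1]..InOut[-orderCoef] and the
+   division skipped on the fast path where Coef[0] is numerically 1. A
+   self-contained sketch of the same recursion with an explicit history
+   region -- hypothetical names, not the iSAC implementation:
+
+     #include <stddef.h>
+
+     // buf[0..order-1] holds the history; buf[order..order+len-1] holds
+     // the samples, which are filtered in place.
+     static void all_pole_filter(double* buf, const double* a,
+                                 size_t len, int order) {
+       double* x = buf + order;
+       size_t n;
+       int k;
+       for (n = 0; n < len; n++) {
+         double sum = 0.0;
+         double* xn = x + n;
+         for (k = 1; k <= order; k++)
+           sum += a[k] * xn[-k];  // reaches back into history / prior output
+         xn[0] = (xn[0] - sum) / a[0];
+       }
+     }
+*/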
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
+#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
+
+#include <stddef.h>
+
+#include "modules/audio_coding/codecs/isac/main/source/structs.h"
+
+void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
+
+void WebRtcIsac_WeightingFilter(const double* in,
+                                double* weiout,
+                                double* whiout,
+                                WeightFiltstr* wfdata);
+
+#endif  // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.c
new file mode 100644
index 0000000000..57cf0c39da
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" + +#include + +void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) { + int k; + + for (k = 0; k < PITCH_BUFFSIZE; k++) { + pitchfiltdata->ubuf[k] = 0.0; + } + pitchfiltdata->ystate[0] = 0.0; + for (k = 1; k < (PITCH_DAMPORDER); k++) { + pitchfiltdata->ystate[k] = 0.0; + } + pitchfiltdata->oldlagp[0] = 50.0; + pitchfiltdata->oldgainp[0] = 0.0; +} + +static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) { + int k; + double t, dtmp, dtmp2, denum, denum2; + + for (k = 0; k < PITCH_WLPCBUFLEN; k++) + wfdata->buffer[k] = 0.0; + + for (k = 0; k < PITCH_WLPCORDER; k++) { + wfdata->istate[k] = 0.0; + wfdata->weostate[k] = 0.0; + wfdata->whostate[k] = 0.0; + } + + /* next part should be in Matlab, writing to a global table */ + t = 0.5; + denum = 1.0 / ((double)PITCH_WLPCWINLEN); + denum2 = denum * denum; + for (k = 0; k < PITCH_WLPCWINLEN; k++) { + dtmp = PITCH_WLPCASYM * t * denum + (1 - PITCH_WLPCASYM) * t * t * denum2; + dtmp *= 3.14159265; + dtmp2 = sin(dtmp); + wfdata->window[k] = dtmp2 * dtmp2; + t++; + } +} + +void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) { + int k; + + for (k = 0; k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 - + PITCH_FRAME_LEN / 2 + 2; + k++) + State->dec_buffer[k] = 0.0; + for (k = 0; k < 2 * ALLPASSSECTIONS + 1; k++) + State->decimator_state[k] = 0.0; + for (k = 0; k < 2; k++) + State->hp_state[k] = 0.0; + for (k = 0; k < QLOOKAHEAD; k++) + State->whitened_buf[k] = 0.0; + for (k = 0; k < QLOOKAHEAD; k++) + State->inbuf[k] = 0.0; + + WebRtcIsac_InitPitchFilter(&(State->PFstr_wght)); + + WebRtcIsac_InitPitchFilter(&(State->PFstr)); + + WebRtcIsac_InitWeightingFilter(&(State->Wghtstr)); +} + +void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) { + int k; + + for (k = 0; k < QLOOKAHEAD; k++) { + prefiltdata->INLABUF1[k] = 0; + prefiltdata->INLABUF2[k] = 0; + + prefiltdata->INLABUF1_float[k] = 0; + prefiltdata->INLABUF2_float[k] = 0; + } + for (k = 0; k < 2 * (QORDER - 1); k++) { + prefiltdata->INSTAT1[k] = 0; + prefiltdata->INSTAT2[k] = 0; + prefiltdata->INSTATLA1[k] = 0; + prefiltdata->INSTATLA2[k] = 0; + + prefiltdata->INSTAT1_float[k] = 0; + prefiltdata->INSTAT2_float[k] = 0; + prefiltdata->INSTATLA1_float[k] = 0; + prefiltdata->INSTATLA2_float[k] = 0; + } + + /* High pass filter states */ + prefiltdata->HPstates[0] = 0.0; + prefiltdata->HPstates[1] = 0.0; + + prefiltdata->HPstates_float[0] = 0.0f; + prefiltdata->HPstates_float[1] = 0.0f; + + return; +} + +double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) { + const double LEVINSON_EPS = 1.0e-10; + + double sum, alpha; + size_t m, m_h, i; + alpha = 0; // warning -DH + a[0] = 1.0; + if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. 
to zero */ + for (i = 0; i < order; i++) { + k[i] = 0; + a[i + 1] = 0; + } + } else { + a[1] = k[0] = -r[1] / r[0]; + alpha = r[0] + r[1] * k[0]; + for (m = 1; m < order; m++) { + sum = r[m + 1]; + for (i = 0; i < m; i++) { + sum += a[i + 1] * r[m - i]; + } + k[m] = -sum / alpha; + alpha += k[m] * sum; + m_h = (m + 1) >> 1; + for (i = 0; i < m_h; i++) { + sum = a[i + 1] + k[m] * a[m - i]; + a[m - i] += k[m] * a[i + 1]; + a[i + 1] = sum; + } + a[m + 1] = k[m]; + } + } + return alpha; +} + +/* The upper channel all-pass filter factors */ +const float WebRtcIsac_kUpperApFactorsFloat[2] = {0.03470000000000f, + 0.38260000000000f}; + +/* The lower channel all-pass filter factors */ +const float WebRtcIsac_kLowerApFactorsFloat[2] = {0.15440000000000f, + 0.74400000000000f}; + +/* This function performs all-pass filtering--a series of first order all-pass + * sections are used to filter the input in a cascade manner. + * The input is overwritten!! + */ +void WebRtcIsac_AllPassFilter2Float(float* InOut, + const float* APSectionFactors, + int lengthInOut, + int NumberOfSections, + float* FilterState) { + int n, j; + float temp; + for (j = 0; j < NumberOfSections; j++) { + for (n = 0; n < lengthInOut; n++) { + temp = FilterState[j] + APSectionFactors[j] * InOut[n]; + FilterState[j] = -APSectionFactors[j] * temp + InOut[n]; + InOut[n] = temp; + } + } +} + +/* The number of composite all-pass filter factors */ +#define NUMBEROFCOMPOSITEAPSECTIONS 4 + +/* Function WebRtcIsac_SplitAndFilter + * This function creates low-pass and high-pass decimated versions of part of + the input signal, and part of the signal in the input 'lookahead buffer'. + + INPUTS: + in: a length FRAMESAMPLES array of input samples + prefiltdata: input data structure containing the filterbank states + and lookahead samples from the previous encoding + iteration. + OUTPUTS: + LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that + have been phase equalized. The first QLOOKAHEAD samples are + based on the samples in the two prefiltdata->INLABUFx arrays + each of length QLOOKAHEAD. + The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based + on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input + array in[]. + HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that + have been phase equalized. The first QLOOKAHEAD samples are + based on the samples in the two prefiltdata->INLABUFx arrays + each of length QLOOKAHEAD. + The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based + on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input + array in[]. + + LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples. + These samples are not phase equalized. They are computed + from the samples in the in[] array. + HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples + that are not phase equalized. They are computed from + the in[] vector. + prefiltdata: this input data structure's filterbank state and + lookahead sample buffers are updated for the next + encoding iteration. 
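+
+   In polyphase terms: the odd and even input samples are run through the
+   two-section all-pass chains (WebRtcIsac_kUpperApFactorsFloat and
+   WebRtcIsac_kLowerApFactorsFloat respectively), after which the half-band
+   split is simply  LP[k] = 0.5*(ch1[k] + ch2[k])  and
+   HP[k] = 0.5*(ch1[k] - ch2[k]),  as formed near the end of
+   WebRtcIsac_SplitAndFilterFloat below.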
+*/ +void WebRtcIsac_SplitAndFilterFloat(float* pin, + float* LP, + float* HP, + double* LP_la, + double* HP_la, + PreFiltBankstr* prefiltdata) { + int k, n; + float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS]; + float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS]; + float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS]; + float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float in[FRAMESAMPLES]; + float ftmp; + + /* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */ + static const float kHpStCoefInFloat[4] = { + -1.94895953203325f, 0.94984516000000f, -0.05101826139794f, + 0.05015484000000f}; + + /* The composite all-pass filter factors */ + static const float WebRtcIsac_kCompositeApFactorsFloat[4] = { + 0.03470000000000f, 0.15440000000000f, 0.38260000000000f, + 0.74400000000000f}; + + // The matrix for transforming the backward composite state to upper channel + // state. + static const float WebRtcIsac_kTransform1Float[8] = { + -0.00158678506084f, 0.00127157815343f, -0.00104805672709f, + 0.00084837248079f, 0.00134467983258f, -0.00107756549387f, + 0.00088814793277f, -0.00071893072525f}; + + // The matrix for transforming the backward composite state to lower channel + // state. + static const float WebRtcIsac_kTransform2Float[8] = { + -0.00170686041697f, 0.00136780109829f, -0.00112736532350f, + 0.00091257055385f, 0.00103094281812f, -0.00082615076557f, + 0.00068092756088f, -0.00055119165484f}; + + /* High pass filter */ + + for (k = 0; k < FRAMESAMPLES; k++) { + in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] + + kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1]; + ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] - + kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1]; + prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0]; + prefiltdata->HPstates_float[0] = ftmp; + } + + /* First Channel */ + + /*initial state of composite filter is zero */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + CompositeAPFilterState[k] = 0.0; + } + /* put every other sample of input into a temporary vector in reverse + * (backward) order*/ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k]; + } + + /* now all-pass filter the backwards vector. Output values overwrite the + * input vector. */ + WebRtcIsac_AllPassFilter2Float( + tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF, + NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + /* save the backwards filtered output for later forward filtering, + but write it in forward order*/ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k]; + } + + /* save the backwards filter state becaue it will be transformed + later into a forward state */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k]; + } + + /* now backwards filter the samples in the lookahead buffer. The samples were + placed there in the encoding of the previous frame. 
The output samples + overwrite the input samples */ + WebRtcIsac_AllPassFilter2Float( + prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat, + QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + /* save the output, but write it in forward order */ + /* write the lookahead samples for the next encoding iteration. Every other + sample at the end of the input frame is written in reverse order for the + lookahead length. Exported in the prefiltdata structure. */ + for (k = 0; k < QLOOKAHEAD; k++) { + tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k]; + prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k]; + } + + /* Second Channel. This is exactly like the first channel, except that the + even samples are now filtered instead (lower channel). */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + CompositeAPFilterState[k] = 0.0; + } + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k]; + } + + WebRtcIsac_AllPassFilter2Float( + tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF, + NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k]; + } + + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k]; + } + + WebRtcIsac_AllPassFilter2Float( + prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat, + QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + for (k = 0; k < QLOOKAHEAD; k++) { + tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k]; + prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k]; + } + + /* Transform filter states from backward to forward */ + /*At this point, each of the states of the backwards composite filters for the + two channels are transformed into forward filtering states for the + corresponding forward channel filters. 
Each channel's forward filtering + state from the previous + encoding iteration is added to the transformed state to get a proper forward + state */ + + /* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is + multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4) + transform matrix to get the new state that is added to the previous 2x1 + input state */ + + for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */ + for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS; + n++) { /* n is column variable */ + prefiltdata->INSTAT1_float[k] += + ForTransform_CompositeAPFilterState[n] * + WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n]; + prefiltdata->INSTAT2_float[k] += + ForTransform_CompositeAPFilterState2[n] * + WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n]; + } + } + + /*obtain polyphase components by forward all-pass filtering through each + * channel */ + /* the backward filtered samples are now forward filtered with the + * corresponding channel filters */ + /* The all pass filtering automatically updates the filter states which are + exported in the prefiltdata structure */ + WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTAT1_float); + WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTAT2_float); + + /* Now Construct low-pass and high-pass signals as combinations of polyphase + * components */ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/ + HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/ + } + + /* Lookahead LP and HP signals */ + /* now create low pass and high pass signals of the input vector. However, no + backwards filtering is performed, and hence no phase equalization is + involved. Also, the input contains some samples that are lookahead samples. + The high pass and low pass signals that are created are used outside this + function for analysis (not encoding) purposes */ + + /* set up input */ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch1[k] = in[2 * k + 1]; + tempin_ch2[k] = in[2 * k]; + } + + /* the input filter states are passed in and updated by the all-pass filtering + routine and exported in the prefiltdata structure*/ + WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTATLA1_float); + WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTATLA2_float); + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */ + HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */ + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h new file mode 100644 index 0000000000..1aecfc4046 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h
new file mode 100644
index 0000000000..1aecfc4046
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
+#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
+
+#include <stddef.h>
+
+#include "modules/audio_coding/codecs/isac/main/source/structs.h"
+
+void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
+void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state);
+void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
+
+double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order);
+
+/* The number of all-pass filter factors in an upper or lower channel */
+#define NUMBEROFCHANNELAPSECTIONS 2
+
+/* The upper channel all-pass filter factors */
+extern const float WebRtcIsac_kUpperApFactorsFloat[2];
+
+/* The lower channel all-pass filter factors */
+extern const float WebRtcIsac_kLowerApFactorsFloat[2];
+
+void WebRtcIsac_AllPassFilter2Float(float* InOut,
+                                    const float* APSectionFactors,
+                                    int lengthInOut,
+                                    int NumberOfSections,
+                                    float* FilterState);
+void WebRtcIsac_SplitAndFilterFloat(float* in,
+                                    float* LP,
+                                    float* HP,
+                                    double* LP_la,
+                                    double* HP_la,
+                                    PreFiltBankstr* prefiltdata);
+
+#endif  // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h
new file mode 100644
index 0000000000..fe9afa4ba2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
+#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
+
+#include <math.h>
+
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_POSIX)
+#define WebRtcIsac_lrint lrint
+#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32))
+static __inline long int WebRtcIsac_lrint(double x_dbl) {
+  long int x_int;
+
+  __asm {
+    fld x_dbl
+    fistp x_int
+  }
+  ;
+
+  return x_int;
+}
+#else  // Do a slow but correct implementation of lrint
+
+static __inline long int WebRtcIsac_lrint(double x_dbl) {
+  long int x_int;
+  x_int = (long int)floor(x_dbl + 0.499999999999);
+  return x_int;
+}
+
+#endif
+
+#endif  // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
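[Editorial note] The fallback branch above does not behave exactly like C99
lrint(): floor(x + 0.499999999999) sends exact halves downward, while lrint()
follows the current rounding mode (ties-to-even by default). A tiny check,
illustrative only:

#include <math.h>
#include <stdio.h>

static long int slow_lrint(double x) {
  return (long int)floor(x + 0.499999999999);
}

int main(void) {
  const double v[] = {2.5, 3.5, -2.5, 2.4999999999995};
  int i;
  for (i = 0; i < 4; i++) {
    /* e.g. lrint(3.5) == 4 (ties-to-even) but slow_lrint(3.5) == 3 */
    printf("%.13f -> lrint=%ld fallback=%ld\n", v[i], lrint(v[i]),
           slow_lrint(v[i]));
  }
  return 0;
}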
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c
new file mode 100644
index 0000000000..8a19ac1710
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c
@@ -0,0 +1,695 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
+
+#include <math.h>
+#include <memory.h>
+#include <string.h>
+#ifdef WEBRTC_ANDROID
+#include <stdlib.h>
+#endif
+
+#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
+#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
+#include "rtc_base/system/ignore_warnings.h"
+
+static const double kInterpolWin[8] = {-0.00067556028640,  0.02184247643159, -0.12203175715679,  0.60086484101160,
+                                        0.60086484101160, -0.12203175715679,  0.02184247643159, -0.00067556028640};
+
+/* interpolation filter */
+__inline static void IntrepolFilter(double *data_ptr, double *intrp)
+{
+  *intrp = kInterpolWin[0] * data_ptr[-3];
+  *intrp += kInterpolWin[1] * data_ptr[-2];
+  *intrp += kInterpolWin[2] * data_ptr[-1];
+  *intrp += kInterpolWin[3] * data_ptr[0];
+  *intrp += kInterpolWin[4] * data_ptr[1];
+  *intrp += kInterpolWin[5] * data_ptr[2];
+  *intrp += kInterpolWin[6] * data_ptr[3];
+  *intrp += kInterpolWin[7] * data_ptr[4];
+}
+
+
+/* 2D parabolic interpolation */
+/* probably some 0.5 factors can be eliminated, and the square-roots can be
+   removed from the Cholesky factorization */
+__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
+{
+  double c, b[2], A[2][2];
+  double t1, t2, d;
+  double delta1, delta2;
+
+
+  // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
+  // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
+
+  c = T[1][1];
+  b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
+  b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
+  A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
+  t1 = 0.5 * (T[0][0] + T[2][2]) - c;
+  t2 = 0.5 * (T[2][0] + T[0][2]) - c;
+  d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
+  A[0][0] = -t1 - 0.5 * d;
+  A[1][1] = -t2 - 0.5 * d;
+
+  /* deal with singularities or ill-conditioned cases */
+  if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
+    *peak_val = T[1][1];
+    return;
+  }
+
+  /* Cholesky decomposition: replace A by upper-triangular factor */
+  A[0][0] = sqrt(A[0][0]);
+  A[0][1] = A[0][1] / A[0][0];
+  A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
+
+  /* compute [x; y] = -0.5 * inv(A) * b */
+  t1 = b[0] / A[0][0];
+  t2 = (b[1] - t1 * A[0][1]) / A[1][1];
+  delta2 = t2 / A[1][1];
+  delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
+  delta2 *= 0.5;
+
+  /* limit norm */
+  t1 = delta1 * delta1 + delta2 * delta2;
+  if (t1 > 1.0) {
+    delta1 /= t1;
+    delta2 /= t1;
+  }
+
+  *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
+
+  *x += delta1;
+  *y += delta2;
+}
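+/* [Editorial note, not upstream code] PCorr below evaluates a normalized
+ * cross-correlation over PITCH_LAG_SPAN2 candidate lags. Instead of
+ * recomputing the reference-window energy `ysum` from scratch for every lag
+ * (O(PITCH_CORR_LEN2) each), it slides the window in O(1): drop the oldest
+ * squared sample, add the newest. Worked numbers: for a window {1,2,3}
+ * sliding to {2,3,4}, the energy goes 1+4+9 = 14, then 14 - 1^2 + 4^2 = 29,
+ * which indeed equals 4+9+16. */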
+static void PCorr(const double *in, double *outcorr)
+{
+  double sum, ysum, prod;
+  const double *x, *inptr;
+  int k, n;
+
+  //ysum = 1e-6; /* use this with float (i.s.o. double)! */
+  ysum = 1e-13;
+  sum = 0.0;
+  x = in + PITCH_MAX_LAG/2 + 2;
+  for (n = 0; n < PITCH_CORR_LEN2; n++) {
+    ysum += in[n] * in[n];
+    sum += x[n] * in[n];
+  }
+
+  outcorr += PITCH_LAG_SPAN2 - 1;  /* index of last element in array */
+  *outcorr = sum / sqrt(ysum);
+
+  for (k = 1; k < PITCH_LAG_SPAN2; k++) {
+    ysum -= in[k-1] * in[k-1];
+    ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
+    sum = 0.0;
+    inptr = &in[k];
+    prod = x[0] * inptr[0];
+    for (n = 1; n < PITCH_CORR_LEN2; n++) {
+      sum += prod;
+      prod = x[n] * inptr[n];
+    }
+    sum += prod;
+    outcorr--;
+    *outcorr = sum / sqrt(ysum);
+  }
+}
+
+static void WebRtcIsac_AllpassFilterForDec(double* InOut,
+                                           const double* APSectionFactors,
+                                           size_t lengthInOut,
+                                           double* FilterState) {
+  // This performs all-pass filtering--a series of first order all-pass
+  // sections are used to filter the input in a cascade manner.
+  size_t n, j;
+  double temp;
+  for (j = 0; j < ALLPASSSECTIONS; j++) {
+    for (n = 0; n < lengthInOut; n += 2) {
+      temp = InOut[n];  // store input
+      InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
+      FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
+    }
+  }
+}
+
+static void WebRtcIsac_DecimateAllpass(
+    const double* in,
+    double* state_in,  // array of size: 2*ALLPASSSECTIONS+1
+    size_t N,          // number of input samples
+    double* out) {     // array of size N/2
+
+  static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
+  static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
+
+  size_t n;
+  double data_vec[PITCH_FRAME_LEN];
+
+  /* copy input */
+  memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
+
+  data_vec[0] = state_in[2 * ALLPASSSECTIONS];  // the z^(-1) state
+  state_in[2 * ALLPASSSECTIONS] = in[N - 1];
+
+  WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
+  WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
+                                 state_in + ALLPASSSECTIONS);
+
+  for (n = 0; n < N / 2; n++)
+    out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
+}
+
+RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
+
+static void WebRtcIsac_InitializePitch(const double* in,
+                                       const double old_lag,
+                                       const double old_gain,
+                                       PitchAnalysisStruct* State,
+                                       double* lags) {
+  double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
+  double ratio, log_lag, gain_bias;
+  double bias;
+  double corrvec1[PITCH_LAG_SPAN2];
+  double corrvec2[PITCH_LAG_SPAN2];
+  int m, k;
+  // Allocating 10 extra entries at the beginning of the CorrSurf
+  double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
+  double* CorrSurf[2*PITCH_BW+3];
+  double *CorrSurfPtr1, *CorrSurfPtr2;
+  double LagWin[3] = {0.2, 0.5, 0.98};
+  int ind1, ind2, peaks_ind, peak, max_ind;
+  int peaks[PITCH_MAX_NUM_PEAKS];
+  double adj, gain_tmp;
+  double corr, corr_max;
+  double intrp_a, intrp_b, intrp_c, intrp_d;
+  double peak_vals[PITCH_MAX_NUM_PEAKS];
+  double lags1[PITCH_MAX_NUM_PEAKS];
+  double lags2[PITCH_MAX_NUM_PEAKS];
+  double T[3][3];
+  int row;
+
+  for (k = 0; k < 2*PITCH_BW+3; k++) {
+    CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
+  }
+  /* reset CorrSurf matrix */
+  memset(corrSurfBuff, 0,
+         sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
+
+  /* Initialized here only to silence compiler warnings. */
+  max_ind = 0;
+  peak = 0;
+
+  /* copy old values from state buffer */
+  memcpy(buf_dec, State->dec_buffer,
+         sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
+
+  /* decimation; put result after the old values */
+  WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
+
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); + + /* low-pass filtering */ + for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) + buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; + + /* copy end part back into state buffer */ + memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + + /* compute correlation for first and second half of the frame */ + PCorr(buf_dec, corrvec1); + PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2); + + /* bias towards pitch lag of previous frame */ + log_lag = log(0.5 * old_lag); + gain_bias = 4.0 * old_gain * old_gain; + if (gain_bias > 0.8) gain_bias = 0.8; + for (k = 0; k < PITCH_LAG_SPAN2; k++) + { + ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; + bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); + corrvec1[k] *= bias; + } + + /* taper correlation functions */ + for (k = 0; k < 3; k++) { + gain_tmp = LagWin[k]; + corrvec1[k] *= gain_tmp; + corrvec2[k] *= gain_tmp; + corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + } + + corr_max = 0.0; + /* fill middle row of correlation surface */ + ind1 = 0; + ind2 = 0; + CorrSurfPtr1 = &CorrSurf[PITCH_BW][2]; + for (k = 0; k < PITCH_LAG_SPAN2; k++) { + corr = corrvec1[ind1++] + corrvec2[ind2++]; + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + } + /* fill first and last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW; + CorrSurfPtr1 = &CorrSurf[0][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill second and next to last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW-1; + CorrSurfPtr1 = &CorrSurf[1][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill remainder of correlation surface */ + for (m = 2; m < PITCH_BW; m++) { + ind1 = 0; + ind2 = PITCH_BW - m; /* always larger than ind1 */ + CorrSurfPtr1 = &CorrSurf[m][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 
12)); + adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + } + + /* threshold value to qualify as a peak */ + corr_max *= 0.6; + + peaks_ind = 0; + /* find peaks */ + for (m = 1; m < PITCH_BW+1; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + + if (peaks_ind > 0) { + /* examine each peak */ + CorrSurfPtr1 = &CorrSurf[0][0]; + for (k = 0; k < peaks_ind; k++) { + peak = peaks[k]; + + /* compute four interpolated values around current peak */ + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a); + IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); + IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); + + /* determine maximum of the interpolated values */ + corr = CorrSurfPtr1[peak]; + corr_max = intrp_a; + if (intrp_b > corr_max) corr_max = intrp_b; + if (intrp_c > corr_max) corr_max = intrp_c; + if (intrp_d > corr_max) corr_max = intrp_d; + + /* determine where the peak sits and fill a 3x3 matrix around it */ + row = peak / (PITCH_LAG_SPAN2+4); + lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[k] = (double) (lags1[k] + PITCH_BW - row); + if ( corr > corr_max ) { + T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][1] = corr; + T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][0] = intrp_a; + T[0][1] = intrp_b; + T[2][1] = intrp_c; + T[1][2] = intrp_d; + } else { + if (intrp_a == corr_max) { + lags1[k] -= 0.5; + lags2[k] += 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); + IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); + T[1][1] = intrp_a; + T[0][2] = intrp_b; + T[2][2] = intrp_c; + T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; + T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[2][1] = 
CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][2] = corr; + } else if (intrp_b == corr_max) { + lags1[k] -= 0.5; + lags2[k] -= 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); + T[2][0] = intrp_a; + T[1][1] = intrp_b; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); + T[2][2] = intrp_d; + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[0][1] = CorrSurfPtr1[peak - 1]; + T[2][1] = corr; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + } else if (intrp_c == corr_max) { + lags1[k] += 0.5; + lags2[k] += 0.5; + T[0][0] = intrp_a; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); + T[1][1] = intrp_c; + T[0][2] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[0][1] = corr; + T[2][1] = CorrSurfPtr1[peak + 1]; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + } else { + lags1[k] += 0.5; + lags2[k] -= 0.5; + T[0][0] = intrp_b; + T[2][0] = intrp_c; + T[1][1] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); + IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); + T[1][0] = corr; + T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; + } + } + + /* 2D parabolic interpolation gives more accurate lags and peak value */ + Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]); + } + + /* determine the highest peak, after applying a bias towards short lags */ + corr_max = 0.0; + for (k = 0; k < peaks_ind; k++) { + corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k])); + if (corr > corr_max) { + corr_max = corr; + peak = k; + } + } + + lags1[peak] *= 2.0; + lags2[peak] *= 2.0; + + if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; + if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; + if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; + if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[peak]; + lags[1] = lags1[peak]; + lags[2] = lags2[peak]; + lags[3] = lags2[peak]; + } + else + { + row = max_ind / (PITCH_LAG_SPAN2+4); + lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[0] = (double) (lags1[0] + PITCH_BW - row); + + if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; + if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; + if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; + if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[0]; + lags[1] = lags1[0]; + lags[2] = lags2[0]; + lags[3] = lags2[0]; + } +} + +RTC_POP_IGNORING_WFRAME_LARGER_THAN() + +/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order + * t = (0:4)'; + * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; + * [Q, dummy] = qr(A); + * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ +static const double kWeight[5][5] = { + { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, + {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, + {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, 
-0.05714285714286},
+  { 0.05142857142857, -0.14571428571429, -0.27142857142857,  0.67428571428571, -0.30857142857143},
+  { 0.01714285714286,  0.05142857142857, -0.05714285714286, -0.30857142857143,  0.29714285714286}
+};
+
+/* second order high-pass filter */
+static void WebRtcIsac_Highpass(const double* in,
+                                double* out,
+                                double* state,
+                                size_t N) {
+  /* create high-pass filter coefficients
+   * z = 0.998 * exp(j*2*pi*35/8000);
+   * p = 0.94 * exp(j*2*pi*140/8000);
+   * HP_b = [1, -2*real(z), abs(z)^2];
+   * HP_a = [1, -2*real(p), abs(p)^2]; */
+  static const double a_coef[2] = { 1.86864659625574, -0.88360000000000};
+  static const double b_coef[2] = {-1.99524591718270,  0.99600400000000};
+
+  size_t k;
+
+  for (k = 0; k < N; k++) {
+    *out = *in + state[1];
+    state[1] = state[0] + b_coef[0] * *in + a_coef[0] * *out;
+    state[0] = b_coef[1] * *in++ + a_coef[1] * *out++;
+  }
+}
+
+RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
+
+void WebRtcIsac_PitchAnalysis(const double *in,   /* PITCH_FRAME_LEN samples */
+                              double *out,        /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
+                              PitchAnalysisStruct *State,
+                              double *lags,
+                              double *gains)
+{
+  double HPin[PITCH_FRAME_LEN];
+  double Weighted[PITCH_FRAME_LEN];
+  double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
+  double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
+  double out_G[PITCH_FRAME_LEN + QLOOKAHEAD];  /* could be removed by using out instead */
+  double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
+  double old_lag, old_gain;
+  double nrg_wht, tmp;
+  double Wnrg, Wfluct, Wgain;
+  double H[4][4];
+  double grad[4];
+  double dG[4];
+  int k, m, n, iter;
+
+  /* high-pass filtering using second order pole-zero filter */
+  WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
+
+  /* copy from state into buffer */
+  memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
+
+  /* compute weighted and whitened signals */
+  WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
+
+  /* copy from buffer into state */
+  memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
+
+  old_lag = State->PFstr_wght.oldlagp[0];
+  old_gain = State->PFstr_wght.oldgainp[0];
+
+  /* initial pitch estimate */
+  WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
+
+
+  /* Iterative optimization of lags - to be done */
+
+  /* compute energy of whitened signal */
+  nrg_wht = 0.0;
+  for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
+    nrg_wht += Whitened[k] * Whitened[k];
+
+
+  /* Iterative optimization of gains */
+
+  /* set weights for energy, gain fluctuation, and spectral gain penalty functions */
+  Wnrg = 1.0 / nrg_wht;
+  Wgain = 0.005;
+  Wfluct = 3.0;
+
+  /* set initial gains */
+  for (k = 0; k < 4; k++)
+    gains[k] = PITCH_MAX_GAIN_06;
+
+  /* two iterations should be enough */
+  for (iter = 0; iter < 2; iter++) {
+    /* compute Jacobian of pre-filter output towards gains */
+    WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
+
+    /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
+    for (k = 0; k < 4; k++) {
+      tmp = 0.0;
+      for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
+        tmp += out_G[n] * out_dG[k][n];
+      grad[k] = tmp * Wnrg;
+    }
+    for (k = 0; k < 4; k++) {
+      for (m = 0; m <= k; m++) {
+        tmp = 0.0;
+        for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
+          tmp += out_dG[m][n] * out_dG[k][n];
+        H[k][m] = tmp * Wnrg;
+      }
+    }
+
+    /* add gradient and Hessian (lower triangle) for dampening fast gain changes */
+    for (k = 0; k < 4; k++) {
+      tmp = kWeight[k+1][0] * old_gain;
+      for (m = 0; m < 4; m++)
+        tmp += kWeight[k+1][m+1] * gains[m];
+      grad[k] += tmp * Wfluct;
+    }
+    for (k = 0; k < 4; k++) {
+      for (m = 0; m <= k; m++) {
+        H[k][m] += kWeight[k+1][m+1] * Wfluct;
+      }
+    }
+
+    /* add gradient and Hessian for dampening gain */
+    for (k = 0; k < 3; k++) {
+      tmp = 1.0 / (1 - gains[k]);
+      grad[k] += tmp * tmp * Wgain;
+      H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
+    }
+    tmp = 1.0 / (1 - gains[3]);
+    grad[3] += 1.33 * (tmp * tmp * Wgain);
+    H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
+
+
+    /* compute Cholesky factorization of Hessian
+     * by overwriting the upper triangle; scale factors on diagonal
+     * (for non-PC platforms, store the inverse of the diagonals separately
+     * to minimize divisions) */
+    H[0][1] = H[1][0] / H[0][0];
+    H[0][2] = H[2][0] / H[0][0];
+    H[0][3] = H[3][0] / H[0][0];
+    H[1][1] -= H[0][0] * H[0][1] * H[0][1];
+    H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
+    H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
+    H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
+    H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
+    H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
+
+    /* Compute update as  delta_gains = -inv(H) * grad */
+    /* copy and negate */
+    for (k = 0; k < 4; k++)
+      dG[k] = -grad[k];
+    /* back substitution */
+    dG[1] -= dG[0] * H[0][1];
+    dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
+    dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
+    /* scale */
+    for (k = 0; k < 4; k++)
+      dG[k] /= H[k][k];
+    /* back substitution */
+    dG[2] -= dG[3] * H[2][3];
+    dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
+    dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
+
+    /* update gains and check range */
+    for (k = 0; k < 4; k++) {
+      gains[k] += dG[k];
+      if (gains[k] > PITCH_MAX_GAIN)
+        gains[k] = PITCH_MAX_GAIN;
+      else if (gains[k] < 0.0)
+        gains[k] = 0.0;
+    }
+  }
+
+  /* update state for next frame */
+  WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
+
+  /* concatenate previous input's end and current input */
+  memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
+  memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
+
+  /* lookahead pitch filtering for masking analysis */
+  WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
+
+  /* store last part of input */
+  for (k = 0; k < QLOOKAHEAD; k++)
+    State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
+}
+
+RTC_POP_IGNORING_WFRAME_LARGER_THAN()
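[Editorial note] For reference, the gain solver above factors the symmetric
4x4 Hessian in place as U^T * D * U (unit upper-triangular U stored in the
upper triangle, scale factors D on the diagonal) and then solves
H * dG = -grad with two substitution passes. A self-contained 2x2 miniature
with hand-checkable numbers, illustrative only:

#include <stdio.h>

int main(void) {
  /* H = [4 2; 2 3], grad = [2 1]^T  =>  dG = -inv(H)*grad = [-0.5, 0]^T */
  double H[2][2] = {{4.0, 2.0}, {2.0, 3.0}};
  double grad[2] = {2.0, 1.0};
  double dG[2];

  /* In-place factorization, mirroring the encoder loop. */
  H[0][1] = H[1][0] / H[0][0];            /* 0.5 */
  H[1][1] -= H[0][0] * H[0][1] * H[0][1]; /* 3 - 4*0.25 = 2 */

  /* Forward substitution on -grad (solves U^T * w = -grad). */
  dG[0] = -grad[0];                       /* -2 */
  dG[1] = -grad[1] - dG[0] * H[0][1];     /* -1 + 1 = 0 */

  /* Diagonal scaling (applies D^-1). */
  dG[0] /= H[0][0];                       /* -0.5 */
  dG[1] /= H[1][1];                       /* 0 */

  /* Back substitution (solves U * dG = w). */
  dG[0] -= dG[1] * H[0][1];               /* -0.5 */

  printf("dG = [%g, %g]\n", dG[0], dG[1]); /* expect [-0.5, 0] */
  return 0;
}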
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h
new file mode 100644
index 0000000000..4ab78c20ad
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * pitch_estimator.h
+ *
+ * Pitch functions
+ *
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
+#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
+
+#include <stddef.h>
+
+#include "modules/audio_coding/codecs/isac/main/source/structs.h"
+
+void WebRtcIsac_PitchAnalysis(
+    const double* in, /* PITCH_FRAME_LEN samples */
+    double* out,      /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
+    PitchAnalysisStruct* State,
+    double* lags,
+    double* gains);
+
+#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */
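[Editorial note] A hypothetical call sequence for the function declared above,
illustrative only (buffer sizes and the init helper come from settings.h,
structs.h and isac_vad.h; analyze_frame_example is not an upstream symbol):

#include <string.h>

#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"

void analyze_frame_example(const double* frame /* PITCH_FRAME_LEN samples */) {
  static PitchAnalysisStruct state;
  static int initialized = 0;
  double filtered[PITCH_FRAME_LEN + QLOOKAHEAD];
  double lags[PITCH_SUBFRAMES];  /* one pitch-lag per sub-frame */
  double gains[PITCH_SUBFRAMES]; /* one pitch-gain per sub-frame */

  if (!initialized) {
    memset(&state, 0, sizeof(state));
    WebRtcIsac_InitPitchAnalysis(&state); /* declared in isac_vad.h */
    initialized = 1;
  }

  WebRtcIsac_PitchAnalysis(frame, filtered, &state, lags, gains);
  /* lags[] and gains[] now hold the per-sub-frame pitch estimates. */
}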
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c
new file mode 100644
index 0000000000..bf03dfff2e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c
@@ -0,0 +1,388 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <memory.h>
+#include <string.h>
+
+#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
+#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
+#include "rtc_base/compile_assert_c.h"
+
+/*
+ * We are implementing the following filters:
+ *
+ * Pre-filtering:
+ *   y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
+ *
+ * Post-filtering:
+ *   y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
+ *
+ * Note that `lag` is a floating-point number, so we perform an interpolation
+ * to obtain the correct `lag`.
+ *
+ */
+
+static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
+                                                    -0.07};
+
+/* interpolation coefficients; generated by design_pitch_filter.m */
+static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
+  {-0.02239172458614,  0.06653315052934, -0.16515880017569,  0.60701333734125,
+    0.64671399919202, -0.20249000396417,  0.09926548334755, -0.04765933793109,
+    0.01754159521746},
+  {-0.01985640750434,  0.05816126837866, -0.13991265473714,  0.44560418147643,
+    0.79117042386876, -0.20266133815188,  0.09585268418555, -0.04533310458084,
+    0.01654127246314},
+  {-0.01463300534216,  0.04229888475060, -0.09897034715253,  0.28284326017787,
+    0.90385267956632, -0.16976950138649,  0.07704272393639, -0.03584218578311,
+    0.01295781500709},
+  {-0.00764851320885,  0.02184035544377, -0.04985561057281,  0.13083306574393,
+    0.97545011664662, -0.10177807997561,  0.04400901776474, -0.02010737175166,
+    0.00719783432422},
+  {-0.00000000000000,  0.00000000000000, -0.00000000000001,  0.00000000000001,
+    0.99999999999999,  0.00000000000001, -0.00000000000001,  0.00000000000000,
+   -0.00000000000000},
+  { 0.00719783432422, -0.02010737175166,  0.04400901776474, -0.10177807997562,
+    0.97545011664663,  0.13083306574393, -0.04985561057280,  0.02184035544377,
+   -0.00764851320885},
+  { 0.01295781500710, -0.03584218578312,  0.07704272393640, -0.16976950138650,
+    0.90385267956634,  0.28284326017785, -0.09897034715252,  0.04229888475059,
+   -0.01463300534216},
+  { 0.01654127246315, -0.04533310458085,  0.09585268418557, -0.20266133815190,
+    0.79117042386878,  0.44560418147640, -0.13991265473712,  0.05816126837865,
+   -0.01985640750433}
+};
+
+/*
+ * Enumerating the operation of the filter.
+ * iSAC has four different pitch filters, which are very similar in structure.
+ *
+ * kPitchFilterPre     : In this mode the filter is operating as a pitch
+ *                       pre-filter. This is used at the encoder.
+ * kPitchFilterPost    : In this mode the filter is operating as a pitch
+ *                       post-filter. This is the inverse of the pre-filter
+ *                       and is used in the decoder.
+ * kPitchFilterPreLa   : This is, in structure, similar to pre-filtering but
+ *                       utilizing a 3 ms lookahead. It is used to
+ *                       obtain the signal for LPC analysis.
+ * kPitchFilterPreGain : This is, in structure, similar to pre-filtering, but
+ *                       differential changes in gain are considered. This is
+ *                       used to find the optimal gain.
+ */
+typedef enum {
+  kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
+} PitchFilterOperation;
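+/* [Editorial note, not upstream code] Why the post-filter can undo the
+ * pre-filter: both share the buffer signal u(z) = x(z) + y(z). Writing
+ * d(z) = damper(z) * gain * z^(-lag), the pre-filter computes
+ *     y = x + d * u_pre,   u_pre = x + y,
+ * and the post-filter computes
+ *     z = y - d * u_post,  u_post = y + z.
+ * By induction over samples, u_post == u_pre, hence z == x exactly when the
+ * same d(z) is used on both sides. In iSAC the decoder additionally scales
+ * the gains by kEnhancer = 1.3 (see FilterFrame below), so the reconstructed
+ * signal is made deliberately more periodic rather than a bit-exact
+ * inverse. */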
+/*
+ * Structure with parameters used for pitch-filtering.
+ * buffer           : a buffer where the sum of previous inputs and outputs
+ *                    are stored.
+ * damper_state     : the state of the damping filter. The filter is defined
+ *                    by `kDampFilter`.
+ * interpol_coeff   : pointer to a set of coefficients which are used to
+ *                    utilize fractional pitch by interpolation.
+ * gain             : pitch-gain to be applied to the current segment of input.
+ * lag              : pitch-lag for the current segment of input.
+ * lag_offset       : the offset of lag w.r.t. current sample.
+ * sub_frame        : sub-frame index; there are 4 pitch sub-frames in an iSAC
+ *                    frame.
+ * mode             : specifies the usage of the filter. See
+ *                    'PitchFilterOperation' for operational modes.
+ * num_samples      : number of samples to be processed in each segment.
+ * index            : index of the input and output sample.
+ * damper_state_dg  : state of damping filter for different trial gains.
+ * gain_mult        : differential changes to gain.
+ */
+typedef struct {
+  double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
+  double damper_state[PITCH_DAMPORDER];
+  const double *interpol_coeff;
+  double gain;
+  double lag;
+  int lag_offset;
+
+  int sub_frame;
+  PitchFilterOperation mode;
+  int num_samples;
+  int index;
+
+  double damper_state_dg[4][PITCH_DAMPORDER];
+  double gain_mult[4];
+} PitchFilterParam;
+
+/**********************************************************************
+ * FilterSegment()
+ * Filter one segment, a quarter of a frame.
+ *
+ * Inputs
+ *   in_data      : pointer to the input signal of 30 ms at 8 kHz sample-rate.
+ *   filter_param : pitch filter parameters.
+ *
+ * Outputs
+ *   out_data     : pointer to a buffer where the filtered signal is written.
+ *   out_dg       : [only used in kPitchFilterPreGain] pointer to a buffer
+ *                  where the output of different gain values (differential
+ *                  change to gain) is written.
+ */
+static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
+                          double* out_data,
+                          double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
+  int n;
+  int m;
+  int j;
+  double sum;
+  double sum2;
+  /* Index of `parameters->buffer` where the output is written to. */
+  int pos = parameters->index + PITCH_BUFFSIZE;
+  /* Index of `parameters->buffer` where samples are read for fractional-lag
+   * computation. */
+  int pos_lag = pos - parameters->lag_offset;
+
+  for (n = 0; n < parameters->num_samples; ++n) {
+    /* Shift low pass filter states. */
+    for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
+      parameters->damper_state[m] = parameters->damper_state[m - 1];
+    }
+    /* Filter to get fractional pitch. */
+    sum = 0.0;
+    for (m = 0; m < PITCH_FRACORDER; ++m) {
+      sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
+    }
+    /* Multiply with gain. */
+    parameters->damper_state[0] = parameters->gain * sum;
+
+    if (parameters->mode == kPitchFilterPreGain) {
+      int lag_index = parameters->index - parameters->lag_offset;
+      int m_tmp = (lag_index < 0) ? -lag_index : 0;
+      /* Update the damper state for the new sample. */
+      for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
+        for (j = 0; j < 4; ++j) {
+          parameters->damper_state_dg[j][m] =
+              parameters->damper_state_dg[j][m - 1];
+        }
+      }
+
+      for (j = 0; j < parameters->sub_frame + 1; ++j) {
+        /* Filter for fractional pitch. */
+        sum2 = 0.0;
+        for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
+          /* `lag_index + m` is always larger than or equal to zero; see how
+           * m_tmp is computed. This is equivalent to assuming that samples
+           * outside `out_dg[j]` are zero. */
+          sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
+        }
+        /* Add the contribution of differential gain change. */
+        parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
+            parameters->gain * sum2;
+      }
+
+      /* Filter with damping filter, and store the results. */
+      for (j = 0; j < parameters->sub_frame + 1; ++j) {
+        sum = 0.0;
+        for (m = 0; m < PITCH_DAMPORDER; ++m) {
+          sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
+        }
+        out_dg[j][parameters->index] = sum;
+      }
+    }
+    /* Filter with damping filter. */
+    sum = 0.0;
+    for (m = 0; m < PITCH_DAMPORDER; ++m) {
+      sum += parameters->damper_state[m] * kDampFilter[m];
+    }
+
+    /* Subtract from input and update buffer. */
+    out_data[parameters->index] = in_data[parameters->index] - sum;
+    parameters->buffer[pos] = in_data[parameters->index] +
+        out_data[parameters->index];
+
+    ++parameters->index;
+    ++pos;
+    ++pos_lag;
+  }
+  return;
+}
+
+/* Update filter parameters based on the pitch-gains and pitch-lags. */
+static void Update(PitchFilterParam* parameters) {
+  double fraction;
+  int fraction_index;
+  /* Compute integer lag-offset. */
+  parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
+      0.5);
+  /* Find correct set of coefficients for computing fractional pitch. */
+  fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
+  fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
+  parameters->interpol_coeff = kIntrpCoef[fraction_index];
+
+  if (parameters->mode == kPitchFilterPreGain) {
+    /* If in this mode, make a differential change to pitch gain. */
+    parameters->gain_mult[parameters->sub_frame] += 0.2;
+    if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
+      parameters->gain_mult[parameters->sub_frame] = 1.0;
+    }
+    if (parameters->sub_frame > 0) {
+      parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
+    }
+  }
+}
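+/* [Editorial note, not upstream code] Worked numbers for Update(): with
+ * lag = 60.3 and PITCH_FILTDELAY = 1.5,
+ *   lag_offset     = lrint(60.3 + 1.5 + 0.5) = 62,
+ *   fraction       = 62 - (60.3 + 1.5) = 0.2,
+ *   fraction_index = lrint(8 * 0.2 - 0.5) = lrint(1.1) = 1,
+ * so kIntrpCoef[1] supplies the 9-tap fractional-delay interpolator for this
+ * segment. */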
+
+/******************************************************************************
+ * FilterFrame()
+ * Filter a frame of 30 milliseconds, given pitch-lags and pitch-gains.
+ *
+ * Inputs
+ *   in_data  : pointer to the input signal of 30 ms at 8 kHz sample-rate.
+ *   lags     : pointer to pitch-lags, 4 lags per frame.
+ *   gains    : pointer to pitch-gains, 4 gains per frame.
+ *   mode     : defining the functionality of the filter. It takes the
+ *              following values.
+ *              kPitchFilterPre:     Pitch pre-filter, used at encoder.
+ *              kPitchFilterPost:    Pitch post-filter, used at decoder.
+ *              kPitchFilterPreLa:   Pitch pre-filter with lookahead.
+ *              kPitchFilterPreGain: Pitch pre-filter used to obtain optimal
+ *                                   pitch-gains.
+ *
+ * Outputs
+ *   out_data : pointer to a buffer where the filtered signal is written to.
+ *   out_dg   : [only used in kPitchFilterPreGain] pointer to a buffer
+ *              where the output of different gain values (differential
+ *              change to gain) is written.
+ */
+static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
+                        double* lags, double* gains, PitchFilterOperation mode,
+                        double* out_data,
+                        double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
+  PitchFilterParam filter_parameters;
+  double gain_delta, lag_delta;
+  double old_lag, old_gain;
+  int n;
+  int m;
+  const double kEnhancer = 1.3;
+
+  /* Set up buffer and states. */
+  filter_parameters.index = 0;
+  filter_parameters.lag_offset = 0;
+  filter_parameters.mode = mode;
+  /* Copy states to local variables. */
+  memcpy(filter_parameters.buffer, filter_state->ubuf,
+         sizeof(filter_state->ubuf));
+  RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
+                     sizeof(filter_state->ubuf));
+  memset(filter_parameters.buffer +
+             sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
+         0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
+  memcpy(filter_parameters.damper_state, filter_state->ystate,
+         sizeof(filter_state->ystate));
+
+  if (mode == kPitchFilterPreGain) {
+    /* Clear buffers. */
+    memset(filter_parameters.gain_mult, 0,
+           sizeof(filter_parameters.gain_mult));
+    memset(filter_parameters.damper_state_dg, 0,
+           sizeof(filter_parameters.damper_state_dg));
+    for (n = 0; n < PITCH_SUBFRAMES; ++n) {
+      //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
+      memset(out_dg[n], 0, sizeof(out_dg[n]));
+    }
+  } else if (mode == kPitchFilterPost) {
+    /* Make output more periodic. Negative sign is to change the structure
+     * of the filter. */
+    for (n = 0; n < PITCH_SUBFRAMES; ++n) {
+      gains[n] *= -kEnhancer;
+    }
+  }
+
+  old_lag = *filter_state->oldlagp;
+  old_gain = *filter_state->oldgainp;
+
+  /* No interpolation if the pitch-lag step is big. */
+  if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
+      (lags[0] < (PITCH_DOWNSTEP * old_lag))) {
+    old_lag = lags[0];
+    old_gain = gains[0];
+
+    if (mode == kPitchFilterPreGain) {
+      filter_parameters.gain_mult[0] = 1.0;
+    }
+  }
+
+  filter_parameters.num_samples = PITCH_UPDATE;
+  for (m = 0; m < PITCH_SUBFRAMES; ++m) {
+    /* Set the sub-frame value. */
+    filter_parameters.sub_frame = m;
+    /* Calculate interpolation steps for pitch-lag and pitch-gain. */
+    lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
+    filter_parameters.lag = old_lag;
+    gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
+    filter_parameters.gain = old_gain;
+    /* Store for the next sub-frame. */
+    old_lag = lags[m];
+    old_gain = gains[m];
+
+    for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
+      /* Step-wise interpolation of pitch-gains and lags. As the pitch-lag
+       * changes, some filter parameters need to be updated. */
+      filter_parameters.gain += gain_delta;
+      filter_parameters.lag += lag_delta;
+      /* Update parameters according to new lag value. */
+      Update(&filter_parameters);
+      /* Filter a segment of input. */
+      FilterSegment(in_data, &filter_parameters, out_data, out_dg);
+    }
+  }
+
+  if (mode != kPitchFilterPreGain) {
+    /* Export buffer and states. */
+    memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
+           sizeof(filter_state->ubuf));
+    memcpy(filter_state->ystate, filter_parameters.damper_state,
+           sizeof(filter_state->ystate));
+
+    /* Store for the next frame. */
+    *filter_state->oldlagp = old_lag;
+    *filter_state->oldgainp = old_gain;
+  }
+
+  if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
+    /* Filter the lookahead segment; this is treated as the last sub-frame. So
+     * set `pf_param` to last sub-frame. 
*/ + filter_parameters.sub_frame = PITCH_SUBFRAMES - 1; + filter_parameters.num_samples = QLOOKAHEAD; + FilterSegment(in_data, &filter_parameters, out_data, out_dg); + } +} + +void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL); +} + +void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data, + NULL); +} + +void WebRtcIsac_PitchfilterPre_gains( + double* in_data, double* out_data, + double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state, + double* lags, double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data, + out_dg); +} + +void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h new file mode 100644 index 0000000000..9a232de87b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ + +#include "modules/audio_coding/codecs/isac/main/source/structs.h" + +void WebRtcIsac_PitchfilterPre(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPost(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPre_la(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPre_gains( + double* indat, + double* outdat, + double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD], + PitchFiltstr* pfp, + double* lags, + double* gains); + +#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h new file mode 100644 index 0000000000..abce90c4f5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * settings.h + * + * Declaration of #defines used in the iSAC codec + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ + +/* sampling frequency (Hz) */ +#define FS 16000 + +/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */ +#define INITIAL_FRAMESAMPLES 960 + +/* do not modify the following; this will have to be modified if we + * have a 20ms framesize option */ +/**********************************************************************/ +/* miliseconds */ +#define FRAMESIZE 30 +/* number of samples per frame processed in the encoder, 480 */ +#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */ +#define FRAMESAMPLES_HALF 240 +#define FRAMESAMPLES_QUARTER 120 +/**********************************************************************/ + +/* max number of samples per frame (= 60 ms frame) */ +#define MAX_FRAMESAMPLES 960 +#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2) +/* number of samples per 10ms frame */ +#define FRAMESAMPLES_10ms ((10 * FS) / 1000) +#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2) +/* number of samples in 30 ms frame */ +#define FRAMESAMPLES_30ms 480 +/* number of subframes */ +#define SUBFRAMES 6 +/* length of a subframe */ +#define UPDATE 80 +/* length of half a subframe (low/high band) */ +#define HALF_SUBFRAMELEN (UPDATE / 2) +/* samples of look ahead (in a half-band, so actually + * half the samples of look ahead @ FS) */ +#define QLOOKAHEAD 24 /* 3 ms */ +/* order of AR model in spectral entropy coder */ +#define AR_ORDER 6 +/* order of LP model in spectral entropy coder */ +#define LP_ORDER 0 + +/* window length (masking analysis) */ +#define WINLEN 256 +/* order of low-band pole filter used to approximate masking curve */ +#define ORDERLO 12 +/* order of hi-band pole filter used to approximate masking curve */ +#define ORDERHI 6 + +#define UB_LPC_ORDER 4 +#define UB_LPC_VEC_PER_FRAME 2 +#define UB16_LPC_VEC_PER_FRAME 4 +#define UB_ACTIVE_SUBFRAMES 2 +#define UB_MAX_LPC_ORDER 6 +#define UB_INTERPOL_SEGMENTS 1 +#define UB16_INTERPOL_SEGMENTS 3 +#define LB_TOTAL_DELAY_SAMPLES 48 +enum ISACBandwidth { isac8kHz = 8, isac12kHz = 12, isac16kHz = 16 }; +enum ISACBand { + kIsacLowerBand = 0, + kIsacUpperBand12 = 1, + kIsacUpperBand16 = 2 +}; +enum IsacSamplingRate { kIsacWideband = 16, kIsacSuperWideband = 32 }; +#define UB_LPC_GAIN_DIM SUBFRAMES +#define FB_STATE_SIZE_WORD32 6 + +/* order for post_filter_bank */ +#define POSTQORDER 3 +/* order for pre-filterbank */ +#define QORDER 3 +/* another order */ +#define QORDER_ALL (POSTQORDER + QORDER - 1) +/* for decimator */ +#define ALLPASSSECTIONS 2 + +/* array size for byte stream in number of bytes. 
*/ +/* The old maximum size still needed for the decoding */ +#define STREAM_SIZE_MAX 600 +#define STREAM_SIZE_MAX_30 200 /* 200 bytes=53.4 kbps @ 30 ms.framelength */ +#define STREAM_SIZE_MAX_60 400 /* 400 bytes=53.4 kbps @ 60 ms.framelength */ + +/* storage size for bit counts */ +#define BIT_COUNTER_SIZE 30 +/* maximum order of any AR model or filter */ +#define MAX_AR_MODEL_ORDER 12 // 50 + +/* For pitch analysis */ +#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */ +#define PITCH_MAX_LAG 140 /* 57 Hz */ +#define PITCH_MIN_LAG 20 /* 400 Hz */ +#define PITCH_MAX_GAIN 0.45 +#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */ +#define PITCH_MAX_GAIN_Q12 1843 +#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5) +#define PITCH_CORR_LEN2 60 /* 15 ms */ +#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN / 4) +#define PITCH_BW 11 /* half the band width of correlation surface */ +#define PITCH_SUBFRAMES 4 +#define PITCH_GRAN_PER_SUBFRAME 5 +#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN / PITCH_SUBFRAMES) +#define PITCH_UPDATE (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME) +/* maximum number of peaks to be examined in correlation surface */ +#define PITCH_MAX_NUM_PEAKS 10 +#define PITCH_PEAK_DECAY 0.85 +/* For weighting filter */ +#define PITCH_WLPCORDER 6 +#define PITCH_WLPCWINLEN PITCH_FRAME_LEN +#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */ +#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN +/* For pitch filter */ +/* Extra 50 for fraction and LP filters */ +#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50) +#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN + PITCH_BUFFSIZE) +/* Max rel. step for interpolation */ +#define PITCH_UPSTEP 1.5 +/* Max rel. step for interpolation */ +#define PITCH_DOWNSTEP 0.67 +#define PITCH_FRACS 8 +#define PITCH_FRACORDER 9 +#define PITCH_DAMPORDER 5 +#define PITCH_FILTDELAY 1.5f +/* stepsize for quantization of the pitch Gain */ +#define PITCH_GAIN_STEPSIZE 0.125 + +/* Order of high pass filter */ +#define HPORDER 2 + +/* some mathematical constants */ +/* log2(exp) */ +#define LOG2EXP 1.44269504088896 +#define PI 3.14159265358979 + +/* Maximum number of iterations allowed to limit payload size */ +#define MAX_PAYLOAD_LIMIT_ITERATION 5 + +/* Redundant Coding */ +#define RCU_BOTTLENECK_BPS 16000 +#define RCU_TRANSCODING_SCALE 0.40f +#define RCU_TRANSCODING_SCALE_INVERSE 2.5f + +#define RCU_TRANSCODING_SCALE_UB 0.50f +#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f + +/* Define Error codes */ +/* 6000 General */ +#define ISAC_MEMORY_ALLOCATION_FAILED 6010 +#define ISAC_MODE_MISMATCH 6020 +#define ISAC_DISALLOWED_BOTTLENECK 6030 +#define ISAC_DISALLOWED_FRAME_LENGTH 6040 +#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050 + +/* 6200 Bandwidth estimator */ +#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240 +/* 6400 Encoder */ +#define ISAC_ENCODER_NOT_INITIATED 6410 +#define ISAC_DISALLOWED_CODING_MODE 6420 +#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430 +#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440 +#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450 +#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460 +/* 6600 Decoder */ +#define ISAC_DECODER_NOT_INITIATED 6610 +#define ISAC_EMPTY_PACKET 6620 +#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630 +#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640 +#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650 +#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660 +#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670 +#define ISAC_RANGE_ERROR_DECODE_LPC 6680 +#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690 +#define ISAC_LENGTH_MISMATCH 6730 +#define 
ISAC_RANGE_ERROR_DECODE_BANDWITH 6740 +#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750 +#define ISAC_DISALLOWED_LPC_MODEL 6760 +/* 6800 Call setup formats */ +#define ISAC_INCOMPATIBLE_FORMATS 6810 + +#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h new file mode 100644 index 0000000000..6861ca42bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h @@ -0,0 +1,448 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * structs.h + * + * This header file contains all the structs used in the ISAC codec + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ + +#include "modules/audio_coding/codecs/isac/bandwidth_info.h" +#include "modules/audio_coding/codecs/isac/main/source/settings.h" +#include "modules/third_party/fft/fft.h" + +typedef struct Bitstreamstruct { + uint8_t stream[STREAM_SIZE_MAX]; + uint32_t W_upper; + uint32_t streamval; + uint32_t stream_index; + +} Bitstr; + +typedef struct { + double DataBufferLo[WINLEN]; + double DataBufferHi[WINLEN]; + + double CorrBufLo[ORDERLO + 1]; + double CorrBufHi[ORDERHI + 1]; + + float PreStateLoF[ORDERLO + 1]; + float PreStateLoG[ORDERLO + 1]; + float PreStateHiF[ORDERHI + 1]; + float PreStateHiG[ORDERHI + 1]; + float PostStateLoF[ORDERLO + 1]; + float PostStateLoG[ORDERLO + 1]; + float PostStateHiF[ORDERHI + 1]; + float PostStateHiG[ORDERHI + 1]; + + double OldEnergy; + +} MaskFiltstr; + +typedef struct { + // state vectors for each of the two analysis filters + double INSTAT1[2 * (QORDER - 1)]; + double INSTAT2[2 * (QORDER - 1)]; + double INSTATLA1[2 * (QORDER - 1)]; + double INSTATLA2[2 * (QORDER - 1)]; + double INLABUF1[QLOOKAHEAD]; + double INLABUF2[QLOOKAHEAD]; + + float INSTAT1_float[2 * (QORDER - 1)]; + float INSTAT2_float[2 * (QORDER - 1)]; + float INSTATLA1_float[2 * (QORDER - 1)]; + float INSTATLA2_float[2 * (QORDER - 1)]; + float INLABUF1_float[QLOOKAHEAD]; + float INLABUF2_float[QLOOKAHEAD]; + + /* High pass filter */ + double HPstates[HPORDER]; + float HPstates_float[HPORDER]; + +} PreFiltBankstr; + +typedef struct { + // state vectors for each of the two analysis filters + double STATE_0_LOWER[2 * POSTQORDER]; + double STATE_0_UPPER[2 * POSTQORDER]; + + /* High pass filter */ + double HPstates1[HPORDER]; + double HPstates2[HPORDER]; + + float STATE_0_LOWER_float[2 * POSTQORDER]; + float STATE_0_UPPER_float[2 * POSTQORDER]; + + float HPstates1_float[HPORDER]; + float HPstates2_float[HPORDER]; + +} PostFiltBankstr; + +typedef struct { + // data buffer for pitch filter + double ubuf[PITCH_BUFFSIZE]; + + // low pass state vector + double ystate[PITCH_DAMPORDER]; + + // old lag and gain + double oldlagp[1]; + double oldgainp[1]; + +} PitchFiltstr; + +typedef struct { + // data buffer + double buffer[PITCH_WLPCBUFLEN]; + + // state vectors + double istate[PITCH_WLPCORDER]; + double weostate[PITCH_WLPCORDER]; + double whostate[PITCH_WLPCORDER]; + + // 
LPC window -> should be a global array because constant
+  double window[PITCH_WLPCWINLEN];
+
+} WeightFiltstr;
+
+typedef struct {
+  // for the initial estimator
+  double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
+                    PITCH_FRAME_LEN / 2 + 2];
+  double decimator_state[2 * ALLPASSSECTIONS + 1];
+  double hp_state[2];
+
+  double whitened_buf[QLOOKAHEAD];
+
+  double inbuf[QLOOKAHEAD];
+
+  PitchFiltstr PFstr_wght;
+  PitchFiltstr PFstr;
+  WeightFiltstr Wghtstr;
+
+} PitchAnalysisStruct;
+
+/* Have instance of struct together with other iSAC structs */
+typedef struct {
+  /* Previous frame length (in ms) */
+  int32_t prev_frame_length;
+
+  /* Previous RTP timestamp from received
+     packet (in samples relative beginning) */
+  int32_t prev_rec_rtp_number;
+
+  /* Send timestamp for previous packet (in ms using timeGetTime()) */
+  uint32_t prev_rec_send_ts;
+
+  /* Arrival time for previous packet (in ms using timeGetTime()) */
+  uint32_t prev_rec_arr_ts;
+
+  /* rate of previous packet, derived from RTP timestamps (in bits/s) */
+  float prev_rec_rtp_rate;
+
+  /* Time since the last update of the BN estimate (in ms) */
+  uint32_t last_update_ts;
+
+  /* Time since the last reduction (in ms) */
+  uint32_t last_reduction_ts;
+
+  /* How many times the estimate was updated in the beginning */
+  int32_t count_tot_updates_rec;
+
+  /* The estimated bottleneck rate from there to here (in bits/s) */
+  int32_t rec_bw;
+  float rec_bw_inv;
+  float rec_bw_avg;
+  float rec_bw_avg_Q;
+
+  /* The estimated mean absolute jitter value,
+     as seen on this side (in ms) */
+  float rec_jitter;
+  float rec_jitter_short_term;
+  float rec_jitter_short_term_abs;
+  float rec_max_delay;
+  float rec_max_delay_avg_Q;
+
+  /* (assumed) bitrate for headers (bps) */
+  float rec_header_rate;
+
+  /* The estimated bottleneck rate from here to there (in bits/s) */
+  float send_bw_avg;
+
+  /* The estimated mean absolute jitter value, as seen on
+     the other side (in ms) */
+  float send_max_delay_avg;
+
+  // number of packets received since last update
+  int num_pkts_rec;
+
+  int num_consec_rec_pkts_over_30k;
+
+  // flag for marking that a high speed network has been
+  // detected downstream
+  int hsn_detect_rec;
+
+  int num_consec_snt_pkts_over_30k;
+
+  // flag for marking that a high speed network has
+  // been detected upstream
+  int hsn_detect_snd;
+
+  uint32_t start_wait_period;
+
+  int in_wait_period;
+
+  int change_to_WB;
+
+  uint32_t senderTimestamp;
+  uint32_t receiverTimestamp;
+  // enum IsacSamplingRate incomingStreamSampFreq;
+  uint16_t numConsecLatePkts;
+  float consecLatency;
+  int16_t inWaitLatePkts;
+
+  IsacBandwidthInfo external_bw_info;
+} BwEstimatorstr;
+
+typedef struct {
+  /* boolean, flags if previous packet exceeded B.N. */
+  int PrevExceed;
+  /* ms */
+  int ExceedAgo;
+  /* packets left to send in current burst */
+  int BurstCounter;
+  /* packets */
+  int InitCounter;
+  /* ms remaining in buffer when next packet will be sent */
+  double StillBuffered;
+
+} RateModel;
+
+/* The following struct is used to store data from encoding, to make it
+   fast and easy to construct a new bitstream with a different Bandwidth
+   estimate. All values (except framelength and minBytes) are double size to
+   handle 60 ms of data. 
+*/
+typedef struct {
+  /* Used to keep track of whether it is the first or second part of a
+     60 msec packet */
+  int startIdx;
+
+  /* Frame length in samples */
+  int16_t framelength;
+
+  /* Pitch Gain */
+  int pitchGain_index[2];
+
+  /* Pitch Lag */
+  double meanGain[2];
+  int pitchIndex[PITCH_SUBFRAMES * 2];
+
+  /* LPC */
+  int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */
+  int LPCindex_g[12 * 2];  /* KLT_ORDER_GAIN = 12 */
+  double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2];
+  double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2];
+
+  /* Encode Spec */
+  int16_t fre[FRAMESAMPLES];
+  int16_t fim[FRAMESAMPLES];
+  int16_t AvgPitchGain[2];
+
+  /* Used in adaptive mode only */
+  int minBytes;
+
+} IsacSaveEncoderData;
+
+typedef struct {
+  int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+  double lpcGain[SUBFRAMES << 1];
+  int lpcGainIndex[SUBFRAMES << 1];
+
+  Bitstr bitStreamObj;
+
+  int16_t realFFT[FRAMESAMPLES_HALF];
+  int16_t imagFFT[FRAMESAMPLES_HALF];
+} ISACUBSaveEncDataStruct;
+
+typedef struct {
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PreFiltBankstr prefiltbankstr_obj;
+  PitchFiltstr pitchfiltstr_obj;
+  PitchAnalysisStruct pitchanalysisstr_obj;
+  FFTstr fftstr_obj;
+  IsacSaveEncoderData SaveEnc_obj;
+
+  int buffer_index;
+  int16_t current_framesamples;
+
+  float data_buffer_float[FRAMESAMPLES_30ms];
+
+  int frame_nb;
+  double bottleneck;
+  int16_t new_framelength;
+  double s2nr;
+
+  /* Maximum allowed number of bits for a 30 msec packet */
+  int16_t payloadLimitBytes30;
+  /* Maximum allowed number of bits for a 60 msec packet */
+  int16_t payloadLimitBytes60;
+  /* Maximum allowed number of bits for both 30 and 60 msec packet */
+  int16_t maxPayloadBytes;
+  /* Maximum allowed rate in bytes per 30 msec packet */
+  int16_t maxRateInBytes;
+
+  /*---
+    If set to 1 iSAC will not adapt the frame-size, if used in
+    channel-adaptive mode. The initial value will be used for all rates.
+    ---*/
+  int16_t enforceFrameSize;
+
+  /*-----
+    This records the BWE index the encoder injected into the bit-stream.
+    It will be used in RCU. The same BWE index as in the main payload will
+    be in the redundant payload. We cannot retrieve it from BWE because it
+    is a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has
+    to be called only once per encode.
+    -----*/
+  int16_t lastBWIdx;
+} ISACLBEncStruct;
+
+typedef struct {
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PreFiltBankstr prefiltbankstr_obj;
+  FFTstr fftstr_obj;
+  ISACUBSaveEncDataStruct SaveEnc_obj;
+
+  int buffer_index;
+  float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES];
+  double bottleneck;
+  /* Maximum allowed number of bits for a 30 msec packet */
+  // int16_t payloadLimitBytes30;
+  /* Maximum allowed number of bits for both 30 and 60 msec packet */
+  // int16_t maxPayloadBytes;
+  int16_t maxPayloadSizeBytes;
+
+  double lastLPCVec[UB_LPC_ORDER];
+  int16_t numBytesUsed;
+  int16_t lastJitterInfo;
+} ISACUBEncStruct;
+
+typedef struct {
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PostFiltBankstr postfiltbankstr_obj;
+  PitchFiltstr pitchfiltstr_obj;
+  FFTstr fftstr_obj;
+
+} ISACLBDecStruct;
+
+typedef struct {
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PostFiltBankstr postfiltbankstr_obj;
+  FFTstr fftstr_obj;
+
+} ISACUBDecStruct;
+
+typedef struct {
+  ISACLBEncStruct ISACencLB_obj;
+  ISACLBDecStruct ISACdecLB_obj;
+} ISACLBStruct;
+
+typedef struct {
+  ISACUBEncStruct ISACencUB_obj;
+  ISACUBDecStruct ISACdecUB_obj;
+} ISACUBStruct;
+
+/*
+  This struct is used to take a snapshot of the entropy coder and LPC gains
+  right before encoding LPC gains. This allows us to go back to that state
+  if we want to limit the payload size.
+*/
+typedef struct {
+  /* 6 lower-band & 6 upper-band */
+  double loFiltGain[SUBFRAMES];
+  double hiFiltGain[SUBFRAMES];
+  /* Upper boundary of interval W */
+  uint32_t W_upper;
+  uint32_t streamval;
+  /* Index to the current position in bytestream */
+  uint32_t stream_index;
+  uint8_t stream[3];
+} transcode_obj;
+
+typedef struct {
+  // TODO(kwiberg): The size of these tables could be reduced by storing floats
+  // instead of doubles, and by making use of the identity cos(x) =
+  // sin(x+pi/2). They could also be made global constants that we fill in at
+  // compile time.
+  double costab1[FRAMESAMPLES_HALF];
+  double sintab1[FRAMESAMPLES_HALF];
+  double costab2[FRAMESAMPLES_QUARTER];
+  double sintab2[FRAMESAMPLES_QUARTER];
+} TransformTables;
+
+typedef struct {
+  // lower-band codec instance
+  ISACLBStruct instLB;
+  // upper-band codec instance
+  ISACUBStruct instUB;
+
+  // Bandwidth Estimator and model for the rate.
+  BwEstimatorstr bwestimator_obj;
+  RateModel rate_data_obj;
+  double MaxDelay;
+
+  /* 0 = adaptive; 1 = instantaneous */
+  int16_t codingMode;
+
+  // overall bottleneck of the codec
+  int32_t bottleneck;
+
+  // QMF Filter state
+  int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
+  int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
+  int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
+  int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];
+
+  // Error Code
+  int16_t errorCode;
+
+  // bandwidth of the encoded audio: 8, 12 or 16 kHz
+  enum ISACBandwidth bandwidthKHz;
+  // Sampling rate of audio, encoder and decoder, 8 or 16 kHz
+  enum IsacSamplingRate encoderSamplingRateKHz;
+  enum IsacSamplingRate decoderSamplingRateKHz;
+  // Flag to keep track of initializations, lower & upper-band
+  // encoder and decoder.
+  int16_t initFlag;
+
+  // Flag to indicate signal bandwidth switch
+  int16_t resetFlag_8kHz;
+
+  // Maximum allowed rate, measured in Bytes per 30 ms.
+  int16_t maxRateBytesPer30Ms;
+  // Maximum allowed payload-size, measured in Bytes.
+  int16_t maxPayloadSizeBytes;
+  /* The expected sampling rate of the input signal. Valid values are 16000
+   * and 32000. This is not the operating sampling rate of the codec. */
+  uint16_t in_sample_rate_hz;
+
+  // Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time.
+  TransformTables transform_tables;
+} ISACMainStruct;
+
+#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc
new file mode 100644
index 0000000000..dacf325082
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc
@@ -0,0 +1,88 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h"
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+LegacyEncodedAudioFrame::LegacyEncodedAudioFrame(AudioDecoder* decoder,
+                                                 rtc::Buffer&& payload)
+    : decoder_(decoder), payload_(std::move(payload)) {}
+
+LegacyEncodedAudioFrame::~LegacyEncodedAudioFrame() = default;
+
+size_t LegacyEncodedAudioFrame::Duration() const {
+  const int ret = decoder_->PacketDuration(payload_.data(), payload_.size());
+  return (ret < 0) ? 0 : static_cast<size_t>(ret);
+}
+
+absl::optional<AudioDecoder::EncodedAudioFrame::DecodeResult>
+LegacyEncodedAudioFrame::Decode(rtc::ArrayView<int16_t> decoded) const {
+  AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
+  const int ret = decoder_->Decode(
+      payload_.data(), payload_.size(), decoder_->SampleRateHz(),
+      decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
+
+  if (ret < 0)
+    return absl::nullopt;
+
+  return DecodeResult{static_cast<size_t>(ret), speech_type};
+}
+
+std::vector<AudioDecoder::ParseResult> LegacyEncodedAudioFrame::SplitBySamples(
+    AudioDecoder* decoder,
+    rtc::Buffer&& payload,
+    uint32_t timestamp,
+    size_t bytes_per_ms,
+    uint32_t timestamps_per_ms) {
+  RTC_DCHECK(payload.data());
+  std::vector<AudioDecoder::ParseResult> results;
+  size_t split_size_bytes = payload.size();
+
+  // Find a "chunk size" >= 20 ms and < 40 ms.
+  const size_t min_chunk_size = bytes_per_ms * 20;
+  if (min_chunk_size >= payload.size()) {
+    std::unique_ptr<LegacyEncodedAudioFrame> frame(
+        new LegacyEncodedAudioFrame(decoder, std::move(payload)));
+    results.emplace_back(timestamp, 0, std::move(frame));
+  } else {
+    // Reduce the split size by half as long as `split_size_bytes` is at least
+    // twice the minimum chunk size (so that the resulting size is at least as
+    // large as the minimum chunk size).
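+    // Worked example (illustrative, not from the original source): with
+    // bytes_per_ms = 8, a 60 ms payload is 480 bytes and min_chunk_size is
+    // 160 bytes; 480 is halved once to 240 bytes, i.e. two 30 ms chunks,
+    // which matches the 60 ms -> 30 + 30 ms split expected by the unit test
+    // further below.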
+    while (split_size_bytes >= 2 * min_chunk_size) {
+      split_size_bytes /= 2;
+    }
+
+    const uint32_t timestamps_per_chunk = static_cast<uint32_t>(
+        split_size_bytes * timestamps_per_ms / bytes_per_ms);
+    size_t byte_offset;
+    uint32_t timestamp_offset;
+    for (byte_offset = 0, timestamp_offset = 0; byte_offset < payload.size();
+         byte_offset += split_size_bytes,
+        timestamp_offset += timestamps_per_chunk) {
+      split_size_bytes =
+          std::min(split_size_bytes, payload.size() - byte_offset);
+      rtc::Buffer new_payload(payload.data() + byte_offset, split_size_bytes);
+      std::unique_ptr<LegacyEncodedAudioFrame> frame(
+          new LegacyEncodedAudioFrame(decoder, std::move(new_payload)));
+      results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame));
+    }
+  }
+
+  return results;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h
new file mode 100644
index 0000000000..21da1367ed
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_
+#define MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class LegacyEncodedAudioFrame final : public AudioDecoder::EncodedAudioFrame {
+ public:
+  LegacyEncodedAudioFrame(AudioDecoder* decoder, rtc::Buffer&& payload);
+  ~LegacyEncodedAudioFrame() override;
+
+  static std::vector<AudioDecoder::ParseResult> SplitBySamples(
+      AudioDecoder* decoder,
+      rtc::Buffer&& payload,
+      uint32_t timestamp,
+      size_t bytes_per_ms,
+      uint32_t timestamps_per_ms);
+
+  size_t Duration() const override;
+
+  absl::optional<DecodeResult> Decode(
+      rtc::ArrayView<int16_t> decoded) const override;
+
+  // For testing:
+  const rtc::Buffer& payload() const { return payload_; }
+
+ private:
+  AudioDecoder* const decoder_;
+  const rtc::Buffer payload_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc
new file mode 100644
index 0000000000..f81aeeea80
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc
@@ -0,0 +1,179 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h"
+
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+enum class NetEqDecoder {
+  kDecoderPCMu,
+  kDecoderPCMa,
+  kDecoderPCMu_2ch,
+  kDecoderPCMa_2ch,
+  kDecoderPCM16B,
+  kDecoderPCM16Bwb,
+  kDecoderPCM16Bswb32kHz,
+  kDecoderPCM16Bswb48kHz,
+  kDecoderPCM16B_2ch,
+  kDecoderPCM16Bwb_2ch,
+  kDecoderPCM16Bswb32kHz_2ch,
+  kDecoderPCM16Bswb48kHz_2ch,
+  kDecoderPCM16B_5ch,
+  kDecoderG722,
+};
+
+class SplitBySamplesTest : public ::testing::TestWithParam<NetEqDecoder> {
+ protected:
+  virtual void SetUp() {
+    decoder_type_ = GetParam();
+    switch (decoder_type_) {
+      case NetEqDecoder::kDecoderPCMu:
+      case NetEqDecoder::kDecoderPCMa:
+        bytes_per_ms_ = 8;
+        samples_per_ms_ = 8;
+        break;
+      case NetEqDecoder::kDecoderPCMu_2ch:
+      case NetEqDecoder::kDecoderPCMa_2ch:
+        bytes_per_ms_ = 2 * 8;
+        samples_per_ms_ = 8;
+        break;
+      case NetEqDecoder::kDecoderG722:
+        bytes_per_ms_ = 8;
+        samples_per_ms_ = 16;
+        break;
+      case NetEqDecoder::kDecoderPCM16B:
+        bytes_per_ms_ = 16;
+        samples_per_ms_ = 8;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bwb:
+        bytes_per_ms_ = 32;
+        samples_per_ms_ = 16;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bswb32kHz:
+        bytes_per_ms_ = 64;
+        samples_per_ms_ = 32;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bswb48kHz:
+        bytes_per_ms_ = 96;
+        samples_per_ms_ = 48;
+        break;
+      case NetEqDecoder::kDecoderPCM16B_2ch:
+        bytes_per_ms_ = 2 * 16;
+        samples_per_ms_ = 8;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bwb_2ch:
+        bytes_per_ms_ = 2 * 32;
+        samples_per_ms_ = 16;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch:
+        bytes_per_ms_ = 2 * 64;
+        samples_per_ms_ = 32;
+        break;
+      case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch:
+        bytes_per_ms_ = 2 * 96;
+        samples_per_ms_ = 48;
+        break;
+      case NetEqDecoder::kDecoderPCM16B_5ch:
+        bytes_per_ms_ = 5 * 16;
+        samples_per_ms_ = 8;
+        break;
+      default:
+        RTC_DCHECK_NOTREACHED();
+        break;
+    }
+  }
+  size_t bytes_per_ms_;
+  int samples_per_ms_;
+  NetEqDecoder decoder_type_;
+};
+
+// Test splitting sample-based payloads.
+TEST_P(SplitBySamplesTest, PayloadSizes) {
+  constexpr uint32_t kBaseTimestamp = 0x12345678;
+  struct ExpectedSplit {
+    size_t payload_size_ms;
+    size_t num_frames;
+    // For simplicity, we only expect up to two packets per split.
+    size_t frame_sizes[2];
+  };
+  // The payloads are expected to be split as follows:
+  // 10 ms -> 10 ms
+  // 20 ms -> 20 ms
+  // 30 ms -> 30 ms
+  // 40 ms -> 20 + 20 ms
+  // 50 ms -> 25 + 25 ms
+  // 60 ms -> 30 + 30 ms
+  ExpectedSplit expected_splits[] = {{10, 1, {10}},     {20, 1, {20}},
+                                     {30, 1, {30}},     {40, 2, {20, 20}},
+                                     {50, 2, {25, 25}}, {60, 2, {30, 30}}};
+
+  for (const auto& expected_split : expected_splits) {
+    // The payload values are set to steadily increase (modulo 256), so that
+    // the resulting frames can be checked and we can be reasonably certain no
+    // sample was missed or repeated.
+    const auto generate_payload = [](size_t num_bytes) {
+      rtc::Buffer payload(num_bytes);
+      uint8_t value = 0;
+      // Allow wrap-around of value in counter below.
+      for (size_t i = 0; i != payload.size(); ++i, ++value) {
+        payload[i] = value;
+      }
+      return payload;
+    };
+
+    const auto results = LegacyEncodedAudioFrame::SplitBySamples(
+        nullptr,
+        generate_payload(expected_split.payload_size_ms * bytes_per_ms_),
+        kBaseTimestamp, bytes_per_ms_, samples_per_ms_);
+
+    EXPECT_EQ(expected_split.num_frames, results.size());
+    uint32_t expected_timestamp = kBaseTimestamp;
+    uint8_t value = 0;
+    for (size_t i = 0; i != expected_split.num_frames; ++i) {
+      const auto& result = results[i];
+      const LegacyEncodedAudioFrame* frame =
+          static_cast<const LegacyEncodedAudioFrame*>(result.frame.get());
+      const size_t length_bytes =
+          expected_split.frame_sizes[i] * bytes_per_ms_;
+      EXPECT_EQ(length_bytes, frame->payload().size());
+      EXPECT_EQ(expected_timestamp, result.timestamp);
+      const rtc::Buffer& payload = frame->payload();
+      // Allow wrap-around of value in counter below.
+      for (size_t i = 0; i != payload.size(); ++i, ++value) {
+        ASSERT_EQ(value, payload[i]);
+      }
+
+      expected_timestamp += rtc::checked_cast<uint32_t>(
+          expected_split.frame_sizes[i] * samples_per_ms_);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    LegacyEncodedAudioFrame,
+    SplitBySamplesTest,
+    ::testing::Values(NetEqDecoder::kDecoderPCMu,
+                      NetEqDecoder::kDecoderPCMa,
+                      NetEqDecoder::kDecoderPCMu_2ch,
+                      NetEqDecoder::kDecoderPCMa_2ch,
+                      NetEqDecoder::kDecoderG722,
+                      NetEqDecoder::kDecoderPCM16B,
+                      NetEqDecoder::kDecoderPCM16Bwb,
+                      NetEqDecoder::kDecoderPCM16Bswb32kHz,
+                      NetEqDecoder::kDecoderPCM16Bswb48kHz,
+                      NetEqDecoder::kDecoderPCM16B_2ch,
+                      NetEqDecoder::kDecoderPCM16Bwb_2ch,
+                      NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch,
+                      NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch,
+                      NetEqDecoder::kDecoderPCM16B_5ch));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS b/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS
new file mode 100644
index 0000000000..c2530726ad
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS
@@ -0,0 +1,5 @@
+specific_include_rules = {
+  "opus_inst\.h": [
+    "+third_party/opus",
+  ],
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc
new file mode 100644
index 0000000000..03c02186d0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+
+absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
+                                               absl::string_view param) {
+  auto it = format.parameters.find(std::string(param));
+  if (it == format.parameters.end())
+    return absl::nullopt;
+
+  return it->second;
+}
+
+// Parses a comma-separated string "1,2,0,6" into a
+// std::vector<unsigned char>.
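+//
+// Usage sketch (illustrative, not part of the original file):
+//   GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping")
+//   turns "0,1,2,3" into {0, 1, 2, 3}; a list containing a non-numeric entry
+//   such as "0,1,two" yields absl::nullopt.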
+template <>
+absl::optional<std::vector<unsigned char>> GetFormatParameter(
+    const SdpAudioFormat& format,
+    absl::string_view param) {
+  std::vector<unsigned char> result;
+  const std::string comma_separated_list =
+      GetFormatParameter(format, param).value_or("");
+  size_t pos = 0;
+  while (pos < comma_separated_list.size()) {
+    const size_t next_comma = comma_separated_list.find(',', pos);
+    const size_t distance_to_next_comma = next_comma == std::string::npos
+                                              ? std::string::npos
+                                              : (next_comma - pos);
+    auto substring_with_number =
+        comma_separated_list.substr(pos, distance_to_next_comma);
+    auto conv = rtc::StringToNumber<int>(substring_with_number);
+    if (!conv.has_value()) {
+      return absl::nullopt;
+    }
+    result.push_back(*conv);
+    pos += substring_with_number.size() + 1;
+  }
+  return result;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h
new file mode 100644
index 0000000000..5ebb51b577
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/audio_codecs/audio_format.h"
+#include "rtc_base/string_to_number.h"
+
+namespace webrtc {
+
+absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
+                                               absl::string_view param);
+
+template <typename T>
+absl::optional<T> GetFormatParameter(const SdpAudioFormat& format,
+                                     absl::string_view param) {
+  return rtc::StringToNumber<T>(
+      GetFormatParameter(format, param).value_or(""));
+}
+
+template <>
+absl::optional<std::vector<unsigned char>> GetFormatParameter(
+    const SdpAudioFormat& format,
+    absl::string_view param);
+
+class OpusFrame : public AudioDecoder::EncodedAudioFrame {
+ public:
+  OpusFrame(AudioDecoder* decoder,
+            rtc::Buffer&& payload,
+            bool is_primary_payload)
+      : decoder_(decoder),
+        payload_(std::move(payload)),
+        is_primary_payload_(is_primary_payload) {}
+
+  size_t Duration() const override {
+    int ret;
+    if (is_primary_payload_) {
+      ret = decoder_->PacketDuration(payload_.data(), payload_.size());
+    } else {
+      ret =
+          decoder_->PacketDurationRedundant(payload_.data(), payload_.size());
+    }
+    return (ret < 0) ? 0 : static_cast<size_t>(ret);
+  }
+
+  bool IsDtxPacket() const override { return payload_.size() <= 2; }
+
+  absl::optional<DecodeResult> Decode(
+      rtc::ArrayView<int16_t> decoded) const override {
+    AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
+    int ret;
+    if (is_primary_payload_) {
+      ret = decoder_->Decode(
+          payload_.data(), payload_.size(), decoder_->SampleRateHz(),
+          decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
+    } else {
+      ret = decoder_->DecodeRedundant(
+          payload_.data(), payload_.size(), decoder_->SampleRateHz(),
+          decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
+    }
+
+    if (ret < 0)
+      return absl::nullopt;
+
+    return DecodeResult{static_cast<size_t>(ret), speech_type};
+  }
+
+ private:
+  AudioDecoder* const decoder_;
+  const rtc::Buffer payload_;
+  const bool is_primary_payload_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc
new file mode 100644
index 0000000000..285ea89959
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc
@@ -0,0 +1,182 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h"
+
+#include <algorithm>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
+#include "rtc_base/string_to_number.h"
+
+namespace webrtc {
+
+std::unique_ptr<AudioDecoder>
+AudioDecoderMultiChannelOpusImpl::MakeAudioDecoder(
+    AudioDecoderMultiChannelOpusConfig config) {
+  if (!config.IsOk()) {
+    RTC_DCHECK_NOTREACHED();
+    return nullptr;
+  }
+  // Fill the pointer with a working decoder through the C interface. This
+  // allocates memory.
+  OpusDecInst* dec_state = nullptr;
+  const int error = WebRtcOpus_MultistreamDecoderCreate(
+      &dec_state, config.num_channels, config.num_streams,
+      config.coupled_streams, config.channel_mapping.data());
+  if (error != 0) {
+    return nullptr;
+  }
+
+  // Pass the ownership to DecoderImpl. Not using 'make_unique' because the
+  // c-tor is private.
+  return std::unique_ptr<AudioDecoderMultiChannelOpusImpl>(
+      new AudioDecoderMultiChannelOpusImpl(dec_state, config));
+}
+
+AudioDecoderMultiChannelOpusImpl::AudioDecoderMultiChannelOpusImpl(
+    OpusDecInst* dec_state,
+    AudioDecoderMultiChannelOpusConfig config)
+    : dec_state_(dec_state), config_(config) {
+  RTC_DCHECK(dec_state);
+  WebRtcOpus_DecoderInit(dec_state_);
+}
+
+AudioDecoderMultiChannelOpusImpl::~AudioDecoderMultiChannelOpusImpl() {
+  WebRtcOpus_DecoderFree(dec_state_);
+}
+
+absl::optional<AudioDecoderMultiChannelOpusConfig>
+AudioDecoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
+  AudioDecoderMultiChannelOpusConfig config;
+  config.num_channels = format.num_channels;
+  auto num_streams = GetFormatParameter<int>(format, "num_streams");
+  if (!num_streams.has_value()) {
+    return absl::nullopt;
+  }
+  config.num_streams = *num_streams;
+
+  auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams");
+  if (!coupled_streams.has_value()) {
+    return absl::nullopt;
+  }
+  config.coupled_streams = *coupled_streams;
+
+  auto channel_mapping =
+      GetFormatParameter<std::vector<unsigned char>>(format,
+                                                     "channel_mapping");
+  if (!channel_mapping.has_value()) {
+    return absl::nullopt;
+  }
+  config.channel_mapping = *channel_mapping;
+  if (!config.IsOk()) {
+    return absl::nullopt;
+  }
+  return config;
+}
+
+std::vector<AudioDecoder::ParseResult>
+AudioDecoderMultiChannelOpusImpl::ParsePayload(rtc::Buffer&& payload,
+                                               uint32_t timestamp) {
+  std::vector<ParseResult> results;
+
+  if (PacketHasFec(payload.data(), payload.size())) {
+    const int duration =
+        PacketDurationRedundant(payload.data(), payload.size());
+    RTC_DCHECK_GE(duration, 0);
+    rtc::Buffer payload_copy(payload.data(), payload.size());
+    std::unique_ptr<EncodedAudioFrame> fec_frame(
+        new OpusFrame(this, std::move(payload_copy), false));
+    results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
+  }
+  std::unique_ptr<EncodedAudioFrame> frame(
+      new OpusFrame(this, std::move(payload), true));
+  results.emplace_back(timestamp, 0, std::move(frame));
+  return results;
+}
+
+int AudioDecoderMultiChannelOpusImpl::DecodeInternal(const uint8_t* encoded,
+                                                     size_t encoded_len,
+                                                     int sample_rate_hz,
+                                                     int16_t* decoded,
+                                                     SpeechType* speech_type) {
+  RTC_DCHECK_EQ(sample_rate_hz, 48000);
+  int16_t temp_type = 1;  // Default is speech.
+  int ret =
+      WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
+  if (ret > 0)
+    ret *= static_cast<int>(
+        config_.num_channels);  // Return total number of samples.
+  *speech_type = ConvertSpeechType(temp_type);
+  return ret;
+}
+
+int AudioDecoderMultiChannelOpusImpl::DecodeRedundantInternal(
+    const uint8_t* encoded,
+    size_t encoded_len,
+    int sample_rate_hz,
+    int16_t* decoded,
+    SpeechType* speech_type) {
+  if (!PacketHasFec(encoded, encoded_len)) {
+    // This packet is a RED packet.
+    return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
+                          speech_type);
+  }
+
+  RTC_DCHECK_EQ(sample_rate_hz, 48000);
+  int16_t temp_type = 1;  // Default is speech.
+  int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
+                                 &temp_type);
+  if (ret > 0)
+    ret *= static_cast<int>(
+        config_.num_channels);  // Return total number of samples.
+ *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +void AudioDecoderMultiChannelOpusImpl::Reset() { + WebRtcOpus_DecoderInit(dec_state_); +} + +int AudioDecoderMultiChannelOpusImpl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len); +} + +int AudioDecoderMultiChannelOpusImpl::PacketDurationRedundant( + const uint8_t* encoded, + size_t encoded_len) const { + if (!PacketHasFec(encoded, encoded_len)) { + // This packet is a RED packet. + return PacketDuration(encoded, encoded_len); + } + + return WebRtcOpus_FecDurationEst(encoded, encoded_len, 48000); +} + +bool AudioDecoderMultiChannelOpusImpl::PacketHasFec(const uint8_t* encoded, + size_t encoded_len) const { + int fec; + fec = WebRtcOpus_PacketHasFec(encoded, encoded_len); + return (fec == 1); +} + +int AudioDecoderMultiChannelOpusImpl::SampleRateHz() const { + return 48000; +} + +size_t AudioDecoderMultiChannelOpusImpl::Channels() const { + return config_.num_channels; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h new file mode 100644 index 0000000000..2ff47a8a53 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/audio_codecs/audio_format.h"
+#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class AudioDecoderMultiChannelOpusImpl final : public AudioDecoder {
+ public:
+  static std::unique_ptr<AudioDecoder> MakeAudioDecoder(
+      AudioDecoderMultiChannelOpusConfig config);
+
+  ~AudioDecoderMultiChannelOpusImpl() override;
+
+  AudioDecoderMultiChannelOpusImpl(const AudioDecoderMultiChannelOpusImpl&) =
+      delete;
+  AudioDecoderMultiChannelOpusImpl& operator=(
+      const AudioDecoderMultiChannelOpusImpl&) = delete;
+
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+  void Reset() override;
+  int PacketDuration(const uint8_t* encoded,
+                     size_t encoded_len) const override;
+  int PacketDurationRedundant(const uint8_t* encoded,
+                              size_t encoded_len) const override;
+  bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
+  int SampleRateHz() const override;
+  size_t Channels() const override;
+
+  static absl::optional<AudioDecoderMultiChannelOpusConfig> SdpToConfig(
+      const SdpAudioFormat& format);
+
+ protected:
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+  int DecodeRedundantInternal(const uint8_t* encoded,
+                              size_t encoded_len,
+                              int sample_rate_hz,
+                              int16_t* decoded,
+                              SpeechType* speech_type) override;
+
+ private:
+  AudioDecoderMultiChannelOpusImpl(OpusDecInst* dec_state,
+                                   AudioDecoderMultiChannelOpusConfig config);
+
+  OpusDecInst* dec_state_;
+  const AudioDecoderMultiChannelOpusConfig config_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc
new file mode 100644
index 0000000000..57e2107f3c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc
@@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h"
+
+#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+using ::testing::NiceMock;
+using ::testing::Return;
+
+TEST(AudioDecoderMultiOpusTest, GetFormatParameter) {
+  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
+                                  {{"channel_mapping", "0,1,2,3"},
+                                   {"coupled_streams", "2"},
+                                   {"num_streams", "2"}});
+
+  EXPECT_EQ(GetFormatParameter(sdp_format, "channel_mapping"),
+            absl::optional<std::string>("0,1,2,3"));
+
+  EXPECT_EQ(GetFormatParameter<int>(sdp_format, "coupled_streams"),
+            absl::optional<int>(2));
+
+  EXPECT_EQ(GetFormatParameter(sdp_format, "missing"), absl::nullopt);
+
+  EXPECT_EQ(GetFormatParameter<int>(sdp_format, "channel_mapping"),
+            absl::nullopt);
+}
+
+TEST(AudioDecoderMultiOpusTest, InvalidChannelMappings) {
+  {
+    // Can't use channel 3 if there are only 2 channels.
+    const SdpAudioFormat sdp_format("multiopus", 48000, 2,
+                                    {{"channel_mapping", "3,0"},
+                                     {"coupled_streams", "1"},
+                                     {"num_streams", "2"}});
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+    EXPECT_FALSE(decoder_config.has_value());
+  }
+  {
+    // The mapping is too long. There are only 5 channels, but 6 elements in
+    // the mapping.
+    const SdpAudioFormat sdp_format("multiopus", 48000, 5,
+                                    {{"channel_mapping", "0,1,2,3,4,5"},
+                                     {"coupled_streams", "0"},
+                                     {"num_streams", "2"}});
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+    EXPECT_FALSE(decoder_config.has_value());
+  }
+  {
+    // The mapping doesn't parse correctly.
+    const SdpAudioFormat sdp_format(
+        "multiopus", 48000, 5,
+        {{"channel_mapping", "0,1,two,3,4"}, {"coupled_streams", "0"}});
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+    EXPECT_FALSE(decoder_config.has_value());
+  }
+}
+
+TEST(AudioDecoderMultiOpusTest, ValidSdpToConfigProducesCorrectConfig) {
+  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
+                                  {{"channel_mapping", "3,1,2,0"},
+                                   {"coupled_streams", "2"},
+                                   {"num_streams", "2"}});
+
+  const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+      AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+
+  ASSERT_TRUE(decoder_config.has_value());
+  EXPECT_TRUE(decoder_config->IsOk());
+  EXPECT_EQ(decoder_config->coupled_streams, 2);
+  EXPECT_THAT(
+      decoder_config->channel_mapping,
+      ::testing::ContainerEq(std::vector<unsigned char>({3, 1, 2, 0})));
+}
+
+TEST(AudioDecoderMultiOpusTest, InvalidSdpToConfigDoesNotProduceConfig) {
+  {
+    const SdpAudioFormat sdp_format("multiopus", 48000, 4,
+                                    {{"channel_mapping", "0,1,2,3"},
+                                     {"coupled_stream", "2"},
+                                     {"num_streams", "2"}});
+
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+
+    EXPECT_FALSE(decoder_config.has_value());
+  }
+
+  {
+    const SdpAudioFormat sdp_format("multiopus", 48000, 4,
+                                    {{"channel_mapping", "0,1,2 3"},
+                                     {"coupled_streams", "2"},
+                                     {"num_streams", "2"}});
+
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+
+    EXPECT_FALSE(decoder_config.has_value());
+  }
+}
+
+TEST(AudioDecoderMultiOpusTest, CodecsCanBeCreated) {
+  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
+                                  {{"channel_mapping", "0,1,2,3"},
+                                   {"coupled_streams", "2"},
+                                   {"num_streams", "2"}});
+
+  const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+      AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
+
+  ASSERT_TRUE(decoder_config.has_value());
+
+  const std::unique_ptr<AudioDecoder> opus_decoder =
+      AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
+
+  EXPECT_TRUE(opus_decoder);
+}
+
+TEST(AudioDecoderMultiOpusTest, AdvertisedCodecsCanBeCreated) {
+  std::vector<AudioCodecSpec> specs;
+  AudioDecoderMultiChannelOpus::AppendSupportedDecoders(&specs);
+
+  EXPECT_FALSE(specs.empty());
+
+  for (const AudioCodecSpec& spec : specs) {
+    const absl::optional<AudioDecoderMultiChannelOpusConfig> decoder_config =
+        AudioDecoderMultiChannelOpus::SdpToConfig(spec.format);
+    ASSERT_TRUE(decoder_config.has_value());
+
+    const std::unique_ptr<AudioDecoder> opus_decoder =
+        AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
+
+    EXPECT_TRUE(opus_decoder);
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
new file mode 100644
index 0000000000..cff9685548
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
@@ -0,0 +1,128 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"
+
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels,
+                                           int sample_rate_hz)
+    : channels_{num_channels}, sample_rate_hz_{sample_rate_hz} {
+  RTC_DCHECK(num_channels == 1 || num_channels == 2);
+  RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 48000);
+  const int error =
+      WebRtcOpus_DecoderCreate(&dec_state_, channels_, sample_rate_hz_);
+  RTC_DCHECK(error == 0);
+  WebRtcOpus_DecoderInit(dec_state_);
+}
+
+AudioDecoderOpusImpl::~AudioDecoderOpusImpl() {
+  WebRtcOpus_DecoderFree(dec_state_);
+}
+
+std::vector<AudioDecoder::ParseResult> AudioDecoderOpusImpl::ParsePayload(
+    rtc::Buffer&& payload,
+    uint32_t timestamp) {
+  std::vector<ParseResult> results;
+
+  if (PacketHasFec(payload.data(), payload.size())) {
+    const int duration =
+        PacketDurationRedundant(payload.data(), payload.size());
+    RTC_DCHECK_GE(duration, 0);
+    rtc::Buffer payload_copy(payload.data(), payload.size());
+    std::unique_ptr<EncodedAudioFrame> fec_frame(
+        new OpusFrame(this, std::move(payload_copy), false));
+    results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
+  }
+  std::unique_ptr<EncodedAudioFrame> frame(
+      new OpusFrame(this, std::move(payload), true));
+  results.emplace_back(timestamp, 0, std::move(frame));
+  return results;
+}
+
+int AudioDecoderOpusImpl::DecodeInternal(const uint8_t* encoded,
+                                         size_t encoded_len,
+                                         int sample_rate_hz,
+                                         int16_t* decoded,
+                                         SpeechType* speech_type) {
+  RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
+  int16_t temp_type = 1;  // Default is speech.
+  int ret =
+      WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
+  if (ret > 0)
+    ret *= static_cast<int>(channels_);  // Return total number of samples.
+  *speech_type = ConvertSpeechType(temp_type);
+  return ret;
+}
+
+int AudioDecoderOpusImpl::DecodeRedundantInternal(const uint8_t* encoded,
+                                                  size_t encoded_len,
+                                                  int sample_rate_hz,
+                                                  int16_t* decoded,
+                                                  SpeechType* speech_type) {
+  if (!PacketHasFec(encoded, encoded_len)) {
+    // This packet is a RED packet.
+    return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
+                          speech_type);
+  }
+
+  RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
+  int16_t temp_type = 1;  // Default is speech.
+  int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
+                                 &temp_type);
+  if (ret > 0)
+    ret *= static_cast<int>(channels_);  // Return total number of samples.
+  *speech_type = ConvertSpeechType(temp_type);
+  return ret;
+}
+
+void AudioDecoderOpusImpl::Reset() {
+  WebRtcOpus_DecoderInit(dec_state_);
+}
+
+int AudioDecoderOpusImpl::PacketDuration(const uint8_t* encoded,
+                                         size_t encoded_len) const {
+  return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
+}
+
+int AudioDecoderOpusImpl::PacketDurationRedundant(const uint8_t* encoded,
+                                                  size_t encoded_len) const {
+  if (!PacketHasFec(encoded, encoded_len)) {
+    // This packet is a RED packet.
+    return PacketDuration(encoded, encoded_len);
+  }
+
+  return WebRtcOpus_FecDurationEst(encoded, encoded_len, sample_rate_hz_);
+}
+
+bool AudioDecoderOpusImpl::PacketHasFec(const uint8_t* encoded,
+                                        size_t encoded_len) const {
+  int fec;
+  fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
+  return (fec == 1);
+}
+
+int AudioDecoderOpusImpl::SampleRateHz() const {
+  return sample_rate_hz_;
+}
+
+size_t AudioDecoderOpusImpl::Channels() const {
+  return channels_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h
new file mode 100644
index 0000000000..e8fd0440bc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class AudioDecoderOpusImpl final : public AudioDecoder {
+ public:
+  explicit AudioDecoderOpusImpl(size_t num_channels,
+                                int sample_rate_hz = 48000);
+  ~AudioDecoderOpusImpl() override;
+
+  AudioDecoderOpusImpl(const AudioDecoderOpusImpl&) = delete;
+  AudioDecoderOpusImpl& operator=(const AudioDecoderOpusImpl&) = delete;
+
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+  void Reset() override;
+  int PacketDuration(const uint8_t* encoded,
+                     size_t encoded_len) const override;
+  int PacketDurationRedundant(const uint8_t* encoded,
+                              size_t encoded_len) const override;
+  bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
+  int SampleRateHz() const override;
+  size_t Channels() const override;
+
+ protected:
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+  int DecodeRedundantInternal(const uint8_t* encoded,
+                              size_t encoded_len,
+                              int sample_rate_hz,
+                              int16_t* decoded,
+                              SpeechType* speech_type) override;
+
+ private:
+  OpusDecInst* dec_state_;
+  const size_t channels_;
+  const int sample_rate_hz_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc
new file mode 100644
index 0000000000..38a11c123d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc
@@ -0,0 +1,366 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * LEFT TO DO:
+ * - WRITE TESTS for the stuff in this file.
+ * - Check the creation, maybe make it safer by returning an empty optional or
+ *   unique_ptr. --- It looks OK, but RecreateEncoderInstance can perhaps
+ *   crash on a valid config. Can run it in the fuzzer for some time. Should
+ *   probably also fuzz the config.
+ */
+
+#include "modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h"
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <utility>
+
+#include "absl/strings/match.h"
+#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/string_to_number.h"
+
+namespace webrtc {
+
+namespace {
+
+// Recommended bitrates for one channel:
+// 8-12 kb/s for NB speech,
+// 16-20 kb/s for WB speech,
+// 28-40 kb/s for FB speech,
+// 48-64 kb/s for FB mono music, and
+// 64-128 kb/s for FB stereo music.
+// The current implementation multiplies these values by the number of
+// channels.
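+// Illustrative example (not an original comment): with the constants below,
+// a 6-channel (5.1) fullband stream defaults to
+// 6 * kOpusBitrateFbBps = 6 * 32000 = 192000 bps (see CalculateDefaultBitrate
+// further down).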
+constexpr int kOpusBitrateNbBps = 12000;
+constexpr int kOpusBitrateWbBps = 20000;
+constexpr int kOpusBitrateFbBps = 32000;
+
+constexpr int kDefaultMaxPlaybackRate = 48000;
+// These two lists must be sorted from low to high
+#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
+constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120};
+#else
+constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60};
+#endif
+
+int GetBitrateBps(const AudioEncoderMultiChannelOpusConfig& config) {
+  RTC_DCHECK(config.IsOk());
+  return config.bitrate_bps;
+}
+int GetMaxPlaybackRate(const SdpAudioFormat& format) {
+  const auto param = GetFormatParameter<int>(format, "maxplaybackrate");
+  if (param && *param >= 8000) {
+    return std::min(*param, kDefaultMaxPlaybackRate);
+  }
+  return kDefaultMaxPlaybackRate;
+}
+
+int GetFrameSizeMs(const SdpAudioFormat& format) {
+  const auto ptime = GetFormatParameter<int>(format, "ptime");
+  if (ptime.has_value()) {
+    // Pick the next highest supported frame length from
+    // kOpusSupportedFrameLengths.
+    for (const int supported_frame_length : kOpusSupportedFrameLengths) {
+      if (supported_frame_length >= *ptime) {
+        return supported_frame_length;
+      }
+    }
+    // If none was found, return the largest supported frame length.
+    return *(std::end(kOpusSupportedFrameLengths) - 1);
+  }
+
+  return AudioEncoderOpusConfig::kDefaultFrameSizeMs;
+}
+
+int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) {
+  const int bitrate = [&] {
+    if (max_playback_rate <= 8000) {
+      return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels);
+    } else if (max_playback_rate <= 16000) {
+      return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels);
+    } else {
+      return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels);
+    }
+  }();
+  RTC_DCHECK_GE(bitrate, AudioEncoderMultiChannelOpusConfig::kMinBitrateBps);
+  return bitrate;
+}
+
+// Get the maxaveragebitrate parameter in string-form, so we can properly
+// figure out how invalid it is and accurately log invalid values.
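+//
+// Illustrative behavior (derived from the code below, not an original
+// comment): maxaveragebitrate=500000 is clamped to kMaxBitrateBps and a
+// warning is logged, while a non-numeric value such as maxaveragebitrate=abc
+// falls back to the computed default bitrate.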
+int CalculateBitrate(int max_playback_rate_hz,
+                     size_t num_channels,
+                     absl::optional<std::string> bitrate_param) {
+  const int default_bitrate =
+      CalculateDefaultBitrate(max_playback_rate_hz, num_channels);
+
+  if (bitrate_param) {
+    const auto bitrate = rtc::StringToNumber<int>(*bitrate_param);
+    if (bitrate) {
+      const int chosen_bitrate =
+          std::max(AudioEncoderOpusConfig::kMinBitrateBps,
+                   std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps));
+      if (bitrate != chosen_bitrate) {
+        RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate
+                            << " clamped to " << chosen_bitrate;
+      }
+      return chosen_bitrate;
+    }
+    RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param
+                        << "\" replaced by default bitrate "
+                        << default_bitrate;
+  }
+
+  return default_bitrate;
+}
+
+}  // namespace
+
+std::unique_ptr<AudioEncoder>
+AudioEncoderMultiChannelOpusImpl::MakeAudioEncoder(
+    const AudioEncoderMultiChannelOpusConfig& config,
+    int payload_type) {
+  if (!config.IsOk()) {
+    RTC_DCHECK_NOTREACHED();
+    return nullptr;
+  }
+  return std::make_unique<AudioEncoderMultiChannelOpusImpl>(config,
+                                                            payload_type);
+}
+
+AudioEncoderMultiChannelOpusImpl::AudioEncoderMultiChannelOpusImpl(
+    const AudioEncoderMultiChannelOpusConfig& config,
+    int payload_type)
+    : payload_type_(payload_type), inst_(nullptr) {
+  RTC_DCHECK(0 <= payload_type && payload_type <= 127);
+
+  RTC_CHECK(RecreateEncoderInstance(config));
+}
+
+AudioEncoderMultiChannelOpusImpl::~AudioEncoderMultiChannelOpusImpl() {
+  RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
+}
+
+size_t AudioEncoderMultiChannelOpusImpl::SufficientOutputBufferSize() const {
+  // Calculate the number of bytes we expect the encoder to produce,
+  // then multiply by two to give a wide margin for error.
+  const size_t bytes_per_millisecond =
+      static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1);
+  const size_t approx_encoded_bytes =
+      Num10msFramesPerPacket() * 10 * bytes_per_millisecond;
+  return 2 * approx_encoded_bytes;
+}
+
+void AudioEncoderMultiChannelOpusImpl::Reset() {
+  RTC_CHECK(RecreateEncoderInstance(config_));
+}
+
+absl::optional<std::pair<TimeDelta, TimeDelta>>
+AudioEncoderMultiChannelOpusImpl::GetFrameLengthRange() const {
+  return {{TimeDelta::Millis(config_.frame_size_ms),
+           TimeDelta::Millis(config_.frame_size_ms)}};
+}
+
+// If the given config is OK, recreate the Opus encoder instance with those
+// settings, save the config, and return true. Otherwise, do nothing and
+// return false.
+bool AudioEncoderMultiChannelOpusImpl::RecreateEncoderInstance(
+    const AudioEncoderMultiChannelOpusConfig& config) {
+  if (!config.IsOk())
+    return false;
+  config_ = config;
+  if (inst_)
+    RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
+  input_buffer_.clear();
+  input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
+  RTC_CHECK_EQ(
+      0, WebRtcOpus_MultistreamEncoderCreate(
+             &inst_, config.num_channels,
+             config.application ==
+                     AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip
+                 ? 0
+                 : 1,
+             config.num_streams, config.coupled_streams,
+             config.channel_mapping.data()));
+  const int bitrate = GetBitrateBps(config);
+  RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate));
+  RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
+  if (config.fec_enabled) {
+    RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus enable FEC";
+  } else {
+    RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus disable FEC";
+  }
+  RTC_CHECK_EQ(
+      0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz));
+  RTC_LOG(LS_VERBOSE) << "Set Opus playback rate to "
+                      << config.max_playback_rate_hz << " hz.";
+
+  // Use the DEFAULT complexity.
+  RTC_CHECK_EQ(
+      0, WebRtcOpus_SetComplexity(inst_, AudioEncoderOpusConfig().complexity));
+  RTC_LOG(LS_VERBOSE) << "Set Opus coding complexity to "
+                      << AudioEncoderOpusConfig().complexity;
+
+  if (config.dtx_enabled) {
+    RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus enable DTX";
+  } else {
+    RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus disable DTX";
+  }
+
+  if (config.cbr_enabled) {
+    RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus enable CBR";
+  } else {
+    RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_));
+    RTC_LOG(LS_VERBOSE) << "Opus disable CBR";
+  }
+  num_channels_to_encode_ = NumChannels();
+  next_frame_length_ms_ = config_.frame_size_ms;
+  RTC_LOG(LS_VERBOSE) << "Set Opus frame length to " << config_.frame_size_ms
+                      << " ms";
+  return true;
+}
+
+absl::optional<AudioEncoderMultiChannelOpusConfig>
+AudioEncoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
+  if (!absl::EqualsIgnoreCase(format.name, "multiopus") ||
+      format.clockrate_hz != 48000) {
+    return absl::nullopt;
+  }
+
+  AudioEncoderMultiChannelOpusConfig config;
+  config.num_channels = format.num_channels;
+  config.frame_size_ms = GetFrameSizeMs(format);
+  config.max_playback_rate_hz = GetMaxPlaybackRate(format);
+  config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1");
+  config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1");
+  config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1");
+  config.bitrate_bps =
+      CalculateBitrate(config.max_playback_rate_hz, config.num_channels,
+                       GetFormatParameter(format, "maxaveragebitrate"));
+  config.application =
+      config.num_channels == 1
+          ? AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip
+          : AudioEncoderMultiChannelOpusConfig::ApplicationMode::kAudio;
+
+  config.supported_frame_lengths_ms.clear();
+  std::copy(std::begin(kOpusSupportedFrameLengths),
+            std::end(kOpusSupportedFrameLengths),
+            std::back_inserter(config.supported_frame_lengths_ms));
+
+  auto num_streams = GetFormatParameter<int>(format, "num_streams");
+  if (!num_streams.has_value()) {
+    return absl::nullopt;
+  }
+  config.num_streams = *num_streams;
+
+  auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams");
+  if (!coupled_streams.has_value()) {
+    return absl::nullopt;
+  }
+  config.coupled_streams = *coupled_streams;
+
+  auto channel_mapping =
+      GetFormatParameter<std::vector<unsigned char>>(format,
+                                                     "channel_mapping");
+  if (!channel_mapping.has_value()) {
+    return absl::nullopt;
+  }
+  config.channel_mapping = *channel_mapping;
+
+  if (!config.IsOk()) {
+    return absl::nullopt;
+  }
+  return config;
+}
+
+AudioCodecInfo AudioEncoderMultiChannelOpusImpl::QueryAudioEncoder(
+    const AudioEncoderMultiChannelOpusConfig& config) {
+  RTC_DCHECK(config.IsOk());
+  AudioCodecInfo info(48000, config.num_channels, config.bitrate_bps,
+                      AudioEncoderOpusConfig::kMinBitrateBps,
+                      AudioEncoderOpusConfig::kMaxBitrateBps);
+  info.allow_comfort_noise = false;
+  info.supports_network_adaption = false;
+  return info;
+}
+
+size_t AudioEncoderMultiChannelOpusImpl::Num10msFramesPerPacket() const {
+  return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
+}
+size_t AudioEncoderMultiChannelOpusImpl::SamplesPer10msFrame() const {
+  return rtc::CheckedDivExact(48000, 100) * config_.num_channels;
+}
+int AudioEncoderMultiChannelOpusImpl::SampleRateHz() const {
+  return 48000;
+}
+size_t AudioEncoderMultiChannelOpusImpl::NumChannels() const {
+  return config_.num_channels;
+}
+size_t AudioEncoderMultiChannelOpusImpl::Num10MsFramesInNextPacket() const {
+  return Num10msFramesPerPacket();
+}
+size_t AudioEncoderMultiChannelOpusImpl::Max10MsFramesInAPacket() const {
+  return Num10msFramesPerPacket();
+}
+int AudioEncoderMultiChannelOpusImpl::GetTargetBitrate() const {
+  return GetBitrateBps(config_);
+}
+
+AudioEncoder::EncodedInfo AudioEncoderMultiChannelOpusImpl::EncodeImpl(
+    uint32_t rtp_timestamp,
+    rtc::ArrayView<const int16_t> audio,
+    rtc::Buffer* encoded) {
+  if (input_buffer_.empty())
+    first_timestamp_in_buffer_ = rtp_timestamp;
+
+  input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());
+  if (input_buffer_.size() <
+      (Num10msFramesPerPacket() * SamplesPer10msFrame())) {
+    return EncodedInfo();
+  }
+  RTC_CHECK_EQ(input_buffer_.size(),
+               Num10msFramesPerPacket() * SamplesPer10msFrame());
+
+  const size_t max_encoded_bytes = SufficientOutputBufferSize();
+  EncodedInfo info;
+  info.encoded_bytes = encoded->AppendData(
+      max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
+        int status = WebRtcOpus_Encode(
+            inst_, &input_buffer_[0],
+            rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
+            rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data());
+
+        RTC_CHECK_GE(status, 0);  // Fails only if fed invalid data.
+
+        return static_cast<size_t>(status);
+      });
+  input_buffer_.clear();
+
+  // Will use new packet size for next encoding.
+  config_.frame_size_ms = next_frame_length_ms_;
+
+  info.encoded_timestamp = first_timestamp_in_buffer_;
+  info.payload_type = payload_type_;
+  info.send_even_if_empty = true;  // Allows Opus to send empty packets.
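+  // Note (added for clarity): with DTX enabled, Opus can emit packets of at
+  // most 2 bytes, which the decoder side treats as DTX packets (see
+  // OpusFrame::IsDtxPacket in audio_coder_opus_common.h); send_even_if_empty
+  // ensures such packets are still delivered.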
+ + info.speech = true; + info.encoder_type = CodecType::kOther; + + return info; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h new file mode 100644 index 0000000000..8a7210515c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/audio_format.h" +#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus_config.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" + +namespace webrtc { + +class RtcEventLog; + +class AudioEncoderMultiChannelOpusImpl final : public AudioEncoder { + public: + AudioEncoderMultiChannelOpusImpl( + const AudioEncoderMultiChannelOpusConfig& config, + int payload_type); + ~AudioEncoderMultiChannelOpusImpl() override; + + AudioEncoderMultiChannelOpusImpl(const AudioEncoderMultiChannelOpusImpl&) = + delete; + AudioEncoderMultiChannelOpusImpl& operator=( + const AudioEncoderMultiChannelOpusImpl&) = delete; + + // Static interface for use by BuiltinAudioEncoderFactory. 
+ static constexpr const char* GetPayloadName() { return "multiopus"; } + static absl::optional QueryAudioEncoder( + const SdpAudioFormat& format); + + int SampleRateHz() const override; + size_t NumChannels() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + + void Reset() override; + absl::optional> GetFrameLengthRange() + const override; + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) override; + + private: + static absl::optional SdpToConfig( + const SdpAudioFormat& format); + static AudioCodecInfo QueryAudioEncoder( + const AudioEncoderMultiChannelOpusConfig& config); + static std::unique_ptr MakeAudioEncoder( + const AudioEncoderMultiChannelOpusConfig&, + int payload_type); + + size_t Num10msFramesPerPacket() const; + size_t SamplesPer10msFrame() const; + size_t SufficientOutputBufferSize() const; + bool RecreateEncoderInstance( + const AudioEncoderMultiChannelOpusConfig& config); + void SetFrameLength(int frame_length_ms); + void SetNumChannelsToEncode(size_t num_channels_to_encode); + void SetProjectedPacketLossRate(float fraction); + + AudioEncoderMultiChannelOpusConfig config_; + const int payload_type_; + std::vector input_buffer_; + OpusEncInst* inst_; + uint32_t first_timestamp_in_buffer_; + size_t num_channels_to_encode_; + int next_frame_length_ms_; + + friend struct AudioEncoderMultiChannelOpus; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc new file mode 100644 index 0000000000..92f6f2c169 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h" + +#include "test/gmock.h" + +namespace webrtc { +using ::testing::NiceMock; +using ::testing::Return; + +namespace { +constexpr int kOpusPayloadType = 120; +} // namespace + +TEST(AudioEncoderMultiOpusTest, CheckConfigValidity) { + { + const SdpAudioFormat sdp_format("multiopus", 48000, 2, + {{"channel_mapping", "3,0"}, + {"coupled_streams", "1"}, + {"num_streams", "2"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // Maps input channel 0 to coded channel 3, which doesn't exist. + EXPECT_FALSE(encoder_config.has_value()); + } + + { + const SdpAudioFormat sdp_format("multiopus", 48000, 2, + {{"channel_mapping", "0"}, + {"coupled_streams", "1"}, + {"num_streams", "2"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // The mapping is too short. 
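+    // (IsOk() rejects this because channel_mapping must contain exactly one
+    // entry per input channel; the format declares two channels but supplies
+    // only one entry.)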
+ EXPECT_FALSE(encoder_config.has_value()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,0,0"}, + {"coupled_streams", "0"}, + {"num_streams", "1"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // Coded channel 0 comes from both input channels 0, 1 and 2. + EXPECT_FALSE(encoder_config.has_value()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "1"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + // This is fine, because channels 1, 2 are set to be ignored. + EXPECT_TRUE(encoder_config->IsOk()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "2"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // This is NOT fine, because channels nothing says how coded channel 1 + // should be coded. + EXPECT_FALSE(encoder_config.has_value()); + } +} + +TEST(AudioEncoderMultiOpusTest, ConfigValuesAreParsedCorrectly) { + SdpAudioFormat sdp_format({"multiopus", + 48000, + 6, + {{"minptime", "10"}, + {"useinbandfec", "1"}, + {"channel_mapping", "0,4,1,2,3,5"}, + {"num_streams", "4"}, + {"coupled_streams", "2"}}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + EXPECT_EQ(encoder_config->coupled_streams, 2); + EXPECT_EQ(encoder_config->num_streams, 4); + EXPECT_THAT( + encoder_config->channel_mapping, + testing::ContainerEq(std::vector({0, 4, 1, 2, 3, 5}))); +} + +TEST(AudioEncoderMultiOpusTest, CreateFromValidConfig) { + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "2"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_FALSE(encoder_config.has_value()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "1,255,0"}, + {"coupled_streams", "1"}, + {"num_streams", "1"}}); + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + EXPECT_THAT(encoder_config->channel_mapping, + testing::ContainerEq(std::vector({1, 255, 0}))); + + EXPECT_TRUE(encoder_config->IsOk()); + + const std::unique_ptr opus_encoder = + AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config, + kOpusPayloadType); + + // Creating an encoder from a valid config should work. 
+ EXPECT_TRUE(opus_encoder); + } +} + +TEST(AudioEncoderMultiOpusTest, AdvertisedCodecsCanBeCreated) { + std::vector specs; + AudioEncoderMultiChannelOpus::AppendSupportedEncoders(&specs); + + EXPECT_FALSE(specs.empty()); + + for (const AudioCodecSpec& spec : specs) { + const absl::optional encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(spec.format); + ASSERT_TRUE(encoder_config.has_value()); + + const std::unique_ptr opus_encoder = + AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config, + kOpusPayloadType); + + EXPECT_TRUE(opus_encoder); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc new file mode 100644 index 0000000000..17e0e33b1d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h" + +#include +#include +#include +#include +#include + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h" +#include "modules/audio_coding/audio_network_adaptor/controller_manager.h" +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/exp_filter.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/string_encode.h" +#include "rtc_base/string_to_number.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +// Codec parameters for Opus. +// draft-spittka-payload-rtp-opus-03 + +// Recommended bitrates: +// 8-12 kb/s for NB speech, +// 16-20 kb/s for WB speech, +// 28-40 kb/s for FB speech, +// 48-64 kb/s for FB mono music, and +// 64-128 kb/s for FB stereo music. +// The current implementation applies the following values to mono signals, +// and multiplies them by 2 for stereo. +constexpr int kOpusBitrateNbBps = 12000; +constexpr int kOpusBitrateWbBps = 20000; +constexpr int kOpusBitrateFbBps = 32000; + +constexpr int kRtpTimestampRateHz = 48000; +constexpr int kDefaultMaxPlaybackRate = 48000; + +// These two lists must be sorted from low to high +#if WEBRTC_OPUS_SUPPORT_120MS_PTIME +constexpr int kANASupportedFrameLengths[] = {20, 40, 60, 120}; +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120}; +#else +constexpr int kANASupportedFrameLengths[] = {20, 40, 60}; +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60}; +#endif + +// PacketLossFractionSmoother uses an exponential filter with a time constant +// of -1.0 / ln(0.9999) = 10000 ms. 
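+// To spell out the arithmetic: the forgetting factor is applied once per
+// millisecond, so the step response is u(t) = 1 - alpha^t =
+// 1 - e^(t * ln(alpha)), giving a time constant of -1 / ln(alpha) ms;
+// alpha = 0.9999 yields roughly 10000 ms, as stated above.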
+constexpr float kAlphaForPacketLossFractionSmoother = 0.9999f; +constexpr float kMaxPacketLossFraction = 0.2f; + +int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) { + const int bitrate = [&] { + if (max_playback_rate <= 8000) { + return kOpusBitrateNbBps * rtc::dchecked_cast(num_channels); + } else if (max_playback_rate <= 16000) { + return kOpusBitrateWbBps * rtc::dchecked_cast(num_channels); + } else { + return kOpusBitrateFbBps * rtc::dchecked_cast(num_channels); + } + }(); + RTC_DCHECK_GE(bitrate, AudioEncoderOpusConfig::kMinBitrateBps); + RTC_DCHECK_LE(bitrate, AudioEncoderOpusConfig::kMaxBitrateBps); + return bitrate; +} + +// Get the maxaveragebitrate parameter in string-form, so we can properly figure +// out how invalid it is and accurately log invalid values. +int CalculateBitrate(int max_playback_rate_hz, + size_t num_channels, + absl::optional bitrate_param) { + const int default_bitrate = + CalculateDefaultBitrate(max_playback_rate_hz, num_channels); + + if (bitrate_param) { + const auto bitrate = rtc::StringToNumber(*bitrate_param); + if (bitrate) { + const int chosen_bitrate = + std::max(AudioEncoderOpusConfig::kMinBitrateBps, + std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps)); + if (bitrate != chosen_bitrate) { + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate + << " clamped to " << chosen_bitrate; + } + return chosen_bitrate; + } + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param + << "\" replaced by default bitrate " << default_bitrate; + } + + return default_bitrate; +} + +int GetChannelCount(const SdpAudioFormat& format) { + const auto param = GetFormatParameter(format, "stereo"); + if (param == "1") { + return 2; + } else { + return 1; + } +} + +int GetMaxPlaybackRate(const SdpAudioFormat& format) { + const auto param = GetFormatParameter(format, "maxplaybackrate"); + if (param && *param >= 8000) { + return std::min(*param, kDefaultMaxPlaybackRate); + } + return kDefaultMaxPlaybackRate; +} + +int GetFrameSizeMs(const SdpAudioFormat& format) { + const auto ptime = GetFormatParameter(format, "ptime"); + if (ptime) { + // Pick the next highest supported frame length from + // kOpusSupportedFrameLengths. + for (const int supported_frame_length : kOpusSupportedFrameLengths) { + if (supported_frame_length >= *ptime) { + return supported_frame_length; + } + } + // If none was found, return the largest supported frame length. 
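+    // (For example, ptime=25 is handled by the loop above, which returns
+    // 40 ms; ptime=500 exceeds every supported length and falls through to
+    // here, yielding 60 ms, or 120 ms when WEBRTC_OPUS_SUPPORT_120MS_PTIME
+    // is set.)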
+ return *(std::end(kOpusSupportedFrameLengths) - 1); + } + + return AudioEncoderOpusConfig::kDefaultFrameSizeMs; +} + +void FindSupportedFrameLengths(int min_frame_length_ms, + int max_frame_length_ms, + std::vector* out) { + out->clear(); + std::copy_if(std::begin(kANASupportedFrameLengths), + std::end(kANASupportedFrameLengths), std::back_inserter(*out), + [&](int frame_length_ms) { + return frame_length_ms >= min_frame_length_ms && + frame_length_ms <= max_frame_length_ms; + }); + RTC_DCHECK(std::is_sorted(out->begin(), out->end())); +} + +int GetBitrateBps(const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + return *config.bitrate_bps; +} + +std::vector GetBitrateMultipliers() { + constexpr char kBitrateMultipliersName[] = + "WebRTC-Audio-OpusBitrateMultipliers"; + const bool use_bitrate_multipliers = + webrtc::field_trial::IsEnabled(kBitrateMultipliersName); + if (use_bitrate_multipliers) { + const std::string field_trial_string = + webrtc::field_trial::FindFullName(kBitrateMultipliersName); + std::vector pieces; + rtc::tokenize(field_trial_string, '-', &pieces); + if (pieces.size() < 2 || pieces[0] != "Enabled") { + RTC_LOG(LS_WARNING) << "Invalid parameters for " + << kBitrateMultipliersName + << ", not using custom values."; + return std::vector(); + } + std::vector multipliers(pieces.size() - 1); + for (size_t i = 1; i < pieces.size(); i++) { + if (!rtc::FromString(pieces[i], &multipliers[i - 1])) { + RTC_LOG(LS_WARNING) + << "Invalid parameters for " << kBitrateMultipliersName + << ", not using custom values."; + return std::vector(); + } + } + RTC_LOG(LS_INFO) << "Using custom bitrate multipliers: " + << field_trial_string; + return multipliers; + } + return std::vector(); +} + +int GetMultipliedBitrate(int bitrate, const std::vector& multipliers) { + // The multipliers are valid from 5 kbps. 
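+  // For example (hypothetical field trial, not part of this change): a group
+  // string of "Enabled-1.2-1.0-0.8" yields multipliers for the 5, 6 and
+  // 7 kbps ranges, so a 5900 bps target maps to bitrate_kbps = 5 and is
+  // scaled by multipliers[0] = 1.2 to 7080 bps, while 8000 bps and above
+  // pass through unchanged.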
+ const size_t bitrate_kbps = static_cast(bitrate / 1000); + if (bitrate_kbps < 5 || bitrate_kbps >= multipliers.size() + 5) { + return bitrate; + } + return static_cast(multipliers[bitrate_kbps - 5] * bitrate); +} +} // namespace + +void AudioEncoderOpusImpl::AppendSupportedEncoders( + std::vector* specs) { + const SdpAudioFormat fmt = {"opus", + kRtpTimestampRateHz, + 2, + {{"minptime", "10"}, {"useinbandfec", "1"}}}; + const AudioCodecInfo info = QueryAudioEncoder(*SdpToConfig(fmt)); + specs->push_back({fmt, info}); +} + +AudioCodecInfo AudioEncoderOpusImpl::QueryAudioEncoder( + const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + AudioCodecInfo info(config.sample_rate_hz, config.num_channels, + *config.bitrate_bps, + AudioEncoderOpusConfig::kMinBitrateBps, + AudioEncoderOpusConfig::kMaxBitrateBps); + info.allow_comfort_noise = false; + info.supports_network_adaption = true; + return info; +} + +std::unique_ptr AudioEncoderOpusImpl::MakeAudioEncoder( + const AudioEncoderOpusConfig& config, + int payload_type) { + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return nullptr; + } + return std::make_unique(config, payload_type); +} + +absl::optional AudioEncoderOpusImpl::SdpToConfig( + const SdpAudioFormat& format) { + if (!absl::EqualsIgnoreCase(format.name, "opus") || + format.clockrate_hz != kRtpTimestampRateHz) { + return absl::nullopt; + } + + AudioEncoderOpusConfig config; + config.num_channels = GetChannelCount(format); + config.frame_size_ms = GetFrameSizeMs(format); + config.max_playback_rate_hz = GetMaxPlaybackRate(format); + config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1"); + config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1"); + config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1"); + config.bitrate_bps = + CalculateBitrate(config.max_playback_rate_hz, config.num_channels, + GetFormatParameter(format, "maxaveragebitrate")); + config.application = config.num_channels == 1 + ? AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + + constexpr int kMinANAFrameLength = kANASupportedFrameLengths[0]; + constexpr int kMaxANAFrameLength = + kANASupportedFrameLengths[arraysize(kANASupportedFrameLengths) - 1]; + + // For now, minptime and maxptime are only used with ANA. If ptime is outside + // of this range, it will get adjusted once ANA takes hold. Ideally, we'd know + // if ANA was to be used when setting up the config, and adjust accordingly. + const int min_frame_length_ms = + GetFormatParameter(format, "minptime").value_or(kMinANAFrameLength); + const int max_frame_length_ms = + GetFormatParameter(format, "maxptime").value_or(kMaxANAFrameLength); + + FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms, + &config.supported_frame_lengths_ms); + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return absl::nullopt; + } + return config; +} + +absl::optional AudioEncoderOpusImpl::GetNewComplexity( + const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + const int bitrate_bps = GetBitrateBps(config); + if (bitrate_bps >= config.complexity_threshold_bps - + config.complexity_threshold_window_bps && + bitrate_bps <= config.complexity_threshold_bps + + config.complexity_threshold_window_bps) { + // Within the hysteresis window; make no change. + return absl::nullopt; + } else { + return bitrate_bps <= config.complexity_threshold_bps + ? 
config.low_rate_complexity + : config.complexity; + } +} + +absl::optional AudioEncoderOpusImpl::GetNewBandwidth( + const AudioEncoderOpusConfig& config, + OpusEncInst* inst) { + constexpr int kMinWidebandBitrate = 8000; + constexpr int kMaxNarrowbandBitrate = 9000; + constexpr int kAutomaticThreshold = 11000; + RTC_DCHECK(config.IsOk()); + const int bitrate = GetBitrateBps(config); + if (bitrate > kAutomaticThreshold) { + return absl::optional(OPUS_AUTO); + } + const int bandwidth = WebRtcOpus_GetBandwidth(inst); + RTC_DCHECK_GE(bandwidth, 0); + if (bitrate > kMaxNarrowbandBitrate && bandwidth < OPUS_BANDWIDTH_WIDEBAND) { + return absl::optional(OPUS_BANDWIDTH_WIDEBAND); + } else if (bitrate < kMinWidebandBitrate && + bandwidth > OPUS_BANDWIDTH_NARROWBAND) { + return absl::optional(OPUS_BANDWIDTH_NARROWBAND); + } + return absl::optional(); +} + +class AudioEncoderOpusImpl::PacketLossFractionSmoother { + public: + explicit PacketLossFractionSmoother() + : last_sample_time_ms_(rtc::TimeMillis()), + smoother_(kAlphaForPacketLossFractionSmoother) {} + + // Gets the smoothed packet loss fraction. + float GetAverage() const { + float value = smoother_.filtered(); + return (value == rtc::ExpFilter::kValueUndefined) ? 0.0f : value; + } + + // Add new observation to the packet loss fraction smoother. + void AddSample(float packet_loss_fraction) { + int64_t now_ms = rtc::TimeMillis(); + smoother_.Apply(static_cast(now_ms - last_sample_time_ms_), + packet_loss_fraction); + last_sample_time_ms_ = now_ms; + } + + private: + int64_t last_sample_time_ms_; + + // An exponential filter is used to smooth the packet loss fraction. + rtc::ExpFilter smoother_; +}; + +AudioEncoderOpusImpl::AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, + int payload_type) + : AudioEncoderOpusImpl( + config, + payload_type, + [this](absl::string_view config_string, RtcEventLog* event_log) { + return DefaultAudioNetworkAdaptorCreator(config_string, event_log); + }, + // We choose 5sec as initial time constant due to empirical data. + std::make_unique(5000)) {} + +AudioEncoderOpusImpl::AudioEncoderOpusImpl( + const AudioEncoderOpusConfig& config, + int payload_type, + const AudioNetworkAdaptorCreator& audio_network_adaptor_creator, + std::unique_ptr bitrate_smoother) + : payload_type_(payload_type), + use_stable_target_for_adaptation_(!webrtc::field_trial::IsDisabled( + "WebRTC-Audio-StableTargetAdaptation")), + adjust_bandwidth_( + webrtc::field_trial::IsEnabled("WebRTC-AdjustOpusBandwidth")), + bitrate_changed_(true), + bitrate_multipliers_(GetBitrateMultipliers()), + packet_loss_rate_(0.0), + inst_(nullptr), + packet_loss_fraction_smoother_(new PacketLossFractionSmoother()), + audio_network_adaptor_creator_(audio_network_adaptor_creator), + bitrate_smoother_(std::move(bitrate_smoother)), + consecutive_dtx_frames_(0) { + RTC_DCHECK(0 <= payload_type && payload_type <= 127); + + // Sanity check of the redundant payload type field that we want to get rid + // of. 
See https://bugs.chromium.org/p/webrtc/issues/detail?id=7847 + RTC_CHECK(config.payload_type == -1 || config.payload_type == payload_type); + + RTC_CHECK(RecreateEncoderInstance(config)); + SetProjectedPacketLossRate(packet_loss_rate_); +} + +AudioEncoderOpusImpl::AudioEncoderOpusImpl(int payload_type, + const SdpAudioFormat& format) + : AudioEncoderOpusImpl(*SdpToConfig(format), payload_type) {} + +AudioEncoderOpusImpl::~AudioEncoderOpusImpl() { + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); +} + +int AudioEncoderOpusImpl::SampleRateHz() const { + return config_.sample_rate_hz; +} + +size_t AudioEncoderOpusImpl::NumChannels() const { + return config_.num_channels; +} + +int AudioEncoderOpusImpl::RtpTimestampRateHz() const { + return kRtpTimestampRateHz; +} + +size_t AudioEncoderOpusImpl::Num10MsFramesInNextPacket() const { + return Num10msFramesPerPacket(); +} + +size_t AudioEncoderOpusImpl::Max10MsFramesInAPacket() const { + return Num10msFramesPerPacket(); +} + +int AudioEncoderOpusImpl::GetTargetBitrate() const { + return GetBitrateBps(config_); +} + +void AudioEncoderOpusImpl::Reset() { + RTC_CHECK(RecreateEncoderInstance(config_)); +} + +bool AudioEncoderOpusImpl::SetFec(bool enable) { + if (enable) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_)); + } + config_.fec_enabled = enable; + return true; +} + +bool AudioEncoderOpusImpl::SetDtx(bool enable) { + if (enable) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_)); + } + config_.dtx_enabled = enable; + return true; +} + +bool AudioEncoderOpusImpl::GetDtx() const { + return config_.dtx_enabled; +} + +bool AudioEncoderOpusImpl::SetApplication(Application application) { + auto conf = config_; + switch (application) { + case Application::kSpeech: + conf.application = AudioEncoderOpusConfig::ApplicationMode::kVoip; + break; + case Application::kAudio: + conf.application = AudioEncoderOpusConfig::ApplicationMode::kAudio; + break; + } + return RecreateEncoderInstance(conf); +} + +void AudioEncoderOpusImpl::SetMaxPlaybackRate(int frequency_hz) { + auto conf = config_; + conf.max_playback_rate_hz = frequency_hz; + RTC_CHECK(RecreateEncoderInstance(conf)); +} + +bool AudioEncoderOpusImpl::EnableAudioNetworkAdaptor( + const std::string& config_string, + RtcEventLog* event_log) { + audio_network_adaptor_ = + audio_network_adaptor_creator_(config_string, event_log); + return audio_network_adaptor_.get() != nullptr; +} + +void AudioEncoderOpusImpl::DisableAudioNetworkAdaptor() { + audio_network_adaptor_.reset(nullptr); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetUplinkPacketLossFraction( + uplink_packet_loss_fraction); + ApplyAudioNetworkAdaptor(); + } + packet_loss_fraction_smoother_->AddSample(uplink_packet_loss_fraction); + float average_fraction_loss = packet_loss_fraction_smoother_->GetAverage(); + SetProjectedPacketLossRate(average_fraction_loss); +} + +void AudioEncoderOpusImpl::OnReceivedTargetAudioBitrate( + int target_audio_bitrate_bps) { + SetTargetBitrate(target_audio_bitrate_bps); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional bwe_period_ms, + absl::optional stable_target_bitrate_bps) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetTargetAudioBitrate(target_audio_bitrate_bps); + if 
(use_stable_target_for_adaptation_) { + if (stable_target_bitrate_bps) + audio_network_adaptor_->SetUplinkBandwidth(*stable_target_bitrate_bps); + } else { + // We give smoothed bitrate allocation to audio network adaptor as + // the uplink bandwidth. + // The BWE spikes should not affect the bitrate smoother more than 25%. + // To simplify the calculations we use a step response as input signal. + // The step response of an exponential filter is + // u(t) = 1 - e^(-t / time_constant). + // In order to limit the affect of a BWE spike within 25% of its value + // before + // the next BWE update, we would choose a time constant that fulfills + // 1 - e^(-bwe_period_ms / time_constant) < 0.25 + // Then 4 * bwe_period_ms is a good choice. + if (bwe_period_ms) + bitrate_smoother_->SetTimeConstantMs(*bwe_period_ms * 4); + bitrate_smoother_->AddSample(target_audio_bitrate_bps); + } + + ApplyAudioNetworkAdaptor(); + } else { + if (!overhead_bytes_per_packet_) { + RTC_LOG(LS_INFO) + << "AudioEncoderOpusImpl: Overhead unknown, target audio bitrate " + << target_audio_bitrate_bps << " bps is ignored."; + return; + } + const int overhead_bps = static_cast( + *overhead_bytes_per_packet_ * 8 * 100 / Num10MsFramesInNextPacket()); + SetTargetBitrate( + std::min(AudioEncoderOpusConfig::kMaxBitrateBps, + std::max(AudioEncoderOpusConfig::kMinBitrateBps, + target_audio_bitrate_bps - overhead_bps))); + } +} +void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional bwe_period_ms) { + OnReceivedUplinkBandwidth(target_audio_bitrate_bps, bwe_period_ms, + absl::nullopt); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkAllocation( + BitrateAllocationUpdate update) { + OnReceivedUplinkBandwidth(update.target_bitrate.bps(), update.bwe_period.ms(), + update.stable_target_bitrate.bps()); +} + +void AudioEncoderOpusImpl::OnReceivedRtt(int rtt_ms) { + if (!audio_network_adaptor_) + return; + audio_network_adaptor_->SetRtt(rtt_ms); + ApplyAudioNetworkAdaptor(); +} + +void AudioEncoderOpusImpl::OnReceivedOverhead( + size_t overhead_bytes_per_packet) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetOverhead(overhead_bytes_per_packet); + ApplyAudioNetworkAdaptor(); + } else { + overhead_bytes_per_packet_ = overhead_bytes_per_packet; + } +} + +void AudioEncoderOpusImpl::SetReceiverFrameLengthRange( + int min_frame_length_ms, + int max_frame_length_ms) { + // Ensure that `SetReceiverFrameLengthRange` is called before + // `EnableAudioNetworkAdaptor`, otherwise we need to recreate + // `audio_network_adaptor_`, which is not a needed use case. 
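+  // A conforming call order looks like this (illustrative only):
+  //   encoder->SetReceiverFrameLengthRange(20, 120);
+  //   encoder->EnableAudioNetworkAdaptor(config_string, event_log);
+  // Reversing the two calls would trip the DCHECK below.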
+ RTC_DCHECK(!audio_network_adaptor_); + FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms, + &config_.supported_frame_lengths_ms); +} + +AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) { + MaybeUpdateUplinkBandwidth(); + + if (input_buffer_.empty()) + first_timestamp_in_buffer_ = rtp_timestamp; + + input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend()); + if (input_buffer_.size() < + (Num10msFramesPerPacket() * SamplesPer10msFrame())) { + return EncodedInfo(); + } + RTC_CHECK_EQ(input_buffer_.size(), + Num10msFramesPerPacket() * SamplesPer10msFrame()); + + const size_t max_encoded_bytes = SufficientOutputBufferSize(); + EncodedInfo info; + info.encoded_bytes = encoded->AppendData( + max_encoded_bytes, [&](rtc::ArrayView encoded) { + int status = WebRtcOpus_Encode( + inst_, &input_buffer_[0], + rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels), + rtc::saturated_cast(max_encoded_bytes), encoded.data()); + + RTC_CHECK_GE(status, 0); // Fails only if fed invalid data. + + return static_cast(status); + }); + input_buffer_.clear(); + + bool dtx_frame = (info.encoded_bytes <= 2); + + // Will use new packet size for next encoding. + config_.frame_size_ms = next_frame_length_ms_; + + if (adjust_bandwidth_ && bitrate_changed_) { + const auto bandwidth = GetNewBandwidth(config_, inst_); + if (bandwidth) { + RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *bandwidth)); + } + bitrate_changed_ = false; + } + + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.send_even_if_empty = true; // Allows Opus to send empty packets. + // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame + // coding the background noise. Avoid flagging this frame as speech + // (even though there is a probability of the frame being speech). + info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20); + info.encoder_type = CodecType::kOpus; + + // Increase or reset DTX counter. + consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0); + + return info; +} + +size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const { + return static_cast(rtc::CheckedDivExact(config_.frame_size_ms, 10)); +} + +size_t AudioEncoderOpusImpl::SamplesPer10msFrame() const { + return rtc::CheckedDivExact(config_.sample_rate_hz, 100) * + config_.num_channels; +} + +size_t AudioEncoderOpusImpl::SufficientOutputBufferSize() const { + // Calculate the number of bytes we expect the encoder to produce, + // then multiply by two to give a wide margin for error. + const size_t bytes_per_millisecond = + static_cast(GetBitrateBps(config_) / (1000 * 8) + 1); + const size_t approx_encoded_bytes = + Num10msFramesPerPacket() * 10 * bytes_per_millisecond; + return 2 * approx_encoded_bytes; +} + +// If the given config is OK, recreate the Opus encoder instance with those +// settings, save the config, and return true. Otherwise, do nothing and return +// false. +bool AudioEncoderOpusImpl::RecreateEncoderInstance( + const AudioEncoderOpusConfig& config) { + if (!config.IsOk()) + return false; + config_ = config; + if (inst_) + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); + input_buffer_.clear(); + input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame()); + RTC_CHECK_EQ(0, WebRtcOpus_EncoderCreate( + &inst_, config.num_channels, + config.application == + AudioEncoderOpusConfig::ApplicationMode::kVoip + ? 
0 + : 1, + config.sample_rate_hz)); + const int bitrate = GetBitrateBps(config); + RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate)); + RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps."; + if (config.fec_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_)); + } + RTC_CHECK_EQ( + 0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz)); + // Use the default complexity if the start bitrate is within the hysteresis + // window. + complexity_ = GetNewComplexity(config).value_or(config.complexity); + RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_)); + bitrate_changed_ = true; + if (config.dtx_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_)); + } + RTC_CHECK_EQ(0, + WebRtcOpus_SetPacketLossRate( + inst_, static_cast(packet_loss_rate_ * 100 + .5))); + if (config.cbr_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_)); + } + num_channels_to_encode_ = NumChannels(); + next_frame_length_ms_ = config_.frame_size_ms; + return true; +} + +void AudioEncoderOpusImpl::SetFrameLength(int frame_length_ms) { + if (next_frame_length_ms_ != frame_length_ms) { + RTC_LOG(LS_VERBOSE) << "Update Opus frame length " + << "from " << next_frame_length_ms_ << " ms " + << "to " << frame_length_ms << " ms."; + } + next_frame_length_ms_ = frame_length_ms; +} + +void AudioEncoderOpusImpl::SetNumChannelsToEncode( + size_t num_channels_to_encode) { + RTC_DCHECK_GT(num_channels_to_encode, 0); + RTC_DCHECK_LE(num_channels_to_encode, config_.num_channels); + + if (num_channels_to_encode_ == num_channels_to_encode) + return; + + RTC_CHECK_EQ(0, WebRtcOpus_SetForceChannels(inst_, num_channels_to_encode)); + num_channels_to_encode_ = num_channels_to_encode; +} + +void AudioEncoderOpusImpl::SetProjectedPacketLossRate(float fraction) { + fraction = std::min(std::max(fraction, 0.0f), kMaxPacketLossFraction); + if (packet_loss_rate_ != fraction) { + packet_loss_rate_ = fraction; + RTC_CHECK_EQ( + 0, WebRtcOpus_SetPacketLossRate( + inst_, static_cast(packet_loss_rate_ * 100 + .5))); + } +} + +void AudioEncoderOpusImpl::SetTargetBitrate(int bits_per_second) { + const int new_bitrate = rtc::SafeClamp( + bits_per_second, AudioEncoderOpusConfig::kMinBitrateBps, + AudioEncoderOpusConfig::kMaxBitrateBps); + if (config_.bitrate_bps && *config_.bitrate_bps != new_bitrate) { + config_.bitrate_bps = new_bitrate; + RTC_DCHECK(config_.IsOk()); + const int bitrate = GetBitrateBps(config_); + RTC_CHECK_EQ( + 0, WebRtcOpus_SetBitRate( + inst_, GetMultipliedBitrate(bitrate, bitrate_multipliers_))); + RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps."; + bitrate_changed_ = true; + } + + const auto new_complexity = GetNewComplexity(config_); + if (new_complexity && complexity_ != *new_complexity) { + complexity_ = *new_complexity; + RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_)); + } +} + +void AudioEncoderOpusImpl::ApplyAudioNetworkAdaptor() { + auto config = audio_network_adaptor_->GetEncoderRuntimeConfig(); + + if (config.bitrate_bps) + SetTargetBitrate(*config.bitrate_bps); + if (config.frame_length_ms) + SetFrameLength(*config.frame_length_ms); + if (config.enable_dtx) + SetDtx(*config.enable_dtx); + if (config.num_channels) + SetNumChannelsToEncode(*config.num_channels); +} + +std::unique_ptr +AudioEncoderOpusImpl::DefaultAudioNetworkAdaptorCreator( + absl::string_view 
config_string, + RtcEventLog* event_log) const { + AudioNetworkAdaptorImpl::Config config; + config.event_log = event_log; + return std::unique_ptr(new AudioNetworkAdaptorImpl( + config, ControllerManagerImpl::Create( + config_string, NumChannels(), supported_frame_lengths_ms(), + AudioEncoderOpusConfig::kMinBitrateBps, + num_channels_to_encode_, next_frame_length_ms_, + GetTargetBitrate(), config_.fec_enabled, GetDtx()))); +} + +void AudioEncoderOpusImpl::MaybeUpdateUplinkBandwidth() { + if (audio_network_adaptor_ && !use_stable_target_for_adaptation_) { + int64_t now_ms = rtc::TimeMillis(); + if (!bitrate_smoother_last_update_time_ || + now_ms - *bitrate_smoother_last_update_time_ >= + config_.uplink_bandwidth_update_interval_ms) { + absl::optional smoothed_bitrate = bitrate_smoother_->GetAverage(); + if (smoothed_bitrate) + audio_network_adaptor_->SetUplinkBandwidth(*smoothed_bitrate); + bitrate_smoother_last_update_time_ = now_ms; + } + } +} + +ANAStats AudioEncoderOpusImpl::GetANAStats() const { + if (audio_network_adaptor_) { + return audio_network_adaptor_->GetStats(); + } + return ANAStats(); +} + +absl::optional > +AudioEncoderOpusImpl::GetFrameLengthRange() const { + if (audio_network_adaptor_) { + if (config_.supported_frame_lengths_ms.empty()) { + return absl::nullopt; + } + return {{TimeDelta::Millis(config_.supported_frame_lengths_ms.front()), + TimeDelta::Millis(config_.supported_frame_lengths_ms.back())}}; + } else { + return {{TimeDelta::Millis(config_.frame_size_ms), + TimeDelta::Millis(config_.frame_size_ms)}}; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h new file mode 100644 index 0000000000..8c5c235016 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ + +#include +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/audio_format.h" +#include "api/audio_codecs/opus/audio_encoder_opus_config.h" +#include "common_audio/smoothing_filter.h" +#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" + +namespace webrtc { + +class RtcEventLog; + +class AudioEncoderOpusImpl final : public AudioEncoder { + public: + // Returns empty if the current bitrate falls within the hysteresis window, + // defined by complexity_threshold_bps +/- complexity_threshold_window_bps. + // Otherwise, returns the current complexity depending on whether the + // current bitrate is above or below complexity_threshold_bps. + static absl::optional GetNewComplexity( + const AudioEncoderOpusConfig& config); + + // Returns OPUS_AUTO if the the current bitrate is above wideband threshold. 
+  // Returns empty if it is below, but the bandwidth coincides with the
+  // desired one.
+  // Otherwise returns the desired bandwidth.
+  static absl::optional<int> GetNewBandwidth(
+      const AudioEncoderOpusConfig& config,
+      OpusEncInst* inst);
+
+  using AudioNetworkAdaptorCreator =
+      std::function<std::unique_ptr<AudioNetworkAdaptor>(absl::string_view,
+                                                         RtcEventLog*)>;
+
+  AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, int payload_type);
+
+  // Dependency injection for testing.
+  AudioEncoderOpusImpl(
+      const AudioEncoderOpusConfig& config,
+      int payload_type,
+      const AudioNetworkAdaptorCreator& audio_network_adaptor_creator,
+      std::unique_ptr<SmoothingFilter> bitrate_smoother);
+
+  AudioEncoderOpusImpl(int payload_type, const SdpAudioFormat& format);
+  ~AudioEncoderOpusImpl() override;
+
+  AudioEncoderOpusImpl(const AudioEncoderOpusImpl&) = delete;
+  AudioEncoderOpusImpl& operator=(const AudioEncoderOpusImpl&) = delete;
+
+  int SampleRateHz() const override;
+  size_t NumChannels() const override;
+  int RtpTimestampRateHz() const override;
+  size_t Num10MsFramesInNextPacket() const override;
+  size_t Max10MsFramesInAPacket() const override;
+  int GetTargetBitrate() const override;
+
+  void Reset() override;
+  bool SetFec(bool enable) override;
+
+  // Set Opus DTX. Once enabled, Opus stops transmission when it detects that
+  // the voice is inactive. While DTX is active, it still sends two packets
+  // (one for content, one for signaling) about every 400 ms.
+  bool SetDtx(bool enable) override;
+  bool GetDtx() const override;
+
+  bool SetApplication(Application application) override;
+  void SetMaxPlaybackRate(int frequency_hz) override;
+  bool EnableAudioNetworkAdaptor(const std::string& config_string,
+                                 RtcEventLog* event_log) override;
+  void DisableAudioNetworkAdaptor() override;
+  void OnReceivedUplinkPacketLossFraction(
+      float uplink_packet_loss_fraction) override;
+  void OnReceivedTargetAudioBitrate(int target_audio_bitrate_bps) override;
+  void OnReceivedUplinkBandwidth(
+      int target_audio_bitrate_bps,
+      absl::optional<int64_t> bwe_period_ms) override;
+  void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override;
+  void OnReceivedRtt(int rtt_ms) override;
+  void OnReceivedOverhead(size_t overhead_bytes_per_packet) override;
+  void SetReceiverFrameLengthRange(int min_frame_length_ms,
+                                   int max_frame_length_ms) override;
+  ANAStats GetANAStats() const override;
+  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
+      const override;
+  rtc::ArrayView<const int> supported_frame_lengths_ms() const {
+    return config_.supported_frame_lengths_ms;
+  }
+
+  // Getters for testing.
+ float packet_loss_rate() const { return packet_loss_rate_; } + AudioEncoderOpusConfig::ApplicationMode application() const { + return config_.application; + } + bool fec_enabled() const { return config_.fec_enabled; } + size_t num_channels_to_encode() const { return num_channels_to_encode_; } + int next_frame_length_ms() const { return next_frame_length_ms_; } + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView audio, + rtc::Buffer* encoded) override; + + private: + class PacketLossFractionSmoother; + + static absl::optional SdpToConfig( + const SdpAudioFormat& format); + static void AppendSupportedEncoders(std::vector* specs); + static AudioCodecInfo QueryAudioEncoder(const AudioEncoderOpusConfig& config); + static std::unique_ptr MakeAudioEncoder( + const AudioEncoderOpusConfig&, + int payload_type); + + size_t Num10msFramesPerPacket() const; + size_t SamplesPer10msFrame() const; + size_t SufficientOutputBufferSize() const; + bool RecreateEncoderInstance(const AudioEncoderOpusConfig& config); + void SetFrameLength(int frame_length_ms); + void SetNumChannelsToEncode(size_t num_channels_to_encode); + void SetProjectedPacketLossRate(float fraction); + + void OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional bwe_period_ms, + absl::optional link_capacity_allocation); + + // TODO(minyue): remove "override" when we can deprecate + // `AudioEncoder::SetTargetBitrate`. + void SetTargetBitrate(int target_bps) override; + + void ApplyAudioNetworkAdaptor(); + std::unique_ptr DefaultAudioNetworkAdaptorCreator( + absl::string_view config_string, + RtcEventLog* event_log) const; + + void MaybeUpdateUplinkBandwidth(); + + AudioEncoderOpusConfig config_; + const int payload_type_; + const bool use_stable_target_for_adaptation_; + const bool adjust_bandwidth_; + bool bitrate_changed_; + // A multiplier for bitrates at 5 kbps and higher. The target bitrate + // will be multiplied by these multipliers, each multiplier is applied to a + // 1 kbps range. + std::vector bitrate_multipliers_; + float packet_loss_rate_; + std::vector input_buffer_; + OpusEncInst* inst_; + uint32_t first_timestamp_in_buffer_; + size_t num_channels_to_encode_; + int next_frame_length_ms_; + int complexity_; + std::unique_ptr packet_loss_fraction_smoother_; + const AudioNetworkAdaptorCreator audio_network_adaptor_creator_; + std::unique_ptr audio_network_adaptor_; + absl::optional overhead_bytes_per_packet_; + const std::unique_ptr bitrate_smoother_; + absl::optional bitrate_smoother_last_update_time_; + int consecutive_dtx_frames_; + + friend struct AudioEncoderOpus; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc new file mode 100644 index 0000000000..a2ebe43bbe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc @@ -0,0 +1,914 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/audio_codecs/opus/audio_encoder_opus.h" + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "common_audio/mocks/mock_smoothing_filter.h" +#include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h" +#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "rtc_base/checks.h" +#include "rtc_base/fake_clock.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +using ::testing::NiceMock; +using ::testing::Return; + +namespace { + +constexpr int kDefaultOpusPayloadType = 105; +constexpr int kDefaultOpusRate = 32000; +constexpr int kDefaultOpusPacSize = 960; +constexpr int64_t kInitialTimeUs = 12345678; + +AudioEncoderOpusConfig CreateConfigWithParameters( + const SdpAudioFormat::Parameters& params) { + const SdpAudioFormat format("opus", 48000, 2, params); + return *AudioEncoderOpus::SdpToConfig(format); +} + +struct AudioEncoderOpusStates { + MockAudioNetworkAdaptor* mock_audio_network_adaptor; + MockSmoothingFilter* mock_bitrate_smoother; + std::unique_ptr encoder; + std::unique_ptr fake_clock; + AudioEncoderOpusConfig config; +}; + +std::unique_ptr CreateCodec(int sample_rate_hz, + size_t num_channels) { + std::unique_ptr states = + std::make_unique(); + states->mock_audio_network_adaptor = nullptr; + states->fake_clock.reset(new rtc::ScopedFakeClock()); + states->fake_clock->SetTime(Timestamp::Micros(kInitialTimeUs)); + + MockAudioNetworkAdaptor** mock_ptr = &states->mock_audio_network_adaptor; + AudioEncoderOpusImpl::AudioNetworkAdaptorCreator creator = + [mock_ptr](absl::string_view, RtcEventLog* event_log) { + std::unique_ptr adaptor( + new NiceMock()); + EXPECT_CALL(*adaptor, Die()); + *mock_ptr = adaptor.get(); + return adaptor; + }; + + AudioEncoderOpusConfig config; + config.frame_size_ms = rtc::CheckedDivExact(kDefaultOpusPacSize, 48); + config.sample_rate_hz = sample_rate_hz; + config.num_channels = num_channels; + config.bitrate_bps = kDefaultOpusRate; + config.application = num_channels == 1 + ? 
AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + config.supported_frame_lengths_ms.push_back(config.frame_size_ms); + states->config = config; + + std::unique_ptr bitrate_smoother( + new MockSmoothingFilter()); + states->mock_bitrate_smoother = bitrate_smoother.get(); + + states->encoder.reset( + new AudioEncoderOpusImpl(states->config, kDefaultOpusPayloadType, creator, + std::move(bitrate_smoother))); + return states; +} + +AudioEncoderRuntimeConfig CreateEncoderRuntimeConfig() { + constexpr int kBitrate = 40000; + constexpr int kFrameLength = 60; + constexpr bool kEnableDtx = false; + constexpr size_t kNumChannels = 1; + AudioEncoderRuntimeConfig config; + config.bitrate_bps = kBitrate; + config.frame_length_ms = kFrameLength; + config.enable_dtx = kEnableDtx; + config.num_channels = kNumChannels; + return config; +} + +void CheckEncoderRuntimeConfig(const AudioEncoderOpusImpl* encoder, + const AudioEncoderRuntimeConfig& config) { + EXPECT_EQ(*config.bitrate_bps, encoder->GetTargetBitrate()); + EXPECT_EQ(*config.frame_length_ms, encoder->next_frame_length_ms()); + EXPECT_EQ(*config.enable_dtx, encoder->GetDtx()); + EXPECT_EQ(*config.num_channels, encoder->num_channels_to_encode()); +} + +// Create 10ms audio data blocks for a total packet size of "packet_size_ms". +std::unique_ptr Create10msAudioBlocks( + const std::unique_ptr& encoder, + int packet_size_ms) { + const std::string file_name = + test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + + std::unique_ptr speech_data(new test::AudioLoop()); + int audio_samples_per_ms = + rtc::CheckedDivExact(encoder->SampleRateHz(), 1000); + if (!speech_data->Init( + file_name, + packet_size_ms * audio_samples_per_ms * + encoder->num_channels_to_encode(), + 10 * audio_samples_per_ms * encoder->num_channels_to_encode())) + return nullptr; + return speech_data; +} + +} // namespace + +class AudioEncoderOpusTest : public ::testing::TestWithParam { + protected: + int sample_rate_hz_{GetParam()}; +}; +INSTANTIATE_TEST_SUITE_P(Param, + AudioEncoderOpusTest, + ::testing::Values(16000, 48000)); + +TEST_P(AudioEncoderOpusTest, DefaultApplicationModeMono) { + auto states = CreateCodec(sample_rate_hz_, 1); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, DefaultApplicationModeStereo) { + auto states = CreateCodec(sample_rate_hz_, 2); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ChangeApplicationMode) { + auto states = CreateCodec(sample_rate_hz_, 2); + EXPECT_TRUE( + states->encoder->SetApplication(AudioEncoder::Application::kSpeech)); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ResetWontChangeApplicationMode) { + auto states = CreateCodec(sample_rate_hz_, 2); + + // Trigger a reset. + states->encoder->Reset(); + // Verify that the mode is still kAudio. + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio, + states->encoder->application()); + + // Now change to kVoip. + EXPECT_TRUE( + states->encoder->SetApplication(AudioEncoder::Application::kSpeech)); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); + + // Trigger a reset again. + states->encoder->Reset(); + // Verify that the mode is still kVoip. 
+ EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ToggleDtx) { + auto states = CreateCodec(sample_rate_hz_, 2); + // Enable DTX + EXPECT_TRUE(states->encoder->SetDtx(true)); + EXPECT_TRUE(states->encoder->GetDtx()); + // Turn off DTX. + EXPECT_TRUE(states->encoder->SetDtx(false)); + EXPECT_FALSE(states->encoder->GetDtx()); +} + +TEST_P(AudioEncoderOpusTest, + OnReceivedUplinkBandwidthWithoutAudioNetworkAdaptor) { + auto states = CreateCodec(sample_rate_hz_, 1); + // Constants are replicated from audio_states->encoderopus.cc. + const int kMinBitrateBps = 6000; + const int kMaxBitrateBps = 510000; + const int kOverheadBytesPerPacket = 64; + states->encoder->OnReceivedOverhead(kOverheadBytesPerPacket); + const int kOverheadBps = 8 * kOverheadBytesPerPacket * + rtc::CheckedDivExact(48000, kDefaultOpusPacSize); + // Set a too low bitrate. + states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps - 1, + absl::nullopt); + EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate()); + // Set a too high bitrate. + states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps + 1, + absl::nullopt); + EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate()); + // Set the minimum rate. + states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps, + absl::nullopt); + EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate()); + // Set the maximum rate. + states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps, + absl::nullopt); + EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate()); + // Set rates from kMaxBitrateBps up to 32000 bps. + for (int rate = kMinBitrateBps + kOverheadBps; rate <= 32000 + kOverheadBps; + rate += 1000) { + states->encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt); + EXPECT_EQ(rate - kOverheadBps, states->encoder->GetTargetBitrate()); + } +} + +TEST_P(AudioEncoderOpusTest, SetReceiverFrameLengthRange) { + auto states = CreateCodec(sample_rate_hz_, 2); + // Before calling to `SetReceiverFrameLengthRange`, + // `supported_frame_lengths_ms` should contain only the frame length being + // used. + using ::testing::ElementsAre; + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(states->encoder->next_frame_length_ms())); + states->encoder->SetReceiverFrameLengthRange(0, 12345); + states->encoder->SetReceiverFrameLengthRange(21, 60); + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(40, 60)); + states->encoder->SetReceiverFrameLengthRange(20, 59); + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(20, 40)); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkPacketLossFraction) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any packet loss fraction is fine. 
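+  // (The call under test takes two paths at once: the raw fraction is
+  // forwarded to the adaptor, which is what the expectation below checks,
+  // and it is also fed into the exponential smoother that drives
+  // SetProjectedPacketLossRate().)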
+ constexpr float kUplinkPacketLoss = 0.1f; + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetUplinkPacketLossFraction(kUplinkPacketLoss)); + states->encoder->OnReceivedUplinkPacketLossFraction(kUplinkPacketLoss); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkBandwidth) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Audio-StableTargetAdaptation/Disabled/"); + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any target audio bitrate is fine. + constexpr int kTargetAudioBitrate = 30000; + constexpr int64_t kProbingIntervalMs = 3000; + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetTargetAudioBitrate(kTargetAudioBitrate)); + EXPECT_CALL(*states->mock_bitrate_smoother, + SetTimeConstantMs(kProbingIntervalMs * 4)); + EXPECT_CALL(*states->mock_bitrate_smoother, AddSample(kTargetAudioBitrate)); + states->encoder->OnReceivedUplinkBandwidth(kTargetAudioBitrate, + kProbingIntervalMs); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkAllocation) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + BitrateAllocationUpdate update; + update.target_bitrate = DataRate::BitsPerSec(30000); + update.stable_target_bitrate = DataRate::BitsPerSec(20000); + update.bwe_period = TimeDelta::Millis(200); + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetTargetAudioBitrate(update.target_bitrate.bps())); + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetUplinkBandwidth(update.stable_target_bitrate.bps())); + states->encoder->OnReceivedUplinkAllocation(update); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedRtt) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any rtt is fine. + constexpr int kRtt = 30; + EXPECT_CALL(*states->mock_audio_network_adaptor, SetRtt(kRtt)); + states->encoder->OnReceivedRtt(kRtt); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedOverhead) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any overhead is fine. 
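+  // (For scale: with 64 overhead bytes and 20 ms packets, the encoder's own
+  // accounting in OnReceivedUplinkBandwidth() would amount to
+  // 64 * 8 * 100 / 2 = 25600 bps.)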
+ constexpr size_t kOverhead = 64; + EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead)); + states->encoder->OnReceivedOverhead(kOverhead); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + PacketLossFractionSmoothedOnSetUplinkPacketLossFraction) { + auto states = CreateCodec(sample_rate_hz_, 2); + + // The values are carefully chosen so that if no smoothing is made, the test + // will fail. + constexpr float kPacketLossFraction_1 = 0.02f; + constexpr float kPacketLossFraction_2 = 0.198f; + // `kSecondSampleTimeMs` is chosen to ease the calculation since + // 0.9999 ^ 6931 = 0.5. + constexpr int64_t kSecondSampleTimeMs = 6931; + + // First time, no filtering. + states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_1); + EXPECT_FLOAT_EQ(0.02f, states->encoder->packet_loss_rate()); + + states->fake_clock->AdvanceTime(TimeDelta::Millis(kSecondSampleTimeMs)); + states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_2); + + // Now the output of packet loss fraction smoother should be + // (0.02 + 0.198) / 2 = 0.109. + EXPECT_NEAR(0.109f, states->encoder->packet_loss_rate(), 0.001); +} + +TEST_P(AudioEncoderOpusTest, PacketLossRateUpperBounded) { + auto states = CreateCodec(sample_rate_hz_, 2); + + states->encoder->OnReceivedUplinkPacketLossFraction(0.5); + EXPECT_FLOAT_EQ(0.2f, states->encoder->packet_loss_rate()); +} + +TEST_P(AudioEncoderOpusTest, DoNotInvokeSetTargetBitrateIfOverheadUnknown) { + auto states = CreateCodec(sample_rate_hz_, 2); + + states->encoder->OnReceivedUplinkBandwidth(kDefaultOpusRate * 2, + absl::nullopt); + + // Since `OnReceivedOverhead` has not been called, the codec bitrate should + // not change. + EXPECT_EQ(kDefaultOpusRate, states->encoder->GetTargetBitrate()); +} + +// Verifies that the complexity adaptation in the config works as intended. +TEST(AudioEncoderOpusTest, ConfigComplexityAdaptation) { + AudioEncoderOpusConfig config; + config.low_rate_complexity = 8; + config.complexity = 6; + + // Bitrate within hysteresis window. Expect empty output. + config.bitrate_bps = 12500; + EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate below hysteresis window. Expect higher complexity. + config.bitrate_bps = 10999; + EXPECT_EQ(8, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate within hysteresis window. Expect empty output. + config.bitrate_bps = 12500; + EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate above hysteresis window. Expect lower complexity. + config.bitrate_bps = 14001; + EXPECT_EQ(6, AudioEncoderOpusImpl::GetNewComplexity(config)); +} + +// Verifies that the bandwidth adaptation in the config works as intended. +TEST_P(AudioEncoderOpusTest, ConfigBandwidthAdaptation) { + AudioEncoderOpusConfig config; + const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000); + const std::vector silence( + opus_rate_khz * config.frame_size_ms * config.num_channels, 0); + constexpr size_t kMaxBytes = 1000; + uint8_t bitstream[kMaxBytes]; + + OpusEncInst* inst; + EXPECT_EQ(0, WebRtcOpus_EncoderCreate( + &inst, config.num_channels, + config.application == + AudioEncoderOpusConfig::ApplicationMode::kVoip + ? 0 + : 1, + sample_rate_hz_)); + + // Bitrate below minmum wideband. Expect narrowband. 
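+  // These targets bracket the GetNewBandwidth() constants
+  // (kMinWidebandBitrate = 8000, kMaxNarrowbandBitrate = 9000,
+  // kAutomaticThreshold = 11000): 7999 must narrow, 9001 must widen, and
+  // 12001 must release the bandwidth to OPUS_AUTO.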
+  config.bitrate_bps = absl::optional<int>(7999);
+  auto bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
+  EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND), bandwidth);
+  WebRtcOpus_SetBandwidth(inst, *bandwidth);
+  // It is necessary to encode here because Opus has some logic in the encoder
+  // that goes from the user-set bandwidth to the used and returned one.
+  WebRtcOpus_Encode(inst, silence.data(),
+                    rtc::CheckedDivExact(silence.size(), config.num_channels),
+                    kMaxBytes, bitstream);
+
+  // Bitrate not yet above maximum narrowband. Expect empty.
+  config.bitrate_bps = absl::optional<int>(9000);
+  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
+  EXPECT_EQ(absl::optional<int>(), bandwidth);
+
+  // Bitrate above maximum narrowband. Expect wideband.
+  config.bitrate_bps = absl::optional<int>(9001);
+  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
+  EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND), bandwidth);
+  WebRtcOpus_SetBandwidth(inst, *bandwidth);
+  // It is necessary to encode here because Opus has some logic in the encoder
+  // that goes from the user-set bandwidth to the used and returned one.
+  WebRtcOpus_Encode(inst, silence.data(),
+                    rtc::CheckedDivExact(silence.size(), config.num_channels),
+                    kMaxBytes, bitstream);
+
+  // Bitrate not yet below minimum wideband. Expect empty.
+  config.bitrate_bps = absl::optional<int>(8000);
+  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
+  EXPECT_EQ(absl::optional<int>(), bandwidth);
+
+  // Bitrate above automatic threshold. Expect automatic.
+  config.bitrate_bps = absl::optional<int>(12001);
+  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
+  EXPECT_EQ(absl::optional<int>(OPUS_AUTO), bandwidth);
+
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(inst));
+}
+
+TEST_P(AudioEncoderOpusTest, EmptyConfigDoesNotAffectEncoderSettings) {
+  auto states = CreateCodec(sample_rate_hz_, 2);
+  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
+
+  auto config = CreateEncoderRuntimeConfig();
+  AudioEncoderRuntimeConfig empty_config;
+
+  EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
+      .WillOnce(Return(config))
+      .WillOnce(Return(empty_config));
+
+  constexpr size_t kOverhead = 64;
+  EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead))
+      .Times(2);
+  states->encoder->OnReceivedOverhead(kOverhead);
+  states->encoder->OnReceivedOverhead(kOverhead);
+
+  CheckEncoderRuntimeConfig(states->encoder.get(), config);
+}
+
+TEST_P(AudioEncoderOpusTest, UpdateUplinkBandwidthInAudioNetworkAdaptor) {
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-Audio-StableTargetAdaptation/Disabled/");
+  auto states = CreateCodec(sample_rate_hz_, 2);
+  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
+  const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000);
+  const std::vector<int16_t> audio(opus_rate_khz * 10 * 2, 0);
+  rtc::Buffer encoded;
+  EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage())
+      .WillOnce(Return(50000));
+  EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(50000));
+  states->encoder->Encode(
+      0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
+
+  // Repeat the uplink bandwidth update test several times.
+  for (int i = 0; i < 5; i++) {
+    // Don't update until it is time to update again.
+    states->fake_clock->AdvanceTime(TimeDelta::Millis(
+        states->config.uplink_bandwidth_update_interval_ms - 1));
+    states->encoder->Encode(
+        0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
+
+    // Update when it is time to update.
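+    // Advancing the remaining 1 ms completes the update interval, so the
+    // smoothed bandwidth estimate is fetched and forwarded exactly once.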
+    EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage())
+        .WillOnce(Return(40000));
+    EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(40000));
+    states->fake_clock->AdvanceTime(TimeDelta::Millis(1));
+    states->encoder->Encode(
+        0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
+  }
+}
+
+TEST_P(AudioEncoderOpusTest, EncodeAtMinBitrate) {
+  auto states = CreateCodec(sample_rate_hz_, 1);
+  constexpr int kNumPacketsToEncode = 2;
+  auto audio_frames =
+      Create10msAudioBlocks(states->encoder, kNumPacketsToEncode * 20);
+  ASSERT_TRUE(audio_frames) << "Create10msAudioBlocks failed";
+  rtc::Buffer encoded;
+  uint32_t rtp_timestamp = 12345;  // Just a number, not important to this test.
+
+  states->encoder->OnReceivedUplinkBandwidth(0, absl::nullopt);
+  for (int packet_index = 0; packet_index < kNumPacketsToEncode;
+       packet_index++) {
+    // Make sure we are not encoding before we have enough data for
+    // a 20 ms packet.
+    for (int index = 0; index < 1; index++) {
+      states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(),
+                              &encoded);
+      EXPECT_EQ(0u, encoded.size());
+    }
+
+    // Should encode now.
+    states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(),
+                            &encoded);
+    EXPECT_GT(encoded.size(), 0u);
+    encoded.Clear();
+  }
+}
+
+TEST(AudioEncoderOpusTest, TestConfigDefaults) {
+  const auto config_opt = AudioEncoderOpus::SdpToConfig({"opus", 48000, 2});
+  ASSERT_TRUE(config_opt);
+  EXPECT_EQ(48000, config_opt->max_playback_rate_hz);
+  EXPECT_EQ(1u, config_opt->num_channels);
+  EXPECT_FALSE(config_opt->fec_enabled);
+  EXPECT_FALSE(config_opt->dtx_enabled);
+  EXPECT_EQ(20, config_opt->frame_size_ms);
+}
+
+TEST(AudioEncoderOpusTest, TestConfigFromParams) {
+  const auto config1 = CreateConfigWithParameters({{"stereo", "0"}});
+  EXPECT_EQ(1U, config1.num_channels);
+
+  const auto config2 = CreateConfigWithParameters({{"stereo", "1"}});
+  EXPECT_EQ(2U, config2.num_channels);
+
+  const auto config3 = CreateConfigWithParameters({{"useinbandfec", "0"}});
+  EXPECT_FALSE(config3.fec_enabled);
+
+  const auto config4 = CreateConfigWithParameters({{"useinbandfec", "1"}});
+  EXPECT_TRUE(config4.fec_enabled);
+
+  const auto config5 = CreateConfigWithParameters({{"usedtx", "0"}});
+  EXPECT_FALSE(config5.dtx_enabled);
+
+  const auto config6 = CreateConfigWithParameters({{"usedtx", "1"}});
+  EXPECT_TRUE(config6.dtx_enabled);
+
+  const auto config7 = CreateConfigWithParameters({{"cbr", "0"}});
+  EXPECT_FALSE(config7.cbr_enabled);
+
+  const auto config8 = CreateConfigWithParameters({{"cbr", "1"}});
+  EXPECT_TRUE(config8.cbr_enabled);
+
+  const auto config9 =
+      CreateConfigWithParameters({{"maxplaybackrate", "12345"}});
+  EXPECT_EQ(12345, config9.max_playback_rate_hz);
+
+  const auto config10 =
+      CreateConfigWithParameters({{"maxaveragebitrate", "96000"}});
+  EXPECT_EQ(96000, config10.bitrate_bps);
+
+  const auto config11 = CreateConfigWithParameters({{"maxptime", "40"}});
+  for (int frame_length : config11.supported_frame_lengths_ms) {
+    EXPECT_LE(frame_length, 40);
+  }
+
+  const auto config12 = CreateConfigWithParameters({{"minptime", "40"}});
+  for (int frame_length : config12.supported_frame_lengths_ms) {
+    EXPECT_GE(frame_length, 40);
+  }
+
+  const auto config13 = CreateConfigWithParameters({{"ptime", "40"}});
+  EXPECT_EQ(40, config13.frame_size_ms);
+
+  constexpr int kMinSupportedFrameLength = 10;
+  constexpr int kMaxSupportedFrameLength =
+      WEBRTC_OPUS_SUPPORT_120MS_PTIME ? 120 : 60;
+
+  const auto config14 = CreateConfigWithParameters({{"ptime", "1"}});
+  EXPECT_EQ(kMinSupportedFrameLength, config14.frame_size_ms);
+
+  const auto config15 = CreateConfigWithParameters({{"ptime", "2000"}});
+  EXPECT_EQ(kMaxSupportedFrameLength, config15.frame_size_ms);
+}
+
+TEST(AudioEncoderOpusTest, TestConfigFromInvalidParams) {
+  const webrtc::SdpAudioFormat format("opus", 48000, 2);
+  const auto default_config = *AudioEncoderOpus::SdpToConfig(format);
+#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
+  const std::vector<int> default_supported_frame_lengths_ms(
+      {20, 40, 60, 120});
+#else
+  const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60});
+#endif
+
+  AudioEncoderOpusConfig config;
+  config = CreateConfigWithParameters({{"stereo", "invalid"}});
+  EXPECT_EQ(default_config.num_channels, config.num_channels);
+
+  config = CreateConfigWithParameters({{"useinbandfec", "invalid"}});
+  EXPECT_EQ(default_config.fec_enabled, config.fec_enabled);
+
+  config = CreateConfigWithParameters({{"usedtx", "invalid"}});
+  EXPECT_EQ(default_config.dtx_enabled, config.dtx_enabled);
+
+  config = CreateConfigWithParameters({{"cbr", "invalid"}});
+  EXPECT_EQ(default_config.cbr_enabled, config.cbr_enabled);
+
+  config = CreateConfigWithParameters({{"maxplaybackrate", "0"}});
+  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
+
+  config = CreateConfigWithParameters({{"maxplaybackrate", "-23"}});
+  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
+
+  config = CreateConfigWithParameters({{"maxplaybackrate", "not a number!"}});
+  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
+
+  config = CreateConfigWithParameters({{"maxaveragebitrate", "0"}});
+  EXPECT_EQ(6000, config.bitrate_bps);
+
+  config = CreateConfigWithParameters({{"maxaveragebitrate", "-1000"}});
+  EXPECT_EQ(6000, config.bitrate_bps);
+
+  config = CreateConfigWithParameters({{"maxaveragebitrate", "1024000"}});
+  EXPECT_EQ(510000, config.bitrate_bps);
+
+  config = CreateConfigWithParameters({{"maxaveragebitrate", "not a number!"}});
+  EXPECT_EQ(default_config.bitrate_bps, config.bitrate_bps);
+
+  config = CreateConfigWithParameters({{"maxptime", "invalid"}});
+  EXPECT_EQ(default_supported_frame_lengths_ms,
+            config.supported_frame_lengths_ms);
+
+  config = CreateConfigWithParameters({{"minptime", "invalid"}});
+  EXPECT_EQ(default_supported_frame_lengths_ms,
+            config.supported_frame_lengths_ms);
+
+  config = CreateConfigWithParameters({{"ptime", "invalid"}});
+  EXPECT_EQ(default_supported_frame_lengths_ms,
+            config.supported_frame_lengths_ms);
+}
+
+TEST(AudioEncoderOpusTest, GetFrameLengthRange) {
+  AudioEncoderOpusConfig config =
+      CreateConfigWithParameters({{"maxptime", "10"}, {"ptime", "10"}});
+  std::unique_ptr<AudioEncoder> encoder =
+      AudioEncoderOpus::MakeAudioEncoder(config, kDefaultOpusPayloadType);
+  auto ptime = webrtc::TimeDelta::Millis(10);
+  absl::optional<std::pair<webrtc::TimeDelta, webrtc::TimeDelta>> range = {
+      {ptime, ptime}};
+  EXPECT_EQ(encoder->GetFrameLengthRange(), range);
+}
+
+// Tests that the bitrate is overridden by the "maxaveragebitrate" parameter,
+// and that "maxaveragebitrate" cannot be set to values outside the range
+// 6000 to 510000 bps.
+TEST(AudioEncoderOpusTest, SetSendCodecOpusMaxAverageBitrate) {
+  // Clamp to 6000 if less.
+  const auto config1 = AudioEncoderOpus::SdpToConfig(
+      {"opus", 48000, 2, {{"maxaveragebitrate", "5999"}}});
+  EXPECT_EQ(6000, config1->bitrate_bps);
+
+  // Clamp to 510000 if larger.
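+  // These bounds mirror the clamping verified in TestConfigFromInvalidParams
+  // above (0 and -1000 clamp up to 6000; 1024000 clamps down to 510000).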
+ const auto config2 = AudioEncoderOpus::SdpToConfig( + {"opus", 48000, 2, {{"maxaveragebitrate", "510001"}}}); + EXPECT_EQ(510000, config2->bitrate_bps); + + const auto config3 = AudioEncoderOpus::SdpToConfig( + {"opus", 48000, 2, {{"maxaveragebitrate", "200000"}}}); + EXPECT_EQ(200000, config3->bitrate_bps); +} + +// Test maxplaybackrate <= 8000 triggers Opus narrow band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateNb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "8000"}}); + EXPECT_EQ(8000, config.max_playback_rate_hz); + EXPECT_EQ(12000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "8000"}, {"stereo", "1"}}); + EXPECT_EQ(8000, config.max_playback_rate_hz); + EXPECT_EQ(24000, config.bitrate_bps); +} + +// Test 8000 < maxplaybackrate <= 12000 triggers Opus medium band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateMb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "8001"}}); + EXPECT_EQ(8001, config.max_playback_rate_hz); + EXPECT_EQ(20000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "8001"}, {"stereo", "1"}}); + EXPECT_EQ(8001, config.max_playback_rate_hz); + EXPECT_EQ(40000, config.bitrate_bps); +} + +// Test 12000 < maxplaybackrate <= 16000 triggers Opus wide band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateWb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "12001"}}); + EXPECT_EQ(12001, config.max_playback_rate_hz); + EXPECT_EQ(20000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "12001"}, {"stereo", "1"}}); + EXPECT_EQ(12001, config.max_playback_rate_hz); + EXPECT_EQ(40000, config.bitrate_bps); +} + +// Test 16000 < maxplaybackrate <= 24000 triggers Opus super wide band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateSwb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "16001"}}); + EXPECT_EQ(16001, config.max_playback_rate_hz); + EXPECT_EQ(32000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "16001"}, {"stereo", "1"}}); + EXPECT_EQ(16001, config.max_playback_rate_hz); + EXPECT_EQ(64000, config.bitrate_bps); +} + +// Test 24000 < maxplaybackrate triggers Opus full band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "24001"}}); + EXPECT_EQ(24001, config.max_playback_rate_hz); + EXPECT_EQ(32000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "24001"}, {"stereo", "1"}}); + EXPECT_EQ(24001, config.max_playback_rate_hz); + EXPECT_EQ(64000, config.bitrate_bps); +} + +TEST_P(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) { + // Create encoder with DTX enabled. + AudioEncoderOpusConfig config; + config.dtx_enabled = true; + config.sample_rate_hz = sample_rate_hz_; + constexpr int payload_type = 17; + const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + + // Open file containing speech and silence. + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + test::AudioLoop audio_loop; + // Use the file as if it were sampled at our desired input rate. + const size_t max_loop_length_samples = + sample_rate_hz_ * 10; // Max 10 second loop. + const size_t input_block_size_samples = + 10 * sample_rate_hz_ / 1000; // 10 ms. + EXPECT_TRUE(audio_loop.Init(kInputFileName, max_loop_length_samples, + input_block_size_samples)); + + // Encode. 
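+  // 500 packets of 20 ms (two 10 ms blocks each) cover 10 s of looped audio,
+  // which matches the 10 s maximum loop length configured above.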
+  AudioEncoder::EncodedInfo info;
+  rtc::Buffer encoded(500);
+  int nonspeech_frames = 0;
+  int max_nonspeech_frames = 0;
+  int dtx_frames = 0;
+  int max_dtx_frames = 0;
+  uint32_t rtp_timestamp = 0u;
+  for (size_t i = 0; i < 500; ++i) {
+    encoded.Clear();
+
+    // Every second call to the encoder will generate an Opus packet.
+    for (int j = 0; j < 2; j++) {
+      info =
+          encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
+      rtp_timestamp += input_block_size_samples;
+    }
+
+    // Bookkeeping of the number of DTX frames.
+    if (info.encoded_bytes <= 2) {
+      ++dtx_frames;
+    } else {
+      if (dtx_frames > max_dtx_frames)
+        max_dtx_frames = dtx_frames;
+      dtx_frames = 0;
+    }
+
+    // Bookkeeping of the number of non-speech frames.
+    if (info.speech == 0) {
+      ++nonspeech_frames;
+    } else {
+      if (nonspeech_frames > max_nonspeech_frames)
+        max_nonspeech_frames = nonspeech_frames;
+      nonspeech_frames = 0;
+    }
+  }
+
+  // The maximum number of consecutive non-speech packets should exceed 15.
+  EXPECT_GT(max_nonspeech_frames, 15);
+}
+
+TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) {
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx/Enabled/");
+  const std::string kInputFileName =
+      webrtc::test::ResourcePath("audio_coding/testfile16kHz", "pcm");
+  constexpr int kSampleRateHz = 16000;
+  AudioEncoderOpusConfig config;
+  config.dtx_enabled = true;
+  config.sample_rate_hz = kSampleRateHz;
+  constexpr int payload_type = 17;
+  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
+  test::AudioLoop audio_loop;
+  constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 11.6f;
+  constexpr size_t kInputBlockSizeSamples = kSampleRateHz / 100;
+  EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
+                              kInputBlockSizeSamples));
+  AudioEncoder::EncodedInfo info;
+  rtc::Buffer encoded(500);
+  // Encode the audio file and store the last part that corresponds to silence.
+  constexpr size_t kSilenceDurationSamples = kSampleRateHz * 0.2f;
+  std::array<int16_t, kSilenceDurationSamples> silence;
+  uint32_t rtp_timestamp = 0;
+  bool last_packet_dtx_frame = false;
+  bool opus_entered_dtx = false;
+  bool silence_filled = false;
+  size_t timestamp_start_silence = 0;
+  while (!silence_filled && rtp_timestamp < kMaxLoopLengthSamples) {
+    encoded.Clear();
+    // Every second call to the encoder will generate an Opus packet.
+    for (int j = 0; j < 2; j++) {
+      auto next_frame = audio_loop.GetNextBlock();
+      info = encoder->Encode(rtp_timestamp, next_frame, &encoded);
+      if (opus_entered_dtx) {
+        size_t silence_frame_start = rtp_timestamp - timestamp_start_silence;
+        silence_filled = silence_frame_start >= kSilenceDurationSamples;
+        if (!silence_filled) {
+          std::copy(next_frame.begin(), next_frame.end(),
+                    silence.begin() + silence_frame_start);
+        }
+      }
+      rtp_timestamp += kInputBlockSizeSamples;
+    }
+    EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
+    last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2
+                                                   : last_packet_dtx_frame;
+    if (info.encoded_bytes <= 2 && !opus_entered_dtx) {
+      timestamp_start_silence = rtp_timestamp;
+    }
+    opus_entered_dtx = info.encoded_bytes <= 2;
+  }
+
+  EXPECT_TRUE(silence_filled);
+  // The copied 200 ms of silence is used to create six bursts that are fed to
+  // the encoder: the first three with higher energy and the last three with
+  // lower energy. The test verifies that the encoder only sends refresh DTX
+  // packets during the low-energy bursts.
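+  // A gain of 1.4 raises the burst energy by 20 * log10(1.4), i.e. roughly
+  // 2.9 dB over the tracked background, which is above the 3 dB negative
+  // margin used by the high-energy refresh detection in opus_interface.cc.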
+  int number_non_empty_packets_during_increase = 0;
+  int number_non_empty_packets_during_decrease = 0;
+  for (size_t burst = 0; burst < 6; ++burst) {
+    uint32_t rtp_timestamp_start = rtp_timestamp;
+    const bool increase_noise = burst < 3;
+    const float gain = increase_noise ? 1.4f : 0.0f;
+    while (rtp_timestamp < rtp_timestamp_start + kSilenceDurationSamples) {
+      encoded.Clear();
+      // Every second call to the encoder will generate an Opus packet.
+      for (int j = 0; j < 2; j++) {
+        std::array<int16_t, kInputBlockSizeSamples> silence_frame;
+        size_t silence_frame_start = rtp_timestamp - rtp_timestamp_start;
+        std::transform(
+            silence.begin() + silence_frame_start,
+            silence.begin() + silence_frame_start + kInputBlockSizeSamples,
+            silence_frame.begin(), [gain](float s) { return gain * s; });
+        info = encoder->Encode(rtp_timestamp, silence_frame, &encoded);
+        rtp_timestamp += kInputBlockSizeSamples;
+      }
+      EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
+      last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2
+                                                     : last_packet_dtx_frame;
+      // Track the number of non-empty packets.
+      if (increase_noise && info.encoded_bytes > 2) {
+        number_non_empty_packets_during_increase++;
+      }
+      if (!increase_noise && info.encoded_bytes > 2) {
+        number_non_empty_packets_during_decrease++;
+      }
+    }
+  }
+  // Check that refresh DTX packets are only sent during the decreasing-energy
+  // region.
+  EXPECT_EQ(number_non_empty_packets_during_increase, 0);
+  EXPECT_GT(number_non_empty_packets_during_decrease, 0);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc
new file mode 100644
index 0000000000..38b60c6187
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio_codecs/opus/audio_decoder_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/window_generator.h"
+#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace {
+
+constexpr size_t kNumChannels = 1u;
+constexpr int kSampleRateHz = 48000;
+constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 50;  // 50 seconds.
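+// The test feeds the encoder 10 ms input blocks and decodes 20 ms packets;
+// both block sizes below are derived from the 48 kHz sample rate.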
+constexpr size_t kInputBlockSizeSamples = 10 * kSampleRateHz / 1000;   // 10 ms
+constexpr size_t kOutputBlockSizeSamples = 20 * kSampleRateHz / 1000;  // 20 ms
+constexpr size_t kFftSize = 1024;
+constexpr size_t kNarrowbandSize = 4000 * kFftSize / kSampleRateHz;
+constexpr float kKbdAlpha = 1.5f;
+
+class PowerRatioEstimator : public LappedTransform::Callback {
+ public:
+  PowerRatioEstimator() : low_pow_(0.f), high_pow_(0.f) {
+    WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
+    transform_.reset(new LappedTransform(kNumChannels, 0u,
+                                         kInputBlockSizeSamples, window_,
+                                         kFftSize, kFftSize / 2, this));
+  }
+
+  void ProcessBlock(float* data) { transform_->ProcessChunk(&data, nullptr); }
+
+  float PowerRatio() { return high_pow_ / low_pow_; }
+
+ protected:
+  void ProcessAudioBlock(const std::complex<float>* const* input,
+                         size_t num_input_channels,
+                         size_t num_freq_bins,
+                         size_t num_output_channels,
+                         std::complex<float>* const* output) override {
+    float low_pow = 0.f;
+    float high_pow = 0.f;
+    for (size_t i = 0u; i < num_input_channels; ++i) {
+      for (size_t j = 0u; j < kNarrowbandSize; ++j) {
+        float low_mag = std::abs(input[i][j]);
+        low_pow += low_mag * low_mag;
+        float high_mag = std::abs(input[i][j + kNarrowbandSize]);
+        high_pow += high_mag * high_mag;
+      }
+    }
+    low_pow_ += low_pow / (num_input_channels * kFftSize);
+    high_pow_ += high_pow / (num_input_channels * kFftSize);
+  }
+
+ private:
+  std::unique_ptr<LappedTransform> transform_;
+  float window_[kFftSize];
+  float low_pow_;
+  float high_pow_;
+};
+
+float EncodedPowerRatio(AudioEncoder* encoder,
+                        AudioDecoder* decoder,
+                        test::AudioLoop* audio_loop) {
+  // Encode and decode.
+  uint32_t rtp_timestamp = 0u;
+  constexpr size_t kBufferSize = 500;
+  rtc::Buffer encoded(kBufferSize);
+  std::vector<int16_t> decoded(kOutputBlockSizeSamples);
+  std::vector<float> decoded_float(kOutputBlockSizeSamples);
+  AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
+  PowerRatioEstimator power_ratio_estimator;
+  for (size_t i = 0; i < 1000; ++i) {
+    encoded.Clear();
+    AudioEncoder::EncodedInfo encoder_info =
+        encoder->Encode(rtp_timestamp, audio_loop->GetNextBlock(), &encoded);
+    rtp_timestamp += kInputBlockSizeSamples;
+    if (encoded.size() > 0) {
+      int decoder_info = decoder->Decode(
+          encoded.data(), encoded.size(), kSampleRateHz,
+          decoded.size() * sizeof(decoded[0]), decoded.data(), &speech_type);
+      if (decoder_info > 0) {
+        S16ToFloat(decoded.data(), decoded.size(), decoded_float.data());
+        power_ratio_estimator.ProcessBlock(decoded_float.data());
+      }
+    }
+  }
+  return power_ratio_estimator.PowerRatio();
+}
+
+}  // namespace
+
+// TODO(ivoc): Remove this test, WebRTC-AdjustOpusBandwidth is obsolete.
+TEST(BandwidthAdaptationTest, BandwidthAdaptationTest) {
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-AdjustOpusBandwidth/Enabled/");
+
+  constexpr float kMaxNarrowbandRatio = 0.0035f;
+  constexpr float kMinWidebandRatio = 0.01f;
+
+  // Create encoder.
+  AudioEncoderOpusConfig enc_config;
+  enc_config.bitrate_bps = absl::optional<int>(7999);
+  enc_config.num_channels = kNumChannels;
+  constexpr int payload_type = 17;
+  auto encoder = AudioEncoderOpus::MakeAudioEncoder(enc_config, payload_type);
+
+  // Create decoder.
+  AudioDecoderOpus::Config dec_config;
+  dec_config.num_channels = kNumChannels;
+  auto decoder = AudioDecoderOpus::MakeAudioDecoder(dec_config);
+
+  // Open speech file.
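+  // The ratio thresholds above discriminate narrowband output, which carries
+  // little energy above 4 kHz, from wideband output: EncodedPowerRatio()
+  // compares spectral energy above and below the 4 kHz bin (`kNarrowbandSize`).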
+  const std::string kInputFileName =
+      webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
+  test::AudioLoop audio_loop;
+  EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
+  ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
+                              kInputBlockSizeSamples));
+
+  EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
+            kMaxNarrowbandRatio);
+
+  encoder->OnReceivedTargetAudioBitrate(9000);
+  EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
+            kMaxNarrowbandRatio);
+
+  encoder->OnReceivedTargetAudioBitrate(9001);
+  EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
+            kMinWidebandRatio);
+
+  encoder->OnReceivedTargetAudioBitrate(8000);
+  EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
+            kMinWidebandRatio);
+
+  encoder->OnReceivedTargetAudioBitrate(12001);
+  EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
+            kMinWidebandRatio);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc
new file mode 100644
index 0000000000..e8c131092c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "api/test/metrics/global_metrics_logger_and_exporter.h"
+#include "api/test/metrics/metric.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "rtc_base/time_utils.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace {
+
+using ::webrtc::test::GetGlobalMetricsLogger;
+using ::webrtc::test::ImprovementDirection;
+using ::webrtc::test::Unit;
+
+int64_t RunComplexityTest(const AudioEncoderOpusConfig& config) {
+  // Create encoder.
+  constexpr int payload_type = 17;
+  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
+  // Open speech file.
+  const std::string kInputFileName =
+      webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
+  test::AudioLoop audio_loop;
+  constexpr int kSampleRateHz = 48000;
+  EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
+  constexpr size_t kMaxLoopLengthSamples =
+      kSampleRateHz * 10;  // 10 second loop.
+  constexpr size_t kInputBlockSizeSamples =
+      10 * kSampleRateHz / 1000;  // 10 ms.
+  EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
+                              kInputBlockSizeSamples));
+  // Encode.
+  const int64_t start_time_ms = rtc::TimeMillis();
+  AudioEncoder::EncodedInfo info;
+  rtc::Buffer encoded(500);
+  uint32_t rtp_timestamp = 0u;
+  for (size_t i = 0; i < 10000; ++i) {
+    encoded.Clear();
+    info = encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
+    rtp_timestamp += kInputBlockSizeSamples;
+  }
+  return rtc::TimeMillis() - start_time_ms;
+}
+
+// This test encodes an audio file using Opus twice with different bitrates
+// (~11 kbps and 15.5 kbps).
+// The runtime for each encoding is measured, and the ratio between the two is
+// calculated and tracked. This test explicitly sets low_rate_complexity to 9.
+// On desktop platforms this is the same as the regular complexity, and the
+// expectation is that the resulting ratio is less than 100% (since the encoder
+// runs faster at lower bitrates, given a fixed complexity setting). On mobile
+// platforms, on the other hand, the regular complexity is 5, and we expect the
+// resulting ratio to be higher, since we have explicitly asked for a higher
+// complexity setting at the lower rate.
+TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_On) {
+  // Create config.
+  AudioEncoderOpusConfig config;
+  // The limit -- including the hysteresis window -- at which the complexity
+  // should be increased.
+  config.bitrate_bps = 11000 - 1;
+  config.low_rate_complexity = 9;
+  int64_t runtime_10999bps = RunComplexityTest(config);
+
+  config.bitrate_bps = 15500;
+  int64_t runtime_15500bps = RunComplexityTest(config);
+
+  GetGlobalMetricsLogger()->LogSingleValueMetric(
+      "opus_encoding_complexity_ratio", "adaptation_on",
+      100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
+      ImprovementDirection::kNeitherIsBetter);
+}
+
+// This test is identical to the one above, but without complexity adaptation
+// enabled (neither on desktop nor on mobile). The expectation is that the
+// resulting ratio is less than 100% at all times.
+TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_Off) {
+  // Create config.
+  AudioEncoderOpusConfig config;
+  // The limit -- including the hysteresis window -- at which the complexity
+  // should be increased (but not in this test, since complexity adaptation is
+  // disabled).
+  config.bitrate_bps = 11000 - 1;
+  int64_t runtime_10999bps = RunComplexityTest(config);
+
+  config.bitrate_bps = 15500;
+  int64_t runtime_15500bps = RunComplexityTest(config);
+
+  GetGlobalMetricsLogger()->LogSingleValueMetric(
+      "opus_encoding_complexity_ratio", "adaptation_off",
+      100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
+      ImprovementDirection::kNeitherIsBetter);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc
new file mode 100644
index 0000000000..815f26e31c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using std::get;
+using std::string;
+using std::tuple;
+using ::testing::TestWithParam;
+
+namespace webrtc {
+
+// Define coding parameter as <channels, bit_rate, filename, extension>.
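+// The four fields are consumed in SetUp() via get<0>..get<3>: channel count,
+// bitrate in bps, resource file name, and file extension.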
+typedef tuple<size_t, int, string, string> coding_param;
+typedef struct mode mode;
+
+struct mode {
+  bool fec;
+  uint8_t target_packet_loss_rate;
+};
+
+const int kOpusBlockDurationMs = 20;
+const int kOpusSamplingKhz = 48;
+
+class OpusFecTest : public TestWithParam<coding_param> {
+ protected:
+  OpusFecTest();
+
+  void SetUp() override;
+  void TearDown() override;
+
+  virtual void EncodeABlock();
+
+  virtual void DecodeABlock(bool lost_previous, bool lost_current);
+
+  int block_duration_ms_;
+  int sampling_khz_;
+  size_t block_length_sample_;
+
+  size_t channels_;
+  int bit_rate_;
+
+  size_t data_pointer_;
+  size_t loop_length_samples_;
+  size_t max_bytes_;
+  size_t encoded_bytes_;
+
+  WebRtcOpusEncInst* opus_encoder_;
+  WebRtcOpusDecInst* opus_decoder_;
+
+  string in_filename_;
+
+  std::unique_ptr<int16_t[]> in_data_;
+  std::unique_ptr<int16_t[]> out_data_;
+  std::unique_ptr<uint8_t[]> bit_stream_;
+};
+
+void OpusFecTest::SetUp() {
+  channels_ = get<0>(GetParam());
+  bit_rate_ = get<1>(GetParam());
+  printf("Coding %zu channel signal at %d bps.\n", channels_, bit_rate_);
+
+  in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam()));
+
+  FILE* fp = fopen(in_filename_.c_str(), "rb");
+  ASSERT_FALSE(fp == NULL);
+
+  // Obtain the file size.
+  fseek(fp, 0, SEEK_END);
+  loop_length_samples_ = ftell(fp) / sizeof(int16_t);
+  rewind(fp);
+
+  // Allocate memory to contain the whole file.
+  in_data_.reset(
+      new int16_t[loop_length_samples_ + block_length_sample_ * channels_]);
+
+  // Copy the file into the buffer.
+  ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp),
+            loop_length_samples_);
+  fclose(fp);
+
+  // The audio will be used in a looped manner. To ease the acquisition of an
+  // audio frame that crosses the end of the excerpt, we add an extra block
+  // length of samples to the end of the array, copied from the beginning of
+  // the array. Audio frames that cross the end of the excerpt therefore
+  // always appear as contiguous memory.
+  memcpy(&in_data_[loop_length_samples_], &in_data_[0],
+         block_length_sample_ * channels_ * sizeof(int16_t));
+
+  // Maximum number of bytes in the output bitstream.
+  max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t);
+
+  out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]);
+  bit_stream_.reset(new uint8_t[max_bytes_]);
+
+  // If channels_ == 1, use Opus VOIP mode; otherwise, use audio mode.
+  int app = channels_ == 1 ? 0 : 1;
+
+  // Create encoder memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
+  EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));
+  // Set bitrate.
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
+}
+
+void OpusFecTest::TearDown() {
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+OpusFecTest::OpusFecTest()
+    : block_duration_ms_(kOpusBlockDurationMs),
+      sampling_khz_(kOpusSamplingKhz),
+      block_length_sample_(
+          static_cast<size_t>(block_duration_ms_ * sampling_khz_)),
+      data_pointer_(0),
+      max_bytes_(0),
+      encoded_bytes_(0),
+      opus_encoder_(NULL),
+      opus_decoder_(NULL) {}
+
+void OpusFecTest::EncodeABlock() {
+  int value =
+      WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_],
+                        block_length_sample_, max_bytes_, &bit_stream_[0]);
+  EXPECT_GT(value, 0);
+
+  encoded_bytes_ = static_cast<size_t>(value);
+}
+
+void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) {
+  int16_t audio_type;
+  int value_1 = 0, value_2 = 0;
+
+  if (lost_previous) {
+    // Decode previous frame.
+    if (!lost_current &&
+        WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_) == 1) {
+      value_1 =
+          WebRtcOpus_DecodeFec(opus_decoder_, &bit_stream_[0], encoded_bytes_,
+                               &out_data_[0], &audio_type);
+    } else {
+      // Call decoder PLC.
+      while (value_1 < static_cast<int>(block_length_sample_)) {
+        int ret = WebRtcOpus_Decode(opus_decoder_, NULL, 0, &out_data_[value_1],
+                                    &audio_type);
+        EXPECT_EQ(ret, sampling_khz_ * 10);  // Should return 10 ms of samples.
+        value_1 += ret;
+      }
+    }
+    EXPECT_EQ(static_cast<int>(block_length_sample_), value_1);
+  }
+
+  if (!lost_current) {
+    // Decode current frame.
+    value_2 = WebRtcOpus_Decode(opus_decoder_, &bit_stream_[0], encoded_bytes_,
+                                &out_data_[value_1 * channels_], &audio_type);
+    EXPECT_EQ(static_cast<int>(block_length_sample_), value_2);
+  }
+}
+
+TEST_P(OpusFecTest, RandomPacketLossTest) {
+  const int kDurationMs = 200000;
+  int time_now_ms, fec_frames;
+  int actual_packet_loss_rate;
+  bool lost_current, lost_previous;
+  mode mode_set[3] = {{true, 0}, {false, 0}, {true, 50}};
+
+  lost_current = false;
+  for (int i = 0; i < 3; i++) {
+    if (mode_set[i].fec) {
+      EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
+      EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(
+                       opus_encoder_, mode_set[i].target_packet_loss_rate));
+      printf("FEC is ON, targeting a packet loss rate of %d percent.\n",
+             mode_set[i].target_packet_loss_rate);
+    } else {
+      EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_));
+      printf("FEC is OFF.\n");
+    }
+    // In this test, we let the target packet loss rate match the actual rate.
+    actual_packet_loss_rate = mode_set[i].target_packet_loss_rate;
+    // Run every mode for a certain time.
+    time_now_ms = 0;
+    fec_frames = 0;
+    while (time_now_ms < kDurationMs) {
+      // Encode & decode.
+      EncodeABlock();
+
+      // Check if the payload has FEC.
+      int fec = WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_);
+
+      // If FEC is disabled or the target packet loss rate is set to 0, there
+      // should be no FEC in the bit stream.
+      if (!mode_set[i].fec || mode_set[i].target_packet_loss_rate == 0) {
+        EXPECT_EQ(fec, 0);
+      } else if (fec == 1) {
+        fec_frames++;
+      }
+
+      lost_previous = lost_current;
+      lost_current = rand() < actual_packet_loss_rate * (RAND_MAX / 100);
+      DecodeABlock(lost_previous, lost_current);
+
+      time_now_ms += block_duration_ms_;
+
+      // `data_pointer_` is incremented and wrapped across
+      // `loop_length_samples_`.
+      data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) %
+                      loop_length_samples_;
+    }
+    if (mode_set[i].fec) {
+      printf("%.2f percent of frames have FEC.\n",
+             static_cast<float>(fec_frames) * block_duration_ms_ / 2000);
+    }
+  }
+}
+
+const coding_param param_set[] = {
+    std::make_tuple(1,
+                    64000,
+                    string("audio_coding/testfile32kHz"),
+                    string("pcm")),
+    std::make_tuple(1,
+                    32000,
+                    string("audio_coding/testfile32kHz"),
+                    string("pcm")),
+    std::make_tuple(2,
+                    64000,
+                    string("audio_coding/teststereo32kHz"),
+                    string("pcm"))};
+
+INSTANTIATE_TEST_SUITE_P(AllTest, OpusFecTest, ::testing::ValuesIn(param_set));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h
new file mode 100644
index 0000000000..92c5c354a7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
+
+#include <stddef.h>
+
+#include "rtc_base/ignore_wundef.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#include "third_party/opus/src/include/opus.h"
+#include "third_party/opus/src/include/opus_multistream.h"
+RTC_POP_IGNORING_WUNDEF()
+
+struct WebRtcOpusEncInst {
+  OpusEncoder* encoder;
+  OpusMSEncoder* multistream_encoder;
+  size_t channels;
+  int in_dtx_mode;
+  bool avoid_noise_pumping_during_dtx;
+  int sample_rate_hz;
+  float smooth_energy_non_active_frames;
+};
+
+struct WebRtcOpusDecInst {
+  OpusDecoder* decoder;
+  OpusMSDecoder* multistream_decoder;
+  int prev_decoded_samples;
+  bool plc_use_prev_decoded_samples;
+  size_t channels;
+  int in_dtx_mode;
+  int sample_rate_hz;
+};
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc
new file mode 100644
index 0000000000..67d8619b34
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc
@@ -0,0 +1,881 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+
+#include <cstdlib>
+
+#include <numeric>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
+
+enum {
+#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
+  /* Maximum supported frame size in WebRTC is 120 ms. */
+  kWebRtcOpusMaxEncodeFrameSizeMs = 120,
+#else
+  /* Maximum supported frame size in WebRTC is 60 ms. */
+  kWebRtcOpusMaxEncodeFrameSizeMs = 60,
+#endif
+
+  /* The format allows up to 120 ms frames. Since we don't control the other
+   * side, we must allow for packets of that size. NetEq is currently limited
+   * to 60 ms on the receive side. */
+  kWebRtcOpusMaxDecodeFrameSizeMs = 120,
+
+  // Duration of audio that each call to packet loss concealment covers.
+  kWebRtcOpusPlcFrameSizeMs = 10,
+};
+
+constexpr char kPlcUsePrevDecodedSamplesFieldTrial[] =
+    "WebRTC-Audio-OpusPlcUsePrevDecodedSamples";
+
+constexpr char kAvoidNoisePumpingDuringDtxFieldTrial[] =
+    "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx";
+
+constexpr char kSetSignalVoiceWithDtxFieldTrial[] =
+    "WebRTC-Audio-OpusSetSignalVoiceWithDtx";
+
+static int FrameSizePerChannel(int frame_size_ms, int sample_rate_hz) {
+  RTC_DCHECK_GT(frame_size_ms, 0);
+  RTC_DCHECK_EQ(frame_size_ms % 10, 0);
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  RTC_DCHECK_EQ(sample_rate_hz % 1000, 0);
+  return frame_size_ms * (sample_rate_hz / 1000);
+}
+
+// Maximum sample count per channel.
+static int MaxFrameSizePerChannel(int sample_rate_hz) {
+  return FrameSizePerChannel(kWebRtcOpusMaxDecodeFrameSizeMs, sample_rate_hz);
+}
+
+// Default sample count per channel.
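+// (i.e. the number of samples in one 20 ms frame at the given rate).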
+static int DefaultFrameSizePerChannel(int sample_rate_hz) {
+  return FrameSizePerChannel(20, sample_rate_hz);
+}
+
+// Returns true if the `encoded` payload corresponds to a refresh DTX packet
+// whose energy is larger than expected for non-activity packets.
+static bool WebRtcOpus_IsHighEnergyRefreshDtxPacket(
+    OpusEncInst* inst,
+    rtc::ArrayView<const int16_t> frame,
+    rtc::ArrayView<const uint8_t> encoded) {
+  if (encoded.size() <= 2) {
+    return false;
+  }
+  int number_frames =
+      frame.size() / DefaultFrameSizePerChannel(inst->sample_rate_hz);
+  if (number_frames > 0 &&
+      WebRtcOpus_PacketHasVoiceActivity(encoded.data(), encoded.size()) == 0) {
+    const float average_frame_energy =
+        std::accumulate(frame.begin(), frame.end(), 0.0f,
+                        [](float a, int32_t b) { return a + b * b; }) /
+        number_frames;
+    if (WebRtcOpus_GetInDtx(inst) == 1 &&
+        average_frame_energy >= inst->smooth_energy_non_active_frames * 0.5f) {
+      // This is a refresh DTX packet, as the encoder is in DTX and has
+      // produced a payload > 2 bytes. The packet's energy is higher than the
+      // smoothed energy of non-activity frames (with a 3 dB negative margin)
+      // and it is therefore flagged as a high-energy refresh DTX packet.
+      return true;
+    }
+    // The average energy is tracked in a similar way as the modeling of the
+    // comfort noise in the Silk decoder in Opus
+    // (third_party/opus/src/silk/CNG.c).
+    if (average_frame_energy < inst->smooth_energy_non_active_frames * 0.5f) {
+      inst->smooth_energy_non_active_frames = average_frame_energy;
+    } else {
+      inst->smooth_energy_non_active_frames +=
+          (average_frame_energy - inst->smooth_energy_non_active_frames) *
+          0.25f;
+    }
+  }
+  return false;
+}
+
+int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
+                                 size_t channels,
+                                 int32_t application,
+                                 int sample_rate_hz) {
+  int opus_app;
+  if (!inst)
+    return -1;
+
+  switch (application) {
+    case 0:
+      opus_app = OPUS_APPLICATION_VOIP;
+      break;
+    case 1:
+      opus_app = OPUS_APPLICATION_AUDIO;
+      break;
+    default:
+      return -1;
+  }
+
+  OpusEncInst* state =
+      reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
+  RTC_DCHECK(state);
+
+  int error;
+  state->encoder = opus_encoder_create(
+      sample_rate_hz, static_cast<int>(channels), opus_app, &error);
+
+  if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
+    WebRtcOpus_EncoderFree(state);
+    return -1;
+  }
+
+  state->in_dtx_mode = 0;
+  state->channels = channels;
+  state->sample_rate_hz = sample_rate_hz;
+  state->smooth_energy_non_active_frames = 0.0f;
+  state->avoid_noise_pumping_during_dtx =
+      webrtc::field_trial::IsEnabled(kAvoidNoisePumpingDuringDtxFieldTrial);
+
+  *inst = state;
+  return 0;
+}
+
+int16_t WebRtcOpus_MultistreamEncoderCreate(
+    OpusEncInst** inst,
+    size_t channels,
+    int32_t application,
+    size_t streams,
+    size_t coupled_streams,
+    const unsigned char* channel_mapping) {
+  int opus_app;
+  if (!inst)
+    return -1;
+
+  switch (application) {
+    case 0:
+      opus_app = OPUS_APPLICATION_VOIP;
+      break;
+    case 1:
+      opus_app = OPUS_APPLICATION_AUDIO;
+      break;
+    default:
+      return -1;
+  }
+
+  OpusEncInst* state =
+      reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
+  RTC_DCHECK(state);
+
+  int error;
+  const int sample_rate_hz = 48000;
+  state->multistream_encoder = opus_multistream_encoder_create(
+      sample_rate_hz, channels, streams, coupled_streams, channel_mapping,
+      opus_app, &error);
+
+  if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
+    WebRtcOpus_EncoderFree(state);
+    return -1;
+  }
+
+  state->in_dtx_mode = 0;
+  state->channels = channels;
+  state->sample_rate_hz = sample_rate_hz;
+  state->smooth_energy_non_active_frames = 0.0f;
+  state->avoid_noise_pumping_during_dtx = false;
+
+  *inst = state;
+  return 0;
+}
+
+int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
+  if (inst) {
+    if (inst->encoder) {
+      opus_encoder_destroy(inst->encoder);
+    } else {
+      opus_multistream_encoder_destroy(inst->multistream_encoder);
+    }
+    free(inst);
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+int WebRtcOpus_Encode(OpusEncInst* inst,
+                      const int16_t* audio_in,
+                      size_t samples,
+                      size_t length_encoded_buffer,
+                      uint8_t* encoded) {
+  int res;
+
+  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
+    return -1;
+  }
+
+  if (inst->encoder) {
+    res = opus_encode(inst->encoder, (const opus_int16*)audio_in,
+                      static_cast<int>(samples), encoded,
+                      static_cast<int>(length_encoded_buffer));
+  } else {
+    res = opus_multistream_encode(
+        inst->multistream_encoder, (const opus_int16*)audio_in,
+        static_cast<int>(samples), encoded,
+        static_cast<int>(length_encoded_buffer));
+  }
+
+  if (res <= 0) {
+    return -1;
+  }
+
+  if (res <= 2) {
+    // Indicates DTX since the packet has nothing but a header. In principle,
+    // there is no need to send this packet. However, we do transmit the first
+    // occurrence to let the decoder know that the encoder enters DTX mode.
+    if (inst->in_dtx_mode) {
+      return 0;
+    } else {
+      inst->in_dtx_mode = 1;
+      return res;
+    }
+  }
+
+  if (inst->avoid_noise_pumping_during_dtx && WebRtcOpus_GetUseDtx(inst) == 1 &&
+      WebRtcOpus_IsHighEnergyRefreshDtxPacket(
+          inst, rtc::MakeArrayView(audio_in, samples),
+          rtc::MakeArrayView(encoded, res))) {
+    // This packet is a high-energy refresh DTX packet. To avoid an increase
+    // of the energy in the DTX region at the decoder, the packet is replaced
+    // by a single TOC byte describing one empty frame. The frame count bits
+    // of the TOC byte (https://tools.ietf.org/html/rfc6716#section-3.1) are
+    // overwritten to always indicate one frame (last two bits equal to 0).
+    encoded[0] = encoded[0] & 0b11111100;
+    inst->in_dtx_mode = 1;
+    // The payload is now just the TOC byte, i.e. 1 byte long.
+    return 1;
+  }
+  inst->in_dtx_mode = 0;
+  return res;
+}
+
+#define ENCODER_CTL(inst, vargs) \
+  (inst->encoder \
+       ?
opus_encoder_ctl(inst->encoder, vargs) \ + : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs)) + +int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) { + opus_int32 set_bandwidth; + + if (!inst) + return -1; + + if (frequency_hz <= 8000) { + set_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + } else if (frequency_hz <= 12000) { + set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + } else if (frequency_hz <= 16000) { + set_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + } else if (frequency_hz <= 24000) { + set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + } else { + set_bandwidth = OPUS_BANDWIDTH_FULLBAND; + } + return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); +} + +int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, + int32_t* result_hz) { + if (inst->encoder) { + if (opus_encoder_ctl(inst->encoder, OPUS_GET_MAX_BANDWIDTH(result_hz)) == + OPUS_OK) { + return 0; + } + return -1; + } + + opus_int32 max_bandwidth; + int s; + int ret; + + max_bandwidth = 0; + ret = OPUS_OK; + s = 0; + while (ret == OPUS_OK) { + OpusEncoder* enc; + opus_int32 bandwidth; + + ret = ENCODER_CTL(inst, OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc)); + if (ret == OPUS_BAD_ARG) + break; + if (ret != OPUS_OK) + return -1; + if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK) + return -1; + + if (max_bandwidth != 0 && max_bandwidth != bandwidth) + return -1; + + max_bandwidth = bandwidth; + s++; + } + *result_hz = max_bandwidth; + return 0; +} + +int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { + if (inst) { + if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) { + int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); + if (ret != OPUS_OK) { + return ret; + } + } + return ENCODER_CTL(inst, OPUS_SET_DTX(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { + if (inst) { + if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) { + int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_AUTO)); + if (ret != OPUS_OK) { + return ret; + } + } + return ENCODER_CTL(inst, OPUS_SET_DTX(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) { + if (inst) { + opus_int32 use_dtx; + if (ENCODER_CTL(inst, OPUS_GET_DTX(&use_dtx)) == 0) { + return use_dtx; + } + } + return -1; +} + +int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_VBR(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_VBR(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_COMPLEXITY(complexity)); + } else { + return -1; + } +} + +int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) { + if (!inst) { + return -1; + 
+  }
+  int32_t bandwidth;
+  if (ENCODER_CTL(inst, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) {
+    return bandwidth;
+  } else {
+    return -1;
+  }
+}
+
+int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
+  if (inst) {
+    return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth));
+  } else {
+    return -1;
+  }
+}
+
+int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
+  if (!inst)
+    return -1;
+  if (num_channels == 0) {
+    return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
+  } else if (num_channels == 1 || num_channels == 2) {
+    return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(num_channels));
+  } else {
+    return -1;
+  }
+}
+
+int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst) {
+  if (!inst) {
+    return -1;
+  }
+#ifdef OPUS_GET_IN_DTX
+  int32_t in_dtx;
+  if (ENCODER_CTL(inst, OPUS_GET_IN_DTX(&in_dtx)) == 0) {
+    return in_dtx;
+  }
+#endif
+  return -1;
+}
+
+int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
+                                 size_t channels,
+                                 int sample_rate_hz) {
+  int error;
+  OpusDecInst* state;
+
+  if (inst != NULL) {
+    // Create the Opus decoder state.
+    state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
+    if (state == NULL) {
+      return -1;
+    }
+
+    state->decoder = opus_decoder_create(sample_rate_hz,
+                                         static_cast<int>(channels), &error);
+    if (error == OPUS_OK && state->decoder) {
+      // All memory was created successfully.
+      state->channels = channels;
+      state->sample_rate_hz = sample_rate_hz;
+      state->plc_use_prev_decoded_samples =
+          webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
+      if (state->plc_use_prev_decoded_samples) {
+        state->prev_decoded_samples =
+            DefaultFrameSizePerChannel(state->sample_rate_hz);
+      }
+      state->in_dtx_mode = 0;
+      *inst = state;
+      return 0;
+    }
+
+    // If memory allocation was unsuccessful, free the entire state.
+    if (state->decoder) {
+      opus_decoder_destroy(state->decoder);
+    }
+    free(state);
+  }
+  return -1;
+}
+
+int16_t WebRtcOpus_MultistreamDecoderCreate(
+    OpusDecInst** inst,
+    size_t channels,
+    size_t streams,
+    size_t coupled_streams,
+    const unsigned char* channel_mapping) {
+  int error;
+  OpusDecInst* state;
+
+  if (inst != NULL) {
+    // Create the Opus decoder state.
+    state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
+    if (state == NULL) {
+      return -1;
+    }
+
+    // Create new memory, always at 48000 Hz.
+    state->multistream_decoder = opus_multistream_decoder_create(
+        48000, channels, streams, coupled_streams, channel_mapping, &error);
+
+    if (error == OPUS_OK && state->multistream_decoder) {
+      // All memory was created successfully.
+      state->channels = channels;
+      state->sample_rate_hz = 48000;
+      state->plc_use_prev_decoded_samples =
+          webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
+      if (state->plc_use_prev_decoded_samples) {
+        state->prev_decoded_samples =
+            DefaultFrameSizePerChannel(state->sample_rate_hz);
+      }
+      state->in_dtx_mode = 0;
+      *inst = state;
+      return 0;
+    }
+
+    // If memory allocation was unsuccessful, free the entire state.
+    opus_multistream_decoder_destroy(state->multistream_decoder);
+    free(state);
+  }
+  return -1;
+}
+
+int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
+  if (inst) {
+    if (inst->decoder) {
+      opus_decoder_destroy(inst->decoder);
+    } else if (inst->multistream_decoder) {
+      opus_multistream_decoder_destroy(inst->multistream_decoder);
+    }
+    free(inst);
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
+  return inst->channels;
+}
+
+void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
+  if (inst->decoder) {
+    opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
+  } else {
+    opus_multistream_decoder_ctl(inst->multistream_decoder, OPUS_RESET_STATE);
+  }
+  inst->in_dtx_mode = 0;
+}
+
+/* For the decoder to determine if it is to output speech or comfort noise. */
+static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
+  // The audio type becomes comfort noise when `encoded_bytes` is 1 or 2, and
+  // remains so as long as the following payloads are 0, 1, or 2 bytes long.
+  if (encoded_bytes == 0 && inst->in_dtx_mode) {
+    return 2;  // Comfort noise.
+  } else if (encoded_bytes == 1 || encoded_bytes == 2) {
+    // TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in
+    // fact a 1-byte TOC with a 1-byte payload. That will be erroneously
+    // interpreted as comfort noise output, but such a payload is probably
+    // faulty anyway.
+
+    // TODO(webrtc:10218): This is wrong for multistream opus. There, several
+    // single-stream packets are glued together with some packet size bytes in
+    // between. See https://tools.ietf.org/html/rfc6716#appendix-B
+    inst->in_dtx_mode = 1;
+    return 2;  // Comfort noise.
+  } else {
+    inst->in_dtx_mode = 0;
+    return 0;  // Speech.
+  }
+}
+
+/* `frame_size` is set to maximum Opus frame size in the normal case, and
+ * is set to the number of samples needed for PLC in case of losses.
+ * It is up to the caller to make sure the value is correct. */
+static int DecodeNative(OpusDecInst* inst,
+                        const uint8_t* encoded,
+                        size_t encoded_bytes,
+                        int frame_size,
+                        int16_t* decoded,
+                        int16_t* audio_type,
+                        int decode_fec) {
+  int res = -1;
+  if (inst->decoder) {
+    res = opus_decode(
+        inst->decoder, encoded, static_cast<int>(encoded_bytes),
+        reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec);
+  } else {
+    res = opus_multistream_decode(inst->multistream_decoder, encoded,
+                                  static_cast<int>(encoded_bytes),
+                                  reinterpret_cast<opus_int16*>(decoded),
+                                  frame_size, decode_fec);
+  }
+
+  if (res <= 0)
+    return -1;
+
+  *audio_type = DetermineAudioType(inst, encoded_bytes);
+
+  return res;
+}
+
+static int DecodePlc(OpusDecInst* inst, int16_t* decoded) {
+  int16_t audio_type = 0;
+  int decoded_samples;
+  int plc_samples =
+      FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz);
+
+  if (inst->plc_use_prev_decoded_samples) {
+    /* The number of samples we ask for is `number_of_lost_frames` times
+     * `prev_decoded_samples_`. Limit the number of samples to maximum
+     * `MaxFrameSizePerChannel()`. */
+    plc_samples = inst->prev_decoded_samples;
+    const int max_samples_per_channel =
+        MaxFrameSizePerChannel(inst->sample_rate_hz);
+    plc_samples = plc_samples <= max_samples_per_channel
+                      ? plc_samples
+                      : max_samples_per_channel;
+  }
+  decoded_samples =
+      DecodeNative(inst, NULL, 0, plc_samples, decoded, &audio_type, 0);
+  if (decoded_samples < 0) {
+    return -1;
+  }
+
+  return decoded_samples;
+}
+
+int WebRtcOpus_Decode(OpusDecInst* inst,
+                      const uint8_t* encoded,
+                      size_t encoded_bytes,
+                      int16_t* decoded,
+                      int16_t* audio_type) {
+  int decoded_samples;
+
+  if (encoded_bytes == 0) {
+    *audio_type = DetermineAudioType(inst, encoded_bytes);
+    decoded_samples = DecodePlc(inst, decoded);
+  } else {
+    decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
+                                   MaxFrameSizePerChannel(inst->sample_rate_hz),
+                                   decoded, audio_type, 0);
+  }
+  if (decoded_samples < 0) {
+    return -1;
+  }
+
+  if (inst->plc_use_prev_decoded_samples) {
+    /* Update the decoded sample memory, to be used by the PLC in case of
+     * losses. */
+    inst->prev_decoded_samples = decoded_samples;
+  }
+
+  return decoded_samples;
+}
+
+int WebRtcOpus_DecodeFec(OpusDecInst* inst,
+                         const uint8_t* encoded,
+                         size_t encoded_bytes,
+                         int16_t* decoded,
+                         int16_t* audio_type) {
+  int decoded_samples;
+  int fec_samples;
+
+  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
+    return 0;
+  }
+
+  fec_samples =
+      opus_packet_get_samples_per_frame(encoded, inst->sample_rate_hz);
+
+  decoded_samples = DecodeNative(inst, encoded, encoded_bytes, fec_samples,
+                                 decoded, audio_type, 1);
+  if (decoded_samples < 0) {
+    return -1;
+  }
+
+  return decoded_samples;
+}
+
+int WebRtcOpus_DurationEst(OpusDecInst* inst,
+                           const uint8_t* payload,
+                           size_t payload_length_bytes) {
+  if (payload_length_bytes == 0) {
+    // WebRtcOpus_Decode calls PLC when the payload length is zero, so we
+    // return the PLC duration correspondingly.
+    return WebRtcOpus_PlcDuration(inst);
+  }
+
+  int frames, samples;
+  frames = opus_packet_get_nb_frames(
+      payload, static_cast<int>(payload_length_bytes));
+  if (frames < 0) {
+    /* Invalid payload data. */
+    return 0;
+  }
+  samples =
+      frames * opus_packet_get_samples_per_frame(payload, inst->sample_rate_hz);
+  if (samples > 120 * inst->sample_rate_hz / 1000) {
+    // More than 120 ms' worth of samples.
+    return 0;
+  }
+  return samples;
+}
+
+int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
+  if (inst->plc_use_prev_decoded_samples) {
+    /* The number of samples we ask for is `number_of_lost_frames` times
+     * `prev_decoded_samples_`. Limit the number of samples to maximum
+     * `MaxFrameSizePerChannel()`. */
+    const int plc_samples = inst->prev_decoded_samples;
+    const int max_samples_per_channel =
+        MaxFrameSizePerChannel(inst->sample_rate_hz);
+    return plc_samples <= max_samples_per_channel ? plc_samples
+                                                  : max_samples_per_channel;
+  }
+  return FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz);
+}
+
+int WebRtcOpus_FecDurationEst(const uint8_t* payload,
+                              size_t payload_length_bytes,
+                              int sample_rate_hz) {
+  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
+    return 0;
+  }
+  const int samples =
+      opus_packet_get_samples_per_frame(payload, sample_rate_hz);
+  const int samples_per_ms = sample_rate_hz / 1000;
+  if (samples < 10 * samples_per_ms || samples > 120 * samples_per_ms) {
+    /* Invalid payload duration. */
+    return 0;
+  }
+  return samples;
+}
+
+int WebRtcOpus_NumSilkFrames(const uint8_t* payload) {
+  // For computing the payload length in ms, the sample rate is not important
+  // since it cancels out. We use 48 kHz, but any valid sample rate would work.
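+  // opus_packet_get_samples_per_frame() derives the frame duration from the
+  // TOC byte, so at 48 kHz it returns 480/960/1920/2880 samples for
+  // 10/20/40/60 ms frames; dividing by 48 converts back to milliseconds.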
+  int payload_length_ms =
+      opus_packet_get_samples_per_frame(payload, 48000) / 48;
+  if (payload_length_ms < 10)
+    payload_length_ms = 10;
+
+  int silk_frames;
+  switch (payload_length_ms) {
+    case 10:
+    case 20:
+      silk_frames = 1;
+      break;
+    case 40:
+      silk_frames = 2;
+      break;
+    case 60:
+      silk_frames = 3;
+      break;
+    default:
+      return 0;  // The packet is in fact invalid.
+  }
+  return silk_frames;
+}
+
+// This method is based on the definition of the Opus audio codec in RFC 6716
+// (https://tools.ietf.org/html/rfc6716). It works by parsing the LP layer of
+// an Opus packet, particularly the LBRR flag.
+int WebRtcOpus_PacketHasFec(const uint8_t* payload,
+                            size_t payload_length_bytes) {
+  if (payload == NULL || payload_length_bytes == 0)
+    return 0;
+
+  // In CELT_ONLY mode, packets should not have FEC.
+  if (payload[0] & 0x80)
+    return 0;
+
+  int silk_frames = WebRtcOpus_NumSilkFrames(payload);
+  if (silk_frames == 0)
+    return 0;  // Not valid.
+
+  const int channels = opus_packet_get_nb_channels(payload);
+  RTC_DCHECK(channels == 1 || channels == 2);
+
+  // Max number of frames in an Opus packet is 48.
+  opus_int16 frame_sizes[48];
+  const unsigned char* frame_data[48];
+
+  // Parse the packet to get the frames. We only care about the first frame,
+  // since we can only decode the FEC from the first one.
+  if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
+                        NULL, frame_data, frame_sizes, NULL) < 0) {
+    return 0;
+  }
+
+  if (frame_sizes[0] < 1) {
+    return 0;
+  }
+
+  // A frame starts with the LP layer. The LP layer begins with two to eight
+  // header bits. These consist of one VAD bit per SILK frame (up to 3),
+  // followed by a single flag indicating the presence of LBRR frames.
+  // For a stereo packet, these first flags correspond to the mid channel, and
+  // a second set of flags is included for the side channel. Because these are
+  // the first symbols decoded by the range coder and because they are coded
+  // as binary values with uniform probability, they can be extracted directly
+  // from the most significant bits of the first byte of compressed data.
+  for (int n = 0; n < channels; n++) {
+    // The LBRR bit for channel 1 is on the (`silk_frames` + 1)-th bit, and
+    // that of channel 2 is on the |(`silk_frames` + 1) * 2|-th bit.
+    if (frame_data[0][0] & (0x80 >> ((n + 1) * (silk_frames + 1) - 1)))
+      return 1;
+  }
+
+  return 0;
+}
+
+int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
+                                      size_t payload_length_bytes) {
+  if (payload == NULL || payload_length_bytes == 0)
+    return 0;
+
+  // In CELT_ONLY mode we cannot determine whether there is VAD.
+  if (payload[0] & 0x80)
+    return -1;
+
+  int silk_frames = WebRtcOpus_NumSilkFrames(payload);
+  if (silk_frames == 0)
+    return -1;
+
+  const int channels = opus_packet_get_nb_channels(payload);
+  RTC_DCHECK(channels == 1 || channels == 2);
+
+  // Max number of frames in an Opus packet is 48.
+  opus_int16 frame_sizes[48];
+  const unsigned char* frame_data[48];
+
+  // Parse the packet to get the frames.
+  int frames =
+      opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
+                        NULL, frame_data, frame_sizes, NULL);
+  if (frames < 0)
+    return -1;
+
+  // Iterate over all Opus frames, which may contain multiple SILK frames.
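+  // For instance, in a 20 ms mono packet (silk_frames == 1) the check below
+  // reduces to testing the most significant bit of the frame's first
+  // compressed-data byte: a first byte of 0x80 reports voice activity (see
+  // the Mono20msVadSet case in opus_unittest.cc).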
+  for (int frame = 0; frame < frames; frame++) {
+    if (frame_sizes[frame] < 1) {
+      continue;
+    }
+    if (frame_data[frame][0] >> (8 - silk_frames))
+      return 1;
+    if (channels == 2 &&
+        (frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames))
+      return 1;
+  }
+
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h
new file mode 100644
index 0000000000..89159ce1c0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h
@@ -0,0 +1,547 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/codecs/opus/opus_inst.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque wrapper types for the codec state.
+typedef struct WebRtcOpusEncInst OpusEncInst;
+typedef struct WebRtcOpusDecInst OpusDecInst;
+
+/****************************************************************************
+ * WebRtcOpus_EncoderCreate(...)
+ *
+ * This function creates an Opus encoder that encodes mono or stereo.
+ *
+ * Input:
+ *      - channels           : number of channels; 1 or 2.
+ *      - application        : 0 - VOIP applications.
+ *                                 Favor speech intelligibility.
+ *                             1 - Audio applications.
+ *                                 Favor faithfulness to the original input.
+ *      - sample_rate_hz     : sample rate of input audio.
+ *
+ * Output:
+ *      - inst               : a pointer to the encoder context that is
+ *                             created on success.
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
+                                 size_t channels,
+                                 int32_t application,
+                                 int sample_rate_hz);
+
+/****************************************************************************
+ * WebRtcOpus_MultistreamEncoderCreate(...)
+ *
+ * This function creates an Opus encoder with any supported channel count.
+ *
+ * Input:
+ *      - channels           : number of channels in the input of the encoder.
+ *      - application        : 0 - VOIP applications.
+ *                                 Favor speech intelligibility.
+ *                             1 - Audio applications.
+ *                                 Favor faithfulness to the original input.
+ *      - streams            : number of streams, as described in RFC 7845.
+ *      - coupled_streams    : number of coupled streams, as described in
+ *                             RFC 7845.
+ *      - channel_mapping    : the channel mapping; pointer to an array of
+ *                             `channels` bytes, as described in RFC 7845.
+ *
+ * Output:
+ *      - inst               : a pointer to the encoder context that is
+ *                             created on success.
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_MultistreamEncoderCreate(
+    OpusEncInst** inst,
+    size_t channels,
+    int32_t application,
+    size_t streams,
+    size_t coupled_streams,
+    const unsigned char* channel_mapping);
+
+int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_Encode(...)
+ *
+ * This function encodes audio as a series of Opus frames and inserts
+ * them into a packet. The input buffer can be any length.
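+ *
+ * A minimal usage sketch (buffer names and sizes are illustrative only):
+ *
+ *   // 20 ms of 48 kHz mono audio is 960 samples per channel.
+ *   int16_t pcm[960];
+ *   uint8_t packet[1500];
+ *   int bytes = WebRtcOpus_Encode(enc, pcm, 960, sizeof(packet), packet);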
+ *
+ * Input:
+ *      - inst                  : Encoder context
+ *      - audio_in              : Input speech data buffer
+ *      - samples               : Samples per channel in audio_in
+ *      - length_encoded_buffer : Output buffer size
+ *
+ * Output:
+ *      - encoded               : Output compressed data buffer
+ *
+ * Return value                 : >=0 - Length (in bytes) of coded data
+ *                                 -1 - Error
+ */
+int WebRtcOpus_Encode(OpusEncInst* inst,
+                      const int16_t* audio_in,
+                      size_t samples,
+                      size_t length_encoded_buffer,
+                      uint8_t* encoded);
+
+/****************************************************************************
+ * WebRtcOpus_SetBitRate(...)
+ *
+ * This function adjusts the target bitrate of the encoder.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - rate               : New target bitrate
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate);
+
+/****************************************************************************
+ * WebRtcOpus_SetPacketLossRate(...)
+ *
+ * This function configures the encoder's expected packet loss percentage.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - loss_rate          : loss percentage in the range 0-100, inclusive.
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate);
+
+/****************************************************************************
+ * WebRtcOpus_SetMaxPlaybackRate(...)
+ *
+ * Configures the maximum playback rate for encoding. Due to hardware
+ * limitations, the receiver may only be able to render audio up to a certain
+ * playback rate. The Opus encoder can use this information to optimize
+ * network usage and encoding complexity. This affects the audio bandwidth in
+ * the coded audio; the input/output sample rate, however, is not affected.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - frequency_hz       : Maximum playback rate in Hz.
+ *                             This parameter can take any value. The relation
+ *                             between the value and the Opus internal mode is
+ *                             as follows:
+ *                               frequency_hz <= 8000           narrow band
+ *                               8000 < frequency_hz <= 12000   medium band
+ *                               12000 < frequency_hz <= 16000  wide band
+ *                               16000 < frequency_hz <= 24000  super wide band
+ *                               frequency_hz > 24000           full band
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz);
+
+/****************************************************************************
+ * WebRtcOpus_GetMaxPlaybackRate(...)
+ *
+ * Queries the maximum playback rate for encoding. If different single-stream
+ * encoders have different maximum playback rates, this function fails.
+ *
+ * Input:
+ *      - inst               : Encoder context.
+ *
+ * Output:
+ *      - result_hz          : The maximum playback rate in Hz.
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
+                                      int32_t* result_hz);
+
+/* TODO(minyue): Check whether an API to check the FEC and the packet loss rate
+ * is needed. It might not be very useful since there are not many use cases
+ * and the caller can always maintain the states. */
+
+/****************************************************************************
+ * WebRtcOpus_EnableFec()
+ *
+ * This function enables FEC for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_EnableFec(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_DisableFec()
+ *
+ * This function disables FEC for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_DisableFec(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_EnableDtx()
+ *
+ * This function enables Opus internal DTX for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_DisableDtx()
+ *
+ * This function disables Opus internal DTX for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_GetUseDtx()
+ *
+ * This function gets the DTX configuration used for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Encoder does not use DTX.
+ *                              1 - Encoder uses DTX.
+ *                             -1 - Error.
+ */
+int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_EnableCbr()
+ *
+ * This function enables CBR for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_DisableCbr()
+ *
+ * This function disables CBR for encoding.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst);
+
+/*
+ * WebRtcOpus_SetComplexity(...)
+ *
+ * This function adjusts the computational complexity. The effect is the same
+ * as using the complexity CTL of the underlying Opus encoder.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - complexity         : New target complexity (0-10, inclusive)
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity);
+
+/*
+ * WebRtcOpus_GetBandwidth(...)
+ *
+ * This function returns the current bandwidth.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              : Bandwidth - Success
+ *                                    -1 - Error
+ */
+int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst);
+
+/*
+ * WebRtcOpus_SetBandwidth(...)
+ *
+ * By default, Opus decides which bandwidth to encode the signal in depending
+ * on the bitrate. This function overrules the previous setting and forces the
+ * encoder to encode in narrowband/wideband/fullband/etc.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - bandwidth          : New target bandwidth. Valid values are:
+ *                                 OPUS_BANDWIDTH_NARROWBAND
+ *                                 OPUS_BANDWIDTH_MEDIUMBAND
+ *                                 OPUS_BANDWIDTH_WIDEBAND
+ *                                 OPUS_BANDWIDTH_SUPERWIDEBAND
+ *                                 OPUS_BANDWIDTH_FULLBAND
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth);
+
+/*
+ * WebRtcOpus_GetInDtx(...)
+ *
+ * Gets the DTX state of the encoder.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *
+ * Return value              : -1 - Error.
+ *                              1 - Last encoded frame was a comfort noise
+ *                                  update during DTX.
+ *                              0 - Last encoded frame was encoded with the
+ *                                  encoder not in DTX.
+ */
+int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst);
+
+/*
+ * WebRtcOpus_SetForceChannels(...)
+ *
+ * If the encoder is initialized as a stereo encoder, Opus will by default
+ * decide whether to encode in mono or stereo based on the bitrate. This
+ * function overrules the previous setting and forces the encoder to encode
+ * in auto/mono/stereo.
+ *
+ * If the encoder is initialized as a mono encoder, and one tries to force
+ * stereo, the function will return an error.
+ *
+ * Input:
+ *      - inst               : Encoder context
+ *      - num_channels       : 0 - Not forced
+ *                             1 - Mono
+ *                             2 - Stereo
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels);
+
+int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
+                                 size_t channels,
+                                 int sample_rate_hz);
+
+/****************************************************************************
+ * WebRtcOpus_MultistreamDecoderCreate(...)
+ *
+ * This function creates an Opus decoder with any supported channel count.
+ *
+ * Input:
+ *      - channels           : number of output channels that the decoder
+ *                             will produce.
+ *      - streams            : number of encoded streams, as described in
+ *                             RFC 7845.
+ *      - coupled_streams    : number of coupled streams, as described in
+ *                             RFC 7845.
+ *      - channel_mapping    : the channel mapping; pointer to an array of
+ *                             `channels` bytes, as described in RFC 7845.
+ *
+ * Output:
+ *      - inst               : a pointer to the decoder context that is
+ *                             created on success.
+ *
+ * Return value              :  0 - Success
+ *                             -1 - Error
+ */
+int16_t WebRtcOpus_MultistreamDecoderCreate(
+    OpusDecInst** inst,
+    size_t channels,
+    size_t streams,
+    size_t coupled_streams,
+    const unsigned char* channel_mapping);
+
+int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_DecoderChannels(...)
+ *
+ * This function returns the number of channels created for the Opus decoder.
+ */
+size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_DecoderInit(...)
+ *
+ * This function resets the state of the decoder.
+ *
+ * Input:
+ *      - inst               : Decoder context
+ */
+void WebRtcOpus_DecoderInit(OpusDecInst* inst);
+
+/****************************************************************************
+ * WebRtcOpus_Decode(...)
+ *
+ * This function decodes an Opus packet into one or more audio frames at the
+ * decoder's sampling rate, as configured at creation time.
+ *
+ * Input:
+ *      - inst               : Decoder context
+ *      - encoded            : Encoded data
+ *      - encoded_bytes      : Bytes in encoded vector
+ *
+ * Output:
+ *      - decoded            : The decoded vector
+ *      - audio_type         : 0 - speech, 2 - comfort noise. Comfort noise is
+ *                             only reported when Opus's internal DTX is in
+ *                             use (1-byte packets and the empty payloads that
+ *                             follow them).
+ *
+ * Return value              : >0 - Samples per channel in decoded vector
+ *                             -1 - Error
+ */
+int WebRtcOpus_Decode(OpusDecInst* inst,
+                      const uint8_t* encoded,
+                      size_t encoded_bytes,
+                      int16_t* decoded,
+                      int16_t* audio_type);
+
+/****************************************************************************
+ * WebRtcOpus_DecodeFec(...)
+ *
+ * This function decodes the FEC data from an Opus packet into one or more
+ * audio frames at the decoder's sampling rate, as configured at creation
+ * time.
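+ *
+ * A typical (illustrative) recovery flow: if packet N is lost but packet
+ * N + 1 arrives and carries FEC, the previous frame can be recovered from it:
+ *
+ *   int16_t recovered[960];  // Room for one 20 ms frame at 48 kHz mono.
+ *   int16_t audio_type;
+ *   int samples = WebRtcOpus_DecodeFec(dec, packet_n1, bytes_n1,
+ *                                      recovered, &audio_type);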
+ * + * Input: + * - inst : Decoder context + * - encoded : Encoded data + * - encoded_bytes : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector (previous frame) + * + * Return value : >0 - Samples per channel in decoded vector + * 0 - No FEC data in the packet + * -1 - Error + */ +int WebRtcOpus_DecodeFec(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int16_t* decoded, + int16_t* audio_type); + +/**************************************************************************** + * WebRtcOpus_DurationEst(...) + * + * This function calculates the duration of an opus packet. + * Input: + * - inst : Decoder context + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : The duration of the packet, in samples per + * channel. + */ +int WebRtcOpus_DurationEst(OpusDecInst* inst, + const uint8_t* payload, + size_t payload_length_bytes); + +/**************************************************************************** + * WebRtcOpus_PlcDuration(...) + * + * This function calculates the duration of a frame returned by packet loss + * concealment (PLC). + * + * Input: + * - inst : Decoder context + * + * Return value : The duration of a frame returned by PLC, in + * samples per channel. + */ +int WebRtcOpus_PlcDuration(OpusDecInst* inst); + +/* TODO(minyue): Check whether it is needed to add a decoder context to the + * arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells + * the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used. + * So it may be advisable to remove it from WebRtcOpus_DurationEst(...). */ + +/**************************************************************************** + * WebRtcOpus_FecDurationEst(...) + * + * This function calculates the duration of the FEC data within an opus packet. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * - sample_rate_hz : Sample rate of output audio + * + * Return value : >0 - The duration of the FEC data in the + * packet in samples per channel. + * 0 - No FEC data in the packet. + */ +int WebRtcOpus_FecDurationEst(const uint8_t* payload, + size_t payload_length_bytes, + int sample_rate_hz); + +/**************************************************************************** + * WebRtcOpus_PacketHasFec(...) + * + * This function detects if an opus packet has FEC. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : 0 - the packet does NOT contain FEC. + * 1 - the packet contains FEC. + */ +int WebRtcOpus_PacketHasFec(const uint8_t* payload, + size_t payload_length_bytes); + +/**************************************************************************** + * WebRtcOpus_PacketHasVoiceActivity(...) + * + * This function returns the SILK VAD information encoded in the opus packet. + * For CELT-only packets that do not have VAD information, it returns -1. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : 0 - no frame had the VAD flag set. + * 1 - at least one frame had the VAD flag set. + * -1 - VAD status could not be determined. 
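+ *
+ * For example, the two-byte payload {0x78, 0x80} (a 20 ms mono SILK-mode
+ * packet with the VAD bit set) returns 1, while {0x78, 0x00} returns 0; see
+ * the OpusVadTest cases in opus_unittest.cc.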
+ */
+int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
+                                      size_t payload_length_bytes);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc
new file mode 100644
index 0000000000..4477e8a5f8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc
@@ -0,0 +1,147 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h"
+
+using ::std::string;
+
+namespace webrtc {
+
+static const int kOpusBlockDurationMs = 20;
+static const int kOpusSamplingKhz = 48;
+
+class OpusSpeedTest : public AudioCodecSpeedTest {
+ protected:
+  OpusSpeedTest();
+  void SetUp() override;
+  void TearDown() override;
+  float EncodeABlock(int16_t* in_data,
+                     uint8_t* bit_stream,
+                     size_t max_bytes,
+                     size_t* encoded_bytes) override;
+  float DecodeABlock(const uint8_t* bit_stream,
+                     size_t encoded_bytes,
+                     int16_t* out_data) override;
+  WebRtcOpusEncInst* opus_encoder_;
+  WebRtcOpusDecInst* opus_decoder_;
+};
+
+OpusSpeedTest::OpusSpeedTest()
+    : AudioCodecSpeedTest(kOpusBlockDurationMs,
+                          kOpusSamplingKhz,
+                          kOpusSamplingKhz),
+      opus_encoder_(NULL),
+      opus_decoder_(NULL) {}
+
+void OpusSpeedTest::SetUp() {
+  AudioCodecSpeedTest::SetUp();
+  // If channels_ == 1, use Opus VOIP mode; otherwise, audio mode.
+  int app = channels_ == 1 ? 0 : 1;
+  /* Create encoder memory. */
+  EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
+  EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));
+  /* Set bitrate. */
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
+}
+
+void OpusSpeedTest::TearDown() {
+  AudioCodecSpeedTest::TearDown();
+  /* Free memory. */
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+float OpusSpeedTest::EncodeABlock(int16_t* in_data,
+                                  uint8_t* bit_stream,
+                                  size_t max_bytes,
+                                  size_t* encoded_bytes) {
+  clock_t clocks = clock();
+  int value = WebRtcOpus_Encode(opus_encoder_, in_data, input_length_sample_,
+                                max_bytes, bit_stream);
+  clocks = clock() - clocks;
+  EXPECT_GT(value, 0);
+  *encoded_bytes = static_cast<size_t>(value);
+  return 1000.0 * clocks / CLOCKS_PER_SEC;
+}
+
+float OpusSpeedTest::DecodeABlock(const uint8_t* bit_stream,
+                                  size_t encoded_bytes,
+                                  int16_t* out_data) {
+  int value;
+  int16_t audio_type;
+  clock_t clocks = clock();
+  value = WebRtcOpus_Decode(opus_decoder_, bit_stream, encoded_bytes, out_data,
+                            &audio_type);
+  clocks = clock() - clocks;
+  EXPECT_EQ(output_length_sample_, static_cast<size_t>(value));
+  return 1000.0 * clocks / CLOCKS_PER_SEC;
+}
+
+/* Test audio length in seconds. */
+constexpr size_t kDurationSec = 400;
+
+#define ADD_TEST(complexity)                                            \
+  TEST_P(OpusSpeedTest, OpusSetComplexityTest##complexity) {            \
+    /* Set complexity. */                                               \
+    printf("Setting complexity to %d ...\n", complexity);              \
+    EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity)); \
+    EncodeDecode(kDurationSec);                                        \
+  }
+
+ADD_TEST(10)
+ADD_TEST(9)
+ADD_TEST(8)
+ADD_TEST(7)
+ADD_TEST(6)
+ADD_TEST(5)
+ADD_TEST(4)
+ADD_TEST(3)
+ADD_TEST(2)
+ADD_TEST(1)
+ADD_TEST(0)
+
+#define ADD_BANDWIDTH_TEST(bandwidth)                                \
+  TEST_P(OpusSpeedTest, OpusSetBandwidthTest##bandwidth) {           \
+    /* Set bandwidth. */                                             \
+    printf("Setting bandwidth to %d ...\n", bandwidth);              \
+    EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, bandwidth)); \
+    EncodeDecode(kDurationSec);                                      \
+  }
+
+ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_NARROWBAND)
+ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_MEDIUMBAND)
+ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND)
+ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_SUPERWIDEBAND)
+ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_FULLBAND)
+
+// List all test cases: (channel, bit rate, filename, extension).
+const coding_param param_set[] = {
+    std::make_tuple(1,
+                    64000,
+                    string("audio_coding/speech_mono_32_48kHz"),
+                    string("pcm"),
+                    true),
+    std::make_tuple(1,
+                    32000,
+                    string("audio_coding/speech_mono_32_48kHz"),
+                    string("pcm"),
+                    true),
+    std::make_tuple(2,
+                    64000,
+                    string("audio_coding/music_stereo_48kHz"),
+                    string("pcm"),
+                    true)};
+
+INSTANTIATE_TEST_SUITE_P(AllTest,
+                         OpusSpeedTest,
+                         ::testing::ValuesIn(param_set));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
new file mode 100644
index 0000000000..4a9156ad58
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@@ -0,0 +1,979 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_coding/codecs/opus/opus_inst.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+namespace {
+// Equivalent to SDP params
+// {{"channel_mapping", "0,1,2,3"}, {"coupled_streams", "2"}}.
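+// With this mapping, stream 0 is a coupled (stereo) stream carrying output
+// channels 0 and 1, and stream 1 is a second coupled stream carrying output
+// channels 2 and 3 (RFC 7845, section 5.1.1).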
+constexpr unsigned char kQuadChannelMapping[] = {0, 1, 2, 3};
+constexpr int kQuadTotalStreams = 2;
+constexpr int kQuadCoupledStreams = 2;
+
+constexpr unsigned char kStereoChannelMapping[] = {0, 1};
+constexpr int kStereoTotalStreams = 1;
+constexpr int kStereoCoupledStreams = 1;
+
+constexpr unsigned char kMonoChannelMapping[] = {0};
+constexpr int kMonoTotalStreams = 1;
+constexpr int kMonoCoupledStreams = 0;
+
+void CreateSingleOrMultiStreamEncoder(WebRtcOpusEncInst** opus_encoder,
+                                      int channels,
+                                      int application,
+                                      bool use_multistream,
+                                      int encoder_sample_rate_hz) {
+  EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);
+  if (use_multistream) {
+    EXPECT_EQ(encoder_sample_rate_hz, 48000);
+    if (channels == 1) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
+                       opus_encoder, channels, application, kMonoTotalStreams,
+                       kMonoCoupledStreams, kMonoChannelMapping));
+    } else if (channels == 2) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
+                       opus_encoder, channels, application, kStereoTotalStreams,
+                       kStereoCoupledStreams, kStereoChannelMapping));
+    } else if (channels == 4) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
+                       opus_encoder, channels, application, kQuadTotalStreams,
+                       kQuadCoupledStreams, kQuadChannelMapping));
+    } else {
+      EXPECT_TRUE(false) << channels;
+    }
+  } else {
+    EXPECT_EQ(0, WebRtcOpus_EncoderCreate(opus_encoder, channels, application,
+                                          encoder_sample_rate_hz));
+  }
+}
+
+void CreateSingleOrMultiStreamDecoder(WebRtcOpusDecInst** opus_decoder,
+                                      int channels,
+                                      bool use_multistream,
+                                      int decoder_sample_rate_hz) {
+  EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);
+  if (use_multistream) {
+    EXPECT_EQ(decoder_sample_rate_hz, 48000);
+    if (channels == 1) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
+                       opus_decoder, channels, kMonoTotalStreams,
+                       kMonoCoupledStreams, kMonoChannelMapping));
+    } else if (channels == 2) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
+                       opus_decoder, channels, kStereoTotalStreams,
+                       kStereoCoupledStreams, kStereoChannelMapping));
+    } else if (channels == 4) {
+      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
+                       opus_decoder, channels, kQuadTotalStreams,
+                       kQuadCoupledStreams, kQuadChannelMapping));
+    } else {
+      EXPECT_TRUE(false) << channels;
+    }
+  } else {
+    EXPECT_EQ(0, WebRtcOpus_DecoderCreate(opus_decoder, channels,
+                                          decoder_sample_rate_hz));
+  }
+}
+
+int SamplesPerChannel(int sample_rate_hz, int duration_ms) {
+  const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz, 1000);
+  return samples_per_ms * duration_ms;
+}
+
+using test::AudioLoop;
+using ::testing::Combine;
+using ::testing::TestWithParam;
+using ::testing::Values;
+
+// Maximum number of bytes in output bitstream.
+const size_t kMaxBytes = 2000;
+
+class OpusTest
+    : public TestWithParam<::testing::tuple<size_t, int, bool, int, int>> {
+ protected:
+  OpusTest() = default;
+
+  void TestDtxEffect(bool dtx, int block_length_ms);
+
+  void TestCbrEffect(bool cbr, int block_length_ms);
+
+  // Prepare `speech_data_` for encoding, read from a hard-coded file.
+  // After preparation, `speech_data_.GetNextBlock()` returns a pointer to a
+  // block of `block_length_ms` milliseconds. The data is looped every
+  // `loop_length_ms` milliseconds.
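+  // For example, block_length_ms = 20 with loop_length_ms = 2000 yields 100
+  // distinct 20 ms blocks before the source material wraps around.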
+  void PrepareSpeechData(int block_length_ms, int loop_length_ms);
+
+  int EncodeDecode(WebRtcOpusEncInst* encoder,
+                   rtc::ArrayView<const int16_t> input_audio,
+                   WebRtcOpusDecInst* decoder,
+                   int16_t* output_audio,
+                   int16_t* audio_type);
+
+  void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
+                          opus_int32 expect,
+                          int32_t set);
+
+  void CheckAudioBounded(const int16_t* audio,
+                         size_t samples,
+                         size_t channels,
+                         uint16_t bound) const;
+
+  WebRtcOpusEncInst* opus_encoder_ = nullptr;
+  WebRtcOpusDecInst* opus_decoder_ = nullptr;
+  AudioLoop speech_data_;
+  uint8_t bitstream_[kMaxBytes];
+  size_t encoded_bytes_ = 0;
+  const size_t channels_{std::get<0>(GetParam())};
+  const int application_{std::get<1>(GetParam())};
+  const bool use_multistream_{std::get<2>(GetParam())};
+  const int encoder_sample_rate_hz_{std::get<3>(GetParam())};
+  const int decoder_sample_rate_hz_{std::get<4>(GetParam())};
+};
+
+}  // namespace
+
+// Singlestream: Try all combinations.
+INSTANTIATE_TEST_SUITE_P(Singlestream,
+                         OpusTest,
+                         testing::Combine(testing::Values(1, 2),
+                                          testing::Values(0, 1),
+                                          testing::Values(false),
+                                          testing::Values(16000, 48000),
+                                          testing::Values(16000, 48000)));
+
+// Multistream: Some representative cases (only 48 kHz for now).
+INSTANTIATE_TEST_SUITE_P(
+    Multistream,
+    OpusTest,
+    testing::Values(std::make_tuple(1, 0, true, 48000, 48000),
+                    std::make_tuple(2, 1, true, 48000, 48000),
+                    std::make_tuple(4, 0, true, 48000, 48000),
+                    std::make_tuple(4, 1, true, 48000, 48000)));
+
+void OpusTest::PrepareSpeechData(int block_length_ms, int loop_length_ms) {
+  std::map<int, std::string> channel_to_basename = {
+      {1, "audio_coding/testfile32kHz"},
+      {2, "audio_coding/teststereo32kHz"},
+      {4, "audio_coding/speech_4_channels_48k_one_second"}};
+  std::map<int, std::string> channel_to_suffix = {
+      {1, "pcm"}, {2, "pcm"}, {4, "wav"}};
+  const std::string file_name = webrtc::test::ResourcePath(
+      channel_to_basename[channels_], channel_to_suffix[channels_]);
+  if (loop_length_ms < block_length_ms) {
+    loop_length_ms = block_length_ms;
+  }
+  const int sample_rate_khz =
+      rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000);
+  EXPECT_TRUE(speech_data_.Init(file_name,
+                                loop_length_ms * sample_rate_khz * channels_,
+                                block_length_ms * sample_rate_khz * channels_));
+}
+
+void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
+                                  opus_int32 expect,
+                                  int32_t set) {
+  opus_int32 bandwidth;
+  EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, set));
+  EXPECT_EQ(0, WebRtcOpus_GetMaxPlaybackRate(opus_encoder_, &bandwidth));
+  EXPECT_EQ(expect, bandwidth);
+}
+
+void OpusTest::CheckAudioBounded(const int16_t* audio,
+                                 size_t samples,
+                                 size_t channels,
+                                 uint16_t bound) const {
+  for (size_t i = 0; i < samples; ++i) {
+    for (size_t c = 0; c < channels; ++c) {
+      ASSERT_GE(audio[i * channels + c], -bound);
+      ASSERT_LE(audio[i * channels + c], bound);
+    }
+  }
+}
+
+int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
+                           rtc::ArrayView<const int16_t> input_audio,
+                           WebRtcOpusDecInst* decoder,
+                           int16_t* output_audio,
+                           int16_t* audio_type) {
+  const int input_samples_per_channel =
+      rtc::CheckedDivExact(input_audio.size(), channels_);
+  int encoded_bytes_int =
+      WebRtcOpus_Encode(encoder, input_audio.data(), input_samples_per_channel,
+                        kMaxBytes, bitstream_);
+  EXPECT_GE(encoded_bytes_int, 0);
+  encoded_bytes_ = static_cast<size_t>(encoded_bytes_int);
+  if (encoded_bytes_ != 0) {
+    int est_len = WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_);
+    int act_len = WebRtcOpus_Decode(decoder, bitstream_, encoded_bytes_,
+                                    output_audio, audio_type);
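+    // The estimate is derived from the packet header alone, so it must agree
+    // with the number of samples actually produced by the full decode.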
+    EXPECT_EQ(est_len, act_len);
+    return act_len;
+  } else {
+    int total_dtx_len = 0;
+    const int output_samples_per_channel = input_samples_per_channel *
+                                           decoder_sample_rate_hz_ /
+                                           encoder_sample_rate_hz_;
+    while (total_dtx_len < output_samples_per_channel) {
+      int est_len = WebRtcOpus_DurationEst(decoder, NULL, 0);
+      int act_len = WebRtcOpus_Decode(decoder, NULL, 0,
+                                      &output_audio[total_dtx_len * channels_],
+                                      audio_type);
+      EXPECT_EQ(est_len, act_len);
+      total_dtx_len += act_len;
+    }
+    return total_dtx_len;
+  }
+}
+
+// Test that the encoder/decoder enters DTX mode properly and does not enter
+// DTX when it should not. This test is signal dependent.
+void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
+  PrepareSpeechData(block_length_ms, 2000);
+  const size_t input_samples =
+      rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000) * block_length_ms;
+  const size_t output_samples =
+      rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
+
+  // Create encoder memory.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+
+  // Set bitrate.
+  EXPECT_EQ(
+      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
+
+  // Set input audio as silence.
+  std::vector<int16_t> silence(input_samples * channels_, 0);
+
+  // Setting DTX.
+  EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_)
+                   : WebRtcOpus_DisableDtx(opus_encoder_));
+
+  int16_t audio_type;
+  int16_t* output_data_decode = new int16_t[output_samples * channels_];
+
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_EQ(output_samples,
+              static_cast<size_t>(EncodeDecode(
+                  opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
+                  output_data_decode, &audio_type)));
+    // If DTX is disabled, the coder should never enter DTX mode. If DTX is
+    // enabled, we do not care, since whether it enters DTX depends on the
+    // signal type.
+    if (!dtx) {
+      EXPECT_GT(encoded_bytes_, 1U);
+      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+      EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+      EXPECT_EQ(0, audio_type);  // Speech.
+    }
+  }
+
+  // We input some silent segments. In DTX mode, the encoder will stop sending.
+  // However, DTX may only kick in after a while.
+  for (int i = 0; i < 30; ++i) {
+    EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
+                                  opus_encoder_, silence, opus_decoder_,
+                                  output_data_decode, &audio_type)));
+    if (!dtx) {
+      EXPECT_GT(encoded_bytes_, 1U);
+      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+      EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+      EXPECT_EQ(0, audio_type);  // Speech.
+    } else if (encoded_bytes_ == 1) {
+      EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
+      EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
+      EXPECT_EQ(2, audio_type);  // Comfort noise.
+      break;
+    }
+  }
+
+  // When Opus is in DTX, it wakes up on a regular basis. It sends two packets,
+  // one with an arbitrary size and the other of 1 byte, then stops sending for
+  // a certain number of frames.
+
+  // `max_dtx_frames` is the maximum number of frames Opus can stay in DTX.
+  // TODO(kwiberg): Why does this number depend on the encoding sample rate?
+  const int max_dtx_frames =
+      (encoder_sample_rate_hz_ == 16000 ? 800 : 400) / block_length_ms + 1;
+
+  // We run `kRunTimeMs` milliseconds of pure silence.
+  const int kRunTimeMs = 4500;
+
+  // We check that, after `kCheckTimeMs` milliseconds (given that the CNG in
+  // Opus needs time to adapt), the absolute values of the DTX decoded signal
+  // are bounded by `kOutputValueBound`.
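+  // Note: with kRunTimeMs = 4500 and kCheckTimeMs = 4000, only the final
+  // 500 ms of comfort noise (e.g. 25 blocks when block_length_ms is 20) is
+  // actually value-checked.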
+  const int kCheckTimeMs = 4000;
+
+#if defined(OPUS_FIXED_POINT)
+  // Fixed-point Opus generates a random (comfort) noise, which has a less
+  // predictable value bound than floating-point Opus. This value depends on
+  // the input signal and on the time window for checking the output values
+  // (between `kCheckTimeMs` and `kRunTimeMs`).
+  const uint16_t kOutputValueBound = 30;
+
+#else
+  const uint16_t kOutputValueBound = 2;
+#endif
+
+  int time = 0;
+  while (time < kRunTimeMs) {
+    // DTX mode is maintained for at most `max_dtx_frames` frames.
+    int i = 0;
+    for (; i < max_dtx_frames; ++i) {
+      time += block_length_ms;
+      EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
+                                    opus_encoder_, silence, opus_decoder_,
+                                    output_data_decode, &audio_type)));
+      if (dtx) {
+        if (encoded_bytes_ > 1)
+          break;
+        EXPECT_EQ(0U, encoded_bytes_)  // Send 0 byte.
+            << "Opus should have entered DTX mode.";
+        EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
+        EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
+        EXPECT_EQ(2, audio_type);  // Comfort noise.
+        if (time >= kCheckTimeMs) {
+          CheckAudioBounded(output_data_decode, output_samples, channels_,
+                            kOutputValueBound);
+        }
+      } else {
+        EXPECT_GT(encoded_bytes_, 1U);
+        EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+        EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+        EXPECT_EQ(0, audio_type);  // Speech.
+      }
+    }
+
+    if (dtx) {
+      // With DTX, Opus must stop transmission for some time.
+      EXPECT_GT(i, 1);
+    }
+
+    // We expect a normal payload.
+    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+    EXPECT_EQ(0, audio_type);  // Speech.
+
+    // Enters DTX again immediately.
+    time += block_length_ms;
+    EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
+                                  opus_encoder_, silence, opus_decoder_,
+                                  output_data_decode, &audio_type)));
+    if (dtx) {
+      EXPECT_EQ(1U, encoded_bytes_);  // Send 1 byte.
+      EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
+      EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
+      EXPECT_EQ(2, audio_type);  // Comfort noise.
+      if (time >= kCheckTimeMs) {
+        CheckAudioBounded(output_data_decode, output_samples, channels_,
+                          kOutputValueBound);
+      }
+    } else {
+      EXPECT_GT(encoded_bytes_, 1U);
+      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+      EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+      EXPECT_EQ(0, audio_type);  // Speech.
+    }
+  }
+
+  silence[0] = 10000;
+  if (dtx) {
+    // Verify that the encoder/decoder can jump out of DTX mode.
+    EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
+                                  opus_encoder_, silence, opus_decoder_,
+                                  output_data_decode, &audio_type)));
+    EXPECT_GT(encoded_bytes_, 1U);
+    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+    EXPECT_EQ(0, audio_type);  // Speech.
+  }
+
+  // Free memory.
+  delete[] output_data_decode;
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+// Test if CBR does what we expect.
+void OpusTest::TestCbrEffect(bool cbr, int block_length_ms) {
+  PrepareSpeechData(block_length_ms, 2000);
+  const size_t output_samples =
+      rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
+
+  int32_t max_pkt_size_diff = 0;
+  int32_t prev_pkt_size = 0;
+
+  // Create encoder memory.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+
+  // Set bitrate.
+  EXPECT_EQ(
+      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
+
+  // Setting CBR.
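+  // With CBR enabled, every packet must come out the same size, so the
+  // maximum packet-size difference measured below must stay at zero; with CBR
+  // disabled (VBR), the sizes are expected to vary.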
+  EXPECT_EQ(0, cbr ? WebRtcOpus_EnableCbr(opus_encoder_)
+                   : WebRtcOpus_DisableCbr(opus_encoder_));
+
+  int16_t audio_type;
+  std::vector<int16_t> audio_out(output_samples * channels_);
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_EQ(output_samples,
+              static_cast<size_t>(
+                  EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
+                               opus_decoder_, audio_out.data(), &audio_type)));
+
+    if (prev_pkt_size > 0) {
+      int32_t diff = std::abs((int32_t)encoded_bytes_ - prev_pkt_size);
+      max_pkt_size_diff = std::max(max_pkt_size_diff, diff);
+    }
+    prev_pkt_size = rtc::checked_cast<int32_t>(encoded_bytes_);
+  }
+
+  if (cbr) {
+    EXPECT_EQ(max_pkt_size_diff, 0);
+  } else {
+    EXPECT_GT(max_pkt_size_diff, 0);
+  }
+
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+// Test failing Create.
+TEST(OpusTest, OpusCreateFail) {
+  WebRtcOpusEncInst* opus_encoder;
+  WebRtcOpusDecInst* opus_decoder;
+
+  // Test to see that an invalid pointer is caught.
+  EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(NULL, 1, 0, 48000));
+  // Invalid channel number.
+  EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 257, 0, 48000));
+  // Invalid application mode.
+  EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 2, 48000));
+  // Invalid sample rate.
+  EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 0, 12345));
+
+  EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(NULL, 1, 48000));
+  // Invalid channel number.
+  EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 257, 48000));
+  // Invalid sample rate.
+  EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 1, 12345));
+}
+
+// Test failing Free.
+TEST(OpusTest, OpusFreeFail) {
+  // Test to see that an invalid pointer is caught.
+  EXPECT_EQ(-1, WebRtcOpus_EncoderFree(NULL));
+  EXPECT_EQ(-1, WebRtcOpus_DecoderFree(NULL));
+}
+
+// Test normal Create and Free.
+TEST_P(OpusTest, OpusCreateFree) {
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+  EXPECT_TRUE(opus_encoder_ != NULL);
+  EXPECT_TRUE(opus_decoder_ != NULL);
+  // Free encoder and decoder memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+#define ENCODER_CTL(inst, vargs)                    \
+  inst->encoder                                     \
+      ? opus_encoder_ctl(inst->encoder, vargs)      \
+      : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs)
+
+TEST_P(OpusTest, OpusEncodeDecode) {
+  PrepareSpeechData(20, 20);
+
+  // Create encoder memory.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+
+  // Set bitrate.
+  EXPECT_EQ(
+      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
+
+  // Check number of channels for decoder.
+  EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
+
+  // Check application mode.
+  opus_int32 app;
+  ENCODER_CTL(opus_encoder_, OPUS_GET_APPLICATION(&app));
+  EXPECT_EQ(application_ == 0 ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO,
+            app);
+
+  // Encode & decode.
+  int16_t audio_type;
+  const int decode_samples_per_channel =
+      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
+  int16_t* output_data_decode =
+      new int16_t[decode_samples_per_channel * channels_];
+  EXPECT_EQ(decode_samples_per_channel,
+            EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
+                         opus_decoder_, output_data_decode, &audio_type));
+
+  // Free memory.
+  delete[] output_data_decode;
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+TEST_P(OpusTest, OpusSetBitRate) {
+  // Test without creating encoder memory.
+  EXPECT_EQ(-1, WebRtcOpus_SetBitRate(opus_encoder_, 60000));
+
+  // Create encoder memory, try with different bitrates.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 30000));
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 60000));
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 300000));
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 600000));
+
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+}
+
+TEST_P(OpusTest, OpusSetComplexity) {
+  // Test without creating encoder memory.
+  EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 9));
+
+  // Create encoder memory, try with different complexities.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+
+  EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 0));
+  EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 10));
+  EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 11));
+
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+}
+
+TEST_P(OpusTest, OpusSetBandwidth) {
+  if (channels_ > 2) {
+    // TODO(webrtc:10217): investigate why multi-stream Opus reports
+    // narrowband when it's configured with FULLBAND.
+    return;
+  }
+  PrepareSpeechData(20, 20);
+
+  int16_t audio_type;
+  const int decode_samples_per_channel =
+      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
+  std::unique_ptr<int16_t[]> output_data_decode(
+      new int16_t[decode_samples_per_channel * channels_]());
+
+  // Test without creating encoder memory.
+  EXPECT_EQ(-1,
+            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
+  EXPECT_EQ(-1, WebRtcOpus_GetBandwidth(opus_encoder_));
+
+  // Create encoder memory, try with different bandwidths.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+
+  EXPECT_EQ(-1, WebRtcOpus_SetBandwidth(opus_encoder_,
+                                        OPUS_BANDWIDTH_NARROWBAND - 1));
+  EXPECT_EQ(0,
+            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
+  EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
+               output_data_decode.get(), &audio_type);
+  EXPECT_EQ(OPUS_BANDWIDTH_NARROWBAND, WebRtcOpus_GetBandwidth(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND));
+  EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
+               output_data_decode.get(), &audio_type);
+  EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ?
OPUS_BANDWIDTH_WIDEBAND + : OPUS_BANDWIDTH_FULLBAND, + WebRtcOpus_GetBandwidth(opus_encoder_)); + EXPECT_EQ( + -1, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND + 1)); + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, + output_data_decode.get(), &audio_type); + EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND + : OPUS_BANDWIDTH_FULLBAND, + WebRtcOpus_GetBandwidth(opus_encoder_)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusForceChannels) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + ASSERT_NE(nullptr, opus_encoder_); + + if (channels_ >= 2) { + EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 3)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0)); + } else { + EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0)); + } + + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +// Encode and decode one frame, initialize the decoder and +// decode once more. +TEST_P(OpusTest, OpusDecodeInit) { + PrepareSpeechData(20, 20); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Encode & decode. + int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + int16_t* output_data_decode = + new int16_t[decode_samples_per_channel * channels_]; + EXPECT_EQ(decode_samples_per_channel, + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), + opus_decoder_, output_data_decode, &audio_type)); + + WebRtcOpus_DecoderInit(opus_decoder_); + + EXPECT_EQ(decode_samples_per_channel, + WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_, + output_data_decode, &audio_type)); + + // Free memory. + delete[] output_data_decode; + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusEnableDisableFec) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_EnableFec(opus_encoder_)); + EXPECT_EQ(-1, WebRtcOpus_DisableFec(opus_encoder_)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusEnableDisableDtx) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_EnableDtx(opus_encoder_)); + EXPECT_EQ(-1, WebRtcOpus_DisableDtx(opus_encoder_)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + opus_int32 dtx; + + // DTX is off by default. 
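+  // ENCODER_CTL (defined above) dispatches to the single- or multi-stream
+  // ctl; OPUS_GET_DTX writes 0 or 1 into `dtx`.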
+ ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Test to enable DTX. + EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_)); + ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(1, dtx); + + // Test to disable DTX. + EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_encoder_)); + ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusDtxOff) { + TestDtxEffect(false, 10); + TestDtxEffect(false, 20); + TestDtxEffect(false, 40); +} + +TEST_P(OpusTest, OpusDtxOn) { + if (channels_ > 2 || application_ != 0) { + // DTX does not work with OPUS_APPLICATION_AUDIO at low complexity settings. + // TODO(webrtc:10218): adapt the test to the sizes and order of multi-stream + // DTX packets. + return; + } + TestDtxEffect(true, 10); + TestDtxEffect(true, 20); + TestDtxEffect(true, 40); +} + +TEST_P(OpusTest, OpusCbrOff) { + TestCbrEffect(false, 10); + TestCbrEffect(false, 20); + TestCbrEffect(false, 40); +} + +TEST_P(OpusTest, OpusCbrOn) { + TestCbrEffect(true, 10); + TestCbrEffect(true, 20); + TestCbrEffect(true, 40); +} + +TEST_P(OpusTest, OpusSetPacketLossRate) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50)); + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, -1)); + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 101)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusSetMaxPlaybackRate) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, 20000)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 48000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 24001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 24000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 16001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 16000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 12001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 12000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 8001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 8000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 4000); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +// Test PLC. +TEST_P(OpusTest, OpusDecodePlc) { + PrepareSpeechData(20, 20); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_)); + + // Encode & decode. 
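+  // A regular packet is decoded first so the decoder has state to conceal
+  // from; the PLC call further below then passes NULL data and 0 bytes to
+  // WebRtcOpus_Decode.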
+  int16_t audio_type;
+  const int decode_samples_per_channel =
+      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
+  int16_t* output_data_decode =
+      new int16_t[decode_samples_per_channel * channels_];
+  EXPECT_EQ(decode_samples_per_channel,
+            EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
+                         opus_decoder_, output_data_decode, &audio_type));
+
+  // Call decoder PLC.
+  constexpr int kPlcDurationMs = 10;
+  const int plc_samples = decoder_sample_rate_hz_ * kPlcDurationMs / 1000;
+  int16_t* plc_buffer = new int16_t[plc_samples * channels_];
+  EXPECT_EQ(plc_samples,
+            WebRtcOpus_Decode(opus_decoder_, NULL, 0, plc_buffer, &audio_type));
+
+  // Free memory.
+  delete[] plc_buffer;
+  delete[] output_data_decode;
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+// Duration estimation.
+TEST_P(OpusTest, OpusDurationEstimation) {
+  PrepareSpeechData(20, 20);
+
+  // Create.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+
+  // 10 ms. We use only the first 10 ms of a 20 ms block.
+  auto speech_block = speech_data_.GetNextBlock();
+  int encoded_bytes_int = WebRtcOpus_Encode(
+      opus_encoder_, speech_block.data(),
+      rtc::CheckedDivExact(speech_block.size(), 2 * channels_), kMaxBytes,
+      bitstream_);
+  EXPECT_GE(encoded_bytes_int, 0);
+  EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/10),
+            WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
+                                   static_cast<size_t>(encoded_bytes_int)));
+
+  // 20 ms.
+  speech_block = speech_data_.GetNextBlock();
+  encoded_bytes_int =
+      WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
+                        rtc::CheckedDivExact(speech_block.size(), channels_),
+                        kMaxBytes, bitstream_);
+  EXPECT_GE(encoded_bytes_int, 0);
+  EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20),
+            WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
+                                   static_cast<size_t>(encoded_bytes_int)));
+
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+TEST_P(OpusTest, OpusDecodeRepacketized) {
+  if (channels_ > 2) {
+    // As per the Opus documentation
+    // https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__repacketizer.html#details,
+    // multiple streams are not supported.
+    return;
+  }
+  constexpr size_t kPackets = 6;
+
+  PrepareSpeechData(20, 20 * kPackets);
+
+  // Create encoder memory.
+  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
+                                   use_multistream_, encoder_sample_rate_hz_);
+  ASSERT_NE(nullptr, opus_encoder_);
+  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
+                                   decoder_sample_rate_hz_);
+  ASSERT_NE(nullptr, opus_decoder_);
+
+  // Set bitrate.
+  EXPECT_EQ(
+      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
+
+  // Check number of channels for decoder.
+  EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
+
+  // Encode & decode.
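+  // Six 20 ms packets are appended with opus_repacketizer_cat() and emitted
+  // as one packet via opus_repacketizer_out(); 6 * 20 ms = 120 ms, the
+  // maximum duration a single Opus packet may carry.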
+  int16_t audio_type;
+  const int decode_samples_per_channel =
+      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
+  std::unique_ptr<int16_t[]> output_data_decode(
+      new int16_t[kPackets * decode_samples_per_channel * channels_]);
+  OpusRepacketizer* rp = opus_repacketizer_create();
+
+  size_t num_packets = 0;
+  constexpr size_t kMaxCycles = 100;
+  for (size_t idx = 0; idx < kMaxCycles; ++idx) {
+    auto speech_block = speech_data_.GetNextBlock();
+    encoded_bytes_ =
+        WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
+                          rtc::CheckedDivExact(speech_block.size(), channels_),
+                          kMaxBytes, bitstream_);
+    if (opus_repacketizer_cat(rp, bitstream_,
+                              rtc::checked_cast<opus_int32>(encoded_bytes_)) ==
+        OPUS_OK) {
+      ++num_packets;
+      if (num_packets == kPackets) {
+        break;
+      }
+    } else {
+      // The Opus repacketizer cannot guarantee success. Try again if it fails.
+      opus_repacketizer_init(rp);
+      num_packets = 0;
+    }
+  }
+  EXPECT_EQ(kPackets, num_packets);
+
+  encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes);
+
+  EXPECT_EQ(decode_samples_per_channel * kPackets,
+            static_cast<size_t>(WebRtcOpus_DurationEst(
+                opus_decoder_, bitstream_, encoded_bytes_)));
+
+  EXPECT_EQ(decode_samples_per_channel * kPackets,
+            static_cast<size_t>(
+                WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
+                                  output_data_decode.get(), &audio_type)));
+
+  // Free memory.
+  opus_repacketizer_destroy(rp);
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
+TEST(OpusVadTest, CeltUnknownStatus) {
+  const uint8_t celt[] = {0x80};
+  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(celt, 1), -1);
+}
+
+TEST(OpusVadTest, Mono20msVadSet) {
+  uint8_t silk20msMonoVad[] = {0x78, 0x80};
+  EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoVad, 2));
+}
+
+TEST(OpusVadTest, Mono20MsVadUnset) {
+  uint8_t silk20msMonoSilence[] = {0x78, 0x00};
+  EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoSilence, 2));
+}
+
+TEST(OpusVadTest, Stereo20MsVadOnSideChannel) {
+  uint8_t silk20msStereoVadSideChannel[] = {0x78 | 0x04, 0x20};
+  EXPECT_TRUE(
+      WebRtcOpus_PacketHasVoiceActivity(silk20msStereoVadSideChannel, 2));
+}
+
+TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) {
+  uint8_t twoMonoFrames[] = {0x78 | 0x1, 0x00, 0x80};
+  EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn
new file mode 100644
index 0000000000..8bc0bf5e0e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn
@@ -0,0 +1,55 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../../../webrtc.gni") + +visibility = [ + ":*", + "../../../:*", +] + +if (rtc_include_tests) { + rtc_library("test") { + testonly = true + + sources = [ + "audio_ring_buffer.cc", + "audio_ring_buffer.h", + "blocker.cc", + "blocker.h", + "lapped_transform.cc", + "lapped_transform.h", + ] + + deps = [ + "../../../../../common_audio", + "../../../../../common_audio:common_audio_c", + "../../../../../rtc_base:checks", + "../../../../../rtc_base/memory:aligned_malloc", + ] + } + + rtc_library("test_unittest") { + testonly = true + + sources = [ + "audio_ring_buffer_unittest.cc", + "blocker_unittest.cc", + "lapped_transform_unittest.cc", + ] + + deps = [ + ":test", + "../../../../../common_audio", + "../../../../../common_audio:common_audio_c", + "../../../../../rtc_base:macromagic", + "../../../../../test:test_support", + "//testing/gtest", + ] + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc new file mode 100644 index 0000000000..2a71b43d2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h" + +#include "common_audio/ring_buffer.h" +#include "rtc_base/checks.h" + +// This is a simple multi-channel wrapper over the ring_buffer.h C interface. + +namespace webrtc { + +AudioRingBuffer::AudioRingBuffer(size_t channels, size_t max_frames) { + buffers_.reserve(channels); + for (size_t i = 0; i < channels; ++i) + buffers_.push_back(WebRtc_CreateBuffer(max_frames, sizeof(float))); +} + +AudioRingBuffer::~AudioRingBuffer() { + for (auto* buf : buffers_) + WebRtc_FreeBuffer(buf); +} + +void AudioRingBuffer::Write(const float* const* data, + size_t channels, + size_t frames) { + RTC_DCHECK_EQ(buffers_.size(), channels); + for (size_t i = 0; i < channels; ++i) { + const size_t written = WebRtc_WriteBuffer(buffers_[i], data[i], frames); + RTC_CHECK_EQ(written, frames); + } +} + +void AudioRingBuffer::Read(float* const* data, size_t channels, size_t frames) { + RTC_DCHECK_EQ(buffers_.size(), channels); + for (size_t i = 0; i < channels; ++i) { + const size_t read = + WebRtc_ReadBuffer(buffers_[i], nullptr, data[i], frames); + RTC_CHECK_EQ(read, frames); + } +} + +size_t AudioRingBuffer::ReadFramesAvailable() const { + // All buffers have the same amount available. + return WebRtc_available_read(buffers_[0]); +} + +size_t AudioRingBuffer::WriteFramesAvailable() const { + // All buffers have the same amount available. 
+  return WebRtc_available_write(buffers_[0]);
+}
+
+void AudioRingBuffer::MoveReadPositionForward(size_t frames) {
+  for (auto* buf : buffers_) {
+    const size_t moved =
+        static_cast<size_t>(WebRtc_MoveReadPtr(buf, static_cast<int>(frames)));
+    RTC_CHECK_EQ(moved, frames);
+  }
+}
+
+void AudioRingBuffer::MoveReadPositionBackward(size_t frames) {
+  for (auto* buf : buffers_) {
+    const size_t moved = static_cast<size_t>(
+        -WebRtc_MoveReadPtr(buf, -static_cast<int>(frames)));
+    RTC_CHECK_EQ(moved, frames);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h
new file mode 100644
index 0000000000..a280ca2410
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+struct RingBuffer;
+
+namespace webrtc {
+
+// A ring buffer tailored for float deinterleaved audio. Any operation that
+// cannot be performed as requested will cause a crash (e.g. insufficient
+// data in the buffer to fulfill a read request).
+class AudioRingBuffer final {
+ public:
+  // Specify the number of channels and the maximum number of frames the
+  // buffer will contain.
+  AudioRingBuffer(size_t channels, size_t max_frames);
+  ~AudioRingBuffer();
+
+  // Copies `data` to the buffer and advances the write pointer. `channels`
+  // must be the same as at creation time.
+  void Write(const float* const* data, size_t channels, size_t frames);
+
+  // Copies from the buffer to `data` and advances the read pointer.
+  // `channels` must be the same as at creation time.
+  void Read(float* const* data, size_t channels, size_t frames);
+
+  size_t ReadFramesAvailable() const;
+  size_t WriteFramesAvailable() const;
+
+  // Moves the read position. The forward version advances the read pointer
+  // towards the write pointer and the backward version withdraws the read
+  // pointer away from the write pointer (i.e. flushing and stuffing the
+  // buffer respectively).
+  void MoveReadPositionForward(size_t frames);
+  void MoveReadPositionBackward(size_t frames);
+
+ private:
+  // TODO(kwiberg): Use std::vector<std::unique_ptr<RingBuffer>> instead.
+  std::vector<RingBuffer*> buffers_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc
new file mode 100644
index 0000000000..6dbc8ee9fe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
+
+#include <memory>
+
+#include "common_audio/channel_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+class AudioRingBufferTest
+    : public ::testing::TestWithParam< ::testing::tuple<int, int, int, int> > {
+};
+
+void ReadAndWriteTest(const ChannelBuffer<float>& input,
+                      size_t num_write_chunk_frames,
+                      size_t num_read_chunk_frames,
+                      size_t buffer_frames,
+                      ChannelBuffer<float>* output) {
+  const size_t num_channels = input.num_channels();
+  const size_t total_frames = input.num_frames();
+  AudioRingBuffer buf(num_channels, buffer_frames);
+  std::unique_ptr<float*[]> slice(new float*[num_channels]);
+
+  size_t input_pos = 0;
+  size_t output_pos = 0;
+  while (input_pos + buf.WriteFramesAvailable() < total_frames) {
+    // Write until the buffer is as full as possible.
+    while (buf.WriteFramesAvailable() >= num_write_chunk_frames) {
+      buf.Write(input.Slice(slice.get(), input_pos), num_channels,
+                num_write_chunk_frames);
+      input_pos += num_write_chunk_frames;
+    }
+    // Read until the buffer is as empty as possible.
+    while (buf.ReadFramesAvailable() >= num_read_chunk_frames) {
+      EXPECT_LT(output_pos, total_frames);
+      buf.Read(output->Slice(slice.get(), output_pos), num_channels,
+               num_read_chunk_frames);
+      output_pos += num_read_chunk_frames;
+    }
+  }
+
+  // Write and read the last bit.
+  if (input_pos < total_frames) {
+    buf.Write(input.Slice(slice.get(), input_pos), num_channels,
+              total_frames - input_pos);
+  }
+  if (buf.ReadFramesAvailable()) {
+    buf.Read(output->Slice(slice.get(), output_pos), num_channels,
+             buf.ReadFramesAvailable());
+  }
+  EXPECT_EQ(0u, buf.ReadFramesAvailable());
+}
+
+TEST_P(AudioRingBufferTest, ReadDataMatchesWrittenData) {
+  const size_t kFrames = 5000;
+  const size_t num_channels = ::testing::get<3>(GetParam());
+
+  // Initialize the input data to an increasing sequence.
+  ChannelBuffer<float> input(kFrames, static_cast<int>(num_channels));
+  for (size_t i = 0; i < num_channels; ++i)
+    for (size_t j = 0; j < kFrames; ++j)
+      input.channels()[i][j] = (i + 1) * (j + 1);
+
+  ChannelBuffer<float> output(kFrames, static_cast<int>(num_channels));
+  ReadAndWriteTest(input, ::testing::get<0>(GetParam()),
+                   ::testing::get<1>(GetParam()),
+                   ::testing::get<2>(GetParam()), &output);
+
+  // Verify the read data matches the input.
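+  // The input was filled with (i + 1) * (j + 1), so any dropped frame,
+  // frame shift, or channel swap shows up as a mismatch below.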
+  for (size_t i = 0; i < num_channels; ++i)
+    for (size_t j = 0; j < kFrames; ++j)
+      EXPECT_EQ(input.channels()[i][j], output.channels()[i][j]);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioRingBufferTest,
+    AudioRingBufferTest,
+    ::testing::Combine(::testing::Values(10, 20, 42),  // num_write_chunk_frames
+                       ::testing::Values(1, 10, 17),   // num_read_chunk_frames
+                       ::testing::Values(100, 256),    // buffer_frames
+                       ::testing::Values(1, 4)));      // num_channels
+
+TEST_F(AudioRingBufferTest, MoveReadPosition) {
+  const size_t kNumChannels = 1;
+  const float kInputArray[] = {1, 2, 3, 4};
+  const size_t kNumFrames = sizeof(kInputArray) / sizeof(*kInputArray);
+  ChannelBuffer<float> input(kNumFrames, kNumChannels);
+  input.SetDataForTesting(kInputArray, kNumFrames);
+  AudioRingBuffer buf(kNumChannels, kNumFrames);
+  buf.Write(input.channels(), kNumChannels, kNumFrames);
+
+  buf.MoveReadPositionForward(3);
+  ChannelBuffer<float> output(1, kNumChannels);
+  buf.Read(output.channels(), kNumChannels, 1);
+  EXPECT_EQ(4, output.channels()[0][0]);
+  buf.MoveReadPositionBackward(3);
+  buf.Read(output.channels(), kNumChannels, 1);
+  EXPECT_EQ(2, output.channels()[0][0]);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc
new file mode 100644
index 0000000000..33406cead9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/test/blocker.h"
+
+#include <string.h>
+
+#include "rtc_base/checks.h"
+
+namespace {
+
+// Adds `a` and `b` frame by frame into `result` (basically matrix addition).
+void AddFrames(const float* const* a,
+               size_t a_start_index,
+               const float* const* b,
+               int b_start_index,
+               size_t num_frames,
+               size_t num_channels,
+               float* const* result,
+               size_t result_start_index) {
+  for (size_t i = 0; i < num_channels; ++i) {
+    for (size_t j = 0; j < num_frames; ++j) {
+      result[i][j + result_start_index] =
+          a[i][j + a_start_index] + b[i][j + b_start_index];
+    }
+  }
+}
+
+// Copies `src` into `dst` channel by channel.
+void CopyFrames(const float* const* src,
+                size_t src_start_index,
+                size_t num_frames,
+                size_t num_channels,
+                float* const* dst,
+                size_t dst_start_index) {
+  for (size_t i = 0; i < num_channels; ++i) {
+    memcpy(&dst[i][dst_start_index], &src[i][src_start_index],
+           num_frames * sizeof(dst[i][dst_start_index]));
+  }
+}
+
+// Moves `src` into `dst` channel by channel.
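+// Unlike CopyFrames above, this uses memmove, so the source and destination
+// ranges may overlap (as they do when ProcessChunk compacts the output
+// buffer in place).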
+void MoveFrames(const float* const* src, + size_t src_start_index, + size_t num_frames, + size_t num_channels, + float* const* dst, + size_t dst_start_index) { + for (size_t i = 0; i < num_channels; ++i) { + memmove(&dst[i][dst_start_index], &src[i][src_start_index], + num_frames * sizeof(dst[i][dst_start_index])); + } +} + +void ZeroOut(float* const* buffer, + size_t starting_idx, + size_t num_frames, + size_t num_channels) { + for (size_t i = 0; i < num_channels; ++i) { + memset(&buffer[i][starting_idx], 0, + num_frames * sizeof(buffer[i][starting_idx])); + } +} + +// Pointwise multiplies each channel of `frames` with `window`. Results are +// stored in `frames`. +void ApplyWindow(const float* window, + size_t num_frames, + size_t num_channels, + float* const* frames) { + for (size_t i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + frames[i][j] = frames[i][j] * window[j]; + } + } +} + +size_t gcd(size_t a, size_t b) { + size_t tmp; + while (b) { + tmp = a; + a = b; + b = tmp % b; + } + return a; +} + +} // namespace + +namespace webrtc { + +Blocker::Blocker(size_t chunk_size, + size_t block_size, + size_t num_input_channels, + size_t num_output_channels, + const float* window, + size_t shift_amount, + BlockerCallback* callback) + : chunk_size_(chunk_size), + block_size_(block_size), + num_input_channels_(num_input_channels), + num_output_channels_(num_output_channels), + initial_delay_(block_size_ - gcd(chunk_size, shift_amount)), + frame_offset_(0), + input_buffer_(num_input_channels_, chunk_size_ + initial_delay_), + output_buffer_(chunk_size_ + initial_delay_, num_output_channels_), + input_block_(block_size_, num_input_channels_), + output_block_(block_size_, num_output_channels_), + window_(new float[block_size_]), + shift_amount_(shift_amount), + callback_(callback) { + RTC_CHECK_LE(num_output_channels_, num_input_channels_); + RTC_CHECK_LE(shift_amount_, block_size_); + + memcpy(window_.get(), window, block_size_ * sizeof(*window_.get())); + input_buffer_.MoveReadPositionBackward(initial_delay_); +} + +Blocker::~Blocker() = default; + +// When block_size < chunk_size the input and output buffers look like this: +// +// delay* chunk_size chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// 1. New input gets read into sections _b_ and _c_ of the input buffer. +// 2. We block starting from frame_offset. +// 3. We block until we reach a block `bl` that doesn't contain any frames +// from sections _a_ or _b_ of the input buffer. +// 4. We window the current block, fire the callback for processing, window +// again, and overlap/add to the output buffer. +// 5. We copy sections _a_ and _b_ of the output buffer into output. +// 6. For both the input and the output buffers, we copy section _c_ into +// section _a_. +// 7. We set the new frame_offset to be the difference between the first frame +// of `bl` and the border between sections _b_ and _c_. +// +// When block_size > chunk_size the input and output buffers look like this: +// +// chunk_size delay* chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// The procedure is the same as above, except for: +// 1. New input gets read into section _c_ of the input buffer. +// 3. We block until we reach a block `bl` that doesn't contain any frames +// from section _a_ of the input buffer. +// 5. 
We copy section _a_ of the output buffer into output.
+// 6. For both the input and the output buffers, we copy sections _b_ and _c_
+//    into section _a_ and _b_.
+// 7. We set the new frame_offset to be the difference between the first frame
+//    of `bl` and the border between sections _a_ and _b_.
+//
+// * delay here refers to initial_delay_
+//
+// TODO(claguna): Look at using ring buffers to eliminate some copies.
+void Blocker::ProcessChunk(const float* const* input,
+                           size_t chunk_size,
+                           size_t num_input_channels,
+                           size_t num_output_channels,
+                           float* const* output) {
+  RTC_CHECK_EQ(chunk_size, chunk_size_);
+  RTC_CHECK_EQ(num_input_channels, num_input_channels_);
+  RTC_CHECK_EQ(num_output_channels, num_output_channels_);
+
+  input_buffer_.Write(input, num_input_channels, chunk_size_);
+  size_t first_frame_in_block = frame_offset_;
+
+  // Loop through blocks.
+  while (first_frame_in_block < chunk_size_) {
+    input_buffer_.Read(input_block_.channels(), num_input_channels,
+                       block_size_);
+    input_buffer_.MoveReadPositionBackward(block_size_ - shift_amount_);
+
+    ApplyWindow(window_.get(), block_size_, num_input_channels_,
+                input_block_.channels());
+    callback_->ProcessBlock(input_block_.channels(), block_size_,
+                            num_input_channels_, num_output_channels_,
+                            output_block_.channels());
+    ApplyWindow(window_.get(), block_size_, num_output_channels_,
+                output_block_.channels());
+
+    AddFrames(output_buffer_.channels(), first_frame_in_block,
+              output_block_.channels(), 0, block_size_, num_output_channels_,
+              output_buffer_.channels(), first_frame_in_block);
+
+    first_frame_in_block += shift_amount_;
+  }
+
+  // Copy output buffer to output.
+  CopyFrames(output_buffer_.channels(), 0, chunk_size_, num_output_channels_,
+             output, 0);
+
+  // Copy output buffer [chunk_size_, chunk_size_ + initial_delay_]
+  // to output buffer [0, initial_delay_], zero the rest.
+  MoveFrames(output_buffer_.channels(), chunk_size, initial_delay_,
+             num_output_channels_, output_buffer_.channels(), 0);
+  ZeroOut(output_buffer_.channels(), initial_delay_, chunk_size_,
+          num_output_channels_);
+
+  // Calculate the new starting frame.
+  frame_offset_ = first_frame_in_block - chunk_size_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h
new file mode 100644
index 0000000000..59b7e29621
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
+
+#include <memory>
+
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
+
+namespace webrtc {
+
+// The callback function to process audio in the time domain. Input has already
+// been windowed, and output will be windowed. The number of input channels
+// must be >= the number of output channels.
+class BlockerCallback {
+ public:
+  virtual ~BlockerCallback() {}
+
+  virtual void ProcessBlock(const float* const* input,
+                            size_t num_frames,
+                            size_t num_input_channels,
+                            size_t num_output_channels,
+                            float* const* output) = 0;
+};
+
+// The main purpose of Blocker is to abstract away the fact that often we
+// receive a different number of audio frames than our transform takes. For
+// example, most FFTs work best when the fft-size is a power of 2, but suppose
+// we receive 20 ms of audio at a sample rate of 48000. That comes to 960
+// frames of audio, which is not a power of 2. Blocker allows us to specify the
+// transform and all other necessary processing via the Process() callback
+// function without any constraints on the transform-size
+// (read: `block_size_`) or received-audio-size (read: `chunk_size_`).
+// We handle this for the multichannel audio case, allowing for different
+// numbers of input and output channels (for example, beamforming takes 2 or
+// more input channels and returns 1 output channel). Audio signals are
+// represented as deinterleaved floats in the range [-1, 1].
+//
+// Blocker is responsible for:
+// - blocking audio while handling potential discontinuities on the edges
+//   of chunks
+// - windowing blocks before sending them to Process()
+// - windowing processed blocks, and overlap-adding them together before
+//   sending back a processed chunk
+//
+// To use blocker:
+// 1. Implement a BlockerCallback object `bc`.
+// 2. Instantiate a Blocker object `b`, passing in `bc`.
+// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
+//
+// A small amount of delay is added to the first received chunk to deal with
+// the difference in chunk/block sizes. This delay is <= chunk_size.
+//
+// Ownership of window is retained by the caller. That is, Blocker makes a
+// copy of window and does not attempt to delete it.
+class Blocker {
+ public:
+  Blocker(size_t chunk_size,
+          size_t block_size,
+          size_t num_input_channels,
+          size_t num_output_channels,
+          const float* window,
+          size_t shift_amount,
+          BlockerCallback* callback);
+  ~Blocker();
+
+  void ProcessChunk(const float* const* input,
+                    size_t chunk_size,
+                    size_t num_input_channels,
+                    size_t num_output_channels,
+                    float* const* output);
+
+  size_t initial_delay() const { return initial_delay_; }
+
+ private:
+  const size_t chunk_size_;
+  const size_t block_size_;
+  const size_t num_input_channels_;
+  const size_t num_output_channels_;
+
+  // The number of frames of delay to add at the beginning of the first chunk.
+  const size_t initial_delay_;
+
+  // The frame index into the input buffer where the first block should be read
+  // from. This is necessary because shift_amount_ is not necessarily a
+  // multiple of chunk_size_, so blocks won't line up at the start of the
+  // buffer.
+  size_t frame_offset_;
+
+  // Since blocks nearly always overlap, there are certain blocks that require
+  // frames from the end of one chunk and the beginning of the next chunk. The
+  // input and output buffers are responsible for saving those frames between
+  // calls to ProcessChunk().
+  //
+  // Both contain `initial_delay_` + `chunk_size_` frames. The input is a
+  // fairly standard FIFO, but due to the overlap-add it's harder to use an
+  // AudioRingBuffer for the output.
+  AudioRingBuffer input_buffer_;
+  ChannelBuffer<float> output_buffer_;
+
+  // Space for the input block (can't wrap because of windowing).
+  ChannelBuffer<float> input_block_;
+
+  // Space for the output block (can't wrap because of overlap/add).
+  ChannelBuffer<float> output_block_;
+
+  std::unique_ptr<float[]> window_;
+
+  // The number of frames between the starts of contiguous blocks. For
+  // example, `shift_amount_` = `block_size_` / 2 for a Hann window.
+  size_t shift_amount_;
+
+  BlockerCallback* callback_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc
new file mode 100644
index 0000000000..9c8e789ba9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/test/blocker.h"
+
+#include <memory>
+
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+namespace {
+
+// Callback function that adds 3 to every sample in the signal.
+class PlusThreeBlockerCallback : public webrtc::BlockerCallback {
+ public:
+  void ProcessBlock(const float* const* input,
+                    size_t num_frames,
+                    size_t num_input_channels,
+                    size_t num_output_channels,
+                    float* const* output) override {
+    for (size_t i = 0; i < num_output_channels; ++i) {
+      for (size_t j = 0; j < num_frames; ++j) {
+        output[i][j] = input[i][j] + 3;
+      }
+    }
+  }
+};
+
+// No-op callback function that copies its input straight to the output.
+class CopyBlockerCallback : public webrtc::BlockerCallback {
+ public:
+  void ProcessBlock(const float* const* input,
+                    size_t num_frames,
+                    size_t num_input_channels,
+                    size_t num_output_channels,
+                    float* const* output) override {
+    for (size_t i = 0; i < num_output_channels; ++i) {
+      for (size_t j = 0; j < num_frames; ++j) {
+        output[i][j] = input[i][j];
+      }
+    }
+  }
+};
+
+}  // namespace
+
+namespace webrtc {
+
+// Tests blocking with a window that multiplies the signal by 2, a callback
+// that adds 3 to each sample in the signal, and different combinations of
+// chunk size, block size, and shift amount.
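+//
+// At steady state each input sample x becomes 2 * (2 * x + 3) = 4 * x + 6
+// per block (the window is applied both before and after the callback), and
+// overlapping blocks are then summed; e.g. with no overlap an all-ones
+// channel yields 10, and with a shift of block_size / 2 it yields 20.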
+class BlockerTest : public ::testing::Test {
+ protected:
+  void RunTest(Blocker* blocker,
+               size_t chunk_size,
+               size_t num_frames,
+               const float* const* input,
+               float* const* input_chunk,
+               float* const* output,
+               float* const* output_chunk,
+               size_t num_input_channels,
+               size_t num_output_channels) {
+    size_t start = 0;
+    size_t end = chunk_size - 1;
+    while (end < num_frames) {
+      CopyTo(input_chunk, 0, start, num_input_channels, chunk_size, input);
+      blocker->ProcessChunk(input_chunk, chunk_size, num_input_channels,
+                            num_output_channels, output_chunk);
+      CopyTo(output, start, 0, num_output_channels, chunk_size, output_chunk);
+
+      start += chunk_size;
+      end += chunk_size;
+    }
+  }
+
+  void ValidateSignalEquality(const float* const* expected,
+                              const float* const* actual,
+                              size_t num_channels,
+                              size_t num_frames) {
+    for (size_t i = 0; i < num_channels; ++i) {
+      for (size_t j = 0; j < num_frames; ++j) {
+        EXPECT_FLOAT_EQ(expected[i][j], actual[i][j]);
+      }
+    }
+  }
+
+  void ValidateInitialDelay(const float* const* output,
+                            size_t num_channels,
+                            size_t num_frames,
+                            size_t initial_delay) {
+    for (size_t i = 0; i < num_channels; ++i) {
+      for (size_t j = 0; j < num_frames; ++j) {
+        if (j < initial_delay) {
+          EXPECT_FLOAT_EQ(output[i][j], 0.f);
+        } else {
+          EXPECT_GT(output[i][j], 0.f);
+        }
+      }
+    }
+  }
+
+  static void CopyTo(float* const* dst,
+                     size_t start_index_dst,
+                     size_t start_index_src,
+                     size_t num_channels,
+                     size_t num_frames,
+                     const float* const* src) {
+    for (size_t i = 0; i < num_channels; ++i) {
+      memcpy(&dst[i][start_index_dst], &src[i][start_index_src],
+             num_frames * sizeof(float));
+    }
+  }
+};
+
+TEST_F(BlockerTest, TestBlockerMutuallyPrimeChunkandBlockSize) {
+  const size_t kNumInputChannels = 3;
+  const size_t kNumOutputChannels = 2;
+  const size_t kNumFrames = 10;
+  const size_t kBlockSize = 4;
+  const size_t kChunkSize = 5;
+  const size_t kShiftAmount = 2;
+
+  const float kInput[kNumInputChannels][kNumFrames] = {
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
+  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
+  input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
+
+  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
+      {6, 6, 12, 20, 20, 20, 20, 20, 20, 20},
+      {6, 6, 12, 28, 28, 28, 28, 28, 28, 28}};
+  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
+  expected_output_cb.SetDataForTesting(
+      kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
+
+  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
+
+  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
+  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
+  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
+
+  PlusThreeBlockerCallback callback;
+  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels,
+                  kNumOutputChannels, kWindow, kShiftAmount, &callback);
+
+  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
+          input_chunk_cb.channels(), actual_output_cb.channels(),
+          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
+
+  ValidateSignalEquality(expected_output_cb.channels(),
+                         actual_output_cb.channels(), kNumOutputChannels,
+                         kNumFrames);
+}
+
+TEST_F(BlockerTest, TestBlockerMutuallyPrimeShiftAndBlockSize) {
+  const size_t kNumInputChannels = 3;
+  const size_t kNumOutputChannels = 2;
+  const size_t kNumFrames = 12;
+  const size_t kBlockSize = 4;
+  const size_t kChunkSize = 6;
+  const size_t kShiftAmount = 3;
+
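+  // initial_delay_ = block_size - gcd(chunk_size, shift_amount)
+  //                = 4 - gcd(6, 3) = 1 frame of leading delay here.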
+  const float kInput[kNumInputChannels][kNumFrames] = {
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
+  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
+  input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
+
+  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
+      {6, 10, 10, 20, 10, 10, 20, 10, 10, 20, 10, 10},
+      {6, 14, 14, 28, 14, 14, 28, 14, 14, 28, 14, 14}};
+  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
+  expected_output_cb.SetDataForTesting(
+      kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
+
+  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
+
+  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
+  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
+  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
+
+  PlusThreeBlockerCallback callback;
+  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels,
+                  kNumOutputChannels, kWindow, kShiftAmount, &callback);
+
+  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
+          input_chunk_cb.channels(), actual_output_cb.channels(),
+          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
+
+  ValidateSignalEquality(expected_output_cb.channels(),
+                         actual_output_cb.channels(), kNumOutputChannels,
+                         kNumFrames);
+}
+
+TEST_F(BlockerTest, TestBlockerNoOverlap) {
+  const size_t kNumInputChannels = 3;
+  const size_t kNumOutputChannels = 2;
+  const size_t kNumFrames = 12;
+  const size_t kBlockSize = 4;
+  const size_t kChunkSize = 4;
+  const size_t kShiftAmount = 4;
+
+  const float kInput[kNumInputChannels][kNumFrames] = {
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
+  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
+  input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
+
+  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
+      {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
+      {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}};
+  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
+  expected_output_cb.SetDataForTesting(
+      kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
+
+  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
+
+  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
+  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
+  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
+
+  PlusThreeBlockerCallback callback;
+  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels,
+                  kNumOutputChannels, kWindow, kShiftAmount, &callback);
+
+  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
+          input_chunk_cb.channels(), actual_output_cb.channels(),
+          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
+
+  ValidateSignalEquality(expected_output_cb.channels(),
+                         actual_output_cb.channels(), kNumOutputChannels,
+                         kNumFrames);
+}
+
+TEST_F(BlockerTest, InitialDelaysAreMinimum) {
+  const size_t kNumInputChannels = 3;
+  const size_t kNumOutputChannels = 2;
+  const size_t kNumFrames = 1280;
+  const size_t kChunkSize[] = {80,  80,  80,  80,  80,  80,
+                               160, 160, 160, 160, 160, 160};
+  const size_t kBlockSize[] = {64,  64,  64,  128, 128, 128,
+                               128, 128, 128, 256, 256, 256};
+  const size_t kShiftAmount[] = {16, 32, 64,  32, 64,  128,
+                                 32, 64, 128, 64, 128, 256};
+  const size_t kInitialDelay[] = {48, 48, 48, 112, 112, 112,
+                                  96, 96, 96, 224, 224, 224};
+
+  float input[kNumInputChannels][kNumFrames];
+  for (size_t i = 0; i < kNumInputChannels; ++i) {
+    for (size_t j = 0; j < kNumFrames; ++j) {
+      input[i][j] = i + 1;
+    }
+  }
+  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
+  input_cb.SetDataForTesting(input[0], sizeof(input) / sizeof(**input));
+
+  ChannelBuffer<float> output_cb(kNumFrames, kNumOutputChannels);
+
+  CopyBlockerCallback callback;
+
+  for (size_t i = 0; i < arraysize(kChunkSize); ++i) {
+    std::unique_ptr<float[]> window(new float[kBlockSize[i]]);
+    for (size_t j = 0; j < kBlockSize[i]; ++j) {
+      window[j] = 1.f;
+    }
+
+    ChannelBuffer<float> input_chunk_cb(kChunkSize[i], kNumInputChannels);
+    ChannelBuffer<float> output_chunk_cb(kChunkSize[i], kNumOutputChannels);
+
+    Blocker blocker(kChunkSize[i], kBlockSize[i], kNumInputChannels,
+                    kNumOutputChannels, window.get(), kShiftAmount[i],
+                    &callback);
+
+    RunTest(&blocker, kChunkSize[i], kNumFrames, input_cb.channels(),
+            input_chunk_cb.channels(), output_cb.channels(),
+            output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
+
+    ValidateInitialDelay(output_cb.channels(), kNumOutputChannels, kNumFrames,
+                         kInitialDelay[i]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc
new file mode 100644
index 0000000000..b1a6526bba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+
+#include "common_audio/real_fourier.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void LappedTransform::BlockThunk::ProcessBlock(const float* const* input,
+                                               size_t num_frames,
+                                               size_t num_input_channels,
+                                               size_t num_output_channels,
+                                               float* const* output) {
+  RTC_CHECK_EQ(num_input_channels, parent_->num_in_channels_);
+  RTC_CHECK_EQ(num_output_channels, parent_->num_out_channels_);
+  RTC_CHECK_EQ(parent_->block_length_, num_frames);
+
+  for (size_t i = 0; i < num_input_channels; ++i) {
+    memcpy(parent_->real_buf_.Row(i), input[i],
+           num_frames * sizeof(*input[0]));
+    parent_->fft_->Forward(parent_->real_buf_.Row(i),
+                           parent_->cplx_pre_.Row(i));
+  }
+
+  size_t block_length =
+      RealFourier::ComplexLength(RealFourier::FftOrder(num_frames));
+  RTC_CHECK_EQ(parent_->cplx_length_, block_length);
+  parent_->block_processor_->ProcessAudioBlock(
+      parent_->cplx_pre_.Array(), num_input_channels, parent_->cplx_length_,
+      num_output_channels, parent_->cplx_post_.Array());
+
+  for (size_t i = 0; i < num_output_channels; ++i) {
+    parent_->fft_->Inverse(parent_->cplx_post_.Row(i),
+                           parent_->real_buf_.Row(i));
+    memcpy(output[i], parent_->real_buf_.Row(i),
+           num_frames * sizeof(*input[0]));
+  }
+}
+
+LappedTransform::LappedTransform(size_t num_in_channels,
+                                 size_t num_out_channels,
+                                 size_t chunk_length,
+                                 const float* window,
+                                 size_t block_length,
+                                 size_t shift_amount,
+                                 Callback* callback)
+    : blocker_callback_(this),
+      num_in_channels_(num_in_channels),
+      num_out_channels_(num_out_channels),
+      block_length_(block_length),
+      chunk_length_(chunk_length),
+      block_processor_(callback),
+      blocker_(chunk_length_,
+               block_length_,
+               num_in_channels_,
+               num_out_channels_,
+               window,
+               shift_amount,
+               &blocker_callback_),
+      fft_(RealFourier::Create(RealFourier::FftOrder(block_length_))),
+      cplx_length_(RealFourier::ComplexLength(fft_->order())),
+      real_buf_(num_in_channels,
+                block_length_,
+                RealFourier::kFftBufferAlignment),
+      cplx_pre_(num_in_channels,
+                cplx_length_,
+                RealFourier::kFftBufferAlignment),
+      cplx_post_(num_out_channels,
+                 cplx_length_,
+                 RealFourier::kFftBufferAlignment) {
+  RTC_CHECK(num_in_channels_ > 0);
+  RTC_CHECK_GT(block_length_, 0);
+  RTC_CHECK_GT(chunk_length_, 0);
+  RTC_CHECK(block_processor_);
+
+  // block_length_ must be a power of 2.
+  RTC_CHECK_EQ(0, block_length_ & (block_length_ - 1));
+}
+
+LappedTransform::~LappedTransform() = default;
+
+void LappedTransform::ProcessChunk(const float* const* in_chunk,
+                                   float* const* out_chunk) {
+  blocker_.ProcessChunk(in_chunk, chunk_length_, num_in_channels_,
+                        num_out_channels_, out_chunk);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h
new file mode 100644
index 0000000000..bb25c34a9e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
+#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
+
+#include <complex>
+#include <memory>
+
+#include "common_audio/real_fourier.h"
+#include "modules/audio_coding/codecs/opus/test/blocker.h"
+#include "rtc_base/memory/aligned_malloc.h"
+
+namespace webrtc {
+
+// Wrapper class for aligned arrays. Every row (and the first dimension) is
+// aligned to the given byte alignment.
+template <typename T>
+class AlignedArray {
+ public:
+  AlignedArray(size_t rows, size_t cols, size_t alignment)
+      : rows_(rows), cols_(cols) {
+    RTC_CHECK_GT(alignment, 0);
+    head_row_ =
+        static_cast<T**>(AlignedMalloc(rows_ * sizeof(*head_row_), alignment));
+    for (size_t i = 0; i < rows_; ++i) {
+      head_row_[i] = static_cast<T*>(
+          AlignedMalloc(cols_ * sizeof(**head_row_), alignment));
+    }
+  }
+
+  ~AlignedArray() {
+    for (size_t i = 0; i < rows_; ++i) {
+      AlignedFree(head_row_[i]);
+    }
+    AlignedFree(head_row_);
+  }
+
+  T* const* Array() { return head_row_; }
+
+  const T* const* Array() const { return head_row_; }
+
+  T* Row(size_t row) {
+    RTC_CHECK_LT(row, rows_);
+    return head_row_[row];
+  }
+
+  const T* Row(size_t row) const {
+    RTC_CHECK_LT(row, rows_);
+    return head_row_[row];
+  }
+
+ private:
+  size_t rows_;
+  size_t cols_;
+  T** head_row_;
+};
+
+// Helper class for audio processing modules which operate on frequency domain
+// input derived from the windowed time domain audio stream.
+//
+// The input audio chunk is sliced into possibly overlapping blocks, multiplied
+// by a window and transformed with an FFT implementation. The transformed data
+// is supplied to the given callback for processing. The processed output is
+// then inverse transformed into the time domain and spliced back into a chunk
+// which constitutes the final output of this processing module.
+class LappedTransform {
+ public:
+  class Callback {
+   public:
+    virtual ~Callback() {}
+
+    virtual void ProcessAudioBlock(const std::complex<float>* const* in_block,
+                                   size_t num_in_channels,
+                                   size_t frames,
+                                   size_t num_out_channels,
+                                   std::complex<float>* const* out_block) = 0;
+  };
+
+  // Construct a transform instance. `chunk_length` is the number of samples in
+  // each channel. `window` defines the window, owned by the caller (a copy is
+  // made internally); `window` should have length equal to `block_length`.
+  // `block_length` defines the length of a block, in samples.
+  // `shift_amount` is in samples. `callback` is the caller-owned audio
+  // processing function called for each block of the input chunk.
+  LappedTransform(size_t num_in_channels,
+                  size_t num_out_channels,
+                  size_t chunk_length,
+                  const float* window,
+                  size_t block_length,
+                  size_t shift_amount,
+                  Callback* callback);
+  ~LappedTransform();
+
+  // Main audio processing helper method. Internally slices `in_chunk` into
+  // blocks, transforms them to frequency domain, calls the callback for each
+  // block and returns a de-blocked time domain chunk of audio through
+  // `out_chunk`. Both buffers are caller-owned.
+  void ProcessChunk(const float* const* in_chunk, float* const* out_chunk);
+
+  // Get the chunk length.
+  //
+  // The chunk length is the number of samples per channel that must be passed
+  // to ProcessChunk via the parameter in_chunk.
+  //
+  // Returns the same chunk_length passed to the LappedTransform constructor.
+  size_t chunk_length() const { return chunk_length_; }
+
+  // Get the number of input channels.
+  //
+  // This is the number of arrays that must be passed to ProcessChunk via
+  // in_chunk.
+  //
+  // Returns the same num_in_channels passed to the LappedTransform
+  // constructor.
+  size_t num_in_channels() const { return num_in_channels_; }
+
+  // Get the number of output channels.
+  //
+  // This is the number of arrays that must be passed to ProcessChunk via
+  // out_chunk.
+  //
+  // Returns the same num_out_channels passed to the LappedTransform
+  // constructor.
+  size_t num_out_channels() const { return num_out_channels_; }
+
+  // Returns the initial delay.
+  //
+  // This is the delay introduced by the `blocker_` to be able to get and
+  // return chunks of `chunk_length`, but process blocks of `block_length`.
+  size_t initial_delay() const { return blocker_.initial_delay(); }
+
+ private:
+  // Internal middleware callback, given to the blocker. Transforms each block
+  // and hands it over to the processing method given at construction time.
+  class BlockThunk : public BlockerCallback {
+   public:
+    explicit BlockThunk(LappedTransform* parent) : parent_(parent) {}
+
+    void ProcessBlock(const float* const* input,
+                      size_t num_frames,
+                      size_t num_input_channels,
+                      size_t num_output_channels,
+                      float* const* output) override;
+
+   private:
+    LappedTransform* const parent_;
+  } blocker_callback_;
+
+  const size_t num_in_channels_;
+  const size_t num_out_channels_;
+
+  const size_t block_length_;
+  const size_t chunk_length_;
+
+  Callback* const block_processor_;
+  Blocker blocker_;
+
+  // TODO(alessiob): Replace RealFourier with a different FFT library.
+  std::unique_ptr<RealFourier> fft_;
+  const size_t cplx_length_;
+  AlignedArray<float> real_buf_;
+  AlignedArray<std::complex<float> > cplx_pre_;
+  AlignedArray<std::complex<float> > cplx_post_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc
new file mode 100644
index 0000000000..1003ed52e5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+#include "test/gtest.h"
+
+using std::complex;
+
+namespace {
+
+class NoopCallback : public webrtc::LappedTransform::Callback {
+ public:
+  NoopCallback() : block_num_(0) {}
+
+  void ProcessAudioBlock(const complex<float>* const* in_block,
+                         size_t in_channels,
+                         size_t frames,
+                         size_t out_channels,
+                         complex<float>* const* out_block) override {
+    RTC_CHECK_EQ(in_channels, out_channels);
+    for (size_t i = 0; i < out_channels; ++i) {
+      memcpy(out_block[i], in_block[i], sizeof(**in_block) * frames);
+    }
+    ++block_num_;
+  }
+
+  size_t block_num() { return block_num_; }
+
+ private:
+  size_t block_num_;
+};
+
+class FftCheckerCallback : public webrtc::LappedTransform::Callback {
+ public:
+  FftCheckerCallback() : block_num_(0) {}
+
+  void ProcessAudioBlock(const complex<float>* const* in_block,
+                         size_t in_channels,
+                         size_t frames,
+                         size_t out_channels,
+                         complex<float>* const* out_block) override {
+    RTC_CHECK_EQ(in_channels, out_channels);
+
+    size_t full_length = (frames - 1) * 2;
+    ++block_num_;
+
+    if (block_num_ > 0) {
+      ASSERT_NEAR(in_block[0][0].real(), static_cast<float>(full_length),
+                  1e-5f);
+      ASSERT_NEAR(in_block[0][0].imag(), 0.0f, 1e-5f);
+      for (size_t i = 1; i < frames; ++i) {
+        ASSERT_NEAR(in_block[0][i].real(), 0.0f, 1e-5f);
+        ASSERT_NEAR(in_block[0][i].imag(), 0.0f, 1e-5f);
+      }
+    }
+  }
+
+  size_t block_num() { return block_num_; }
+
+ private:
+  size_t block_num_;
+};
+
+void SetFloatArray(float value, int rows, int cols, float* const* array) {
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      array[i][j] = value;
+    }
+  }
+}
+
+}  // namespace
+
+namespace webrtc {
+
+TEST(LappedTransformTest, Windowless) {
+  const size_t kChannels = 3;
+  const size_t kChunkLength = 512;
+  const size_t kBlockLength = 64;
+  const size_t kShiftAmount = 64;
+  NoopCallback noop;
+
+  // Rectangular window.
+  float window[kBlockLength];
+  std::fill(window, &window[kBlockLength], 1.0f);
+
+  LappedTransform trans(kChannels, kChannels, kChunkLength, window,
+                        kBlockLength, kShiftAmount, &noop);
+  float in_buffer[kChannels][kChunkLength];
+  float* in_chunk[kChannels];
+  float out_buffer[kChannels][kChunkLength];
+  float* out_chunk[kChannels];
+
+  in_chunk[0] = in_buffer[0];
+  in_chunk[1] = in_buffer[1];
+  in_chunk[2] = in_buffer[2];
+  out_chunk[0] = out_buffer[0];
+  out_chunk[1] = out_buffer[1];
+  out_chunk[2] = out_buffer[2];
+  SetFloatArray(2.0f, kChannels, kChunkLength, in_chunk);
+  SetFloatArray(-1.0f, kChannels, kChunkLength, out_chunk);
+
+  trans.ProcessChunk(in_chunk, out_chunk);
+
+  for (size_t i = 0; i < kChannels; ++i) {
+    for (size_t j = 0; j < kChunkLength; ++j) {
+      ASSERT_NEAR(out_chunk[i][j], 2.0f, 1e-5f);
+    }
+  }
+
+  ASSERT_EQ(kChunkLength / kBlockLength, noop.block_num());
+}
+
+TEST(LappedTransformTest, IdentityProcessor) {
+  const size_t kChunkLength = 512;
+  const size_t kBlockLength = 64;
+  const size_t kShiftAmount = 32;
+  NoopCallback noop;
+
+  // Identity window for |overlap = block_size / 2|.
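+  // With 50% overlap every sample is covered by exactly two blocks, and the
+  // window is applied twice (analysis and synthesis), so each block
+  // contributes window^2 = 0.5 of the sample; the two overlapping halves
+  // then sum back to the identity.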
+ float window[kBlockLength]; + std::fill(window, &window[kBlockLength], std::sqrt(0.5f)); + + LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kShiftAmount, + &noop); + float in_buffer[kChunkLength]; + float* in_chunk = in_buffer; + float out_buffer[kChunkLength]; + float* out_chunk = out_buffer; + + SetFloatArray(2.0f, 1, kChunkLength, &in_chunk); + SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk); + + trans.ProcessChunk(&in_chunk, &out_chunk); + + for (size_t i = 0; i < kChunkLength; ++i) { + ASSERT_NEAR(out_chunk[i], (i < kBlockLength - kShiftAmount) ? 0.0f : 2.0f, + 1e-5f); + } + + ASSERT_EQ(kChunkLength / kShiftAmount, noop.block_num()); +} + +TEST(LappedTransformTest, Callbacks) { + const size_t kChunkLength = 512; + const size_t kBlockLength = 64; + FftCheckerCallback call; + + // Rectangular window. + float window[kBlockLength]; + std::fill(window, &window[kBlockLength], 1.0f); + + LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kBlockLength, + &call); + float in_buffer[kChunkLength]; + float* in_chunk = in_buffer; + float out_buffer[kChunkLength]; + float* out_chunk = out_buffer; + + SetFloatArray(1.0f, 1, kChunkLength, &in_chunk); + SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk); + + trans.ProcessChunk(&in_chunk, &out_chunk); + + ASSERT_EQ(kChunkLength / kBlockLength, call.block_num()); +} + +TEST(LappedTransformTest, chunk_length) { + const size_t kBlockLength = 64; + FftCheckerCallback call; + const float window[kBlockLength] = {}; + + // Make sure that chunk_length returns the same value passed to the + // LappedTransform constructor. + { + const size_t kExpectedChunkLength = 512; + const LappedTransform trans(1, 1, kExpectedChunkLength, window, + kBlockLength, kBlockLength, &call); + + EXPECT_EQ(kExpectedChunkLength, trans.chunk_length()); + } + { + const size_t kExpectedChunkLength = 160; + const LappedTransform trans(1, 1, kExpectedChunkLength, window, + kBlockLength, kBlockLength, &call); + + EXPECT_EQ(kExpectedChunkLength, trans.chunk_length()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc new file mode 100644 index 0000000000..7761efe8b3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
+
+#include <utility>
+
+#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h"
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+AudioDecoderPcm16B::AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels)
+    : sample_rate_hz_(sample_rate_hz), num_channels_(num_channels) {
+  RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
+             sample_rate_hz == 32000 || sample_rate_hz == 48000)
+      << "Unsupported sample rate " << sample_rate_hz;
+  RTC_DCHECK_GE(num_channels, 1);
+}
+
+void AudioDecoderPcm16B::Reset() {}
+
+int AudioDecoderPcm16B::SampleRateHz() const {
+  return sample_rate_hz_;
+}
+
+size_t AudioDecoderPcm16B::Channels() const {
+  return num_channels_;
+}
+
+int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded,
+                                       size_t encoded_len,
+                                       int sample_rate_hz,
+                                       int16_t* decoded,
+                                       SpeechType* speech_type) {
+  RTC_DCHECK_EQ(sample_rate_hz_, sample_rate_hz);
+  // Adjust the encoded length down to ensure the same number of samples in
+  // each channel.
+  const size_t encoded_len_adjusted =
+      PacketDuration(encoded, encoded_len) * 2 *
+      Channels();  // 2 bytes per sample per channel
+  size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len_adjusted, decoded);
+  *speech_type = ConvertSpeechType(1);
+  return static_cast<int>(ret);
+}
+
+std::vector<AudioDecoder::ParseResult> AudioDecoderPcm16B::ParsePayload(
+    rtc::Buffer&& payload,
+    uint32_t timestamp) {
+  const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz_, 1000);
+  return LegacyEncodedAudioFrame::SplitBySamples(
+      this, std::move(payload), timestamp, samples_per_ms * 2 * num_channels_,
+      samples_per_ms);
+}
+
+int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,
+                                       size_t encoded_len) const {
+  // Two encoded bytes per sample per channel.
+  return static_cast<int>(encoded_len / (2 * Channels()));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
new file mode 100644
index 0000000000..6f50161d3f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_
+#define MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class AudioDecoderPcm16B final : public AudioDecoder {
+ public:
+  AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels);
+
+  AudioDecoderPcm16B(const AudioDecoderPcm16B&) = delete;
+  AudioDecoderPcm16B& operator=(const AudioDecoderPcm16B&) = delete;
+
+  void Reset() override;
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+  int PacketDuration(const uint8_t* encoded,
+                     size_t encoded_len) const override;
+  int SampleRateHz() const override;
+  size_t Channels() const override;
+
+ protected:
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+
+ private:
+  const int sample_rate_hz_;
+  const size_t num_channels_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc
new file mode 100644
index 0000000000..9445b1ee3e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
+
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+size_t AudioEncoderPcm16B::EncodeCall(const int16_t* audio,
+                                      size_t input_len,
+                                      uint8_t* encoded) {
+  return WebRtcPcm16b_Encode(audio, input_len, encoded);
+}
+
+size_t AudioEncoderPcm16B::BytesPerSample() const {
+  return 2;
+}
+
+AudioEncoder::CodecType AudioEncoderPcm16B::GetCodecType() const {
+  return CodecType::kOther;
+}
+
+bool AudioEncoderPcm16B::Config::IsOk() const {
+  if ((sample_rate_hz != 8000) && (sample_rate_hz != 16000) &&
+      (sample_rate_hz != 32000) && (sample_rate_hz != 48000))
+    return false;
+  return AudioEncoderPcm::Config::IsOk();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h
new file mode 100644
index 0000000000..c363b40b3f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_
+#define MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_
+
+#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
+
+namespace webrtc {
+
+class AudioEncoderPcm16B final : public AudioEncoderPcm {
+ public:
+  struct Config : public AudioEncoderPcm::Config {
+   public:
+    Config() : AudioEncoderPcm::Config(107), sample_rate_hz(8000) {}
+    bool IsOk() const;
+
+    int sample_rate_hz;
+  };
+
+  explicit AudioEncoderPcm16B(const Config& config)
+      : AudioEncoderPcm(config, config.sample_rate_hz) {}
+
+  AudioEncoderPcm16B(const AudioEncoderPcm16B&) = delete;
+  AudioEncoderPcm16B& operator=(const AudioEncoderPcm16B&) = delete;
+
+ protected:
+  size_t EncodeCall(const int16_t* audio,
+                    size_t input_len,
+                    uint8_t* encoded) override;
+
+  size_t BytesPerSample() const override;
+
+  AudioEncoder::CodecType GetCodecType() const override;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c
new file mode 100644
index 0000000000..2f6dce5f41
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+
+size_t WebRtcPcm16b_Encode(const int16_t* speech,
+                           size_t len,
+                           uint8_t* encoded) {
+  size_t i;
+  for (i = 0; i < len; ++i) {
+    uint16_t s = speech[i];
+    encoded[2 * i] = s >> 8;
+    encoded[2 * i + 1] = s;
+  }
+  return 2 * len;
+}
+
+size_t WebRtcPcm16b_Decode(const uint8_t* encoded,
+                           size_t len,
+                           int16_t* speech) {
+  size_t i;
+  for (i = 0; i < len / 2; ++i)
+    speech[i] = encoded[2 * i] << 8 | encoded[2 * i + 1];
+  return len / 2;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h
new file mode 100644
index 0000000000..75d1efda3b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_
+#define MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_
+/*
+ * Define the fixed-point numeric formats
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/****************************************************************************
+ * WebRtcPcm16b_Encode(...)
+ *
+ * "Encode" a sample vector to 16 bit linear (Encoded standard is big endian)
+ *
+ * Input:
+ *    - speech   : Input speech vector
+ *    - len      : Number of samples in speech vector
+ *
+ * Output:
+ *    - encoded  : Encoded data vector (big endian 16 bit)
+ *
+ * Returned value : Length (in bytes) of coded data.
+ *                  Always equal to twice the len input parameter.
+ */
+
+size_t WebRtcPcm16b_Encode(const int16_t* speech, size_t len, uint8_t* encoded);
+
+/****************************************************************************
+ * WebRtcPcm16b_Decode(...)
+ *
+ * "Decode" a vector to 16 bit linear (Encoded standard is big endian)
+ *
+ * Input:
+ *    - encoded  : Encoded data vector (big endian 16 bit)
+ *    - len      : Number of bytes in encoded
+ *
+ * Output:
+ *    - speech   : Decoded speech vector
+ *
+ * Returned value : Samples in speech
+ */
+
+size_t WebRtcPcm16b_Decode(const uint8_t* encoded, size_t len, int16_t* speech);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_ */
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc
new file mode 100644
index 0000000000..ecf91b45ac
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/pcm16b/pcm16b_common.h"
+
+#include <stdint.h>
+
+#include <initializer_list>
+
+namespace webrtc {
+
+void Pcm16BAppendSupportedCodecSpecs(std::vector<AudioCodecSpec>* specs) {
+  for (uint8_t num_channels : {1, 2}) {
+    for (int sample_rate_hz : {8000, 16000, 32000}) {
+      specs->push_back(
+          {{"L16", sample_rate_hz, num_channels},
+           {sample_rate_hz, num_channels, sample_rate_hz * num_channels * 16}});
+    }
+  }
+}
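+
+// For reference: for 16 kHz stereo the loop above appends an AudioCodecSpec
+// with format {"L16", 16000, 2} and info {16000, 2, 512000}; the bitrate is
+// sample_rate_hz * num_channels * 16 bits per sample.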
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h
new file mode 100644
index 0000000000..3fae717ff3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_
+#define MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_
+
+#include <vector>
+
+#include "api/audio_codecs/audio_format.h"
+
+namespace webrtc {
+void Pcm16BAppendSupportedCodecSpecs(std::vector<AudioCodecSpec>* specs);
+}
+
+#endif  // MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
new file mode 100644
index 0000000000..724bba52d6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h"
+
+#include <string.h>
+
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/byte_order.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+static constexpr const int kRedMaxPacketSize =
+    1 << 10;  // RED packets must be less than 1024 bytes to fit the 10 bit
+              // block length.
+static constexpr const size_t kRedMaxTimestampDelta =
+    1 << 14;  // RED packets can encode a timestamp delta of 14 bits.
+static constexpr const size_t kAudioMaxRtpPacketLen =
+    1200;  // The typical MTU is 1200 bytes.
+
+static constexpr size_t kRedHeaderLength = 4;  // 4 bytes RED header.
+static constexpr size_t kRedLastHeaderLength =
+    1;  // reduced size for last RED header.
+
+static constexpr size_t kRedNumberOfRedundantEncodings =
+    1;  // The level of redundancy we support.
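+
+// For orientation, the payload assembled by EncodeImpl() below follows
+// RFC 2198: N 4-byte headers (one per redundant block), one final 1-byte
+// header for the primary block, then the redundant blocks oldest-first,
+// and the primary encoding last:
+//
+//   [4-byte header]*N | [1-byte header] | [block]*N | [primary block]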
+
+AudioEncoderCopyRed::Config::Config() = default;
+AudioEncoderCopyRed::Config::Config(Config&&) = default;
+AudioEncoderCopyRed::Config::~Config() = default;
+
+size_t GetMaxRedundancyFromFieldTrial(const FieldTrialsView& field_trials) {
+  const std::string red_trial =
+      field_trials.Lookup("WebRTC-Audio-Red-For-Opus");
+  size_t redundancy = 0;
+  if (sscanf(red_trial.c_str(), "Enabled-%zu", &redundancy) != 1 ||
+      redundancy > 9) {
+    return kRedNumberOfRedundantEncodings;
+  }
+  return redundancy;
+}
+
+AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config,
+                                         const FieldTrialsView& field_trials)
+    : speech_encoder_(std::move(config.speech_encoder)),
+      primary_encoded_(0, kAudioMaxRtpPacketLen),
+      max_packet_length_(kAudioMaxRtpPacketLen),
+      red_payload_type_(config.payload_type) {
+  RTC_CHECK(speech_encoder_) << "Speech encoder not provided.";
+
+  auto number_of_redundant_encodings =
+      GetMaxRedundancyFromFieldTrial(field_trials);
+  for (size_t i = 0; i < number_of_redundant_encodings; i++) {
+    std::pair<EncodedInfo, rtc::Buffer> redundant;
+    redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen);
+    redundant_encodings_.push_front(std::move(redundant));
+  }
+}
+
+AudioEncoderCopyRed::~AudioEncoderCopyRed() = default;
+
+int AudioEncoderCopyRed::SampleRateHz() const {
+  return speech_encoder_->SampleRateHz();
+}
+
+size_t AudioEncoderCopyRed::NumChannels() const {
+  return speech_encoder_->NumChannels();
+}
+
+int AudioEncoderCopyRed::RtpTimestampRateHz() const {
+  return speech_encoder_->RtpTimestampRateHz();
+}
+
+size_t AudioEncoderCopyRed::Num10MsFramesInNextPacket() const {
+  return speech_encoder_->Num10MsFramesInNextPacket();
+}
+
+size_t AudioEncoderCopyRed::Max10MsFramesInAPacket() const {
+  return speech_encoder_->Max10MsFramesInAPacket();
+}
+
+int AudioEncoderCopyRed::GetTargetBitrate() const {
+  return speech_encoder_->GetTargetBitrate();
+}
+
+AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(
+    uint32_t rtp_timestamp,
+    rtc::ArrayView<const int16_t> audio,
+    rtc::Buffer* encoded) {
+  primary_encoded_.Clear();
+  EncodedInfo info =
+      speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded_);
+  RTC_CHECK(info.redundant.empty()) << "Cannot use nested redundant encoders.";
+  RTC_DCHECK_EQ(primary_encoded_.size(), info.encoded_bytes);
+
+  if (info.encoded_bytes == 0 || info.encoded_bytes >= kRedMaxPacketSize) {
+    return info;
+  }
+  RTC_DCHECK_GT(max_packet_length_, info.encoded_bytes);
+
+  size_t header_length_bytes = kRedLastHeaderLength;
+  size_t bytes_available = max_packet_length_ - info.encoded_bytes;
+  auto it = redundant_encodings_.begin();
+
+  // Determine how much redundancy we can fit into our packet by iterating
+  // forward. This is determined both by the length and by the timestamp
+  // difference. The latter matters with Opus DTX, which produces timestamp
+  // gaps of 400 ms, exceeding RED's timestamp delta field size.
+  for (; it != redundant_encodings_.end(); it++) {
+    if (bytes_available < kRedHeaderLength + it->first.encoded_bytes) {
+      break;
+    }
+    if (it->first.encoded_bytes == 0) {
+      break;
+    }
+    if (rtp_timestamp - it->first.encoded_timestamp >= kRedMaxTimestampDelta) {
+      break;
+    }
+    bytes_available -= kRedHeaderLength + it->first.encoded_bytes;
+    header_length_bytes += kRedHeaderLength;
+  }
+
+  // Allocate room for RFC 2198 header.
+  encoded->SetSize(header_length_bytes);
+
+  // Iterate backwards and append the data.
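+  // Each pass below emits one 4-byte RED header ahead of the payloads:
+  //   byte 0:    F bit (0x80) | redundant block payload type,
+  //   bytes 1-2: the 14-bit timestamp offset followed by the top 2 bits of
+  //              the 10-bit block length (big endian, packed via SetBE16),
+  //   byte 3:    the low 8 bits of the block length.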
+  size_t header_offset = 0;
+  while (it-- != redundant_encodings_.begin()) {
+    encoded->AppendData(it->second);
+
+    const uint32_t timestamp_delta =
+        info.encoded_timestamp - it->first.encoded_timestamp;
+    encoded->data()[header_offset] = it->first.payload_type | 0x80;
+    rtc::SetBE16(static_cast<uint8_t*>(encoded->data()) + header_offset + 1,
+                 (timestamp_delta << 2) | (it->first.encoded_bytes >> 8));
+    encoded->data()[header_offset + 3] = it->first.encoded_bytes & 0xff;
+    header_offset += kRedHeaderLength;
+    info.redundant.push_back(it->first);
+  }
+
+  // `info` will be implicitly cast to an EncodedInfoLeaf struct, effectively
+  // discarding the (empty) vector of redundant information. This is
+  // intentional.
+  if (header_length_bytes > kRedHeaderLength) {
+    info.redundant.push_back(info);
+    RTC_DCHECK_EQ(info.speech,
+                  info.redundant[info.redundant.size() - 1].speech);
+  }
+
+  encoded->AppendData(primary_encoded_);
+  RTC_DCHECK_EQ(header_offset, header_length_bytes - 1);
+  encoded->data()[header_offset] = info.payload_type;
+
+  // Shift the redundant encodings.
+  auto rit = redundant_encodings_.rbegin();
+  for (auto next = std::next(rit); next != redundant_encodings_.rend();
+       rit++, next = std::next(rit)) {
+    rit->first = next->first;
+    rit->second.SetData(next->second);
+  }
+  it = redundant_encodings_.begin();
+  if (it != redundant_encodings_.end()) {
+    it->first = info;
+    it->second.SetData(primary_encoded_);
+  }
+
+  // Update main EncodedInfo.
+  info.payload_type = red_payload_type_;
+  info.encoded_bytes = encoded->size();
+  return info;
+}
+
+void AudioEncoderCopyRed::Reset() {
+  speech_encoder_->Reset();
+  auto number_of_redundant_encodings = redundant_encodings_.size();
+  redundant_encodings_.clear();
+  for (size_t i = 0; i < number_of_redundant_encodings; i++) {
+    std::pair<EncodedInfo, rtc::Buffer> redundant;
+    redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen);
+    redundant_encodings_.push_front(std::move(redundant));
+  }
+}
+
+bool AudioEncoderCopyRed::SetFec(bool enable) {
+  return speech_encoder_->SetFec(enable);
+}
+
+bool AudioEncoderCopyRed::SetDtx(bool enable) {
+  return speech_encoder_->SetDtx(enable);
+}
+
+bool AudioEncoderCopyRed::GetDtx() const {
+  return speech_encoder_->GetDtx();
+}
+
+bool AudioEncoderCopyRed::SetApplication(Application application) {
+  return speech_encoder_->SetApplication(application);
+}
+
+void AudioEncoderCopyRed::SetMaxPlaybackRate(int frequency_hz) {
+  speech_encoder_->SetMaxPlaybackRate(frequency_hz);
+}
+
+bool AudioEncoderCopyRed::EnableAudioNetworkAdaptor(
+    const std::string& config_string,
+    RtcEventLog* event_log) {
+  return speech_encoder_->EnableAudioNetworkAdaptor(config_string, event_log);
+}
+
+void AudioEncoderCopyRed::DisableAudioNetworkAdaptor() {
+  speech_encoder_->DisableAudioNetworkAdaptor();
+}
+
+void AudioEncoderCopyRed::OnReceivedUplinkPacketLossFraction(
+    float uplink_packet_loss_fraction) {
+  speech_encoder_->OnReceivedUplinkPacketLossFraction(
+      uplink_packet_loss_fraction);
+}
+
+void AudioEncoderCopyRed::OnReceivedUplinkBandwidth(
+    int target_audio_bitrate_bps,
+    absl::optional<int64_t> bwe_period_ms) {
+  speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
+                                             bwe_period_ms);
+}
+
+void AudioEncoderCopyRed::OnReceivedUplinkAllocation(
+    BitrateAllocationUpdate update) {
+  speech_encoder_->OnReceivedUplinkAllocation(update);
+}
+
+absl::optional<std::pair<TimeDelta, TimeDelta>>
+AudioEncoderCopyRed::GetFrameLengthRange() const {
+  return speech_encoder_->GetFrameLengthRange();
+}
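+
+// The overrides below are plain pass-throughs to the wrapped speech encoder;
+// the only RED-specific twist is OnReceivedOverhead(), which also shrinks
+// `max_packet_length_` by the reported per-packet overhead.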
+void AudioEncoderCopyRed::OnReceivedRtt(int rtt_ms) {
+  speech_encoder_->OnReceivedRtt(rtt_ms);
+}
+
+void AudioEncoderCopyRed::OnReceivedOverhead(size_t overhead_bytes_per_packet) {
+  max_packet_length_ = kAudioMaxRtpPacketLen - overhead_bytes_per_packet;
+  return speech_encoder_->OnReceivedOverhead(overhead_bytes_per_packet);
+}
+
+void AudioEncoderCopyRed::SetReceiverFrameLengthRange(int min_frame_length_ms,
+                                                      int max_frame_length_ms) {
+  return speech_encoder_->SetReceiverFrameLengthRange(min_frame_length_ms,
+                                                      max_frame_length_ms);
+}
+
+ANAStats AudioEncoderCopyRed::GetANAStats() const {
+  return speech_encoder_->GetANAStats();
+}
+
+rtc::ArrayView<std::unique_ptr<AudioEncoder>>
+AudioEncoderCopyRed::ReclaimContainedEncoders() {
+  return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
new file mode 100644
index 0000000000..359b5eaa17
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_
+#define MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <list>
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/field_trials_view.h"
+#include "api/units/time_delta.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+// This class implements redundant audio coding as described in
+// https://tools.ietf.org/html/rfc2198
+// The class object will have an underlying AudioEncoder object that performs
+// the actual encodings. The current class will gather the N latest encodings
+// from the underlying codec into one packet. Currently N is hard-coded to 2.
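+// The redundancy level is configurable at construction time through the
+// "WebRTC-Audio-Red-For-Opus" field trial ("Enabled-<N>", N in 0..9), parsed
+// by GetMaxRedundancyFromFieldTrial() in the .cc file.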
+
+class AudioEncoderCopyRed final : public AudioEncoder {
+ public:
+  struct Config {
+    Config();
+    Config(Config&&);
+    ~Config();
+    int payload_type;
+    std::unique_ptr<AudioEncoder> speech_encoder;
+  };
+
+  AudioEncoderCopyRed(Config&& config, const FieldTrialsView& field_trials);
+
+  ~AudioEncoderCopyRed() override;
+
+  AudioEncoderCopyRed(const AudioEncoderCopyRed&) = delete;
+  AudioEncoderCopyRed& operator=(const AudioEncoderCopyRed&) = delete;
+
+  int SampleRateHz() const override;
+  size_t NumChannels() const override;
+  int RtpTimestampRateHz() const override;
+  size_t Num10MsFramesInNextPacket() const override;
+  size_t Max10MsFramesInAPacket() const override;
+  int GetTargetBitrate() const override;
+
+  void Reset() override;
+  bool SetFec(bool enable) override;
+
+  bool SetDtx(bool enable) override;
+  bool GetDtx() const override;
+
+  bool SetApplication(Application application) override;
+  void SetMaxPlaybackRate(int frequency_hz) override;
+  bool EnableAudioNetworkAdaptor(const std::string& config_string,
+                                 RtcEventLog* event_log) override;
+  void DisableAudioNetworkAdaptor() override;
+  void OnReceivedUplinkPacketLossFraction(
+      float uplink_packet_loss_fraction) override;
+  void OnReceivedUplinkBandwidth(
+      int target_audio_bitrate_bps,
+      absl::optional<int64_t> bwe_period_ms) override;
+  void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override;
+  void OnReceivedRtt(int rtt_ms) override;
+  void OnReceivedOverhead(size_t overhead_bytes_per_packet) override;
+  void SetReceiverFrameLengthRange(int min_frame_length_ms,
+                                   int max_frame_length_ms) override;
+  ANAStats GetANAStats() const override;
+  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
+      const override;
+  rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders()
+      override;
+
+ protected:
+  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
+                         rtc::ArrayView<const int16_t> audio,
+                         rtc::Buffer* encoded) override;
+
+ private:
+  std::unique_ptr<AudioEncoder> speech_encoder_;
+  rtc::Buffer primary_encoded_;
+  size_t max_packet_length_;
+  int red_payload_type_;
+  std::list<std::pair<EncodedInfo, rtc::Buffer>> redundant_encodings_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
new file mode 100644
index 0000000000..795a996624
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
@@ -0,0 +1,641 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h"
+
+#include <memory>
+#include <vector>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+#include "test/mock_audio_encoder.h"
+#include "test/scoped_key_value_config.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+using ::testing::_;
+using ::testing::Eq;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::MockFunction;
+using ::testing::Not;
+using ::testing::Optional;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+
+namespace {
+static const size_t kMaxNumSamples = 48 * 10 * 2;  // 10 ms @ 48 kHz stereo.
+static const size_t kRedLastHeaderLength =
+    1;  // 1 byte RED header for the last element.
+}  // namespace
+
+class AudioEncoderCopyRedTest : public ::testing::Test {
+ protected:
+  AudioEncoderCopyRedTest()
+      : mock_encoder_(new MockAudioEncoder),
+        timestamp_(4711),
+        sample_rate_hz_(16000),
+        num_audio_samples_10ms(sample_rate_hz_ / 100),
+        red_payload_type_(200) {
+    AudioEncoderCopyRed::Config config;
+    config.payload_type = red_payload_type_;
+    config.speech_encoder = std::unique_ptr<AudioEncoder>(mock_encoder_);
+    red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials_));
+    memset(audio_, 0, sizeof(audio_));
+    EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(1U));
+    EXPECT_CALL(*mock_encoder_, SampleRateHz())
+        .WillRepeatedly(Return(sample_rate_hz_));
+  }
+
+  void TearDown() override { red_.reset(); }
+
+  void Encode() {
+    ASSERT_TRUE(red_.get() != NULL);
+    encoded_.Clear();
+    encoded_info_ = red_->Encode(
+        timestamp_,
+        rtc::ArrayView<const int16_t>(audio_, num_audio_samples_10ms),
+        &encoded_);
+    timestamp_ += rtc::checked_cast<uint32_t>(num_audio_samples_10ms);
+  }
+
+  test::ScopedKeyValueConfig field_trials_;
+  MockAudioEncoder* mock_encoder_;
+  std::unique_ptr<AudioEncoderCopyRed> red_;
+  uint32_t timestamp_;
+  int16_t audio_[kMaxNumSamples];
+  const int sample_rate_hz_;
+  size_t num_audio_samples_10ms;
+  rtc::Buffer encoded_;
+  AudioEncoder::EncodedInfo encoded_info_;
+  const int red_payload_type_;
+};
+
+TEST_F(AudioEncoderCopyRedTest, CreateAndDestroy) {}
+
+TEST_F(AudioEncoderCopyRedTest, CheckSampleRatePropagation) {
+  EXPECT_CALL(*mock_encoder_, SampleRateHz()).WillOnce(Return(17));
+  EXPECT_EQ(17, red_->SampleRateHz());
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckNumChannelsPropagation) {
+  EXPECT_CALL(*mock_encoder_, NumChannels()).WillOnce(Return(17U));
+  EXPECT_EQ(17U, red_->NumChannels());
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckFrameSizePropagation) {
+  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
+      .WillOnce(Return(17U));
+  EXPECT_EQ(17U, red_->Num10MsFramesInNextPacket());
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckMaxFrameSizePropagation) {
+  EXPECT_CALL(*mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(17U));
+  EXPECT_EQ(17U, red_->Max10MsFramesInAPacket());
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckTargetAudioBitratePropagation) {
+  EXPECT_CALL(*mock_encoder_,
+              OnReceivedUplinkBandwidth(4711, absl::optional<int64_t>()));
+  red_->OnReceivedUplinkBandwidth(4711, absl::nullopt);
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckPacketLossFractionPropagation) {
+  EXPECT_CALL(*mock_encoder_, OnReceivedUplinkPacketLossFraction(0.5));
+  red_->OnReceivedUplinkPacketLossFraction(0.5);
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckGetFrameLengthRangePropagation) {
+  auto expected_range =
+      std::make_pair(TimeDelta::Millis(20), TimeDelta::Millis(20));
+  EXPECT_CALL(*mock_encoder_, GetFrameLengthRange())
+      .WillRepeatedly(Return(absl::make_optional(expected_range)));
+  EXPECT_THAT(red_->GetFrameLengthRange(), Optional(Eq(expected_range)));
+}
+
+// Checks that an Encode() call is immediately propagated to the speech
+// encoder.
+TEST_F(AudioEncoderCopyRedTest, CheckImmediateEncode) {
+  // Interleaving the EXPECT_CALL sequence with expectations on the
+  // MockFunction check ensures that exactly one call to EncodeImpl happens in
+  // each Encode call.
+  InSequence s;
+  MockFunction<void(int check_point_id)> check;
+  for (int i = 1; i <= 6; ++i) {
+    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+        .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+    EXPECT_CALL(check, Call(i));
+    Encode();
+    check.Call(i);
+  }
+}
+
+// Checks that no output is produced if the underlying codec doesn't emit any
+// new data, even if the RED codec is loaded with a secondary encoding.
+TEST_F(AudioEncoderCopyRedTest, CheckNoOutput) {
+  static const size_t kEncodedSize = 17;
+  static const size_t kHeaderLenBytes = 5;
+  {
+    InSequence s;
+    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+        .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(kEncodedSize)))
+        .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(0)))
+        .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(kEncodedSize)));
+  }
+
+  // Start with one Encode() call that will produce output.
+  Encode();
+  // First call is a special case, since it does not include a secondary
+  // payload.
+  EXPECT_EQ(0u, encoded_info_.redundant.size());
+  EXPECT_EQ(kEncodedSize + kRedLastHeaderLength, encoded_info_.encoded_bytes);
+
+  // Next call to the speech encoder will not produce any output.
+  Encode();
+  EXPECT_EQ(0u, encoded_info_.encoded_bytes);
+
+  // Final call to the speech encoder will produce output.
+  Encode();
+  EXPECT_EQ(2 * kEncodedSize + kHeaderLenBytes, encoded_info_.encoded_bytes);
+  ASSERT_EQ(2u, encoded_info_.redundant.size());
+}
+
+// Checks that the correct payload sizes are populated into the redundancy
+// information for a redundancy level of 1.
+TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes1) {
+  // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the
+  // sequence of calls.
+  static const int kNumPackets = 10;
+  InSequence s;
+  for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) {
+    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+        .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size)));
+  }
+
+  // First call is a special case, since it does not include a secondary
+  // payload.
+  Encode();
+  EXPECT_EQ(0u, encoded_info_.redundant.size());
+  EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes);
+
+  for (size_t i = 2; i <= kNumPackets; ++i) {
+    Encode();
+    ASSERT_EQ(2u, encoded_info_.redundant.size());
+    EXPECT_EQ(i, encoded_info_.redundant[1].encoded_bytes);
+    EXPECT_EQ(i - 1, encoded_info_.redundant[0].encoded_bytes);
+    EXPECT_EQ(5 + i + (i - 1), encoded_info_.encoded_bytes);
+  }
+}
+
+// Checks that the correct payload sizes are populated into the redundancy
+// information for a redundancy level of 0.
+TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes0) {
+  webrtc::test::ScopedKeyValueConfig field_trials(
+      field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-0/");
+  // Recreate the RED encoder to take the new field trial setting into account.
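+  // (ReclaimContainedEncoders() hands ownership of the wrapped mock back to
+  // the test so the same speech encoder can be moved into the new instance.)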
+ AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + for (size_t i = 1; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(1 + i, encoded_info_.encoded_bytes); + } +} +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 2. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes2) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-2/"); + // Recreate the RED encoder to take the new field trial setting into account. + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes); + + // Second call is also special since it does not include a tertiary + // payload. + Encode(); + EXPECT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(8u, encoded_info_.encoded_bytes); + + for (size_t i = 3; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(3u, encoded_info_.redundant.size()); + EXPECT_EQ(i, encoded_info_.redundant[2].encoded_bytes); + EXPECT_EQ(i - 1, encoded_info_.redundant[1].encoded_bytes); + EXPECT_EQ(i - 2, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(9 + i + (i - 1) + (i - 2), encoded_info_.encoded_bytes); + } +} + +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 3. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes3) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-3/"); + // Recreate the RED encoder to take the new field trial setting into account. + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials_)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + // First call is a special case, since it does not include a secondary + // payload. 
+ Encode(); + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes); + + // Second call is also special since it does not include a tertiary + // payload. + Encode(); + EXPECT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(8u, encoded_info_.encoded_bytes); + + // Third call is also special since it does not include a quaternary + // payload. + Encode(); + EXPECT_EQ(3u, encoded_info_.redundant.size()); + EXPECT_EQ(15u, encoded_info_.encoded_bytes); + + for (size_t i = 4; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(4u, encoded_info_.redundant.size()); + EXPECT_EQ(i, encoded_info_.redundant[3].encoded_bytes); + EXPECT_EQ(i - 1, encoded_info_.redundant[2].encoded_bytes); + EXPECT_EQ(i - 2, encoded_info_.redundant[1].encoded_bytes); + EXPECT_EQ(i - 3, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(13 + i + (i - 1) + (i - 2) + (i - 3), + encoded_info_.encoded_bytes); + } +} + +// Checks that the correct timestamps are returned. +TEST_F(AudioEncoderCopyRedTest, CheckTimestamps) { + uint32_t primary_timestamp = timestamp_; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 17; + info.encoded_timestamp = timestamp_; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp); + + uint32_t secondary_timestamp = primary_timestamp; + primary_timestamp = timestamp_; + info.encoded_timestamp = timestamp_; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(primary_timestamp, encoded_info_.redundant[1].encoded_timestamp); + EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[0].encoded_timestamp); + EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp); +} + +// Checks that the primary and secondary payloads are written correctly. +TEST_F(AudioEncoderCopyRedTest, CheckPayloads) { + // Let the mock encoder write payloads with increasing values. The first + // payload will have values 0, 1, 2, ..., kPayloadLenBytes - 1. + static const size_t kPayloadLenBytes = 5; + static const size_t kHeaderLenBytes = 5; + uint8_t payload[kPayloadLenBytes]; + for (uint8_t i = 0; i < kPayloadLenBytes; ++i) { + payload[i] = i; + } + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillRepeatedly(Invoke(MockAudioEncoder::CopyEncoding(payload))); + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(kRedLastHeaderLength + kPayloadLenBytes, + encoded_info_.encoded_bytes); + for (size_t i = 0; i < kPayloadLenBytes; ++i) { + EXPECT_EQ(i, encoded_.data()[kRedLastHeaderLength + i]); + } + + for (int j = 0; j < 1; ++j) { + // Increment all values of the payload by 10. + for (size_t i = 0; i < kPayloadLenBytes; ++i) + payload[i] += 10; + + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(kPayloadLenBytes, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(kPayloadLenBytes, encoded_info_.redundant[1].encoded_bytes); + for (size_t i = 0; i < kPayloadLenBytes; ++i) { + // Check secondary payload. + EXPECT_EQ(j * 10 + i, encoded_.data()[kHeaderLenBytes + i]); + + // Check primary payload. 
+      EXPECT_EQ((j + 1) * 10 + i,
+                encoded_.data()[kHeaderLenBytes + i + kPayloadLenBytes]);
+    }
+  }
+}
+
+// Checks correct propagation of payload type.
+TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) {
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 17;
+  info.payload_type = primary_payload_type;
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+
+  // First call is a special case, since it does not include a secondary
+  // payload.
+  Encode();
+  ASSERT_EQ(0u, encoded_info_.redundant.size());
+
+  const int secondary_payload_type = red_payload_type_ + 2;
+  info.payload_type = secondary_payload_type;
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+
+  Encode();
+  ASSERT_EQ(2u, encoded_info_.redundant.size());
+  EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[1].payload_type);
+  EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type);
+  EXPECT_EQ(red_payload_type_, encoded_info_.payload_type);
+}
+
+TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) {
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 10;
+  info.encoded_timestamp = timestamp_;
+  info.payload_type = primary_payload_type;
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Second call will produce a redundant encoding.
+
+  EXPECT_EQ(encoded_.size(),
+            5u + 2 * 10u);  // header size + two encoded payloads.
+  EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
+
+  uint32_t timestamp_delta = encoded_info_.encoded_timestamp -
+                             encoded_info_.redundant[0].encoded_timestamp;
+  // Timestamp delta is encoded as a 14 bit value.
+  EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
+  EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
+  // Redundant length is encoded as 10 bit value.
+  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff);
+  EXPECT_EQ(encoded_[4], primary_payload_type);
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Third call will produce a redundant encoding with double
+             // redundancy.
+
+  EXPECT_EQ(encoded_.size(),
+            5u + 2 * 10u);  // header size + two encoded payloads.
+  EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
+
+  timestamp_delta = encoded_info_.encoded_timestamp -
+                    encoded_info_.redundant[0].encoded_timestamp;
+  // Timestamp delta is encoded as a 14 bit value.
+  EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
+  EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
+  // Redundant length is encoded as 10 bit value.
+  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff);
+
+  EXPECT_EQ(encoded_[4], primary_payload_type | 0x80);
+  timestamp_delta = encoded_info_.encoded_timestamp -
+                    encoded_info_.redundant[1].encoded_timestamp;
+}
+
+// Variant with a redundancy of 0.
+TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header0) {
+  webrtc::test::ScopedKeyValueConfig field_trials(
+      field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-0/");
+  // Recreate the RED encoder to take the new field trial setting into account.
+  AudioEncoderCopyRed::Config config;
+  config.payload_type = red_payload_type_;
+  config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]);
+  red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials));
+
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 10;
+  info.encoded_timestamp = timestamp_;
+  info.payload_type = primary_payload_type;
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Second call will not produce a redundant encoding.
+
+  EXPECT_EQ(encoded_.size(),
+            1u + 1 * 10u);  // header size + one encoded payload.
+  EXPECT_EQ(encoded_[0], primary_payload_type);
+}
+
+// Variant with a redundancy of 2.
+TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header2) {
+  webrtc::test::ScopedKeyValueConfig field_trials(
+      field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-2/");
+  // Recreate the RED encoder to take the new field trial setting into account.
+  AudioEncoderCopyRed::Config config;
+  config.payload_type = red_payload_type_;
+  config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]);
+  red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials));
+
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 10;
+  info.encoded_timestamp = timestamp_;
+  info.payload_type = primary_payload_type;
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Second call will produce a redundant encoding.
+
+  EXPECT_EQ(encoded_.size(),
+            5u + 2 * 10u);  // header size + two encoded payloads.
+  EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
+
+  uint32_t timestamp_delta = encoded_info_.encoded_timestamp -
+                             encoded_info_.redundant[0].encoded_timestamp;
+  // Timestamp delta is encoded as a 14 bit value.
+  EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
+  EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
+  // Redundant length is encoded as 10 bit value.
+  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff);
+  EXPECT_EQ(encoded_[4], primary_payload_type);
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Third call will produce a redundant encoding with double
+             // redundancy.
+
+  EXPECT_EQ(encoded_.size(),
+            9u + 3 * 10u);  // header size + three encoded payloads.
+  EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
+
+  timestamp_delta = encoded_info_.encoded_timestamp -
+                    encoded_info_.redundant[0].encoded_timestamp;
+  // Timestamp delta is encoded as a 14 bit value.
+  EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
+  EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
+  // Redundant length is encoded as 10 bit value.
+  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff);
+
+  EXPECT_EQ(encoded_[4], primary_payload_type | 0x80);
+  timestamp_delta = encoded_info_.encoded_timestamp -
+                    encoded_info_.redundant[1].encoded_timestamp;
+  // Timestamp delta is encoded as a 14 bit value.
+  EXPECT_EQ(encoded_[5], timestamp_delta >> 6);
+  EXPECT_EQ(static_cast<uint8_t>(encoded_[6] >> 2), timestamp_delta & 0x3f);
+  // Redundant length is encoded as 10 bit value.
+  EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[7], encoded_info_.redundant[1].encoded_bytes & 0xff);
+  EXPECT_EQ(encoded_[8], primary_payload_type);
+}
+
+TEST_F(AudioEncoderCopyRedTest, RespectsPayloadMTU) {
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 600;
+  info.encoded_timestamp = timestamp_;
+  info.payload_type = primary_payload_type;
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  info.encoded_bytes = 500;
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Second call will produce a redundant encoding.
+
+  EXPECT_EQ(encoded_.size(), 5u + 600u + 500u);
+
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  info.encoded_bytes = 400;
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();  // Third call will drop the oldest packet.
+  EXPECT_EQ(encoded_.size(), 5u + 500u + 400u);
+}
+
+TEST_F(AudioEncoderCopyRedTest, LargeTimestampGap) {
+  const int primary_payload_type = red_payload_type_ + 1;
+  AudioEncoder::EncodedInfo info;
+  info.encoded_bytes = 100;
+  info.encoded_timestamp = timestamp_;
+  info.payload_type = primary_payload_type;
+
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+  // Update timestamp to simulate a 400 ms gap like the one
+  // Opus DTX causes.
+  timestamp_ += 19200;
+  info.encoded_timestamp = timestamp_;  // update timestamp.
+  info.encoded_bytes = 200;
+  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
+      .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info)));
+  Encode();
+
+  // The old packet will be dropped.
+  EXPECT_EQ(encoded_.size(), 1u + 200u);
+}
+
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// This test fixture tests various error conditions that make the
+// AudioEncoderCopyRed die via CHECKs.
+class AudioEncoderCopyRedDeathTest : public AudioEncoderCopyRedTest {
+ protected:
+  AudioEncoderCopyRedDeathTest() : AudioEncoderCopyRedTest() {}
+};
+
+TEST_F(AudioEncoderCopyRedDeathTest, WrongFrameSize) {
+  num_audio_samples_10ms *= 2;  // 20 ms frame.
+  RTC_EXPECT_DEATH(Encode(), "");
+  num_audio_samples_10ms = 0;  // Zero samples.
+ RTC_EXPECT_DEATH(Encode(), ""); +} + +TEST_F(AudioEncoderCopyRedDeathTest, NullSpeechEncoder) { + test::ScopedKeyValueConfig field_trials; + AudioEncoderCopyRed* red = NULL; + AudioEncoderCopyRed::Config config; + config.speech_encoder = NULL; + RTC_EXPECT_DEATH( + red = new AudioEncoderCopyRed(std::move(config), field_trials), + "Speech encoder not provided."); + // The delete operation is needed to avoid leak reports from memcheck. + delete red; +} + +#endif // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc new file mode 100644 index 0000000000..537e6fcede --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h" + +#include "rtc_base/checks.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +using ::std::get; + +namespace webrtc { + +AudioCodecSpeedTest::AudioCodecSpeedTest(int block_duration_ms, + int input_sampling_khz, + int output_sampling_khz) + : block_duration_ms_(block_duration_ms), + input_sampling_khz_(input_sampling_khz), + output_sampling_khz_(output_sampling_khz), + input_length_sample_( + static_cast(block_duration_ms_ * input_sampling_khz_)), + output_length_sample_( + static_cast(block_duration_ms_ * output_sampling_khz_)), + data_pointer_(0), + loop_length_samples_(0), + max_bytes_(0), + encoded_bytes_(0), + encoding_time_ms_(0.0), + decoding_time_ms_(0.0), + out_file_(NULL) {} + +void AudioCodecSpeedTest::SetUp() { + channels_ = get<0>(GetParam()); + bit_rate_ = get<1>(GetParam()); + in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam())); + save_out_data_ = get<4>(GetParam()); + + FILE* fp = fopen(in_filename_.c_str(), "rb"); + RTC_DCHECK(fp); + + // Obtain file size. + fseek(fp, 0, SEEK_END); + loop_length_samples_ = ftell(fp) / sizeof(int16_t); + rewind(fp); + + // Allocate memory to contain the whole file. + in_data_.reset( + new int16_t[loop_length_samples_ + input_length_sample_ * channels_]); + + data_pointer_ = 0; + + // Copy the file into the buffer. + ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp), + loop_length_samples_); + fclose(fp); + + // Add an extra block length of samples to the end of the array, starting + // over again from the beginning of the array. This is done to simplify + // the reading process when reading over the end of the loop. 
+  memcpy(&in_data_[loop_length_samples_], &in_data_[0],
+         input_length_sample_ * channels_ * sizeof(int16_t));
+
+  max_bytes_ = input_length_sample_ * channels_ * sizeof(int16_t);
+  out_data_.reset(new int16_t[output_length_sample_ * channels_]);
+  bit_stream_.reset(new uint8_t[max_bytes_]);
+
+  if (save_out_data_) {
+    std::string out_filename =
+        ::testing::UnitTest::GetInstance()->current_test_info()->name();
+
+    // Erase '/'
+    size_t found;
+    while ((found = out_filename.find('/')) != std::string::npos)
+      out_filename.replace(found, 1, "_");
+
+    out_filename = test::OutputPath() + out_filename + ".pcm";
+
+    out_file_ = fopen(out_filename.c_str(), "wb");
+    RTC_DCHECK(out_file_);
+
+    printf("Output to be saved in %s.\n", out_filename.c_str());
+  }
+}
+
+void AudioCodecSpeedTest::TearDown() {
+  if (save_out_data_) {
+    fclose(out_file_);
+  }
+}
+
+void AudioCodecSpeedTest::EncodeDecode(size_t audio_duration_sec) {
+  size_t time_now_ms = 0;
+  float time_ms;
+
+  printf("Coding %d kHz-sampled %zu-channel audio at %d bps ...\n",
+         input_sampling_khz_, channels_, bit_rate_);
+
+  while (time_now_ms < audio_duration_sec * 1000) {
+    // Encode & decode.
+    time_ms = EncodeABlock(&in_data_[data_pointer_], &bit_stream_[0],
+                           max_bytes_, &encoded_bytes_);
+    encoding_time_ms_ += time_ms;
+    time_ms = DecodeABlock(&bit_stream_[0], encoded_bytes_, &out_data_[0]);
+    decoding_time_ms_ += time_ms;
+    if (save_out_data_) {
+      fwrite(&out_data_[0], sizeof(int16_t), output_length_sample_ * channels_,
+             out_file_);
+    }
+    data_pointer_ = (data_pointer_ + input_length_sample_ * channels_) %
+                    loop_length_samples_;
+    time_now_ms += block_duration_ms_;
+  }
+
+  printf("Encoding: %.2f%% real time,\nDecoding: %.2f%% real time.\n",
+         (encoding_time_ms_ / audio_duration_sec) / 10.0,
+         (decoding_time_ms_ / audio_duration_sec) / 10.0);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h
new file mode 100644
index 0000000000..c5f1d7c259
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_
+#define MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_
+
+#include <memory>
+#include <string>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+// Define coding parameter as
+// <channels, bit_rate, file_name, extension_name, if_save_output>.
+typedef std::tuple<size_t, int, std::string, std::string, bool> coding_param;
+
+class AudioCodecSpeedTest : public ::testing::TestWithParam<coding_param> {
+ protected:
+  AudioCodecSpeedTest(int block_duration_ms,
+                      int input_sampling_khz,
+                      int output_sampling_khz);
+  virtual void SetUp();
+  virtual void TearDown();
+
+  // EncodeABlock(...) does the following:
+  // 1. encodes a block of audio, saved in `in_data`,
+  // 2. saves the bit stream to `bit_stream` of `max_bytes` bytes in size,
+  // 3. assigns `encoded_bytes` the length of the bit stream (in bytes),
+  // 4. returns the time (in milliseconds) spent on actual encoding.
+  virtual float EncodeABlock(int16_t* in_data,
+                             uint8_t* bit_stream,
+                             size_t max_bytes,
+                             size_t* encoded_bytes) = 0;
+
+  // DecodeABlock(...) does the following:
+  // 1. decodes the bit stream in `bit_stream` with a length of
+  //    `encoded_bytes` (in bytes),
+  // 2. saves the decoded audio in `out_data`,
+  // 3. returns the time (in milliseconds) spent on actual decoding.
+  virtual float DecodeABlock(const uint8_t* bit_stream,
+                             size_t encoded_bytes,
+                             int16_t* out_data) = 0;
+
+  // Encodes and decodes audio of `audio_duration` (in seconds) and
+  // records the runtime for encoding and decoding separately.
+  void EncodeDecode(size_t audio_duration);
+
+  int block_duration_ms_;
+  int input_sampling_khz_;
+  int output_sampling_khz_;
+
+  // Number of samples-per-channel in a frame.
+  size_t input_length_sample_;
+
+  // Expected output number of samples-per-channel in a frame.
+  size_t output_length_sample_;
+
+  std::unique_ptr<int16_t[]> in_data_;
+  std::unique_ptr<int16_t[]> out_data_;
+  size_t data_pointer_;
+  size_t loop_length_samples_;
+  std::unique_ptr<uint8_t[]> bit_stream_;
+
+  // Maximum number of bytes in output bitstream for a frame of audio.
+  size_t max_bytes_;
+
+  size_t encoded_bytes_;
+  float encoding_time_ms_;
+  float decoding_time_ms_;
+  FILE* out_file_;
+
+  size_t channels_;
+
+  // Bit rate is in bit-per-second.
+  int bit_rate_;
+
+  std::string in_filename_;
+
+  // Determines whether to save the output to file.
+  bool save_out_data_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/default_neteq_factory_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/default_neteq_factory_gn/moz.build
new file mode 100644
index 0000000000..206a4ae313
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/default_neteq_factory_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", 
+ "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("default_neteq_factory_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/g3doc/index.md b/third_party/libwebrtc/modules/audio_coding/g3doc/index.md new file mode 100644 index 0000000000..467563e696 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/g3doc/index.md @@ -0,0 +1,32 @@ + + + +# The WebRTC Audio Coding Module + +WebRTC audio coding module can handle both audio sending and receiving. Folder +[`acm2`][acm2] contains implementations of the APIs. + +* Audio Sending Audio frames, each of which should always contain 10 ms worth + of data, are provided to the audio coding module through + [`Add10MsData()`][Add10MsData]. The audio coding module uses a provided + audio encoder to encoded audio frames and deliver the data to a + pre-registered audio packetization callback, which is supposed to wrap the + encoded audio into RTP packets and send them over a transport. Built-in + audio codecs are included the [`codecs`][codecs] folder. The + [audio network adaptor][ANA] provides an add-on functionality to an audio + encoder (currently limited to Opus) to make the audio encoder adaptive to + network conditions (bandwidth, packet loss rate, etc). + +* Audio Receiving Audio packets are provided to the audio coding module + through [`IncomingPacket()`][IncomingPacket], and are processed by an audio + jitter buffer ([NetEq][NetEq]), which includes decoding of the packets. + Audio decoders are provided by an audio decoder factory. 
+
+[acm2]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/acm2/;drc=854d59f7501aac9e9bccfa7b4d1f7f4db7842719
+[Add10MsData]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/include/audio_coding_module.h;l=136;drc=d82a02c837d33cdfd75121e40dcccd32515e42d6
+[codecs]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/codecs/;drc=883fea1548d58e0080f98d66fab2e0c744dfb556
+[ANA]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/audio_network_adaptor/;drc=1f99551775cd876c116d1d90cba94c8a4670d184
+[IncomingPacket]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/include/audio_coding_module.h;l=192;drc=d82a02c837d33cdfd75121e40dcccd32515e42d6
+[NetEq]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/;drc=213dc2cfc5f1b360b1c6fc51d393491f5de49d3d
+[PlayoutData10Ms]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/include/audio_coding_module.h;l=216;drc=d82a02c837d33cdfd75121e40dcccd32515e42d6
diff --git a/third_party/libwebrtc/modules/audio_coding/g711_c_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/g711_c_gn/moz.build
new file mode 100644
index 0000000000..8322910094
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/g711_c_gn/moz.build
@@ -0,0 +1,217 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True 
+ +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("g711_c_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/g711_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/g711_gn/moz.build new file mode 100644 index 0000000000..3a21cddc41 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/g711_gn/moz.build @@ -0,0 +1,226 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True 
+ + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("g711_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/g722_c_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/g722_c_gn/moz.build new file mode 100644 index 0000000000..dc05788b80 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/g722_c_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True 
+ +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("g722_c_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/g722_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/g722_gn/moz.build new file mode 100644 index 0000000000..910eef267f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/g722_gn/moz.build @@ -0,0 +1,226 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = 
True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("g722_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/ilbc_c_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/ilbc_c_gn/moz.build new file mode 100644 index 0000000000..1ff9bdf07a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/ilbc_c_gn/moz.build @@ -0,0 +1,300 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_input.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c", + 
"/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/nearest_neighbor.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/pack_bits.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq3.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/vq4.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/xcorr_coef.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + 
DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + + 
DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("ilbc_c_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/ilbc_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/ilbc_gn/moz.build new file mode 100644 index 0000000000..4a7279386e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/ilbc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = 
True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("ilbc_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module.h b/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module.h new file mode 100644 index 0000000000..8b518fb979 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
+#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
+
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/function_view.h"
+#include "api/neteq/neteq.h"
+#include "api/neteq/neteq_factory.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+
+// forward declarations
+class AudioDecoder;
+class AudioEncoder;
+class AudioFrame;
+struct RTPHeader;
+
+// Callback class used for sending data ready to be packetized.
+class AudioPacketizationCallback {
+ public:
+  virtual ~AudioPacketizationCallback() {}
+
+  virtual int32_t SendData(AudioFrameType frame_type,
+                           uint8_t payload_type,
+                           uint32_t timestamp,
+                           const uint8_t* payload_data,
+                           size_t payload_len_bytes,
+                           int64_t absolute_capture_timestamp_ms) {
+    // TODO(bugs.webrtc.org/10739): Deprecate the old SendData and make this one
+    // pure virtual.
+    return SendData(frame_type, payload_type, timestamp, payload_data,
+                    payload_len_bytes);
+  }
+  virtual int32_t SendData(AudioFrameType frame_type,
+                           uint8_t payload_type,
+                           uint32_t timestamp,
+                           const uint8_t* payload_data,
+                           size_t payload_len_bytes) {
+    RTC_DCHECK_NOTREACHED() << "This method must be overridden, or not used.";
+    return -1;
+  }
+};
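+
+// A minimal override sketch (non-normative; `MyRtpSender` is illustrative).
+// New code should override the six-argument SendData(); its default
+// implementation above simply forwards to the legacy five-argument overload.
+//
+//   class MyRtpSender : public AudioPacketizationCallback {
+//    public:
+//     int32_t SendData(AudioFrameType frame_type, uint8_t payload_type,
+//                      uint32_t timestamp, const uint8_t* payload_data,
+//                      size_t payload_len_bytes,
+//                      int64_t absolute_capture_timestamp_ms) override {
+//       // Packetize `payload_data` into RTP and hand it to the transport.
+//       return 0;
+//     }
+//   };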
+
+class AudioCodingModule {
+ protected:
+  AudioCodingModule() {}
+
+ public:
+  struct Config {
+    explicit Config(
+        rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
+    Config(const Config&);
+    ~Config();
+
+    NetEq::Config neteq_config;
+    Clock* clock;
+    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
+    NetEqFactory* neteq_factory = nullptr;
+  };
+
+  static AudioCodingModule* Create(const Config& config);
+  virtual ~AudioCodingModule() = default;
+
+  ///////////////////////////////////////////////////////////////////////////
+  //   Sender
+  //
+
+  // `modifier` is called exactly once with one argument: a pointer to the
+  // unique_ptr that holds the current encoder (which is null if there is no
+  // current encoder). For the duration of the call, `modifier` has exclusive
+  // access to the unique_ptr; it may call the encoder, steal the encoder and
+  // replace it with another encoder or with nullptr, etc.
+  virtual void ModifyEncoder(
+      rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
+
+  // Utility method for simply replacing the existing encoder with a new one.
+  void SetEncoder(std::unique_ptr<AudioEncoder> new_encoder) {
+    ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder) {
+      *encoder = std::move(new_encoder);
+    });
+  }
+
+  // int32_t RegisterTransportCallback()
+  // Register a transport callback which will be called to deliver
+  // the encoded buffers whenever Process() is called and a
+  // bit-stream is ready.
+  //
+  // Input:
+  //   -transport          : pointer to the callback class
+  //                         transport->SendData() is called whenever
+  //                         Process() is called and bit-stream is ready
+  //                         to deliver.
+  //
+  // Return value:
+  //   -1 if the transport callback could not be registered
+  //    0 if registration is successful.
+  //
+  virtual int32_t RegisterTransportCallback(
+      AudioPacketizationCallback* transport) = 0;
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int32_t Add10MsData()
+  // Add 10 ms of raw (PCM) audio data and encode it. If the sampling
+  // frequency of the audio does not match the sampling frequency of the
+  // current encoder, ACM will resample the audio. If an encoded packet was
+  // produced, it will be delivered via the callback object registered using
+  // RegisterTransportCallback, and the return value from this function will
+  // be the number of bytes encoded.
+  //
+  // Input:
+  //   -audio_frame        : the input audio frame, containing raw audio
+  //                         sampling frequency etc.
+  //
+  // Return value:
+  //   >= 0   number of bytes encoded.
+  //     -1   some error occurred.
+  //
+  virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int SetPacketLossRate()
+  // Sets the expected packet loss rate for encoding. Some encoders provide
+  // packet-loss-aware encoding that makes the stream less sensitive to packet
+  // losses, e.g. through FEC. This has no effect on codecs that do not
+  // provide such encoding.
+  //
+  // Input:
+  //   -packet_loss_rate   : expected packet loss rate (0 -- 100 inclusive).
+  //
+  // Return value
+  //   -1 if failed to set packet loss rate,
+  //   0 if succeeded.
+  //
+  // This is only used in test code that relies on old ACM APIs.
+  // TODO(minyue): Remove it when possible.
+  virtual int SetPacketLossRate(int packet_loss_rate) = 0;
+
+  ///////////////////////////////////////////////////////////////////////////
+  //   Receiver
+  //
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int32_t InitializeReceiver()
+  // Resets any decoder-related state of ACM to the same state as when ACM
+  // was created. This will not interrupt or affect the encoding functionality
+  // of ACM. Calling this function discards all decoding-related settings; for
+  // instance, all registered codecs are deleted and have to be registered
+  // again.
+  //
+  // Return value:
+  //   -1 if failed to initialize,
+  //    0 if succeeded.
+  //
+  virtual int32_t InitializeReceiver() = 0;
+
+  // Replace any existing decoders with the given payload type -> decoder map.
+  virtual void SetReceiveCodecs(
+      const std::map<int, SdpAudioFormat>& codecs) = 0;
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int32_t IncomingPacket()
+  // Call this function to insert a parsed RTP packet into ACM.
+  //
+  // Inputs:
+  //   -incoming_payload   : received payload.
+  //   -payload_len_bytes  : the length of payload in bytes.
+  //   -rtp_info           : the relevant information retrieved from the RTP
+  //                         header.
+  //
+  // Return value:
+  //   -1 if failed to push in the payload
+  //    0 if payload is successfully pushed in.
+  //
+  virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
+                                 size_t payload_len_bytes,
+                                 const RTPHeader& rtp_header) = 0;
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int32_t PlayoutData10Ms()
+  // Get 10 milliseconds of raw audio data for playout, at the given sampling
+  // frequency. ACM will perform resampling if required.
+  //
+  // Input:
+  //   -desired_freq_hz    : the desired sampling frequency, in Hertz, of the
+  //                         output audio. If set to -1, the function returns
+  //                         the audio at the current sampling frequency.
+  //
+  // Output:
+  //   -audio_frame        : output audio frame which contains raw audio data
+  //                         and other relevant parameters.
+  //   -muted              : if true, the sample data in audio_frame is not
+  //                         populated, and must be interpreted as all zero.
+  //
+  // Return value:
+  //   -1 if the function fails,
+  //    0 if the function succeeds.
+  //
+  virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
+                                  AudioFrame* audio_frame,
+                                  bool* muted) = 0;
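+
+  // A usage sketch (non-normative): pulling audio for playout once every
+  // 10 ms. `kSampleRateHz` and the zero-fill policy are illustrative.
+  //
+  //   AudioFrame frame;
+  //   bool muted = false;
+  //   if (acm->PlayoutData10Ms(kSampleRateHz, &frame, &muted) == 0) {
+  //     if (muted) {
+  //       // The sample data in `frame` is not populated; play silence.
+  //     }
+  //   }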
+
+  ///////////////////////////////////////////////////////////////////////////
+  //   statistics
+  //
+
+  ///////////////////////////////////////////////////////////////////////////
+  // int32_t GetNetworkStatistics()
+  // Get network statistics. Note that the internal statistics of NetEq are
+  // reset by this call.
+  //
+  // Output:
+  //   -network_statistics : a structure that is filled with network
+  //                         statistics.
+  //
+  // Return value:
+  //   -1 if failed to get the network statistics,
+  //    0 if statistics were retrieved successfully.
+  //
+  virtual int32_t GetNetworkStatistics(
+      NetworkStatistics* network_statistics) = 0;
+
+  virtual ANAStats GetANAStats() const = 0;
+
+  virtual int GetTargetBitrate() const = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module_typedefs.h b/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module_typedefs.h
new file mode 100644
index 0000000000..9d2fcfe22e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/include/audio_coding_module_typedefs.h
@@ -0,0 +1,137 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_TYPEDEFS_H_
+#define MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_TYPEDEFS_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+
+///////////////////////////////////////////////////////////////////////////
+// enum ACMVADMode
+// An enumerator for aggressiveness of VAD
+// -VADNormal                : least aggressive mode.
+// -VADLowBitrate            : more aggressive than "VADNormal" to save on
+//                             bit-rate.
+// -VADAggr                  : an aggressive mode.
+// -VADVeryAggr              : the most aggressive mode.
+//
+enum ACMVADMode {
+  VADNormal = 0,
+  VADLowBitrate = 1,
+  VADAggr = 2,
+  VADVeryAggr = 3
+};
+
+enum class AudioFrameType {
+  kEmptyFrame = 0,
+  kAudioFrameSpeech = 1,
+  kAudioFrameCN = 2,
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Enumeration of Opus mode for intended application.
+//
+// kVoip              : optimized for voice signals.
+// kAudio             : optimized for non-voice signals like music.
+//
+enum OpusApplicationMode {
+  kVoip = 0,
+  kAudio = 1,
+};
+
+// Statistics for calls to AudioCodingModule::PlayoutData10Ms().
+struct AudioDecodingCallStats {
+  AudioDecodingCallStats()
+      : calls_to_silence_generator(0),
+        calls_to_neteq(0),
+        decoded_normal(0),
+        decoded_neteq_plc(0),
+        decoded_codec_plc(0),
+        decoded_cng(0),
+        decoded_plc_cng(0),
+        decoded_muted_output(0) {}
+
+  int calls_to_silence_generator;  // Number of calls where silence was
+                                   // generated and NetEq was disengaged from
+                                   // decoding.
+  int calls_to_neteq;        // Number of calls to NetEq.
+  int decoded_normal;        // Number of calls where an audio RTP packet was
+                             // decoded.
+  int decoded_neteq_plc;     // Number of calls that resulted in NetEq PLC.
+  int decoded_codec_plc;     // Number of calls that resulted in codec PLC.
+  int decoded_cng;           // Number of calls where comfort noise was
+                             // generated due to DTX.
+  int decoded_plc_cng;       // Number of calls where PLC faded to CNG.
+  int decoded_muted_output;  // Number of calls returning a muted state output.
+};
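+
+// Several NetworkStatistics fields below report fractions in Q14 fixed
+// point, i.e. the fraction scaled by 2^14, so 16384 corresponds to 100%. A
+// sketch of the conversion (illustrative, not part of the API):
+//
+//   float Q14ToFraction(uint16_t rate_q14) {
+//     return static_cast<float>(rate_q14) / (1 << 14);
+//   }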
+
+// NetEq statistics.
+struct NetworkStatistics {
+  // current jitter buffer size in ms
+  uint16_t currentBufferSize;
+  // preferred (optimal) buffer size in ms
+  uint16_t preferredBufferSize;
+  // adding extra delay due to "peaky jitter"
+  bool jitterPeaksFound;
+  // Stats below correspond to similarly-named fields in the WebRTC stats spec.
+  // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats
+  uint64_t totalSamplesReceived;
+  uint64_t concealedSamples;
+  uint64_t silentConcealedSamples;
+  uint64_t concealmentEvents;
+  uint64_t jitterBufferDelayMs;
+  uint64_t jitterBufferTargetDelayMs;
+  uint64_t jitterBufferMinimumDelayMs;
+  uint64_t jitterBufferEmittedCount;
+  uint64_t insertedSamplesForDeceleration;
+  uint64_t removedSamplesForAcceleration;
+  uint64_t fecPacketsReceived;
+  uint64_t fecPacketsDiscarded;
+  // Stats below correspond to similarly-named fields in the WebRTC stats spec.
+  // https://w3c.github.io/webrtc-stats/#dom-rtcreceivedrtpstreamstats
+  uint64_t packetsDiscarded;
+  // Stats below DO NOT correspond directly to anything in the WebRTC stats
+  // spec.
+  // fraction (of original stream) of synthesized audio inserted through
+  // expansion (in Q14)
+  uint16_t currentExpandRate;
+  // fraction (of original stream) of synthesized speech inserted through
+  // expansion (in Q14)
+  uint16_t currentSpeechExpandRate;
+  // fraction of synthesized speech inserted through pre-emptive expansion
+  // (in Q14)
+  uint16_t currentPreemptiveRate;
+  // fraction of data removed through acceleration (in Q14)
+  uint16_t currentAccelerateRate;
+  // fraction of data coming from secondary decoding (in Q14)
+  uint16_t currentSecondaryDecodedRate;
+  // Fraction of secondary data, including FEC and RED, that is discarded (in
+  // Q14). Discarding of secondary data can be caused by the reception of the
+  // primary data, obsoleting the secondary data. It can also be caused by
+  // early or late arrival of secondary data.
+  uint16_t currentSecondaryDiscardedRate;
+  // average packet waiting time in the jitter buffer (ms)
+  int meanWaitingTimeMs;
+  // max packet waiting time in the jitter buffer (ms)
+  int maxWaitingTimeMs;
+  // count of the number of buffer flushes
+  uint64_t packetBufferFlushes;
+  // number of samples expanded due to delayed packets
+  uint64_t delayedPacketOutageSamples;
+  // arrival delay of incoming packets
+  uint64_t relativePacketArrivalDelayMs;
+  // number of audio interruptions
+  int32_t interruptionCount;
+  // total duration of audio interruptions
+  int32_t totalInterruptionDurationMs;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_INCLUDE_AUDIO_CODING_MODULE_TYPEDEFS_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/isac_bwinfo_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/isac_bwinfo_gn/moz.build
new file mode 100644
index 0000000000..bb9cdf4460
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/isac_bwinfo_gn/moz.build
@@ -0,0 +1,201 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] 
= True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("isac_bwinfo_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/isac_vad_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/isac_vad_gn/moz.build new file mode 100644 index 0000000000..86f566663a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/isac_vad_gn/moz.build @@ -0,0 +1,220 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c", + "/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] 
= True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("isac_vad_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/legacy_encoded_audio_frame_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/legacy_encoded_audio_frame_gn/moz.build new file mode 100644 index 0000000000..b642ffbe95 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/legacy_encoded_audio_frame_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_encoded_audio_frame_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc new file mode 100644 index 0000000000..f4ef6cdccb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/accelerate.h" + + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +namespace webrtc { + +Accelerate::ReturnCodes Accelerate::Process(const int16_t* input, + size_t input_length, + bool fast_accelerate, + AudioMultiVector* output, + size_t* length_change_samples) { + // Input length must be (almost) 30 ms. + static const size_t k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate. + if (num_channels_ == 0 || + input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) { + // Length of input data too short to do accelerate. Simply move all data + // from input to output. 
+ output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kError; + } + return TimeStretch::Process(input, input_length, fast_accelerate, output, + length_change_samples); +} + +void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/, + int16_t* best_correlation, + size_t* /*peak_index*/) const { + // When the signal does not contain any active speech, the correlation does + // not matter. Simply set it to zero. + *best_correlation = 0; +} + +Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch( + const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool fast_mode, + AudioMultiVector* output) const { + // Check for strong correlation or passive speech. + // Use 8192 (0.5 in Q14) in fast mode. + const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold; + if ((best_correlation > correlation_threshold) || !active_speech) { + // Do accelerate operation by overlap add. + + // Pre-calculate common multiplication with `fs_mult_`. + // 120 corresponds to 15 ms. + size_t fs_mult_120 = fs_mult_ * 120; + + if (fast_mode) { + // Fit as many multiples of `peak_index` as possible in fs_mult_120. + // TODO(henrik.lundin) Consider finding multiple correlation peaks and + // pick the one with the longest correlation lag in this case. + peak_index = (fs_mult_120 / peak_index) * peak_index; + } + + RTC_DCHECK_GE(fs_mult_120, peak_index); // Should be handled in Process(). + // Copy first part; 0 to 15 ms. + output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, fs_mult_120 * num_channels_)); + // Copy the `peak_index` starting at 15 ms to `temp_vector`. + AudioMultiVector temp_vector(num_channels_); + temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[fs_mult_120 * num_channels_], peak_index * num_channels_)); + // Cross-fade `temp_vector` onto the end of `output`. + output->CrossFade(temp_vector, peak_index); + // Copy the last unmodified part, 15 ms + pitch period until the end. + output->PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[(fs_mult_120 + peak_index) * num_channels_], + input_length - (fs_mult_120 + peak_index) * num_channels_)); + + if (active_speech) { + return kSuccess; + } else { + return kSuccessLowEnergy; + } + } else { + // Accelerate not allowed. Simply move all data from decoded to outData. + output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kNoStretch; + } +} + +Accelerate* AccelerateFactory::Create( + int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) const { + return new Accelerate(sample_rate_hz, num_channels, background_noise); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h new file mode 100644 index 0000000000..01fe874d54 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ +#define MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/neteq/time_stretch.h" + +namespace webrtc { + +class AudioMultiVector; +class BackgroundNoise; + +// This class implements the Accelerate operation. Most of the work is done +// in the base class TimeStretch, which is shared with the PreemptiveExpand +// operation. In the Accelerate class, the operations that are specific to +// Accelerate are implemented. +class Accelerate : public TimeStretch { + public: + Accelerate(int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) + : TimeStretch(sample_rate_hz, num_channels, background_noise) {} + + Accelerate(const Accelerate&) = delete; + Accelerate& operator=(const Accelerate&) = delete; + + // This method performs the actual Accelerate operation. The samples are + // read from `input`, of length `input_length` elements, and are written to + // `output`. The number of samples removed through time-stretching is + // provided in the output `length_change_samples`. The method returns + // the outcome of the operation as an enumerator value. If `fast_accelerate` + // is true, the algorithm will relax the requirements on finding strong + // correlations, and may remove multiple pitch periods if possible. + ReturnCodes Process(const int16_t* input, + size_t input_length, + bool fast_accelerate, + AudioMultiVector* output, + size_t* length_change_samples); + + protected: + // Sets the parameters `best_correlation` and `peak_index` to suitable + // values when the signal contains no active speech. + void SetParametersForPassiveSpeech(size_t len, + int16_t* best_correlation, + size_t* peak_index) const override; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. + ReturnCodes CheckCriteriaAndStretch(const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool fast_mode, + AudioMultiVector* output) const override; +}; + +struct AccelerateFactory { + AccelerateFactory() {} + virtual ~AccelerateFactory() {} + + virtual Accelerate* Create(int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) const; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc new file mode 100644 index 0000000000..fef3c3c1e4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include <stdlib.h> + +#include <array> +#include <memory> +#include <string> +#include <vector> + +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" +#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" +#include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" +#include "modules/audio_coding/codecs/g722/audio_encoder_g722.h" +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" +#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h" +#include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h" +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "rtc_base/system/arch.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { + +constexpr int kOverheadBytesPerPacket = 50; + +// The absolute difference between the input and output (the first channel) is +// compared vs `tolerance`. The parameter `delay` is used to correct for codec +// delays. +void CompareInputOutput(const std::vector<int16_t>& input, + const std::vector<int16_t>& output, + size_t num_samples, + size_t channels, + int tolerance, + int delay) { + ASSERT_LE(num_samples, input.size()); + ASSERT_LE(num_samples * channels, output.size()); + for (unsigned int n = 0; n < num_samples - delay; ++n) { + ASSERT_NEAR(input[n], output[channels * n + delay], tolerance) + << "Exit test on first diff; n = " << n; + } +} + +// The absolute difference between the first two channels in `output` is +// compared vs `tolerance`. +void CompareTwoChannels(const std::vector<int16_t>& output, + size_t samples_per_channel, + size_t channels, + int tolerance) { + ASSERT_GE(channels, 2u); + ASSERT_LE(samples_per_channel * channels, output.size()); + for (unsigned int n = 0; n < samples_per_channel; ++n) + ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance) + << "Stereo samples differ."; +} + +// Calculates mean-squared error between input and output (the first channel). +// The parameter `delay` is used to correct for codec delays. +double MseInputOutput(const std::vector<int16_t>& input, + const std::vector<int16_t>& output, + size_t num_samples, + size_t channels, + int delay) { + RTC_DCHECK_LT(delay, static_cast<int>(num_samples)); + RTC_DCHECK_LE(num_samples, input.size()); + RTC_DCHECK_LE(num_samples * channels, output.size()); + if (num_samples == 0) + return 0.0; + double squared_sum = 0.0; + for (unsigned int n = 0; n < num_samples - delay; ++n) { + squared_sum += (input[n] - output[channels * n + delay]) * + (input[n] - output[channels * n + delay]); + } + return squared_sum / (num_samples - delay); +} +} // namespace + +class AudioDecoderTest : public ::testing::Test { + protected: + AudioDecoderTest() + : input_audio_( + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), + 32000), + codec_input_rate_hz_(32000), // Legacy default value. + frame_size_(0), + data_length_(0), + channels_(1), + payload_type_(17), + decoder_(NULL) {} + + ~AudioDecoderTest() override {} + + void SetUp() override { + if (audio_encoder_) + codec_input_rate_hz_ = audio_encoder_->SampleRateHz(); + // Create arrays.
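+ // (The fixture subclasses below are expected to set frame_size_ and + // data_length_ in their constructors before SetUp() runs; the assertion + // below enforces this.)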
+ ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0"; + } + + void TearDown() override { + delete decoder_; + decoder_ = NULL; + } + + virtual void InitEncoder() {} + + // TODO(henrik.lundin) Change return type to size_t once most/all overriding + // implementations are gone. + virtual int EncodeFrame(const int16_t* input, + size_t input_len_samples, + rtc::Buffer* output) { + AudioEncoder::EncodedInfo encoded_info; + const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100; + RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(), + input_len_samples); + std::unique_ptr<int16_t[]> interleaved_input( + new int16_t[channels_ * samples_per_10ms]); + for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) { + EXPECT_EQ(0u, encoded_info.encoded_bytes); + + // Duplicate the mono input signal to however many channels the test + // wants. + test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms, + samples_per_10ms, channels_, + interleaved_input.get()); + + encoded_info = + audio_encoder_->Encode(0, + rtc::ArrayView<const int16_t>( + interleaved_input.get(), + audio_encoder_->NumChannels() * + audio_encoder_->SampleRateHz() / 100), + output); + } + EXPECT_EQ(payload_type_, encoded_info.payload_type); + return static_cast<int>(encoded_info.encoded_bytes); + } + + // Encodes and decodes audio. The absolute difference between the input and + // output is compared vs `tolerance`, and the mean-squared error is compared + // with `mse`. The encoded stream should contain `expected_bytes`. For stereo + // audio, the absolute difference between the two channels is compared vs + // `channel_diff_tolerance`. + void EncodeDecodeTest(size_t expected_bytes, + int tolerance, + double mse, + int delay = 0, + int channel_diff_tolerance = 0) { + ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0"; + ASSERT_GE(channel_diff_tolerance, 0) + << "Test must define a channel_diff_tolerance >= 0"; + size_t processed_samples = 0u; + size_t encoded_bytes = 0u; + InitEncoder(); + std::vector<int16_t> input; + std::vector<int16_t> decoded; + while (processed_samples + frame_size_ <= data_length_) { + // Extend input vector with `frame_size_`. + input.resize(input.size() + frame_size_, 0); + // Read from input file. + ASSERT_GE(input.size() - processed_samples, frame_size_); + ASSERT_TRUE(input_audio_.Read(frame_size_, codec_input_rate_hz_, + &input[processed_samples])); + rtc::Buffer encoded; + size_t enc_len = + EncodeFrame(&input[processed_samples], frame_size_, &encoded); + // Make sure that frame_size_ * channels_ samples are allocated and free. + decoded.resize((processed_samples + frame_size_) * channels_, 0); + + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + auto decode_result = parse_result[0].frame->Decode( + rtc::ArrayView<int16_t>(&decoded[processed_samples * channels_], + frame_size_ * channels_ * sizeof(int16_t))); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + encoded_bytes += enc_len; + processed_samples += frame_size_; + } + // For some codecs it doesn't make sense to check expected number of bytes, + // since the number can vary for different platforms. Opus is such a codec. + // In this case expected_bytes is set to 0.
+ if (expected_bytes) { + EXPECT_EQ(expected_bytes, encoded_bytes); + } + CompareInputOutput(input, decoded, processed_samples, channels_, tolerance, + delay); + if (channels_ == 2) + CompareTwoChannels(decoded, processed_samples, channels_, + channel_diff_tolerance); + EXPECT_LE( + MseInputOutput(input, decoded, processed_samples, channels_, delay), + mse); + } + + // Encodes a payload and decodes it twice with decoder re-init before each + // decode. Verifies that the decoded result is the same. + void ReInitTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + std::array<rtc::Buffer, 2> encoded; + EncodeFrame(input.get(), frame_size_, &encoded[0]); + // Make a copy. + encoded[1].SetData(encoded[0].data(), encoded[0].size()); + + std::array<std::vector<int16_t>, 2> outputs; + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].resize(frame_size_ * channels_); + decoder_->Reset(); + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded[i]), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + auto decode_result = parse_result[0].frame->Decode(outputs[i]); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + } + EXPECT_EQ(outputs[0], outputs[1]); + } + + // Call DecodePlc and verify that the correct number of samples is produced. + void DecodePlcTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + rtc::Buffer encoded; + EncodeFrame(input.get(), frame_size_, &encoded); + decoder_->Reset(); + std::vector<int16_t> output(frame_size_ * channels_); + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + auto decode_result = parse_result[0].frame->Decode(output); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + // Call DecodePlc and verify that we get one frame of data. + // (Overwrite the output from the above Decode call, but that does not + // matter.)
+ size_t dec_len = + decoder_->DecodePlc(/*num_frames=*/1, /*decoded=*/output.data()); + EXPECT_EQ(frame_size_ * channels_, dec_len); + } + + test::ResampleInputAudioFile input_audio_; + int codec_input_rate_hz_; + size_t frame_size_; + size_t data_length_; + size_t channels_; + const int payload_type_; + AudioDecoder* decoder_; + std::unique_ptr<AudioEncoder> audio_encoder_; +}; + +class AudioDecoderPcmUTest : public AudioDecoderTest { + protected: + AudioDecoderPcmUTest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmU(1); + AudioEncoderPcmU::Config config; + config.frame_size_ms = static_cast<int>(frame_size_ / 8); + config.payload_type = payload_type_; + audio_encoder_.reset(new AudioEncoderPcmU(config)); + } +}; + +class AudioDecoderPcmATest : public AudioDecoderTest { + protected: + AudioDecoderPcmATest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmA(1); + AudioEncoderPcmA::Config config; + config.frame_size_ms = static_cast<int>(frame_size_ / 8); + config.payload_type = payload_type_; + audio_encoder_.reset(new AudioEncoderPcmA(config)); + } +}; + +class AudioDecoderPcm16BTest : public AudioDecoderTest { + protected: + AudioDecoderPcm16BTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 20 * codec_input_rate_hz_ / 1000; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1); + RTC_DCHECK(decoder_); + AudioEncoderPcm16B::Config config; + config.sample_rate_hz = codec_input_rate_hz_; + config.frame_size_ms = + static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000)); + config.payload_type = payload_type_; + audio_encoder_.reset(new AudioEncoderPcm16B(config)); + } +}; + +class AudioDecoderIlbcTest : public AudioDecoderTest { + protected: + AudioDecoderIlbcTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 8000; + frame_size_ = 240; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIlbcImpl; + RTC_DCHECK(decoder_); + AudioEncoderIlbcConfig config; + config.frame_size_ms = 30; + audio_encoder_.reset(new AudioEncoderIlbcImpl(config, payload_type_)); + } + + // Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does + // not return any data. It simply resets a few states and returns 0. + void DecodePlcTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + rtc::Buffer encoded; + size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded); + AudioDecoder::SpeechType speech_type; + decoder_->Reset(); + std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]); + size_t dec_len = decoder_->Decode( + encoded.data(), enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output.get(), &speech_type); + EXPECT_EQ(frame_size_, dec_len); + // Simply call DecodePlc and verify that we get 0 as return value.
+ EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get())); + } +}; + +class AudioDecoderG722Test : public AudioDecoderTest { + protected: + AudioDecoderG722Test() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderG722Impl; + RTC_DCHECK(decoder_); + AudioEncoderG722Config config; + config.frame_size_ms = 10; + config.num_channels = 1; + audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); + } +}; + +class AudioDecoderG722StereoTest : public AudioDecoderTest { + protected: + AudioDecoderG722StereoTest() : AudioDecoderTest() { + channels_ = 2; + codec_input_rate_hz_ = 16000; + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderG722StereoImpl; + RTC_DCHECK(decoder_); + AudioEncoderG722Config config; + config.frame_size_ms = 10; + config.num_channels = 2; + audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); + } +}; + +class AudioDecoderOpusTest + : public AudioDecoderTest, + public testing::WithParamInterface<std::tuple<int, int>> { + protected: + AudioDecoderOpusTest() : AudioDecoderTest() { + channels_ = opus_num_channels_; + codec_input_rate_hz_ = opus_sample_rate_hz_; + frame_size_ = rtc::CheckedDivExact(opus_sample_rate_hz_, 100); + data_length_ = 10 * frame_size_; + decoder_ = + new AudioDecoderOpusImpl(opus_num_channels_, opus_sample_rate_hz_); + AudioEncoderOpusConfig config; + config.frame_size_ms = 10; + config.sample_rate_hz = opus_sample_rate_hz_; + config.num_channels = opus_num_channels_; + config.application = opus_num_channels_ == 1 + ? AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_); + audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); + } + const int opus_sample_rate_hz_{std::get<0>(GetParam())}; + const int opus_num_channels_{std::get<1>(GetParam())}; +}; + +INSTANTIATE_TEST_SUITE_P(Param, + AudioDecoderOpusTest, + testing::Combine(testing::Values(16000, 48000), + testing::Values(1, 2))); + +TEST_F(AudioDecoderPcmUTest, EncodeDecode) { + int tolerance = 251; + double mse = 1734.0; + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +namespace { +int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) { + audio_encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt); + return audio_encoder->GetTargetBitrate(); +} +void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder, + int fixed_rate) { + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1)); +} +} // namespace + +TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderPcmATest, EncodeDecode) { + int tolerance = 308; + double mse = 1931.0; + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderPcmATest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderPcm16BTest, EncodeDecode) { + int tolerance = 0; + double mse = 0.0; + EncodeDecodeTest(2 * data_length_, tolerance, mse); +
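// (PCM16B is plain linear PCM, so the round trip should be bit-exact; hence + // the zero tolerance and zero MSE above, and the expected stream size of + // 2 * data_length_ bytes: two bytes per 16-bit sample.) +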
ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), + codec_input_rate_hz_ * 16); +} + +TEST_F(AudioDecoderIlbcTest, EncodeDecode) { + int tolerance = 6808; + double mse = 2.13e6; + int delay = 80; // Delay from input to output. + EncodeDecodeTest(500, tolerance, mse, delay); + ReInitTest(); + EXPECT_TRUE(decoder_->HasDecodePlc()); + DecodePlcTest(); +} + +TEST_F(AudioDecoderIlbcTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 13333); +} + +TEST_F(AudioDecoderG722Test, EncodeDecode) { + int tolerance = 6176; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderG722Test, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderG722StereoTest, EncodeDecode) { + int tolerance = 6176; + int channel_diff_tolerance = 0; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000); +} + +// TODO(http://bugs.webrtc.org/12518): Enable the test after Opus has been +// updated. +TEST_P(AudioDecoderOpusTest, DISABLED_EncodeDecode) { + constexpr int tolerance = 6176; + constexpr int channel_diff_tolerance = 6; + constexpr double mse = 238630.0; + constexpr int delay = 22; // Delay from input to output. + EncodeDecodeTest(0, tolerance, mse, delay, channel_diff_tolerance); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_P(AudioDecoderOpusTest, SetTargetBitrate) { + const int overhead_rate = + 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; + EXPECT_EQ(6000, + SetAndGetTargetBitrate(audio_encoder_.get(), 5999 + overhead_rate)); + EXPECT_EQ(6000, + SetAndGetTargetBitrate(audio_encoder_.get(), 6000 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32000 + overhead_rate)); + EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), + 510000 + overhead_rate)); + EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), + 511000 + overhead_rate)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc new file mode 100644 index 0000000000..14ae94649b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioMultiVector::AudioMultiVector(size_t N) { + RTC_DCHECK_GT(N, 0); + if (N < 1) + N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector); + } + num_channels_ = N; +} + +AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) { + RTC_DCHECK_GT(N, 0); + if (N < 1) + N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector(initial_size)); + } + num_channels_ = N; +} + +AudioMultiVector::~AudioMultiVector() { + std::vector<AudioVector*>::iterator it = channels_.begin(); + while (it != channels_.end()) { + delete (*it); + ++it; + } +} + +void AudioMultiVector::Clear() { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->Clear(); + } +} + +void AudioMultiVector::Zeros(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->Clear(); + channels_[i]->Extend(length); + } +} + +void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const { + if (copy_to) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->CopyTo(&(*copy_to)[i]); + } + } +} + +void AudioMultiVector::PushBackInterleaved( + rtc::ArrayView<const int16_t> append_this) { + RTC_DCHECK_EQ(append_this.size() % num_channels_, 0); + if (append_this.empty()) { + return; + } + if (num_channels_ == 1) { + // Special case to avoid extra allocation and data shuffling. + channels_[0]->PushBack(append_this.data(), append_this.size()); + return; + } + size_t length_per_channel = append_this.size() / num_channels_; + int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage. + for (size_t channel = 0; channel < num_channels_; ++channel) { + // Copy elements to `temp_array`. + for (size_t i = 0; i < length_per_channel; ++i) { + temp_array[i] = append_this[channel + i * num_channels_]; + } + channels_[channel]->PushBack(temp_array, length_per_channel); + } + delete[] temp_array; +} + +void AudioMultiVector::PushBack(const AudioMultiVector& append_this) { + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PushBack(append_this[i]); + } + } +} + +void AudioMultiVector::PushBackFromIndex(const AudioMultiVector& append_this, + size_t index) { + RTC_DCHECK_LT(index, append_this.Size()); + index = std::min(index, append_this.Size() - 1); + size_t length = append_this.Size() - index; + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PushBack(append_this[i], length, index); + } + } +} + +void AudioMultiVector::PopFront(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PopFront(length); + } +} + +void AudioMultiVector::PopBack(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PopBack(length); + } +} + +size_t AudioMultiVector::ReadInterleaved(size_t length, + int16_t* destination) const { + return ReadInterleavedFromIndex(0, length, destination); +} + +size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index, + size_t length, + int16_t* destination) const { + RTC_DCHECK(destination); + size_t index = 0; // Number of elements written to `destination` so far.
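+ // (The clamping below ensures that at most Size() - start_index samples per + // channel are read; the return value is the number of elements actually + // written.)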
+ RTC_DCHECK_LE(start_index, Size()); + start_index = std::min(start_index, Size()); + if (length + start_index > Size()) { + length = Size() - start_index; + } + if (num_channels_ == 1) { + // Special case to avoid the nested for loop below. + (*this)[0].CopyTo(length, start_index, destination); + return length; + } + for (size_t i = 0; i < length; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + destination[index] = (*this)[channel][i + start_index]; + ++index; + } + } + return index; +} + +size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length, + int16_t* destination) const { + length = std::min(length, Size()); // Cannot read more than Size() elements. + return ReadInterleavedFromIndex(Size() - length, length, destination); +} + +void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position) { + RTC_DCHECK_EQ(num_channels_, insert_this.num_channels_); + // Cap `length` at the length of `insert_this`. + RTC_DCHECK_LE(length, insert_this.Size()); + length = std::min(length, insert_this.Size()); + if (num_channels_ == insert_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->OverwriteAt(insert_this[i], length, position); + } + } +} + +void AudioMultiVector::CrossFade(const AudioMultiVector& append_this, + size_t fade_length) { + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->CrossFade(append_this[i], fade_length); + } + } +} + +size_t AudioMultiVector::Channels() const { + return num_channels_; +} + +size_t AudioMultiVector::Size() const { + RTC_DCHECK(channels_[0]); + return channels_[0]->Size(); +} + +void AudioMultiVector::AssertSize(size_t required_size) { + if (Size() < required_size) { + size_t extend_length = required_size - Size(); + for (size_t channel = 0; channel < num_channels_; ++channel) { + channels_[channel]->Extend(extend_length); + } + } +} + +bool AudioMultiVector::Empty() const { + RTC_DCHECK(channels_[0]); + return channels_[0]->Empty(); +} + +void AudioMultiVector::CopyChannel(size_t from_channel, size_t to_channel) { + RTC_DCHECK_LT(from_channel, num_channels_); + RTC_DCHECK_LT(to_channel, num_channels_); + channels_[from_channel]->CopyTo(channels_[to_channel]); +} + +const AudioVector& AudioMultiVector::operator[](size_t index) const { + return *(channels_[index]); +} + +AudioVector& AudioMultiVector::operator[](size_t index) { + return *(channels_[index]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h new file mode 100644 index 0000000000..715ec6dfc7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ + +#include <stdint.h> +#include <string.h> + +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/audio_vector.h" + +namespace webrtc { + +class AudioMultiVector { + public: + // Creates an empty AudioMultiVector with `N` audio channels. `N` must be + // larger than 0. + explicit AudioMultiVector(size_t N); + + // Creates an AudioMultiVector with `N` audio channels, each channel having + // an initial size. `N` must be larger than 0. + AudioMultiVector(size_t N, size_t initial_size); + + virtual ~AudioMultiVector(); + + AudioMultiVector(const AudioMultiVector&) = delete; + AudioMultiVector& operator=(const AudioMultiVector&) = delete; + + // Deletes all values and makes the vector empty. + virtual void Clear(); + + // Clears the vector and inserts `length` zeros into each channel. + virtual void Zeros(size_t length); + + // Copies all values from this vector to `copy_to`. Any contents in `copy_to` + // are deleted. After the operation is done, `copy_to` will be an exact + // replica of this object. The source and the destination must have the same + // number of channels. + virtual void CopyTo(AudioMultiVector* copy_to) const; + + // Appends the contents of `append_this` to the end of this object. The array + // is assumed to be channel-interleaved. The length must be an exact multiple + // of this object's number of channels. The length of this object is increased + // by the length of the array divided by the number of channels. + void PushBackInterleaved(rtc::ArrayView<const int16_t> append_this); + + // Appends the contents of AudioMultiVector `append_this` to this object. The + // length of this object is increased by the length of `append_this`. + virtual void PushBack(const AudioMultiVector& append_this); + + // Appends the contents of AudioMultiVector `append_this` to this object, + // taken from `index` up until the end of `append_this`. The length of this + // object is increased. + virtual void PushBackFromIndex(const AudioMultiVector& append_this, + size_t index); + + // Removes `length` elements from the beginning of this object, from each + // channel. + virtual void PopFront(size_t length); + + // Removes `length` elements from the end of this object, from each + // channel. + virtual void PopBack(size_t length); + + // Reads `length` samples from each channel and writes them interleaved to + // `destination`. The total number of elements written to `destination` is + // returned, i.e., `length` * number of channels. If the AudioMultiVector + // contains less than `length` samples per channel, this is reflected in the + // return value. + virtual size_t ReadInterleaved(size_t length, int16_t* destination) const; + + // Like ReadInterleaved() above, but reads from `start_index` instead of from + // the beginning. + virtual size_t ReadInterleavedFromIndex(size_t start_index, + size_t length, + int16_t* destination) const; + + // Like ReadInterleaved() above, but reads from the end instead of from + // the beginning. + virtual size_t ReadInterleavedFromEnd(size_t length, + int16_t* destination) const; + + // Overwrites each channel in this AudioMultiVector with values taken from + // `insert_this`. The values are taken from the beginning of `insert_this` and + // are inserted starting at `position`. `length` values are written into each + // channel.
If `length` and `position` are selected such that the new data + // extends beyond the end of the current AudioVector, the vector is extended + // to accommodate the new data. `length` is limited to the length of + // `insert_this`. + virtual void OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position); + + // Appends `append_this` to the end of the current vector. Lets the two + // vectors overlap by `fade_length` samples (per channel), and cross-fade + // linearly in this region. + virtual void CrossFade(const AudioMultiVector& append_this, + size_t fade_length); + + // Returns the number of channels. + virtual size_t Channels() const; + + // Returns the number of elements per channel in this AudioMultiVector. + virtual size_t Size() const; + + // Verify that each channel can hold at least `required_size` elements. If + // not, extend accordingly. + virtual void AssertSize(size_t required_size); + + virtual bool Empty() const; + + // Copies the data between two channels in the AudioMultiVector. The method + // does not add any new channel. Thus, `from_channel` and `to_channel` must + // both be valid channel numbers. + virtual void CopyChannel(size_t from_channel, size_t to_channel); + + // Accesses and modifies a channel (i.e., an AudioVector object) of this + // AudioMultiVector. + const AudioVector& operator[](size_t index) const; + AudioVector& operator[](size_t index); + + protected: + std::vector<AudioVector*> channels_; + size_t num_channels_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc new file mode 100644 index 0000000000..386c3d48a3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +#include <stdlib.h> + +#include <string> +#include <vector> + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +// This is a value-parameterized test. The test cases are instantiated with +// different values for the test parameter, which is used to determine the +// number of channels in the AudioMultiVector. Note that it is not possible +// to combine typed testing with value-parameterized testing, and since the +// tests for AudioVector already cover a number of different type parameters, +// this test focuses on testing different numbers of channels, and keeping the +// value type constant. + +class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> { + protected: + AudioMultiVectorTest() + : num_channels_(GetParam()), // Get the test parameter. + array_interleaved_(num_channels_ * array_length()) {} + + ~AudioMultiVectorTest() = default; + + virtual void SetUp() { + // Populate test arrays. + for (size_t i = 0; i < array_length(); ++i) { + array_[i] = static_cast<int16_t>(i); + } + int16_t* ptr = array_interleaved_.data(); + // Write 100, 101, 102, ... for first channel. + // Write 200, 201, 202, ...
for second channel. + // And so on. + for (size_t i = 0; i < array_length(); ++i) { + for (size_t j = 1; j <= num_channels_; ++j) { + *ptr = rtc::checked_cast<int16_t>(j * 100 + i); + ++ptr; + } + } + } + + size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); } + + const size_t num_channels_; + int16_t array_[10]; + std::vector<int16_t> array_interleaved_; +}; + +// Create and destroy AudioMultiVector objects, both empty and with a predefined +// length. +TEST_P(AudioMultiVectorTest, CreateAndDestroy) { + AudioMultiVector vec1(num_channels_); + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(num_channels_, vec1.Channels()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioMultiVector vec2(num_channels_, initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(num_channels_, vec2.Channels()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TEST_P(AudioMultiVectorTest, SubscriptOperator) { + AudioMultiVector vec(num_channels_, array_length()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + vec[channel][i] = static_cast<int16_t>(i); + // Make sure to use the const version. + const AudioVector& audio_vec = vec[channel]; + EXPECT_EQ(static_cast<int16_t>(i), audio_vec[i]); + } + } +} + +// Test the PushBackInterleaved method and the CopyTo method. The Clear +// method is also invoked. +TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec_copy(num_channels_); + vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`. + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(array_length(), vec.Size()); + ASSERT_EQ(num_channels_, vec_copy.Channels()); + ASSERT_EQ(array_length(), vec_copy.Size()); + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(static_cast<int16_t>((channel + 1) * 100 + i), vec[channel][i]); + EXPECT_EQ(vec[channel][i], vec_copy[channel][i]); + } + } + + // Clear `vec` and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyTo(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Try to copy to a NULL pointer. Nothing should happen. +TEST_P(AudioMultiVectorTest, CopyToNull) { + AudioMultiVector vec(num_channels_); + AudioMultiVector* vec_copy = NULL; + vec.PushBackInterleaved(array_interleaved_); + vec.CopyTo(vec_copy); +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, PushBackVector) { + AudioMultiVector vec1(num_channels_, array_length()); + AudioMultiVector vec2(num_channels_, array_length()); + // Set the first vector to [0, 1, ..., array_length() - 1] + + // 100 * channel_number. + // Set the second vector to [array_length(), array_length() + 1, ..., + // 2 * array_length() - 1] + 100 * channel_number. + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + vec1[channel][i] = static_cast<int16_t>(i + 100 * channel); + vec2[channel][i] = + static_cast<int16_t>(i + 100 * channel + array_length()); + } + } + // Append vec2 to the back of vec1.
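+ // (After the call, vec1 should hold 2 * array_length() samples per channel, + // as verified below; vec2 is left unchanged.)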
+ vec1.PushBack(vec2); + ASSERT_EQ(2u * array_length(), vec1.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * array_length(); ++i) { + EXPECT_EQ(static_cast(i + 100 * channel), vec1[channel][i]); + } + } +} + +// Test the PushBackFromIndex method. +TEST_P(AudioMultiVectorTest, PushBackFromIndex) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec2(num_channels_); + + // Append vec1 to the back of vec2 (which is empty). Read vec1 from the second + // last element. + vec2.PushBackFromIndex(vec1, array_length() - 2); + ASSERT_EQ(2u, vec2.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2; ++i) { + EXPECT_EQ(array_interleaved_[channel + + num_channels_ * (array_length() - 2 + i)], + vec2[channel][i]); + } + } +} + +// Starts with pushing some values to the vector, then test the Zeros method. +TEST_P(AudioMultiVectorTest, Zeros) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.Zeros(2 * array_length()); + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(2u * array_length(), vec.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * array_length(); ++i) { + EXPECT_EQ(0, vec[channel][i]); + } + } +} + +// Test the ReadInterleaved method +TEST_P(AudioMultiVectorTest, ReadInterleaved) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + int16_t* output = new int16_t[array_interleaved_.size()]; + // Read 5 samples. + size_t read_samples = 5; + EXPECT_EQ(num_channels_ * read_samples, + vec.ReadInterleaved(read_samples, output)); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); + + // Read too many samples. Expect to get all samples from the vector. + EXPECT_EQ(array_interleaved_.size(), + vec.ReadInterleaved(array_length() + 1, output)); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); + + delete[] output; +} + +// Test the PopFront method. +TEST_P(AudioMultiVectorTest, PopFront) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.PopFront(1); // Remove one element from each channel. + ASSERT_EQ(array_length() - 1u, vec.Size()); + // Let `ptr` point to the second element of the first channel in the + // interleaved array. + int16_t* ptr = &array_interleaved_[num_channels_]; + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopFront(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TEST_P(AudioMultiVectorTest, PopBack) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.PopBack(1); // Remove one element from each channel. + ASSERT_EQ(array_length() - 1u, vec.Size()); + // Let `ptr` point to the first element of the first channel in the + // interleaved array. + int16_t* ptr = array_interleaved_.data(); + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopBack(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the AssertSize method. 
+TEST_P(AudioMultiVectorTest, AssertSize) { + AudioMultiVector vec(num_channels_, array_length()); + EXPECT_EQ(array_length(), vec.Size()); + // Start with asserting with smaller sizes than already allocated. + vec.AssertSize(0); + vec.AssertSize(array_length() - 1); + // Nothing should have changed. + EXPECT_EQ(array_length(), vec.Size()); + // Assert with one element longer than already allocated. + vec.AssertSize(array_length() + 1); + // Expect vector to have grown. + EXPECT_EQ(array_length() + 1, vec.Size()); + // Also check the individual AudioVectors. + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + EXPECT_EQ(array_length() + 1u, vec[channel].Size()); + } +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, OverwriteAt) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec2(num_channels_); + vec2.Zeros(3); // 3 zeros in each channel. + // Overwrite vec2 at position 5. + vec1.OverwriteAt(vec2, 3, 5); + // Verify result. + // Length remains the same. + ASSERT_EQ(array_length(), vec1.Size()); + int16_t* ptr = array_interleaved_.data(); + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + if (i >= 5 && i <= 7) { + // Elements 5, 6, 7 should have been replaced with zeros. + EXPECT_EQ(0, vec1[channel][i]); + } else { + EXPECT_EQ(*ptr, vec1[channel][i]); + } + ++ptr; + } + } +} + +// Test the CopyChannel method, when the test is instantiated with at least two +// channels. +TEST_P(AudioMultiVectorTest, CopyChannel) { + if (num_channels_ < 2) + return; + + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + // Create a reference copy. + AudioMultiVector ref(num_channels_); + ref.PushBack(vec); + // Copy from first to last channel. + vec.CopyChannel(0, num_channels_ - 1); + // Verify that the first and last channels are identical; the others should + // be left untouched. + for (size_t i = 0; i < array_length(); ++i) { + // Verify that all but the last channel are untouched. + for (size_t channel = 0; channel < num_channels_ - 1; ++channel) { + EXPECT_EQ(ref[channel][i], vec[channel][i]); + } + // Verify that the last and the first channels are identical. + EXPECT_EQ(vec[0][i], vec[num_channels_ - 1][i]); + } +} + +TEST_P(AudioMultiVectorTest, PushBackEmptyArray) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved({}); + EXPECT_TRUE(vec.Empty()); +} + +INSTANTIATE_TEST_SUITE_P(TestNumChannels, + AudioMultiVectorTest, + ::testing::Values(static_cast(1), + static_cast(2), + static_cast(5))); +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc new file mode 100644 index 0000000000..10e8936447 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/audio_vector.h" + + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioVector::AudioVector() : AudioVector(kDefaultInitialSize) { + Clear(); +} + +AudioVector::AudioVector(size_t initial_size) + : array_(new int16_t[initial_size + 1]), + capacity_(initial_size + 1), + begin_index_(0), + end_index_(capacity_ - 1) { + memset(array_.get(), 0, capacity_ * sizeof(int16_t)); +} + +AudioVector::~AudioVector() = default; + +void AudioVector::Clear() { + end_index_ = begin_index_ = 0; +} + +void AudioVector::CopyTo(AudioVector* copy_to) const { + RTC_DCHECK(copy_to); + copy_to->Reserve(Size()); + CopyTo(Size(), 0, copy_to->array_.get()); + copy_to->begin_index_ = 0; + copy_to->end_index_ = Size(); +} + +void AudioVector::CopyTo(size_t length, + size_t position, + int16_t* copy_to) const { + if (length == 0) + return; + length = std::min(length, Size() - position); + const size_t copy_index = (begin_index_ + position) % capacity_; + const size_t first_chunk_length = std::min(length, capacity_ - copy_index); + memcpy(copy_to, &array_[copy_index], first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(©_to[first_chunk_length], array_.get(), + remaining_length * sizeof(int16_t)); + } +} + +void AudioVector::PushFront(const AudioVector& prepend_this) { + const size_t length = prepend_this.Size(); + if (length == 0) + return; + + // Although the subsequent calling to PushFront does Reserve in it, it is + // always more efficient to do a big Reserve first. + Reserve(Size() + length); + + const size_t first_chunk_length = + std::min(length, prepend_this.capacity_ - prepend_this.begin_index_); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) + PushFront(prepend_this.array_.get(), remaining_length); + PushFront(&prepend_this.array_[prepend_this.begin_index_], + first_chunk_length); +} + +void AudioVector::PushFront(const int16_t* prepend_this, size_t length) { + if (length == 0) + return; + Reserve(Size() + length); + const size_t first_chunk_length = std::min(length, begin_index_); + memcpy(&array_[begin_index_ - first_chunk_length], + &prepend_this[length - first_chunk_length], + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(&array_[capacity_ - remaining_length], prepend_this, + remaining_length * sizeof(int16_t)); + } + begin_index_ = (begin_index_ + capacity_ - length) % capacity_; +} + +void AudioVector::PushBack(const AudioVector& append_this) { + PushBack(append_this, append_this.Size(), 0); +} + +void AudioVector::PushBack(const AudioVector& append_this, + size_t length, + size_t position) { + RTC_DCHECK_LE(position, append_this.Size()); + RTC_DCHECK_LE(length, append_this.Size() - position); + + if (length == 0) + return; + + // Although the subsequent calling to PushBack does Reserve in it, it is + // always more efficient to do a big Reserve first. 
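+  // Illustrative sketch (not part of the upstream logic; values assumed for
+  // exposition): with capacity_ == 8, begin_index_ == 6 and Size() == 5, the
+  // five samples live at physical indices 6, 7, 0, 1, 2. Reading them splits
+  // into a first chunk of capacity_ - 6 == 2 samples (indices 6..7) and a
+  // remaining chunk of 3 samples (indices 0..2), which is exactly the
+  // two-memcpy pattern used by CopyTo, PushFront and PushBack in this file.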
+ Reserve(Size() + length); + + const size_t start_index = + (append_this.begin_index_ + position) % append_this.capacity_; + const size_t first_chunk_length = + std::min(length, append_this.capacity_ - start_index); + PushBack(&append_this.array_[start_index], first_chunk_length); + + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) + PushBack(append_this.array_.get(), remaining_length); +} + +void AudioVector::PushBack(const int16_t* append_this, size_t length) { + if (length == 0) + return; + Reserve(Size() + length); + const size_t first_chunk_length = std::min(length, capacity_ - end_index_); + memcpy(&array_[end_index_], append_this, + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(array_.get(), &append_this[first_chunk_length], + remaining_length * sizeof(int16_t)); + } + end_index_ = (end_index_ + length) % capacity_; +} + +void AudioVector::PopFront(size_t length) { + if (length == 0) + return; + length = std::min(length, Size()); + begin_index_ = (begin_index_ + length) % capacity_; +} + +void AudioVector::PopBack(size_t length) { + if (length == 0) + return; + // Never remove more than what is in the array. + length = std::min(length, Size()); + end_index_ = (end_index_ + capacity_ - length) % capacity_; +} + +void AudioVector::Extend(size_t extra_length) { + if (extra_length == 0) + return; + InsertZerosByPushBack(extra_length, Size()); +} + +void AudioVector::InsertAt(const int16_t* insert_this, + size_t length, + size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // When inserting to a position closer to the beginning, it is more efficient + // to insert by pushing front than to insert by pushing back, since less data + // will be moved, vice versa. + if (position <= Size() - position) { + InsertByPushFront(insert_this, length, position); + } else { + InsertByPushBack(insert_this, length, position); + } +} + +void AudioVector::InsertZerosAt(size_t length, size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // When inserting to a position closer to the beginning, it is more efficient + // to insert by pushing front than to insert by pushing back, since less data + // will be moved, vice versa. + if (position <= Size() - position) { + InsertZerosByPushFront(length, position); + } else { + InsertZerosByPushBack(length, position); + } +} + +void AudioVector::OverwriteAt(const AudioVector& insert_this, + size_t length, + size_t position) { + RTC_DCHECK_LE(length, insert_this.Size()); + if (length == 0) + return; + + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // Although the subsequent calling to OverwriteAt does Reserve in it, it is + // always more efficient to do a big Reserve first. 
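+  // Illustrative sketch (values assumed for exposition only): if
+  // Size() == 10, position == 8 and length == 5, then
+  // new_size = max(10, 8 + 5) == 13 below, so the vector grows by three
+  // samples and positions 8..12 are overwritten or appended.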
+ size_t new_size = std::max(Size(), position + length); + Reserve(new_size); + + const size_t first_chunk_length = + std::min(length, insert_this.capacity_ - insert_this.begin_index_); + OverwriteAt(&insert_this.array_[insert_this.begin_index_], first_chunk_length, + position); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + OverwriteAt(insert_this.array_.get(), remaining_length, + position + first_chunk_length); + } +} + +void AudioVector::OverwriteAt(const int16_t* insert_this, + size_t length, + size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + size_t new_size = std::max(Size(), position + length); + Reserve(new_size); + + const size_t overwrite_index = (begin_index_ + position) % capacity_; + const size_t first_chunk_length = + std::min(length, capacity_ - overwrite_index); + memcpy(&array_[overwrite_index], insert_this, + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(array_.get(), &insert_this[first_chunk_length], + remaining_length * sizeof(int16_t)); + } + + end_index_ = (begin_index_ + new_size) % capacity_; +} + +void AudioVector::CrossFade(const AudioVector& append_this, + size_t fade_length) { + // Fade length cannot be longer than the current vector or `append_this`. + RTC_DCHECK_LE(fade_length, Size()); + RTC_DCHECK_LE(fade_length, append_this.Size()); + fade_length = std::min(fade_length, Size()); + fade_length = std::min(fade_length, append_this.Size()); + size_t position = Size() - fade_length + begin_index_; + // Cross fade the overlapping regions. + // `alpha` is the mixing factor in Q14. + // TODO(hlundin): Consider skipping +1 in the denominator to produce a + // smoother cross-fade, in particular at the end of the fade. + int alpha_step = 16384 / (static_cast(fade_length) + 1); + int alpha = 16384; + for (size_t i = 0; i < fade_length; ++i) { + alpha -= alpha_step; + array_[(position + i) % capacity_] = + (alpha * array_[(position + i) % capacity_] + + (16384 - alpha) * append_this[i] + 8192) >> + 14; + } + RTC_DCHECK_GE(alpha, 0); // Verify that the slope was correct. + // Append what is left of `append_this`. + size_t samples_to_push_back = append_this.Size() - fade_length; + if (samples_to_push_back > 0) + PushBack(append_this, samples_to_push_back, fade_length); +} + +// Returns the number of elements in this AudioVector. +size_t AudioVector::Size() const { + return (end_index_ + capacity_ - begin_index_) % capacity_; +} + +// Returns true if this AudioVector is empty. +bool AudioVector::Empty() const { + return begin_index_ == end_index_; +} + +void AudioVector::Reserve(size_t n) { + if (capacity_ > n) + return; + const size_t length = Size(); + // Reserve one more sample to remove the ambiguity between empty vector and + // full vector. Therefore `begin_index_` == `end_index_` indicates empty + // vector, and `begin_index_` == (`end_index_` + 1) % capacity indicates + // full vector. 
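+  // Illustrative sketch (values assumed for exposition only): with n == 4
+  // the allocation below holds 5 slots. An empty vector has
+  // begin_index_ == end_index_, while a full one (4 samples) has
+  // (end_index_ + 1) % 5 == begin_index_. Without the extra slot, both
+  // states would collapse to begin_index_ == end_index_.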
+ std::unique_ptr temp_array(new int16_t[n + 1]); + CopyTo(length, 0, temp_array.get()); + array_.swap(temp_array); + begin_index_ = 0; + end_index_ = length; + capacity_ = n + 1; +} + +void AudioVector::InsertByPushBack(const int16_t* insert_this, + size_t length, + size_t position) { + const size_t move_chunk_length = Size() - position; + std::unique_ptr temp_array(nullptr); + if (move_chunk_length > 0) { + // TODO(minyue): see if it is possible to avoid copying to a buffer. + temp_array.reset(new int16_t[move_chunk_length]); + CopyTo(move_chunk_length, position, temp_array.get()); + PopBack(move_chunk_length); + } + + Reserve(Size() + length + move_chunk_length); + PushBack(insert_this, length); + if (move_chunk_length > 0) + PushBack(temp_array.get(), move_chunk_length); +} + +void AudioVector::InsertByPushFront(const int16_t* insert_this, + size_t length, + size_t position) { + std::unique_ptr temp_array(nullptr); + if (position > 0) { + // TODO(minyue): see if it is possible to avoid copying to a buffer. + temp_array.reset(new int16_t[position]); + CopyTo(position, 0, temp_array.get()); + PopFront(position); + } + + Reserve(Size() + length + position); + PushFront(insert_this, length); + if (position > 0) + PushFront(temp_array.get(), position); +} + +void AudioVector::InsertZerosByPushBack(size_t length, size_t position) { + const size_t move_chunk_length = Size() - position; + std::unique_ptr temp_array(nullptr); + if (move_chunk_length > 0) { + temp_array.reset(new int16_t[move_chunk_length]); + CopyTo(move_chunk_length, position, temp_array.get()); + PopBack(move_chunk_length); + } + + Reserve(Size() + length + move_chunk_length); + + const size_t first_zero_chunk_length = + std::min(length, capacity_ - end_index_); + memset(&array_[end_index_], 0, first_zero_chunk_length * sizeof(int16_t)); + const size_t remaining_zero_length = length - first_zero_chunk_length; + if (remaining_zero_length > 0) + memset(array_.get(), 0, remaining_zero_length * sizeof(int16_t)); + end_index_ = (end_index_ + length) % capacity_; + + if (move_chunk_length > 0) + PushBack(temp_array.get(), move_chunk_length); +} + +void AudioVector::InsertZerosByPushFront(size_t length, size_t position) { + std::unique_ptr temp_array(nullptr); + if (position > 0) { + temp_array.reset(new int16_t[position]); + CopyTo(position, 0, temp_array.get()); + PopFront(position); + } + + Reserve(Size() + length + position); + + const size_t first_zero_chunk_length = std::min(length, begin_index_); + memset(&array_[begin_index_ - first_zero_chunk_length], 0, + first_zero_chunk_length * sizeof(int16_t)); + const size_t remaining_zero_length = length - first_zero_chunk_length; + if (remaining_zero_length > 0) + memset(&array_[capacity_ - remaining_zero_length], 0, + remaining_zero_length * sizeof(int16_t)); + begin_index_ = (begin_index_ + capacity_ - length) % capacity_; + + if (position > 0) + PushFront(temp_array.get(), position); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h new file mode 100644 index 0000000000..d68f3ec6be --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ + +#include + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +class AudioVector { + public: + // Creates an empty AudioVector. + AudioVector(); + + // Creates an AudioVector with an initial size. + explicit AudioVector(size_t initial_size); + + virtual ~AudioVector(); + + AudioVector(const AudioVector&) = delete; + AudioVector& operator=(const AudioVector&) = delete; + + // Deletes all values and make the vector empty. + virtual void Clear(); + + // Copies all values from this vector to `copy_to`. Any contents in `copy_to` + // are deleted before the copy operation. After the operation is done, + // `copy_to` will be an exact replica of this object. + virtual void CopyTo(AudioVector* copy_to) const; + + // Copies `length` values from `position` in this vector to `copy_to`. + virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const; + + // Prepends the contents of AudioVector `prepend_this` to this object. The + // length of this object is increased with the length of `prepend_this`. + virtual void PushFront(const AudioVector& prepend_this); + + // Same as above, but with an array `prepend_this` with `length` elements as + // source. + virtual void PushFront(const int16_t* prepend_this, size_t length); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const AudioVector& append_this); + + // Appends a segment of `append_this` to the end of this object. The segment + // starts from `position` and has `length` samples. + virtual void PushBack(const AudioVector& append_this, + size_t length, + size_t position); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const int16_t* append_this, size_t length); + + // Removes `length` elements from the beginning of this object. + virtual void PopFront(size_t length); + + // Removes `length` elements from the end of this object. + virtual void PopBack(size_t length); + + // Extends this object with `extra_length` elements at the end. The new + // elements are initialized to zero. + virtual void Extend(size_t extra_length); + + // Inserts `length` elements taken from the array `insert_this` and insert + // them at `position`. The length of the AudioVector is increased by `length`. + // `position` = 0 means that the new values are prepended to the vector. + // `position` = Size() means that the new values are appended to the vector. + virtual void InsertAt(const int16_t* insert_this, + size_t length, + size_t position); + + // Like InsertAt, but inserts `length` zero elements at `position`. + virtual void InsertZerosAt(size_t length, size_t position); + + // Overwrites `length` elements of this AudioVector starting from `position` + // with first values in `AudioVector`. The definition of `position` + // is the same as for InsertAt(). If `length` and `position` are selected + // such that the new data extends beyond the end of the current AudioVector, + // the vector is extended to accommodate the new data. + virtual void OverwriteAt(const AudioVector& insert_this, + size_t length, + size_t position); + + // Overwrites `length` elements of this AudioVector with values taken from the + // array `insert_this`, starting at `position`. 
The definition of `position` + // is the same as for InsertAt(). If `length` and `position` are selected + // such that the new data extends beyond the end of the current AudioVector, + // the vector is extended to accommodate the new data. + virtual void OverwriteAt(const int16_t* insert_this, + size_t length, + size_t position); + + // Appends `append_this` to the end of the current vector. Lets the two + // vectors overlap by `fade_length` samples, and cross-fade linearly in this + // region. + virtual void CrossFade(const AudioVector& append_this, size_t fade_length); + + // Returns the number of elements in this AudioVector. + virtual size_t Size() const; + + // Returns true if this AudioVector is empty. + virtual bool Empty() const; + + // Accesses and modifies an element of AudioVector. + inline const int16_t& operator[](size_t index) const { + return array_[WrapIndex(index, begin_index_, capacity_)]; + } + + inline int16_t& operator[](size_t index) { + return array_[WrapIndex(index, begin_index_, capacity_)]; + } + + private: + static const size_t kDefaultInitialSize = 10; + + // This method is used by the [] operators to calculate an index within the + // capacity of the array, but without using the modulo operation (%). + static inline size_t WrapIndex(size_t index, + size_t begin_index, + size_t capacity) { + RTC_DCHECK_LT(index, capacity); + RTC_DCHECK_LT(begin_index, capacity); + size_t ix = begin_index + index; + RTC_DCHECK_GE(ix, index); // Check for overflow. + if (ix >= capacity) { + ix -= capacity; + } + RTC_DCHECK_LT(ix, capacity); + return ix; + } + + void Reserve(size_t n); + + void InsertByPushBack(const int16_t* insert_this, + size_t length, + size_t position); + + void InsertByPushFront(const int16_t* insert_this, + size_t length, + size_t position); + + void InsertZerosByPushBack(size_t length, size_t position); + + void InsertZerosByPushFront(size_t length, size_t position); + + std::unique_ptr array_; + + size_t capacity_; // Allocated number of samples in the array. + + // The index of the first sample in `array_`, except when + // |begin_index_ == end_index_|, which indicates an empty buffer. + size_t begin_index_; + + // The index of the sample after the last sample in `array_`. + size_t end_index_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc new file mode 100644 index 0000000000..ae9dd88606 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/audio_vector.h" + +#include + +#include + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +class AudioVectorTest : public ::testing::Test { + protected: + virtual void SetUp() { + // Populate test array. 
+ for (size_t i = 0; i < array_length(); ++i) { + array_[i] = rtc::checked_cast(i); + } + } + + size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); } + + int16_t array_[10]; +}; + +// Create and destroy AudioVector objects, both empty and with a predefined +// length. +TEST_F(AudioVectorTest, CreateAndDestroy) { + AudioVector vec1; + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioVector vec2(initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TEST_F(AudioVectorTest, SubscriptOperator) { + AudioVector vec(array_length()); + for (size_t i = 0; i < array_length(); ++i) { + vec[i] = static_cast(i); + const int16_t& value = vec[i]; // Make sure to use the const version. + EXPECT_EQ(static_cast(i), value); + } +} + +// Test the PushBack method and the CopyFrom method. The Clear method is also +// invoked. +TEST_F(AudioVectorTest, PushBackAndCopy) { + AudioVector vec; + AudioVector vec_copy; + vec.PushBack(array_, array_length()); + vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`. + ASSERT_EQ(array_length(), vec.Size()); + ASSERT_EQ(array_length(), vec_copy.Size()); + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[i]); + EXPECT_EQ(array_[i], vec_copy[i]); + } + + // Clear `vec` and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyTo(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Test the PushBack method with another AudioVector as input argument. +TEST_F(AudioVectorTest, PushBackVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast(i); + vec2[i] = static_cast(i + kLength); + } + // Append vec2 to the back of vec1. + vec1.PushBack(vec2); + ASSERT_EQ(2 * kLength, vec1.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast(i), vec1[i]); + } +} + +// Test the PushFront method. +TEST_F(AudioVectorTest, PushFront) { + AudioVector vec; + vec.PushFront(array_, array_length()); + ASSERT_EQ(array_length(), vec.Size()); + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[i]); + } +} + +// Test the PushFront method with another AudioVector as input argument. +TEST_F(AudioVectorTest, PushFrontVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast(i); + vec2[i] = static_cast(i + kLength); + } + // Prepend vec1 to the front of vec2. + vec2.PushFront(vec1); + ASSERT_EQ(2 * kLength, vec2.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast(i), vec2[i]); + } +} + +// Test the PopFront method. +TEST_F(AudioVectorTest, PopFront) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.PopFront(1); // Remove one element. + EXPECT_EQ(array_length() - 1u, vec.Size()); + for (size_t i = 0; i < array_length() - 1; ++i) { + EXPECT_EQ(static_cast(i + 1), vec[i]); + } + vec.PopFront(array_length()); // Remove more elements than vector size. 
+ EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TEST_F(AudioVectorTest, PopBack) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.PopBack(1); // Remove one element. + EXPECT_EQ(array_length() - 1u, vec.Size()); + for (size_t i = 0; i < array_length() - 1; ++i) { + EXPECT_EQ(static_cast(i), vec[i]); + } + vec.PopBack(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the Extend method. +TEST_F(AudioVectorTest, Extend) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.Extend(5); // Extend with 5 elements, which should all be zeros. + ASSERT_EQ(array_length() + 5u, vec.Size()); + // Verify that all are zero. + for (size_t i = array_length(); i < array_length() + 5; ++i) { + EXPECT_EQ(0, vec[i]); + } +} + +// Test the InsertAt method with an insert position in the middle of the vector. +TEST_F(AudioVectorTest, InsertAt) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 5; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. + size_t pos = 0; + for (int i = 0; i < insert_position; ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (size_t i = insert_position; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertZerosAt method with an insert position in the middle of the +// vector. Use the InsertAt method as reference. +TEST_F(AudioVectorTest, InsertZerosAt) { + AudioVector vec; + AudioVector vec_ref; + vec.PushBack(array_, array_length()); + vec_ref.PushBack(array_, array_length()); + static const int kNewLength = 5; + int insert_position = 5; + vec.InsertZerosAt(kNewLength, insert_position); + int16_t new_array[kNewLength] = {0}; // All zero elements. + vec_ref.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vectors are identical. + ASSERT_EQ(vec_ref.Size(), vec.Size()); + for (size_t i = 0; i < vec.Size(); ++i) { + EXPECT_EQ(vec_ref[i], vec[i]); + } +} + +// Test the InsertAt method with an insert position at the start of the vector. +TEST_F(AudioVectorTest, InsertAtBeginning) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 0; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {100, 101, ..., 100 + kNewLength - 1, + // 0, 1, ..., kLength - 1}. + size_t pos = 0; + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (size_t i = insert_position; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position at the end of the vector. 
+TEST_F(AudioVectorTest, InsertAtEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = rtc::checked_cast(array_length()); + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. + size_t pos = 0; + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position beyond the end of the +// vector. Verify that a position beyond the end of the vector does not lead to +// an error. The expected outcome is the same as if the vector end was used as +// input position. That is, the input position should be capped at the maximum +// allowed value. +TEST_F(AudioVectorTest, InsertBeyondEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = + rtc::checked_cast(array_length() + 10); // Too large. + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. + size_t pos = 0; + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the OverwriteAt method with a position such that all of the new values +// fit within the old vector. +TEST_F(AudioVectorTest, OverwriteAt) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + size_t insert_position = 2; + vec.OverwriteAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. + size_t pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (; pos < array_length(); ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } +} + +// Test the OverwriteAt method with a position such that some of the new values +// extend beyond the end of the current vector. This is valid, and the vector is +// expected to expand to accommodate the new values. +TEST_F(AudioVectorTest, OverwriteBeyondEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. 
+ for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = rtc::checked_cast(array_length() - 2); + vec.OverwriteAt(new_array, kNewLength, insert_position); + ASSERT_EQ(array_length() - 2u + kNewLength, vec.Size()); + // Verify that the vector looks as follows: + // {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. + int pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + // Verify that we checked to the end of `vec`. + EXPECT_EQ(vec.Size(), static_cast(pos)); +} + +TEST_F(AudioVectorTest, CrossFade) { + static const size_t kLength = 100; + static const size_t kFadeLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set all vector elements to 0 in `vec1` and 100 in `vec2`. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = 0; + vec2[i] = 100; + } + vec1.CrossFade(vec2, kFadeLength); + ASSERT_EQ(2 * kLength - kFadeLength, vec1.Size()); + // First part untouched. + for (size_t i = 0; i < kLength - kFadeLength; ++i) { + EXPECT_EQ(0, vec1[i]); + } + // Check mixing zone. + for (size_t i = 0; i < kFadeLength; ++i) { + EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1), + vec1[kLength - kFadeLength + i], 1); + } + // Second part untouched. + for (size_t i = kLength; i < vec1.Size(); ++i) { + EXPECT_EQ(100, vec1[i]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc new file mode 100644 index 0000000000..2c95d3b390 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/background_noise.h" + +#include // memcpy + +#include // min, max + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/cross_correlation.h" +#include "modules/audio_coding/neteq/post_decode_vad.h" + +namespace webrtc { +namespace { + +constexpr size_t kMaxSampleRate = 48000; + +} // namespace + +// static +constexpr size_t BackgroundNoise::kMaxLpcOrder; + +BackgroundNoise::BackgroundNoise(size_t num_channels) + : num_channels_(num_channels), + channel_parameters_(new ChannelParameters[num_channels_]) { + Reset(); +} + +BackgroundNoise::~BackgroundNoise() {} + +void BackgroundNoise::Reset() { + initialized_ = false; + for (size_t channel = 0; channel < num_channels_; ++channel) { + channel_parameters_[channel].Reset(); + } +} + +bool BackgroundNoise::Update(const AudioMultiVector& input, + const PostDecodeVad& vad) { + bool filter_params_saved = false; + if (vad.running() && vad.active_speech()) { + // Do not update the background noise parameters if we know that the signal + // is active speech. 
+ return filter_params_saved; + } + + int32_t auto_correlation[kMaxLpcOrder + 1]; + int16_t fiter_output[kMaxLpcOrder + kResidualLength]; + int16_t reflection_coefficients[kMaxLpcOrder]; + int16_t lpc_coefficients[kMaxLpcOrder + 1]; + + for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { + ChannelParameters& parameters = channel_parameters_[channel_ix]; + int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; + int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; + RTC_DCHECK_GE(input.Size(), kVecLen); + input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal); + int32_t sample_energy = + CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation); + + if ((!vad.running() && + sample_energy < parameters.energy_update_threshold) || + (vad.running() && !vad.active_speech())) { + // Generate LPC coefficients. + if (auto_correlation[0] <= 0) { + // Center value in auto-correlation is not positive. Do not update. + return filter_params_saved; + } + + // Regardless of whether the filter is actually updated or not, + // update energy threshold levels, since we have in fact observed + // a low energy signal. + if (sample_energy < parameters.energy_update_threshold) { + // Never go under 1.0 in average sample energy. + parameters.energy_update_threshold = std::max(sample_energy, 1); + parameters.low_energy_update_threshold = 0; + } + + // Only update BGN if filter is stable, i.e., if return value from + // Levinson-Durbin function is 1. + if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients, + reflection_coefficients, + kMaxLpcOrder) != 1) { + return filter_params_saved; + } + + // Generate the CNG gain factor by looking at the energy of the residual. + WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, + fiter_output, lpc_coefficients, + kMaxLpcOrder + 1, kResidualLength); + int32_t residual_energy = WebRtcSpl_DotProductWithScale( + fiter_output, fiter_output, kResidualLength, 0); + + // Check spectral flatness. + // Comparing the residual variance with the input signal variance tells + // if the spectrum is flat or not. + // If 5 * residual_energy >= 16 * sample_energy, the spectrum is flat + // enough. Also ensure that the energy is non-zero. + if ((sample_energy > 0) && + (int64_t{5} * residual_energy >= int64_t{16} * sample_energy)) { + // Spectrum is flat enough; save filter parameters. + // `temp_signal` + `kVecLen` - `kMaxLpcOrder` points at the first of the + // `kMaxLpcOrder` samples in the residual signal, which will form the + // filter state for the next noise generation. + SaveParameters(channel_ix, lpc_coefficients, + temp_signal + kVecLen - kMaxLpcOrder, sample_energy, + residual_energy); + filter_params_saved = true; + } + } else { + // Will only happen if post-decode VAD is disabled and `sample_energy` is + // not low enough. Increase the threshold for update so that it increases + // by a factor 4 in 4 seconds. 
+ IncrementEnergyThreshold(channel_ix, sample_energy); + } + } + return filter_params_saved; +} + +void BackgroundNoise::GenerateBackgroundNoise( + rtc::ArrayView random_vector, + size_t channel, + int mute_slope, + bool too_many_expands, + size_t num_noise_samples, + int16_t* buffer) { + constexpr size_t kNoiseLpcOrder = kMaxLpcOrder; + int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; + RTC_DCHECK_LE(num_noise_samples, (kMaxSampleRate / 8000 * 125)); + RTC_DCHECK_GE(random_vector.size(), num_noise_samples); + int16_t* noise_samples = &buffer[kNoiseLpcOrder]; + if (initialized()) { + // Use background noise parameters. + memcpy(noise_samples - kNoiseLpcOrder, FilterState(channel), + sizeof(int16_t) * kNoiseLpcOrder); + + int dc_offset = 0; + if (ScaleShift(channel) > 1) { + dc_offset = 1 << (ScaleShift(channel) - 1); + } + + // Scale random vector to correct energy level. + WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector.data(), + Scale(channel), dc_offset, + ScaleShift(channel), num_noise_samples); + + WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples, + Filter(channel), kNoiseLpcOrder + 1, + num_noise_samples); + + SetFilterState( + channel, + {&(noise_samples[num_noise_samples - kNoiseLpcOrder]), kNoiseLpcOrder}); + + // Unmute the background noise. + int16_t bgn_mute_factor = MuteFactor(channel); + if (bgn_mute_factor < 16384) { + WebRtcSpl_AffineTransformVector(noise_samples, noise_samples, + bgn_mute_factor, 8192, 14, + num_noise_samples); + } + // Update mute_factor in BackgroundNoise class. + SetMuteFactor(channel, bgn_mute_factor); + } else { + // BGN parameters have not been initialized; use zero noise. + memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples); + } +} + +int32_t BackgroundNoise::Energy(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].energy; +} + +void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) { + RTC_DCHECK_LT(channel, num_channels_); + channel_parameters_[channel].mute_factor = value; +} + +int16_t BackgroundNoise::MuteFactor(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].mute_factor; +} + +const int16_t* BackgroundNoise::Filter(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].filter; +} + +const int16_t* BackgroundNoise::FilterState(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].filter_state; +} + +void BackgroundNoise::SetFilterState(size_t channel, + rtc::ArrayView input) { + RTC_DCHECK_LT(channel, num_channels_); + size_t length = std::min(input.size(), kMaxLpcOrder); + memcpy(channel_parameters_[channel].filter_state, input.data(), + length * sizeof(int16_t)); +} + +int16_t BackgroundNoise::Scale(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].scale; +} +int16_t BackgroundNoise::ScaleShift(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].scale_shift; +} + +int32_t BackgroundNoise::CalculateAutoCorrelation( + const int16_t* signal, + size_t length, + int32_t* auto_correlation) const { + static const int kCorrelationStep = -1; + const int correlation_scale = + CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1, + kCorrelationStep, auto_correlation); + + // Number of shifts to normalize energy to energy/sample. 
+ int energy_sample_shift = kLogVecLen - correlation_scale; + return auto_correlation[0] >> energy_sample_shift; +} + +void BackgroundNoise::IncrementEnergyThreshold(size_t channel, + int32_t sample_energy) { + // TODO(hlundin): Simplify the below threshold update. What this code + // does is simply "threshold += (increment * threshold) >> 16", but due + // to the limited-width operations, it is not exactly the same. The + // difference should be inaudible, but bit-exactness would not be + // maintained. + RTC_DCHECK_LT(channel, num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + int32_t temp_energy = + (kThresholdIncrement * parameters.low_energy_update_threshold) >> 16; + temp_energy += + kThresholdIncrement * (parameters.energy_update_threshold & 0xFF); + temp_energy += + (kThresholdIncrement * ((parameters.energy_update_threshold >> 8) & 0xFF)) + << 8; + parameters.low_energy_update_threshold += temp_energy; + + parameters.energy_update_threshold += + kThresholdIncrement * (parameters.energy_update_threshold >> 16); + parameters.energy_update_threshold += + parameters.low_energy_update_threshold >> 16; + parameters.low_energy_update_threshold = + parameters.low_energy_update_threshold & 0x0FFFF; + + // Update maximum energy. + // Decrease by a factor 1/1024 each time. + parameters.max_energy = parameters.max_energy - (parameters.max_energy >> 10); + if (sample_energy > parameters.max_energy) { + parameters.max_energy = sample_energy; + } + + // Set `energy_update_threshold` to no less than 60 dB lower than + // `max_energy_`. Adding 524288 assures proper rounding. + int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20; + if (energy_update_threshold > parameters.energy_update_threshold) { + parameters.energy_update_threshold = energy_update_threshold; + } +} + +void BackgroundNoise::SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy) { + RTC_DCHECK_LT(channel, num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + memcpy(parameters.filter, lpc_coefficients, + (kMaxLpcOrder + 1) * sizeof(int16_t)); + memcpy(parameters.filter_state, filter_state, kMaxLpcOrder * sizeof(int16_t)); + // Save energy level and update energy threshold levels. + // Never get under 1.0 in average sample energy. + parameters.energy = std::max(sample_energy, 1); + parameters.energy_update_threshold = parameters.energy; + parameters.low_energy_update_threshold = 0; + + // Normalize residual_energy to 29 or 30 bits before sqrt. + int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1; + if (norm_shift & 0x1) { + norm_shift -= 1; // Even number of shifts required. + } + residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift); + + // Calculate scale and shift factor. + parameters.scale = static_cast(WebRtcSpl_SqrtFloor(residual_energy)); + // Add 13 to the `scale_shift_`, since the random numbers table is in + // Q13. + // TODO(hlundin): Move the "13" to where the `scale_shift_` is used? 
+ parameters.scale_shift = + static_cast(13 + ((kLogResidualLength + norm_shift) / 2)); + + initialized_ = true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h new file mode 100644 index 0000000000..8e6d5890a0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ +#define MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ + +#include // size_t + +#include + +#include "api/array_view.h" + +namespace webrtc { + +// Forward declarations. +class AudioMultiVector; +class PostDecodeVad; + +// This class handles estimation of background noise parameters. +class BackgroundNoise { + public: + // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10. + // Will work anyway, but probably sound a little worse. + static constexpr size_t kMaxLpcOrder = 8; // 32000 / 8000 + 4. + + explicit BackgroundNoise(size_t num_channels); + virtual ~BackgroundNoise(); + + BackgroundNoise(const BackgroundNoise&) = delete; + BackgroundNoise& operator=(const BackgroundNoise&) = delete; + + void Reset(); + + // Updates the parameter estimates based on the signal currently in the + // `sync_buffer`, and on the latest decision in `vad` if it is running. + // Returns true if the filter parameters are updated. + bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad); + + // Generates background noise given a random vector and writes the output to + // `buffer`. + void GenerateBackgroundNoise(rtc::ArrayView random_vector, + size_t channel, + int mute_slope, + bool too_many_expands, + size_t num_noise_samples, + int16_t* buffer); + + // Returns `energy_` for `channel`. + int32_t Energy(size_t channel) const; + + // Sets the value of `mute_factor_` for `channel` to `value`. + void SetMuteFactor(size_t channel, int16_t value); + + // Returns `mute_factor_` for `channel`. + int16_t MuteFactor(size_t channel) const; + + // Returns a pointer to `filter_` for `channel`. + const int16_t* Filter(size_t channel) const; + + // Returns a pointer to `filter_state_` for `channel`. + const int16_t* FilterState(size_t channel) const; + + // Copies `input` to the filter state. Will not copy more than `kMaxLpcOrder` + // elements. + void SetFilterState(size_t channel, rtc::ArrayView input); + + // Returns `scale_` for `channel`. + int16_t Scale(size_t channel) const; + + // Returns `scale_shift_` for `channel`. + int16_t ScaleShift(size_t channel) const; + + // Accessors. + bool initialized() const { return initialized_; } + + private: + static const int kThresholdIncrement = 229; // 0.0035 in Q16. + static const size_t kVecLen = 256; + static const int kLogVecLen = 8; // log2(kVecLen). + static const size_t kResidualLength = 64; + static const int16_t kLogResidualLength = 6; // log2(kResidualLength) + + struct ChannelParameters { + // Constructor. 
+ ChannelParameters() { Reset(); } + + void Reset() { + energy = 2500; + max_energy = 0; + energy_update_threshold = 500000; + low_energy_update_threshold = 0; + memset(filter_state, 0, sizeof(filter_state)); + memset(filter, 0, sizeof(filter)); + filter[0] = 4096; + mute_factor = 0; + scale = 20000; + scale_shift = 24; + } + + int32_t energy; + int32_t max_energy; + int32_t energy_update_threshold; + int32_t low_energy_update_threshold; + int16_t filter_state[kMaxLpcOrder]; + int16_t filter[kMaxLpcOrder + 1]; + int16_t mute_factor; + int16_t scale; + int16_t scale_shift; + }; + + int32_t CalculateAutoCorrelation(const int16_t* signal, + size_t length, + int32_t* auto_correlation) const; + + // Increments the energy threshold by a factor 1 + `kThresholdIncrement`. + void IncrementEnergyThreshold(size_t channel, int32_t sample_energy); + + // Updates the filter parameters. + void SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy); + + size_t num_channels_; + std::unique_ptr channel_parameters_; + bool initialized_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc new file mode 100644 index 0000000000..e32492f57e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for BackgroundNoise class. + +#include "modules/audio_coding/neteq/background_noise.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(BackgroundNoise, CreateAndDestroy) { + size_t channels = 1; + BackgroundNoise bgn(channels); +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc new file mode 100644 index 0000000000..2c42d0d13f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/buffer_level_filter.h" + +#include + +#include + +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +BufferLevelFilter::BufferLevelFilter() { + Reset(); +} + +void BufferLevelFilter::Reset() { + filtered_current_level_ = 0; + level_factor_ = 253; +} + +void BufferLevelFilter::Update(size_t buffer_size_samples, + int time_stretched_samples) { + // Filter: + // `filtered_current_level_` = `level_factor_` * `filtered_current_level_` + + // (1 - `level_factor_`) * `buffer_size_samples` + // `level_factor_` and `filtered_current_level_` are in Q8. + // `buffer_size_samples` is in Q0. + const int64_t filtered_current_level = + (level_factor_ * int64_t{filtered_current_level_} >> 8) + + (256 - level_factor_) * rtc::dchecked_cast(buffer_size_samples); + + // Account for time-scale operations (accelerate and pre-emptive expand) and + // make sure that the filtered value remains non-negative. + filtered_current_level_ = rtc::saturated_cast(std::max( + 0, filtered_current_level - int64_t{time_stretched_samples} * (1 << 8))); +} + +void BufferLevelFilter::SetFilteredBufferLevel(int buffer_size_samples) { + filtered_current_level_ = + rtc::saturated_cast(int64_t{buffer_size_samples} * 256); +} + +void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level_ms) { + if (target_buffer_level_ms <= 20) { + level_factor_ = 251; + } else if (target_buffer_level_ms <= 60) { + level_factor_ = 252; + } else if (target_buffer_level_ms <= 140) { + level_factor_ = 253; + } else { + level_factor_ = 254; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h new file mode 100644 index 0000000000..ced36da9c2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_ + +#include +#include + +namespace webrtc { + +class BufferLevelFilter { + public: + BufferLevelFilter(); + virtual ~BufferLevelFilter() {} + + BufferLevelFilter(const BufferLevelFilter&) = delete; + BufferLevelFilter& operator=(const BufferLevelFilter&) = delete; + + virtual void Reset(); + + // Updates the filter. Current buffer size is `buffer_size_samples`. + // `time_stretched_samples` is subtracted from the filtered value (thus + // bypassing the filter operation). + virtual void Update(size_t buffer_size_samples, int time_stretched_samples); + + // Set the filtered buffer level to a particular value directly. This should + // only be used in case of large changes in buffer size, such as buffer + // flushes. + virtual void SetFilteredBufferLevel(int buffer_size_samples); + + // The target level is used to select the appropriate filter coefficient. + virtual void SetTargetBufferLevel(int target_buffer_level_ms); + + // Returns filtered current level in number of samples. + virtual int filtered_current_level() const { + // Round to nearest whole sample. 
+    return (int64_t{filtered_current_level_} + (1 << 7)) >> 8;
+  }
+
+ private:
+  int level_factor_;  // Filter factor for the buffer level filter in Q8.
+  int filtered_current_level_;  // Filtered current buffer level in Q8.
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc
new file mode 100644
index 0000000000..6773e96f58
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for BufferLevelFilter class.
+
+#include "modules/audio_coding/neteq/buffer_level_filter.h"
+
+#include <math.h>  // Access to pow function.
+
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(BufferLevelFilter, CreateAndDestroy) {
+  BufferLevelFilter* filter = new BufferLevelFilter();
+  EXPECT_EQ(0, filter->filtered_current_level());
+  delete filter;
+}
+
+TEST(BufferLevelFilter, ConvergenceTest) {
+  BufferLevelFilter filter;
+  for (int times = 10; times <= 50; times += 10) {
+    for (int value = 100; value <= 200; value += 10) {
+      filter.Reset();
+      filter.SetTargetBufferLevel(20);  // Makes filter coefficient 251/256.
+      rtc::StringBuilder ss;
+      ss << "times = " << times << ", value = " << value;
+      SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
+      for (int i = 0; i < times; ++i) {
+        filter.Update(value, 0 /* time_stretched_samples */);
+      }
+      // Expect the filtered value to be (theoretically)
+      // (1 - (251/256) ^ `times`) * `value`.
+      double expected_value_double = (1 - pow(251.0 / 256.0, times)) * value;
+      int expected_value = static_cast<int>(expected_value_double);
+
+      // The actual value may differ slightly from the expected value due to
+      // intermediate-stage rounding errors in the filter implementation.
+      // This is why we have to use EXPECT_NEAR with a tolerance of +/-1.
+      EXPECT_NEAR(expected_value, filter.filtered_current_level(), 1);
+    }
+  }
+}
+
+// Verify that the target buffer level impacts the filter convergence.
+TEST(BufferLevelFilter, FilterFactor) {
+  BufferLevelFilter filter;
+  // Update 10 times with value 100.
+  const int kTimes = 10;
+  const int kValue = 100;
+
+  filter.SetTargetBufferLevel(60);  // Makes filter coefficient 252/256.
+  for (int i = 0; i < kTimes; ++i) {
+    filter.Update(kValue, 0 /* time_stretched_samples */);
+  }
+  // Expect the filtered value to be
+  // (1 - (252/256) ^ `kTimes`) * `kValue`.
+  int expected_value = 15;
+  EXPECT_EQ(expected_value, filter.filtered_current_level());
+
+  filter.Reset();
+  filter.SetTargetBufferLevel(140);  // Makes filter coefficient 253/256.
+  for (int i = 0; i < kTimes; ++i) {
+    filter.Update(kValue, 0 /* time_stretched_samples */);
+  }
+  // Expect the filtered value to be
+  // (1 - (253/256) ^ `kTimes`) * `kValue`.
+  expected_value = 11;
+  EXPECT_EQ(expected_value, filter.filtered_current_level());
+
+  filter.Reset();
+  filter.SetTargetBufferLevel(160);  // Makes filter coefficient 254/256.
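+  // A larger coefficient weights the previous filtered value more heavily,
+  // so convergence toward kValue is slower and the level reached after
+  // kTimes updates is smaller (15, 11 and finally 8 below).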
+  for (int i = 0; i < kTimes; ++i) {
+    filter.Update(kValue, 0 /* time_stretched_samples */);
+  }
+  // Expect the filtered value to be
+  // (1 - (254/256) ^ `kTimes`) * `kValue`.
+  expected_value = 8;
+  EXPECT_EQ(expected_value, filter.filtered_current_level());
+}
+
+TEST(BufferLevelFilter, TimeStretchedSamples) {
+  BufferLevelFilter filter;
+  filter.SetTargetBufferLevel(20);  // Makes filter coefficient 251/256.
+  // Update 10 times with value 100.
+  const int kTimes = 10;
+  const int kValue = 100;
+  const int kTimeStretchedSamples = 3;
+  for (int i = 0; i < kTimes; ++i) {
+    filter.Update(kValue, 0);
+  }
+  // Expect the filtered value to be
+  // (1 - (251/256) ^ `kTimes`) * `kValue`.
+  const int kExpectedValue = 18;
+  EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
+
+  // Update filter again, now with non-zero value for packet length.
+  // Set the current filtered value to be the input, in order to isolate the
+  // impact of `kTimeStretchedSamples`.
+  filter.Update(filter.filtered_current_level(), kTimeStretchedSamples);
+  EXPECT_EQ(kExpectedValue - kTimeStretchedSamples,
+            filter.filtered_current_level());
+  // Try a negative value and verify that we come back to the previous result.
+  filter.Update(filter.filtered_current_level(), -kTimeStretchedSamples);
+  EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc
new file mode 100644
index 0000000000..a2ce888f45
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc
@@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/comfort_noise.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/audio_vector.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/dsp_helper.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+void ComfortNoise::Reset() {
+  first_call_ = true;
+}
+
+int ComfortNoise::UpdateParameters(const Packet& packet) {
+  // Get comfort noise decoder.
+  if (decoder_database_->SetActiveCngDecoder(packet.payload_type) != kOK) {
+    return kUnknownPayloadType;
+  }
+  ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
+  RTC_DCHECK(cng_decoder);
+  cng_decoder->UpdateSid(packet.payload);
+  return kOK;
+}
+
+int ComfortNoise::Generate(size_t requested_length, AudioMultiVector* output) {
+  // TODO(hlundin): Change to an enumerator and skip assert.
+  RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
+             fs_hz_ == 48000);
+  // Not adapted for multi-channel yet.
+  if (output->Channels() != 1) {
+    RTC_LOG(LS_ERROR) << "No multi-channel support";
+    return kMultiChannelNotSupported;
+  }
+
+  size_t number_of_samples = requested_length;
+  bool new_period = false;
+  if (first_call_) {
+    // Generate noise and overlap slightly with old data.
+    number_of_samples = requested_length + overlap_length_;
+    new_period = true;
+  }
+  output->AssertSize(number_of_samples);
+  // Get the decoder from the database.
+  ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
+  if (!cng_decoder) {
+    RTC_LOG(LS_ERROR) << "Unknown payload type";
+    return kUnknownPayloadType;
+  }
+
+  std::unique_ptr<int16_t[]> temp(new int16_t[number_of_samples]);
+  if (!cng_decoder->Generate(
+          rtc::ArrayView<int16_t>(temp.get(), number_of_samples),
+          new_period)) {
+    // Error returned.
+    output->Zeros(requested_length);
+    RTC_LOG(LS_ERROR)
+        << "ComfortNoiseDecoder::Generate failed to generate comfort noise";
+    return kInternalError;
+  }
+  (*output)[0].OverwriteAt(temp.get(), number_of_samples, 0);
+
+  if (first_call_) {
+    // Set tapering window parameters. Values are in Q15.
+    int16_t muting_window;              // Mixing factor for overlap data.
+    int16_t muting_window_increment;    // Mixing factor increment (negative).
+    int16_t unmuting_window;            // Mixing factor for comfort noise.
+    int16_t unmuting_window_increment;  // Mixing factor increment.
+    if (fs_hz_ == 8000) {
+      muting_window = DspHelper::kMuteFactorStart8kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
+    } else if (fs_hz_ == 16000) {
+      muting_window = DspHelper::kMuteFactorStart16kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
+    } else if (fs_hz_ == 32000) {
+      muting_window = DspHelper::kMuteFactorStart32kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
+    } else {  // fs_hz_ == 48000
+      muting_window = DspHelper::kMuteFactorStart48kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
+    }
+
+    // Do overlap-add between new vector and overlap.
+    size_t start_ix = sync_buffer_->Size() - overlap_length_;
+    for (size_t i = 0; i < overlap_length_; i++) {
+      /* overlapVec[i] = WinMute * overlapVec[i] + WinUnMute * outData[i] */
+      // The expression (*output)[0][i] is the i-th element in the first
+      // channel.
+      (*sync_buffer_)[0][start_ix + i] =
+          (((*sync_buffer_)[0][start_ix + i] * muting_window) +
+           ((*output)[0][i] * unmuting_window) + 16384) >>
+          15;
+      muting_window += muting_window_increment;
+      unmuting_window += unmuting_window_increment;
+    }
+    // Remove `overlap_length_` samples from the front of `output` since they
+    // were mixed into `sync_buffer_` above.
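+    // Note that the Q15 mix above adds 16384 (0.5 in Q15) before the
+    // right-shift, so the result rounds to nearest instead of truncating.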
+    output->PopFront(overlap_length_);
+  }
+  first_call_ = false;
+  return kOK;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.h b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.h
new file mode 100644
index 0000000000..31fcee31d0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
+#define MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// Forward declarations.
+class AudioMultiVector;
+class DecoderDatabase;
+class SyncBuffer;
+struct Packet;
+
+// This class acts as an interface to the CNG generator.
+class ComfortNoise {
+ public:
+  enum ReturnCodes {
+    kOK = 0,
+    kUnknownPayloadType,
+    kInternalError,
+    kMultiChannelNotSupported
+  };
+
+  ComfortNoise(int fs_hz,
+               DecoderDatabase* decoder_database,
+               SyncBuffer* sync_buffer)
+      : fs_hz_(fs_hz),
+        first_call_(true),
+        overlap_length_(5 * fs_hz_ / 8000),
+        decoder_database_(decoder_database),
+        sync_buffer_(sync_buffer) {}
+
+  ComfortNoise(const ComfortNoise&) = delete;
+  ComfortNoise& operator=(const ComfortNoise&) = delete;
+
+  // Resets the state. Should be called before each new comfort noise period.
+  void Reset();
+
+  // Update the comfort noise generator with the parameters in `packet`.
+  int UpdateParameters(const Packet& packet);
+
+  // Generates `requested_length` samples of comfort noise and writes to
+  // `output`. If this is the first call after Reset (or the first after
+  // creating the object), it will also mix in comfort noise at the end of the
+  // SyncBuffer object provided in the constructor.
+  int Generate(size_t requested_length, AudioMultiVector* output);
+
+  // Returns the last error code that was produced by the comfort noise
+  // decoder. Returns 0 if no error has been encountered since the last reset.
+  int internal_error_code() { return internal_error_code_; }
+
+ private:
+  int fs_hz_;
+  bool first_call_;
+  size_t overlap_length_;
+  DecoderDatabase* decoder_database_;
+  SyncBuffer* sync_buffer_;
+  int internal_error_code_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise_unittest.cc
new file mode 100644
index 0000000000..b436800061
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise_unittest.cc
@@ -0,0 +1,31 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for ComfortNoise class.
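+//
+// Typical usage, as described by the class comments in comfort_noise.h
+// (a sketch, not exercised by the test below): construct ComfortNoise with a
+// sample rate, decoder database and sync buffer, call UpdateParameters() for
+// each received SID packet, then call Generate() once per output block.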
+
+#include "modules/audio_coding/neteq/comfort_noise.h"
+
+#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(ComfortNoise, CreateAndDestroy) {
+  int fs = 8000;
+  MockDecoderDatabase db;
+  SyncBuffer sync_buffer(1, 1000);
+  ComfortNoise cn(fs, &db, &sync_buffer);
+  EXPECT_CALL(db, Die());  // Called when `db` goes out of scope.
+}
+
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc
new file mode 100644
index 0000000000..37ed9374f0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/cross_correlation.h"
+
+#include <cstdlib>
+#include <limits>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+
+namespace webrtc {
+
+// This function decides the overflow-protecting scaling and calls
+// WebRtcSpl_CrossCorrelation.
+int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
+                                  const int16_t* sequence_2,
+                                  size_t sequence_1_length,
+                                  size_t cross_correlation_length,
+                                  int cross_correlation_step,
+                                  int32_t* cross_correlation) {
+  // Find the element that has the maximum absolute value of sequence_1 and 2.
+  // Note that these values may be negative.
+  const int16_t max_1 =
+      WebRtcSpl_MaxAbsElementW16(sequence_1, sequence_1_length);
+  const int sequence_2_shift =
+      cross_correlation_step *
+      (static_cast<int>(cross_correlation_length) - 1);
+  const int16_t* sequence_2_start =
+      sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift;
+  const size_t sequence_2_length =
+      sequence_1_length + std::abs(sequence_2_shift);
+  const int16_t max_2 =
+      WebRtcSpl_MaxAbsElementW16(sequence_2_start, sequence_2_length);
+
+  // In order to avoid overflow when computing the sum we should scale the
+  // samples so that (in_vector_length * max_1 * max_2) will not overflow.
+  const int64_t max_value =
+      abs(max_1 * max_2) * static_cast<int64_t>(sequence_1_length);
+  const int32_t factor = max_value >> 31;
+  const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
+
+  WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,
+                             sequence_1_length, cross_correlation_length,
+                             scaling, cross_correlation_step);
+
+  return scaling;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h
new file mode 100644
index 0000000000..5082ce6a30
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
+#define MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+// The function calculates the cross-correlation between two sequences
+// `sequence_1` and `sequence_2`. `sequence_1` is taken as reference, with
+// `sequence_1_length` as its length. `sequence_2` slides for the calculation
+// of the cross-correlation. The result will be saved in `cross_correlation`.
+// `cross_correlation_length` correlation points are calculated.
+// The corresponding lag starts from 0, and increases with a step of
+// `cross_correlation_step`. The result is without normalization. To avoid
+// overflow, the result will be right shifted. The number of shifts will be
+// returned.
+//
+// Input:
+//      - sequence_1     : First sequence (reference).
+//      - sequence_2     : Second sequence (sliding during calculation).
+//      - sequence_1_length : Length of `sequence_1`.
+//      - cross_correlation_length : Number of cross-correlations to calculate.
+//      - cross_correlation_step : Step in the lag for the cross-correlation.
+//
+// Output:
+//      - cross_correlation : The cross-correlation in Q(-right_shifts)
+//
+// Return:
+//      Number of right shifts in cross_correlation.
+
+int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
+                                  const int16_t* sequence_2,
+                                  size_t sequence_1_length,
+                                  size_t cross_correlation_length,
+                                  int cross_correlation_step,
+                                  int32_t* cross_correlation);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc
new file mode 100644
index 0000000000..91b0252d2c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc
@@ -0,0 +1,515 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/decision_logic.h"
+
+#include <stdio.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "absl/types/optional.h"
+#include "api/neteq/neteq.h"
+#include "api/neteq/neteq_controller.h"
+#include "modules/audio_coding/neteq/packet_arrival_history.h"
+#include "modules/audio_coding/neteq/packet_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kPostponeDecodingLevel = 50;
+constexpr int kTargetLevelWindowMs = 100;
+constexpr int kMaxWaitForPacketTicks = 10;
+// The granularity of delay adjustments (accelerate/preemptive expand) is
+// 15 ms, but round up since the clock has a granularity of 10 ms.
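+// (15 ms rounded up to the next multiple of the 10 ms tick is 20 ms, which
+// is the value used below.)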
+constexpr int kDelayAdjustmentGranularityMs = 20;
+
+std::unique_ptr<DelayManager> CreateDelayManager(
+    const NetEqController::Config& neteq_config) {
+  DelayManager::Config config;
+  config.max_packets_in_buffer = neteq_config.max_packets_in_buffer;
+  config.base_minimum_delay_ms = neteq_config.base_min_delay_ms;
+  config.Log();
+  return std::make_unique<DelayManager>(config, neteq_config.tick_timer);
+}
+
+bool IsTimestretch(NetEq::Mode mode) {
+  return mode == NetEq::Mode::kAccelerateSuccess ||
+         mode == NetEq::Mode::kAccelerateLowEnergy ||
+         mode == NetEq::Mode::kPreemptiveExpandSuccess ||
+         mode == NetEq::Mode::kPreemptiveExpandLowEnergy;
+}
+
+bool IsCng(NetEq::Mode mode) {
+  return mode == NetEq::Mode::kRfc3389Cng ||
+         mode == NetEq::Mode::kCodecInternalCng;
+}
+
+bool IsExpand(NetEq::Mode mode) {
+  return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc;
+}
+
+}  // namespace
+
+DecisionLogic::Config::Config() {
+  StructParametersParser::Create(
+      "enable_stable_playout_delay", &enable_stable_playout_delay,  //
+      "reinit_after_expands", &reinit_after_expands,                //
+      "packet_history_size_ms", &packet_history_size_ms,            //
+      "cng_timeout_ms", &cng_timeout_ms,                            //
+      "deceleration_target_level_offset_ms",
+      &deceleration_target_level_offset_ms)
+      ->Parse(webrtc::field_trial::FindFullName(
+          "WebRTC-Audio-NetEqDecisionLogicConfig"));
+  RTC_LOG(LS_INFO) << "NetEq decision logic config:"
+                   << " enable_stable_playout_delay="
+                   << enable_stable_playout_delay
+                   << " reinit_after_expands=" << reinit_after_expands
+                   << " packet_history_size_ms=" << packet_history_size_ms
+                   << " cng_timeout_ms=" << cng_timeout_ms.value_or(-1)
+                   << " deceleration_target_level_offset_ms="
+                   << deceleration_target_level_offset_ms;
+}
+
+DecisionLogic::DecisionLogic(NetEqController::Config config)
+    : DecisionLogic(config,
+                    CreateDelayManager(config),
+                    std::make_unique<BufferLevelFilter>()) {}
+
+DecisionLogic::DecisionLogic(
+    NetEqController::Config config,
+    std::unique_ptr<DelayManager> delay_manager,
+    std::unique_ptr<BufferLevelFilter> buffer_level_filter)
+    : delay_manager_(std::move(delay_manager)),
+      buffer_level_filter_(std::move(buffer_level_filter)),
+      packet_arrival_history_(config_.packet_history_size_ms),
+      tick_timer_(config.tick_timer),
+      disallow_time_stretching_(!config.allow_time_stretching),
+      timescale_countdown_(
+          tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)) {}
+
+DecisionLogic::~DecisionLogic() = default;
+
+void DecisionLogic::SoftReset() {
+  packet_length_samples_ = 0;
+  sample_memory_ = 0;
+  prev_time_scale_ = false;
+  timescale_countdown_ =
+      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
+  time_stretched_cn_samples_ = 0;
+  delay_manager_->Reset();
+  buffer_level_filter_->Reset();
+  packet_arrival_history_.Reset();
+  last_playout_delay_ms_ = 0;
+}
+
+void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
+  // TODO(hlundin): Change to an enumerator and skip assert.
+  RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
+             fs_hz == 48000);
+  sample_rate_khz_ = fs_hz / 1000;
+  output_size_samples_ = output_size_samples;
+  packet_arrival_history_.set_sample_rate(fs_hz);
+}
+
+NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
+                                            bool* reset_decoder) {
+  // If last mode was CNG (or Expand, since this could be covering up for
+  // a lost CNG packet), remember that CNG is on. This is needed if comfort
+  // noise is interrupted by DTMF.
+  if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
+    cng_state_ = kCngRfc3389On;
+  } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
+    cng_state_ = kCngInternalOn;
+  }
+
+  if (IsExpand(status.last_mode)) {
+    ++num_consecutive_expands_;
+  } else {
+    num_consecutive_expands_ = 0;
+  }
+
+  if (!IsExpand(status.last_mode) && !IsCng(status.last_mode)) {
+    last_playout_delay_ms_ = GetPlayoutDelayMs(status);
+  }
+
+  prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode);
+  if (prev_time_scale_) {
+    timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
+  }
+  if (!IsCng(status.last_mode)) {
+    FilterBufferLevel(status.packet_buffer_info.span_samples);
+  }
+
+  // Guard for errors, to avoid getting stuck in error mode.
+  if (status.last_mode == NetEq::Mode::kError) {
+    if (!status.next_packet) {
+      return NetEq::Operation::kExpand;
+    } else {
+      // Use kUndefined to flag for a reset.
+      return NetEq::Operation::kUndefined;
+    }
+  }
+
+  if (status.next_packet && status.next_packet->is_cng) {
+    return CngOperation(status);
+  }
+
+  // Handle the case with no packet at all available (except maybe DTMF).
+  if (!status.next_packet) {
+    return NoPacket(status);
+  }
+
+  // If the expand period was very long, reset NetEQ since it is likely that
+  // the sender was restarted.
+  if (num_consecutive_expands_ > config_.reinit_after_expands) {
+    *reset_decoder = true;
+    return NetEq::Operation::kNormal;
+  }
+
+  // Make sure we don't restart audio too soon after an expansion to avoid
+  // running out of data right away again. We should only wait if there are no
+  // DTX or CNG packets in the buffer (otherwise we should just play out what
+  // we have, since we cannot know the exact duration of DTX or CNG packets),
+  // and if the mute factor is low enough (otherwise the expansion was short
+  // enough to not be noticeable).
+  // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
+  const int target_level_samples = TargetLevelMs() * sample_rate_khz_;
+  if (!config_.enable_stable_playout_delay && IsExpand(status.last_mode) &&
+      status.expand_mutefactor < 16384 / 2 &&
+      status.packet_buffer_info.span_samples <
+          static_cast<size_t>(target_level_samples * kPostponeDecodingLevel /
+                              100) &&
+      !status.packet_buffer_info.dtx_or_cng) {
+    return NetEq::Operation::kExpand;
+  }
+
+  const uint32_t five_seconds_samples =
+      static_cast<uint32_t>(5000 * sample_rate_khz_);
+  // Check if the required packet is available.
+  if (status.target_timestamp == status.next_packet->timestamp) {
+    return ExpectedPacketAvailable(status);
+  }
+  if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
+                                         status.target_timestamp,
+                                         five_seconds_samples)) {
+    return FuturePacketAvailable(status);
+  }
+  // This implies that available_timestamp < target_timestamp, which can
+  // happen when a new stream or codec is received. Signal for a reset.
+  return NetEq::Operation::kUndefined;
+}
+
+void DecisionLogic::NotifyMutedState() {
+  ++num_consecutive_expands_;
+}
+
+int DecisionLogic::TargetLevelMs() const {
+  int target_delay_ms = delay_manager_->TargetDelayMs();
+  if (!config_.enable_stable_playout_delay) {
+    target_delay_ms =
+        std::max(target_delay_ms,
+                 static_cast<int>(packet_length_samples_ / sample_rate_khz_));
+  }
+  return target_delay_ms;
+}
+
+int DecisionLogic::UnlimitedTargetLevelMs() const {
+  return delay_manager_->UnlimitedTargetLevelMs();
+}
+
+int DecisionLogic::GetFilteredBufferLevel() const {
+  if (config_.enable_stable_playout_delay) {
+    return last_playout_delay_ms_ * sample_rate_khz_;
+  }
+  return buffer_level_filter_->filtered_current_level();
+}
+
+absl::optional<int> DecisionLogic::PacketArrived(
+    int fs_hz,
+    bool should_update_stats,
+    const PacketArrivedInfo& info) {
+  buffer_flush_ = buffer_flush_ || info.buffer_flush;
+  if (!should_update_stats || info.is_cng_or_dtmf) {
+    return absl::nullopt;
+  }
+  if (info.packet_length_samples > 0 && fs_hz > 0 &&
+      info.packet_length_samples != packet_length_samples_) {
+    packet_length_samples_ = info.packet_length_samples;
+    delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 /
+                                         fs_hz);
+  }
+  int64_t time_now_ms = tick_timer_->ticks() * tick_timer_->ms_per_tick();
+  packet_arrival_history_.Insert(info.main_timestamp, time_now_ms);
+  if (packet_arrival_history_.size() < 2) {
+    // No meaningful delay estimate unless at least 2 packets have arrived.
+    return absl::nullopt;
+  }
+  int arrival_delay_ms =
+      packet_arrival_history_.GetDelayMs(info.main_timestamp, time_now_ms);
+  bool reordered =
+      !packet_arrival_history_.IsNewestRtpTimestamp(info.main_timestamp);
+  delay_manager_->Update(arrival_delay_ms, reordered);
+  return arrival_delay_ms;
+}
+
+void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
+  buffer_level_filter_->SetTargetBufferLevel(TargetLevelMs());
+
+  int time_stretched_samples = time_stretched_cn_samples_;
+  if (prev_time_scale_) {
+    time_stretched_samples += sample_memory_;
+  }
+
+  if (buffer_flush_) {
+    buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples);
+    buffer_flush_ = false;
+  } else {
+    buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
+  }
+  prev_time_scale_ = false;
+  time_stretched_cn_samples_ = 0;
+}
+
+NetEq::Operation DecisionLogic::CngOperation(
+    NetEqController::NetEqStatus status) {
+  // Signed difference between target and available timestamp.
+  int32_t timestamp_diff = static_cast<int32_t>(
+      static_cast<uint32_t>(status.generated_noise_samples +
+                            status.target_timestamp) -
+      status.next_packet->timestamp);
+  int optimal_level_samp = TargetLevelMs() * sample_rate_khz_;
+  const int64_t excess_waiting_time_samp =
+      -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
+
+  if (excess_waiting_time_samp > optimal_level_samp / 2) {
+    // The waiting time for this packet will be longer than 1.5
+    // times the wanted buffer delay. Apply fast-forward to cut the
+    // waiting time down to the optimal.
+    noise_fast_forward_ = rtc::saturated_cast<size_t>(
+        noise_fast_forward_ + excess_waiting_time_samp);
+    timestamp_diff = rtc::saturated_cast<int32_t>(timestamp_diff +
+                                                  excess_waiting_time_samp);
+  }
+
+  if (timestamp_diff < 0 && status.last_mode == NetEq::Mode::kRfc3389Cng) {
+    // Not time to play this packet yet. Wait another round before using this
+    // packet. Keep on playing CNG from previous CNG parameters.
+    return NetEq::Operation::kRfc3389CngNoPacket;
+  } else {
+    // Otherwise, go for the CNG packet now.
+    noise_fast_forward_ = 0;
+    return NetEq::Operation::kRfc3389Cng;
+  }
+}
+
+NetEq::Operation DecisionLogic::NoPacket(NetEqController::NetEqStatus status) {
+  if (cng_state_ == kCngRfc3389On) {
+    // Keep on playing comfort noise.
+    return NetEq::Operation::kRfc3389CngNoPacket;
+  } else if (cng_state_ == kCngInternalOn) {
+    // Stop CNG after a timeout.
+    if (config_.cng_timeout_ms &&
+        status.generated_noise_samples >
+            static_cast<size_t>(*config_.cng_timeout_ms * sample_rate_khz_)) {
+      return NetEq::Operation::kExpand;
+    }
+    return NetEq::Operation::kCodecInternalCng;
+  } else if (status.play_dtmf) {
+    return NetEq::Operation::kDtmf;
+  } else {
+    // Nothing to play, do expand.
+    return NetEq::Operation::kExpand;
+  }
+}
+
+NetEq::Operation DecisionLogic::ExpectedPacketAvailable(
+    NetEqController::NetEqStatus status) {
+  if (!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand &&
+      !status.play_dtmf) {
+    if (config_.enable_stable_playout_delay) {
+      const int playout_delay_ms = GetPlayoutDelayMs(status);
+      if (playout_delay_ms >= HighThreshold() << 2) {
+        return NetEq::Operation::kFastAccelerate;
+      }
+      if (TimescaleAllowed()) {
+        if (playout_delay_ms >= HighThreshold()) {
+          return NetEq::Operation::kAccelerate;
+        }
+        if (playout_delay_ms < LowThreshold()) {
+          return NetEq::Operation::kPreemptiveExpand;
+        }
+      }
+    } else {
+      const int target_level_samples = TargetLevelMs() * sample_rate_khz_;
+      const int low_limit = std::max(
+          target_level_samples * 3 / 4,
+          target_level_samples -
+              config_.deceleration_target_level_offset_ms * sample_rate_khz_);
+      const int high_limit = std::max(
+          target_level_samples,
+          low_limit + kDelayAdjustmentGranularityMs * sample_rate_khz_);
+
+      const int buffer_level_samples =
+          buffer_level_filter_->filtered_current_level();
+      if (buffer_level_samples >= high_limit << 2)
+        return NetEq::Operation::kFastAccelerate;
+      if (TimescaleAllowed()) {
+        if (buffer_level_samples >= high_limit)
+          return NetEq::Operation::kAccelerate;
+        if (buffer_level_samples < low_limit)
+          return NetEq::Operation::kPreemptiveExpand;
+      }
+    }
+  }
+  return NetEq::Operation::kNormal;
+}
+
+NetEq::Operation DecisionLogic::FuturePacketAvailable(
+    NetEqController::NetEqStatus status) {
+  // Required packet is not available, but a future packet is.
+  // Check if we should continue with an ongoing expand because the new packet
+  // is too far into the future.
+  if (IsExpand(status.last_mode) && ShouldContinueExpand(status)) {
+    if (status.play_dtmf) {
+      // Still have DTMF to play, so do not do expand.
+      return NetEq::Operation::kDtmf;
+    } else {
+      // Nothing to play.
+      return NetEq::Operation::kExpand;
+    }
+  }
+
+  if (status.last_mode == NetEq::Mode::kCodecPlc) {
+    return NetEq::Operation::kNormal;
+  }
+
+  // If previous was comfort noise, then no merge is needed.
+  if (IsCng(status.last_mode)) {
+    uint32_t timestamp_leap =
+        status.next_packet->timestamp - status.target_timestamp;
+    const bool generated_enough_noise =
+        status.generated_noise_samples >= timestamp_leap;
+
+    int playout_delay_ms = GetNextPacketDelayMs(status);
+    const bool above_target_delay = playout_delay_ms > HighThresholdCng();
+    const bool below_target_delay = playout_delay_ms < LowThresholdCng();
+    // Keep the delay the same as before CNG, but make sure that it is within
+    // the target window.
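+    // Any difference between the timestamp leap and the noise actually
+    // generated is recorded in `time_stretched_cn_samples_` below, so that
+    // FilterBufferLevel() can compensate the buffer level estimate for it.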
+    if ((generated_enough_noise && !below_target_delay) ||
+        above_target_delay) {
+      time_stretched_cn_samples_ =
+          timestamp_leap - status.generated_noise_samples;
+      return NetEq::Operation::kNormal;
+    }
+
+    if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
+      return NetEq::Operation::kRfc3389CngNoPacket;
+    }
+    return NetEq::Operation::kCodecInternalCng;
+  }
+
+  // Do not merge unless we have done an expand before.
+  if (status.last_mode == NetEq::Mode::kExpand) {
+    return NetEq::Operation::kMerge;
+  } else if (status.play_dtmf) {
+    // Play DTMF instead of expand.
+    return NetEq::Operation::kDtmf;
+  } else {
+    return NetEq::Operation::kExpand;
+  }
+}
+
+bool DecisionLogic::UnderTargetLevel() const {
+  return buffer_level_filter_->filtered_current_level() <
+         TargetLevelMs() * sample_rate_khz_;
+}
+
+bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
+  return timestamp_leap >= static_cast<uint32_t>(
+                               output_size_samples_ *
+                               config_.reinit_after_expands);
+}
+
+bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
+  return timestamp_leap > static_cast<uint32_t>(output_size_samples_ *
+                                                num_consecutive_expands_);
+}
+
+bool DecisionLogic::MaxWaitForPacket() const {
+  return num_consecutive_expands_ >= kMaxWaitForPacketTicks;
+}
+
+bool DecisionLogic::ShouldContinueExpand(
+    NetEqController::NetEqStatus status) const {
+  uint32_t timestamp_leap =
+      status.next_packet->timestamp - status.target_timestamp;
+  if (config_.enable_stable_playout_delay) {
+    return GetNextPacketDelayMs(status) < HighThreshold() &&
+           PacketTooEarly(timestamp_leap);
+  }
+  return !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
+         PacketTooEarly(timestamp_leap) && UnderTargetLevel();
+}
+
+int DecisionLogic::GetNextPacketDelayMs(
+    NetEqController::NetEqStatus status) const {
+  if (config_.enable_stable_playout_delay) {
+    return packet_arrival_history_.GetDelayMs(
+        status.next_packet->timestamp,
+        tick_timer_->ticks() * tick_timer_->ms_per_tick());
+  }
+  return status.packet_buffer_info.span_samples / sample_rate_khz_;
+}
+
+int DecisionLogic::GetPlayoutDelayMs(
+    NetEqController::NetEqStatus status) const {
+  uint32_t playout_timestamp =
+      status.target_timestamp - status.sync_buffer_samples;
+  return packet_arrival_history_.GetDelayMs(
+      playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick());
+}
+
+int DecisionLogic::LowThreshold() const {
+  int target_delay_ms = TargetLevelMs();
+  return std::max(
+      target_delay_ms * 3 / 4,
+      target_delay_ms - config_.deceleration_target_level_offset_ms);
+}
+
+int DecisionLogic::HighThreshold() const {
+  if (config_.enable_stable_playout_delay) {
+    return std::max(TargetLevelMs(), packet_arrival_history_.GetMaxDelayMs()) +
+           kDelayAdjustmentGranularityMs;
+  }
+  return std::max(TargetLevelMs(),
+                  LowThreshold() + kDelayAdjustmentGranularityMs);
+}
+
+int DecisionLogic::LowThresholdCng() const {
+  if (config_.enable_stable_playout_delay) {
+    return LowThreshold();
+  }
+  return std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2);
+}
+
+int DecisionLogic::HighThresholdCng() const {
+  if (config_.enable_stable_playout_delay) {
+    return HighThreshold();
+  }
+  return TargetLevelMs() + kTargetLevelWindowMs / 2;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h
new file mode 100644
index 0000000000..8d1ff4d622
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h
@@ -0,0 +1,202 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
+#define MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
+
+#include <memory>
+
+#include "api/neteq/neteq.h"
+#include "api/neteq/neteq_controller.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/buffer_level_filter.h"
+#include "modules/audio_coding/neteq/delay_manager.h"
+#include "modules/audio_coding/neteq/packet_arrival_history.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+
+namespace webrtc {
+
+// This is the class for the decision tree implementation.
+class DecisionLogic : public NetEqController {
+ public:
+  DecisionLogic(NetEqController::Config config);
+  DecisionLogic(NetEqController::Config config,
+                std::unique_ptr<DelayManager> delay_manager,
+                std::unique_ptr<BufferLevelFilter> buffer_level_filter);
+
+  ~DecisionLogic() override;
+
+  DecisionLogic(const DecisionLogic&) = delete;
+  DecisionLogic& operator=(const DecisionLogic&) = delete;
+
+  // Not used.
+  void Reset() override {}
+
+  // Resets parts of the state. Typically done when switching codecs.
+  void SoftReset() override;
+
+  // Sets the sample rate and the output block size.
+  void SetSampleRate(int fs_hz, size_t output_size_samples) override;
+
+  // Given info about the latest received packet, and current jitter buffer
+  // status, returns the operation. `target_timestamp` and `expand_mutefactor`
+  // are provided for reference. `last_packet_samples` is the number of
+  // samples obtained from the last decoded frame. If there is a packet
+  // available, it should be supplied in `packet`; otherwise it should be
+  // NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
+  // supplied in `last_mode`. If there is a DTMF event to play, `play_dtmf`
+  // should be set to true. The output variable `reset_decoder` will be set to
+  // true if a reset is required; otherwise it is left unchanged (i.e., it can
+  // remain true if it was true before the call).
+  NetEq::Operation GetDecision(const NetEqController::NetEqStatus& status,
+                               bool* reset_decoder) override;
+
+  // These methods test the `cng_state_` for different conditions.
+  bool CngRfc3389On() const override { return cng_state_ == kCngRfc3389On; }
+  bool CngOff() const override { return cng_state_ == kCngOff; }
+
+  // Resets the `cng_state_` to kCngOff.
+  void SetCngOff() override { cng_state_ = kCngOff; }
+
+  void ExpandDecision(NetEq::Operation operation) override {}
+
+  // Adds `value` to `sample_memory_`.
+  void AddSampleMemory(int32_t value) override { sample_memory_ += value; }
+
+  int TargetLevelMs() const override;
+
+  int UnlimitedTargetLevelMs() const override;
+
+  absl::optional<int> PacketArrived(int fs_hz,
+                                    bool should_update_stats,
+                                    const PacketArrivedInfo& info) override;
+
+  void RegisterEmptyPacket() override {}
+
+  void NotifyMutedState() override;
+
+  bool SetMaximumDelay(int delay_ms) override {
+    return delay_manager_->SetMaximumDelay(delay_ms);
+  }
+  bool SetMinimumDelay(int delay_ms) override {
+    return delay_manager_->SetMinimumDelay(delay_ms);
+  }
+  bool SetBaseMinimumDelay(int delay_ms) override {
+    return delay_manager_->SetBaseMinimumDelay(delay_ms);
+  }
+  int GetBaseMinimumDelay() const override {
+    return delay_manager_->GetBaseMinimumDelay();
+  }
+  bool PeakFound() const override { return false; }
+
+  int GetFilteredBufferLevel() const override;
+
+  // Accessors and mutators.
+  void set_sample_memory(int32_t value) override { sample_memory_ = value; }
+  size_t noise_fast_forward() const override { return noise_fast_forward_; }
+  size_t packet_length_samples() const override {
+    return packet_length_samples_;
+  }
+  void set_packet_length_samples(size_t value) override {
+    packet_length_samples_ = value;
+  }
+  void set_prev_time_scale(bool value) override { prev_time_scale_ = value; }
+
+ private:
+  // The value 5 sets maximum time-stretch rate to about 100 ms/s.
+  static const int kMinTimescaleInterval = 5;
+
+  enum CngState { kCngOff, kCngRfc3389On, kCngInternalOn };
+
+  // Updates the `buffer_level_filter_` with the current buffer level
+  // `buffer_size_samples`.
+  void FilterBufferLevel(size_t buffer_size_samples);
+
+  // Returns the operation given that the next available packet is a comfort
+  // noise payload (RFC 3389 only, not codec-internal).
+  virtual NetEq::Operation CngOperation(NetEqController::NetEqStatus status);
+
+  // Returns the operation given that no packets are available (except maybe
+  // a DTMF event, flagged by setting `play_dtmf` true).
+  virtual NetEq::Operation NoPacket(NetEqController::NetEqStatus status);
+
+  // Returns the operation to do given that the expected packet is available.
+  virtual NetEq::Operation ExpectedPacketAvailable(
+      NetEqController::NetEqStatus status);
+
+  // Returns the operation to do given that the expected packet is not
+  // available, but a packet further into the future is at hand.
+  virtual NetEq::Operation FuturePacketAvailable(
+      NetEqController::NetEqStatus status);
+
+  // Checks if enough time has elapsed since the last successful timescale
+  // operation was done (i.e., accelerate or preemptive expand).
+  bool TimescaleAllowed() const {
+    return !timescale_countdown_ || timescale_countdown_->Finished();
+  }
+
+  // Checks if the current (filtered) buffer level is under the target level.
+  bool UnderTargetLevel() const;
+
+  // Checks if `timestamp_leap` is so long into the future that a reset due
+  // to exceeding kReinitAfterExpands will be done.
+  bool ReinitAfterExpands(uint32_t timestamp_leap) const;
+
+  // Checks if we still have not done enough expands to cover the distance
+  // from the last decoded packet to the next available packet, the distance
+  // being conveyed in `timestamp_leap`.
+  bool PacketTooEarly(uint32_t timestamp_leap) const;
+
+  bool MaxWaitForPacket() const;
+
+  bool ShouldContinueExpand(NetEqController::NetEqStatus status) const;
+
+  int GetNextPacketDelayMs(NetEqController::NetEqStatus status) const;
+  int GetPlayoutDelayMs(NetEqController::NetEqStatus status) const;
+
+  int LowThreshold() const;
+  int HighThreshold() const;
+  int LowThresholdCng() const;
+  int HighThresholdCng() const;
+
+  // Runtime configurable options through field trial
+  // WebRTC-Audio-NetEqDecisionLogicConfig.
+  struct Config {
+    Config();
+
+    bool enable_stable_playout_delay = false;
+    int reinit_after_expands = 100;
+    int deceleration_target_level_offset_ms = 85;
+    int packet_history_size_ms = 2000;
+    absl::optional<int> cng_timeout_ms;
+  };
+  Config config_;
+  std::unique_ptr<DelayManager> delay_manager_;
+  std::unique_ptr<BufferLevelFilter> buffer_level_filter_;
+  PacketArrivalHistory packet_arrival_history_;
+  const TickTimer* tick_timer_;
+  int sample_rate_khz_;
+  size_t output_size_samples_;
+  CngState cng_state_ = kCngOff;  // Remember if comfort noise is interrupted
+                                  // by other event (e.g., DTMF).
+  size_t noise_fast_forward_ = 0;
+  size_t packet_length_samples_ = 0;
+  int sample_memory_ = 0;
+  bool prev_time_scale_ = false;
+  bool disallow_time_stretching_;
+  std::unique_ptr<TickTimer::Countdown> timescale_countdown_;
+  int num_consecutive_expands_ = 0;
+  int time_stretched_cn_samples_ = 0;
+  bool buffer_flush_ = false;
+  int last_playout_delay_ms_ = 0;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc
new file mode 100644
index 0000000000..6150c9a6db
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc
@@ -0,0 +1,219 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for DecisionLogic class and derived classes.
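+//
+// The fixture below wires DecisionLogic up with a MockDelayManager and a
+// MockBufferLevelFilter, so each test can control the target delay and the
+// filtered buffer level directly.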
+
+#include "modules/audio_coding/neteq/decision_logic.h"
+
+#include "api/neteq/neteq_controller.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/buffer_level_filter.h"
+#include "modules/audio_coding/neteq/delay_manager.h"
+#include "modules/audio_coding/neteq/mock/mock_buffer_level_filter.h"
+#include "modules/audio_coding/neteq/mock/mock_delay_manager.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kSampleRate = 8000;
+constexpr int kSamplesPerMs = kSampleRate / 1000;
+constexpr int kOutputSizeSamples = kSamplesPerMs * 10;
+constexpr int kMinTimescaleInterval = 5;
+
+NetEqController::NetEqStatus CreateNetEqStatus(NetEq::Mode last_mode,
+                                               int current_delay_ms) {
+  NetEqController::NetEqStatus status;
+  status.play_dtmf = false;
+  status.last_mode = last_mode;
+  status.target_timestamp = 1234;
+  status.generated_noise_samples = 0;
+  status.expand_mutefactor = 0;
+  status.packet_buffer_info.num_samples = current_delay_ms * kSamplesPerMs;
+  status.packet_buffer_info.span_samples = current_delay_ms * kSamplesPerMs;
+  status.packet_buffer_info.span_samples_no_dtx =
+      current_delay_ms * kSamplesPerMs;
+  status.packet_buffer_info.dtx_or_cng = false;
+  status.next_packet = {status.target_timestamp, false, false};
+  return status;
+}
+
+using ::testing::Return;
+
+}  // namespace
+
+class DecisionLogicTest : public ::testing::Test {
+ protected:
+  DecisionLogicTest() {
+    test::ScopedFieldTrials trials(
+        "WebRTC-Audio-NetEqDecisionLogicConfig/cng_timeout_ms:1000/");
+    NetEqController::Config config;
+    config.tick_timer = &tick_timer_;
+    config.allow_time_stretching = true;
+    auto delay_manager = std::make_unique<MockDelayManager>(
+        DelayManager::Config(), config.tick_timer);
+    mock_delay_manager_ = delay_manager.get();
+    auto buffer_level_filter = std::make_unique<MockBufferLevelFilter>();
+    mock_buffer_level_filter_ = buffer_level_filter.get();
+    decision_logic_ = std::make_unique<DecisionLogic>(
+        config, std::move(delay_manager), std::move(buffer_level_filter));
+    decision_logic_->SetSampleRate(kSampleRate, kOutputSizeSamples);
+  }
+
+  TickTimer tick_timer_;
+  std::unique_ptr<DecisionLogic> decision_logic_;
+  MockDelayManager* mock_delay_manager_;
+  MockBufferLevelFilter* mock_buffer_level_filter_;
+};
+
+TEST_F(DecisionLogicTest, NormalOperation) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(100));
+  EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level())
+      .WillRepeatedly(Return(90 * kSamplesPerMs));
+
+  bool reset_decoder = false;
+  tick_timer_.Increment(kMinTimescaleInterval + 1);
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
+            NetEq::Operation::kNormal);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, Accelerate) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(100));
+  EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level())
+      .WillRepeatedly(Return(110 * kSamplesPerMs));
+
+  bool reset_decoder = false;
+  tick_timer_.Increment(kMinTimescaleInterval + 1);
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
+            NetEq::Operation::kAccelerate);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, FastAccelerate) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(100));
+  EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level())
+      .WillRepeatedly(Return(400 * kSamplesPerMs));
+
+  bool reset_decoder = false;
+  tick_timer_.Increment(kMinTimescaleInterval + 1);
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
+            NetEq::Operation::kFastAccelerate);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, PreemptiveExpand) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(100));
+  EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level())
+      .WillRepeatedly(Return(50 * kSamplesPerMs));
+
+  bool reset_decoder = false;
+  tick_timer_.Increment(kMinTimescaleInterval + 1);
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
+            NetEq::Operation::kPreemptiveExpand);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, DecelerationTargetLevelOffset) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(500));
+  EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level())
+      .WillRepeatedly(Return(400 * kSamplesPerMs));
+
+  bool reset_decoder = false;
+  tick_timer_.Increment(kMinTimescaleInterval + 1);
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kNormal, 400), &reset_decoder),
+            NetEq::Operation::kPreemptiveExpand);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, PostponeDecodeAfterExpand) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(500));
+
+  // Below 50% target delay threshold.
+  bool reset_decoder = false;
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kExpand, 200), &reset_decoder),
+            NetEq::Operation::kExpand);
+  EXPECT_FALSE(reset_decoder);
+
+  // Above 50% target delay threshold.
+  EXPECT_EQ(decision_logic_->GetDecision(
+                CreateNetEqStatus(NetEq::Mode::kExpand, 250), &reset_decoder),
+            NetEq::Operation::kNormal);
+  EXPECT_FALSE(reset_decoder);
+}
+
+TEST_F(DecisionLogicTest, TimeStretchComfortNoise) {
+  EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
+      .WillRepeatedly(Return(500));
+
+  {
+    bool reset_decoder = false;
+    // Below target window.
+    auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 400);
+    status.generated_noise_samples = 400 * kSamplesPerMs;
+    status.next_packet->timestamp =
+        status.target_timestamp + 400 * kSamplesPerMs;
+    EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
+              NetEq::Operation::kCodecInternalCng);
+    EXPECT_FALSE(reset_decoder);
+  }
+
+  {
+    bool reset_decoder = false;
+    // Above target window.
+    auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 600);
+    status.generated_noise_samples = 200 * kSamplesPerMs;
+    status.next_packet->timestamp =
+        status.target_timestamp + 400 * kSamplesPerMs;
+    EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
+              NetEq::Operation::kNormal);
+    EXPECT_FALSE(reset_decoder);
+
+    // The buffer level filter should be adjusted with the number of samples
+    // that were skipped.
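+    // In this scenario the leap is 400 ms minus the 200 ms of generated
+    // noise, i.e. 200 ms worth of samples, which is what Update() is
+    // expected to receive below.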
+    int timestamp_leap = status.next_packet->timestamp -
+                         status.target_timestamp -
+                         status.generated_noise_samples;
+    EXPECT_CALL(*mock_buffer_level_filter_,
+                Update(400 * kSamplesPerMs, timestamp_leap));
+    EXPECT_EQ(
+        decision_logic_->GetDecision(
+            CreateNetEqStatus(NetEq::Mode::kNormal, 400), &reset_decoder),
+        NetEq::Operation::kNormal);
+    EXPECT_FALSE(reset_decoder);
+  }
+}
+
+TEST_F(DecisionLogicTest, CngTimeout) {
+  auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 0);
+  status.next_packet = absl::nullopt;
+  status.generated_noise_samples = kSamplesPerMs * 500;
+  bool reset_decoder = false;
+  EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
+            NetEq::Operation::kCodecInternalCng);
+  status.generated_noise_samples = kSamplesPerMs * 1010;
+  EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
+            NetEq::Operation::kExpand);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc
new file mode 100644
index 0000000000..3447ced1da
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc
@@ -0,0 +1,285 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/decoder_database.h"
+
+#include <stddef.h>
+
+#include <cstdint>
+#include <list>
+#include <type_traits>
+#include <utility>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/audio_format_to_string.h"
+
+namespace webrtc {
+
+DecoderDatabase::DecoderDatabase(
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+    absl::optional<AudioCodecPairId> codec_pair_id)
+    : active_decoder_type_(-1),
+      active_cng_decoder_type_(-1),
+      decoder_factory_(decoder_factory),
+      codec_pair_id_(codec_pair_id) {}
+
+DecoderDatabase::~DecoderDatabase() = default;
+
+DecoderDatabase::DecoderInfo::DecoderInfo(
+    const SdpAudioFormat& audio_format,
+    absl::optional<AudioCodecPairId> codec_pair_id,
+    AudioDecoderFactory* factory,
+    absl::string_view codec_name)
+    : name_(codec_name),
+      audio_format_(audio_format),
+      codec_pair_id_(codec_pair_id),
+      factory_(factory),
+      cng_decoder_(CngDecoder::Create(audio_format)),
+      subtype_(SubtypeFromFormat(audio_format)) {}
+
+DecoderDatabase::DecoderInfo::DecoderInfo(
+    const SdpAudioFormat& audio_format,
+    absl::optional<AudioCodecPairId> codec_pair_id,
+    AudioDecoderFactory* factory)
+    : DecoderInfo(audio_format, codec_pair_id, factory, audio_format.name) {}
+
+DecoderDatabase::DecoderInfo::DecoderInfo(DecoderInfo&&) = default;
+DecoderDatabase::DecoderInfo::~DecoderInfo() = default;
+
+AudioDecoder* DecoderDatabase::DecoderInfo::GetDecoder() const {
+  if (subtype_ != Subtype::kNormal) {
+    // These are handled internally, so they have no AudioDecoder objects.
+    return nullptr;
+  }
+  if (!decoder_) {
+    // TODO(ossu): Keep a check here for now, since a number of tests create
+    // DecoderInfos without factories.
+    RTC_DCHECK(factory_);
+    decoder_ = factory_->MakeAudioDecoder(audio_format_, codec_pair_id_);
+  }
+  RTC_DCHECK(decoder_) << "Failed to create: " << rtc::ToString(audio_format_);
+  return decoder_.get();
+}
+
+bool DecoderDatabase::DecoderInfo::IsType(absl::string_view name) const {
+  return absl::EqualsIgnoreCase(audio_format_.name, name);
+}
+
+absl::optional<DecoderDatabase::DecoderInfo::CngDecoder>
+DecoderDatabase::DecoderInfo::CngDecoder::Create(const SdpAudioFormat& format) {
+  if (absl::EqualsIgnoreCase(format.name, "CN")) {
+    // CN has a 1:1 RTP clock rate to sample rate ratio.
+    const int sample_rate_hz = format.clockrate_hz;
+    RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
+               sample_rate_hz == 32000 || sample_rate_hz == 48000);
+    return DecoderDatabase::DecoderInfo::CngDecoder{sample_rate_hz};
+  } else {
+    return absl::nullopt;
+  }
+}
+
+DecoderDatabase::DecoderInfo::Subtype
+DecoderDatabase::DecoderInfo::SubtypeFromFormat(const SdpAudioFormat& format) {
+  if (absl::EqualsIgnoreCase(format.name, "CN")) {
+    return Subtype::kComfortNoise;
+  } else if (absl::EqualsIgnoreCase(format.name, "telephone-event")) {
+    return Subtype::kDtmf;
+  } else if (absl::EqualsIgnoreCase(format.name, "red")) {
+    return Subtype::kRed;
+  }
+
+  return Subtype::kNormal;
+}
+
+bool DecoderDatabase::Empty() const {
+  return decoders_.empty();
+}
+
+int DecoderDatabase::Size() const {
+  return static_cast<int>(decoders_.size());
+}
+
+std::vector<int> DecoderDatabase::SetCodecs(
+    const std::map<int, SdpAudioFormat>& codecs) {
+  // First collect all payload types that we'll remove or reassign, then
+  // remove them from the database.
+  std::vector<int> changed_payload_types;
+  for (const std::pair<uint8_t, const DecoderInfo&> kv : decoders_) {
+    auto i = codecs.find(kv.first);
+    if (i == codecs.end() || i->second != kv.second.GetFormat()) {
+      changed_payload_types.push_back(kv.first);
+    }
+  }
+  for (int pl_type : changed_payload_types) {
+    Remove(pl_type);
+  }
+
+  // Enter the new and changed payload type mappings into the database.
+  for (const auto& kv : codecs) {
+    const int& rtp_payload_type = kv.first;
+    const SdpAudioFormat& audio_format = kv.second;
+    RTC_DCHECK_GE(rtp_payload_type, 0);
+    RTC_DCHECK_LE(rtp_payload_type, 0x7f);
+    if (decoders_.count(rtp_payload_type) == 0) {
+      decoders_.insert(std::make_pair(
+          rtp_payload_type,
+          DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get())));
+    } else {
+      // The mapping for this payload type hasn't changed.
+    }
+  }
+
+  return changed_payload_types;
+}
+
+int DecoderDatabase::RegisterPayload(int rtp_payload_type,
+                                     const SdpAudioFormat& audio_format) {
+  if (rtp_payload_type < 0 || rtp_payload_type > 0x7f) {
+    return kInvalidRtpPayloadType;
+  }
+  const auto ret = decoders_.insert(std::make_pair(
+      rtp_payload_type,
+      DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get())));
+  if (ret.second == false) {
+    // Database already contains a decoder with type `rtp_payload_type`.
+    return kDecoderExists;
+  }
+  return kOK;
+}
+
+int DecoderDatabase::Remove(uint8_t rtp_payload_type) {
+  if (decoders_.erase(rtp_payload_type) == 0) {
+    // No decoder with that `rtp_payload_type`.
+    return kDecoderNotFound;
+  }
+  if (active_decoder_type_ == rtp_payload_type) {
+    active_decoder_type_ = -1;  // No active decoder.
+  }
+  if (active_cng_decoder_type_ == rtp_payload_type) {
+    active_cng_decoder_type_ = -1;  // No active CNG decoder.
+  }
+  return kOK;
+}
+
+void DecoderDatabase::RemoveAll() {
+  decoders_.clear();
+  active_decoder_type_ = -1;      // No active decoder.
+  active_cng_decoder_type_ = -1;  // No active CNG decoder.
+} + +const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo( + uint8_t rtp_payload_type) const { + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return NULL; + } + return &it->second; +} + +int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type, + bool* new_decoder) { + // Check that `rtp_payload_type` exists in the database. + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + if (!info) { + // Decoder not found. + return kDecoderNotFound; + } + RTC_CHECK(!info->IsComfortNoise()); + RTC_DCHECK(new_decoder); + *new_decoder = false; + if (active_decoder_type_ < 0) { + // This is the first active decoder. + *new_decoder = true; + } else if (active_decoder_type_ != rtp_payload_type) { + // Moving from one active decoder to another. Delete the first one. + const DecoderInfo* old_info = GetDecoderInfo(active_decoder_type_); + RTC_DCHECK(old_info); + old_info->DropDecoder(); + *new_decoder = true; + } + active_decoder_type_ = rtp_payload_type; + return kOK; +} + +AudioDecoder* DecoderDatabase::GetActiveDecoder() const { + if (active_decoder_type_ < 0) { + // No active decoder. + return NULL; + } + return GetDecoder(active_decoder_type_); +} + +int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) { + // Check that `rtp_payload_type` exists in the database. + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + if (!info) { + // Decoder not found. + return kDecoderNotFound; + } + if (active_cng_decoder_type_ >= 0 && + active_cng_decoder_type_ != rtp_payload_type) { + // Moving from one active CNG decoder to another. Delete the first one. + RTC_DCHECK(active_cng_decoder_); + active_cng_decoder_.reset(); + } + active_cng_decoder_type_ = rtp_payload_type; + return kOK; +} + +ComfortNoiseDecoder* DecoderDatabase::GetActiveCngDecoder() const { + if (active_cng_decoder_type_ < 0) { + // No active CNG decoder. + return NULL; + } + if (!active_cng_decoder_) { + active_cng_decoder_.reset(new ComfortNoiseDecoder); + } + return active_cng_decoder_.get(); +} + +AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info ? info->GetDecoder() : nullptr; +} + +bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsComfortNoise(); +} + +bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsDtmf(); +} + +bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsRed(); +} + +int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const { + PacketList::const_iterator it; + for (it = packet_list.begin(); it != packet_list.end(); ++it) { + if (!GetDecoderInfo(it->payload_type)) { + // Payload type is not found. 
+      RTC_LOG(LS_WARNING) << "CheckPayloadTypes: unknown RTP payload type "
+                          << static_cast<int>(it->payload_type);
+      return kDecoderNotFound;
+    }
+  }
+  return kOK;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h
new file mode 100644
index 0000000000..8cf2019135
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h
@@ -0,0 +1,204 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
+#define MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/audio_codecs/audio_format.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+#include "modules/audio_coding/neteq/packet.h"
+
+namespace webrtc {
+
+class DecoderDatabase {
+ public:
+  enum DatabaseReturnCodes {
+    kOK = 0,
+    kInvalidRtpPayloadType = -1,
+    kCodecNotSupported = -2,
+    kInvalidSampleRate = -3,
+    kDecoderExists = -4,
+    kDecoderNotFound = -5,
+    kInvalidPointer = -6
+  };
+
+  // Class that stores decoder info in the database.
+  class DecoderInfo {
+   public:
+    DecoderInfo(const SdpAudioFormat& audio_format,
+                absl::optional<AudioCodecPairId> codec_pair_id,
+                AudioDecoderFactory* factory,
+                absl::string_view codec_name);
+    explicit DecoderInfo(const SdpAudioFormat& audio_format,
+                         absl::optional<AudioCodecPairId> codec_pair_id,
+                         AudioDecoderFactory* factory = nullptr);
+    DecoderInfo(DecoderInfo&&);
+    ~DecoderInfo();
+
+    // Get the AudioDecoder object, creating it first if necessary.
+    AudioDecoder* GetDecoder() const;
+
+    // Delete the AudioDecoder object, unless it's external. (This means we can
+    // always recreate it later if we need it.)
+    void DropDecoder() const { decoder_.reset(); }
+
+    int SampleRateHz() const {
+      if (IsDtmf()) {
+        // DTMF has a 1:1 mapping between clock rate and sample rate.
+        return audio_format_.clockrate_hz;
+      }
+      const AudioDecoder* decoder = GetDecoder();
+      RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_);
+      return decoder ? decoder->SampleRateHz() : cng_decoder_->sample_rate_hz;
+    }
+
+    const SdpAudioFormat& GetFormat() const { return audio_format_; }
+
+    // Returns true if the decoder's format is comfort noise.
+    bool IsComfortNoise() const {
+      RTC_DCHECK_EQ(!!cng_decoder_, subtype_ == Subtype::kComfortNoise);
+      return subtype_ == Subtype::kComfortNoise;
+    }
+
+    // Returns true if the decoder's format is DTMF.
+    bool IsDtmf() const { return subtype_ == Subtype::kDtmf; }
+
+    // Returns true if the decoder's format is RED.
+    bool IsRed() const { return subtype_ == Subtype::kRed; }
+
+    // Returns true if the decoder's format is named `name`.
+    bool IsType(absl::string_view name) const;
+
+    const std::string& get_name() const { return name_; }
+
+   private:
+    // TODO(ossu): `name_` is kept here while we retain the old external
+    //             decoder interface. Remove this once using an
+    //             AudioDecoderFactory has supplanted the old functionality.
+    const std::string name_;
+
+    const SdpAudioFormat audio_format_;
+    const absl::optional<AudioCodecPairId> codec_pair_id_;
+    AudioDecoderFactory* const factory_;
+    mutable std::unique_ptr<AudioDecoder> decoder_;
+
+    // Set iff this is a comfort noise decoder.
+    struct CngDecoder {
+      static absl::optional<CngDecoder> Create(const SdpAudioFormat& format);
+      int sample_rate_hz;
+    };
+    const absl::optional<CngDecoder> cng_decoder_;
+
+    enum class Subtype : int8_t { kNormal, kComfortNoise, kDtmf, kRed };
+
+    static Subtype SubtypeFromFormat(const SdpAudioFormat& format);
+
+    const Subtype subtype_;
+  };
+
+  // Maximum value for 8 bits, and an invalid RTP payload type (since it is
+  // only 7 bits).
+  static const uint8_t kRtpPayloadTypeError = 0xFF;
+
+  DecoderDatabase(
+      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+      absl::optional<AudioCodecPairId> codec_pair_id);
+
+  virtual ~DecoderDatabase();
+
+  DecoderDatabase(const DecoderDatabase&) = delete;
+  DecoderDatabase& operator=(const DecoderDatabase&) = delete;
+
+  // Returns true if the database is empty.
+  virtual bool Empty() const;
+
+  // Returns the number of decoders registered in the database.
+  virtual int Size() const;
+
+  // Replaces the existing set of decoders with the given set. Returns the
+  // payload types that were reassigned or removed while doing so.
+  virtual std::vector<int> SetCodecs(
+      const std::map<int, SdpAudioFormat>& codecs);
+
+  // Registers a decoder for the given payload type. Returns kOK on success;
+  // otherwise an error code.
+  virtual int RegisterPayload(int rtp_payload_type,
+                              const SdpAudioFormat& audio_format);
+
+  // Removes the entry for `rtp_payload_type` from the database.
+  // Returns kDecoderNotFound or kOK depending on the outcome of the operation.
+  virtual int Remove(uint8_t rtp_payload_type);
+
+  // Remove all entries.
+  virtual void RemoveAll();
+
+  // Returns a pointer to the DecoderInfo struct for `rtp_payload_type`. If
+  // no decoder is registered with that `rtp_payload_type`, NULL is returned.
+  virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const;
+
+  // Sets the active decoder to be `rtp_payload_type`. If this call results in
+  // a change of active decoder, `new_decoder` is set to true. The previous
+  // active decoder's AudioDecoder object is deleted.
+  virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder);
+
+  // Returns the current active decoder, or NULL if no active decoder exists.
+  virtual AudioDecoder* GetActiveDecoder() const;
+
+  // Sets the active comfort noise decoder to be `rtp_payload_type`. If this
+  // call results in a change of active comfort noise decoder, the previous
+  // active decoder's AudioDecoder object is deleted.
+  virtual int SetActiveCngDecoder(uint8_t rtp_payload_type);
+
+  // Returns the current active comfort noise decoder, or NULL if no active
+  // comfort noise decoder exists.
+  virtual ComfortNoiseDecoder* GetActiveCngDecoder() const;
+
+  // The following are utility methods: they will look up DecoderInfo through
+  // GetDecoderInfo and call the respective method on that info object, if it
+  // exists.
+
+  // Returns a pointer to the AudioDecoder object associated with
+  // `rtp_payload_type`, or NULL if none is registered. If the AudioDecoder
+  // object does not exist for that decoder, the object is created.
+  AudioDecoder* GetDecoder(uint8_t rtp_payload_type) const;
+
+  // Returns true if `rtp_payload_type` is registered as comfort noise.
+  bool IsComfortNoise(uint8_t rtp_payload_type) const;
+
+  // Returns true if `rtp_payload_type` is registered as DTMF.
+  bool IsDtmf(uint8_t rtp_payload_type) const;
+
+  // Returns true if `rtp_payload_type` is registered as RED.
+  bool IsRed(uint8_t rtp_payload_type) const;
+
+  // Returns kOK if all packets in `packet_list` carry payload types that are
+  // registered in the database. Otherwise, returns kDecoderNotFound.
+  int CheckPayloadTypes(const PacketList& packet_list) const;
+
+ private:
+  typedef std::map<uint8_t, DecoderInfo> DecoderMap;
+
+  DecoderMap decoders_;
+  int active_decoder_type_;
+  int active_cng_decoder_type_;
+  mutable std::unique_ptr<ComfortNoiseDecoder> active_cng_decoder_;
+  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
+  const absl::optional<AudioCodecPairId> codec_pair_id_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc
new file mode 100644
index 0000000000..445c21924b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc
@@ -0,0 +1,227 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/decoder_database.h"
+
+#include <stdlib.h>
+
+#include <string>
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder.h"
+#include "test/mock_audio_decoder_factory.h"
+
+using ::testing::_;
+using ::testing::Invoke;
+
+namespace webrtc {
+
+TEST(DecoderDatabase, CreateAndDestroy) {
+  DecoderDatabase db(rtc::make_ref_counted<MockAudioDecoderFactory>(),
+                     absl::nullopt);
+  EXPECT_EQ(0, db.Size());
+  EXPECT_TRUE(db.Empty());
+}
+
+TEST(DecoderDatabase, InsertAndRemove) {
+  auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  DecoderDatabase db(factory, absl::nullopt);
+  const uint8_t kPayloadType = 0;
+  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
+  EXPECT_EQ(
+      DecoderDatabase::kOK,
+      db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1)));
+  EXPECT_EQ(1, db.Size());
+  EXPECT_FALSE(db.Empty());
+  EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType));
+  EXPECT_EQ(0, db.Size());
+  EXPECT_TRUE(db.Empty());
+}
+
+TEST(DecoderDatabase, InsertAndRemoveAll) {
+  auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  DecoderDatabase db(factory, absl::nullopt);
+  const std::string kCodecName1 = "Robert\'); DROP TABLE Students;";
+  const std::string kCodecName2 = "https://xkcd.com/327/";
+  EXPECT_EQ(DecoderDatabase::kOK,
+            db.RegisterPayload(0, SdpAudioFormat(kCodecName1, 8000, 1)));
+  EXPECT_EQ(DecoderDatabase::kOK,
+            db.RegisterPayload(1, SdpAudioFormat(kCodecName2, 8000, 1)));
+  EXPECT_EQ(2, db.Size());
+  EXPECT_FALSE(db.Empty());
+  db.RemoveAll();
+  EXPECT_EQ(0, db.Size());
+  EXPECT_TRUE(db.Empty());
+}
+
+TEST(DecoderDatabase, GetDecoderInfo) {
+  auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  auto* decoder = new MockAudioDecoder;
+  EXPECT_CALL(*factory, MakeAudioDecoderMock(_, _, _))
+      .WillOnce(Invoke([decoder](const SdpAudioFormat& format,
+                                 absl::optional<AudioCodecPairId> codec_pair_id,
+                                 std::unique_ptr<AudioDecoder>* dec) {
+        EXPECT_EQ("pcmu", format.name);
+        dec->reset(decoder);
+      }));
+  DecoderDatabase db(factory, absl::nullopt);
+  const uint8_t kPayloadType = 0;
+  const std::string kCodecName = "pcmu";
+  EXPECT_EQ(
+      DecoderDatabase::kOK,
+      db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1)));
+  const DecoderDatabase::DecoderInfo* info;
+  info = db.GetDecoderInfo(kPayloadType);
+  ASSERT_TRUE(info != NULL);
+  EXPECT_TRUE(info->IsType("pcmu"));
+  EXPECT_EQ(kCodecName, info->get_name());
+  EXPECT_EQ(decoder, db.GetDecoder(kPayloadType));
+  info = db.GetDecoderInfo(kPayloadType + 1);  // Other payload type.
+  EXPECT_TRUE(info == NULL);                   // Should not be found.
+}
+
+TEST(DecoderDatabase, GetDecoder) {
+  DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt);
+  const uint8_t kPayloadType = 0;
+  EXPECT_EQ(DecoderDatabase::kOK,
+            db.RegisterPayload(kPayloadType, SdpAudioFormat("l16", 8000, 1)));
+  AudioDecoder* dec = db.GetDecoder(kPayloadType);
+  ASSERT_TRUE(dec != NULL);
+}
+
+TEST(DecoderDatabase, TypeTests) {
+  auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  DecoderDatabase db(factory, absl::nullopt);
+  const uint8_t kPayloadTypePcmU = 0;
+  const uint8_t kPayloadTypeCng = 13;
+  const uint8_t kPayloadTypeDtmf = 100;
+  const uint8_t kPayloadTypeRed = 101;
+  const uint8_t kPayloadNotUsed = 102;
+  // Load into database.
+  EXPECT_EQ(
+      DecoderDatabase::kOK,
+      db.RegisterPayload(kPayloadTypePcmU, SdpAudioFormat("pcmu", 8000, 1)));
+  EXPECT_EQ(DecoderDatabase::kOK,
+            db.RegisterPayload(kPayloadTypeCng, SdpAudioFormat("cn", 8000, 1)));
+  EXPECT_EQ(DecoderDatabase::kOK,
+            db.RegisterPayload(kPayloadTypeDtmf,
+                               SdpAudioFormat("telephone-event", 8000, 1)));
+  EXPECT_EQ(
+      DecoderDatabase::kOK,
+      db.RegisterPayload(kPayloadTypeRed, SdpAudioFormat("red", 8000, 1)));
+  EXPECT_EQ(4, db.Size());
+  // Test.
+  EXPECT_FALSE(db.IsComfortNoise(kPayloadNotUsed));
+  EXPECT_FALSE(db.IsDtmf(kPayloadNotUsed));
+  EXPECT_FALSE(db.IsRed(kPayloadNotUsed));
+  EXPECT_FALSE(db.IsComfortNoise(kPayloadTypePcmU));
+  EXPECT_FALSE(db.IsDtmf(kPayloadTypePcmU));
+  EXPECT_FALSE(db.IsRed(kPayloadTypePcmU));
+  EXPECT_TRUE(db.IsComfortNoise(kPayloadTypeCng));
+  EXPECT_TRUE(db.IsDtmf(kPayloadTypeDtmf));
+  EXPECT_TRUE(db.IsRed(kPayloadTypeRed));
+}
+
+TEST(DecoderDatabase, CheckPayloadTypes) {
+  constexpr int kNumPayloads = 10;
+  auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  DecoderDatabase db(factory, absl::nullopt);
+  // Load a number of payloads into the database. Payload types are 0, 1, ...,
+  // while the decoder type is the same for all payload types (this does not
+  // matter for the test).
+  for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) {
+    EXPECT_EQ(
+        DecoderDatabase::kOK,
+        db.RegisterPayload(payload_type, SdpAudioFormat("pcmu", 8000, 1)));
+  }
+  PacketList packet_list;
+  for (int i = 0; i < kNumPayloads + 1; ++i) {
+    // Create packet with payload type `i`. The last packet will have a payload
+    // type that is not registered in the decoder database.
+    Packet packet;
+    packet.payload_type = i;
+    packet_list.push_back(std::move(packet));
+  }
+
+  // Expect kDecoderNotFound, since the last packet is of an unknown type.
+  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
+            db.CheckPayloadTypes(packet_list));
+
+  packet_list.pop_back();  // Remove the unknown one.
+
+  EXPECT_EQ(DecoderDatabase::kOK, db.CheckPayloadTypes(packet_list));
+
+  // Delete all packets.
+ PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + it = packet_list.erase(it); + } +} + +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) +#define IF_ISAC(x) x +#else +#define IF_ISAC(x) DISABLED_##x +#endif + +// Test the methods for setting and getting active speech and CNG decoders. +TEST(DecoderDatabase, IF_ISAC(ActiveDecoders)) { + DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt); + // Load payload types. + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(0, SdpAudioFormat("pcmu", 8000, 1))); + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(103, SdpAudioFormat("isac", 16000, 1))); + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(13, SdpAudioFormat("cn", 8000, 1))); + // Verify that no decoders are active from the start. + EXPECT_EQ(NULL, db.GetActiveDecoder()); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Set active speech codec. + bool changed; // Should be true when the active decoder changed. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_TRUE(changed); + AudioDecoder* decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Set the same again. Expect no change. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_FALSE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Change active decoder. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(103, &changed)); + EXPECT_TRUE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Remove the active decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(103)); + EXPECT_EQ(NULL, db.GetActiveDecoder()); + + // Set active CNG codec. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveCngDecoder(13)); + ComfortNoiseDecoder* cng = db.GetActiveCngDecoder(); + ASSERT_FALSE(cng == NULL); // Should get a decoder here. + + // Remove the active CNG decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(13)); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Try to set non-existing codecs as active. + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.SetActiveDecoder(17, &changed)); + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, db.SetActiveCngDecoder(17)); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc new file mode 100644 index 0000000000..487450fe0f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+
+#include <utility>
+
+#include "modules/audio_coding/neteq/neteq_impl.h"
+
+namespace webrtc {
+
+DefaultNetEqFactory::DefaultNetEqFactory() = default;
+DefaultNetEqFactory::~DefaultNetEqFactory() = default;
+
+std::unique_ptr<NetEq> DefaultNetEqFactory::CreateNetEq(
+    const NetEq::Config& config,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+    Clock* clock) const {
+  return std::make_unique<NetEqImpl>(
+      config, NetEqImpl::Dependencies(config, clock, decoder_factory,
+                                      controller_factory_));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h
new file mode 100644
index 0000000000..24d2bae419
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_
+#define MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_
+
+#include <memory>
+
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/neteq/default_neteq_controller_factory.h"
+#include "api/neteq/neteq_factory.h"
+#include "api/scoped_refptr.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+
+class DefaultNetEqFactory : public NetEqFactory {
+ public:
+  DefaultNetEqFactory();
+  ~DefaultNetEqFactory() override;
+  DefaultNetEqFactory(const DefaultNetEqFactory&) = delete;
+  DefaultNetEqFactory& operator=(const DefaultNetEqFactory&) = delete;
+
+  std::unique_ptr<NetEq> CreateNetEq(
+      const NetEq::Config& config,
+      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+      Clock* clock) const override;
+
+ private:
+  const DefaultNetEqControllerFactory controller_factory_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc
new file mode 100644
index 0000000000..bf3a0f18a1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc
@@ -0,0 +1,207 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/delay_manager.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <memory>
+#include <numeric>
+#include <string>
+
+#include "modules/include/module_common_types_public.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMinBaseMinimumDelayMs = 0;
+constexpr int kMaxBaseMinimumDelayMs = 10000;
+constexpr int kStartDelayMs = 80;
+
+std::unique_ptr<ReorderOptimizer> MaybeCreateReorderOptimizer(
+    const DelayManager::Config& config) {
+  if (!config.use_reorder_optimizer) {
+    return nullptr;
+  }
+  return std::make_unique<ReorderOptimizer>(
+      (1 << 15) * config.reorder_forget_factor, config.ms_per_loss_percent,
+      config.start_forget_weight);
+}
+
+}  // namespace
+
+DelayManager::Config::Config() {
+  StructParametersParser::Create(                   //
+      "quantile", &quantile,                        //
+      "forget_factor", &forget_factor,              //
+      "start_forget_weight", &start_forget_weight,  //
+      "resample_interval_ms", &resample_interval_ms,  //
+      "use_reorder_optimizer", &use_reorder_optimizer,  //
+      "reorder_forget_factor", &reorder_forget_factor,  //
+      "ms_per_loss_percent", &ms_per_loss_percent)
+      ->Parse(webrtc::field_trial::FindFullName(
+          "WebRTC-Audio-NetEqDelayManagerConfig"));
+}
+
+void DelayManager::Config::Log() {
+  RTC_LOG(LS_INFO) << "Delay manager config:"
+                      " quantile="
+                   << quantile << " forget_factor=" << forget_factor
+                   << " start_forget_weight=" << start_forget_weight.value_or(0)
+                   << " resample_interval_ms="
+                   << resample_interval_ms.value_or(0)
+                   << " use_reorder_optimizer=" << use_reorder_optimizer
+                   << " reorder_forget_factor=" << reorder_forget_factor
+                   << " ms_per_loss_percent=" << ms_per_loss_percent;
+}
+
+DelayManager::DelayManager(const Config& config, const TickTimer* tick_timer)
+    : max_packets_in_buffer_(config.max_packets_in_buffer),
+      underrun_optimizer_(tick_timer,
+                          (1 << 30) * config.quantile,
+                          (1 << 15) * config.forget_factor,
+                          config.start_forget_weight,
+                          config.resample_interval_ms),
+      reorder_optimizer_(MaybeCreateReorderOptimizer(config)),
+      base_minimum_delay_ms_(config.base_minimum_delay_ms),
+      effective_minimum_delay_ms_(config.base_minimum_delay_ms),
+      minimum_delay_ms_(0),
+      maximum_delay_ms_(0),
+      target_level_ms_(kStartDelayMs) {
+  RTC_DCHECK_GE(base_minimum_delay_ms_, 0);
+
+  Reset();
+}
+
+DelayManager::~DelayManager() {}
+
+void DelayManager::Update(int arrival_delay_ms, bool reordered) {
+  if (!reorder_optimizer_ || !reordered) {
+    underrun_optimizer_.Update(arrival_delay_ms);
+  }
+  target_level_ms_ =
+      underrun_optimizer_.GetOptimalDelayMs().value_or(kStartDelayMs);
+  if (reorder_optimizer_) {
+    reorder_optimizer_->Update(arrival_delay_ms, reordered, target_level_ms_);
+    target_level_ms_ = std::max(
+        target_level_ms_, reorder_optimizer_->GetOptimalDelayMs().value_or(0));
+  }
+  unlimited_target_level_ms_ = target_level_ms_;
+  target_level_ms_ = std::max(target_level_ms_, effective_minimum_delay_ms_);
+  if (maximum_delay_ms_ > 0) {
+    target_level_ms_ = std::min(target_level_ms_, maximum_delay_ms_);
+  }
+  if (packet_len_ms_ > 0) {
+    // Limit to 75% of maximum buffer size.
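+    // For example, with the default Config (200-packet buffer) and 20 ms
+    // packets, the cap below evaluates to 3 * 200 * 20 / 4 = 3000 ms.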
+    target_level_ms_ = std::min(
+        target_level_ms_, 3 * max_packets_in_buffer_ * packet_len_ms_ / 4);
+  }
+}
+
+int DelayManager::SetPacketAudioLength(int length_ms) {
+  if (length_ms <= 0) {
+    RTC_LOG_F(LS_ERROR) << "length_ms = " << length_ms;
+    return -1;
+  }
+  packet_len_ms_ = length_ms;
+  return 0;
+}
+
+void DelayManager::Reset() {
+  packet_len_ms_ = 0;
+  underrun_optimizer_.Reset();
+  target_level_ms_ = kStartDelayMs;
+  if (reorder_optimizer_) {
+    reorder_optimizer_->Reset();
+  }
+}
+
+int DelayManager::TargetDelayMs() const {
+  return target_level_ms_;
+}
+
+int DelayManager::UnlimitedTargetLevelMs() const {
+  return unlimited_target_level_ms_;
+}
+
+bool DelayManager::IsValidMinimumDelay(int delay_ms) const {
+  return 0 <= delay_ms && delay_ms <= MinimumDelayUpperBound();
+}
+
+bool DelayManager::IsValidBaseMinimumDelay(int delay_ms) const {
+  return kMinBaseMinimumDelayMs <= delay_ms &&
+         delay_ms <= kMaxBaseMinimumDelayMs;
+}
+
+bool DelayManager::SetMinimumDelay(int delay_ms) {
+  if (!IsValidMinimumDelay(delay_ms)) {
+    return false;
+  }
+
+  minimum_delay_ms_ = delay_ms;
+  UpdateEffectiveMinimumDelay();
+  return true;
+}
+
+bool DelayManager::SetMaximumDelay(int delay_ms) {
+  // If `delay_ms` is zero, it unsets the maximum delay and the target level
+  // is unconstrained by the maximum delay.
+  if (delay_ms != 0 && delay_ms < minimum_delay_ms_) {
+    // The maximum delay shouldn't be less than the minimum delay.
+    return false;
+  }
+
+  maximum_delay_ms_ = delay_ms;
+  UpdateEffectiveMinimumDelay();
+  return true;
+}
+
+bool DelayManager::SetBaseMinimumDelay(int delay_ms) {
+  if (!IsValidBaseMinimumDelay(delay_ms)) {
+    return false;
+  }
+
+  base_minimum_delay_ms_ = delay_ms;
+  UpdateEffectiveMinimumDelay();
+  return true;
+}
+
+int DelayManager::GetBaseMinimumDelay() const {
+  return base_minimum_delay_ms_;
+}
+
+void DelayManager::UpdateEffectiveMinimumDelay() {
+  // Clamp `base_minimum_delay_ms_` into the range which can be effectively
+  // used.
+  const int base_minimum_delay_ms =
+      rtc::SafeClamp(base_minimum_delay_ms_, 0, MinimumDelayUpperBound());
+  effective_minimum_delay_ms_ =
+      std::max(minimum_delay_ms_, base_minimum_delay_ms);
+}
+
+int DelayManager::MinimumDelayUpperBound() const {
+  // Choose the lowest possible bound, discarding 0 cases, which mean the
+  // value is not set and unconstrained.
+  int q75 = max_packets_in_buffer_ * packet_len_ms_ * 3 / 4;
+  q75 = q75 > 0 ? q75 : kMaxBaseMinimumDelayMs;
+  const int maximum_delay_ms =
+      maximum_delay_ms_ > 0 ? maximum_delay_ms_ : kMaxBaseMinimumDelayMs;
+  return std::min(maximum_delay_ms, q75);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h
new file mode 100644
index 0000000000..a333681535
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h
@@ -0,0 +1,121 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
+#define MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
+
+#include <string.h>  // Provide access to size_t.
+
+#include <deque>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/histogram.h"
+#include "modules/audio_coding/neteq/reorder_optimizer.h"
+#include "modules/audio_coding/neteq/underrun_optimizer.h"
+
+namespace webrtc {
+
+class DelayManager {
+ public:
+  struct Config {
+    Config();
+    void Log();
+
+    // Options that can be configured via field trial.
+    double quantile = 0.95;
+    double forget_factor = 0.983;
+    absl::optional<double> start_forget_weight = 2;
+    absl::optional<int> resample_interval_ms = 500;
+
+    bool use_reorder_optimizer = true;
+    double reorder_forget_factor = 0.9993;
+    int ms_per_loss_percent = 20;
+
+    // Options that are externally populated.
+    int max_packets_in_buffer = 200;
+    int base_minimum_delay_ms = 0;
+  };
+
+  DelayManager(const Config& config, const TickTimer* tick_timer);
+
+  virtual ~DelayManager();
+
+  DelayManager(const DelayManager&) = delete;
+  DelayManager& operator=(const DelayManager&) = delete;
+
+  // Notifies the delay manager that a new packet arrived with delay
+  // `arrival_delay_ms`. This updates the statistics, and a new target buffer
+  // level is calculated. The `reordered` flag indicates whether the packet
+  // was reordered.
+  virtual void Update(int arrival_delay_ms, bool reordered);
+
+  // Resets all state.
+  virtual void Reset();
+
+  // Gets the target buffer level in milliseconds. If a minimum or maximum
+  // delay has been set, the target delay reported here also respects the
+  // configured min/max delay.
+  virtual int TargetDelayMs() const;
+
+  // Reports the target delay that would be used if no minimum/maximum delay
+  // were set.
+  virtual int UnlimitedTargetLevelMs() const;
+
+  // Notifies the DelayManager of how much audio data is carried in each
+  // packet.
+  virtual int SetPacketAudioLength(int length_ms);
+
+  // Accessors and mutators.
+  // Assuming `delay` is in valid range.
+  virtual bool SetMinimumDelay(int delay_ms);
+  virtual bool SetMaximumDelay(int delay_ms);
+  virtual bool SetBaseMinimumDelay(int delay_ms);
+  virtual int GetBaseMinimumDelay() const;
+
+  // These accessors are only intended for testing purposes.
+  int effective_minimum_delay_ms_for_test() const {
+    return effective_minimum_delay_ms_;
+  }
+
+ private:
+  // Provides the value which the minimum delay can't exceed, based on the
+  // current buffer size and the given `maximum_delay_ms_`. The lower bound is
+  // a constant 0.
+  int MinimumDelayUpperBound() const;
+
+  // Updates `effective_minimum_delay_ms_` based on the current
+  // `minimum_delay_ms_`, `base_minimum_delay_ms_` and `maximum_delay_ms_`
+  // and the buffer size.
+  void UpdateEffectiveMinimumDelay();
+
+  // Makes sure that `delay_ms` is less than the maximum delay, if one is
+  // set. Also, if possible, checks that `delay_ms` is less than 75% of
+  // `max_packets_in_buffer_`.
+  bool IsValidMinimumDelay(int delay_ms) const;
+
+  bool IsValidBaseMinimumDelay(int delay_ms) const;
+
+  // TODO(jakobi): set maximum buffer delay instead of number of packets.
+  const int max_packets_in_buffer_;
+  UnderrunOptimizer underrun_optimizer_;
+  std::unique_ptr<ReorderOptimizer> reorder_optimizer_;
+
+  int base_minimum_delay_ms_;
+  int effective_minimum_delay_ms_;  // Used as lower bound for target delay.
+  int minimum_delay_ms_;            // Externally set minimum delay.
+  int maximum_delay_ms_;            // Externally set maximum allowed delay.
+
+  int packet_len_ms_ = 0;
+  int target_level_ms_ = 0;  // Currently preferred buffer level.
+  int unlimited_target_level_ms_ = 0;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
new file mode 100644
index 0000000000..da5f53188c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
@@ -0,0 +1,246 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for DelayManager class.
+
+#include "modules/audio_coding/neteq/delay_manager.h"
+
+#include <math.h>
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/histogram.h"
+#include "modules/audio_coding/neteq/mock/mock_histogram.h"
+#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
+#include "rtc_base/checks.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int kMaxNumberOfPackets = 200;
+constexpr int kTimeStepMs = 10;
+constexpr int kFrameSizeMs = 20;
+constexpr int kMaxBufferSizeMs = kMaxNumberOfPackets * kFrameSizeMs;
+
+}  // namespace
+
+class DelayManagerTest : public ::testing::Test {
+ protected:
+  DelayManagerTest();
+  virtual void SetUp();
+  void Update(int delay);
+  void IncreaseTime(int inc_ms);
+
+  TickTimer tick_timer_;
+  DelayManager dm_;
+};
+
+DelayManagerTest::DelayManagerTest()
+    : dm_(DelayManager::Config(), &tick_timer_) {}
+
+void DelayManagerTest::SetUp() {
+  dm_.SetPacketAudioLength(kFrameSizeMs);
+}
+
+void DelayManagerTest::Update(int delay) {
+  dm_.Update(delay, false);
+}
+
+void DelayManagerTest::IncreaseTime(int inc_ms) {
+  for (int t = 0; t < inc_ms; t += kTimeStepMs) {
+    tick_timer_.Increment();
+  }
+}
+
+TEST_F(DelayManagerTest, CreateAndDestroy) {
+  // Nothing to do here. The test fixture creates and destroys the
+  // DelayManager object.
+}
+
+TEST_F(DelayManagerTest, UpdateNormal) {
+  for (int i = 0; i < 50; ++i) {
+    Update(0);
+    IncreaseTime(kFrameSizeMs);
+  }
+  EXPECT_EQ(20, dm_.TargetDelayMs());
+}
+
+TEST_F(DelayManagerTest, MaxDelay) {
+  Update(0);
+  const int kMaxDelayMs = 60;
+  EXPECT_GT(dm_.TargetDelayMs(), kMaxDelayMs);
+  EXPECT_TRUE(dm_.SetMaximumDelay(kMaxDelayMs));
+  Update(0);
+  EXPECT_EQ(kMaxDelayMs, dm_.TargetDelayMs());
+}
+
+TEST_F(DelayManagerTest, MinDelay) {
+  Update(0);
+  int kMinDelayMs = 7 * kFrameSizeMs;
+  EXPECT_LT(dm_.TargetDelayMs(), kMinDelayMs);
+  dm_.SetMinimumDelay(kMinDelayMs);
+  IncreaseTime(kFrameSizeMs);
+  Update(0);
+  EXPECT_EQ(kMinDelayMs, dm_.TargetDelayMs());
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayCheckValidRange) {
+  // Base minimum delay should be between [0, 10000] milliseconds.
+  EXPECT_FALSE(dm_.SetBaseMinimumDelay(-1));
+  EXPECT_FALSE(dm_.SetBaseMinimumDelay(10001));
+  EXPECT_EQ(dm_.GetBaseMinimumDelay(), 0);
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(7999));
+  EXPECT_EQ(dm_.GetBaseMinimumDelay(), 7999);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMinimumDelay) {
+  constexpr int kBaseMinimumDelayMs = 100;
+  constexpr int kMinimumDelayMs = 200;
+
+  // The base minimum delay sets a lower bound on the minimum delay. That is
why, when the base
+  // minimum delay is lower than the minimum delay, the minimum delay is used.
+  RTC_DCHECK_LT(kBaseMinimumDelayMs, kMinimumDelayMs);
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanMinimumDelay) {
+  constexpr int kBaseMinimumDelayMs = 70;
+  constexpr int kMinimumDelayMs = 30;
+
+  // The base minimum delay sets a lower bound on the minimum delay. That is
+  // why, when the base minimum delay is greater than the minimum delay, the
+  // base minimum delay is used.
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMs);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanBufferSize) {
+  constexpr int kBaseMinimumDelayMs = kMaxBufferSizeMs + 1;
+  constexpr int kMinimumDelayMs = 12;
+  constexpr int kMaximumDelayMs = 20;
+  constexpr int kMaxBufferSizeMsQ75 = 3 * kMaxBufferSizeMs / 4;
+
+  EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
+
+  // The base minimum delay is greater than the minimum delay, so it is
+  // clamped to the highest currently possible value, which is the maximum
+  // delay.
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaxBufferSizeMs);
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs);
+  RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMsQ75);
+
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+
+  // Unset the maximum delay.
+  EXPECT_TRUE(dm_.SetMaximumDelay(0));
+
+  // With the maximum delay unset, the highest possible value is now 75% of
+  // the maximum buffer size.
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaxBufferSizeMsQ75);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanMaximumDelay) {
+  constexpr int kMaximumDelayMs = 400;
+  constexpr int kBaseMinimumDelayMs = kMaximumDelayMs + 1;
+  constexpr int kMinimumDelayMs = 20;
+
+  // The base minimum delay is greater than the minimum delay, so it is
+  // clamped to the highest possible value, which is kMaximumDelayMs.
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs);
+  RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMs);
+
+  EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaximumDelayMs);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMaxSize) {
+  constexpr int kMaximumDelayMs = 400;
+  constexpr int kBaseMinimumDelayMs = kMaximumDelayMs - 1;
+  constexpr int kMinimumDelayMs = 20;
+
+  // The base minimum delay is greater than the minimum delay and lower than
+  // the maximum delay, so it is the value that is used.
+  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
+  RTC_DCHECK_LT(kBaseMinimumDelayMs, kMaximumDelayMs);
+
+  EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMs);
+}
+
+TEST_F(DelayManagerTest, MinimumDelayMemorization) {
+  // Check that, when the base minimum delay is increased to a value higher
+  // than the minimum delay, the minimum delay is still memorized. This allows
+  // the effective minimum delay to be restored to the memorized minimum delay
+  // value when the base minimum delay is decreased again.
+  constexpr int kBaseMinimumDelayMsLow = 10;
+  constexpr int kMinimumDelayMs = 20;
+  constexpr int kBaseMinimumDelayMsHigh = 30;
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsLow));
+  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
+  // Minimum delay is used as it is higher than base minimum delay.
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsHigh));
+  // Base minimum delay is used as it is now higher than minimum delay.
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMsHigh);
+
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsLow));
+  // Check that minimum delay is memorized and is used again.
+  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
+}
+
+TEST_F(DelayManagerTest, BaseMinimumDelay) {
+  // First packet arrival.
+  Update(0);
+
+  constexpr int kBaseMinimumDelayMs = 7 * kFrameSizeMs;
+  EXPECT_LT(dm_.TargetDelayMs(), kBaseMinimumDelayMs);
+  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
+  EXPECT_EQ(dm_.GetBaseMinimumDelay(), kBaseMinimumDelayMs);
+
+  IncreaseTime(kFrameSizeMs);
+  Update(0);
+  EXPECT_EQ(dm_.GetBaseMinimumDelay(), kBaseMinimumDelayMs);
+  EXPECT_EQ(kBaseMinimumDelayMs, dm_.TargetDelayMs());
+}
+
+TEST_F(DelayManagerTest, Failures) {
+  // Wrong packet size.
+  EXPECT_EQ(-1, dm_.SetPacketAudioLength(0));
+  EXPECT_EQ(-1, dm_.SetPacketAudioLength(-1));
+
+  // Minimum delay higher than a maximum delay is not accepted.
+  EXPECT_TRUE(dm_.SetMaximumDelay(20));
+  EXPECT_FALSE(dm_.SetMinimumDelay(40));
+
+  // Maximum delay less than minimum delay is not accepted.
+  EXPECT_TRUE(dm_.SetMaximumDelay(100));
+  EXPECT_TRUE(dm_.SetMinimumDelay(80));
+  EXPECT_FALSE(dm_.SetMaximumDelay(60));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.cc
new file mode 100644
index 0000000000..a979f94214
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.cc
@@ -0,0 +1,373 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/dsp_helper.h"
+
+#include <string.h>  // Access to memset.
+
+#include <algorithm>  // Access to min, max.
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+
+namespace webrtc {
+
+// Table of constants used in method DspHelper::ParabolicFit().
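+// Each row is selected through fit_index[] in ParabolicFit() below: column 0
+// is compared against the interpolation thresholds, while columns 1 and 2
+// weight the denominator and numerator terms of the fit.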
+const int16_t DspHelper::kParabolaCoefficients[17][3] = { + {120, 32, 64}, {140, 44, 75}, {150, 50, 80}, {160, 57, 85}, + {180, 72, 96}, {200, 89, 107}, {210, 98, 112}, {220, 108, 117}, + {240, 128, 128}, {260, 150, 139}, {270, 162, 144}, {280, 174, 149}, + {300, 200, 160}, {320, 228, 171}, {330, 242, 176}, {340, 257, 181}, + {360, 288, 192}}; + +// Filter coefficients used when downsampling from the indicated sample rates +// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0 +// values are provided in the comments before each array. + +// Q0 values: {0.3, 0.4, 0.3}. +const int16_t DspHelper::kDownsample8kHzTbl[3] = {1229, 1638, 1229}; + +// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}. +const int16_t DspHelper::kDownsample16kHzTbl[5] = {614, 819, 1229, 819, 614}; + +// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}. +const int16_t DspHelper::kDownsample32kHzTbl[7] = {584, 512, 625, 667, + 625, 512, 584}; + +// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}. +const int16_t DspHelper::kDownsample48kHzTbl[7] = {1019, 390, 427, 440, + 427, 390, 1019}; + +int DspHelper::RampSignal(const int16_t* input, + size_t length, + int factor, + int increment, + int16_t* output) { + int factor_q20 = (factor << 6) + 32; + // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14? + for (size_t i = 0; i < length; ++i) { + output[i] = (factor * input[i] + 8192) >> 14; + factor_q20 += increment; + factor_q20 = std::max(factor_q20, 0); // Never go negative. + factor = std::min(factor_q20 >> 6, 16384); + } + return factor; +} + +int DspHelper::RampSignal(int16_t* signal, + size_t length, + int factor, + int increment) { + return RampSignal(signal, length, factor, increment, signal); +} + +int DspHelper::RampSignal(AudioVector* signal, + size_t start_index, + size_t length, + int factor, + int increment) { + int factor_q20 = (factor << 6) + 32; + // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14? + for (size_t i = start_index; i < start_index + length; ++i) { + (*signal)[i] = (factor * (*signal)[i] + 8192) >> 14; + factor_q20 += increment; + factor_q20 = std::max(factor_q20, 0); // Never go negative. + factor = std::min(factor_q20 >> 6, 16384); + } + return factor; +} + +int DspHelper::RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment) { + RTC_DCHECK_LE(start_index + length, signal->Size()); + if (start_index + length > signal->Size()) { + // Wrong parameters. Do nothing and return the scale factor unaltered. + return factor; + } + int end_factor = 0; + // Loop over the channels, starting at the same `factor` each time. + for (size_t channel = 0; channel < signal->Channels(); ++channel) { + end_factor = + RampSignal(&(*signal)[channel], start_index, length, factor, increment); + } + return end_factor; +} + +void DspHelper::PeakDetection(int16_t* data, + size_t data_length, + size_t num_peaks, + int fs_mult, + size_t* peak_index, + int16_t* peak_value) { + size_t min_index = 0; + size_t max_index = 0; + + for (size_t i = 0; i <= num_peaks - 1; i++) { + if (num_peaks == 1) { + // Single peak. The parabola fit assumes that an extra point is + // available; worst case it gets a zero on the high end of the signal. + // TODO(hlundin): This can potentially get much worse. It breaks the + // API contract, that the length of `data` is `data_length`. 
+ data_length++; + } + + peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1); + + if (i != num_peaks - 1) { + min_index = (peak_index[i] > 2) ? (peak_index[i] - 2) : 0; + max_index = std::min(data_length - 1, peak_index[i] + 2); + } + + if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else { + if (peak_index[i] == data_length - 2) { + if (data[peak_index[i]] > data[peak_index[i] + 1]) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else if (data[peak_index[i]] <= data[peak_index[i] + 1]) { + // Linear approximation. + peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1; + peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult; + } + } else { + peak_value[i] = data[peak_index[i]]; + peak_index[i] = peak_index[i] * 2 * fs_mult; + } + } + + if (i != num_peaks - 1) { + memset(&data[min_index], 0, + sizeof(data[0]) * (max_index - min_index + 1)); + } + } +} + +void DspHelper::ParabolicFit(int16_t* signal_points, + int fs_mult, + size_t* peak_index, + int16_t* peak_value) { + uint16_t fit_index[13]; + if (fs_mult == 1) { + fit_index[0] = 0; + fit_index[1] = 8; + fit_index[2] = 16; + } else if (fs_mult == 2) { + fit_index[0] = 0; + fit_index[1] = 4; + fit_index[2] = 8; + fit_index[3] = 12; + fit_index[4] = 16; + } else if (fs_mult == 4) { + fit_index[0] = 0; + fit_index[1] = 2; + fit_index[2] = 4; + fit_index[3] = 6; + fit_index[4] = 8; + fit_index[5] = 10; + fit_index[6] = 12; + fit_index[7] = 14; + fit_index[8] = 16; + } else { + fit_index[0] = 0; + fit_index[1] = 1; + fit_index[2] = 3; + fit_index[3] = 4; + fit_index[4] = 5; + fit_index[5] = 7; + fit_index[6] = 8; + fit_index[7] = 9; + fit_index[8] = 11; + fit_index[9] = 12; + fit_index[10] = 13; + fit_index[11] = 15; + fit_index[12] = 16; + } + + // num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2]; + // den = signal_points[0] - 2 * signal_points[1] + signal_points[2]; + int32_t num = + (signal_points[0] * -3) + (signal_points[1] * 4) - signal_points[2]; + int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2]; + int32_t temp = num * 120; + int flag = 1; + int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0] - + kParabolaCoefficients[fit_index[fs_mult - 1]][0]; + int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0] + + kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / + 2; + int16_t lmt; + if (temp < -den * strt) { + lmt = strt - stp; + while (flag) { + if ((flag == fs_mult) || (temp > -den * lmt)) { + *peak_value = + (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1] + + num * kParabolaCoefficients[fit_index[fs_mult - flag]][2] + + signal_points[0] * 256) / + 256; + *peak_index = *peak_index * 2 * fs_mult - flag; + flag = 0; + } else { + flag++; + lmt -= stp; + } + } + } else if (temp > -den * (strt + stp)) { + lmt = strt + 2 * stp; + while (flag) { + if ((flag == fs_mult) || (temp < -den * lmt)) { + int32_t temp_term_1 = + den * kParabolaCoefficients[fit_index[fs_mult + flag]][1]; + int32_t temp_term_2 = + num * kParabolaCoefficients[fit_index[fs_mult + flag]][2]; + int32_t temp_term_3 = signal_points[0] * 256; + *peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256; + *peak_index = *peak_index * 2 * fs_mult + flag; + flag = 0; + } else { + flag++; + lmt += stp; + } + } + } else { + *peak_value = signal_points[1]; + *peak_index = *peak_index * 2 * fs_mult; + } +} + +size_t 
DspHelper::MinDistortion(const int16_t* signal,
+                                size_t min_lag,
+                                size_t max_lag,
+                                size_t length,
+                                int32_t* distortion_value) {
+  size_t best_index = 0;
+  int32_t min_distortion = WEBRTC_SPL_WORD32_MAX;
+  for (size_t i = min_lag; i <= max_lag; i++) {
+    int32_t sum_diff = 0;
+    const int16_t* data1 = signal;
+    const int16_t* data2 = signal - i;
+    for (size_t j = 0; j < length; j++) {
+      sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]);
+    }
+    // Compare with previous minimum.
+    if (sum_diff < min_distortion) {
+      min_distortion = sum_diff;
+      best_index = i;
+    }
+  }
+  *distortion_value = min_distortion;
+  return best_index;
+}
+
+void DspHelper::CrossFade(const int16_t* input1,
+                          const int16_t* input2,
+                          size_t length,
+                          int16_t* mix_factor,
+                          int16_t factor_decrement,
+                          int16_t* output) {
+  int16_t factor = *mix_factor;
+  int16_t complement_factor = 16384 - factor;
+  for (size_t i = 0; i < length; i++) {
+    output[i] =
+        (factor * input1[i] + complement_factor * input2[i] + 8192) >> 14;
+    factor -= factor_decrement;
+    complement_factor += factor_decrement;
+  }
+  *mix_factor = factor;
+}
+
+void DspHelper::UnmuteSignal(const int16_t* input,
+                             size_t length,
+                             int16_t* factor,
+                             int increment,
+                             int16_t* output) {
+  uint16_t factor_16b = *factor;
+  int32_t factor_32b = (static_cast<int32_t>(factor_16b) << 6) + 32;
+  for (size_t i = 0; i < length; i++) {
+    output[i] = (factor_16b * input[i] + 8192) >> 14;
+    factor_32b = std::max(factor_32b + increment, 0);
+    factor_16b = std::min(16384, factor_32b >> 6);
+  }
+  *factor = factor_16b;
+}
+
+void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) {
+  int32_t factor = (16384 << 6) + 32;
+  for (size_t i = 0; i < length; i++) {
+    signal[i] = ((factor >> 6) * signal[i] + 8192) >> 14;
+    factor -= mute_slope;
+  }
+}
+
+int DspHelper::DownsampleTo4kHz(const int16_t* input,
+                                size_t input_length,
+                                size_t output_length,
+                                int input_rate_hz,
+                                bool compensate_delay,
+                                int16_t* output) {
+  // Set filter parameters depending on input frequency.
+  // NOTE: The phase delay values are wrong compared to the true phase delay
+  // of the filters. However, the error is preserved (through the +1 term) for
+  // consistency.
+  const int16_t* filter_coefficients;  // Filter coefficients.
+  size_t filter_length;                // Number of coefficients.
+  size_t filter_delay;                 // Phase delay in samples.
+  int16_t factor;                      // Conversion rate (inFsHz / 8000).
+  switch (input_rate_hz) {
+    case 8000: {
+      filter_length = 3;
+      factor = 2;
+      filter_coefficients = kDownsample8kHzTbl;
+      filter_delay = 1 + 1;
+      break;
+    }
+    case 16000: {
+      filter_length = 5;
+      factor = 4;
+      filter_coefficients = kDownsample16kHzTbl;
+      filter_delay = 2 + 1;
+      break;
+    }
+    case 32000: {
+      filter_length = 7;
+      factor = 8;
+      filter_coefficients = kDownsample32kHzTbl;
+      filter_delay = 3 + 1;
+      break;
+    }
+    case 48000: {
+      filter_length = 7;
+      factor = 12;
+      filter_coefficients = kDownsample48kHzTbl;
+      filter_delay = 3 + 1;
+      break;
+    }
+    default: {
+      RTC_DCHECK_NOTREACHED();
+      return -1;
+    }
+  }
+
+  if (!compensate_delay) {
+    // Disregard delay compensation.
+    filter_delay = 0;
+  }
+
+  // Returns -1 if the input signal is too short; 0 otherwise.
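+  // The first filter_length - 1 input samples are skipped below so that the
+  // FIR filter has a full history of samples to operate on.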
+  return WebRtcSpl_DownsampleFast(
+      &input[filter_length - 1], input_length - filter_length + 1, output,
+      output_length, filter_coefficients, filter_length, factor, filter_delay);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h
new file mode 100644
index 0000000000..4aead7df18
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h
@@ -0,0 +1,161 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
+#define MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/audio_vector.h"
+
+namespace webrtc {
+
+// This class contains various signal processing functions, all implemented as
+// static methods.
+class DspHelper {
+ public:
+  // Filter coefficients used when downsampling from the indicated sample rates
+  // (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12.
+  static const int16_t kDownsample8kHzTbl[3];
+  static const int16_t kDownsample16kHzTbl[5];
+  static const int16_t kDownsample32kHzTbl[7];
+  static const int16_t kDownsample48kHzTbl[7];
+
+  // Constants used to mute and unmute over 5 samples. The coefficients are
+  // in Q15.
+  static const int kMuteFactorStart8kHz = 27307;
+  static const int kMuteFactorIncrement8kHz = -5461;
+  static const int kUnmuteFactorStart8kHz = 5461;
+  static const int kUnmuteFactorIncrement8kHz = 5461;
+  static const int kMuteFactorStart16kHz = 29789;
+  static const int kMuteFactorIncrement16kHz = -2979;
+  static const int kUnmuteFactorStart16kHz = 2979;
+  static const int kUnmuteFactorIncrement16kHz = 2979;
+  static const int kMuteFactorStart32kHz = 31208;
+  static const int kMuteFactorIncrement32kHz = -1560;
+  static const int kUnmuteFactorStart32kHz = 1560;
+  static const int kUnmuteFactorIncrement32kHz = 1560;
+  static const int kMuteFactorStart48kHz = 31711;
+  static const int kMuteFactorIncrement48kHz = -1057;
+  static const int kUnmuteFactorStart48kHz = 1057;
+  static const int kUnmuteFactorIncrement48kHz = 1057;
+
+  // Multiplies the signal with a gradually changing factor.
+  // The first sample is multiplied with `factor` (in Q14). For each sample,
+  // `factor` is increased (additive) by the `increment` (in Q20), which can
+  // be negative. Returns the scale factor after the last increment.
+  static int RampSignal(const int16_t* input,
+                        size_t length,
+                        int factor,
+                        int increment,
+                        int16_t* output);
+
+  // Same as above, but with the samples of `signal` being modified in-place.
+  static int RampSignal(int16_t* signal,
+                        size_t length,
+                        int factor,
+                        int increment);
+
+  // Same as above, but processes `length` samples from `signal`, starting at
+  // `start_index`.
+  static int RampSignal(AudioVector* signal,
+                        size_t start_index,
+                        size_t length,
+                        int factor,
+                        int increment);
+
+  // Same as above, but for an AudioMultiVector.
+ static int RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment); + + // Peak detection with parabolic fit. Looks for `num_peaks` maxima in `data`, + // having length `data_length` and sample rate multiplier `fs_mult`. The peak + // locations and values are written to the arrays `peak_index` and + // `peak_value`, respectively. Both arrays must hold at least `num_peaks` + // elements. + static void PeakDetection(int16_t* data, + size_t data_length, + size_t num_peaks, + int fs_mult, + size_t* peak_index, + int16_t* peak_value); + + // Estimates the height and location of a maximum. The three values in the + // array `signal_points` are used as basis for a parabolic fit, which is then + // used to find the maximum in an interpolated signal. The `signal_points` are + // assumed to be from a 4 kHz signal, while the maximum, written to + // `peak_index` and `peak_value` is given in the full sample rate, as + // indicated by the sample rate multiplier `fs_mult`. + static void ParabolicFit(int16_t* signal_points, + int fs_mult, + size_t* peak_index, + int16_t* peak_value); + + // Calculates the sum-abs-diff for `signal` when compared to a displaced + // version of itself. Returns the displacement lag that results in the minimum + // distortion. The resulting distortion is written to `distortion_value`. + // The values of `min_lag` and `max_lag` are boundaries for the search. + static size_t MinDistortion(const int16_t* signal, + size_t min_lag, + size_t max_lag, + size_t length, + int32_t* distortion_value); + + // Mixes `length` samples from `input1` and `input2` together and writes the + // result to `output`. The gain for `input1` starts at `mix_factor` (Q14) and + // is decreased by `factor_decrement` (Q14) for each sample. The gain for + // `input2` is the complement 16384 - mix_factor. + static void CrossFade(const int16_t* input1, + const int16_t* input2, + size_t length, + int16_t* mix_factor, + int16_t factor_decrement, + int16_t* output); + + // Scales `input` with an increasing gain. Applies `factor` (Q14) to the first + // sample and increases the gain by `increment` (Q20) for each sample. The + // result is written to `output`. `length` samples are processed. + static void UnmuteSignal(const int16_t* input, + size_t length, + int16_t* factor, + int increment, + int16_t* output); + + // Starts at unity gain and gradually fades out `signal`. For each sample, + // the gain is reduced by `mute_slope` (Q14). `length` samples are processed. + static void MuteSignal(int16_t* signal, int mute_slope, size_t length); + + // Downsamples `input` from `sample_rate_hz` to 4 kHz sample rate. The input + // has `input_length` samples, and the method will write `output_length` + // samples to `output`. Compensates for the phase delay of the downsampling + // filters if `compensate_delay` is true. Returns -1 if the input is too short + // to produce `output_length` samples, otherwise 0. + static int DownsampleTo4kHz(const int16_t* input, + size_t input_length, + size_t output_length, + int input_rate_hz, + bool compensate_delay, + int16_t* output); + + DspHelper(const DspHelper&) = delete; + DspHelper& operator=(const DspHelper&) = delete; + + private: + // Table of constants used in method DspHelper::ParabolicFit(). 
+ static const int16_t kParabolaCoefficients[17][3]; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc new file mode 100644 index 0000000000..09247417d3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/dsp_helper.h" + +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(DspHelper, RampSignalArray) { + static const int kLen = 100; + int16_t input[kLen]; + int16_t output[kLen]; + // Fill input with 1000. + for (int i = 0; i < kLen; ++i) { + input[i] = 1000; + } + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) over the array. Note that `increment` is in Q20, + // while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + // Test first method. + int stop_factor = + DspHelper::RampSignal(input, kLen, start_factor, increment, output); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, output[i]); + } + + // Test second method. (Note that this modifies `input`.) + stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, input[i]); + } +} + +TEST(DspHelper, RampSignalAudioMultiVector) { + static const int kLen = 100; + static const int kChannels = 5; + AudioMultiVector input(kChannels, kLen * 3); + // Fill input with 1000. + for (int i = 0; i < kLen * 3; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + input[channel][i] = 1000; + } + } + // We want to start ramping at `start_index` and keep ramping for `kLen` + // samples. + int start_index = kLen; + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) in `kLen` samples. Note that `increment` is in + // Q20, while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + int stop_factor = + DspHelper::RampSignal(&input, start_index, kLen, start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + // Verify that the first `kLen` samples are left untouched. + int i; + for (i = 0; i < kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, input[channel][i]); + } + } + // Verify that the next block of `kLen` samples are ramped. + for (; i < 2 * kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000 * (i - kLen) / kLen, input[channel][i]); + } + } + // Verify the last `kLen` samples are left untouched. 
+  for (; i < 3 * kLen; ++i) {
+    for (int channel = 0; channel < kChannels; ++channel) {
+      EXPECT_EQ(1000, input[channel][i]);
+    }
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc
new file mode 100644
index 0000000000..115bfcf97b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc
@@ -0,0 +1,246 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/dtmf_buffer.h"
+
+#include <algorithm>  // max
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
+// longer required, this #define should be removed (and the code that it
+// enables).
+#define LEGACY_BITEXACT
+
+namespace webrtc {
+
+DtmfBuffer::DtmfBuffer(int fs_hz) {
+  SetSampleRate(fs_hz);
+}
+
+DtmfBuffer::~DtmfBuffer() = default;
+
+void DtmfBuffer::Flush() {
+  buffer_.clear();
+}
+
+// The ParseEvent method parses 4 bytes from `payload` according to this format
+// from RFC 4733:
+//
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |     event     |E|R| volume    |          duration             |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//
+// Legend (adapted from RFC 4733)
+// - event:    The event field is a number between 0 and 255 identifying a
+//             specific telephony event. The buffer will not accept any event
+//             numbers larger than 15.
+// - E:        If set to a value of one, the "end" bit indicates that this
+//             packet contains the end of the event. For long-lasting events
+//             that have to be split into segments, only the final packet for
+//             the final segment will have the E bit set.
+// - R:        Reserved.
+// - volume:   For DTMF digits and other events representable as tones, this
+//             field describes the power level of the tone, expressed in dBm0
+//             after dropping the sign. Power levels range from 0 to -63 dBm0.
+//             Thus, larger values denote lower volume. The buffer discards
+//             values larger than 36 (i.e., lower than -36 dBm0).
+// - duration: The duration field indicates the duration of the event or
+//             segment being reported, in timestamp units, expressed as an
+//             unsigned integer in network byte order. For a non-zero value,
+//             the event or segment began at the instant identified by the
+//             RTP timestamp and has so far lasted as long as indicated by
+//             this parameter. The event may or may not have ended. If the
+//             event duration exceeds the maximum representable by the
+//             duration field, the event is split into several contiguous
+//             segments. The buffer will discard zero-duration events.
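+//
+// As a worked illustration of the format (the byte values here are made up
+// for this example): the payload {0x07, 0x8A, 0x03, 0x20} parses as event 7,
+// end bit set (0x8A & 0x80), volume 10 (0x8A & 0x3F, i.e., -10 dBm0), and
+// duration 0x0320 = 800 timestamp units.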
+// +int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp, + const uint8_t* payload, + size_t payload_length_bytes, + DtmfEvent* event) { + RTC_CHECK(payload); + RTC_CHECK(event); + if (payload_length_bytes < 4) { + RTC_LOG(LS_WARNING) << "ParseEvent payload too short"; + return kPayloadTooShort; + } + + event->event_no = payload[0]; + event->end_bit = ((payload[1] & 0x80) != 0); + event->volume = (payload[1] & 0x3F); + event->duration = payload[2] << 8 | payload[3]; + event->timestamp = rtp_timestamp; + return kOK; +} + +// Inserts a DTMF event into the buffer. The event should be parsed from the +// bit stream using the ParseEvent method above before inserting it in the +// buffer. +// DTMF events can be quite long, and in most cases the duration of the event +// is not known when the first packet describing it is sent. To deal with that, +// the RFC 4733 specifies that multiple packets are sent for one and the same +// event as it is being created (typically, as the user is pressing the key). +// These packets will all share the same start timestamp and event number, +// while the duration will be the cumulative duration from the start. When +// inserting a new event, the InsertEvent method tries to find a matching event +// already in the buffer. If so, the new event is simply merged with the +// existing one. +int DtmfBuffer::InsertEvent(const DtmfEvent& event) { + if (event.event_no < 0 || event.event_no > 15 || event.volume < 0 || + event.volume > 63 || event.duration <= 0 || event.duration > 65535) { + RTC_LOG(LS_WARNING) << "InsertEvent invalid parameters"; + return kInvalidEventParameters; + } + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + if (MergeEvents(it, event)) { + // A matching event was found and the new event was merged. + return kOK; + } + ++it; + } + buffer_.push_back(event); + // Sort the buffer using CompareEvents to rank the events. + buffer_.sort(CompareEvents); + return kOK; +} + +bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) { + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + // `event_end` is an estimate of where the current event ends. If the end + // bit is set, we know that the event ends at `timestamp` + `duration`. + uint32_t event_end = it->timestamp + it->duration; +#ifdef LEGACY_BITEXACT + bool next_available = false; +#endif + if (!it->end_bit) { + // If the end bit is not set, we allow extrapolation of the event for + // some time. + event_end += max_extrapolation_samples_; + DtmfList::iterator next = it; + ++next; + if (next != buffer_.end()) { + // If there is a next event in the buffer, we will not extrapolate over + // the start of that new event. + event_end = std::min(event_end, next->timestamp); +#ifdef LEGACY_BITEXACT + next_available = true; +#endif + } + } + if (current_timestamp >= it->timestamp && + current_timestamp <= event_end) { // TODO(hlundin): Change to <. + // Found a matching event. + if (event) { + event->event_no = it->event_no; + event->end_bit = it->end_bit; + event->volume = it->volume; + event->duration = it->duration; + event->timestamp = it->timestamp; + } +#ifdef LEGACY_BITEXACT + if (it->end_bit && current_timestamp + frame_len_samples_ >= event_end) { + // We are done playing this. Erase the event. + buffer_.erase(it); + } +#endif + return true; + } else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=. +// Erase old event. Operation returns a valid pointer to the next element +// in the list. 
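+// Under LEGACY_BITEXACT, an expired event that has no successor in the
+// buffer is returned to the caller one last time before it is erased;
+// without the define, expired events are erased right away.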
+#ifdef LEGACY_BITEXACT
+      if (!next_available) {
+        if (event) {
+          event->event_no = it->event_no;
+          event->end_bit = it->end_bit;
+          event->volume = it->volume;
+          event->duration = it->duration;
+          event->timestamp = it->timestamp;
+        }
+        it = buffer_.erase(it);
+        return true;
+      } else {
+        it = buffer_.erase(it);
+      }
+#else
+      it = buffer_.erase(it);
+#endif
+    } else {
+      ++it;
+    }
+  }
+  return false;
+}
+
+size_t DtmfBuffer::Length() const {
+  return buffer_.size();
+}
+
+bool DtmfBuffer::Empty() const {
+  return buffer_.empty();
+}
+
+int DtmfBuffer::SetSampleRate(int fs_hz) {
+  if (fs_hz != 8000 &&
+      fs_hz != 16000 &&
+      fs_hz != 32000 &&
+      fs_hz != 44100 &&
+      fs_hz != 48000) {
+    return kInvalidSampleRate;
+  }
+  max_extrapolation_samples_ = 7 * fs_hz / 100;
+  frame_len_samples_ = fs_hz / 100;
+  return kOK;
+}
+
+// The method returns true if the two events are considered to be the same.
+// They are defined as equal if they share the same timestamp and event number.
+// The special case with long-lasting events that have to be split into
+// segments is not handled in this method. These will be treated as separate
+// events in the buffer.
+bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) {
+  return (a.event_no == b.event_no) && (a.timestamp == b.timestamp);
+}
+
+bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) {
+  if (SameEvent(*it, event)) {
+    if (!it->end_bit) {
+      // Do not extend the duration of an event for which the end bit was
+      // already received.
+      it->duration = std::max(event.duration, it->duration);
+    }
+    if (event.end_bit) {
+      it->end_bit = true;
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Returns true if `a` goes before `b` in the sorting order ("`a` < `b`").
+// The events are ranked using their start timestamp (taking wrap-around into
+// account). In the unlikely situation that two events share the same start
+// timestamp, the event number is used to rank the two. Note that packets
+// that belong to the same events, and therefore share the same start
+// timestamp, have already been merged before the sort method is called.
+bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) {
+  if (a.timestamp == b.timestamp) {
+    return a.event_no < b.event_no;
+  }
+  // Take wrap-around into account.
+  return (static_cast<uint32_t>(b.timestamp - a.timestamp) < 0xFFFFFFFF / 2);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h
new file mode 100644
index 0000000000..62b751525c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h
@@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
+#define MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <list>
+
+namespace webrtc {
+
+struct DtmfEvent {
+  uint32_t timestamp;
+  int event_no;
+  int volume;
+  int duration;
+  bool end_bit;
+
+  // Constructors
+  DtmfEvent()
+      : timestamp(0), event_no(0), volume(0), duration(0), end_bit(false) {}
+  DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end)
+      : timestamp(ts), event_no(ev), volume(vol), duration(dur), end_bit(end) {}
+};
+
+// This is the buffer holding DTMF events while waiting for them to be played.
+class DtmfBuffer {
+ public:
+  enum BufferReturnCodes {
+    kOK = 0,
+    kInvalidPointer,
+    kPayloadTooShort,
+    kInvalidEventParameters,
+    kInvalidSampleRate
+  };
+
+  // Set up the buffer for use at sample rate `fs_hz`.
+  explicit DtmfBuffer(int fs_hz);
+
+  virtual ~DtmfBuffer();
+
+  DtmfBuffer(const DtmfBuffer&) = delete;
+  DtmfBuffer& operator=(const DtmfBuffer&) = delete;
+
+  // Flushes the buffer.
+  virtual void Flush();
+
+  // Static method to parse 4 bytes from `payload` as a DTMF event (RFC 4733)
+  // and write the parsed information into the struct `event`. Input variable
+  // `rtp_timestamp` is simply copied into the struct.
+  static int ParseEvent(uint32_t rtp_timestamp,
+                        const uint8_t* payload,
+                        size_t payload_length_bytes,
+                        DtmfEvent* event);
+
+  // Inserts `event` into the buffer. The method looks for a matching event and
+  // merges the two if a match is found.
+  virtual int InsertEvent(const DtmfEvent& event);
+
+  // Checks if a DTMF event should be played at time `current_timestamp`. If
+  // so, the method returns true; otherwise false. The parameters of the event
+  // to play will be written to `event`.
+  virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event);
+
+  // Number of events in the buffer.
+  virtual size_t Length() const;
+
+  virtual bool Empty() const;
+
+  // Set a new sample rate.
+  virtual int SetSampleRate(int fs_hz);
+
+ private:
+  typedef std::list<DtmfEvent> DtmfList;
+
+  int max_extrapolation_samples_;
+  int frame_len_samples_;  // TODO(hlundin): Remove this later.
+
+  // Compares two events and returns true if they are the same.
+  static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b);
+
+  // Merges `event` into the event pointed to by `it`. The method checks that
+  // the two events are the same (using the SameEvent method), and merges them
+  // if that was the case, returning true. If the events are not the same,
+  // false is returned.
+  bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event);
+
+  // Method used by the sort algorithm to rank events in the buffer.
+  static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b);
+
+  DtmfList buffer_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc
new file mode 100644
index 0000000000..83745b6c09
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc
@@ -0,0 +1,297 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/dtmf_buffer.h"
+
+#ifdef WIN32
+#include <winsock2.h>  // ntohl()
+#else
+#include <arpa/inet.h>  // ntohl()
+#endif
+
+#include <iostream>
+
+#include "test/gtest.h"
+
+// Modify the tests so that they pass with the modifications done to DtmfBuffer
+// for backwards bit-exactness. Once bit-exactness is no longer required, this
+// #define should be removed (and the code that it enables).
+#define LEGACY_BITEXACT
+
+namespace webrtc {
+
+static int sample_rate_hz = 8000;
+
+static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) {
+  uint32_t payload = 0;
+  //  0                   1                   2                   3
+  //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |     event     |E|R| volume    |          duration             |
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  payload |= (event & 0x00FF) << 24;
+  payload |= (end ? 0x00800000 : 0x00000000);
+  payload |= (volume & 0x003F) << 16;
+  payload |= (duration & 0xFFFF);
+  payload = ntohl(payload);
+  return payload;
+}
+
+static bool EqualEvents(const DtmfEvent& a, const DtmfEvent& b) {
+  return (a.duration == b.duration && a.end_bit == b.end_bit &&
+          a.event_no == b.event_no && a.timestamp == b.timestamp &&
+          a.volume == b.volume);
+}
+
+TEST(DtmfBuffer, CreateAndDestroy) {
+  DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz);
+  delete buffer;
+}
+
+// Test the event parser.
+TEST(DtmfBuffer, ParseEvent) {
+  int event_no = 7;
+  bool end_bit = true;
+  int volume = 17;
+  int duration = 4711;
+  uint32_t timestamp = 0x12345678;
+  uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration);
+  uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload);
+  DtmfEvent event;
+  EXPECT_EQ(DtmfBuffer::kOK, DtmfBuffer::ParseEvent(timestamp, payload_ptr,
+                                                    sizeof(payload), &event));
+  EXPECT_EQ(duration, event.duration);
+  EXPECT_EQ(end_bit, event.end_bit);
+  EXPECT_EQ(event_no, event.event_no);
+  EXPECT_EQ(timestamp, event.timestamp);
+  EXPECT_EQ(volume, event.volume);
+
+  EXPECT_EQ(DtmfBuffer::kPayloadTooShort,
+            DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event));
+}
+
+TEST(DtmfBuffer, SimpleInsertAndGet) {
+  int event_no = 7;
+  bool end_bit = true;
+  int volume = 17;
+  int duration = 4711;
+  uint32_t timestamp = 0x12345678;
+  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
+  EXPECT_EQ(1u, buffer.Length());
+  EXPECT_FALSE(buffer.Empty());
+  DtmfEvent out_event;
+  // Too early to get event.
+  EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event));
+  EXPECT_EQ(1u, buffer.Length());
+  EXPECT_FALSE(buffer.Empty());
+  // Get the event at its starting timestamp.
+  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
+  EXPECT_TRUE(EqualEvents(event, out_event));
+  EXPECT_EQ(1u, buffer.Length());
+  EXPECT_FALSE(buffer.Empty());
+  // Get the event some time into the event.
+  EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event));
+  EXPECT_TRUE(EqualEvents(event, out_event));
+  EXPECT_EQ(1u, buffer.Length());
+  EXPECT_FALSE(buffer.Empty());
+  // Give a "current" timestamp after the event has ended.
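+  // In legacy bit-exact mode, the first query past the end returns the event
+  // one final time and erases it; the second query below then fails as
+  // expected.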
+#ifdef LEGACY_BITEXACT
+  EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event));
+#endif
+  EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event));
+  EXPECT_EQ(0u, buffer.Length());
+  EXPECT_TRUE(buffer.Empty());
+}
+
+TEST(DtmfBuffer, MergingPackets) {
+  int event_no = 0;
+  bool end_bit = false;
+  int volume = 17;
+  int duration = 80;
+  uint32_t timestamp = 0x12345678;
+  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
+
+  event.duration += 80;
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
+
+  event.duration += 80;
+  event.end_bit = true;
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
+
+  EXPECT_EQ(1u, buffer.Length());
+
+  DtmfEvent out_event;
+  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
+  EXPECT_TRUE(EqualEvents(event, out_event));
+}
+
+// This test case inserts one shorter event completely overlapped by one longer
+// event. The expected outcome is that only the longer event is played.
+TEST(DtmfBuffer, OverlappingEvents) {
+  int event_no = 0;
+  bool end_bit = true;
+  int volume = 1;
+  int duration = 80;
+  uint32_t timestamp = 0x12345678 + 80;
+  DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event));
+
+  event_no = 10;
+  end_bit = false;
+  timestamp = 0x12345678;
+  DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
+
+  long_event.duration += 80;
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
+
+  long_event.duration += 80;
+  long_event.end_bit = true;
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
+
+  EXPECT_EQ(2u, buffer.Length());
+
+  DtmfEvent out_event;
+  // Expect to get the long event.
+  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
+  EXPECT_TRUE(EqualEvents(long_event, out_event));
+// Expect no more events (in legacy bit-exact mode, each expired event is
+// first returned one last time before being erased).
#ifdef LEGACY_BITEXACT
+  EXPECT_TRUE(
+      buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
+  EXPECT_TRUE(EqualEvents(long_event, out_event));
+  EXPECT_TRUE(
+      buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
+  EXPECT_TRUE(EqualEvents(short_event, out_event));
+#else
+  EXPECT_FALSE(
+      buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
+#endif
+  EXPECT_TRUE(buffer.Empty());
+}
+
+TEST(DtmfBuffer, ExtrapolationTime) {
+  int event_no = 0;
+  bool end_bit = false;
+  int volume = 1;
+  int duration = 80;
+  uint32_t timestamp = 0x12345678;
+  DtmfEvent event1(timestamp, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
+  EXPECT_EQ(1u, buffer.Length());
+
+  DtmfEvent out_event;
+  // Get the event at the start.
+  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
+  EXPECT_TRUE(EqualEvents(event1, out_event));
+  // Also get the event 100 samples after the end of the event (since we're
+  // missing the end bit).
+  uint32_t timestamp_now = timestamp + duration + 100;
+  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
+  EXPECT_TRUE(EqualEvents(event1, out_event));
+  // Insert another event starting back-to-back with the previous event.
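+  // Extrapolation is not allowed to run past the start of a newer event, so
+  // once event2 is in the buffer, `timestamp_now` will match event2 instead.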
+  timestamp += duration;
+  event_no = 1;
+  DtmfEvent event2(timestamp, event_no, volume, duration, end_bit);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
+  EXPECT_EQ(2u, buffer.Length());
+  // Now we expect to get the new event when supplying `timestamp_now`.
+  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
+  EXPECT_TRUE(EqualEvents(event2, out_event));
+  // Expect that the first event has been erased now.
+  EXPECT_EQ(1u, buffer.Length());
+  // Move `timestamp_now` to more than 560 samples after the end of the second
+  // event. Expect that event to be erased.
+  timestamp_now = timestamp + duration + 600;
+#ifdef LEGACY_BITEXACT
+  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
+#endif
+  EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event));
+  EXPECT_TRUE(buffer.Empty());
+}
+
+TEST(DtmfBuffer, TimestampWraparound) {
+  int event_no = 0;
+  bool end_bit = true;
+  int volume = 1;
+  int duration = 80;
+  uint32_t timestamp1 = 0xFFFFFFFF - duration;
+  DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit);
+  uint32_t timestamp2 = 0;
+  DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
+  EXPECT_EQ(2u, buffer.Length());
+  DtmfEvent out_event;
+  EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
+  EXPECT_TRUE(EqualEvents(event1, out_event));
+#ifdef LEGACY_BITEXACT
+  EXPECT_EQ(1u, buffer.Length());
+#else
+  EXPECT_EQ(2u, buffer.Length());
+#endif
+
+  buffer.Flush();
+  // Reverse the insert order. Expect same results.
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
+  EXPECT_EQ(2u, buffer.Length());
+  EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
+  EXPECT_TRUE(EqualEvents(event1, out_event));
+#ifdef LEGACY_BITEXACT
+  EXPECT_EQ(1u, buffer.Length());
+#else
+  EXPECT_EQ(2u, buffer.Length());
+#endif
+}
+
+TEST(DtmfBuffer, InvalidEvents) {
+  int event_no = 0;
+  bool end_bit = true;
+  int volume = 1;
+  int duration = 80;
+  uint32_t timestamp = 0x12345678;
+  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
+  DtmfBuffer buffer(sample_rate_hz);
+
+  // Invalid event number.
+  event.event_no = -1;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.event_no = 16;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.event_no = 0;  // Valid value.
+
+  // Invalid volume.
+  event.volume = -1;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.volume = 64;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.volume = 0;  // Valid value.
+
+  // Invalid duration.
+  event.duration = -1;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.duration = 0;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.duration = 0xFFFF + 1;
+  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
+  event.duration = 1;  // Valid value.
+
+  // Finish with a valid event, just to verify that all is ok.
+  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc
new file mode 100644
index 0000000000..9061e27c67
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc
@@ -0,0 +1,215 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This class provides a generator for DTMF tones. The tone generation is based
+// on a sinusoid recursion. Each sinusoid is generated using a recursion
+// formula: x[n] = a * x[n-1] - x[n-2], where the coefficient
+// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and
+// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting
+// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but
+// it is kept this way for historical reasons.)
+// TODO(hlundin): Change to positive rotation?
+//
+// Each key on the telephone keypad corresponds to an "event", 0-15. Each event
+// is mapped to a tone pair, with a low and a high frequency. There are four
+// low and four high frequencies, each corresponding to a row and column,
+// respectively, on the keypad as illustrated below.
+//
+//          1209 Hz  1336 Hz  1477 Hz  1633 Hz
+// 697 Hz      1        2        3       12
+// 770 Hz      4        5        6       13
+// 852 Hz      7        8        9       14
+// 941 Hz     10        0       11       15
+
+#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
+
+#include "modules/audio_coding/neteq/audio_vector.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for
+// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
+// Values are in Q14.
+const int DtmfToneGenerator::kCoeff1[4][16] = {
+    {24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701,
+     24219, 24219, 27980, 26956, 25701, 24219},
+    {30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951,
+     30556, 30556, 31548, 31281, 30951, 30556},
+    {32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311,
+     32210, 32210, 32462, 32394, 32311, 32210},
+    {32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564,
+     32520, 32520, 32632, 32602, 32564, 32520}};
+
+// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for
+// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
+// Values are in Q14.
+const int DtmfToneGenerator::kCoeff2[4][16] = {
+    {16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085,
+     19073, 13085, 9315, 9315, 9315, 9315},
+    {28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409,
+     29144, 27409, 26258, 26258, 26258, 26258},
+    {31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400,
+     31849, 31400, 31098, 31098, 31098, 31098},
+    {32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157,
+     32359, 32157, 32022, 32022, 32022, 32022}};
+
+// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone,
+// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
+// Values are in Q14.
+const int DtmfToneGenerator::kInitValue1[4][16] = {
+    {11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036,
+     11036, 8528, 9315, 10163, 11036},
+    {5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918,
+     4429, 4879, 5380, 5918},
+    {3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010,
+     2235, 2468, 2728, 3010},
+    {2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013,
+     1493, 1649, 1823, 2013}};
+
+// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone,
+// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
+// Values are in Q14.
+const int DtmfToneGenerator::kInitValue2[4][16] = {
+    {14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021,
+     13323, 15021, 15708, 15708, 15708, 15708},
+    {8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979,
+     9801, 9801, 9801, 9801},
+    {4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685,
+     5164, 5164, 5164, 5164},
+    {2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148,
+     3476, 3476, 3476, 3476}};
+
+// Amplitude multipliers for volume values 0 through 63, corresponding to
+// 0 dBm0 through -63 dBm0. Values are in Q14.
+// for a in range(0, 64):
+//   print round(16141.0 * 10**(-float(a)/20))
+const int DtmfToneGenerator::kAmplitude[64] = {
+    16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104,
+    4549,  4054,  3614,  3221,  2870,  2558, 2280, 2032, 1811, 1614, 1439,
+    1282,  1143,  1018,  908,   809,   721,  643,  573,  510,  455,  405,
+    361,   322,   287,   256,   228,   203,  181,  161,  144,  128,  114,
+    102,   91,    81,    72,    64,    57,   51,   45,   41,   36,   32,
+    29,    26,    23,    20,    18,    16,   14,   13,   11};
+
+// Constructor.
+DtmfToneGenerator::DtmfToneGenerator()
+    : initialized_(false), coeff1_(0), coeff2_(0), amplitude_(0) {}
+
+// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000,
+// 48000), event (0-15) and attenuation (0-63 dB).
+// Returns 0 on success, otherwise an error code.
+int DtmfToneGenerator::Init(int fs, int event, int attenuation) {
+  initialized_ = false;
+  size_t fs_index;
+  if (fs == 8000) {
+    fs_index = 0;
+  } else if (fs == 16000) {
+    fs_index = 1;
+  } else if (fs == 32000) {
+    fs_index = 2;
+  } else if (fs == 48000) {
+    fs_index = 3;
+  } else {
+    RTC_DCHECK_NOTREACHED();
+    fs_index = 1;  // Default to 16000 Hz.
+  }
+
+  if (event < 0 || event > 15) {
+    return kParameterError;  // Invalid event number.
+  }
+
+  if (attenuation < 0 || attenuation > 63) {
+    return kParameterError;  // Invalid attenuation.
+  }
+
+  // Look up oscillator coefficient for low and high frequencies.
+  RTC_DCHECK_LE(0, fs_index);
+  RTC_DCHECK_GT(arraysize(kCoeff1), fs_index);
+  RTC_DCHECK_GT(arraysize(kCoeff2), fs_index);
+  RTC_DCHECK_LE(0, event);
+  RTC_DCHECK_GT(arraysize(kCoeff1[fs_index]), event);
+  RTC_DCHECK_GT(arraysize(kCoeff2[fs_index]), event);
+  coeff1_ = kCoeff1[fs_index][event];
+  coeff2_ = kCoeff2[fs_index][event];
+
+  // Look up amplitude multiplier.
+  RTC_DCHECK_LE(0, attenuation);
+  RTC_DCHECK_GT(arraysize(kAmplitude), attenuation);
+  amplitude_ = kAmplitude[attenuation];
+
+  // Initialize sample history.
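+  // Per the recursion described at the top of this file, index 0 holds
+  // x[-2] = sin(2*pi*f/fs) and index 1 holds x[-1] = 0.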
+  RTC_DCHECK_LE(0, fs_index);
+  RTC_DCHECK_GT(arraysize(kInitValue1), fs_index);
+  RTC_DCHECK_GT(arraysize(kInitValue2), fs_index);
+  RTC_DCHECK_LE(0, event);
+  RTC_DCHECK_GT(arraysize(kInitValue1[fs_index]), event);
+  RTC_DCHECK_GT(arraysize(kInitValue2[fs_index]), event);
+  sample_history1_[0] = kInitValue1[fs_index][event];
+  sample_history1_[1] = 0;
+  sample_history2_[0] = kInitValue2[fs_index][event];
+  sample_history2_[1] = 0;
+
+  initialized_ = true;
+  return 0;
+}
+
+// Reset tone generator to uninitialized state.
+void DtmfToneGenerator::Reset() {
+  initialized_ = false;
+}
+
+// Generate `num_samples` of DTMF signal and write to `output`.
+int DtmfToneGenerator::Generate(size_t num_samples, AudioMultiVector* output) {
+  if (!initialized_) {
+    return kNotInitialized;
+  }
+
+  if (!output) {
+    return kParameterError;
+  }
+
+  output->AssertSize(num_samples);
+  for (size_t i = 0; i < num_samples; ++i) {
+    // Use recursion formula y[n] = a * y[n - 1] - y[n - 2].
+    int16_t temp_val_low =
+        ((coeff1_ * sample_history1_[1] + 8192) >> 14) - sample_history1_[0];
+    int16_t temp_val_high =
+        ((coeff2_ * sample_history2_[1] + 8192) >> 14) - sample_history2_[0];
+
+    // Update recursion memory.
+    sample_history1_[0] = sample_history1_[1];
+    sample_history1_[1] = temp_val_low;
+    sample_history2_[0] = sample_history2_[1];
+    sample_history2_[1] = temp_val_high;
+
+    // Attenuate the low frequency tone 3 dB.
+    int32_t temp_val =
+        kAmpMultiplier * temp_val_low + temp_val_high * (1 << 15);
+    // Normalize the signal to Q14 with proper rounding.
+    temp_val = (temp_val + 16384) >> 15;
+    // Scale the signal to correct volume.
+    (*output)[0][i] =
+        static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14);
+  }
+  // Copy first channel to all other channels.
+  for (size_t channel = 1; channel < output->Channels(); ++channel) {
+    output->CopyChannel(0, channel);
+  }
+
+  return static_cast<int>(num_samples);
+}
+
+bool DtmfToneGenerator::initialized() const {
+  return initialized_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h
new file mode 100644
index 0000000000..35114f4f49
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+
+namespace webrtc {
+
+// This class provides a generator for DTMF tones.
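+//
+// A minimal usage sketch (the sample rate, event, and attenuation values are
+// arbitrary examples):
+//
+//   DtmfToneGenerator tone_gen;
+//   if (tone_gen.Init(/*fs=*/16000, /*event=*/5, /*attenuation=*/10) == 0) {
+//     AudioMultiVector output(1);  // One channel.
+//     tone_gen.Generate(/*num_samples=*/160, &output);  // 10 ms at 16 kHz.
+//   }
+//
+// Generate() returns the number of samples written, or a negative ReturnCodes
+// value on error.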
+class DtmfToneGenerator {
+ public:
+  enum ReturnCodes {
+    kNotInitialized = -1,
+    kParameterError = -2,
+  };
+
+  DtmfToneGenerator();
+  virtual ~DtmfToneGenerator() {}
+
+  DtmfToneGenerator(const DtmfToneGenerator&) = delete;
+  DtmfToneGenerator& operator=(const DtmfToneGenerator&) = delete;
+
+  virtual int Init(int fs, int event, int attenuation);
+  virtual void Reset();
+  virtual int Generate(size_t num_samples, AudioMultiVector* output);
+  virtual bool initialized() const;
+
+ private:
+  static const int kCoeff1[4][16];  // 1st oscillator model coefficient table.
+  static const int kCoeff2[4][16];  // 2nd oscillator model coefficient table.
+  static const int kInitValue1[4][16];  // Initialization for 1st oscillator.
+  static const int kInitValue2[4][16];  // Initialization for 2nd oscillator.
+  static const int kAmplitude[64];  // Amplitude for 0 through -63 dBm0.
+  static const int16_t kAmpMultiplier = 23171;  // 3 dB attenuation (in Q15).
+
+  bool initialized_;            // True if generator is initialized properly.
+  int coeff1_;                  // 1st oscillator coefficient for this event.
+  int coeff2_;                  // 2nd oscillator coefficient for this event.
+  int amplitude_;               // Amplitude for this event.
+  int16_t sample_history1_[2];  // Last 2 samples for the 1st oscillator.
+  int16_t sample_history2_[2];  // Last 2 samples for the 2nd oscillator.
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc
new file mode 100644
index 0000000000..e843706dd3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc
@@ -0,0 +1,180 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for DtmfToneGenerator class.
+
+#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
+
+#include <math.h>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+class DtmfToneGeneratorTest : public ::testing::Test {
+ protected:
+  static const double kLowFreqHz[16];
+  static const double kHighFreqHz[16];
+  // This is the attenuation applied to all cases.
+  const double kBaseAttenuation = 16141.0 / 16384.0;
+  const double k3dbAttenuation = 23171.0 / 32768.0;
+  const int kNumSamples = 10;
+
+  void TestAllTones(int fs_hz, int channels) {
+    AudioMultiVector signal(channels);
+
+    for (int event = 0; event <= 15; ++event) {
+      rtc::StringBuilder ss;
+      ss << "Checking event " << event << " at sample rate " << fs_hz;
+      SCOPED_TRACE(ss.str());
+      const int kAttenuation = 0;
+      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, kAttenuation));
+      EXPECT_TRUE(tone_gen_.initialized());
+      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
+
+      double f1 = kLowFreqHz[event];
+      double f2 = kHighFreqHz[event];
+      const double pi = 3.14159265358979323846;
+
+      for (int n = 0; n < kNumSamples; ++n) {
+        double x = k3dbAttenuation * sin(2.0 * pi * f1 / fs_hz * (-n - 1)) +
+                   sin(2.0 * pi * f2 / fs_hz * (-n - 1));
+        x *= kBaseAttenuation;
+        x = ldexp(x, 14);  // Scale to Q14.
+        for (int channel = 0; channel < channels; ++channel) {
+          EXPECT_NEAR(x, static_cast<double>(signal[channel][n]), 25);
+        }
+      }
+
+      tone_gen_.Reset();
+      EXPECT_FALSE(tone_gen_.initialized());
+    }
+  }
+
+  void TestAmplitudes(int fs_hz, int channels) {
+    AudioMultiVector signal(channels);
+    AudioMultiVector ref_signal(channels);
+
+    const int event_vec[] = {0, 4, 9, 13};  // Test a few events.
+    for (int e = 0; e < 4; ++e) {
+      int event = event_vec[e];
+      // Create full-scale reference.
+      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, 0));  // 0 attenuation.
+      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &ref_signal));
+      // Test every 5 steps (to save time).
+      for (int attenuation = 1; attenuation <= 63; attenuation += 5) {
+        rtc::StringBuilder ss;
+        ss << "Checking event " << event << " at sample rate " << fs_hz;
+        ss << "; attenuation " << attenuation;
+        SCOPED_TRACE(ss.str());
+        ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, attenuation));
+        EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
+        for (int n = 0; n < kNumSamples; ++n) {
+          double attenuation_factor =
+              DbToRatio(-static_cast<float>(attenuation));
+          // Verify that the attenuation is correct.
+          for (int channel = 0; channel < channels; ++channel) {
+            EXPECT_NEAR(attenuation_factor * ref_signal[channel][n],
+                        signal[channel][n], 2);
+          }
+        }
+
+        tone_gen_.Reset();
+      }
+    }
+  }
+
+  DtmfToneGenerator tone_gen_;
+};
+
+// Low and high frequencies for events 0 through 15.
+const double DtmfToneGeneratorTest::kLowFreqHz[16] = { + 941.0, 697.0, 697.0, 697.0, 770.0, 770.0, 770.0, 852.0, + 852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0}; +const double DtmfToneGeneratorTest::kHighFreqHz[16] = { + 1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0, + 1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, 1633.0}; + +TEST_F(DtmfToneGeneratorTest, Test8000Mono) { + TestAllTones(8000, 1); + TestAmplitudes(8000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test16000Mono) { + TestAllTones(16000, 1); + TestAmplitudes(16000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test32000Mono) { + TestAllTones(32000, 1); + TestAmplitudes(32000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test48000Mono) { + TestAllTones(48000, 1); + TestAmplitudes(48000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test8000Stereo) { + TestAllTones(8000, 2); + TestAmplitudes(8000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test16000Stereo) { + TestAllTones(16000, 2); + TestAmplitudes(16000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test32000Stereo) { + TestAllTones(32000, 2); + TestAmplitudes(32000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test48000Stereo) { + TestAllTones(48000, 2); + TestAmplitudes(48000, 2); +} + +TEST(DtmfToneGenerator, TestErrors) { + DtmfToneGenerator tone_gen; + const int kNumSamples = 10; + AudioMultiVector signal(1); // One channel. + + // Try to generate tones without initializing. + EXPECT_EQ(DtmfToneGenerator::kNotInitialized, + tone_gen.Generate(kNumSamples, &signal)); + + const int fs = 16000; // Valid sample rate. + const int event = 7; // Valid event. + const int attenuation = 0; // Valid attenuation. + // Initialize with invalid event -1. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, -1, attenuation)); + // Initialize with invalid event 16. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, 16, attenuation)); + // Initialize with invalid attenuation -1. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1)); + // Initialize with invalid attenuation 64. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 64)); + EXPECT_FALSE(tone_gen.initialized()); // Should still be uninitialized. + + // Initialize with valid parameters. + ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation)); + EXPECT_TRUE(tone_gen.initialized()); + // NULL pointer to destination. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Generate(kNumSamples, NULL)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc b/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc new file mode 100644 index 0000000000..9c3274609f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/neteq/expand.h"
+
+#include <string.h>  // memset
+
+#include <algorithm>  // min, max
+#include <limits>     // numeric_limits<T>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/cross_correlation.h"
+#include "modules/audio_coding/neteq/dsp_helper.h"
+#include "modules/audio_coding/neteq/random_vector.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+Expand::Expand(BackgroundNoise* background_noise,
+               SyncBuffer* sync_buffer,
+               RandomVector* random_vector,
+               StatisticsCalculator* statistics,
+               int fs,
+               size_t num_channels)
+    : random_vector_(random_vector),
+      sync_buffer_(sync_buffer),
+      first_expand_(true),
+      fs_hz_(fs),
+      num_channels_(num_channels),
+      consecutive_expands_(0),
+      background_noise_(background_noise),
+      statistics_(statistics),
+      overlap_length_(5 * fs / 8000),
+      lag_index_direction_(0),
+      current_lag_index_(0),
+      stop_muting_(false),
+      expand_duration_samples_(0),
+      channel_parameters_(new ChannelParameters[num_channels_]) {
+  RTC_DCHECK(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
+  RTC_DCHECK_LE(fs,
+                static_cast<int>(kMaxSampleRate));  // Should not be possible.
+  RTC_DCHECK_GT(num_channels_, 0);
+  memset(expand_lags_, 0, sizeof(expand_lags_));
+  Reset();
+}
+
+Expand::~Expand() = default;
+
+void Expand::Reset() {
+  first_expand_ = true;
+  consecutive_expands_ = 0;
+  max_lag_ = 0;
+  for (size_t ix = 0; ix < num_channels_; ++ix) {
+    channel_parameters_[ix].expand_vector0.Clear();
+    channel_parameters_[ix].expand_vector1.Clear();
+  }
+}
+
+int Expand::Process(AudioMultiVector* output) {
+  int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
+  int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
+  static const int kTempDataSize = 3600;
+  int16_t temp_data[kTempDataSize];  // TODO(hlundin) Remove this.
+  int16_t* voiced_vector_storage = temp_data;
+  int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
+  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
+  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
+  int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
+
+  int fs_mult = fs_hz_ / 8000;
+
+  if (first_expand_) {
+    // Perform initial setup if this is the first expansion since last reset.
+    AnalyzeSignal(random_vector);
+    first_expand_ = false;
+    expand_duration_samples_ = 0;
+  } else {
+    // This is not the first expansion, parameters are already estimated.
+    // Extract a noise segment.
+    size_t rand_length = max_lag_;
+    // This only applies to SWB where length could be larger than 256.
+    RTC_DCHECK_LE(rand_length, kMaxSampleRate / 8000 * 120 + 30);
+    GenerateRandomVector(2, rand_length, random_vector);
+  }
+
+  // Generate signal.
+  UpdateLagIndex();
+
+  // Voiced part.
+  // Generate a weighted vector with the current lag.
+  size_t expansion_vector_length = max_lag_ + overlap_length_;
+  size_t current_lag = expand_lags_[current_lag_index_];
+  // Copy lag+overlap data.
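+  // The copy below starts max_lag_ - current_lag samples into the expand
+  // vector, i.e., the last current_lag + overlap_length_ samples are used.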
+  size_t expansion_vector_position =
+      expansion_vector_length - current_lag - overlap_length_;
+  size_t temp_length = current_lag + overlap_length_;
+  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
+    ChannelParameters& parameters = channel_parameters_[channel_ix];
+    if (current_lag_index_ == 0) {
+      // Use only expand_vector0.
+      RTC_DCHECK_LE(expansion_vector_position + temp_length,
+                    parameters.expand_vector0.Size());
+      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
+                                       voiced_vector_storage);
+    } else if (current_lag_index_ == 1) {
+      std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
+      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
+                                       temp_0.get());
+      std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
+      parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
+                                       temp_1.get());
+      // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
+      WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 3, temp_1.get(), 1, 2,
+                                            voiced_vector_storage, temp_length);
+    } else if (current_lag_index_ == 2) {
+      // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
+      RTC_DCHECK_LE(expansion_vector_position + temp_length,
+                    parameters.expand_vector0.Size());
+      RTC_DCHECK_LE(expansion_vector_position + temp_length,
+                    parameters.expand_vector1.Size());
+
+      std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
+      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
+                                       temp_0.get());
+      std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
+      parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
+                                       temp_1.get());
+      WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 1, temp_1.get(), 1, 1,
+                                            voiced_vector_storage, temp_length);
+    }
+
+    // Get tapering window parameters. Values are in Q15.
+    int16_t muting_window, muting_window_increment;
+    int16_t unmuting_window, unmuting_window_increment;
+    if (fs_hz_ == 8000) {
+      muting_window = DspHelper::kMuteFactorStart8kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
+    } else if (fs_hz_ == 16000) {
+      muting_window = DspHelper::kMuteFactorStart16kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
+    } else if (fs_hz_ == 32000) {
+      muting_window = DspHelper::kMuteFactorStart32kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
+    } else {  // fs_ == 48000
+      muting_window = DspHelper::kMuteFactorStart48kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
+    }
+
+    // Smooth the expanded signal if it has not been muted to a low amplitude
+    // and `current_voice_mix_factor` is larger than 0.5.
+    if ((parameters.mute_factor > 819) &&
+        (parameters.current_voice_mix_factor > 8192)) {
+      size_t start_ix = sync_buffer_->Size() - overlap_length_;
+      for (size_t i = 0; i < overlap_length_; i++) {
+        // Do overlap add between new vector and overlap.
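+        // Each output sample is old * muting_window plus
+        // (new * mute_factor >> 14) * unmuting_window, with the windows in
+        // Q15 and mute_factor in Q14; adding 16384 and shifting right by 15
+        // rounds back to sample scale.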
+        (*sync_buffer_)[channel_ix][start_ix + i] =
+            (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) +
+             (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) *
+              unmuting_window) +
+             16384) >>
+            15;
+        muting_window += muting_window_increment;
+        unmuting_window += unmuting_window_increment;
+      }
+    } else if (parameters.mute_factor == 0) {
+      // The expanded signal will consist of only comfort noise if
+      // mute_factor = 0. Set the output length to 15 ms for best noise
+      // production.
+      // TODO(hlundin): This has been disabled since the length of
+      // parameters.expand_vector0 and parameters.expand_vector1 no longer
+      // match with expand_lags_, causing invalid reads and writes. Is it a good
+      // idea to enable this again, and solve the vector size problem?
+      // max_lag_ = fs_mult * 120;
+      // expand_lags_[0] = fs_mult * 120;
+      // expand_lags_[1] = fs_mult * 120;
+      // expand_lags_[2] = fs_mult * 120;
+    }
+
+    // Unvoiced part.
+    // Filter `scaled_random_vector` through `ar_filter_`.
+    memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state,
+           sizeof(int16_t) * kUnvoicedLpcOrder);
+    int32_t add_constant = 0;
+    if (parameters.ar_gain_scale > 0) {
+      add_constant = 1 << (parameters.ar_gain_scale - 1);
+    }
+    WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
+                                    parameters.ar_gain, add_constant,
+                                    parameters.ar_gain_scale, current_lag);
+    WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
+                              parameters.ar_filter, kUnvoicedLpcOrder + 1,
+                              current_lag);
+    memcpy(parameters.ar_filter_state,
+           &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
+           sizeof(int16_t) * kUnvoicedLpcOrder);
+
+    // Combine voiced and unvoiced contributions.
+
+    // Set a suitable cross-fading slope.
+    // For lag =
+    //   <= 31 * fs_mult            => go from 1 to 0 in about 8 ms;
+    //  (>= 31 .. <= 63) * fs_mult  => go from 1 to 0 in about 16 ms;
+    //   >= 64 * fs_mult            => go from 1 to 0 in about 32 ms.
+    // temp_shift = getbits(max_lag_) - 5.
+    int temp_shift =
+        (31 - WebRtcSpl_NormW32(rtc::dchecked_cast<int32_t>(max_lag_))) - 5;
+    int16_t mix_factor_increment = 256 >> temp_shift;
+    if (stop_muting_) {
+      mix_factor_increment = 0;
+    }
+
+    // Create combined signal by shifting in more and more of unvoiced part.
+    temp_shift = 8 - temp_shift;  // = getbits(mix_factor_increment).
+    size_t temp_length =
+        (parameters.current_voice_mix_factor - parameters.voice_mix_factor) >>
+        temp_shift;
+    temp_length = std::min(temp_length, current_lag);
+    DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length,
+                         &parameters.current_voice_mix_factor,
+                         mix_factor_increment, temp_data);
+
+    // End of cross-fading period was reached before end of expanded signal
+    // path. Mix the rest with a fixed mixing factor.
+    if (temp_length < current_lag) {
+      if (mix_factor_increment != 0) {
+        parameters.current_voice_mix_factor = parameters.voice_mix_factor;
+      }
+      int16_t temp_scale = 16384 - parameters.current_voice_mix_factor;
+      WebRtcSpl_ScaleAndAddVectorsWithRound(
+          voiced_vector + temp_length, parameters.current_voice_mix_factor,
+          unvoiced_vector + temp_length, temp_scale, 14,
+          temp_data + temp_length, current_lag - temp_length);
+    }
+
+    // Select muting slope depending on how many consecutive expands we have
+    // done.
+    if (consecutive_expands_ == 3) {
+      // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
+      // mute_slope = 0.0010 / fs_mult in Q20.
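+      // (0.0010 * 2^20 is approximately 1049; the same Q20 scaling gives the
+      // constant 2097 below.)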
+      parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult);
+    }
+    if (consecutive_expands_ == 7) {
+      // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
+      // mute_slope = 0.0020 / fs_mult in Q20.
+      parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult);
+    }
+
+    // Mute segment according to slope value.
+    if ((consecutive_expands_ != 0) || !parameters.onset) {
+      // Mute to the previous level, then continue with the muting.
+      WebRtcSpl_AffineTransformVector(
+          temp_data, temp_data, parameters.mute_factor, 8192, 14, current_lag);
+
+      if (!stop_muting_) {
+        DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);
+
+        // Shift by 6 to go from Q20 to Q14.
+        // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong.
+        // Legacy.
+        int16_t gain = static_cast<int16_t>(
+            16384 - (((current_lag * parameters.mute_slope) + 8192) >> 6));
+        gain = ((gain * parameters.mute_factor) + 8192) >> 14;
+
+        // Guard against getting stuck with very small (but sometimes audible)
+        // gain.
+        if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) {
+          parameters.mute_factor = 0;
+        } else {
+          parameters.mute_factor = gain;
+        }
+      }
+    }
+
+    // Background noise part.
+    background_noise_->GenerateBackgroundNoise(
+        random_vector, channel_ix, channel_parameters_[channel_ix].mute_slope,
+        TooManyExpands(), current_lag, unvoiced_array_memory);
+
+    // Add background noise to the combined voiced-unvoiced signal.
+    for (size_t i = 0; i < current_lag; i++) {
+      temp_data[i] = temp_data[i] + noise_vector[i];
+    }
+    if (channel_ix == 0) {
+      output->AssertSize(current_lag);
+    } else {
+      RTC_DCHECK_EQ(output->Size(), current_lag);
+    }
+    (*output)[channel_ix].OverwriteAt(temp_data, current_lag, 0);
+  }
+
+  // Increase call number and cap it.
+  consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands
+                             ? kMaxConsecutiveExpands
+                             : consecutive_expands_ + 1;
+  expand_duration_samples_ += output->Size();
+  // Clamp the duration counter at 2 seconds.
+  expand_duration_samples_ = std::min(expand_duration_samples_,
+                                      rtc::dchecked_cast<size_t>(fs_hz_ * 2));
+  return 0;
+}
+
+void Expand::SetParametersForNormalAfterExpand() {
+  current_lag_index_ = 0;
+  lag_index_direction_ = 0;
+  stop_muting_ = true;  // Do not mute signal any more.
+  statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
+  statistics_->EndExpandEvent(fs_hz_);
+}
+
+void Expand::SetParametersForMergeAfterExpand() {
+  current_lag_index_ = -1;  /* out of the 3 possible ones */
+  lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
+  stop_muting_ = true;
+  statistics_->EndExpandEvent(fs_hz_);
+}
+
+bool Expand::Muted() const {
+  if (first_expand_ || stop_muting_)
+    return false;
+  RTC_DCHECK(channel_parameters_);
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    if (channel_parameters_[ch].mute_factor != 0)
+      return false;
+  }
+  return true;
+}
+
+size_t Expand::overlap_length() const {
+  return overlap_length_;
+}
+
+void Expand::InitializeForAnExpandPeriod() {
+  lag_index_direction_ = 1;
+  current_lag_index_ = -1;
+  stop_muting_ = false;
+  random_vector_->set_seed_increment(1);
+  consecutive_expands_ = 0;
+  for (size_t ix = 0; ix < num_channels_; ++ix) {
+    channel_parameters_[ix].current_voice_mix_factor = 16384;  // 1.0 in Q14.
+    channel_parameters_[ix].mute_factor = 16384;               // 1.0 in Q14.
+    // Start with 0 gain for background noise.
+    background_noise_->SetMuteFactor(ix, 0);
+  }
+}
+
+bool Expand::TooManyExpands() {
+  return consecutive_expands_ >= kMaxConsecutiveExpands;
+}
+
+void Expand::AnalyzeSignal(int16_t* random_vector) {
+  int32_t auto_correlation[kUnvoicedLpcOrder + 1];
+  int16_t reflection_coeff[kUnvoicedLpcOrder];
+  int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
+  size_t best_correlation_index[kNumCorrelationCandidates];
+  int16_t best_correlation[kNumCorrelationCandidates];
+  size_t best_distortion_index[kNumCorrelationCandidates];
+  int16_t best_distortion[kNumCorrelationCandidates];
+  int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
+  int32_t best_distortion_w32[kNumCorrelationCandidates];
+  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
+  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
+
+  int fs_mult = fs_hz_ / 8000;
+
+  // Pre-calculate common multiplications with fs_mult.
+  size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4);
+  size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20);
+  size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120);
+  size_t fs_mult_dist_len = fs_mult * kDistortionLength;
+  size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;
+
+  const size_t signal_length = static_cast<size_t>(256 * fs_mult);
+
+  const size_t audio_history_position = sync_buffer_->Size() - signal_length;
+  std::unique_ptr<int16_t[]> audio_history(new int16_t[signal_length]);
+  (*sync_buffer_)[0].CopyTo(signal_length, audio_history_position,
+                            audio_history.get());
+
+  // Initialize.
+  InitializeForAnExpandPeriod();
+
+  // Calculate correlation in downsampled domain (4 kHz sample rate).
+  size_t correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.
+  // If it is decided to break bit-exactness `correlation_length` should be
+  // initialized to the return value of Correlation().
+  Correlation(audio_history.get(), signal_length, correlation_vector);
+
+  // Find peaks in correlation vector.
+  DspHelper::PeakDetection(correlation_vector, correlation_length,
+                           kNumCorrelationCandidates, fs_mult,
+                           best_correlation_index, best_correlation);
+
+  // Adjust peak locations; cross-correlation lags start at 2.5 ms
+  // (20 * fs_mult samples).
+  best_correlation_index[0] += fs_mult_20;
+  best_correlation_index[1] += fs_mult_20;
+  best_correlation_index[2] += fs_mult_20;
+
+  // Calculate distortion around the `kNumCorrelationCandidates` best lags.
+  int distortion_scale = 0;
+  for (size_t i = 0; i < kNumCorrelationCandidates; i++) {
+    size_t min_index =
+        std::max(fs_mult_20, best_correlation_index[i] - fs_mult_4);
+    size_t max_index =
+        std::min(fs_mult_120 - 1, best_correlation_index[i] + fs_mult_4);
+    best_distortion_index[i] = DspHelper::MinDistortion(
+        &(audio_history[signal_length - fs_mult_dist_len]), min_index,
+        max_index, fs_mult_dist_len, &best_distortion_w32[i]);
+    distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]),
+                                distortion_scale);
+  }
+  // Shift the distortion values to fit in 16 bits.
+  WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates,
+                                   best_distortion_w32, distortion_scale);
+
+  // Find the maximizing index `i` of the cost function
+  // f[i] = best_correlation[i] / best_distortion[i].
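+  // (The ratio is evaluated in fixed point as (best_correlation[i] << 16) /
+  // best_distortion[i] below, to preserve precision.)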
+  int32_t best_ratio = std::numeric_limits<int32_t>::min();
+  size_t best_index = std::numeric_limits<size_t>::max();
+  for (size_t i = 0; i < kNumCorrelationCandidates; ++i) {
+    int32_t ratio;
+    if (best_distortion[i] > 0) {
+      ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i];
+    } else if (best_correlation[i] == 0) {
+      ratio = 0;  // No correlation; set result to zero.
+    } else {
+      ratio = std::numeric_limits<int32_t>::max();  // Denominator is zero.
+    }
+    if (ratio > best_ratio) {
+      best_index = i;
+      best_ratio = ratio;
+    }
+  }
+
+  size_t distortion_lag = best_distortion_index[best_index];
+  size_t correlation_lag = best_correlation_index[best_index];
+  max_lag_ = std::max(distortion_lag, correlation_lag);
+
+  // Calculate the exact best correlation in the range between
+  // `correlation_lag` and `distortion_lag`.
+  correlation_length = std::max(std::min(distortion_lag + 10, fs_mult_120),
+                                static_cast<size_t>(60 * fs_mult));
+
+  size_t start_index = std::min(distortion_lag, correlation_lag);
+  size_t correlation_lags = static_cast<size_t>(
+      WEBRTC_SPL_ABS_W16((distortion_lag - correlation_lag)) + 1);
+  RTC_DCHECK_LE(correlation_lags, static_cast<size_t>(99 * fs_mult + 1));
+
+  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
+    ChannelParameters& parameters = channel_parameters_[channel_ix];
+    if (channel_ix > 0) {
+      // When channel_ix == 0, audio_history contains the correct audio. For
+      // the other cases, we will have to copy the correct channel into
+      // audio_history.
+      (*sync_buffer_)[channel_ix].CopyTo(signal_length, audio_history_position,
+                                         audio_history.get());
+    }
+
+    // Calculate suitable scaling.
+    int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
+        &audio_history[signal_length - correlation_length - start_index -
+                       correlation_lags],
+        correlation_length + start_index + correlation_lags - 1);
+    int correlation_scale =
+        (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
+        (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) -
+        31;
+    correlation_scale = std::max(0, correlation_scale);
+
+    // Calculate the correlation, store in `correlation_vector2`.
+    WebRtcSpl_CrossCorrelation(
+        correlation_vector2,
+        &(audio_history[signal_length - correlation_length]),
+        &(audio_history[signal_length - correlation_length - start_index]),
+        correlation_length, correlation_lags, correlation_scale, -1);
+
+    // Find maximizing index.
+    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
+    int32_t max_correlation = correlation_vector2[best_index];
+    // Compensate index with start offset.
+    best_index = best_index + start_index;
+
+    // Calculate energies.
+    int32_t energy1 = WebRtcSpl_DotProductWithScale(
+        &(audio_history[signal_length - correlation_length]),
+        &(audio_history[signal_length - correlation_length]),
+        correlation_length, correlation_scale);
+    int32_t energy2 = WebRtcSpl_DotProductWithScale(
+        &(audio_history[signal_length - correlation_length - best_index]),
+        &(audio_history[signal_length - correlation_length - best_index]),
+        correlation_length, correlation_scale);
+
+    // Calculate the correlation coefficient between the two portions of the
+    // signal.
+    int32_t corr_coefficient;
+    if ((energy1 > 0) && (energy2 > 0)) {
+      int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
+      int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
+      // Make sure total scaling is even (to simplify scale factor after sqrt).
+      if ((energy1_scale + energy2_scale) & 1) {
+        // If sum is odd, add 1 to make it even.
+        energy1_scale += 1;
+      }
+      int32_t scaled_energy1 = energy1 >> energy1_scale;
+      int32_t scaled_energy2 = energy2 >> energy2_scale;
+      int16_t sqrt_energy_product = static_cast<int16_t>(
+          WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2));
+      // Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
+      int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
+      max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
+      corr_coefficient =
+          WebRtcSpl_DivW32W16(max_correlation, sqrt_energy_product);
+      // Cap at 1.0 in Q14.
+      corr_coefficient = std::min(16384, corr_coefficient);
+    } else {
+      corr_coefficient = 0;
+    }
+
+    // Extract the two vectors expand_vector0 and expand_vector1 from
+    // `audio_history`.
+    size_t expansion_length = max_lag_ + overlap_length_;
+    const int16_t* vector1 =
+        &(audio_history[signal_length - expansion_length]);
+    const int16_t* vector2 = vector1 - distortion_lag;
+    // Normalize the second vector to the same energy as the first.
+    energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
+                                            correlation_scale);
+    energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
+                                            correlation_scale);
+    // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 -
+    // 2.0, i.e., energy1 / energy2 is within 0.25 - 4.
+    int16_t amplitude_ratio;
+    if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
+      // Energy constraint fulfilled. Use both vectors and scale them
+      // accordingly.
+      int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
+      int32_t scaled_energy1 = scaled_energy2 - 13;
+      // Calculate scaled_energy1 / scaled_energy2 in Q13.
+      int32_t energy_ratio =
+          WebRtcSpl_DivW32W16(WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
+                              static_cast<int16_t>(energy2 >> scaled_energy2));
+      // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
+      amplitude_ratio =
+          static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13));
+      // Copy the two vectors and give them the same energy.
+      parameters.expand_vector0.Clear();
+      parameters.expand_vector0.PushBack(vector1, expansion_length);
+      parameters.expand_vector1.Clear();
+      if (parameters.expand_vector1.Size() < expansion_length) {
+        parameters.expand_vector1.Extend(expansion_length -
+                                         parameters.expand_vector1.Size());
+      }
+      std::unique_ptr<int16_t[]> temp_1(new int16_t[expansion_length]);
+      WebRtcSpl_AffineTransformVector(
+          temp_1.get(), const_cast<int16_t*>(vector2), amplitude_ratio, 4096,
+          13, expansion_length);
+      parameters.expand_vector1.OverwriteAt(temp_1.get(), expansion_length, 0);
+    } else {
+      // Energy change constraint not fulfilled. Only use last vector.
+      parameters.expand_vector0.Clear();
+      parameters.expand_vector0.PushBack(vector1, expansion_length);
+      // Copy from expand_vector0 to expand_vector1.
+      parameters.expand_vector0.CopyTo(&parameters.expand_vector1);
+      // Set the energy_ratio since it is used by muting slope.
+      if ((energy1 / 4 < energy2) || (energy2 == 0)) {
+        amplitude_ratio = 4096;  // 0.5 in Q13.
+      } else {
+        amplitude_ratio = 16384;  // 2.0 in Q13.
+      }
+    }
+
+    // Set the 3 lag values.
+    if (distortion_lag == correlation_lag) {
+      expand_lags_[0] = distortion_lag;
+      expand_lags_[1] = distortion_lag;
+      expand_lags_[2] = distortion_lag;
+    } else {
+      // `distortion_lag` and `correlation_lag` are not equal; use different
+      // combinations of the two.
+      // First lag is `distortion_lag` only.
+      expand_lags_[0] = distortion_lag;
+      // Second lag is the average of the two.
+      expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
+      // Third lag is the average again, but rounding towards
+      // `correlation_lag`.
+      if (distortion_lag > correlation_lag) {
+        expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
+      } else {
+        expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
+      }
+    }
+
+    // Calculate the LPC and the gain of the filters.
+
+    // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
+    size_t temp_index =
+        signal_length - fs_mult_lpc_analysis_len - kUnvoicedLpcOrder;
+    // Copy signal to temporary vector to be able to pad with leading zeros.
+    int16_t* temp_signal =
+        new int16_t[fs_mult_lpc_analysis_len + kUnvoicedLpcOrder];
+    memset(temp_signal, 0,
+           sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
+    memcpy(&temp_signal[kUnvoicedLpcOrder],
+           &audio_history[temp_index + kUnvoicedLpcOrder],
+           sizeof(int16_t) * fs_mult_lpc_analysis_len);
+    CrossCorrelationWithAutoShift(
+        &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],
+        fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);
+    delete[] temp_signal;
+
+    // Verify that variance is positive.
+    if (auto_correlation[0] > 0) {
+      // Estimate AR filter parameters using Levinson-Durbin algorithm;
+      // kUnvoicedLpcOrder + 1 filter coefficients.
+      int16_t stability =
+          WebRtcSpl_LevinsonDurbin(auto_correlation, parameters.ar_filter,
+                                   reflection_coeff, kUnvoicedLpcOrder);
+
+      // Keep filter parameters only if filter is stable.
+      if (stability != 1) {
+        // Set first coefficient to 4096 (1.0 in Q12).
+        parameters.ar_filter[0] = 4096;
+        // Set remaining `kUnvoicedLpcOrder` coefficients to zero.
+        WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder);
+      }
+    }
+
+    if (channel_ix == 0) {
+      // Extract a noise segment.
+      size_t noise_length;
+      if (distortion_lag < 40) {
+        noise_length = 2 * distortion_lag + 30;
+      } else {
+        noise_length = distortion_lag + 30;
+      }
+      if (noise_length <= RandomVector::kRandomTableSize) {
+        memcpy(random_vector, RandomVector::kRandomTable,
+               sizeof(int16_t) * noise_length);
+      } else {
+        // Only applies to SWB where length could be larger than
+        // `kRandomTableSize`.
+        memcpy(random_vector, RandomVector::kRandomTable,
+               sizeof(int16_t) * RandomVector::kRandomTableSize);
+        RTC_DCHECK_LE(noise_length, kMaxSampleRate / 8000 * 120 + 30);
+        random_vector_->IncreaseSeedIncrement(2);
+        random_vector_->Generate(
+            noise_length - RandomVector::kRandomTableSize,
+            &random_vector[RandomVector::kRandomTableSize]);
+      }
+    }
+
+    // Set up state vector and calculate scale factor for unvoiced filtering.
+    memcpy(parameters.ar_filter_state,
+           &(audio_history[signal_length - kUnvoicedLpcOrder]),
+           sizeof(int16_t) * kUnvoicedLpcOrder);
+    memcpy(unvoiced_vector - kUnvoicedLpcOrder,
+           &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]),
+           sizeof(int16_t) * kUnvoicedLpcOrder);
+    WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128],
+                              unvoiced_vector, parameters.ar_filter,
+                              kUnvoicedLpcOrder + 1, 128);
+    const int unvoiced_max_abs = [&] {
+      const int16_t max_abs = WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128);
+      // Since WebRtcSpl_MaxAbsValueW16 returns 2^15 - 1 when the input
+      // contains -2^15, we have to conservatively bump the return value by 1
+      // if it is 2^15 - 1.
+      return max_abs == WEBRTC_SPL_WORD16_MAX ?
+          max_abs + 1 : max_abs;
+    }();
+    // Pick the smallest n such that 2^n > unvoiced_max_abs; then the maximum
+    // value of the dot product is less than 2^7 * 2^(2*n) = 2^(2*n + 7), so to
+    // prevent overflows we want 2n + 7 <= 31, which means we should shift by
+    // 2n + 7 - 31 bits, if this value is greater than zero.
+    int unvoiced_prescale =
+        std::max(0, 2 * WebRtcSpl_GetSizeInBits(unvoiced_max_abs) - 24);
+
+    int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(
+        unvoiced_vector, unvoiced_vector, 128, unvoiced_prescale);
+
+    // Normalize `unvoiced_energy` to 28 or 29 bits to preserve sqrt()
+    // accuracy.
+    int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3;
+    // Make sure we do an odd number of shifts since we already have 7 shifts
+    // from dividing with 128 earlier. This will make the total scale factor
+    // even, which is suitable for the sqrt.
+    unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
+    unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
+    int16_t unvoiced_gain =
+        static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy));
+    parameters.ar_gain_scale =
+        13 + (unvoiced_scale + 7 - unvoiced_prescale) / 2;
+    parameters.ar_gain = unvoiced_gain;
+
+    // Calculate voice_mix_factor from corr_coefficient.
+    // Let x = corr_coefficient. Then, we compute:
+    // if (x > 0.48)
+    //   voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
+    // else
+    //   voice_mix_factor = 0;
+    if (corr_coefficient > 7875) {
+      int16_t x1, x2, x3;
+      // `corr_coefficient` is in Q14.
+      x1 = static_cast<int16_t>(corr_coefficient);
+      x2 = (x1 * x1) >> 14;  // Shift 14 to keep result in Q14.
+      x3 = (x1 * x2) >> 14;
+      static const int kCoefficients[4] = {-5179, 19931, -16422, 5776};
+      int32_t temp_sum = kCoefficients[0] * 16384;
+      temp_sum += kCoefficients[1] * x1;
+      temp_sum += kCoefficients[2] * x2;
+      temp_sum += kCoefficients[3] * x3;
+      parameters.voice_mix_factor =
+          static_cast<int16_t>(std::min(temp_sum / 4096, 16384));
+      parameters.voice_mix_factor =
+          std::max(parameters.voice_mix_factor, static_cast<int16_t>(0));
+    } else {
+      parameters.voice_mix_factor = 0;
+    }
+
+    // Calculate muting slope. Reuse value from earlier scaling of
+    // `expand_vector0` and `expand_vector1`.
+    int16_t slope = amplitude_ratio;
+    if (slope > 12288) {
+      // slope > 1.5.
+      // Calculate (1 - (1 / slope)) / distortion_lag =
+      // (slope - 1) / (distortion_lag * slope).
+      // `slope` is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
+      // the division.
+      // Shift the denominator from Q13 to Q5 before the division. The result
+      // of the division will then be in Q20.
+      int16_t denom =
+          rtc::saturated_cast<int16_t>((distortion_lag * slope) >> 8);
+      int temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12, denom);
+      if (slope > 14746) {
+        // slope > 1.8.
+        // Divide by 2, with proper rounding.
+        parameters.mute_slope = (temp_ratio + 1) / 2;
+      } else {
+        // Divide by 8, with proper rounding.
+        parameters.mute_slope = (temp_ratio + 4) / 8;
+      }
+      parameters.onset = true;
+    } else {
+      // Calculate (1 - slope) / distortion_lag.
+      // Shift `slope` by 7 to Q20 before the division. The result is in Q20.
+      parameters.mute_slope = WebRtcSpl_DivW32W16(
+          (8192 - slope) * 128, static_cast<int16_t>(distortion_lag));
+      if (parameters.voice_mix_factor <= 13107) {
+        // Make sure the mute factor decreases from 1.0 to 0.9 in no more than
+        // 6.25 ms.
+        // mute_slope >= 0.005 / fs_mult in Q20.
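+        // (0.005 * 2^20 = 5242.88, which rounds to the constant 5243 below.)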
+        parameters.mute_slope =
+            std::max(5243 / fs_mult, parameters.mute_slope);
+      } else if (slope > 8028) {
+        parameters.mute_slope = 0;
+      }
+      parameters.onset = false;
+    }
+  }
+}
+
+Expand::ChannelParameters::ChannelParameters()
+    : mute_factor(16384),
+      ar_gain(0),
+      ar_gain_scale(0),
+      voice_mix_factor(0),
+      current_voice_mix_factor(0),
+      onset(false),
+      mute_slope(0) {
+  memset(ar_filter, 0, sizeof(ar_filter));
+  memset(ar_filter_state, 0, sizeof(ar_filter_state));
+}
+
+void Expand::Correlation(const int16_t* input,
+                         size_t input_length,
+                         int16_t* output) const {
+  // Set parameters depending on sample rate.
+  const int16_t* filter_coefficients;
+  size_t num_coefficients;
+  int16_t downsampling_factor;
+  if (fs_hz_ == 8000) {
+    num_coefficients = 3;
+    downsampling_factor = 2;
+    filter_coefficients = DspHelper::kDownsample8kHzTbl;
+  } else if (fs_hz_ == 16000) {
+    num_coefficients = 5;
+    downsampling_factor = 4;
+    filter_coefficients = DspHelper::kDownsample16kHzTbl;
+  } else if (fs_hz_ == 32000) {
+    num_coefficients = 7;
+    downsampling_factor = 8;
+    filter_coefficients = DspHelper::kDownsample32kHzTbl;
+  } else {  // fs_hz_ == 48000.
+    num_coefficients = 7;
+    downsampling_factor = 12;
+    filter_coefficients = DspHelper::kDownsample48kHzTbl;
+  }
+
+  // Correlate from lag 10 to lag 60 in downsampled domain.
+  // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
+  static const size_t kCorrelationStartLag = 10;
+  static const size_t kNumCorrelationLags = 54;
+  static const size_t kCorrelationLength = 60;
+  // Downsample to 4 kHz sample rate.
+  static const size_t kDownsampledLength =
+      kCorrelationStartLag + kNumCorrelationLags + kCorrelationLength;
+  int16_t downsampled_input[kDownsampledLength];
+  static const size_t kFilterDelay = 0;
+  WebRtcSpl_DownsampleFast(
+      input + input_length - kDownsampledLength * downsampling_factor,
+      kDownsampledLength * downsampling_factor, downsampled_input,
+      kDownsampledLength, filter_coefficients, num_coefficients,
+      downsampling_factor, kFilterDelay);
+
+  // Normalize `downsampled_input` to using all 16 bits.
+  int16_t max_value =
+      WebRtcSpl_MaxAbsValueW16(downsampled_input, kDownsampledLength);
+  int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
+  WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
+                              downsampled_input, norm_shift);
+
+  int32_t correlation[kNumCorrelationLags];
+  CrossCorrelationWithAutoShift(
+      &downsampled_input[kDownsampledLength - kCorrelationLength],
+      &downsampled_input[kDownsampledLength - kCorrelationLength -
+                         kCorrelationStartLag],
+      kCorrelationLength, kNumCorrelationLags, -1, correlation);
+
+  // Normalize and move data from 32-bit to 16-bit vector.
+  int32_t max_correlation =
+      WebRtcSpl_MaxAbsValueW32(correlation, kNumCorrelationLags);
+  int16_t norm_shift2 = static_cast<int16_t>(
+      std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
+  WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
+                                   norm_shift2);
+}
+
+void Expand::UpdateLagIndex() {
+  current_lag_index_ = current_lag_index_ + lag_index_direction_;
+  // Change direction if needed.
+  if (current_lag_index_ <= 0) {
+    lag_index_direction_ = 1;
+  }
+  if (current_lag_index_ >= kNumLags - 1) {
+    lag_index_direction_ = -1;
+  }
+}
+
+Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
+                              SyncBuffer* sync_buffer,
+                              RandomVector* random_vector,
+                              StatisticsCalculator* statistics,
+                              int fs,
+                              size_t num_channels) const {
+  return new Expand(background_noise, sync_buffer, random_vector, statistics,
+                    fs, num_channels);
+}
+
+void Expand::GenerateRandomVector(int16_t seed_increment,
+                                  size_t length,
+                                  int16_t* random_vector) {
+  // TODO(turajs): According to hlundin, the loop should not be needed. It
+  // should be just as good to generate all of the vector in one call.
+  size_t samples_generated = 0;
+  const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
+  while (samples_generated < length) {
+    size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
+    random_vector_->IncreaseSeedIncrement(seed_increment);
+    random_vector_->Generate(rand_length, &random_vector[samples_generated]);
+    samples_generated += rand_length;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/expand.h
new file mode 100644
index 0000000000..2e64583ec2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand.h
@@ -0,0 +1,154 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
+#define MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
+
+#include <memory>
+
+#include "modules/audio_coding/neteq/audio_vector.h"
+
+namespace webrtc {
+
+// Forward declarations.
+class AudioMultiVector;
+class BackgroundNoise;
+class RandomVector;
+class StatisticsCalculator;
+class SyncBuffer;
+
+// This class handles extrapolation of audio data from the sync_buffer to
+// produce packet-loss concealment.
+// TODO(hlundin): Refactor this class to divide the long methods into shorter
+// ones.
+class Expand {
+ public:
+  Expand(BackgroundNoise* background_noise,
+         SyncBuffer* sync_buffer,
+         RandomVector* random_vector,
+         StatisticsCalculator* statistics,
+         int fs,
+         size_t num_channels);
+
+  virtual ~Expand();
+
+  Expand(const Expand&) = delete;
+  Expand& operator=(const Expand&) = delete;
+
+  // Resets the object.
+  virtual void Reset();
+
+  // The main method to produce concealment data. The data is appended to the
+  // end of `output`.
+  virtual int Process(AudioMultiVector* output);
+
+  // Prepare the object to do extra expansion during normal operation following
+  // a period of expands.
+  virtual void SetParametersForNormalAfterExpand();
+
+  // Prepare the object to do extra expansion during merge operation following
+  // a period of expands.
+  virtual void SetParametersForMergeAfterExpand();
+
+  // Returns the mute factor for `channel`.
+  int16_t MuteFactor(size_t channel) const {
+    RTC_DCHECK_LT(channel, num_channels_);
+    return channel_parameters_[channel].mute_factor;
+  }
+
+  // Returns true if expansion has been faded down to zero amplitude (for all
+  // channels); false otherwise.
+  bool Muted() const;
+
+  // Accessors and mutators.
+  virtual size_t overlap_length() const;
+  size_t max_lag() const { return max_lag_; }
+
+ protected:
+  static const int kMaxConsecutiveExpands = 200;
+  void GenerateRandomVector(int16_t seed_increment,
+                            size_t length,
+                            int16_t* random_vector);
+
+  // Initializes member variables at the beginning of an expand period.
+  void InitializeForAnExpandPeriod();
+
+  bool TooManyExpands();
+
+  // Analyzes the signal history in `sync_buffer_`, and set up all parameters
+  // necessary to produce concealment data.
+  void AnalyzeSignal(int16_t* random_vector);
+
+  RandomVector* const random_vector_;
+  SyncBuffer* const sync_buffer_;
+  bool first_expand_;
+  const int fs_hz_;
+  const size_t num_channels_;
+  int consecutive_expands_;
+
+ private:
+  static const size_t kUnvoicedLpcOrder = 6;
+  static const size_t kNumCorrelationCandidates = 3;
+  static const size_t kDistortionLength = 20;
+  static const size_t kLpcAnalysisLength = 160;
+  static const size_t kMaxSampleRate = 48000;
+  static const int kNumLags = 3;
+
+  struct ChannelParameters {
+    ChannelParameters();
+    int16_t mute_factor;
+    int16_t ar_filter[kUnvoicedLpcOrder + 1];
+    int16_t ar_filter_state[kUnvoicedLpcOrder];
+    int16_t ar_gain;
+    int16_t ar_gain_scale;
+    int16_t voice_mix_factor;         /* Q14 */
+    int16_t current_voice_mix_factor; /* Q14 */
+    AudioVector expand_vector0;
+    AudioVector expand_vector1;
+    bool onset;
+    int mute_slope; /* Q20 */
+  };
+
+  // Calculate the auto-correlation of `input`, with length `input_length`
+  // samples. The correlation is calculated from a downsampled version of
+  // `input`, and is written to `output`.
+  void Correlation(const int16_t* input,
+                   size_t input_length,
+                   int16_t* output) const;
+
+  void UpdateLagIndex();
+
+  BackgroundNoise* const background_noise_;
+  StatisticsCalculator* const statistics_;
+  const size_t overlap_length_;
+  size_t max_lag_;
+  size_t expand_lags_[kNumLags];
+  int lag_index_direction_;
+  int current_lag_index_;
+  bool stop_muting_;
+  size_t expand_duration_samples_;
+  std::unique_ptr<ChannelParameters[]> channel_parameters_;
+};
+
+struct ExpandFactory {
+  ExpandFactory() {}
+  virtual ~ExpandFactory() {}
+
+  virtual Expand* Create(BackgroundNoise* background_noise,
+                         SyncBuffer* sync_buffer,
+                         RandomVector* random_vector,
+                         StatisticsCalculator* statistics,
+                         int fs,
+                         size_t num_channels) const;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.cc b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.cc
new file mode 100644
index 0000000000..a91358b489
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.cc
@@ -0,0 +1,71 @@
+/*  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/expand_uma_logger.h"
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+std::unique_ptr<TickTimer::Countdown> GetNewCountdown(
+    const TickTimer& tick_timer,
+    int logging_period_s) {
+  return tick_timer.GetNewCountdown((logging_period_s * 1000) /
+                                    tick_timer.ms_per_tick());
+}
+}  // namespace
+
+ExpandUmaLogger::ExpandUmaLogger(absl::string_view uma_name,
+                                 int logging_period_s,
+                                 const TickTimer* tick_timer)
+    : uma_name_(uma_name),
+      logging_period_s_(logging_period_s),
+      tick_timer_(*tick_timer),
+      timer_(GetNewCountdown(tick_timer_, logging_period_s_)) {
+  RTC_DCHECK(tick_timer);
+  RTC_DCHECK_GT(logging_period_s_, 0);
+}
+
+ExpandUmaLogger::~ExpandUmaLogger() = default;
+
+void ExpandUmaLogger::UpdateSampleCounter(uint64_t samples,
+                                          int sample_rate_hz) {
+  if ((last_logged_value_ && *last_logged_value_ > samples) ||
+      sample_rate_hz_ != sample_rate_hz) {
+    // Sanity checks. The incremental counter moved backwards, or sample rate
+    // changed.
+    last_logged_value_.reset();
+  }
+  last_value_ = samples;
+  sample_rate_hz_ = sample_rate_hz;
+  if (!last_logged_value_) {
+    last_logged_value_ = absl::optional<uint64_t>(samples);
+  }
+
+  if (!timer_->Finished()) {
+    // Not yet time to log.
+    return;
+  }
+
+  RTC_DCHECK(last_logged_value_);
+  RTC_DCHECK_GE(last_value_, *last_logged_value_);
+  const uint64_t diff = last_value_ - *last_logged_value_;
+  last_logged_value_ = absl::optional<uint64_t>(last_value_);
+  // Calculate rate in percent.
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  const int rate = (100 * diff) / (sample_rate_hz * logging_period_s_);
+  RTC_DCHECK_GE(rate, 0);
+  RTC_DCHECK_LE(rate, 100);
+  RTC_HISTOGRAM_PERCENTAGE_SPARSE(uma_name_, rate);
+  timer_ = GetNewCountdown(tick_timer_, logging_period_s_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h
new file mode 100644
index 0000000000..cc5c20a886
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h
@@ -0,0 +1,57 @@
+/*  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_
+#define MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/neteq/tick_timer.h"
+
+namespace webrtc {
+
+// This class is used to periodically log values to a UMA histogram. The caller
+// is expected to update this class with an incremental sample counter which
+// counts expand samples. At the end of each logging period, the class will
+// calculate the fraction of samples that were expand samples during that
+// period and report that in percent. The logging period must be strictly
+// positive. Does not take ownership of tick_timer, and the pointer must refer
+// to a valid object that outlives the logger.
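+//
+// Rough usage sketch (the histogram name below is only illustrative; NetEq
+// itself uses names such as "WebRTC.Audio.ExpandRatePercent"):
+//   ExpandUmaLogger logger("WebRTC.Audio.ExpandRatePercent",
+//                          /*logging_period_s=*/10, &tick_timer);
+//   logger.UpdateSampleCounter(cumulative_expand_samples, sample_rate_hz);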
+class ExpandUmaLogger {
+ public:
+  ExpandUmaLogger(absl::string_view uma_name,
+                  int logging_period_s,
+                  const TickTimer* tick_timer);
+
+  ~ExpandUmaLogger();
+
+  ExpandUmaLogger(const ExpandUmaLogger&) = delete;
+  ExpandUmaLogger& operator=(const ExpandUmaLogger&) = delete;
+
+  // In this call, value should be an incremental sample counter. The sample
+  // rate must be strictly positive.
+  void UpdateSampleCounter(uint64_t value, int sample_rate_hz);
+
+ private:
+  const std::string uma_name_;
+  const int logging_period_s_;
+  const TickTimer& tick_timer_;
+  std::unique_ptr<TickTimer::Countdown> timer_;
+  absl::optional<uint64_t> last_logged_value_;
+  uint64_t last_value_ = 0;
+  int sample_rate_hz_ = 0;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/expand_unittest.cc
new file mode 100644
index 0000000000..9355fce5e1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand_unittest.cc
@@ -0,0 +1,203 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for Expand class.
+
+#include "modules/audio_coding/neteq/expand.h"
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/random_vector.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(Expand, CreateAndDestroy) {
+  int fs = 8000;
+  size_t channels = 1;
+  BackgroundNoise bgn(channels);
+  SyncBuffer sync_buffer(1, 1000);
+  RandomVector random_vector;
+  StatisticsCalculator statistics;
+  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
+}
+
+TEST(Expand, CreateUsingFactory) {
+  int fs = 8000;
+  size_t channels = 1;
+  BackgroundNoise bgn(channels);
+  SyncBuffer sync_buffer(1, 1000);
+  RandomVector random_vector;
+  StatisticsCalculator statistics;
+  ExpandFactory expand_factory;
+  Expand* expand = expand_factory.Create(&bgn, &sync_buffer, &random_vector,
+                                         &statistics, fs, channels);
+  EXPECT_TRUE(expand != NULL);
+  delete expand;
+}
+
+namespace {
+class FakeStatisticsCalculator : public StatisticsCalculator {
+ public:
+  void LogDelayedPacketOutageEvent(int num_samples, int fs_hz) override {
+    last_outage_duration_samples_ = num_samples;
+  }
+
+  int last_outage_duration_samples() const {
+    return last_outage_duration_samples_;
+  }
+
+ private:
+  int last_outage_duration_samples_ = 0;
+};
+
+// This is the same size that is given to the SyncBuffer object in NetEq.
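+// At the 32 kHz rate used in these tests this corresponds to
+// 720 * 32000 / 1000 = 23040 samples per channel.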
+const size_t kNetEqSyncBufferLengthMs = 720;
+}  // namespace
+
+class ExpandTest : public ::testing::Test {
+ protected:
+  ExpandTest()
+      : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
+                    32000),
+        test_sample_rate_hz_(32000),
+        num_channels_(1),
+        background_noise_(num_channels_),
+        sync_buffer_(num_channels_,
+                     kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
+        expand_(&background_noise_,
+                &sync_buffer_,
+                &random_vector_,
+                &statistics_,
+                test_sample_rate_hz_,
+                num_channels_) {
+    input_file_.set_output_rate_hz(test_sample_rate_hz_);
+  }
+
+  void SetUp() override {
+    // Fast-forward the input file until there is speech (about 1.1 second into
+    // the file).
+    const int speech_start_samples =
+        static_cast<int>(test_sample_rate_hz_ * 1.1f);
+    ASSERT_TRUE(input_file_.Seek(speech_start_samples));
+
+    // Pre-load the sync buffer with speech data.
+    std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
+    ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
+    sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
+    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
+  }
+
+  test::ResampleInputAudioFile input_file_;
+  int test_sample_rate_hz_;
+  size_t num_channels_;
+  BackgroundNoise background_noise_;
+  SyncBuffer sync_buffer_;
+  RandomVector random_vector_;
+  FakeStatisticsCalculator statistics_;
+  Expand expand_;
+};
+
+// This test calls the expand object to produce concealment data a few times,
+// and then ends by calling SetParametersForNormalAfterExpand. This simulates
+// the situation where the packet next up for decoding was just delayed, not
+// lost.
+TEST_F(ExpandTest, DelayedPacketOutage) {
+  AudioMultiVector output(num_channels_);
+  size_t sum_output_len_samples = 0;
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ(0, expand_.Process(&output));
+    EXPECT_GT(output.Size(), 0u);
+    sum_output_len_samples += output.Size();
+    EXPECT_EQ(0, statistics_.last_outage_duration_samples());
+  }
+  expand_.SetParametersForNormalAfterExpand();
+  // The outage duration is reported in samples and should match the total
+  // number of concealment samples produced.
+  EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples),
+            statistics_.last_outage_duration_samples());
+}
+
+// This test is similar to DelayedPacketOutage, but ends by calling
+// SetParametersForMergeAfterExpand. This simulates the situation where the
+// packet next up for decoding was actually lost (or at least a later packet
+// arrived before it).
+TEST_F(ExpandTest, LostPacketOutage) {
+  AudioMultiVector output(num_channels_);
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ(0, expand_.Process(&output));
+    EXPECT_GT(output.Size(), 0u);
+    EXPECT_EQ(0, statistics_.last_outage_duration_samples());
+  }
+  expand_.SetParametersForMergeAfterExpand();
+  EXPECT_EQ(0, statistics_.last_outage_duration_samples());
+}
+
+// This test is similar to the DelayedPacketOutage test above, but with the
+// difference that Expand::Reset() is called when `i == 5`, i.e., after the
+// sixth call to Expand::Process(). This should reset the statistics, so that
+// the reported outage in the end covers only the last four expand periods
+// instead of all ten.
+TEST_F(ExpandTest, CheckOutageStatsAfterReset) {
+  AudioMultiVector output(num_channels_);
+  size_t sum_output_len_samples = 0;
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ(0, expand_.Process(&output));
+    EXPECT_GT(output.Size(), 0u);
+    sum_output_len_samples += output.Size();
+    if (i == 5) {
+      expand_.Reset();
+      sum_output_len_samples = 0;
+    }
+    EXPECT_EQ(0, statistics_.last_outage_duration_samples());
+  }
+  expand_.SetParametersForNormalAfterExpand();
+  // The outage duration is reported in samples and should match the number of
+  // concealment samples produced since the reset.
+  EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples),
+            statistics_.last_outage_duration_samples());
+}
+
+namespace {
+// Runs expand until Muted() returns true. Times out after 1000 calls.
+void ExpandUntilMuted(size_t num_channels, Expand* expand) {
+  EXPECT_FALSE(expand->Muted()) << "Instance is muted from the start";
+  AudioMultiVector output(num_channels);
+  int num_calls = 0;
+  while (!expand->Muted()) {
+    ASSERT_LT(num_calls++, 1000) << "Test timed out";
+    EXPECT_EQ(0, expand->Process(&output));
+  }
+}
+}  // namespace
+
+// Verifies that Muted() returns true after a long expand period. Also verifies
+// that Muted() is reset to false after calling Reset(),
+// SetParametersForMergeAfterExpand() and SetParametersForNormalAfterExpand().
TEST_F(ExpandTest, Muted) {
+  ExpandUntilMuted(num_channels_, &expand_);
+  expand_.Reset();
+  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.
+
+  ExpandUntilMuted(num_channels_, &expand_);
+  expand_.SetParametersForMergeAfterExpand();
+  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.
+
+  expand_.Reset();  // Must reset in order to start a new expand period.
+  ExpandUntilMuted(num_channels_, &expand_);
+  expand_.SetParametersForNormalAfterExpand();
+  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.
+}
+
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/g3doc/index.md b/third_party/libwebrtc/modules/audio_coding/neteq/g3doc/index.md
new file mode 100644
index 0000000000..40e76e2742
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/g3doc/index.md
@@ -0,0 +1,102 @@
+
+
+# NetEq
+
+NetEq is the audio jitter buffer and packet loss concealer. The jitter buffer
+is adaptive, meaning that the buffering delay is continuously optimized based
+on the network conditions. Its main goal is to ensure a smooth playout of
+incoming audio packets from the network with a low amount of audio artifacts
+(alterations to the original content of the packets) while at the same time
+keeping the delay as low as possible.
+
+## API
+
+At a high level, the NetEq API has two main functions:
+[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
+and
+[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72).
+
+### InsertPacket
+
+[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
+delivers an RTP packet from the network to NetEq where the following happens:
+
+1.  The packet is discarded if it is too late for playout (for example if it
+    was reordered). Otherwise it is put into the packet buffer where it is
+    stored until it is time for playout. If the buffer is full, all the
+    existing packets are discarded (this should be rare).
+2.  The interarrival time between packets is analyzed and statistics are
+    updated, which are used to derive a new target playout delay. The
+    interarrival time is measured in the number of GetAudio ‘ticks’, and thus
+    clock drift between the sender and receiver can be accounted for.
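+
+A minimal sketch of the insert path is shown below. The exact `NetEq` method
+signatures vary between WebRTC versions, and `neteq` is assumed to be a
+`NetEq*`, so treat this as illustrative rather than normative:
+
+```cpp
+// Illustrative only: feed each received RTP packet to NetEq as it arrives.
+RTPHeader header;                       // Filled in from the parsed packet.
+rtc::ArrayView<const uint8_t> payload;  // The RTP payload bytes.
+if (neteq->InsertPacket(header, payload) != NetEq::kOK) {
+  // Insertion failed, for example because the payload type is unknown.
+}
+```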
+### GetAudio
+
+[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
+pulls 10 ms of audio from NetEq for playout. A much simplified decision logic
+is as follows:
+
+1.  If there is 10 ms audio in the sync buffer then return that.
+2.  If the next packet is available (based on RTP timestamp) in the packet
+    buffer then decode it and append the result to the sync buffer.
+    1.  Compare the current delay estimate (filtered buffer level) with the
+        target delay and time stretch (accelerate or decelerate) the contents
+        of the sync buffer if the buffer level is too high or too low.
+    2.  Return 10 ms of audio from the sync buffer.
+3.  If the last decoded packet was a discontinuous transmission (DTX) packet
+    then generate comfort noise.
+4.  If there is no available packet for decoding, due to the next packet
+    having not arrived or been lost, then generate packet loss concealment by
+    extrapolating the remaining audio in the sync buffer or by asking the
+    decoder to produce it.
+
+In summary, the output is the result of one of the following operations:
+
+*   Normal: audio decoded from a packet.
+*   Acceleration: accelerated playout of a decoded packet.
+*   Preemptive expand: decelerated playout of a decoded packet.
+*   Expand: packet loss concealment generated by NetEq or the decoder.
+*   Merge: audio stitched together from packet loss concealment to decoded
+    data in case of a loss.
+*   Comfort noise (CNG): comfort noise generated by NetEq or the decoder
+    between talk spurts due to discontinuous transmission of packets (DTX).
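+
+A corresponding sketch of the playout side, again with simplified and
+version-dependent signatures:
+
+```cpp
+// Illustrative only: called by the audio device thread every 10 ms.
+AudioFrame frame;
+bool muted = false;
+if (neteq->GetAudio(&frame, &muted) == NetEq::kOK) {
+  // `frame` now holds 10 ms of decoded audio, concealment or comfort noise,
+  // at frame.sample_rate_hz_.
+}
+```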
+
+## Statistics
+
+There are a number of functions that can be used to query the internal state
+of NetEq, statistics about the type of audio output, and latency metrics such
+as how long packets have waited in the buffer.
+
+*   [`NetworkStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=273;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
+    instantaneous values or stats averaged over the duration since last call
+    to this function.
+*   [`GetLifetimeStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=280;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
+    cumulative stats that persist over the lifetime of the class.
+*   [`GetOperationsAndState`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=284;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
+    information about the internal state of NetEq (only intended to be used
+    for testing and debugging).
+
+## Tests and tools
+
+*   [`neteq_rtpplay`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc;drc=cee751abff598fc19506f77de08bea7c61b9dcca):
+    Simulate NetEq behavior based on either an RTP dump, a PCAP file or an RTC
+    event log. A replacement audio file can also be used instead of the
+    original payload. Outputs aggregated statistics and optionally an audio
+    file to listen to.
+*   [`neteq_speed_test`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc;drc=2ab97f6f8e27b47c0d9beeb8b6ca5387bda9f55c):
+    Measure performance of NetEq, used on perf bots.
+*   Unit tests, including bit-exactness tests where an RTP file is used as an
+    input to NetEq, the output is concatenated, and a checksum is calculated
+    and compared against a reference.
+
+## Other responsibilities
+
+*   Dual-tone multi-frequency signaling (DTMF): receive telephone events and
+    produce dual tone waveforms.
+*   Forward error correction (RED or codec inband FEC): split inserted packets
+    and prioritize the payloads.
+*   NACK (negative acknowledgement): keep track of lost packets and generate a
+    list of packets to NACK.
+*   Audio/video sync: NetEq can be instructed to increase the latency in order
+    to keep audio and video in sync.
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc
new file mode 100644
index 0000000000..e4b7f10379
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc
@@ -0,0 +1,149 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/histogram.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <numeric>
+
+#include "absl/types/optional.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+Histogram::Histogram(size_t num_buckets,
+                     int forget_factor,
+                     absl::optional<double> start_forget_weight)
+    : buckets_(num_buckets, 0),
+      forget_factor_(0),
+      base_forget_factor_(forget_factor),
+      add_count_(0),
+      start_forget_weight_(start_forget_weight) {
+  RTC_DCHECK_LT(base_forget_factor_, 1 << 15);
+}
+
+Histogram::~Histogram() {}
+
+// Each element in the vector is first multiplied by the forgetting factor
+// `forget_factor_`. Then the vector element indicated by `value` is increased
+// (additive) by 1 - `forget_factor_`. This way, the probability of `value` is
+// slightly increased, while the sum of the histogram remains constant (=1).
+// Due to inaccuracies in the fixed-point arithmetic, the histogram may no
+// longer sum up to 1 (in Q30) after the update. To correct this, a correction
+// term is added or subtracted from the first element (or elements) of the
+// vector.
+// The forgetting factor `forget_factor_` is also updated. When the
+// DelayManager is reset, the factor is set to 0 to facilitate rapid
+// convergence in the beginning. With each update of the histogram, the factor
+// is increased towards the steady-state value `base_forget_factor_`.
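+// Worked example: with forget_factor_ = 32440 (roughly 0.990 in Q15), each
+// bucket keeps about 99.0% of its probability mass, and the bucket for
+// `value` additionally gains 32768 - 32440 = 328 in Q15 (about 1.0%), so the
+// total stays at 1.0 in Q30.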
+void Histogram::Add(int value) {
+  RTC_DCHECK(value >= 0);
+  RTC_DCHECK(value < static_cast<int>(buckets_.size()));
+  int vector_sum = 0;  // Sum up the vector elements as they are processed.
+  // Multiply each element in `buckets_` with `forget_factor_`.
+  for (int& bucket : buckets_) {
+    bucket = (static_cast<int64_t>(bucket) * forget_factor_) >> 15;
+    vector_sum += bucket;
+  }
+
+  // Increase the probability for the currently observed inter-arrival time
+  // by 1 - `forget_factor_`. The factor is in Q15, `buckets_` in Q30.
+  // Thus, left-shift 15 steps to obtain result in Q30.
+  buckets_[value] += (32768 - forget_factor_) << 15;
+  vector_sum += (32768 - forget_factor_) << 15;  // Add to vector sum.
+
+  // `buckets_` should sum up to 1 (in Q30), but it may not due to
+  // fixed-point rounding errors.
+  vector_sum -= 1 << 30;  // Should be zero. Compensate if not.
+  if (vector_sum != 0) {
+    // Modify a few values early in `buckets_`.
+    int flip_sign = vector_sum > 0 ? -1 : 1;
+    for (int& bucket : buckets_) {
+      // Add/subtract 1/16 of the element, but not more than `vector_sum`.
+      int correction = flip_sign * std::min(std::abs(vector_sum), bucket >> 4);
+      bucket += correction;
+      vector_sum += correction;
+      if (std::abs(vector_sum) == 0) {
+        break;
+      }
+    }
+  }
+  RTC_DCHECK(vector_sum == 0);  // Verify that the above is correct.
+
+  ++add_count_;
+
+  // Update `forget_factor_` (changes only during the first seconds after a
+  // reset). The factor converges to `base_forget_factor_`.
+  if (start_forget_weight_) {
+    if (forget_factor_ != base_forget_factor_) {
+      int old_forget_factor = forget_factor_;
+      int forget_factor =
+          (1 << 15) * (1 - start_forget_weight_.value() / (add_count_ + 1));
+      forget_factor_ =
+          std::max(0, std::min(base_forget_factor_, forget_factor));
+      // The histogram is updated recursively by forgetting the old histogram
+      // with `forget_factor_` and adding a new sample multiplied by
+      // `1 - forget_factor_`. We need to make sure that the effective weight
+      // on the new sample is no smaller than those on the old samples, i.e.,
+      // to satisfy the following DCHECK.
+      RTC_DCHECK_GE((1 << 15) - forget_factor_,
+                    ((1 << 15) - old_forget_factor) * forget_factor_ >> 15);
+    }
+  } else {
+    forget_factor_ += (base_forget_factor_ - forget_factor_ + 3) >> 2;
+  }
+}
+
+int Histogram::Quantile(int probability) {
+  // Find the bucket for which the probability of observing an
+  // inter-arrival time larger than or equal to `index` is larger than or
+  // equal to `probability`. The sought probability is estimated using
+  // the histogram as the reverse cumulant PDF, i.e., the sum of elements from
+  // the end up until `index`. Now, since the sum of all elements is 1
+  // (in Q30) by definition, and since the solution is often a low value for
+  // `index`, it is more efficient to start with `sum` = 1 and subtract
+  // elements from the start of the histogram.
+  int inverse_probability = (1 << 30) - probability;
+  size_t index = 0;   // Start from the beginning of `buckets_`.
+  int sum = 1 << 30;  // Assign to 1 in Q30.
+  sum -= buckets_[index];
+
+  while ((sum > inverse_probability) && (index < buckets_.size() - 1)) {
+    // Subtract the probabilities one by one until the sum is no longer greater
+    // than `inverse_probability`.
+    ++index;
+    sum -= buckets_[index];
+  }
+  return static_cast<int>(index);
+}
+
+// Set the histogram vector to an exponentially decaying distribution
+// buckets_[i] = 0.5^(i+1), i = 0, 1, 2, ...
+// buckets_ is in Q30.
+void Histogram::Reset() {
+  // Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum
+  // of buckets_ is 1.
+  uint16_t temp_prob = 0x4002;  // 16384 + 2 = 100000000000010 binary.
+  for (int& bucket : buckets_) {
+    temp_prob >>= 1;
+    bucket = temp_prob << 16;
+  }
+  forget_factor_ = 0;  // Adapt the histogram faster for the first few packets.
+  add_count_ = 0;
+}
+
+int Histogram::NumBuckets() const {
+  return buckets_.size();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h
new file mode 100644
index 0000000000..265a10e00a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_
+#define MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_
+
+#include <string.h>  // Provide access to size_t.
+
+#include <vector>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+class Histogram {
+ public:
+  // Creates histogram with capacity `num_buckets` and `forget_factor` in Q15.
+  Histogram(size_t num_buckets,
+            int forget_factor,
+            absl::optional<double> start_forget_weight = absl::nullopt);
+
+  virtual ~Histogram();
+
+  // Resets the histogram to the default start distribution.
+  virtual void Reset();
+
+  // Add entry in bucket `index`.
+  virtual void Add(int index);
+
+  // Calculates the quantile at `probability` (in Q30) of the histogram
+  // distribution.
+  virtual int Quantile(int probability);
+
+  // Returns the number of buckets in the histogram.
+  virtual int NumBuckets() const;
+
+  // Returns the probability for each bucket in Q30.
+  const std::vector<int>& buckets() const { return buckets_; }
+
+  // Accessors only intended for testing purposes.
+  int base_forget_factor_for_testing() const { return base_forget_factor_; }
+  int forget_factor_for_testing() const { return forget_factor_; }
+  absl::optional<double> start_forget_weight_for_testing() const {
+    return start_forget_weight_;
+  }
+
+ private:
+  std::vector<int> buckets_;
+  int forget_factor_;  // Q15
+  const int base_forget_factor_;
+  int add_count_;
+  const absl::optional<double> start_forget_weight_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc
new file mode 100644
index 0000000000..e30a2956dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc
@@ -0,0 +1,73 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/histogram.h"
+
+#include <cmath>
+
+#include "test/gtest.h"
+
+namespace webrtc {
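+
+// Note on the default distribution (see Histogram::Reset()): with
+// buckets_[i] ~= 0.5^(i+1), Quantile(0.95 * (1 << 30)) returns 4, since the
+// tail probability first drops below 5% at index 4 (0.5^5 ~= 3.1%).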
+
+TEST(HistogramTest, Initialization) {
+  Histogram histogram(65, 32440);
+  histogram.Reset();
+  const auto& buckets = histogram.buckets();
+  double sum = 0.0;
+  for (size_t i = 0; i < buckets.size(); i++) {
+    // Tolerance 65537 in Q30 corresponds to a delta of approximately 0.00006.
+    EXPECT_NEAR(ldexp(std::pow(0.5, static_cast<int>(i + 1)), 30), buckets[i],
+                65537);
+    sum += buckets[i];
+  }
+  EXPECT_EQ(1 << 30, static_cast<int>(sum));  // Should be 1 in Q30.
+}
+
+TEST(HistogramTest, Add) {
+  Histogram histogram(10, 32440);
+  histogram.Reset();
+  const std::vector<int> before = histogram.buckets();
+  const int index = 5;
+  histogram.Add(index);
+  const std::vector<int> after = histogram.buckets();
+  EXPECT_GT(after[index], before[index]);
+  int sum = 0;
+  for (int bucket : after) {
+    sum += bucket;
+  }
+  EXPECT_EQ(1 << 30, sum);
+}
+
+TEST(HistogramTest, ForgetFactor) {
+  Histogram histogram(10, 32440);
+  histogram.Reset();
+  const std::vector<int> before = histogram.buckets();
+  const int index = 4;
+  histogram.Add(index);
+  const std::vector<int> after = histogram.buckets();
+  for (int i = 0; i < histogram.NumBuckets(); ++i) {
+    if (i != index) {
+      EXPECT_LT(after[i], before[i]);
+    }
+  }
+}
+
+TEST(HistogramTest, ReachSteadyStateForgetFactor) {
+  static constexpr int kSteadyStateForgetFactor = (1 << 15) * 0.9993;
+  Histogram histogram(100, kSteadyStateForgetFactor, 1.0);
+  histogram.Reset();
+  int n = (1 << 15) / ((1 << 15) - kSteadyStateForgetFactor);
+  for (int i = 0; i < n; ++i) {
+    histogram.Add(0);
+  }
+  EXPECT_EQ(histogram.forget_factor_for_testing(), kSteadyStateForgetFactor);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc b/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc
new file mode 100644
index 0000000000..0aec6d2597
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc
@@ -0,0 +1,391 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/merge.h"
+
+#include <string.h>  // memmove, memcpy, memset, size_t
+
+#include <algorithm>  // min, max
+#include <memory>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/cross_correlation.h"
+#include "modules/audio_coding/neteq/dsp_helper.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+Merge::Merge(int fs_hz,
+             size_t num_channels,
+             Expand* expand,
+             SyncBuffer* sync_buffer)
+    : fs_hz_(fs_hz),
+      num_channels_(num_channels),
+      fs_mult_(fs_hz_ / 8000),
+      timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),
+      expand_(expand),
+      sync_buffer_(sync_buffer),
+      expanded_(num_channels_) {
+  RTC_DCHECK_GT(num_channels_, 0);
+}
+
+Merge::~Merge() = default;
+
+size_t Merge::Process(int16_t* input,
+                      size_t input_length,
+                      AudioMultiVector* output) {
+  // TODO(hlundin): Change to an enumerator and skip assert.
+  RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
+             fs_hz_ == 48000);
+  RTC_DCHECK_LE(fs_hz_, kMaxSampleRate);  // Should not be possible.
+  if (input_length == 0) {
+    return 0;
+  }
+
+  size_t old_length;
+  size_t expand_period;
+  // Get expansion data to overlap and mix with.
+  size_t expanded_length = GetExpandedSignal(&old_length, &expand_period);
+
+  // Transfer input signal to an AudioMultiVector.
+ AudioMultiVector input_vector(num_channels_); + input_vector.PushBackInterleaved( + rtc::ArrayView(input, input_length)); + size_t input_length_per_channel = input_vector.Size(); + RTC_DCHECK_EQ(input_length_per_channel, input_length / num_channels_); + + size_t best_correlation_index = 0; + size_t output_length = 0; + + std::unique_ptr input_channel( + new int16_t[input_length_per_channel]); + std::unique_ptr expanded_channel(new int16_t[expanded_length]); + for (size_t channel = 0; channel < num_channels_; ++channel) { + input_vector[channel].CopyTo(input_length_per_channel, 0, + input_channel.get()); + expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get()); + + const int16_t new_mute_factor = std::min( + 16384, SignalScaling(input_channel.get(), input_length_per_channel, + expanded_channel.get())); + + if (channel == 0) { + // Downsample, correlate, and find strongest correlation period for the + // reference (i.e., first) channel only. + // Downsample to 4kHz sample rate. + Downsample(input_channel.get(), input_length_per_channel, + expanded_channel.get(), expanded_length); + + // Calculate the lag of the strongest correlation period. + best_correlation_index = CorrelateAndPeakSearch( + old_length, input_length_per_channel, expand_period); + } + + temp_data_.resize(input_length_per_channel + best_correlation_index); + int16_t* decoded_output = temp_data_.data() + best_correlation_index; + + // Mute the new decoded data if needed (and unmute it linearly). + // This is the overlapping part of expanded_signal. + size_t interpolation_length = + std::min(kMaxCorrelationLength * fs_mult_, + expanded_length - best_correlation_index); + interpolation_length = + std::min(interpolation_length, input_length_per_channel); + + RTC_DCHECK_LE(new_mute_factor, 16384); + int16_t mute_factor = + std::max(expand_->MuteFactor(channel), new_mute_factor); + RTC_DCHECK_GE(mute_factor, 0); + + if (mute_factor < 16384) { + // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB, + // and so on, or as fast as it takes to come back to full gain within the + // frame length. + const int back_to_fullscale_inc = static_cast( + ((16384 - mute_factor) << 6) / input_length_per_channel); + const int increment = std::max(4194 / fs_mult_, back_to_fullscale_inc); + mute_factor = static_cast(DspHelper::RampSignal( + input_channel.get(), interpolation_length, mute_factor, increment)); + DspHelper::UnmuteSignal(&input_channel[interpolation_length], + input_length_per_channel - interpolation_length, + &mute_factor, increment, + &decoded_output[interpolation_length]); + } else { + // No muting needed. + memmove( + &decoded_output[interpolation_length], + &input_channel[interpolation_length], + sizeof(int16_t) * (input_length_per_channel - interpolation_length)); + } + + // Do overlap and mix linearly. + int16_t increment = + static_cast(16384 / (interpolation_length + 1)); // In Q14. + int16_t local_mute_factor = 16384 - increment; + memmove(temp_data_.data(), expanded_channel.get(), + sizeof(int16_t) * best_correlation_index); + DspHelper::CrossFade(&expanded_channel[best_correlation_index], + input_channel.get(), interpolation_length, + &local_mute_factor, increment, decoded_output); + + output_length = best_correlation_index + input_length_per_channel; + if (channel == 0) { + RTC_DCHECK(output->Empty()); // Output should be empty at this point. 
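+      // The reference channel determines the output size; the remaining
+      // channels must produce exactly the same number of samples, since they
+      // reuse the correlation lag found for channel 0.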
+ output->AssertSize(output_length); + } else { + RTC_DCHECK_EQ(output->Size(), output_length); + } + (*output)[channel].OverwriteAt(temp_data_.data(), output_length, 0); + } + + // Copy back the first part of the data to `sync_buffer_` and remove it from + // `output`. + sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index()); + output->PopFront(old_length); + + // Return new added length. `old_length` samples were borrowed from + // `sync_buffer_`. + RTC_DCHECK_GE(output_length, old_length); + return output_length - old_length; +} + +size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) { + // Check how much data that is left since earlier. + *old_length = sync_buffer_->FutureLength(); + // Should never be less than overlap_length. + RTC_DCHECK_GE(*old_length, expand_->overlap_length()); + // Generate data to merge the overlap with using expand. + expand_->SetParametersForMergeAfterExpand(); + + if (*old_length >= 210 * kMaxSampleRate / 8000) { + // TODO(hlundin): Write test case for this. + // The number of samples available in the sync buffer is more than what fits + // in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples, + // but shift them towards the end of the buffer. This is ok, since all of + // the buffer will be expand data anyway, so as long as the beginning is + // left untouched, we're fine. + size_t length_diff = *old_length - 210 * kMaxSampleRate / 8000; + sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index()); + *old_length = 210 * kMaxSampleRate / 8000; + // This is the truncated length. + } + // This assert should always be true thanks to the if statement above. + RTC_DCHECK_GE(210 * kMaxSampleRate / 8000, *old_length); + + AudioMultiVector expanded_temp(num_channels_); + expand_->Process(&expanded_temp); + *expand_period = expanded_temp.Size(); // Samples per channel. + + expanded_.Clear(); + // Copy what is left since earlier into the expanded vector. + expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index()); + RTC_DCHECK_EQ(expanded_.Size(), *old_length); + RTC_DCHECK_GT(expanded_temp.Size(), 0); + // Do "ugly" copy and paste from the expanded in order to generate more data + // to correlate (but not interpolate) with. + const size_t required_length = static_cast((120 + 80 + 2) * fs_mult_); + if (expanded_.Size() < required_length) { + while (expanded_.Size() < required_length) { + // Append one more pitch period each time. + expanded_.PushBack(expanded_temp); + } + // Trim the length to exactly `required_length`. + expanded_.PopBack(expanded_.Size() - required_length); + } + RTC_DCHECK_GE(expanded_.Size(), required_length); + return required_length; +} + +int16_t Merge::SignalScaling(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal) const { + // Adjust muting factor if new vector is more or less of the BGN energy. + const auto mod_input_length = rtc::SafeMin( + 64 * rtc::dchecked_cast(fs_mult_), input_length); + + // Missing input, do no muting + if (mod_input_length == 0) { + return 16384; + } + + const int16_t expanded_max = + WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); + int32_t factor = + (expanded_max * expanded_max) / (std::numeric_limits::max() / + static_cast(mod_input_length)); + const int expanded_shift = factor == 0 ? 
0 : 31 - WebRtcSpl_NormW32(factor); + int32_t energy_expanded = WebRtcSpl_DotProductWithScale( + expanded_signal, expanded_signal, mod_input_length, expanded_shift); + + // Calculate energy of input signal. + const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); + factor = (input_max * input_max) / (std::numeric_limits::max() / + static_cast(mod_input_length)); + const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); + int32_t energy_input = WebRtcSpl_DotProductWithScale( + input, input, mod_input_length, input_shift); + + // Align to the same Q-domain. + if (input_shift > expanded_shift) { + energy_expanded = energy_expanded >> (input_shift - expanded_shift); + } else { + energy_input = energy_input >> (expanded_shift - input_shift); + } + + // Calculate muting factor to use for new frame. + int16_t mute_factor; + if (energy_input > energy_expanded) { + // Normalize `energy_input` to 14 bits. + int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17; + energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift); + // Put `energy_expanded` in a domain 14 higher, so that + // energy_expanded / energy_input is in Q14. + energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14); + // Calculate sqrt(energy_expanded / energy_input) in Q14. + mute_factor = static_cast( + WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14)); + } else { + // Set to 1 (in Q14) when `expanded` has higher energy than `input`. + mute_factor = 16384; + } + + return mute_factor; +} + +// TODO(hlundin): There are some parameter values in this method that seem +// strange. Compare with Expand::Correlation. +void Merge::Downsample(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal, + size_t expanded_length) { + const int16_t* filter_coefficients; + size_t num_coefficients; + int decimation_factor = fs_hz_ / 4000; + static const size_t kCompensateDelay = 0; + size_t length_limit = static_cast(fs_hz_ / 100); // 10 ms in samples. + if (fs_hz_ == 8000) { + filter_coefficients = DspHelper::kDownsample8kHzTbl; + num_coefficients = 3; + } else if (fs_hz_ == 16000) { + filter_coefficients = DspHelper::kDownsample16kHzTbl; + num_coefficients = 5; + } else if (fs_hz_ == 32000) { + filter_coefficients = DspHelper::kDownsample32kHzTbl; + num_coefficients = 7; + } else { // fs_hz_ == 48000 + filter_coefficients = DspHelper::kDownsample48kHzTbl; + num_coefficients = 7; + } + size_t signal_offset = num_coefficients - 1; + WebRtcSpl_DownsampleFast( + &expanded_signal[signal_offset], expanded_length - signal_offset, + expanded_downsampled_, kExpandDownsampLength, filter_coefficients, + num_coefficients, decimation_factor, kCompensateDelay); + if (input_length <= length_limit) { + // Not quite long enough, so we have to cheat a bit. + // If the input is shorter than the offset, we consider the input to be 0 + // length. This will cause us to skip the downsampling since it makes no + // sense anyway, and input_downsampled_ will be filled with zeros. This is + // clearly a pathological case, and the signal quality will suffer, but + // there is not much we can do. + const size_t temp_len = + input_length > signal_offset ? input_length - signal_offset : 0; + // TODO(hlundin): Should `downsamp_temp_len` be corrected for round-off + // errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor? 
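+    // Worked example: at fs_hz_ == 8000, decimation_factor is 2 and
+    // signal_offset is 2, so input_length == 79 gives temp_len = 77 and
+    // downsamp_temp_len = 38 (truncating division); the remaining
+    // kInputDownsampLength - 38 = 2 output samples are zero-filled below.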
+ size_t downsamp_temp_len = temp_len / decimation_factor; + if (downsamp_temp_len > 0) { + WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len, + input_downsampled_, downsamp_temp_len, + filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + } + memset(&input_downsampled_[downsamp_temp_len], 0, + sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len)); + } else { + WebRtcSpl_DownsampleFast( + &input[signal_offset], input_length - signal_offset, input_downsampled_, + kInputDownsampLength, filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + } +} + +size_t Merge::CorrelateAndPeakSearch(size_t start_position, + size_t input_length, + size_t expand_period) const { + // Calculate correlation without any normalization. + const size_t max_corr_length = kMaxCorrelationLength; + size_t stop_position_downsamp = + std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1); + + int32_t correlation[kMaxCorrelationLength]; + CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_, + kInputDownsampLength, stop_position_downsamp, 1, + correlation); + + // Normalize correlation to 14 bits and copy to a 16-bit array. + const size_t pad_length = expand_->overlap_length() - 1; + const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength; + std::unique_ptr correlation16( + new int16_t[correlation_buffer_size]); + memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t)); + int16_t* correlation_ptr = &correlation16[pad_length]; + int32_t max_correlation = + WebRtcSpl_MaxAbsValueW32(correlation, stop_position_downsamp); + int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation)); + WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp, + correlation, norm_shift); + + // Calculate allowed starting point for peak finding. + // The peak location bestIndex must fulfill two criteria: + // (1) w16_bestIndex + input_length < + // timestamps_per_call_ + expand_->overlap_length(); + // (2) w16_bestIndex + input_length < start_position. + size_t start_index = timestamps_per_call_ + expand_->overlap_length(); + start_index = std::max(start_position, start_index); + start_index = (input_length > start_index) ? 0 : (start_index - input_length); + // Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.) + size_t start_index_downsamp = start_index / (fs_mult_ * 2); + + // Calculate a modified `stop_position_downsamp` to account for the increased + // start index `start_index_downsamp` and the effective array length. + size_t modified_stop_pos = + std::min(stop_position_downsamp, + kMaxCorrelationLength + pad_length - start_index_downsamp); + size_t best_correlation_index; + int16_t best_correlation; + static const size_t kNumCorrelationCandidates = 1; + DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp], + modified_stop_pos, kNumCorrelationCandidates, + fs_mult_, &best_correlation_index, + &best_correlation); + // Compensate for modified start index. + best_correlation_index += start_index; + + // Ensure that underrun does not occur for 10ms case => we have to get at + // least 10ms + overlap . (This should never happen thanks to the above + // modification of peak-finding starting point.) + while (((best_correlation_index + input_length) < + (timestamps_per_call_ + expand_->overlap_length())) || + ((best_correlation_index + input_length) < start_position)) { + RTC_DCHECK_NOTREACHED(); // Should never happen. 
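+    // Defensive fallback: advancing one expand period at a time keeps the
+    // merge point aligned with the periodicity of the expanded signal while
+    // moving it far enough out to satisfy both conditions above.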
+ best_correlation_index += expand_period; // Jump one lag ahead. + } + return best_correlation_index; +} + +size_t Merge::RequiredFutureSamples() { + return fs_hz_ / 100 * num_channels_; // 10 ms. +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge.h b/third_party/libwebrtc/modules/audio_coding/neteq/merge.h new file mode 100644 index 0000000000..2f27106bfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_ +#define MODULES_AUDIO_CODING_NETEQ_MERGE_H_ + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +namespace webrtc { + +// Forward declarations. +class Expand; +class SyncBuffer; + +// This class handles the transition from expansion to normal operation. +// When a packet is not available for decoding when needed, the expand operation +// is called to generate extrapolation data. If the missing packet arrives, +// i.e., it was just delayed, it can be decoded and appended directly to the +// end of the expanded data (thanks to how the Expand class operates). However, +// if a later packet arrives instead, the loss is a fact, and the new data must +// be stitched together with the end of the expanded data. This stitching is +// what the Merge class does. +class Merge { + public: + Merge(int fs_hz, + size_t num_channels, + Expand* expand, + SyncBuffer* sync_buffer); + virtual ~Merge(); + + Merge(const Merge&) = delete; + Merge& operator=(const Merge&) = delete; + + // The main method to produce the audio data. The decoded data is supplied in + // `input`, having `input_length` samples in total for all channels + // (interleaved). The result is written to `output`. The number of channels + // allocated in `output` defines the number of channels that will be used when + // de-interleaving `input`. + virtual size_t Process(int16_t* input, + size_t input_length, + AudioMultiVector* output); + + virtual size_t RequiredFutureSamples(); + + protected: + const int fs_hz_; + const size_t num_channels_; + + private: + static const int kMaxSampleRate = 48000; + static const size_t kExpandDownsampLength = 100; + static const size_t kInputDownsampLength = 40; + static const size_t kMaxCorrelationLength = 60; + + // Calls `expand_` to get more expansion data to merge with. The data is + // written to `expanded_signal_`. Returns the length of the expanded data, + // while `expand_period` will be the number of samples in one expansion period + // (typically one pitch period). The value of `old_length` will be the number + // of samples that were taken from the `sync_buffer_`. + size_t GetExpandedSignal(size_t* old_length, size_t* expand_period); + + // Analyzes `input` and `expanded_signal` and returns muting factor (Q14) to + // be used on the new data. + int16_t SignalScaling(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal) const; + + // Downsamples `input` (`input_length` samples) and `expanded_signal` to + // 4 kHz sample rate. 
The downsampled signals are written to + // `input_downsampled_` and `expanded_downsampled_`, respectively. + void Downsample(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal, + size_t expanded_length); + + // Calculates cross-correlation between `input_downsampled_` and + // `expanded_downsampled_`, and finds the correlation maximum. The maximizing + // lag is returned. + size_t CorrelateAndPeakSearch(size_t start_position, + size_t input_length, + size_t expand_period) const; + + const int fs_mult_; // fs_hz_ / 8000. + const size_t timestamps_per_call_; + Expand* expand_; + SyncBuffer* sync_buffer_; + int16_t expanded_downsampled_[kExpandDownsampLength]; + int16_t input_downsampled_[kInputDownsampLength]; + AudioMultiVector expanded_; + std::vector temp_data_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc new file mode 100644 index 0000000000..d5a55eb056 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Merge class. + +#include "modules/audio_coding/neteq/merge.h" + +#include +#include + +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(Merge, CreateAndDestroy) { + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); + Merge merge(fs, channels, &expand, &sync_buffer); +} + +namespace { +// This is the same size that is given to the SyncBuffer object in NetEq. +const size_t kNetEqSyncBufferLengthMs = 720; +} // namespace + +class MergeTest : public testing::TestWithParam { + protected: + MergeTest() + : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"), + 32000), + test_sample_rate_hz_(8000), + num_channels_(1), + background_noise_(num_channels_), + sync_buffer_(num_channels_, + kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000), + expand_(&background_noise_, + &sync_buffer_, + &random_vector_, + &statistics_, + test_sample_rate_hz_, + num_channels_), + merge_(test_sample_rate_hz_, num_channels_, &expand_, &sync_buffer_) { + input_file_.set_output_rate_hz(test_sample_rate_hz_); + } + + void SetUp() override { + // Fast-forward the input file until there is speech (about 1.1 second into + // the file). + const int speech_start_samples = + static_cast(test_sample_rate_hz_ * 1.1f); + ASSERT_TRUE(input_file_.Seek(speech_start_samples)); + + // Pre-load the sync buffer with speech data. 
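+    // Real speech gives the correlation search in Merge::Process something
+    // non-trivial to lock onto; a silent or constant buffer would make the
+    // test much less meaningful.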
+    std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
+    ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
+    sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
+    // Move index such that the sync buffer appears to have 5 ms left to play.
+    sync_buffer_.set_next_index(sync_buffer_.next_index() -
+                                test_sample_rate_hz_ * 5 / 1000);
+    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
+    ASSERT_GT(sync_buffer_.FutureLength(), 0u);
+  }
+
+  test::ResampleInputAudioFile input_file_;
+  int test_sample_rate_hz_;
+  size_t num_channels_;
+  BackgroundNoise background_noise_;
+  SyncBuffer sync_buffer_;
+  RandomVector random_vector_;
+  StatisticsCalculator statistics_;
+  Expand expand_;
+  Merge merge_;
+};
+
+TEST_P(MergeTest, Process) {
+  AudioMultiVector output(num_channels_);
+  // Start by calling Expand once, to prime the state.
+  EXPECT_EQ(0, expand_.Process(&output));
+  EXPECT_GT(output.Size(), 0u);
+  output.Clear();
+  // Now call Merge, but with a very short decoded input. Try different
+  // lengths of the input.
+  const size_t input_len = GetParam();
+  std::vector<int16_t> input(input_len, 17);
+  merge_.Process(input.data(), input_len, &output);
+  EXPECT_GT(output.Size(), 0u);
+}
+
+// Instantiate with values for the input length that are interesting in
+// Merge::Downsample. Why are these values interesting?
+// - In 8000 Hz sample rate, signal_offset in Merge::Downsample will be 2, so
+//   the values 1, 2, 3 are just around that value.
+// - Also in 8000 Hz, the variable length_limit in the same method will be 80,
+//   so values 80 and 81 will be on either side of the branch point
+//   "input_length <= length_limit".
+// - Finally, 160 is simply 20 ms in 8000 Hz, which is a common packet size.
+INSTANTIATE_TEST_SUITE_P(DifferentInputLengths,
+                         MergeTest,
+                         testing::Values(1, 2, 3, 80, 81, 160));
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
new file mode 100644
index 0000000000..503f6ac6bd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
@@ -0,0 +1,28 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ + +#include "modules/audio_coding/neteq/buffer_level_filter.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockBufferLevelFilter : public BufferLevelFilter { + public: + MOCK_METHOD(void, + Update, + (size_t buffer_size_samples, int time_stretched_samples)); + MOCK_METHOD(int, filtered_current_level, (), (const)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h new file mode 100644 index 0000000000..2394120e99 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_ + +#include + +#include "modules/audio_coding/neteq/decoder_database.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockDecoderDatabase : public DecoderDatabase { + public: + explicit MockDecoderDatabase( + rtc::scoped_refptr factory = nullptr) + : DecoderDatabase(factory, absl::nullopt) {} + ~MockDecoderDatabase() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(bool, Empty, (), (const, override)); + MOCK_METHOD(int, Size, (), (const, override)); + MOCK_METHOD(int, + RegisterPayload, + (int rtp_payload_type, const SdpAudioFormat& audio_format), + (override)); + MOCK_METHOD(int, Remove, (uint8_t rtp_payload_type), (override)); + MOCK_METHOD(void, RemoveAll, (), (override)); + MOCK_METHOD(const DecoderInfo*, + GetDecoderInfo, + (uint8_t rtp_payload_type), + (const, override)); + MOCK_METHOD(int, + SetActiveDecoder, + (uint8_t rtp_payload_type, bool* new_decoder), + (override)); + MOCK_METHOD(AudioDecoder*, GetActiveDecoder, (), (const, override)); + MOCK_METHOD(int, SetActiveCngDecoder, (uint8_t rtp_payload_type), (override)); + MOCK_METHOD(ComfortNoiseDecoder*, GetActiveCngDecoder, (), (const, override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h new file mode 100644 index 0000000000..d783f8743b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_ + +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/delay_manager.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockDelayManager : public DelayManager { + public: + MockDelayManager(const MockDelayManager::Config& config, + const TickTimer* tick_timer) + : DelayManager(config, tick_timer) {} + MOCK_METHOD(int, TargetDelayMs, (), (const)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h new file mode 100644 index 0000000000..c60c56d36b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_ + +#include "modules/audio_coding/neteq/dtmf_buffer.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockDtmfBuffer : public DtmfBuffer { + public: + MockDtmfBuffer(int fs) : DtmfBuffer(fs) {} + ~MockDtmfBuffer() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(void, Flush, (), (override)); + MOCK_METHOD(int, InsertEvent, (const DtmfEvent& event), (override)); + MOCK_METHOD(bool, + GetEvent, + (uint32_t current_timestamp, DtmfEvent* event), + (override)); + MOCK_METHOD(size_t, Length, (), (const, override)); + MOCK_METHOD(bool, Empty, (), (const, override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h new file mode 100644 index 0000000000..60de167c29 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ + +#include "modules/audio_coding/neteq/dtmf_tone_generator.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockDtmfToneGenerator : public DtmfToneGenerator { + public: + ~MockDtmfToneGenerator() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(int, Init, (int fs, int event, int attenuation), (override)); + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(int, + Generate, + (size_t num_samples, AudioMultiVector* output), + (override)); + MOCK_METHOD(bool, initialized, (), (const, override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h new file mode 100644 index 0000000000..9d66779021 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_ + +#include "modules/audio_coding/neteq/expand.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockExpand : public Expand { + public: + MockExpand(BackgroundNoise* background_noise, + SyncBuffer* sync_buffer, + RandomVector* random_vector, + StatisticsCalculator* statistics, + int fs, + size_t num_channels) + : Expand(background_noise, + sync_buffer, + random_vector, + statistics, + fs, + num_channels) {} + ~MockExpand() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(int, Process, (AudioMultiVector * output), (override)); + MOCK_METHOD(void, SetParametersForNormalAfterExpand, (), (override)); + MOCK_METHOD(void, SetParametersForMergeAfterExpand, (), (override)); + MOCK_METHOD(size_t, overlap_length, (), (const, override)); +}; + +} // namespace webrtc + +namespace webrtc { + +class MockExpandFactory : public ExpandFactory { + public: + MOCK_METHOD(Expand*, + Create, + (BackgroundNoise * background_noise, + SyncBuffer* sync_buffer, + RandomVector* random_vector, + StatisticsCalculator* statistics, + int fs, + size_t num_channels), + (const, override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h new file mode 100644 index 0000000000..03abbc1d4b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_ + +#include "modules/audio_coding/neteq/histogram.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockHistogram : public Histogram { + public: + MockHistogram(size_t num_buckets, int forget_factor) + : Histogram(num_buckets, forget_factor) {} + virtual ~MockHistogram() {} + + MOCK_METHOD(void, Add, (int), (override)); + MOCK_METHOD(int, Quantile, (int), (override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h new file mode 100644 index 0000000000..6d88e09216 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_ + +#include "api/neteq/neteq_controller.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockNetEqController : public NetEqController { + public: + MockNetEqController() = default; + ~MockNetEqController() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(void, SoftReset, (), (override)); + MOCK_METHOD(NetEq::Operation, + GetDecision, + (const NetEqStatus& neteq_status, bool* reset_decoder), + (override)); + MOCK_METHOD(void, RegisterEmptyPacket, (), (override)); + MOCK_METHOD(void, + SetSampleRate, + (int fs_hz, size_t output_size_samples), + (override)); + MOCK_METHOD(bool, SetMaximumDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, SetMinimumDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, SetBaseMinimumDelay, (int delay_ms), (override)); + MOCK_METHOD(int, GetBaseMinimumDelay, (), (const, override)); + MOCK_METHOD(bool, CngRfc3389On, (), (const, override)); + MOCK_METHOD(bool, CngOff, (), (const, override)); + MOCK_METHOD(void, SetCngOff, (), (override)); + MOCK_METHOD(void, ExpandDecision, (NetEq::Operation operation), (override)); + MOCK_METHOD(void, AddSampleMemory, (int32_t value), (override)); + MOCK_METHOD(int, TargetLevelMs, (), (const, override)); + MOCK_METHOD(absl::optional, + PacketArrived, + (int fs_hz, + bool should_update_stats, + const PacketArrivedInfo& info), + (override)); + MOCK_METHOD(void, NotifyMutedState, (), (override)); + MOCK_METHOD(bool, PeakFound, (), (const, override)); + MOCK_METHOD(int, GetFilteredBufferLevel, (), (const, override)); + MOCK_METHOD(void, set_sample_memory, (int32_t value), (override)); + MOCK_METHOD(size_t, noise_fast_forward, (), (const, override)); + MOCK_METHOD(size_t, packet_length_samples, (), (const, override)); + MOCK_METHOD(void, set_packet_length_samples, (size_t value), (override)); + MOCK_METHOD(void, set_prev_time_scale, (bool value), (override)); +}; + +} // namespace webrtc +#endif // 
MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h new file mode 100644 index 0000000000..48357ea466 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_ + +#include "modules/audio_coding/neteq/packet_buffer.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockPacketBuffer : public PacketBuffer { + public: + MockPacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer) + : PacketBuffer(max_number_of_packets, tick_timer) {} + ~MockPacketBuffer() override { Die(); } + MOCK_METHOD(void, Die, ()); + MOCK_METHOD(void, Flush, (StatisticsCalculator * stats), (override)); + MOCK_METHOD(void, + PartialFlush, + (int target_level_ms, + size_t sample_rate, + size_t last_decoded_length, + StatisticsCalculator* stats), + (override)); + MOCK_METHOD(bool, Empty, (), (const, override)); + MOCK_METHOD(int, + InsertPacket, + (Packet && packet, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms, + const DecoderDatabase& decoder_database), + (override)); + MOCK_METHOD(int, + InsertPacketList, + (PacketList * packet_list, + const DecoderDatabase& decoder_database, + absl::optional* current_rtp_payload_type, + absl::optional* current_cng_rtp_payload_type, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms), + (override)); + MOCK_METHOD(int, + NextTimestamp, + (uint32_t * next_timestamp), + (const, override)); + MOCK_METHOD(int, + NextHigherTimestamp, + (uint32_t timestamp, uint32_t* next_timestamp), + (const, override)); + MOCK_METHOD(const Packet*, PeekNextPacket, (), (const, override)); + MOCK_METHOD(absl::optional, GetNextPacket, (), (override)); + MOCK_METHOD(int, + DiscardNextPacket, + (StatisticsCalculator * stats), + (override)); + MOCK_METHOD(void, + DiscardOldPackets, + (uint32_t timestamp_limit, + uint32_t horizon_samples, + StatisticsCalculator* stats), + (override)); + MOCK_METHOD(void, + DiscardAllOldPackets, + (uint32_t timestamp_limit, StatisticsCalculator* stats), + (override)); + MOCK_METHOD(size_t, NumPacketsInBuffer, (), (const, override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h new file mode 100644 index 0000000000..9daf571a80 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_ + +#include "modules/audio_coding/neteq/red_payload_splitter.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockRedPayloadSplitter : public RedPayloadSplitter { + public: + MOCK_METHOD(bool, SplitRed, (PacketList * packet_list), (override)); + MOCK_METHOD(void, + CheckRedPayloads, + (PacketList * packet_list, + const DecoderDatabase& decoder_database), + (override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h new file mode 100644 index 0000000000..f8812478d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_ + +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockStatisticsCalculator : public StatisticsCalculator { + public: + MOCK_METHOD(void, PacketsDiscarded, (size_t num_packets), (override)); + MOCK_METHOD(void, + SecondaryPacketsDiscarded, + (size_t num_packets), + (override)); + MOCK_METHOD(void, RelativePacketArrivalDelay, (size_t delay_ms), (override)); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc new file mode 100644 index 0000000000..04cc5b52e8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/nack_tracker.h" + +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/experiments/struct_parameters_parser.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +const int kDefaultSampleRateKhz = 48; +const int kMaxPacketSizeMs = 120; +constexpr char kNackTrackerConfigFieldTrial[] = + "WebRTC-Audio-NetEqNackTrackerConfig"; + +} // namespace + +NackTracker::Config::Config() { + auto parser = StructParametersParser::Create( + "packet_loss_forget_factor", &packet_loss_forget_factor, + "ms_per_loss_percent", &ms_per_loss_percent, "never_nack_multiple_times", + &never_nack_multiple_times, "require_valid_rtt", &require_valid_rtt, + "max_loss_rate", &max_loss_rate); + parser->Parse( + webrtc::field_trial::FindFullName(kNackTrackerConfigFieldTrial)); + RTC_LOG(LS_INFO) << "Nack tracker config:" + " packet_loss_forget_factor=" + << packet_loss_forget_factor + << " ms_per_loss_percent=" << ms_per_loss_percent + << " never_nack_multiple_times=" << never_nack_multiple_times + << " require_valid_rtt=" << require_valid_rtt + << " max_loss_rate=" << max_loss_rate; +} + +NackTracker::NackTracker() + : sequence_num_last_received_rtp_(0), + timestamp_last_received_rtp_(0), + any_rtp_received_(false), + sequence_num_last_decoded_rtp_(0), + timestamp_last_decoded_rtp_(0), + any_rtp_decoded_(false), + sample_rate_khz_(kDefaultSampleRateKhz), + max_nack_list_size_(kNackListSizeLimit) {} + +NackTracker::~NackTracker() = default; + +void NackTracker::UpdateSampleRate(int sample_rate_hz) { + RTC_DCHECK_GT(sample_rate_hz, 0); + sample_rate_khz_ = sample_rate_hz / 1000; +} + +void NackTracker::UpdateLastReceivedPacket(uint16_t sequence_number, + uint32_t timestamp) { + // Just record the value of sequence number and timestamp if this is the + // first packet. + if (!any_rtp_received_) { + sequence_num_last_received_rtp_ = sequence_number; + timestamp_last_received_rtp_ = timestamp; + any_rtp_received_ = true; + // If no packet is decoded, to have a reasonable estimate of time-to-play + // use the given values. + if (!any_rtp_decoded_) { + sequence_num_last_decoded_rtp_ = sequence_number; + timestamp_last_decoded_rtp_ = timestamp; + } + return; + } + + if (sequence_number == sequence_num_last_received_rtp_) + return; + + // Received RTP should not be in the list. + nack_list_.erase(sequence_number); + + // If this is an old sequence number, no more action is required, return. + if (IsNewerSequenceNumber(sequence_num_last_received_rtp_, sequence_number)) + return; + + UpdatePacketLossRate(sequence_number - sequence_num_last_received_rtp_ - 1); + + UpdateList(sequence_number, timestamp); + + sequence_num_last_received_rtp_ = sequence_number; + timestamp_last_received_rtp_ = timestamp; + LimitNackListSize(); +} + +absl::optional NackTracker::GetSamplesPerPacket( + uint16_t sequence_number_current_received_rtp, + uint32_t timestamp_current_received_rtp) const { + uint32_t timestamp_increase = + timestamp_current_received_rtp - timestamp_last_received_rtp_; + uint16_t sequence_num_increase = + sequence_number_current_received_rtp - sequence_num_last_received_rtp_; + + int samples_per_packet = timestamp_increase / sequence_num_increase; + if (samples_per_packet == 0 || + samples_per_packet > kMaxPacketSizeMs * sample_rate_khz_) { + // Not a valid samples per packet. 
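+    // Zero can occur with reordered or oddly timestamped packets; the upper
+    // bound rejects implausibly long packets (kMaxPacketSizeMs = 120 ms is
+    // 5760 samples at 48 kHz).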
+ return absl::nullopt; + } + return samples_per_packet; +} + +void NackTracker::UpdateList(uint16_t sequence_number_current_received_rtp, + uint32_t timestamp_current_received_rtp) { + if (!IsNewerSequenceNumber(sequence_number_current_received_rtp, + sequence_num_last_received_rtp_ + 1)) { + return; + } + RTC_DCHECK(!any_rtp_decoded_ || + IsNewerSequenceNumber(sequence_number_current_received_rtp, + sequence_num_last_decoded_rtp_)); + + absl::optional samples_per_packet = GetSamplesPerPacket( + sequence_number_current_received_rtp, timestamp_current_received_rtp); + if (!samples_per_packet) { + return; + } + + for (uint16_t n = sequence_num_last_received_rtp_ + 1; + IsNewerSequenceNumber(sequence_number_current_received_rtp, n); ++n) { + uint32_t timestamp = EstimateTimestamp(n, *samples_per_packet); + NackElement nack_element(TimeToPlay(timestamp), timestamp); + nack_list_.insert(nack_list_.end(), std::make_pair(n, nack_element)); + } +} + +uint32_t NackTracker::EstimateTimestamp(uint16_t sequence_num, + int samples_per_packet) { + uint16_t sequence_num_diff = sequence_num - sequence_num_last_received_rtp_; + return sequence_num_diff * samples_per_packet + timestamp_last_received_rtp_; +} + +void NackTracker::UpdateEstimatedPlayoutTimeBy10ms() { + while (!nack_list_.empty() && + nack_list_.begin()->second.time_to_play_ms <= 10) + nack_list_.erase(nack_list_.begin()); + + for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end(); ++it) + it->second.time_to_play_ms -= 10; +} + +void NackTracker::UpdateLastDecodedPacket(uint16_t sequence_number, + uint32_t timestamp) { + if (IsNewerSequenceNumber(sequence_number, sequence_num_last_decoded_rtp_) || + !any_rtp_decoded_) { + sequence_num_last_decoded_rtp_ = sequence_number; + timestamp_last_decoded_rtp_ = timestamp; + // Packets in the list with sequence numbers less than the + // sequence number of the decoded RTP should be removed from the lists. + // They will be discarded by the jitter buffer if they arrive. + nack_list_.erase(nack_list_.begin(), + nack_list_.upper_bound(sequence_num_last_decoded_rtp_)); + + // Update estimated time-to-play. + for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end(); + ++it) + it->second.time_to_play_ms = TimeToPlay(it->second.estimated_timestamp); + } else { + RTC_DCHECK_EQ(sequence_number, sequence_num_last_decoded_rtp_); + + // Same sequence number as before. 10 ms is elapsed, update estimations for + // time-to-play. + UpdateEstimatedPlayoutTimeBy10ms(); + + // Update timestamp for better estimate of time-to-play, for packets which + // are added to NACK list later on. + timestamp_last_decoded_rtp_ += sample_rate_khz_ * 10; + } + any_rtp_decoded_ = true; +} + +NackTracker::NackList NackTracker::GetNackList() const { + return nack_list_; +} + +void NackTracker::Reset() { + nack_list_.clear(); + + sequence_num_last_received_rtp_ = 0; + timestamp_last_received_rtp_ = 0; + any_rtp_received_ = false; + sequence_num_last_decoded_rtp_ = 0; + timestamp_last_decoded_rtp_ = 0; + any_rtp_decoded_ = false; + sample_rate_khz_ = kDefaultSampleRateKhz; +} + +void NackTracker::SetMaxNackListSize(size_t max_nack_list_size) { + RTC_CHECK_GT(max_nack_list_size, 0); + // Ugly hack to get around the problem of passing static consts by reference. 
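+  // (RTC_CHECK_LE binds its arguments to references, and taking a reference
+  // to the static const member would otherwise require an out-of-class
+  // definition.)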
+ const size_t kNackListSizeLimitLocal = NackTracker::kNackListSizeLimit; + RTC_CHECK_LE(max_nack_list_size, kNackListSizeLimitLocal); + + max_nack_list_size_ = max_nack_list_size; + LimitNackListSize(); +} + +void NackTracker::LimitNackListSize() { + uint16_t limit = sequence_num_last_received_rtp_ - + static_cast(max_nack_list_size_) - 1; + nack_list_.erase(nack_list_.begin(), nack_list_.upper_bound(limit)); +} + +int64_t NackTracker::TimeToPlay(uint32_t timestamp) const { + uint32_t timestamp_increase = timestamp - timestamp_last_decoded_rtp_; + return timestamp_increase / sample_rate_khz_; +} + +// We don't erase elements with time-to-play shorter than round-trip-time. +std::vector NackTracker::GetNackList(int64_t round_trip_time_ms) { + RTC_DCHECK_GE(round_trip_time_ms, 0); + std::vector sequence_numbers; + if (round_trip_time_ms == 0) { + if (config_.require_valid_rtt) { + return sequence_numbers; + } else { + round_trip_time_ms = config_.default_rtt_ms; + } + } + if (packet_loss_rate_ > + static_cast(config_.max_loss_rate * (1 << 30))) { + return sequence_numbers; + } + // The estimated packet loss is between 0 and 1, so we need to multiply by 100 + // here. + int max_wait_ms = + 100.0 * config_.ms_per_loss_percent * packet_loss_rate_ / (1 << 30); + for (NackList::const_iterator it = nack_list_.begin(); it != nack_list_.end(); + ++it) { + int64_t time_since_packet_ms = + (timestamp_last_received_rtp_ - it->second.estimated_timestamp) / + sample_rate_khz_; + if (it->second.time_to_play_ms > round_trip_time_ms || + time_since_packet_ms + round_trip_time_ms < max_wait_ms) + sequence_numbers.push_back(it->first); + } + if (config_.never_nack_multiple_times) { + nack_list_.clear(); + } + return sequence_numbers; +} + +void NackTracker::UpdatePacketLossRate(int packets_lost) { + const uint64_t alpha_q30 = (1 << 30) * config_.packet_loss_forget_factor; + // Exponential filter. + packet_loss_rate_ = (alpha_q30 * packet_loss_rate_) >> 30; + for (int i = 0; i < packets_lost; ++i) { + packet_loss_rate_ = + ((alpha_q30 * packet_loss_rate_) >> 30) + ((1 << 30) - alpha_q30); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h new file mode 100644 index 0000000000..14ba2166d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_ +#define MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_ + +#include +#include + +#include +#include + +#include "absl/types/optional.h" +#include "modules/include/module_common_types_public.h" +#include "rtc_base/gtest_prod_util.h" + +// +// The NackTracker class keeps track of the lost packets, an estimate of +// time-to-play for each packet is also given. +// +// Every time a packet is pushed into NetEq, LastReceivedPacket() has to be +// called to update the NACK list. +// +// Every time 10ms audio is pulled from NetEq LastDecodedPacket() should be +// called, and time-to-play is updated at that moment. 
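+//
+// A rough usage sketch (caller-side names such as `rtt_ms` are illustrative
+// and not part of this API):
+//
+//   NackTracker nack;
+//   nack.UpdateSampleRate(48000);
+//   // For every packet pushed into NetEq:
+//   nack.UpdateLastReceivedPacket(header.sequenceNumber, header.timestamp);
+//   // For every 10 ms of audio pulled from NetEq:
+//   nack.UpdateLastDecodedPacket(decoded_seq_num, decoded_timestamp);
+//   // Sequence numbers worth re-requesting, given the current RTT estimate:
+//   std::vector<uint16_t> to_nack = nack.GetNackList(rtt_ms);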
+//
+// If packet N is received, any packet prior to N which has not arrived is
+// considered lost, and should be labeled as "missing" (the size of
+// the list might be limited and older packets eliminated from the list).
+//
+// The NackTracker class has to know about the sample rate of the packets to
+// compute time-to-play. So the sample rate should be set as soon as the first
+// packet is received. If there is a change in the receive codec (sender
+// changes codec), then NackTracker should be reset. This is because NetEq
+// would flush its buffer and re-transmission is meaningless for old packets.
+// Therefore, in that case, after reset the sampling rate has to be updated.
+//
+// Thread Safety
+// =============
+// Please note that this class is not thread safe. The class must be protected
+// if different APIs are called from different threads.
+//
+namespace webrtc {
+
+class NackTracker {
+ public:
+  // A limit for the size of the NACK list.
+  static const size_t kNackListSizeLimit = 500;  // 10 seconds for 20 ms frame
+                                                 // packets.
+  NackTracker();
+  ~NackTracker();
+
+  // Set a maximum for the size of the NACK list. If the last received packet
+  // has sequence number N, then the NACK list will not contain any element
+  // with a sequence number earlier than N - `max_nack_list_size`.
+  //
+  // The largest maximum size is defined by `kNackListSizeLimit`.
+  void SetMaxNackListSize(size_t max_nack_list_size);
+
+  // Set the sampling rate.
+  //
+  // If the sampling rate of the received packets changes, call this function
+  // to update the sampling rate. Note that if there is any change in the
+  // received codec then NetEq will flush its buffer and NACK has to be reset.
+  // After Reset() is called the sampling rate has to be set.
+  void UpdateSampleRate(int sample_rate_hz);
+
+  // Update the sequence number and the timestamp of the last decoded RTP.
+  // This API should be called every time 10 ms audio is pulled from NetEq.
+  void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp);
+
+  // Update the sequence number and the timestamp of the last received RTP.
+  // This API should be called every time a packet is pushed into ACM.
+  void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp);
+
+  // Get a list of "missing" packets which have an expected time-to-play
+  // larger than the given round-trip-time (in milliseconds).
+  // Note: Late packets are not included.
+  // Calling this method multiple times may give different results, since the
+  // internal nack list may get flushed if never_nack_multiple_times_ is true.
+  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms);
+
+  // Reset to default values. The NACK list is cleared.
+  // `max_nack_list_size_` preserves its value.
+  void Reset();
+
+  // Returns the estimated packet loss rate in Q30, for testing only.
+  uint32_t GetPacketLossRateForTest() { return packet_loss_rate_; }
+
+ private:
+  // This test needs to access the private method GetNackList().
+  FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay);
+
+  // Options that can be configured via field trial.
+  struct Config {
+    Config();
+
+    // The exponential decay factor used to estimate the packet loss rate.
+    double packet_loss_forget_factor = 0.996;
+    // How many additional ms we are willing to wait (at most) for nacked
+    // packets for each additional percentage of packet loss.
+    int ms_per_loss_percent = 20;
+    // If true, never nack packets more than once.
+namespace webrtc {
+
+class NackTracker {
+ public:
+  // A limit for the size of the NACK list.
+  static const size_t kNackListSizeLimit = 500;  // 10 seconds for 20 ms frame
+                                                 // packets.
+  NackTracker();
+  ~NackTracker();
+
+  // Set a maximum for the size of the NACK list. If the last received packet
+  // has sequence number N, then the NACK list will not contain any element
+  // with a sequence number earlier than N - `max_nack_list_size`.
+  //
+  // The largest maximum size is defined by `kNackListSizeLimit`.
+  void SetMaxNackListSize(size_t max_nack_list_size);
+
+  // Set the sampling rate.
+  //
+  // If the sample rate associated with the received packets changes, call this
+  // function to update it. Note that if the received codec changes, NetEq will
+  // flush its buffer and NACK has to be reset. After Reset() is called, the
+  // sampling rate has to be set again.
+  void UpdateSampleRate(int sample_rate_hz);
+
+  // Update the sequence number and the timestamp of the last decoded RTP
+  // packet. This API should be called every time 10 ms of audio is pulled from
+  // NetEq.
+  void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp);
+
+  // Update the sequence number and the timestamp of the last received RTP
+  // packet. This API should be called every time a packet is pushed into ACM.
+  void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp);
+
+  // Get a list of "missing" packets which have an expected time-to-play larger
+  // than the given round-trip time (in milliseconds).
+  // Note: Late packets are not included.
+  // Calling this method multiple times may give different results, since the
+  // internal nack list may get flushed if never_nack_multiple_times_ is true.
+  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms);
+
+  // Reset to default values. The NACK list is cleared.
+  // `max_nack_list_size_` preserves its value.
+  void Reset();
+
+  // Returns the estimated packet loss rate in Q30, for testing only.
+  uint32_t GetPacketLossRateForTest() { return packet_loss_rate_; }
+
+ private:
+  // This test needs to access the private method GetNackList().
+  FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay);
+
+  // Options that can be configured via field trial.
+  struct Config {
+    Config();
+
+    // The exponential decay factor used to estimate the packet loss rate.
+    double packet_loss_forget_factor = 0.996;
+    // How many additional ms we are willing to wait (at most) for nacked
+    // packets for each additional percentage of packet loss.
+    int ms_per_loss_percent = 20;
+    // If true, never nack packets more than once.
+    bool never_nack_multiple_times = false;
+    // Only nack if the RTT is valid.
+    bool require_valid_rtt = false;
+    // Default RTT to use unless `require_valid_rtt` is set.
+    int default_rtt_ms = 100;
+    // Do not nack if the loss rate is above this value.
+    double max_loss_rate = 1.0;
+  };
+
+  struct NackElement {
+    NackElement(int64_t initial_time_to_play_ms, uint32_t initial_timestamp)
+        : time_to_play_ms(initial_time_to_play_ms),
+          estimated_timestamp(initial_timestamp) {}
+
+    // Estimated time (ms) left for this packet to be decoded. This estimate is
+    // updated every time the jitter buffer decodes a packet.
+    int64_t time_to_play_ms;
+
+    // A guess about the timestamp of the missing packet; it is used to
+    // estimate `time_to_play_ms`. The estimate might be slightly wrong if
+    // there has been a frame-size change between the last received packet and
+    // the missing packet. However, the risk of this is low, and in case of
+    // such errors, there will be a minor misestimation in the time-to-play of
+    // missing packets. This will have a very minor effect on NACK performance.
+    uint32_t estimated_timestamp;
+  };
+
+  class NackListCompare {
+   public:
+    bool operator()(uint16_t sequence_number_old,
+                    uint16_t sequence_number_new) const {
+      return IsNewerSequenceNumber(sequence_number_new, sequence_number_old);
+    }
+  };
+
+  typedef std::map<uint16_t, NackElement, NackListCompare> NackList;
+
+  // This API is used only for testing to assess whether time-to-play is
+  // computed correctly.
+  NackList GetNackList() const;
+
+  // This function subtracts 10 ms from the time-to-play of all packets in the
+  // NACK list. It is called when 10 ms have elapsed with no new RTP packet
+  // decoded.
+  void UpdateEstimatedPlayoutTimeBy10ms();
+
+  // Returns a valid number of samples per packet given the current received
+  // sequence number and timestamp, or nullopt if none could be computed.
+  absl::optional<int> GetSamplesPerPacket(
+      uint16_t sequence_number_current_received_rtp,
+      uint32_t timestamp_current_received_rtp) const;
+
+  // Given the `sequence_number_current_received_rtp` of the currently received
+  // RTP packet, update the list. Packets that are older than the received
+  // packet are added to the nack list.
+  void UpdateList(uint16_t sequence_number_current_received_rtp,
+                  uint32_t timestamp_current_received_rtp);
+
+  // Packets which have a sequence number older than
+  // `sequence_num_last_received_rtp_` - `max_nack_list_size_` are removed
+  // from the NACK list.
+  void LimitNackListSize();
+
+  // Estimate timestamp of a missing packet given its sequence number.
+  uint32_t EstimateTimestamp(uint16_t sequence_number, int samples_per_packet);
+
+  // Compute time-to-play given a timestamp.
+  int64_t TimeToPlay(uint32_t timestamp) const;
+
+  // Updates the estimated packet loss rate.
+  void UpdatePacketLossRate(int packets_lost);
+
+  const Config config_;
+
+  // Valid if a packet is received.
+  uint16_t sequence_num_last_received_rtp_;
+  uint32_t timestamp_last_received_rtp_;
+  bool any_rtp_received_;  // If any packet received.
+
+  // Valid if a packet is decoded.
+  uint16_t sequence_num_last_decoded_rtp_;
+  uint32_t timestamp_last_decoded_rtp_;
+  bool any_rtp_decoded_;  // If any packet decoded.
+
+  int sample_rate_khz_;  // Sample rate in kHz.
+
+  // A list of missing packets to be retransmitted. Components of the list
+  // contain the sequence number of missing packets and the estimated time that
+  // each packet is going to be played out.
+  NackList nack_list_;
+
+  // NACK list will not keep track of missing packets prior to
+  // `sequence_num_last_received_rtp_` - `max_nack_list_size_`.
+  size_t max_nack_list_size_;
+
+  // Current estimate of the packet loss rate in Q30.
+  uint32_t packet_loss_rate_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc
new file mode 100644
index 0000000000..bcc5120ff3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/nack_tracker.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kSampleRateHz = 16000;
+const int kPacketSizeMs = 30;
+const uint32_t kTimestampIncrement = 480;  // 30 ms.
+const int64_t kShortRoundTripTimeMs = 1;
+
+bool IsNackListCorrect(const std::vector<uint16_t>& nack_list,
+                       const uint16_t* lost_sequence_numbers,
+                       size_t num_lost_packets) {
+  if (nack_list.size() != num_lost_packets)
+    return false;
+
+  if (num_lost_packets == 0)
+    return true;
+
+  for (size_t k = 0; k < nack_list.size(); ++k) {
+    int seq_num = nack_list[k];
+    bool seq_num_matched = false;
+    for (size_t n = 0; n < num_lost_packets; ++n) {
+      if (seq_num == lost_sequence_numbers[n]) {
+        seq_num_matched = true;
+        break;
+      }
+    }
+    if (!seq_num_matched)
+      return false;
+  }
+  return true;
+}
+
+}  // namespace
+
+TEST(NackTrackerTest, EmptyListWhenNoPacketLoss) {
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+
+  int seq_num = 1;
+  uint32_t timestamp = 0;
+
+  std::vector<uint16_t> nack_list;
+  for (int n = 0; n < 100; n++) {
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    seq_num++;
+    timestamp += kTimestampIncrement;
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(nack_list.empty());
+  }
+}
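+
+// Editorial note on the wrap-around offsets used below: lost sequence numbers
+// are shifted by 65531 so that adding small values wraps past the uint16_t
+// maximum of 65535. IsNewerSequenceNumber() compares sequence numbers in
+// modular arithmetic, so e.g. 2 (== 65538 mod 2^16) is still considered newer
+// than 65533.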
+
+TEST(NackTrackerTest, LatePacketsMovedToNackThenNackListDoesNotChange) {
+  const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
+  static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
+                                        sizeof(kSequenceNumberLostPackets[0]);
+
+  for (int k = 0; k < 2; k++) {  // Two iterations, without/with wrap around.
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+
+    uint16_t sequence_num_lost_packets[kNumAllLostPackets];
+    for (int n = 0; n < kNumAllLostPackets; n++) {
+      sequence_num_lost_packets[n] =
+          kSequenceNumberLostPackets[n] +
+          k * 65531;  // Wrap around in sequence numbers for `k == 1`.
+    }
+    uint16_t seq_num = sequence_num_lost_packets[0] - 1;
+
+    uint32_t timestamp = 0;
+    std::vector<uint16_t> nack_list;
+
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(nack_list.empty());
+
+    seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
+    timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
+    int num_lost_packets = std::max(0, kNumAllLostPackets);
+
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
+                                  num_lost_packets));
+    seq_num++;
+    timestamp += kTimestampIncrement;
+    num_lost_packets++;
+
+    for (int n = 0; n < 100; ++n) {
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+      nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+      EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
+                                    kNumAllLostPackets));
+      seq_num++;
+      timestamp += kTimestampIncrement;
+    }
+  }
+}
+
+TEST(NackTrackerTest, ArrivedPacketsAreRemovedFromNackList) {
+  const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
+  static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
+                                        sizeof(kSequenceNumberLostPackets[0]);
+
+  for (int k = 0; k < 2; ++k) {  // Two iterations, without/with wrap around.
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+
+    uint16_t sequence_num_lost_packets[kNumAllLostPackets];
+    for (int n = 0; n < kNumAllLostPackets; ++n) {
+      sequence_num_lost_packets[n] = kSequenceNumberLostPackets[n] +
+                                     k * 65531;  // Wrap around for `k == 1`.
+    }
+
+    uint16_t seq_num = sequence_num_lost_packets[0] - 1;
+    uint32_t timestamp = 0;
+
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(nack_list.empty());
+
+    size_t index_retransmitted_rtp = 0;
+    uint32_t timestamp_retransmitted_rtp = timestamp + kTimestampIncrement;
+
+    seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
+    timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
+    size_t num_lost_packets = kNumAllLostPackets;
+    for (int n = 0; n < kNumAllLostPackets; ++n) {
+      // The number of lost packets does not change in the first iteration (one
+      // packet is added to the list and one is removed). Thereafter, the list
+      // shrinks every iteration.
+      if (n >= 1)
+        num_lost_packets--;
+
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+      nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+      EXPECT_TRUE(IsNackListCorrect(
+          nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
+          num_lost_packets));
+      seq_num++;
+      timestamp += kTimestampIncrement;
+
+      // Retransmission of a lost RTP packet.
+      nack.UpdateLastReceivedPacket(
+          sequence_num_lost_packets[index_retransmitted_rtp],
+          timestamp_retransmitted_rtp);
+      index_retransmitted_rtp++;
+      timestamp_retransmitted_rtp += kTimestampIncrement;
+
+      nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+      EXPECT_TRUE(IsNackListCorrect(
+          nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
+          num_lost_packets - 1));  // One fewer lost packet in the list.
+    }
+    ASSERT_TRUE(nack_list.empty());
+  }
+}
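+
+// Editorial note on the arithmetic in the next test: packets are 30 ms long
+// (kPacketSizeMs), and the first lost packet lies two packet durations after
+// the first decoded one, so the expected time-to-play of the lost packet with
+// index `index` is (index + 2) * kPacketSizeMs, as asserted below.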
+
+// Assess whether the estimation of timestamps and time-to-play is correct.
+// Cover all combinations of timestamp and sequence number wrap-around.
+TEST(NackTrackerTest, EstimateTimestampAndTimeToPlay) {
+  const uint16_t kLostPackets[] = {2, 3, 4, 5, 6, 7, 8,
+                                   9, 10, 11, 12, 13, 14, 15};
+  static const int kNumAllLostPackets =
+      sizeof(kLostPackets) / sizeof(kLostPackets[0]);
+
+  for (int k = 0; k < 4; ++k) {
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+
+    // Sequence number wrap around if `k` is 2 or 3.
+    int seq_num_offset = (k < 2) ? 0 : 65531;
+
+    // Timestamp wrap around if `k` is 1 or 3.
+    uint32_t timestamp_offset =
+        (k & 0x1) ? static_cast<uint32_t>(0xffffffff) - 6 : 0;
+
+    uint32_t timestamp_lost_packets[kNumAllLostPackets];
+    uint16_t seq_num_lost_packets[kNumAllLostPackets];
+    for (int n = 0; n < kNumAllLostPackets; ++n) {
+      timestamp_lost_packets[n] =
+          timestamp_offset + kLostPackets[n] * kTimestampIncrement;
+      seq_num_lost_packets[n] = seq_num_offset + kLostPackets[n];
+    }
+
+    // We want to push two packets before the lost burst starts.
+    uint16_t seq_num = seq_num_lost_packets[0] - 2;
+    uint32_t timestamp = timestamp_lost_packets[0] - 2 * kTimestampIncrement;
+
+    const uint16_t first_seq_num = seq_num;
+    const uint32_t first_timestamp = timestamp;
+
+    // Push two consecutive packets to get a correct estimate of the timestamp
+    // increase.
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    seq_num++;
+    timestamp += kTimestampIncrement;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+    // A packet after the last one that is supposed to be lost.
+    seq_num = seq_num_lost_packets[kNumAllLostPackets - 1] + 1;
+    timestamp =
+        timestamp_lost_packets[kNumAllLostPackets - 1] + kTimestampIncrement;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+    NackTracker::NackList nack_list = nack.GetNackList();
+    EXPECT_EQ(static_cast<size_t>(kNumAllLostPackets), nack_list.size());
+
+    // Pretend the first packet is decoded.
+    nack.UpdateLastDecodedPacket(first_seq_num, first_timestamp);
+    nack_list = nack.GetNackList();
+
+    NackTracker::NackList::iterator it = nack_list.begin();
+    while (it != nack_list.end()) {
+      seq_num = it->first - seq_num_offset;
+      int index = seq_num - kLostPackets[0];
+      EXPECT_EQ(timestamp_lost_packets[index], it->second.estimated_timestamp);
+      EXPECT_EQ((index + 2) * kPacketSizeMs, it->second.time_to_play_ms);
+      ++it;
+    }
+
+    // Pretend 10 ms have passed and we have pulled audio from NetEq; it still
+    // reports the same sequence number as decoded, so time-to-play should be
+    // reduced by 10 ms.
+    nack.UpdateLastDecodedPacket(first_seq_num, first_timestamp);
+    nack_list = nack.GetNackList();
+    it = nack_list.begin();
+    while (it != nack_list.end()) {
+      seq_num = it->first - seq_num_offset;
+      int index = seq_num - kLostPackets[0];
+      EXPECT_EQ((index + 2) * kPacketSizeMs - 10, it->second.time_to_play_ms);
+      ++it;
+    }
+  }
+}
+
+TEST(NackTrackerTest,
+     MissingPacketsPriorToLastDecodedRtpShouldNotBeInNackList) {
+  for (int m = 0; m < 2; ++m) {
+    uint16_t seq_num_offset = (m == 0) ? 0 : 65531;  // Wrap around if `m` is 1.
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+
+    // Push two consecutive packets to get a correct estimate of the timestamp
+    // increase.
+    uint16_t seq_num = 0;
+    nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
+                                  seq_num * kTimestampIncrement);
+    seq_num++;
+    nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
+                                  seq_num * kTimestampIncrement);
+
+    // Skip 10 packets (larger than the NACK threshold).
+    const int kNumLostPackets = 10;
+    seq_num += kNumLostPackets + 1;
+    nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
+                                  seq_num * kTimestampIncrement);
+
+    const size_t kExpectedListSize = kNumLostPackets;
+    std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_EQ(kExpectedListSize, nack_list.size());
+
+    for (int k = 0; k < 2; ++k) {
+      // Decoding of the first and the second arrived packets.
+      for (int n = 0; n < kPacketSizeMs / 10; ++n) {
+        nack.UpdateLastDecodedPacket(seq_num_offset + k,
+                                     k * kTimestampIncrement);
+        nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+        EXPECT_EQ(kExpectedListSize, nack_list.size());
+      }
+    }
+
+    // Decoding of the last received packet.
+    nack.UpdateLastDecodedPacket(seq_num + seq_num_offset,
+                                 seq_num * kTimestampIncrement);
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(nack_list.empty());
+
+    // Make sure the list of late packets is also empty. To check that, push a
+    // few packets; if the late list is not empty, its content will show up in
+    // the NACK list.
+    for (int n = 0; n < 10; ++n) {
+      seq_num++;
+      nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
+                                    seq_num * kTimestampIncrement);
+      nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+      EXPECT_TRUE(nack_list.empty());
+    }
+  }
+}
+
+TEST(NackTrackerTest, Reset) {
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+
+  // Push two consecutive packets to get a correct estimate of the timestamp
+  // increase.
+  uint16_t seq_num = 0;
+  nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
+  seq_num++;
+  nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
+
+  // Skip 10 packets (larger than the NACK threshold).
+  const int kNumLostPackets = 10;
+  seq_num += kNumLostPackets + 1;
+  nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
+
+  const size_t kExpectedListSize = kNumLostPackets;
+  std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+  EXPECT_EQ(kExpectedListSize, nack_list.size());
+
+  nack.Reset();
+  nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+  EXPECT_TRUE(nack_list.empty());
+}
+
+TEST(NackTrackerTest, ListSizeAppliedFromBeginning) {
+  const size_t kNackListSize = 10;
+  for (int m = 0; m < 2; ++m) {
+    uint16_t seq_num_offset = (m == 0) ? 0 : 65525;  // Wrap around if `m` is 1.
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+    nack.SetMaxNackListSize(kNackListSize);
+
+    uint16_t seq_num = seq_num_offset;
+    uint32_t timestamp = 0x12345678;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+    // More packets lost than the NACK-list size limit.
+    uint16_t num_lost_packets = kNackListSize + 5;
+
+    seq_num += num_lost_packets + 1;
+    timestamp += (num_lost_packets + 1) * kTimestampIncrement;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+    std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_EQ(kNackListSize, nack_list.size());
+  }
+}
+
+TEST(NackTrackerTest, ChangeOfListSizeAppliedAndOldElementsRemoved) {
+  const size_t kNackListSize = 10;
+  for (int m = 0; m < 2; ++m) {
+    uint16_t seq_num_offset = (m == 0) ? 0 : 65525;  // Wrap around if `m` is 1.
+    NackTracker nack;
+    nack.UpdateSampleRate(kSampleRateHz);
+
+    uint16_t seq_num = seq_num_offset;
+    uint32_t timestamp = 0x87654321;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+    // More packets lost than the NACK-list size limit.
+    uint16_t num_lost_packets = kNackListSize + 5;
+
+    std::unique_ptr<uint16_t[]> seq_num_lost(new uint16_t[num_lost_packets]);
+    for (int n = 0; n < num_lost_packets; ++n) {
+      seq_num_lost[n] = ++seq_num;
+    }
+
+    ++seq_num;
+    timestamp += (num_lost_packets + 1) * kTimestampIncrement;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    size_t expected_size = num_lost_packets;
+
+    std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_EQ(expected_size, nack_list.size());
+
+    nack.SetMaxNackListSize(kNackListSize);
+    expected_size = kNackListSize;
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(IsNackListCorrect(
+        nack_list, &seq_num_lost[num_lost_packets - kNackListSize],
+        expected_size));
+
+    // NACK list should shrink.
+    for (size_t n = 1; n < kNackListSize; ++n) {
+      ++seq_num;
+      timestamp += kTimestampIncrement;
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+      --expected_size;
+      nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+      EXPECT_TRUE(IsNackListCorrect(
+          nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
+          expected_size));
+    }
+
+    // After this packet, the NACK list should be empty.
+    ++seq_num;
+    timestamp += kTimestampIncrement;
+    nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    nack_list = nack.GetNackList(kShortRoundTripTimeMs);
+    EXPECT_TRUE(nack_list.empty());
+  }
+}
+
+TEST(NackTrackerTest, RoundTripTimeIsApplied) {
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+  // Five lost packets.
+  uint16_t kNumLostPackets = 5;
+
+  seq_num += (1 + kNumLostPackets);
+  timestamp += (1 + kNumLostPackets) * kTimestampIncrement;
+  nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+  // Expected times-to-play are:
+  // kPacketSizeMs - 10, 2*kPacketSizeMs - 10, 3*kPacketSizeMs - 10, ...
+  //
+  // sequence number:  1,  2,  3,   4,   5
+  // time-to-play:    20, 50, 80, 110, 140
+  //
+  std::vector<uint16_t> nack_list = nack.GetNackList(100);
+  ASSERT_EQ(2u, nack_list.size());
+  EXPECT_EQ(4, nack_list[0]);
+  EXPECT_EQ(5, nack_list[1]);
+}
+
+// Set never_nack_multiple_times to true with a field trial and verify that
+// packets are not nacked multiple times.
+TEST(NackTrackerTest, DoNotNackMultipleTimes) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-NetEqNackTrackerConfig/"
+      "packet_loss_forget_factor:0.996,ms_per_loss_percent:20,"
+      "never_nack_multiple_times:true/");
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+  uint16_t kNumLostPackets = 3;
+
+  seq_num += (1 + kNumLostPackets);
+  timestamp += (1 + kNumLostPackets) * kTimestampIncrement;
+  nack.UpdateLastReceivedPacket(seq_num, timestamp);
+
+  std::vector<uint16_t> nack_list = nack.GetNackList(10);
+  ASSERT_EQ(3u, nack_list.size());
+  EXPECT_EQ(1, nack_list[0]);
+  EXPECT_EQ(2, nack_list[1]);
+  EXPECT_EQ(3, nack_list[2]);
+  // When we get the nack list again, it should be empty.
+  std::vector<uint16_t> nack_list2 = nack.GetNackList(10);
+  EXPECT_TRUE(nack_list2.empty());
+}
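+
+// Editorial note: the "WebRTC-Audio-NetEqNackTrackerConfig" field trial string
+// used above (and in the tests below) carries comma-separated key:value pairs
+// between two slashes; the keys mirror the members of NackTracker::Config,
+// e.g. "max_loss_rate:0.4" or "require_valid_rtt:true".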
+
+// Test if the estimated packet loss rate is correct.
+TEST(NackTrackerTest, PacketLossRateCorrect) {
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
+    if (received) {
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    }
+    seq_num++;
+    timestamp += kTimestampIncrement;
+  };
+  // Add some packets, but every fourth packet is lost.
+  for (int i = 0; i < 300; i++) {
+    add_packet(true);
+    add_packet(true);
+    add_packet(true);
+    add_packet(false);
+  }
+  // 1 << 28 is 0.25 in Q30. We expect the packet loss estimate to be within
+  // 0.01 of that.
+  EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 28, (1 << 30) / 100);
+}
+
+TEST(NackTrackerTest, DoNotNackAfterDtx) {
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  nack.UpdateLastReceivedPacket(seq_num, timestamp);
+  EXPECT_TRUE(nack.GetNackList(0).empty());
+  constexpr int kDtxPeriod = 400;
+  nack.UpdateLastReceivedPacket(seq_num + 2,
+                                timestamp + kDtxPeriod * kSampleRateHz / 1000);
+  EXPECT_TRUE(nack.GetNackList(0).empty());
+}
+
+TEST(NackTrackerTest, DoNotNackIfLossRateIsTooHigh) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-NetEqNackTrackerConfig/max_loss_rate:0.4/");
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
+    if (received) {
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    }
+    seq_num++;
+    timestamp += kTimestampIncrement;
+  };
+  for (int i = 0; i < 500; i++) {
+    add_packet(true);
+    add_packet(false);
+  }
+  // Expect a 50% loss rate, which is higher than the configured maximum of
+  // 40%.
+  EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 29, (1 << 30) / 100);
+  EXPECT_TRUE(nack.GetNackList(0).empty());
+}
+
+TEST(NackTrackerTest, OnlyNackIfRttIsValid) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-NetEqNackTrackerConfig/require_valid_rtt:true/");
+  const int kNackListSize = 200;
+  NackTracker nack;
+  nack.UpdateSampleRate(kSampleRateHz);
+  nack.SetMaxNackListSize(kNackListSize);
+  uint16_t seq_num = 0;
+  uint32_t timestamp = 0x87654321;
+  auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
+    if (received) {
+      nack.UpdateLastReceivedPacket(seq_num, timestamp);
+    }
+    seq_num++;
+    timestamp += kTimestampIncrement;
+  };
+  add_packet(true);
+  add_packet(false);
+  add_packet(true);
+  EXPECT_TRUE(nack.GetNackList(0).empty());
+  EXPECT_FALSE(nack.GetNackList(10).empty());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc
new file mode 100644
index 0000000000..cf310d1efb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Test to verify correct operation when using the decoder-internal PLC.
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
+#include "modules/audio_coding/neteq/tools/audio_checksum.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "modules/audio_coding/neteq/tools/encode_neteq_input.h"
+#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/audio_decoder_proxy_factory.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kRunTimeMs = 10000;
+
+// This class implements a fake decoder. The decoder will read audio from a
+// file and present it as output, both for regular decoding and for PLC.
+class AudioDecoderPlc : public AudioDecoder {
+ public:
+  AudioDecoderPlc(std::unique_ptr<InputAudioFile> input, int sample_rate_hz)
+      : input_(std::move(input)), sample_rate_hz_(sample_rate_hz) {}
+
+  void Reset() override {}
+  int SampleRateHz() const override { return sample_rate_hz_; }
+  size_t Channels() const override { return 1; }
+  int DecodeInternal(const uint8_t* /*encoded*/,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override {
+    RTC_CHECK_GE(encoded_len / 2, 10 * sample_rate_hz_ / 1000);
+    RTC_CHECK_LE(encoded_len / 2, 2 * 10 * sample_rate_hz_ / 1000);
+    RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_);
+    RTC_CHECK(decoded);
+    RTC_CHECK(speech_type);
+    RTC_CHECK(input_->Read(encoded_len / 2, decoded));
+    *speech_type = kSpeech;
+    last_was_plc_ = false;
+    return encoded_len / 2;
+  }
+
+  void GeneratePlc(size_t requested_samples_per_channel,
+                   rtc::BufferT<int16_t>* concealment_audio) override {
+    // Instead of generating random data for GeneratePlc we use the same data
+    // as the input, so we can check that we produce the same result
+    // independently of the losses.
+    RTC_DCHECK_EQ(requested_samples_per_channel, 10 * sample_rate_hz_ / 1000);
+
+    // Must keep a local copy of this since DecodeInternal sets it to false.
+    const bool last_was_plc = last_was_plc_;
+
+    std::vector<int16_t> decoded(5760);
+    SpeechType speech_type;
+    int dec_len = DecodeInternal(nullptr, 2 * 10 * sample_rate_hz_ / 1000,
+                                 sample_rate_hz_, decoded.data(), &speech_type);
+    concealment_audio->AppendData(decoded.data(), dec_len);
+    concealed_samples_ += rtc::checked_cast<size_t>(dec_len);
+
+    if (!last_was_plc) {
+      ++concealment_events_;
+    }
+    last_was_plc_ = true;
+  }
+
+  size_t concealed_samples() { return concealed_samples_; }
+  size_t concealment_events() { return concealment_events_; }
+
+ private:
+  const std::unique_ptr<InputAudioFile> input_;
+  const int sample_rate_hz_;
+  size_t concealed_samples_ = 0;
+  size_t concealment_events_ = 0;
+  bool last_was_plc_ = false;
+};
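+
+// Editorial note: because AudioDecoderPlc reads its concealment audio from the
+// same file position that regular decoding would have consumed, the produced
+// waveform is identical with and without losses. The tests below rely on this
+// to compare audio checksums across different loss patterns while still
+// exercising the PLC code path.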
+
+// An input sample generator which generates only zero-samples.
+class ZeroSampleGenerator : public EncodeNetEqInput::Generator {
+ public:
+  rtc::ArrayView<const int16_t> Generate(size_t num_samples) override {
+    vec.resize(num_samples, 0);
+    rtc::ArrayView<const int16_t> view(vec);
+    RTC_DCHECK_EQ(view.size(), num_samples);
+    return view;
+  }
+
+ private:
+  std::vector<int16_t> vec;
+};
+
+// A NetEqInput which connects to another NetEqInput, but drops a number of
+// consecutive packets on the way.
+class LossyInput : public NetEqInput {
+ public:
+  LossyInput(int loss_cadence,
+             int burst_length,
+             std::unique_ptr<NetEqInput> input)
+      : loss_cadence_(loss_cadence),
+        burst_length_(burst_length),
+        input_(std::move(input)) {}
+
+  absl::optional<int64_t> NextPacketTime() const override {
+    return input_->NextPacketTime();
+  }
+
+  absl::optional<int64_t> NextOutputEventTime() const override {
+    return input_->NextOutputEventTime();
+  }
+
+  std::unique_ptr<PacketData> PopPacket() override {
+    if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) {
+      // Pop `burst_length_` packets to create the loss.
+      auto packet_to_return = input_->PopPacket();
+      for (int i = 0; i < burst_length_; i++) {
+        input_->PopPacket();
+      }
+      return packet_to_return;
+    }
+    return input_->PopPacket();
+  }
+
+  void AdvanceOutputEvent() override { return input_->AdvanceOutputEvent(); }
+
+  bool ended() const override { return input_->ended(); }
+
+  absl::optional<RTPHeader> NextHeader() const override {
+    return input_->NextHeader();
+  }
+
+ private:
+  const int loss_cadence_;
+  const int burst_length_;
+  int count_ = 0;
+  const std::unique_ptr<NetEqInput> input_;
+};
+
+class AudioChecksumWithOutput : public AudioChecksum {
+ public:
+  explicit AudioChecksumWithOutput(std::string* output_str)
+      : output_str_(*output_str) {}
+  ~AudioChecksumWithOutput() { output_str_ = Finish(); }
+
+ private:
+  std::string& output_str_;
+};
+
+struct TestStatistics {
+  NetEqNetworkStatistics network;
+  NetEqLifetimeStatistics lifetime;
+};
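+
+// Editorial note: in RunTest below, LossyInput drops `burst_length`
+// consecutive packets after every `loss_cadence`-th delivered packet, and a
+// cadence of 0 disables losses entirely.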
+
+TestStatistics RunTest(int loss_cadence,
+                       int burst_length,
+                       std::string* checksum) {
+  NetEq::Config config;
+  config.for_test_no_time_stretching = true;
+
+  // The input is mostly useless. It sends zero-samples to a PCM16b encoder,
+  // but the actual encoded samples will never be used by the decoder in the
+  // test. See below about the decoder.
+  auto generator = std::make_unique<ZeroSampleGenerator>();
+  constexpr int kPayloadType = 100;
+  AudioEncoderPcm16B::Config encoder_config;
+  encoder_config.sample_rate_hz = kSampleRateHz;
+  encoder_config.payload_type = kPayloadType;
+  auto encoder = std::make_unique<AudioEncoderPcm16B>(encoder_config);
+  auto input = std::make_unique<EncodeNetEqInput>(
+      std::move(generator), std::move(encoder), kRunTimeMs);
+  // Wrap the input in a loss function.
+  auto lossy_input = std::make_unique<LossyInput>(loss_cadence, burst_length,
+                                                  std::move(input));
+
+  // Setting up decoders.
+  NetEqTest::DecoderMap decoders;
+  // Using a fake decoder which simply reads the output audio from a file.
+  auto input_file = std::make_unique<InputAudioFile>(
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
+  AudioDecoderPlc dec(std::move(input_file), kSampleRateHz);
+  // Masquerading as a PCM16b decoder.
+  decoders.emplace(kPayloadType, SdpAudioFormat("l16", 32000, 1));
+
+  // Output is simply a checksum calculator.
+  auto output = std::make_unique<AudioChecksumWithOutput>(checksum);
+
+  // No callback objects.
+  NetEqTest::Callbacks callbacks;
+
+  NetEqTest neteq_test(
+      config, /*decoder_factory=*/
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&dec),
+      /*codecs=*/decoders, /*text_log=*/nullptr, /*neteq_factory=*/nullptr,
+      /*input=*/std::move(lossy_input), std::move(output), callbacks);
+  EXPECT_LE(kRunTimeMs, neteq_test.Run());
+
+  auto lifetime_stats = neteq_test.LifetimeStats();
+  EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples);
+  EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events);
+  return {neteq_test.SimulationStats(), neteq_test.LifetimeStats()};
+}
+}  // namespace
+
+// Check that some basic metrics are produced in the right direction. In
+// particular, expand_rate should only increase if there are losses present.
+// Our dummy decoder is designed so that the checksum is always the same
+// regardless of the losses, given that calls are executed in the right order.
+TEST(NetEqDecoderPlc, BasicMetrics) {
+  std::string checksum;
+
+  // Drop 1 packet every 10 packets.
+  auto stats = RunTest(10, 1, &checksum);
+
+  std::string checksum_no_loss;
+  auto stats_no_loss = RunTest(0, 0, &checksum_no_loss);
+
+  EXPECT_EQ(checksum, checksum_no_loss);
+
+  EXPECT_EQ(stats.network.preemptive_rate,
+            stats_no_loss.network.preemptive_rate);
+  EXPECT_EQ(stats.network.accelerate_rate,
+            stats_no_loss.network.accelerate_rate);
+  EXPECT_EQ(0, stats_no_loss.network.expand_rate);
+  EXPECT_GT(stats.network.expand_rate, 0);
+}
+
+// Checks that small losses are not counted as interruptions, while long
+// interruptions are correctly counted.
+TEST(NetEqDecoderPlc, CountInterruptions) {
+  std::string checksum;
+  std::string checksum_2;
+  std::string checksum_3;
+
+  // Half of the packets lost, but in short interruptions.
+  auto stats_no_interruptions = RunTest(1, 1, &checksum);
+  // One loss of 500 ms (250 packets).
+  auto stats_one_interruption = RunTest(200, 250, &checksum_2);
+  // Two losses of 250 ms each (125 packets).
+  auto stats_two_interruptions = RunTest(125, 125, &checksum_3);
+
+  EXPECT_EQ(checksum, checksum_2);
+  EXPECT_EQ(checksum, checksum_3);
+  EXPECT_GT(stats_no_interruptions.network.expand_rate, 0);
+  EXPECT_EQ(stats_no_interruptions.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_no_interruptions.lifetime.interruption_count, 0);
+
+  EXPECT_GT(stats_one_interruption.network.expand_rate, 0);
+  EXPECT_EQ(stats_one_interruption.lifetime.total_interruption_duration_ms,
+            5000);
+  EXPECT_EQ(stats_one_interruption.lifetime.interruption_count, 1);
+
+  EXPECT_GT(stats_two_interruptions.network.expand_rate, 0);
+  EXPECT_EQ(stats_two_interruptions.lifetime.total_interruption_duration_ms,
+            5000);
+  EXPECT_EQ(stats_two_interruptions.lifetime.interruption_count, 2);
+}
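+
+// Editorial note for the two tests below: the lifetime stats count an
+// "interruption" only for concealment events longer than roughly 150 ms (the
+// boundary exercised below lies between 140 ms and 160 ms). With 20 ms
+// packets, a burst must therefore span at least 8 packets to be counted.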
+
+// Checks that small losses do not produce interruptions.
+TEST(NetEqDecoderPlc, NoInterruptionsInSmallLosses) {
+  std::string checksum_1;
+  std::string checksum_4;
+
+  auto stats_1 = RunTest(300, 1, &checksum_1);
+  auto stats_4 = RunTest(300, 4, &checksum_4);
+
+  EXPECT_EQ(checksum_1, checksum_4);
+
+  EXPECT_EQ(stats_1.lifetime.interruption_count, 0);
+  EXPECT_EQ(stats_1.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_1.lifetime.concealed_samples, 640u);  // 20 ms of concealment.
+  EXPECT_EQ(stats_1.lifetime.concealment_events, 1u);   // In just one event.
+
+  EXPECT_EQ(stats_4.lifetime.interruption_count, 0);
+  EXPECT_EQ(stats_4.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_4.lifetime.concealed_samples, 2560u);  // 80 ms of
+                                                         // concealment.
+  EXPECT_EQ(stats_4.lifetime.concealment_events, 1u);  // In just one event.
+}
+
+// Checks that interruptions of different sizes report the correct duration.
+TEST(NetEqDecoderPlc, InterruptionsReportCorrectSize) {
+  std::string checksum;
+
+  for (int burst_length = 5; burst_length < 10; burst_length++) {
+    auto stats = RunTest(300, burst_length, &checksum);
+    auto duration = stats.lifetime.total_interruption_duration_ms;
+    if (burst_length < 8) {
+      EXPECT_EQ(duration, 0);
+    } else {
+      EXPECT_EQ(duration, burst_length * 20);
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc
new file mode 100644
index 0000000000..6a6367d045
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -0,0 +1,2141 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/neteq_impl.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <list>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/neteq/tick_timer.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+#include "modules/audio_coding/neteq/accelerate.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/comfort_noise.h"
+#include "modules/audio_coding/neteq/decision_logic.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/dtmf_buffer.h"
+#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "modules/audio_coding/neteq/merge.h"
+#include "modules/audio_coding/neteq/nack_tracker.h"
+#include "modules/audio_coding/neteq/normal.h"
+#include "modules/audio_coding/neteq/packet.h"
+#include "modules/audio_coding/neteq/packet_buffer.h"
+#include "modules/audio_coding/neteq/post_decode_vad.h"
+#include "modules/audio_coding/neteq/preemptive_expand.h"
+#include "modules/audio_coding/neteq/red_payload_splitter.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "modules/audio_coding/neteq/time_stretch.h"
+#include "modules/audio_coding/neteq/timestamp_scaler.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/sanitizer.h"
+#include "rtc_base/strings/audio_format_to_string.h"
+#include "rtc_base/trace_event.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+namespace {
+
+std::unique_ptr<NetEqController> CreateNetEqController(
+    const NetEqControllerFactory& controller_factory,
+    int base_min_delay,
+    int max_packets_in_buffer,
+    bool allow_time_stretching,
+    TickTimer* tick_timer,
+    webrtc::Clock* clock) {
+  NetEqController::Config config;
+  config.base_min_delay_ms = base_min_delay;
+  config.max_packets_in_buffer = max_packets_in_buffer;
+  config.allow_time_stretching = allow_time_stretching;
+  config.tick_timer = tick_timer;
+  config.clock = clock;
+  return controller_factory.CreateNetEqController(config);
+}
+
+}  // namespace
+
+NetEqImpl::Dependencies::Dependencies(
+    const NetEq::Config& config,
+    Clock* clock,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+    const NetEqControllerFactory& controller_factory)
+    : clock(clock),
+      tick_timer(new TickTimer),
+      stats(new StatisticsCalculator),
+      decoder_database(
+          new DecoderDatabase(decoder_factory, config.codec_pair_id)),
+      dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)),
+      dtmf_tone_generator(new DtmfToneGenerator),
+      packet_buffer(
+          new PacketBuffer(config.max_packets_in_buffer, tick_timer.get())),
+      neteq_controller(
+          CreateNetEqController(controller_factory,
+                                config.min_delay_ms,
+                                config.max_packets_in_buffer,
+                                !config.for_test_no_time_stretching,
+                                tick_timer.get(),
+                                clock)),
+      red_payload_splitter(new RedPayloadSplitter),
+      timestamp_scaler(new TimestampScaler(*decoder_database)),
+      accelerate_factory(new AccelerateFactory),
+      expand_factory(new ExpandFactory),
+      preemptive_expand_factory(new PreemptiveExpandFactory) {}
+
+NetEqImpl::Dependencies::~Dependencies() = default;
+
+NetEqImpl::NetEqImpl(const NetEq::Config& config,
+                     Dependencies&& deps,
+                     bool create_components)
+    : clock_(deps.clock),
+      tick_timer_(std::move(deps.tick_timer)),
+      decoder_database_(std::move(deps.decoder_database)),
+      dtmf_buffer_(std::move(deps.dtmf_buffer)),
+      dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)),
+      packet_buffer_(std::move(deps.packet_buffer)),
+      red_payload_splitter_(std::move(deps.red_payload_splitter)),
+      timestamp_scaler_(std::move(deps.timestamp_scaler)),
+      vad_(new PostDecodeVad()),
+      expand_factory_(std::move(deps.expand_factory)),
+      accelerate_factory_(std::move(deps.accelerate_factory)),
+      preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
+      stats_(std::move(deps.stats)),
+      controller_(std::move(deps.neteq_controller)),
+      last_mode_(Mode::kNormal),
+      decoded_buffer_length_(kMaxFrameSize),
+      decoded_buffer_(new int16_t[decoded_buffer_length_]),
+      playout_timestamp_(0),
+      new_codec_(false),
+      timestamp_(0),
+      reset_decoder_(false),
+      first_packet_(true),
+      enable_fast_accelerate_(config.enable_fast_accelerate),
+      nack_enabled_(false),
+      enable_muted_state_(config.enable_muted_state),
+      expand_uma_logger_("WebRTC.Audio.ExpandRatePercent",
+                         10,  // Report once every 10 s.
+                         tick_timer_.get()),
+      speech_expand_uma_logger_("WebRTC.Audio.SpeechExpandRatePercent",
+                                10,  // Report once every 10 s.
+                                tick_timer_.get()),
+      no_time_stretching_(config.for_test_no_time_stretching) {
+  RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString();
+  int fs = config.sample_rate_hz;
+  if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
+    RTC_LOG(LS_ERROR) << "Sample rate " << fs
+                      << " Hz not supported. "
+                         "Changing to 8000 Hz.";
+    fs = 8000;
+  }
+  controller_->SetMaximumDelay(config.max_delay_ms);
+  fs_hz_ = fs;
+  fs_mult_ = fs / 8000;
+  last_output_sample_rate_hz_ = fs;
+  output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
+  controller_->SetSampleRate(fs_hz_, output_size_samples_);
+  decoder_frame_length_ = 2 * output_size_samples_;  // 20 ms.
+  if (create_components) {
+    SetSampleRateAndChannels(fs, 1);  // Default is 1 channel.
+  }
+  RTC_DCHECK(!vad_->enabled());
+  if (config.enable_post_decode_vad) {
+    vad_->Enable();
+  }
+}
+
+NetEqImpl::~NetEqImpl() = default;
+
+int NetEqImpl::InsertPacket(const RTPHeader& rtp_header,
+                            rtc::ArrayView<const uint8_t> payload) {
+  rtc::MsanCheckInitialized(payload);
+  TRACE_EVENT0("webrtc", "NetEqImpl::InsertPacket");
+  MutexLock lock(&mutex_);
+  if (InsertPacketInternal(rtp_header, payload) != 0) {
+    return kFail;
+  }
+  return kOK;
+}
+
+void NetEqImpl::InsertEmptyPacket(const RTPHeader& rtp_header) {
+  MutexLock lock(&mutex_);
+  if (nack_enabled_) {
+    nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber,
+                                    rtp_header.timestamp);
+  }
+  controller_->RegisterEmptyPacket();
+}
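+
+// Editorial usage sketch (not part of the upstream source): a receiver
+// typically drives NetEq from two threads. The network thread delivers every
+// RTP packet via InsertPacket(), and the audio thread polls GetAudio() every
+// 10 ms. A hypothetical loop:
+//
+//   webrtc::AudioFrame frame;
+//   bool muted = false;
+//   neteq->InsertPacket(rtp_header, payload);  // Per received packet.
+//   neteq->GetAudio(&frame, &muted);           // Every 10 ms.
+//
+// Both entry points take `mutex_`, which is what makes this two-thread
+// pattern safe.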
+
+namespace {
+void SetAudioFrameActivityAndType(bool vad_enabled,
+                                  NetEqImpl::OutputType type,
+                                  AudioFrame::VADActivity last_vad_activity,
+                                  AudioFrame* audio_frame) {
+  switch (type) {
+    case NetEqImpl::OutputType::kNormalSpeech: {
+      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+      audio_frame->vad_activity_ = AudioFrame::kVadActive;
+      break;
+    }
+    case NetEqImpl::OutputType::kVadPassive: {
+      // This should only be reached if the VAD is enabled.
+      RTC_DCHECK(vad_enabled);
+      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      break;
+    }
+    case NetEqImpl::OutputType::kCNG: {
+      audio_frame->speech_type_ = AudioFrame::kCNG;
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      break;
+    }
+    case NetEqImpl::OutputType::kPLC: {
+      audio_frame->speech_type_ = AudioFrame::kPLC;
+      audio_frame->vad_activity_ = last_vad_activity;
+      break;
+    }
+    case NetEqImpl::OutputType::kPLCCNG: {
+      audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      break;
+    }
+    case NetEqImpl::OutputType::kCodecPLC: {
+      audio_frame->speech_type_ = AudioFrame::kCodecPLC;
+      audio_frame->vad_activity_ = last_vad_activity;
+      break;
+    }
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  if (!vad_enabled) {
+    // Always set kVadUnknown when the receive VAD is inactive.
+    audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
+  }
+}
+}  // namespace
+
+int NetEqImpl::GetAudio(AudioFrame* audio_frame,
+                        bool* muted,
+                        int* current_sample_rate_hz,
+                        absl::optional<Operation> action_override) {
+  TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio");
+  MutexLock lock(&mutex_);
+  if (GetAudioInternal(audio_frame, muted, action_override) != 0) {
+    return kFail;
+  }
+  RTC_DCHECK_EQ(
+      audio_frame->sample_rate_hz_,
+      rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
+  RTC_DCHECK_EQ(*muted, audio_frame->muted());
+  SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
+                               last_vad_activity_, audio_frame);
+  last_vad_activity_ = audio_frame->vad_activity_;
+  last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
+  RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
+             last_output_sample_rate_hz_ == 16000 ||
+             last_output_sample_rate_hz_ == 32000 ||
+             last_output_sample_rate_hz_ == 48000)
+      << "Unexpected sample rate " << last_output_sample_rate_hz_;
+
+  if (current_sample_rate_hz) {
+    *current_sample_rate_hz = last_output_sample_rate_hz_;
+  }
+
+  return kOK;
+}
+
+void NetEqImpl::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) {
+  MutexLock lock(&mutex_);
+  const std::vector<int> changed_payload_types =
+      decoder_database_->SetCodecs(codecs);
+  for (const int pt : changed_payload_types) {
+    packet_buffer_->DiscardPacketsWithPayloadType(pt, stats_.get());
+  }
+}
+
+bool NetEqImpl::RegisterPayloadType(int rtp_payload_type,
+                                    const SdpAudioFormat& audio_format) {
+  RTC_LOG(LS_VERBOSE) << "NetEqImpl::RegisterPayloadType: payload type "
+                      << rtp_payload_type << ", codec "
+                      << rtc::ToString(audio_format);
+  MutexLock lock(&mutex_);
+  return decoder_database_->RegisterPayload(rtp_payload_type, audio_format) ==
+         DecoderDatabase::kOK;
+}
+
+int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) {
+  MutexLock lock(&mutex_);
+  int ret = decoder_database_->Remove(rtp_payload_type);
+  if (ret == DecoderDatabase::kOK || ret == DecoderDatabase::kDecoderNotFound) {
+    packet_buffer_->DiscardPacketsWithPayloadType(rtp_payload_type,
+                                                  stats_.get());
+    return kOK;
+  }
+  return kFail;
+}
+
+void NetEqImpl::RemoveAllPayloadTypes() {
+  MutexLock lock(&mutex_);
+  decoder_database_->RemoveAll();
+}
+
+bool NetEqImpl::SetMinimumDelay(int delay_ms) {
+  MutexLock lock(&mutex_);
+  if (delay_ms >= 0 && delay_ms <= 10000) {
+    RTC_DCHECK(controller_.get());
+    return controller_->SetMinimumDelay(delay_ms);
+  }
+  return false;
+}
+
+bool NetEqImpl::SetMaximumDelay(int delay_ms) {
+  MutexLock lock(&mutex_);
+  if (delay_ms >= 0 && delay_ms <= 10000) {
+    RTC_DCHECK(controller_.get());
+    return controller_->SetMaximumDelay(delay_ms);
+  }
+  return false;
+}
+
+bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) {
+  MutexLock lock(&mutex_);
+  if (delay_ms >= 0 && delay_ms <= 10000) {
+    return controller_->SetBaseMinimumDelay(delay_ms);
+  }
+  return false;
+}
+
+int NetEqImpl::GetBaseMinimumDelayMs() const {
+  MutexLock lock(&mutex_);
+  return controller_->GetBaseMinimumDelay();
+}
+
+int NetEqImpl::TargetDelayMs() const {
+  MutexLock lock(&mutex_);
+  RTC_DCHECK(controller_.get());
+  return controller_->TargetLevelMs();
+}
+
+int NetEqImpl::FilteredCurrentDelayMs() const {
+  MutexLock lock(&mutex_);
+  // Sum up the filtered packet buffer level with the future length of the sync
+  // buffer.
+  const int delay_samples =
+      controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
+  // The division below will truncate. The return value is in ms.
+  return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
+}
+
+int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
+  MutexLock lock(&mutex_);
+  RTC_DCHECK(decoder_database_.get());
+  *stats = CurrentNetworkStatisticsInternal();
+  stats_->GetNetworkStatistics(decoder_frame_length_, stats);
+  return 0;
+}
+
+NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatistics() const {
+  MutexLock lock(&mutex_);
+  return CurrentNetworkStatisticsInternal();
+}
+
+NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatisticsInternal() const {
+  RTC_DCHECK(decoder_database_.get());
+  NetEqNetworkStatistics stats;
+  const size_t total_samples_in_buffers =
+      packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
+      sync_buffer_->FutureLength();
+
+  RTC_DCHECK(controller_.get());
+  stats.preferred_buffer_size_ms = controller_->TargetLevelMs();
+  stats.jitter_peaks_found = controller_->PeakFound();
+  RTC_DCHECK_GT(fs_hz_, 0);
+  stats.current_buffer_size_ms =
+      static_cast<uint16_t>(total_samples_in_buffers * 1000 / fs_hz_);
+  return stats;
+}
+
+NetEqLifetimeStatistics NetEqImpl::GetLifetimeStatistics() const {
+  MutexLock lock(&mutex_);
+  return stats_->GetLifetimeStatistics();
+}
+
+NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const {
+  MutexLock lock(&mutex_);
+  auto result = stats_->GetOperationsAndState();
+  result.current_buffer_size_ms =
+      (packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
+       sync_buffer_->FutureLength()) *
+      1000 / fs_hz_;
+  result.current_frame_size_ms = decoder_frame_length_ * 1000 / fs_hz_;
+  result.next_packet_available = packet_buffer_->PeekNextPacket() &&
+                                 packet_buffer_->PeekNextPacket()->timestamp ==
+                                     sync_buffer_->end_timestamp();
+  return result;
+}
+
+void NetEqImpl::EnableVad() {
+  MutexLock lock(&mutex_);
+  RTC_DCHECK(vad_.get());
+  vad_->Enable();
+}
+
+void NetEqImpl::DisableVad() {
+  MutexLock lock(&mutex_);
+  RTC_DCHECK(vad_.get());
+  vad_->Disable();
+}
+
+absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
+  MutexLock lock(&mutex_);
+  if (first_packet_ || last_mode_ == Mode::kRfc3389Cng ||
+      last_mode_ == Mode::kCodecInternalCng) {
+    // We don't have a valid RTP timestamp until we have decoded our first
+    // RTP packet. Also, the RTP timestamp is not accurate while playing CNG,
+    // which is indicated by returning an empty value.
+    return absl::nullopt;
+  }
+  return timestamp_scaler_->ToExternal(playout_timestamp_);
+}
+
+int NetEqImpl::last_output_sample_rate_hz() const {
+  MutexLock lock(&mutex_);
+  return last_output_sample_rate_hz_;
+}
+
+absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat(
+    int payload_type) const {
+  MutexLock lock(&mutex_);
+  const DecoderDatabase::DecoderInfo* const di =
+      decoder_database_->GetDecoderInfo(payload_type);
+  if (di) {
+    const AudioDecoder* const decoder = di->GetDecoder();
+    // TODO(kwiberg): Why the special case for RED?
+    return DecoderFormat{
+        /*sample_rate_hz=*/di->IsRed() ? 8000 : di->SampleRateHz(),
+        /*num_channels=*/
+        decoder ? rtc::dchecked_cast<int>(decoder->Channels()) : 1,
+        /*sdp_format=*/di->GetFormat()};
+  } else {
+    // Payload type not registered.
+    return absl::nullopt;
+  }
+}
+
+void NetEqImpl::FlushBuffers() {
+  MutexLock lock(&mutex_);
+  RTC_LOG(LS_VERBOSE) << "FlushBuffers";
+  packet_buffer_->Flush(stats_.get());
+  RTC_DCHECK(sync_buffer_.get());
+  RTC_DCHECK(expand_.get());
+  sync_buffer_->Flush();
+  sync_buffer_->set_next_index(sync_buffer_->next_index() -
+                               expand_->overlap_length());
+  // Set to wait for new codec.
+  first_packet_ = true;
+}
+
+void NetEqImpl::EnableNack(size_t max_nack_list_size) {
+  MutexLock lock(&mutex_);
+  if (!nack_enabled_) {
+    nack_ = std::make_unique<NackTracker>();
+    nack_enabled_ = true;
+    nack_->UpdateSampleRate(fs_hz_);
+  }
+  nack_->SetMaxNackListSize(max_nack_list_size);
+}
+
+void NetEqImpl::DisableNack() {
+  MutexLock lock(&mutex_);
+  nack_.reset();
+  nack_enabled_ = false;
+}
+
+std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const {
+  MutexLock lock(&mutex_);
+  if (!nack_enabled_) {
+    return std::vector<uint16_t>();
+  }
+  RTC_DCHECK(nack_.get());
+  return nack_->GetNackList(round_trip_time_ms);
+}
+
+int NetEqImpl::SyncBufferSizeMs() const {
+  MutexLock lock(&mutex_);
+  return rtc::dchecked_cast<int>(sync_buffer_->FutureLength() /
+                                 rtc::CheckedDivExact(fs_hz_, 1000));
+}
+
+const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
+  MutexLock lock(&mutex_);
+  return sync_buffer_.get();
+}
+
+NetEq::Operation NetEqImpl::last_operation_for_test() const {
+  MutexLock lock(&mutex_);
+  return last_operation_;
+}
+
+// Methods below this line are private.
+
+int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
+                                    rtc::ArrayView<const uint8_t> payload) {
+  if (payload.empty()) {
+    RTC_LOG_F(LS_ERROR) << "payload is empty";
+    return kInvalidPointer;
+  }
+
+  Timestamp receive_time = clock_->CurrentTime();
+  stats_->ReceivedPacket();
+
+  PacketList packet_list;
+  // Insert packet in a packet list.
+  packet_list.push_back([&rtp_header, &payload, &receive_time] {
+    // Convert to Packet.
+    Packet packet;
+    packet.payload_type = rtp_header.payloadType;
+    packet.sequence_number = rtp_header.sequenceNumber;
+    packet.timestamp = rtp_header.timestamp;
+    packet.payload.SetData(payload.data(), payload.size());
+    packet.packet_info = RtpPacketInfo(rtp_header, receive_time);
+    // Waiting time will be set upon inserting the packet in the buffer.
+    RTC_DCHECK(!packet.waiting_time);
+    return packet;
+  }());
+
+  bool update_sample_rate_and_channels = first_packet_;
+
+  if (update_sample_rate_and_channels) {
+    // Reset timestamp scaling.
+    timestamp_scaler_->Reset();
+  }
+
+  if (!decoder_database_->IsRed(rtp_header.payloadType)) {
+    // Scale timestamp to internal domain (only for some codecs).
+    timestamp_scaler_->ToInternal(&packet_list);
+  }
+
+  // Store these for later use, since the first packet may very well disappear
+  // before we need these values.
+  uint32_t main_timestamp = packet_list.front().timestamp;
+  uint8_t main_payload_type = packet_list.front().payload_type;
+  uint16_t main_sequence_number = packet_list.front().sequence_number;
+
+  // Reinitialize NetEq if it's needed (changed SSRC or first call).
+  if (update_sample_rate_and_channels) {
+    // Note: `first_packet_` will be cleared further down in this method, once
+    // the packet has been successfully inserted into the packet buffer.
+
+    // Flush the packet buffer and DTMF buffer.
+    packet_buffer_->Flush(stats_.get());
+    dtmf_buffer_->Flush();
+
+    // Update audio buffer timestamp.
+    sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_);
+
+    // Update codecs.
+    timestamp_ = main_timestamp;
+  }
+
+  if (nack_enabled_) {
+    RTC_DCHECK(nack_);
+    if (update_sample_rate_and_channels) {
+      nack_->Reset();
+    }
+    nack_->UpdateLastReceivedPacket(main_sequence_number, main_timestamp);
+  }
+
+  // Check for RED payload type, and separate payloads into several packets.
+  if (decoder_database_->IsRed(rtp_header.payloadType)) {
+    if (!red_payload_splitter_->SplitRed(&packet_list)) {
+      return kRedundancySplitError;
+    }
+    // Only accept a few RED payloads of the same type as the main data, DTMF
+    // events and CNG.
+    red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
+    if (packet_list.empty()) {
+      return kRedundancySplitError;
+    }
+  }
+
+  // Check payload types.
+  if (decoder_database_->CheckPayloadTypes(packet_list) ==
+      DecoderDatabase::kDecoderNotFound) {
+    return kUnknownRtpPayloadType;
+  }
+
+  RTC_DCHECK(!packet_list.empty());
+
+  // Update main_timestamp, if new packets appear in the list
+  // after RED splitting.
+  if (decoder_database_->IsRed(rtp_header.payloadType)) {
+    timestamp_scaler_->ToInternal(&packet_list);
+    main_timestamp = packet_list.front().timestamp;
+    main_payload_type = packet_list.front().payload_type;
+    main_sequence_number = packet_list.front().sequence_number;
+  }
+
+  // Process DTMF payloads. Cycle through the list of packets, and pick out any
+  // DTMF payloads found.
+  PacketList::iterator it = packet_list.begin();
+  while (it != packet_list.end()) {
+    const Packet& current_packet = (*it);
+    RTC_DCHECK(!current_packet.payload.empty());
+    if (decoder_database_->IsDtmf(current_packet.payload_type)) {
+      DtmfEvent event;
+      int ret = DtmfBuffer::ParseEvent(current_packet.timestamp,
+                                       current_packet.payload.data(),
+                                       current_packet.payload.size(), &event);
+      if (ret != DtmfBuffer::kOK) {
+        return kDtmfParsingError;
+      }
+      if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
+        return kDtmfInsertError;
+      }
+      it = packet_list.erase(it);
+    } else {
+      ++it;
+    }
+  }
+
+  PacketList parsed_packet_list;
+  bool is_dtx = false;
+  while (!packet_list.empty()) {
+    Packet& packet = packet_list.front();
+    const DecoderDatabase::DecoderInfo* info =
+        decoder_database_->GetDecoderInfo(packet.payload_type);
+    if (!info) {
+      RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type";
+      return kUnknownRtpPayloadType;
+    }
+
+    if (info->IsComfortNoise()) {
+      // Carry comfort noise packets along.
+      parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
+                                packet_list.begin());
+    } else {
+      const auto sequence_number = packet.sequence_number;
+      const auto payload_type = packet.payload_type;
+      const Packet::Priority original_priority = packet.priority;
+      const auto& packet_info = packet.packet_info;
+      auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
+        Packet new_packet;
+        new_packet.sequence_number = sequence_number;
+        new_packet.payload_type = payload_type;
+        new_packet.timestamp = result.timestamp;
+        new_packet.priority.codec_level = result.priority;
+        new_packet.priority.red_level = original_priority.red_level;
+        new_packet.packet_info = packet_info;
+        new_packet.frame = std::move(result.frame);
+        return new_packet;
+      };
+
+      std::vector<AudioDecoder::ParseResult> results =
+          info->GetDecoder()->ParsePayload(std::move(packet.payload),
+                                           packet.timestamp);
+      if (results.empty()) {
+        packet_list.pop_front();
+      } else {
+        bool first = true;
+        for (auto& result : results) {
+          RTC_DCHECK(result.frame);
+          RTC_DCHECK_GE(result.priority, 0);
+          is_dtx = is_dtx || result.frame->IsDtxPacket();
+          if (first) {
+            // Re-use the node and move it to parsed_packet_list.
+            packet_list.front() = packet_from_result(result);
+            parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
+                                      packet_list.begin());
+            first = false;
+          } else {
+            parsed_packet_list.push_back(packet_from_result(result));
+          }
+        }
+      }
+    }
+  }
+
+  // Calculate the number of primary (non-FEC/RED) packets.
+  const size_t number_of_primary_packets = std::count_if(
+      parsed_packet_list.begin(), parsed_packet_list.end(),
+      [](const Packet& in) { return in.priority.codec_level == 0; });
+  if (number_of_primary_packets < parsed_packet_list.size()) {
+    stats_->SecondaryPacketsReceived(parsed_packet_list.size() -
+                                     number_of_primary_packets);
+  }
+
+  // Insert packets in buffer.
+  const int target_level_ms = controller_->TargetLevelMs();
+  const int ret = packet_buffer_->InsertPacketList(
+      &parsed_packet_list, *decoder_database_, &current_rtp_payload_type_,
+      &current_cng_rtp_payload_type_, stats_.get(), decoder_frame_length_,
+      last_output_sample_rate_hz_, target_level_ms);
+  bool buffer_flush_occurred = false;
+  if (ret == PacketBuffer::kFlushed) {
+    // Reset DSP timestamp etc. if packet buffer flushed.
+    new_codec_ = true;
+    update_sample_rate_and_channels = true;
+    buffer_flush_occurred = true;
+  } else if (ret == PacketBuffer::kPartialFlush) {
+    // Forward the sync buffer timestamp.
+    timestamp_ = packet_buffer_->PeekNextPacket()->timestamp;
+    sync_buffer_->IncreaseEndTimestamp(timestamp_ -
+                                       sync_buffer_->end_timestamp());
+    buffer_flush_occurred = true;
+  } else if (ret != PacketBuffer::kOK) {
+    return kOtherError;
+  }
+
+  if (first_packet_) {
+    first_packet_ = false;
+    // Update the codec on the next GetAudio call.
+    new_codec_ = true;
+  }
+
+  if (current_rtp_payload_type_) {
+    RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_))
+        << "Payload type " << static_cast<int>(*current_rtp_payload_type_)
+        << " is unknown where it shouldn't be";
+  }
+
+  if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
+    // We do not use `current_rtp_payload_type_` to set `payload_type`, but
+    // get the next RTP header from `packet_buffer_` to obtain the payload
+    // type. The reason is the following corner case. If NetEq receives a
+    // CNG packet with a sample rate different from the current CNG, it flushes
+    // its buffer, assuming the send codec must have changed. However, the
+    // payload type of the hypothetical new send codec is not known.
+    const Packet* next_packet = packet_buffer_->PeekNextPacket();
+    RTC_DCHECK(next_packet);
+    const int payload_type = next_packet->payload_type;
+    size_t channels = 1;
+    if (!decoder_database_->IsComfortNoise(payload_type)) {
+      AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
+      RTC_DCHECK(decoder);  // Payloads are already checked to be valid.
+      channels = decoder->Channels();
+    }
+    const DecoderDatabase::DecoderInfo* decoder_info =
+        decoder_database_->GetDecoderInfo(payload_type);
+    RTC_DCHECK(decoder_info);
+    if (decoder_info->SampleRateHz() != fs_hz_ ||
+        channels != algorithm_buffer_->Channels()) {
+      SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels);
+    }
+    if (nack_enabled_) {
+      RTC_DCHECK(nack_);
+      // Update the sample rate even if the rate is not new, because of the
+      // Reset() above.
+      nack_->UpdateSampleRate(fs_hz_);
+    }
+  }
+
+  const DecoderDatabase::DecoderInfo* dec_info =
+      decoder_database_->GetDecoderInfo(main_payload_type);
+  RTC_DCHECK(dec_info);  // Already checked that the payload type is known.
+
+  NetEqController::PacketArrivedInfo info;
+  info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf();
+  info.packet_length_samples =
+      number_of_primary_packets * decoder_frame_length_;
+  info.main_timestamp = main_timestamp;
+  info.main_sequence_number = main_sequence_number;
+  info.is_dtx = is_dtx;
+  info.buffer_flush = buffer_flush_occured;
+
+  const bool should_update_stats = !new_codec_;
+  auto relative_delay =
+      controller_->PacketArrived(fs_hz_, should_update_stats, info);
+  if (relative_delay) {
+    stats_->RelativePacketArrivalDelay(relative_delay.value());
+  }
+  return 0;
+}
+
+int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
+                                bool* muted,
+                                absl::optional<Operation> action_override) {
+  PacketList packet_list;
+  DtmfEvent dtmf_event;
+  Operation operation;
+  bool play_dtmf;
+  *muted = false;
+  last_decoded_packet_infos_.clear();
+  tick_timer_->Increment();
+  stats_->IncreaseCounter(output_size_samples_, fs_hz_);
+  const auto lifetime_stats = stats_->GetLifetimeStatistics();
+  expand_uma_logger_.UpdateSampleCounter(lifetime_stats.concealed_samples,
+                                         fs_hz_);
+  speech_expand_uma_logger_.UpdateSampleCounter(
+      lifetime_stats.concealed_samples -
+          lifetime_stats.silent_concealed_samples,
+      fs_hz_);
+
+  // Check for muted state.
+  if (enable_muted_state_ && expand_->Muted() && packet_buffer_->Empty()) {
+    RTC_DCHECK_EQ(last_mode_, Mode::kExpand);
+    audio_frame->Reset();
+    RTC_DCHECK(audio_frame->muted());  // Reset() should mute the frame.
+    playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
+    audio_frame->sample_rate_hz_ = fs_hz_;
+    // Make sure the total number of samples fits in the AudioFrame.
+    if (output_size_samples_ * sync_buffer_->Channels() >
+        AudioFrame::kMaxDataSizeSamples) {
+      return kSampleUnderrun;
+    }
+    audio_frame->samples_per_channel_ = output_size_samples_;
+    audio_frame->timestamp_ =
+        first_packet_
+            ? 0
+            : timestamp_scaler_->ToExternal(playout_timestamp_) -
+                  static_cast<uint32_t>(audio_frame->samples_per_channel_);
+    audio_frame->num_channels_ = sync_buffer_->Channels();
+    stats_->ExpandedNoiseSamples(output_size_samples_, false);
+    controller_->NotifyMutedState();
+    *muted = true;
+    return 0;
+  }
+  int return_value = GetDecision(&operation, &packet_list, &dtmf_event,
+                                 &play_dtmf, action_override);
+  if (return_value != 0) {
+    last_mode_ = Mode::kError;
+    return return_value;
+  }
+
+  AudioDecoder::SpeechType speech_type;
+  int length = 0;
+  const size_t start_num_packets = packet_list.size();
+  int decode_return_value =
+      Decode(&packet_list, &operation, &length, &speech_type);
+
+  RTC_DCHECK(vad_.get());
+  bool sid_frame_available =
+      (operation == Operation::kRfc3389Cng && !packet_list.empty());
+  vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
+               sid_frame_available, fs_hz_);
+
+  // This is the criterion that we did decode some data through the speech
+  // decoder, and the operation resulted in comfort noise.
+  const bool codec_internal_sid_frame =
+      (speech_type == AudioDecoder::kComfortNoise &&
+       start_num_packets > packet_list.size());
+
+  if (sid_frame_available || codec_internal_sid_frame) {
+    // Start a new stopwatch since we are decoding a new CNG packet.
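+    // The stopwatch measures for how long comfort noise has been generated;
+    // GetDecision() uses it to derive `generated_noise_samples`.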
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + + algorithm_buffer_->Clear(); + switch (operation) { + case Operation::kNormal: { + DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf); + if (length > 0) { + stats_->DecodedOutputPlayed(); + } + break; + } + case Operation::kMerge: { + DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf); + break; + } + case Operation::kExpand: { + RTC_DCHECK_EQ(return_value, 0); + if (!current_rtp_payload_type_ || !DoCodecPlc()) { + return_value = DoExpand(play_dtmf); + } + RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(), + output_size_samples_); + break; + } + case Operation::kAccelerate: + case Operation::kFastAccelerate: { + const bool fast_accelerate = + enable_fast_accelerate_ && (operation == Operation::kFastAccelerate); + return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, + play_dtmf, fast_accelerate); + break; + } + case Operation::kPreemptiveExpand: { + return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, + speech_type, play_dtmf); + break; + } + case Operation::kRfc3389Cng: + case Operation::kRfc3389CngNoPacket: { + return_value = DoRfc3389Cng(&packet_list, play_dtmf); + break; + } + case Operation::kCodecInternalCng: { + // This handles the case when there is no transmission and the decoder + // should produce internal comfort noise. + // TODO(hlundin): Write test for codec-internal CNG. + DoCodecInternalCng(decoded_buffer_.get(), length); + break; + } + case Operation::kDtmf: { + // TODO(hlundin): Write test for this. + return_value = DoDtmf(dtmf_event, &play_dtmf); + break; + } + case Operation::kUndefined: { + RTC_LOG(LS_ERROR) << "Invalid operation kUndefined."; + RTC_DCHECK_NOTREACHED(); // This should not happen. + last_mode_ = Mode::kError; + return kInvalidOperation; + } + } // End of switch. + last_operation_ = operation; + if (return_value < 0) { + return return_value; + } + + if (last_mode_ != Mode::kRfc3389Cng) { + comfort_noise_->Reset(); + } + + // We treat it as if all packets referenced to by `last_decoded_packet_infos_` + // were mashed together when creating the samples in `algorithm_buffer_`. + RtpPacketInfos packet_infos(last_decoded_packet_infos_); + + // Copy samples from `algorithm_buffer_` to `sync_buffer_`. + // + // TODO(bugs.webrtc.org/10757): + // We would in the future also like to pass `packet_infos` so that we can do + // sample-perfect tracking of that information across `sync_buffer_`. + sync_buffer_->PushBack(*algorithm_buffer_); + + // Extract data from `sync_buffer_` to `output`. + size_t num_output_samples_per_channel = output_size_samples_; + size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels(); + if (num_output_samples > AudioFrame::kMaxDataSizeSamples) { + RTC_LOG(LS_WARNING) << "Output array is too short. " + << AudioFrame::kMaxDataSizeSamples << " < " + << output_size_samples_ << " * " + << sync_buffer_->Channels(); + num_output_samples = AudioFrame::kMaxDataSizeSamples; + num_output_samples_per_channel = + AudioFrame::kMaxDataSizeSamples / sync_buffer_->Channels(); + } + sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel, + audio_frame); + audio_frame->sample_rate_hz_ = fs_hz_; + // TODO(bugs.webrtc.org/10757): + // We don't have the ability to properly track individual packets once their + // audio samples have entered `sync_buffer_`. 
So for now, treat it as if
+  // `packet_infos` from packets decoded by the current `GetAudioInternal()`
+  // call were all consumed assembling the current audio frame and the current
+  // audio frame only.
+  audio_frame->packet_infos_ = std::move(packet_infos);
+  if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
+    // The sync buffer should always contain `overlap_length` samples, but now
+    // too many samples have been extracted. Reinstall the `overlap_length`
+    // lookahead by moving the index.
+    const size_t missing_lookahead_samples =
+        expand_->overlap_length() - sync_buffer_->FutureLength();
+    RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples);
+    sync_buffer_->set_next_index(sync_buffer_->next_index() -
+                                 missing_lookahead_samples);
+  }
+  if (audio_frame->samples_per_channel_ != output_size_samples_) {
+    RTC_LOG(LS_ERROR) << "audio_frame->samples_per_channel_ ("
+                      << audio_frame->samples_per_channel_
+                      << ") != output_size_samples_ (" << output_size_samples_
+                      << ")";
+    // TODO(minyue): treatment of under-run, filling zeros
+    audio_frame->Mute();
+    return kSampleUnderrun;
+  }
+
+  // Should always have overlap samples left in the `sync_buffer_`.
+  RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
+
+  // TODO(yujo): For muted frames, this can be a copy rather than an addition.
+  if (play_dtmf) {
+    return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
+                               audio_frame->mutable_data());
+  }
+
+  // Update the background noise parameters if last operation wrote data
+  // straight from the decoder to the `sync_buffer_`. That is, none of the
+  // operations that modify the signal can be followed by a parameter update.
+  if ((last_mode_ == Mode::kNormal) || (last_mode_ == Mode::kAccelerateFail) ||
+      (last_mode_ == Mode::kPreemptiveExpandFail) ||
+      (last_mode_ == Mode::kRfc3389Cng) ||
+      (last_mode_ == Mode::kCodecInternalCng)) {
+    background_noise_->Update(*sync_buffer_, *vad_.get());
+  }
+
+  if (operation == Operation::kDtmf) {
+    // DTMF data was written to the end of `sync_buffer_`.
+    // Update index to end of DTMF data in `sync_buffer_`.
+    sync_buffer_->set_dtmf_index(sync_buffer_->Size());
+  }
+
+  if (last_mode_ != Mode::kExpand && last_mode_ != Mode::kCodecPlc) {
+    // If last operation was not expand, calculate the `playout_timestamp_`
+    // from the `sync_buffer_`. However, do not update the
+    // `playout_timestamp_` if it would be moved "backwards".
+    uint32_t temp_timestamp =
+        sync_buffer_->end_timestamp() -
+        static_cast<uint32_t>(sync_buffer_->FutureLength());
+    if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
+      playout_timestamp_ = temp_timestamp;
+    }
+  } else {
+    // Use dead reckoning to estimate the `playout_timestamp_`.
+    playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
+  }
+  // Set the timestamp in the audio frame to zero before the first packet has
+  // been inserted. Otherwise, subtract the frame size in samples to get the
+  // timestamp of the first sample in the frame (playout_timestamp_ is the
+  // last + 1).
+  audio_frame->timestamp_ =
+      first_packet_
+          ? 0
+          : timestamp_scaler_->ToExternal(playout_timestamp_) -
+                static_cast<uint32_t>(audio_frame->samples_per_channel_);
+
+  if (!(last_mode_ == Mode::kRfc3389Cng ||
+        last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kExpand ||
+        last_mode_ == Mode::kCodecPlc)) {
+    generated_noise_stopwatch_.reset();
+  }
+
+  if (decode_return_value)
+    return decode_return_value;
+  return return_value;
+}
+
+int NetEqImpl::GetDecision(Operation* operation,
+                           PacketList* packet_list,
+                           DtmfEvent* dtmf_event,
+                           bool* play_dtmf,
+                           absl::optional<Operation> action_override) {
+  // Initialize output variables.
+  *play_dtmf = false;
+  *operation = Operation::kUndefined;
+
+  RTC_DCHECK(sync_buffer_.get());
+  uint32_t end_timestamp = sync_buffer_->end_timestamp();
+  if (!new_codec_) {
+    const uint32_t five_seconds_samples = 5 * fs_hz_;
+    packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples,
+                                      stats_.get());
+  }
+  const Packet* packet = packet_buffer_->PeekNextPacket();
+
+  RTC_DCHECK(!generated_noise_stopwatch_ ||
+             generated_noise_stopwatch_->ElapsedTicks() >= 1);
+  uint64_t generated_noise_samples =
+      generated_noise_stopwatch_ ? (generated_noise_stopwatch_->ElapsedTicks() -
+                                    1) * output_size_samples_ +
+                                       controller_->noise_fast_forward()
+                                 : 0;
+
+  if (controller_->CngRfc3389On() || last_mode_ == Mode::kRfc3389Cng) {
+    // Because of timestamp peculiarities, we have to "manually" disallow using
+    // a CNG packet with the same timestamp as the one that was last played.
+    // This can happen when using redundancy and will cause the timing to
+    // shift.
+    while (packet && decoder_database_->IsComfortNoise(packet->payload_type) &&
+           (end_timestamp >= packet->timestamp ||
+            end_timestamp + generated_noise_samples > packet->timestamp)) {
+      // Don't use this packet, discard it.
+      if (packet_buffer_->DiscardNextPacket(stats_.get()) !=
+          PacketBuffer::kOK) {
+        RTC_DCHECK_NOTREACHED();  // Must be ok by design.
+      }
+      // Check buffer again.
+      if (!new_codec_) {
+        packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_,
+                                          stats_.get());
+      }
+      packet = packet_buffer_->PeekNextPacket();
+    }
+  }
+
+  RTC_DCHECK(expand_.get());
+  const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
+                                            expand_->overlap_length());
+  if (last_mode_ == Mode::kAccelerateSuccess ||
+      last_mode_ == Mode::kAccelerateLowEnergy ||
+      last_mode_ == Mode::kPreemptiveExpandSuccess ||
+      last_mode_ == Mode::kPreemptiveExpandLowEnergy) {
+    // Subtract (samples_left + output_size_samples_) from sampleMemory.
+    controller_->AddSampleMemory(
+        -(samples_left + rtc::dchecked_cast<int>(output_size_samples_)));
+  }
+
+  // Check if it is time to play a DTMF event.
+  if (dtmf_buffer_->GetEvent(
+          static_cast<uint32_t>(end_timestamp + generated_noise_samples),
+          dtmf_event)) {
+    *play_dtmf = true;
+  }
+
+  // Get instruction.
+  RTC_DCHECK(sync_buffer_.get());
+  RTC_DCHECK(expand_.get());
+  generated_noise_samples =
+      generated_noise_stopwatch_
+          ?
generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ + + controller_->noise_fast_forward() + : 0; + NetEqController::NetEqStatus status; + status.packet_buffer_info.dtx_or_cng = + packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get()); + status.packet_buffer_info.num_samples = + packet_buffer_->NumSamplesInBuffer(decoder_frame_length_); + status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples( + decoder_frame_length_, last_output_sample_rate_hz_, true); + status.packet_buffer_info.span_samples_no_dtx = + packet_buffer_->GetSpanSamples(decoder_frame_length_, + last_output_sample_rate_hz_, false); + status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer(); + status.target_timestamp = sync_buffer_->end_timestamp(); + status.expand_mutefactor = expand_->MuteFactor(0); + status.last_packet_samples = decoder_frame_length_; + status.last_mode = last_mode_; + status.play_dtmf = *play_dtmf; + status.generated_noise_samples = generated_noise_samples; + status.sync_buffer_samples = sync_buffer_->FutureLength(); + if (packet) { + status.next_packet = { + packet->timestamp, packet->frame && packet->frame->IsDtxPacket(), + decoder_database_->IsComfortNoise(packet->payload_type)}; + } + *operation = controller_->GetDecision(status, &reset_decoder_); + + // Disallow time stretching if this packet is DTX, because such a decision may + // be based on earlier buffer level estimate, as we do not update buffer level + // during DTX. When we have a better way to update buffer level during DTX, + // this can be discarded. + if (packet && packet->frame && packet->frame->IsDtxPacket() && + (*operation == Operation::kMerge || + *operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate || + *operation == Operation::kPreemptiveExpand)) { + *operation = Operation::kNormal; + } + + if (action_override) { + // Use the provided action instead of the decision NetEq decided on. + *operation = *action_override; + } + // Check if we already have enough samples in the `sync_buffer_`. If so, + // change decision to normal, unless the decision was merge, accelerate, or + // preemptive expand. + if (samples_left >= rtc::dchecked_cast(output_size_samples_) && + *operation != Operation::kMerge && *operation != Operation::kAccelerate && + *operation != Operation::kFastAccelerate && + *operation != Operation::kPreemptiveExpand) { + *operation = Operation::kNormal; + return 0; + } + + controller_->ExpandDecision(*operation); + if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) { + // Getting out of the PLC expand mode, reporting interruptions. + // NetEq PLC reports this metrics in expand.cc + stats_->EndExpandEvent(fs_hz_); + } + + // Check conditions for reset. + if (new_codec_ || *operation == Operation::kUndefined) { + // The only valid reason to get kUndefined is that new_codec_ is set. + RTC_DCHECK(new_codec_); + if (*play_dtmf && !packet) { + timestamp_ = dtmf_event->timestamp; + } else { + if (!packet) { + RTC_LOG(LS_ERROR) << "Packet missing where it shouldn't."; + return -1; + } + timestamp_ = packet->timestamp; + if (*operation == Operation::kRfc3389CngNoPacket && + decoder_database_->IsComfortNoise(packet->payload_type)) { + // Change decision to CNG packet, since we do have a CNG packet, but it + // was considered too early to use. Now, use it anyway. 
+ *operation = Operation::kRfc3389Cng; + } else if (*operation != Operation::kRfc3389Cng) { + *operation = Operation::kNormal; + } + } + // Adjust `sync_buffer_` timestamp before setting `end_timestamp` to the + // new value. + sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp); + end_timestamp = timestamp_; + new_codec_ = false; + controller_->SoftReset(); + stats_->ResetMcu(); + } + + size_t required_samples = output_size_samples_; + const size_t samples_10_ms = static_cast(80 * fs_mult_); + const size_t samples_20_ms = 2 * samples_10_ms; + const size_t samples_30_ms = 3 * samples_10_ms; + + switch (*operation) { + case Operation::kExpand: { + timestamp_ = end_timestamp; + return 0; + } + case Operation::kRfc3389CngNoPacket: + case Operation::kCodecInternalCng: { + return 0; + } + case Operation::kDtmf: { + // TODO(hlundin): Write test for this. + // Update timestamp. + timestamp_ = end_timestamp; + const uint64_t generated_noise_samples = + generated_noise_stopwatch_ + ? generated_noise_stopwatch_->ElapsedTicks() * + output_size_samples_ + + controller_->noise_fast_forward() + : 0; + if (generated_noise_samples > 0 && last_mode_ != Mode::kDtmf) { + // Make a jump in timestamp due to the recently played comfort noise. + uint32_t timestamp_jump = + static_cast(generated_noise_samples); + sync_buffer_->IncreaseEndTimestamp(timestamp_jump); + timestamp_ += timestamp_jump; + } + return 0; + } + case Operation::kAccelerate: + case Operation::kFastAccelerate: { + // In order to do an accelerate we need at least 30 ms of audio data. + if (samples_left >= static_cast(samples_30_ms)) { + // Already have enough data, so we do not need to extract any more. + controller_->set_sample_memory(samples_left); + controller_->set_prev_time_scale(true); + return 0; + } else if (samples_left >= static_cast(samples_10_ms) && + decoder_frame_length_ >= samples_30_ms) { + // Avoid decoding more data as it might overflow the playout buffer. + *operation = Operation::kNormal; + return 0; + } else if (samples_left < static_cast(samples_20_ms) && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. Do + // not perform accelerate yet, but wait until we only need to do one + // decoding. + required_samples = 2 * output_size_samples_; + *operation = Operation::kNormal; + } + // If none of the above is true, we have one of two possible situations: + // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or + // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms. + // In either case, we move on with the accelerate decision, and decode one + // frame now. + break; + } + case Operation::kPreemptiveExpand: { + // In order to do a preemptive expand we need at least 30 ms of decoded + // audio data. + if ((samples_left >= static_cast(samples_30_ms)) || + (samples_left >= static_cast(samples_10_ms) && + decoder_frame_length_ >= samples_30_ms)) { + // Already have enough data, so we do not need to extract any more. + // Or, avoid decoding more data as it might overflow the playout buffer. + // Still try preemptive expand, though. + controller_->set_sample_memory(samples_left); + controller_->set_prev_time_scale(true); + return 0; + } + if (samples_left < static_cast(samples_20_ms) && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. + // Still try to perform preemptive expand. 
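+      // (`required_samples` is doubled below, which corresponds to 20 ms of
+      // output audio.)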
+ required_samples = 2 * output_size_samples_; + } + // Move on with the preemptive expand decision. + break; + } + case Operation::kMerge: { + required_samples = + std::max(merge_->RequiredFutureSamples(), required_samples); + break; + } + default: { + // Do nothing. + } + } + + // Get packets from buffer. + int extracted_samples = 0; + if (packet) { + sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp); + + if (*operation != Operation::kRfc3389Cng) { + // We are about to decode and use a non-CNG packet. + controller_->SetCngOff(); + } + + extracted_samples = ExtractPackets(required_samples, packet_list); + if (extracted_samples < 0) { + return kPacketBufferCorruption; + } + } + + if (*operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate || + *operation == Operation::kPreemptiveExpand) { + controller_->set_sample_memory(samples_left + extracted_samples); + controller_->set_prev_time_scale(true); + } + + if (*operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate) { + // Check that we have enough data (30ms) to do accelerate. + if (extracted_samples + samples_left < static_cast(samples_30_ms)) { + // TODO(hlundin): Write test for this. + // Not enough, do normal operation instead. + *operation = Operation::kNormal; + } + } + + timestamp_ = sync_buffer_->end_timestamp(); + return 0; +} + +int NetEqImpl::Decode(PacketList* packet_list, + Operation* operation, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + *speech_type = AudioDecoder::kSpeech; + + // When packet_list is empty, we may be in kCodecInternalCng mode, and for + // that we use current active decoder. + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + + if (!packet_list->empty()) { + const Packet& packet = packet_list->front(); + uint8_t payload_type = packet.payload_type; + if (!decoder_database_->IsComfortNoise(payload_type)) { + decoder = decoder_database_->GetDecoder(payload_type); + RTC_DCHECK(decoder); + if (!decoder) { + RTC_LOG(LS_WARNING) + << "Unknown payload type " << static_cast(payload_type); + packet_list->clear(); + return kDecoderNotFound; + } + bool decoder_changed; + decoder_database_->SetActiveDecoder(payload_type, &decoder_changed); + if (decoder_changed) { + // We have a new decoder. Re-init some values. + const DecoderDatabase::DecoderInfo* decoder_info = + decoder_database_->GetDecoderInfo(payload_type); + RTC_DCHECK(decoder_info); + if (!decoder_info) { + RTC_LOG(LS_WARNING) + << "Unknown payload type " << static_cast(payload_type); + packet_list->clear(); + return kDecoderNotFound; + } + // If sampling rate or number of channels has changed, we need to make + // a reset. + if (decoder_info->SampleRateHz() != fs_hz_ || + decoder->Channels() != algorithm_buffer_->Channels()) { + // TODO(tlegrand): Add unittest to cover this event. + SetSampleRateAndChannels(decoder_info->SampleRateHz(), + decoder->Channels()); + } + sync_buffer_->set_end_timestamp(timestamp_); + playout_timestamp_ = timestamp_; + } + } + } + + if (reset_decoder_) { + // TODO(hlundin): Write test for this. + if (decoder) + decoder->Reset(); + + // Reset comfort noise decoder. + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) + cng_decoder->Reset(); + + reset_decoder_ = false; + } + + *decoded_length = 0; + // Update codec-internal PLC state. 
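+  // A merge operation follows a packet loss, so if the decoder has its own
+  // packet-loss concealment, run one round of it here to keep the decoder's
+  // internal state in step with the output.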
+ if ((*operation == Operation::kMerge) && decoder && decoder->HasDecodePlc()) { + decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]); + } + + int return_value; + if (*operation == Operation::kCodecInternalCng) { + RTC_DCHECK(packet_list->empty()); + return_value = DecodeCng(decoder, decoded_length, speech_type); + } else { + return_value = DecodeLoop(packet_list, *operation, decoder, decoded_length, + speech_type); + } + + if (*decoded_length < 0) { + // Error returned from the decoder. + *decoded_length = 0; + sync_buffer_->IncreaseEndTimestamp( + static_cast(decoder_frame_length_)); + int error_code = 0; + if (decoder) + error_code = decoder->ErrorCode(); + if (error_code != 0) { + // Got some error code from the decoder. + return_value = kDecoderErrorCode; + RTC_LOG(LS_WARNING) << "Decoder returned error code: " << error_code; + } else { + // Decoder does not implement error codes. Return generic error. + return_value = kOtherDecoderError; + RTC_LOG(LS_WARNING) << "Decoder error (no error code)"; + } + *operation = Operation::kExpand; // Do expansion to get data instead. + } + if (*speech_type != AudioDecoder::kComfortNoise) { + // Don't increment timestamp if codec returned CNG speech type + // since in this case, the we will increment the CNGplayedTS counter. + // Increase with number of samples per channel. + RTC_DCHECK(*decoded_length == 0 || + (decoder && decoder->Channels() == sync_buffer_->Channels())); + sync_buffer_->IncreaseEndTimestamp( + *decoded_length / static_cast(sync_buffer_->Channels())); + } + return return_value; +} + +int NetEqImpl::DecodeCng(AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + if (!decoder) { + // This happens when active decoder is not defined. + *decoded_length = -1; + return 0; + } + + while (*decoded_length < rtc::dchecked_cast(output_size_samples_)) { + const int length = decoder->Decode( + nullptr, 0, fs_hz_, + (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), + &decoded_buffer_[*decoded_length], speech_type); + if (length > 0) { + *decoded_length += length; + } else { + // Error. + RTC_LOG(LS_WARNING) << "Failed to decode CNG"; + *decoded_length = -1; + break; + } + if (*decoded_length > static_cast(decoded_buffer_length_)) { + // Guard against overflow. + RTC_LOG(LS_WARNING) << "Decoded too much CNG."; + return kDecodedTooMuch; + } + } + stats_->GeneratedNoiseSamples(*decoded_length); + return 0; +} + +int NetEqImpl::DecodeLoop(PacketList* packet_list, + const Operation& operation, + AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + RTC_DCHECK(last_decoded_packet_infos_.empty()); + + // Do decoding. + while (!packet_list->empty() && !decoder_database_->IsComfortNoise( + packet_list->front().payload_type)) { + RTC_DCHECK(decoder); // At this point, we must have a decoder object. + // The number of channels in the `sync_buffer_` should be the same as the + // number decoder channels. 
+ RTC_DCHECK_EQ(sync_buffer_->Channels(), decoder->Channels()); + RTC_DCHECK_GE(decoded_buffer_length_, kMaxFrameSize * decoder->Channels()); + RTC_DCHECK(operation == Operation::kNormal || + operation == Operation::kAccelerate || + operation == Operation::kFastAccelerate || + operation == Operation::kMerge || + operation == Operation::kPreemptiveExpand); + + auto opt_result = packet_list->front().frame->Decode( + rtc::ArrayView(&decoded_buffer_[*decoded_length], + decoded_buffer_length_ - *decoded_length)); + last_decoded_packet_infos_.push_back( + std::move(packet_list->front().packet_info)); + packet_list->pop_front(); + if (opt_result) { + const auto& result = *opt_result; + *speech_type = result.speech_type; + if (result.num_decoded_samples > 0) { + *decoded_length += rtc::dchecked_cast(result.num_decoded_samples); + // Update `decoder_frame_length_` with number of samples per channel. + decoder_frame_length_ = + result.num_decoded_samples / decoder->Channels(); + } + } else { + // Error. + // TODO(ossu): What to put here? + RTC_LOG(LS_WARNING) << "Decode error"; + *decoded_length = -1; + last_decoded_packet_infos_.clear(); + packet_list->clear(); + break; + } + if (*decoded_length > rtc::dchecked_cast(decoded_buffer_length_)) { + // Guard against overflow. + RTC_LOG(LS_WARNING) << "Decoded too much."; + packet_list->clear(); + return kDecodedTooMuch; + } + } // End of decode loop. + + // If the list is not empty at this point, either a decoding error terminated + // the while-loop, or list must hold exactly one CNG packet. + RTC_DCHECK( + packet_list->empty() || *decoded_length < 0 || + (packet_list->size() == 1 && + decoder_database_->IsComfortNoise(packet_list->front().payload_type))); + return 0; +} + +void NetEqImpl::DoNormal(const int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + RTC_DCHECK(normal_.get()); + normal_->Process(decoded_buffer, decoded_length, last_mode_, + algorithm_buffer_.get()); + if (decoded_length != 0) { + last_mode_ = Mode::kNormal; + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if ((speech_type == AudioDecoder::kComfortNoise) || + ((last_mode_ == Mode::kCodecInternalCng) && (decoded_length == 0))) { + // TODO(hlundin): Remove second part of || statement above. + last_mode_ = Mode::kCodecInternalCng; + } + + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +void NetEqImpl::DoMerge(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + RTC_DCHECK(merge_.get()); + size_t new_length = + merge_->Process(decoded_buffer, decoded_length, algorithm_buffer_.get()); + // Correction can be negative. + int expand_length_correction = + rtc::dchecked_cast(new_length) - + rtc::dchecked_cast(decoded_length / algorithm_buffer_->Channels()); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand generates only noise. + stats_->ExpandedNoiseSamplesCorrection(expand_length_correction); + } else { + // Expansion generates more than only noise. + stats_->ExpandedVoiceSamplesCorrection(expand_length_correction); + } + + last_mode_ = Mode::kMerge; + // If last packet was decoded as an inband CNG, set mode to CNG instead. 
+ if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + expand_->Reset(); + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +bool NetEqImpl::DoCodecPlc() { + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + if (!decoder) { + return false; + } + const size_t channels = algorithm_buffer_->Channels(); + const size_t requested_samples_per_channel = + output_size_samples_ - + (sync_buffer_->FutureLength() - expand_->overlap_length()); + concealment_audio_.Clear(); + decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_); + if (concealment_audio_.empty()) { + // Nothing produced. Resort to regular expand. + return false; + } + RTC_CHECK_GE(concealment_audio_.size(), + requested_samples_per_channel * channels); + sync_buffer_->PushBackInterleaved(concealment_audio_); + RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0); + const size_t concealed_samples_per_channel = + concealment_audio_.size() / channels; + + // Update in-call and post-call statistics. + const bool is_new_concealment_event = (last_mode_ != Mode::kCodecPlc); + if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(), + [](int16_t i) { return i == 0; })) { + // Expand operation generates only noise. + stats_->ExpandedNoiseSamples(concealed_samples_per_channel, + is_new_concealment_event); + } else { + // Expand operation generates more than only noise. + stats_->ExpandedVoiceSamples(concealed_samples_per_channel, + is_new_concealment_event); + } + last_mode_ = Mode::kCodecPlc; + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. + generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + return true; +} + +int NetEqImpl::DoExpand(bool play_dtmf) { + while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < + output_size_samples_) { + algorithm_buffer_->Clear(); + int return_value = expand_->Process(algorithm_buffer_.get()); + size_t length = algorithm_buffer_->Size(); + bool is_new_concealment_event = (last_mode_ != Mode::kExpand); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand operation generates only noise. + stats_->ExpandedNoiseSamples(length, is_new_concealment_event); + } else { + // Expand operation generates more than only noise. + stats_->ExpandedVoiceSamples(length, is_new_concealment_event); + } + + last_mode_ = Mode::kExpand; + + if (return_value < 0) { + return return_value; + } + + sync_buffer_->PushBack(*algorithm_buffer_); + algorithm_buffer_->Clear(); + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. + generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + + return 0; +} + +int NetEqImpl::DoAccelerate(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf, + bool fast_accelerate) { + const size_t required_samples = + static_cast(240 * fs_mult_); // Must have 30 ms. + size_t borrowed_samples_per_channel = 0; + size_t num_channels = algorithm_buffer_->Channels(); + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the `sync_buffer_` in order to get 30 ms. 
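+    // Borrow the shortfall from the end of `sync_buffer_`; the borrowed
+    // samples are written back to the buffer once the accelerate operation
+    // has produced its output.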
+ borrowed_samples_per_channel = + static_cast(required_samples - decoded_length_per_channel); + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + size_t samples_removed = 0; + Accelerate::ReturnCodes return_code = + accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate, + algorithm_buffer_.get(), &samples_removed); + stats_->AcceleratedSamples(samples_removed); + switch (return_code) { + case Accelerate::kSuccess: + last_mode_ = Mode::kAccelerateSuccess; + break; + case Accelerate::kSuccessLowEnergy: + last_mode_ = Mode::kAccelerateLowEnergy; + break; + case Accelerate::kNoStretch: + last_mode_ = Mode::kAccelerateFail; + break; + case Accelerate::kError: + // TODO(hlundin): Map to Modes::kError instead? + last_mode_ = Mode::kAccelerateFail; + return kAccelerateError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the `sync_buffer_`. + size_t length = algorithm_buffer_->Size(); + if (length < borrowed_samples_per_channel) { + // This destroys the beginning of the buffer, but will not cause any + // problems. + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, + sync_buffer_->Size() - borrowed_samples_per_channel); + sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length); + algorithm_buffer_->PopFront(length); + RTC_DCHECK(algorithm_buffer_->Empty()); + } else { + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, borrowed_samples_per_channel, + sync_buffer_->Size() - borrowed_samples_per_channel); + algorithm_buffer_->PopFront(borrowed_samples_per_channel); + } + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + const size_t required_samples = + static_cast(240 * fs_mult_); // Must have 30 ms. + size_t num_channels = algorithm_buffer_->Channels(); + size_t borrowed_samples_per_channel = 0; + size_t old_borrowed_samples_per_channel = 0; + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the `sync_buffer_` in order to get 30 ms. + borrowed_samples_per_channel = + required_samples - decoded_length_per_channel; + // Calculate how many of these were already played out. + old_borrowed_samples_per_channel = + (borrowed_samples_per_channel > sync_buffer_->FutureLength()) + ? 
(borrowed_samples_per_channel - sync_buffer_->FutureLength()) + : 0; + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + size_t samples_added = 0; + PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process( + decoded_buffer, decoded_length, old_borrowed_samples_per_channel, + algorithm_buffer_.get(), &samples_added); + stats_->PreemptiveExpandedSamples(samples_added); + switch (return_code) { + case PreemptiveExpand::kSuccess: + last_mode_ = Mode::kPreemptiveExpandSuccess; + break; + case PreemptiveExpand::kSuccessLowEnergy: + last_mode_ = Mode::kPreemptiveExpandLowEnergy; + break; + case PreemptiveExpand::kNoStretch: + last_mode_ = Mode::kPreemptiveExpandFail; + break; + case PreemptiveExpand::kError: + // TODO(hlundin): Map to Modes::kError instead? + last_mode_ = Mode::kPreemptiveExpandFail; + return kPreemptiveExpandError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the `sync_buffer_`. + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, borrowed_samples_per_channel, + sync_buffer_->Size() - borrowed_samples_per_channel); + algorithm_buffer_->PopFront(borrowed_samples_per_channel); + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) { + if (!packet_list->empty()) { + // Must have exactly one SID frame at this point. + RTC_DCHECK_EQ(packet_list->size(), 1); + const Packet& packet = packet_list->front(); + if (!decoder_database_->IsComfortNoise(packet.payload_type)) { + RTC_LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG."; + return kOtherError; + } + if (comfort_noise_->UpdateParameters(packet) == + ComfortNoise::kInternalError) { + algorithm_buffer_->Zeros(output_size_samples_); + return -comfort_noise_->internal_error_code(); + } + } + int cn_return = + comfort_noise_->Generate(output_size_samples_, algorithm_buffer_.get()); + expand_->Reset(); + last_mode_ = Mode::kRfc3389Cng; + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + if (cn_return == ComfortNoise::kInternalError) { + RTC_LOG(LS_WARNING) << "Comfort noise generator returned error code: " + << comfort_noise_->internal_error_code(); + return kComfortNoiseErrorCode; + } else if (cn_return == ComfortNoise::kUnknownPayloadType) { + return kUnknownRtpPayloadType; + } + return 0; +} + +void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer, + size_t decoded_length) { + RTC_DCHECK(normal_.get()); + normal_->Process(decoded_buffer, decoded_length, last_mode_, + algorithm_buffer_.get()); + last_mode_ = Mode::kCodecInternalCng; + expand_->Reset(); +} + +int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) { + // This block of the code and the block further down, handling `dtmf_switch` + // are commented out. Otherwise playing out-of-band DTMF would fail in VoE + // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is + // equivalent to `dtmf_switch` always be false. + // + // See http://webrtc-codereview.appspot.com/1195004/ for discussion + // On this issue. 
This change might cause some glitches at the point of + // switch from audio to DTMF. Issue 1545 is filed to track this. + // + // bool dtmf_switch = false; + // if ((last_mode_ != Modes::kDtmf) && + // dtmf_tone_generator_->initialized()) { + // // Special case; see below. + // // We must catch this before calling Generate, since `initialized` is + // // modified in that call. + // dtmf_switch = true; + // } + + int dtmf_return_value = 0; + if (!dtmf_tone_generator_->initialized()) { + // Initialize if not already done. + dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, + dtmf_event.volume); + } + + if (dtmf_return_value == 0) { + // Generate DTMF signal. + dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, + algorithm_buffer_.get()); + } + + if (dtmf_return_value < 0) { + algorithm_buffer_->Zeros(output_size_samples_); + return dtmf_return_value; + } + + // if (dtmf_switch) { + // // This is the special case where the previous operation was DTMF + // // overdub, but the current instruction is "regular" DTMF. We must make + // // sure that the DTMF does not have any discontinuities. The first DTMF + // // sample that we generate now must be played out immediately, therefore + // // it must be copied to the speech buffer. + // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and + // // verify correct operation. + // RTC_DCHECK_NOTREACHED(); + // // Must generate enough data to replace all of the `sync_buffer_` + // // "future". + // int required_length = sync_buffer_->FutureLength(); + // RTC_DCHECK(dtmf_tone_generator_->initialized()); + // dtmf_return_value = dtmf_tone_generator_->Generate(required_length, + // algorithm_buffer_); + // RTC_DCHECK((size_t) required_length == algorithm_buffer_->Size()); + // if (dtmf_return_value < 0) { + // algorithm_buffer_->Zeros(output_size_samples_); + // return dtmf_return_value; + // } + // + // // Overwrite the "future" part of the speech buffer with the new DTMF + // // data. + // // TODO(hlundin): It seems that this overwriting has gone lost. + // // Not adapted for multi-channel yet. + // RTC_DCHECK(algorithm_buffer_->Channels() == 1); + // if (algorithm_buffer_->Channels() != 1) { + // RTC_LOG(LS_WARNING) << "DTMF not supported for more than one channel"; + // return kStereoNotSupported; + // } + // // Shuffle the remaining data to the beginning of algorithm buffer. + // algorithm_buffer_->PopFront(sync_buffer_->FutureLength()); + // } + + sync_buffer_->IncreaseEndTimestamp( + static_cast(output_size_samples_)); + expand_->Reset(); + last_mode_ = Mode::kDtmf; + + // Set to false because the DTMF is already in the algorithm buffer. + *play_dtmf = false; + return 0; +} + +int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, + size_t num_channels, + int16_t* output) const { + size_t out_index = 0; + size_t overdub_length = output_size_samples_; // Default value. + + if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { + // Special operation for transition from "DTMF only" to "DTMF overdub". 
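+    // Skip past the part of the output that still consists of pure DTMF tone
+    // and only overdub the remaining samples.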
+    out_index =
+        std::min(sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
+                 output_size_samples_);
+    overdub_length = output_size_samples_ - out_index;
+  }
+
+  AudioMultiVector dtmf_output(num_channels);
+  int dtmf_return_value = 0;
+  if (!dtmf_tone_generator_->initialized()) {
+    dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
+                                                   dtmf_event.volume);
+  }
+  if (dtmf_return_value == 0) {
+    dtmf_return_value =
+        dtmf_tone_generator_->Generate(overdub_length, &dtmf_output);
+    RTC_DCHECK_EQ(overdub_length, dtmf_output.Size());
+  }
+  dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
+  return dtmf_return_value < 0 ? dtmf_return_value : 0;
+}
+
+int NetEqImpl::ExtractPackets(size_t required_samples,
+                              PacketList* packet_list) {
+  bool first_packet = true;
+  uint8_t prev_payload_type = 0;
+  uint32_t prev_timestamp = 0;
+  uint16_t prev_sequence_number = 0;
+  bool next_packet_available = false;
+
+  const Packet* next_packet = packet_buffer_->PeekNextPacket();
+  RTC_DCHECK(next_packet);
+  if (!next_packet) {
+    RTC_LOG(LS_ERROR) << "Packet buffer unexpectedly empty.";
+    return -1;
+  }
+  uint32_t first_timestamp = next_packet->timestamp;
+  size_t extracted_samples = 0;
+
+  // Packet extraction loop.
+  do {
+    timestamp_ = next_packet->timestamp;
+    absl::optional<Packet> packet = packet_buffer_->GetNextPacket();
+    // `next_packet` may be invalid after the `packet_buffer_` operation.
+    next_packet = nullptr;
+    if (!packet) {
+      RTC_LOG(LS_ERROR) << "Should always be able to extract a packet here";
+      RTC_DCHECK_NOTREACHED();  // Should always be able to extract a packet
+                                // here.
+      return -1;
+    }
+    const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs();
+    stats_->StoreWaitingTime(waiting_time_ms);
+    RTC_DCHECK(!packet->empty());
+
+    if (first_packet) {
+      first_packet = false;
+      if (nack_enabled_) {
+        RTC_DCHECK(nack_);
+        // TODO(henrik.lundin): Should we update this for all decoded packets?
+        nack_->UpdateLastDecodedPacket(packet->sequence_number,
+                                       packet->timestamp);
+      }
+      prev_sequence_number = packet->sequence_number;
+      prev_timestamp = packet->timestamp;
+      prev_payload_type = packet->payload_type;
+    }
+
+    const bool has_cng_packet =
+        decoder_database_->IsComfortNoise(packet->payload_type);
+    // Store number of extracted samples.
+    size_t packet_duration = 0;
+    if (packet->frame) {
+      packet_duration = packet->frame->Duration();
+      // TODO(ossu): Is this the correct way to track Opus FEC packets?
+      if (packet->priority.codec_level > 0) {
+        stats_->SecondaryDecodedSamples(
+            rtc::dchecked_cast<int>(packet_duration));
+      }
+    } else if (!has_cng_packet) {
+      RTC_LOG(LS_WARNING) << "Unknown payload type "
+                          << static_cast<int>(packet->payload_type);
+      RTC_DCHECK_NOTREACHED();
+    }
+
+    if (packet_duration == 0) {
+      // Decoder did not return a packet duration. Assume that the packet
+      // contains the same number of samples as the previous one.
+      packet_duration = decoder_frame_length_;
+    }
+    extracted_samples = packet->timestamp - first_timestamp + packet_duration;
+
+    RTC_DCHECK(controller_);
+    stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
+                              controller_->TargetLevelMs(),
+                              controller_->UnlimitedTargetLevelMs());
+
+    packet_list->push_back(std::move(*packet));  // Store packet in list.
+    packet = absl::nullopt;  // Ensure it's never used after the move.
+
+    // Check what packet is available next.
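+    // Extraction may only continue if the next packet belongs to the same
+    // stream: same payload type, and a sequence number and timestamp that
+    // continue directly from the packet just extracted.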
+ next_packet = packet_buffer_->PeekNextPacket(); + next_packet_available = false; + if (next_packet && prev_payload_type == next_packet->payload_type && + !has_cng_packet) { + int16_t seq_no_diff = next_packet->sequence_number - prev_sequence_number; + size_t ts_diff = next_packet->timestamp - prev_timestamp; + if ((seq_no_diff == 1 || seq_no_diff == 0) && + ts_diff <= packet_duration) { + // The next sequence number is available, or the next part of a packet + // that was split into pieces upon insertion. + next_packet_available = true; + } + prev_sequence_number = next_packet->sequence_number; + prev_timestamp = next_packet->timestamp; + } + } while (extracted_samples < required_samples && next_packet_available); + + if (extracted_samples > 0) { + // Delete old packets only when we are going to decode something. Otherwise, + // we could end up in the situation where we never decode anything, since + // all incoming packets are considered too old but the buffer will also + // never be flooded and flushed. + packet_buffer_->DiscardAllOldPackets(timestamp_, stats_.get()); + } + + return rtc::dchecked_cast(extracted_samples); +} + +void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) { + // Delete objects and create new ones. + expand_.reset(expand_factory_->Create(background_noise_.get(), + sync_buffer_.get(), &random_vector_, + stats_.get(), fs_hz, channels)); + merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get())); +} + +void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { + RTC_LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " + << channels; + // TODO(hlundin): Change to an enumerator and skip assert. + RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || + fs_hz == 48000); + RTC_DCHECK_GT(channels, 0); + + // Before changing the sample rate, end and report any ongoing expand event. + stats_->EndExpandEvent(fs_hz_); + fs_hz_ = fs_hz; + fs_mult_ = fs_hz / 8000; + output_size_samples_ = static_cast(kOutputSizeMs * 8 * fs_mult_); + decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. + + last_mode_ = Mode::kNormal; + + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) + cng_decoder->Reset(); + + // Reinit post-decode VAD with new sample rate. + RTC_DCHECK(vad_.get()); // Cannot be NULL here. + vad_->Init(); + + // Delete algorithm buffer and create a new one. + algorithm_buffer_.reset(new AudioMultiVector(channels)); + + // Delete sync buffer and create a new one. + sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); + + // Delete BackgroundNoise object and create a new one. + background_noise_.reset(new BackgroundNoise(channels)); + + // Reset random vector. + random_vector_.Reset(); + + UpdatePlcComponents(fs_hz, channels); + + // Move index so that we create a small set of future samples (all 0). + sync_buffer_->set_next_index(sync_buffer_->next_index() - + expand_->overlap_length()); + + normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, + expand_.get(), stats_.get())); + accelerate_.reset( + accelerate_factory_->Create(fs_hz, channels, *background_noise_)); + preemptive_expand_.reset(preemptive_expand_factory_->Create( + fs_hz, channels, *background_noise_, expand_->overlap_length())); + + // Delete ComfortNoise object and create a new one. + comfort_noise_.reset( + new ComfortNoise(fs_hz, decoder_database_.get(), sync_buffer_.get())); + + // Verify that `decoded_buffer_` is long enough. 
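+  // kMaxFrameSize is a per-channel count, so the required buffer length
+  // scales with the number of channels.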
+ if (decoded_buffer_length_ < kMaxFrameSize * channels) { + // Reallocate to larger size. + decoded_buffer_length_ = kMaxFrameSize * channels; + decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); + } + RTC_CHECK(controller_) << "Unexpectedly found no NetEqController"; + controller_->SetSampleRate(fs_hz_, output_size_samples_); +} + +NetEqImpl::OutputType NetEqImpl::LastOutputType() { + RTC_DCHECK(vad_.get()); + RTC_DCHECK(expand_.get()); + if (last_mode_ == Mode::kCodecInternalCng || + last_mode_ == Mode::kRfc3389Cng) { + return OutputType::kCNG; + } else if (last_mode_ == Mode::kExpand && expand_->MuteFactor(0) == 0) { + // Expand mode has faded down to background noise only (very long expand). + return OutputType::kPLCCNG; + } else if (last_mode_ == Mode::kExpand) { + return OutputType::kPLC; + } else if (vad_->running() && !vad_->active_speech()) { + return OutputType::kVadPassive; + } else if (last_mode_ == Mode::kCodecPlc) { + return OutputType::kCodecPLC; + } else { + return OutputType::kNormalSpeech; + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h new file mode 100644 index 0000000000..6120eab5b6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ +#define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ + +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/audio/audio_frame.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "api/neteq/neteq_controller_factory.h" +#include "api/neteq/tick_timer.h" +#include "api/rtp_packet_info.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/expand_uma_logger.h" +#include "modules/audio_coding/neteq/packet.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +// Forward declarations. 
+class Accelerate;
+class BackgroundNoise;
+class Clock;
+class ComfortNoise;
+class DecoderDatabase;
+class DtmfBuffer;
+class DtmfToneGenerator;
+class Expand;
+class Merge;
+class NackTracker;
+class Normal;
+class PacketBuffer;
+class RedPayloadSplitter;
+class PostDecodeVad;
+class PreemptiveExpand;
+class RandomVector;
+class SyncBuffer;
+class TimestampScaler;
+struct AccelerateFactory;
+struct DtmfEvent;
+struct ExpandFactory;
+struct PreemptiveExpandFactory;
+
+class NetEqImpl : public webrtc::NetEq {
+ public:
+  enum class OutputType {
+    kNormalSpeech,
+    kPLC,
+    kCNG,
+    kPLCCNG,
+    kVadPassive,
+    kCodecPLC
+  };
+
+  enum ErrorCodes {
+    kNoError = 0,
+    kOtherError,
+    kUnknownRtpPayloadType,
+    kDecoderNotFound,
+    kInvalidPointer,
+    kAccelerateError,
+    kPreemptiveExpandError,
+    kComfortNoiseErrorCode,
+    kDecoderErrorCode,
+    kOtherDecoderError,
+    kInvalidOperation,
+    kDtmfParsingError,
+    kDtmfInsertError,
+    kSampleUnderrun,
+    kDecodedTooMuch,
+    kRedundancySplitError,
+    kPacketBufferCorruption
+  };
+
+  struct Dependencies {
+    // The constructor populates the Dependencies struct with the default
+    // implementations of the objects. They can all be replaced by the user
+    // before sending the struct to the NetEqImpl constructor. However, there
+    // are dependencies between some of the classes inside the struct, so
+    // swapping out one may make it necessary to re-create another one.
+    Dependencies(const NetEq::Config& config,
+                 Clock* clock,
+                 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
+                 const NetEqControllerFactory& controller_factory);
+    ~Dependencies();
+
+    Clock* const clock;
+    std::unique_ptr<TickTimer> tick_timer;
+    std::unique_ptr<StatisticsCalculator> stats;
+    std::unique_ptr<DecoderDatabase> decoder_database;
+    std::unique_ptr<DtmfBuffer> dtmf_buffer;
+    std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
+    std::unique_ptr<PacketBuffer> packet_buffer;
+    std::unique_ptr<NetEqController> neteq_controller;
+    std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
+    std::unique_ptr<TimestampScaler> timestamp_scaler;
+    std::unique_ptr<AccelerateFactory> accelerate_factory;
+    std::unique_ptr<ExpandFactory> expand_factory;
+    std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
+  };
+
+  // Creates a new NetEqImpl object.
+  NetEqImpl(const NetEq::Config& config,
+            Dependencies&& deps,
+            bool create_components = true);
+
+  ~NetEqImpl() override;
+
+  NetEqImpl(const NetEqImpl&) = delete;
+  NetEqImpl& operator=(const NetEqImpl&) = delete;
+
+  // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
+  int InsertPacket(const RTPHeader& rtp_header,
+                   rtc::ArrayView<const uint8_t> payload) override;
+
+  void InsertEmptyPacket(const RTPHeader& rtp_header) override;
+
+  int GetAudio(
+      AudioFrame* audio_frame,
+      bool* muted,
+      int* current_sample_rate_hz = nullptr,
+      absl::optional<Operation> action_override = absl::nullopt) override;
+
+  void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
+
+  bool RegisterPayloadType(int rtp_payload_type,
+                           const SdpAudioFormat& audio_format) override;
+
+  // Removes `rtp_payload_type` from the codec database. Returns 0 on success,
+  // -1 on failure.
+  int RemovePayloadType(uint8_t rtp_payload_type) override;
+
+  void RemoveAllPayloadTypes() override;
+
+  bool SetMinimumDelay(int delay_ms) override;
+
+  bool SetMaximumDelay(int delay_ms) override;
+
+  bool SetBaseMinimumDelayMs(int delay_ms) override;
+
+  int GetBaseMinimumDelayMs() const override;
+
+  int TargetDelayMs() const override;
+
+  int FilteredCurrentDelayMs() const override;
+
+  // Writes the current network statistics to `stats`. The statistics are
+  // reset after the call.
+  int NetworkStatistics(NetEqNetworkStatistics* stats) override;
+
+  NetEqNetworkStatistics CurrentNetworkStatistics() const override;
+
+  NetEqLifetimeStatistics GetLifetimeStatistics() const override;
+
+  NetEqOperationsAndState GetOperationsAndState() const override;
+
+  // Enables post-decode VAD. When enabled, GetAudio() will return
+  // kOutputVADPassive when the signal contains no speech.
+  void EnableVad() override;
+
+  // Disables post-decode VAD.
+  void DisableVad() override;
+
+  absl::optional<uint32_t> GetPlayoutTimestamp() const override;
+
+  int last_output_sample_rate_hz() const override;
+
+  absl::optional<DecoderFormat> GetDecoderFormat(
+      int payload_type) const override;
+
+  // Flushes both the packet buffer and the sync buffer.
+  void FlushBuffers() override;
+
+  void EnableNack(size_t max_nack_list_size) override;
+
+  void DisableNack() override;
+
+  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
+
+  int SyncBufferSizeMs() const override;
+
+  // These accessor methods are only intended for testing purposes.
+  const SyncBuffer* sync_buffer_for_test() const;
+  Operation last_operation_for_test() const;
+
+ protected:
+  static const int kOutputSizeMs = 10;
+  static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
+  // TODO(hlundin): Provide a better value for kSyncBufferSize.
+  // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
+  // calculating correlations of current frame against history.
+  static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
+
+  // Inserts a new packet into NetEq. This is used by the InsertPacket method
+  // above. Returns 0 on success, otherwise an error code.
+  // TODO(hlundin): Merge this with InsertPacket above?
+  int InsertPacketInternal(const RTPHeader& rtp_header,
+                           rtc::ArrayView<const uint8_t> payload)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Delivers 10 ms of audio data. The data is written to `audio_frame`.
+  // Returns 0 on success, otherwise an error code.
+  int GetAudioInternal(AudioFrame* audio_frame,
+                       bool* muted,
+                       absl::optional<Operation> action_override)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Provides a decision to the GetAudioInternal method. The decision what to
+  // do is written to `operation`. Packets to decode are written to
+  // `packet_list`, and a DTMF event to play is written to `dtmf_event`. When
+  // DTMF should be played, `play_dtmf` is set to true by the method.
+  // Returns 0 on success, otherwise an error code.
+  int GetDecision(Operation* operation,
+                  PacketList* packet_list,
+                  DtmfEvent* dtmf_event,
+                  bool* play_dtmf,
+                  absl::optional<Operation> action_override)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Decodes the speech packets in `packet_list`, and writes the results to
+  // `decoded_buffer`, which is allocated to hold `decoded_buffer_length`
+  // elements. The length of the decoded data is written to `decoded_length`.
+  // The speech type -- speech or (codec-internal) comfort noise -- is written
+  // to `speech_type`. If `packet_list` contains any SID frames for RFC 3389
+  // comfort noise, those are not decoded.
+  int Decode(PacketList* packet_list,
+             Operation* operation,
+             int* decoded_length,
+             AudioDecoder::SpeechType* speech_type)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method to Decode(). Performs codec internal CNG.
+  int DecodeCng(AudioDecoder* decoder,
+                int* decoded_length,
+                AudioDecoder::SpeechType* speech_type)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method to Decode(). Performs the actual decoding.
+
+  // Inserts a new packet into NetEq. This is used by the InsertPacket method
+  // above. Returns 0 on success, otherwise an error code.
+  // TODO(hlundin): Merge this with InsertPacket above?
+  int InsertPacketInternal(const RTPHeader& rtp_header,
+                           rtc::ArrayView<const uint8_t> payload)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Delivers 10 ms of audio data. The data is written to `audio_frame`.
+  // Returns 0 on success, otherwise an error code.
+  int GetAudioInternal(AudioFrame* audio_frame,
+                       bool* muted,
+                       absl::optional<Operation> action_override)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Provides a decision to the GetAudioInternal method. The decision on what
+  // to do is written to `operation`. Packets to decode are written to
+  // `packet_list`, and a DTMF event to play is written to `dtmf_event`. When
+  // DTMF should be played, `play_dtmf` is set to true by the method.
+  // Returns 0 on success, otherwise an error code.
+  int GetDecision(Operation* operation,
+                  PacketList* packet_list,
+                  DtmfEvent* dtmf_event,
+                  bool* play_dtmf,
+                  absl::optional<Operation> action_override)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Decodes the speech packets in `packet_list`, and writes the results to
+  // `decoded_buffer`, which is allocated to hold `decoded_buffer_length`
+  // elements. The length of the decoded data is written to `decoded_length`.
+  // The speech type -- speech or (codec-internal) comfort noise -- is written
+  // to `speech_type`. If `packet_list` contains any SID frames for RFC 3389
+  // comfort noise, those are not decoded.
+  int Decode(PacketList* packet_list,
+             Operation* operation,
+             int* decoded_length,
+             AudioDecoder::SpeechType* speech_type)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method to Decode(). Performs codec internal CNG.
+  int DecodeCng(AudioDecoder* decoder,
+                int* decoded_length,
+                AudioDecoder::SpeechType* speech_type)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method to Decode(). Performs the actual decoding.
+  int DecodeLoop(PacketList* packet_list,
+                 const Operation& operation,
+                 AudioDecoder* decoder,
+                 int* decoded_length,
+                 AudioDecoder::SpeechType* speech_type)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method which calls the Normal class to perform the normal operation.
+  void DoNormal(const int16_t* decoded_buffer,
+                size_t decoded_length,
+                AudioDecoder::SpeechType speech_type,
+                bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method which calls the Merge class to perform the merge operation.
+  void DoMerge(int16_t* decoded_buffer,
+               size_t decoded_length,
+               AudioDecoder::SpeechType speech_type,
+               bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method which calls the Expand class to perform the expand operation.
+  int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method which calls the Accelerate class to perform the accelerate
+  // operation.
+  int DoAccelerate(int16_t* decoded_buffer,
+                   size_t decoded_length,
+                   AudioDecoder::SpeechType speech_type,
+                   bool play_dtmf,
+                   bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Sub-method which calls the PreemptiveExpand class to perform the
+  // preemptive expand operation.
+  int DoPreemptiveExpand(int16_t* decoded_buffer,
+                         size_t decoded_length,
+                         AudioDecoder::SpeechType speech_type,
+                         bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
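+
+  // Accelerate and PreemptiveExpand are the two time-stretching operations:
+  // Accelerate shortens the decoded signal when the buffer runs long, while
+  // PreemptiveExpand stretches it when the buffer is about to run dry, both
+  // aiming to preserve the pitch of the signal.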
+
+  // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
+  // noise. `packet_list` can either contain one SID frame to update the
+  // noise parameters, or no payload at all, in which case the previously
+  // received parameters are used.
+  int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Calls the audio decoder to generate codec-internal comfort noise when
+  // no packet was received.
+  void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Calls the DtmfToneGenerator class to generate DTMF tones.
+  int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Overdub DTMF on top of `output`.
+  int DtmfOverdub(const DtmfEvent& dtmf_event,
+                  size_t num_channels,
+                  int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Extracts packets from `packet_buffer_` to produce at least
+  // `required_samples` samples. The packets are inserted into `packet_list`.
+  // Returns the number of samples that the packets in the list will produce,
+  // or -1 in case of an error.
+  int ExtractPackets(size_t required_samples, PacketList* packet_list)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Resets various variables and objects to new values based on the sample
+  // rate `fs_hz` and the number of audio channels `channels`.
+  void SetSampleRateAndChannels(int fs_hz, size_t channels)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Returns the output type for the audio produced by the latest call to
+  // GetAudio().
+  OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Updates Expand and Merge.
+  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  Clock* const clock_;
+
+  mutable Mutex mutex_;
+  const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<DecoderDatabase> decoder_database_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<TimestampScaler> timestamp_scaler_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<AccelerateFactory> accelerate_factory_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
+      RTC_GUARDED_BY(mutex_);
+  const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
+
+  std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
+  RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
+  int fs_hz_ RTC_GUARDED_BY(mutex_);
+  int fs_mult_ RTC_GUARDED_BY(mutex_);
+  int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
+  size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
+  size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
+  Mode last_mode_ RTC_GUARDED_BY(mutex_);
+  Operation last_operation_ RTC_GUARDED_BY(mutex_);
+  size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
+  uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
+  bool new_codec_ RTC_GUARDED_BY(mutex_);
+  uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
+  bool reset_decoder_ RTC_GUARDED_BY(mutex_);
+  absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
+  absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
+  bool first_packet_ RTC_GUARDED_BY(mutex_);
+  bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
+  std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
+  bool nack_enabled_ RTC_GUARDED_BY(mutex_);
+  const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
+  AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
+      AudioFrame::kVadPassive;
+  std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
+      RTC_GUARDED_BY(mutex_);
+  std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
+  ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
+  ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
+  bool no_time_stretching_ RTC_GUARDED_BY(mutex_);  // Only used for test.
+  rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);
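+
+  // All members above are guarded by `mutex_`. A public method in
+  // neteq_impl.cc is therefore expected to take the lock before touching
+  // them; a minimal sketch of the pattern (assuming webrtc::MutexLock):
+  //
+  //   int NetEqImpl::TargetDelayMs() const {
+  //     MutexLock lock(&mutex_);
+  //     RTC_DCHECK(controller_);
+  //     return controller_->TargetLevelMs();
+  //   }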
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
new file mode 100644
index 0000000000..ce2be656ef
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -0,0 +1,1871 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/neteq_impl.h"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/neteq/default_neteq_controller_factory.h"
+#include "api/neteq/neteq.h"
+#include "api/neteq/neteq_controller.h"
+#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
+#include "modules/audio_coding/neteq/accelerate.h"
+#include "modules/audio_coding/neteq/decision_logic.h"
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "modules/audio_coding/neteq/histogram.h"
+#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
+#include "modules/audio_coding/neteq/mock/mock_dtmf_buffer.h"
+#include "modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h"
+#include "modules/audio_coding/neteq/mock/mock_neteq_controller.h"
+#include "modules/audio_coding/neteq/mock/mock_packet_buffer.h"
+#include "modules/audio_coding/neteq/mock/mock_red_payload_splitter.h"
+#include "modules/audio_coding/neteq/preemptive_expand.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "modules/audio_coding/neteq/timestamp_scaler.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/clock.h"
+#include "test/audio_decoder_proxy_factory.h"
+#include "test/function_audio_decoder_factory.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder.h"
+#include "test/mock_audio_decoder_factory.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoAll;
+using ::testing::ElementsAre;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::IsEmpty;
+using ::testing::IsNull;
+using ::testing::Pointee;
+using ::testing::Return;
+using ::testing::ReturnNull;
+using ::testing::SetArgPointee;
+using ::testing::SetArrayArgument;
+using ::testing::SizeIs;
+using ::testing::WithArg;
+
+namespace webrtc {
+
+// This function is called when inserting a packet list into the mock packet
+// buffer. The purpose is to delete all inserted packets properly, to avoid
+// memory leaks in the test.
+int DeletePacketsAndReturnOk(PacketList* packet_list) {
+  packet_list->clear();
+  return PacketBuffer::kOK;
+}
+
+class NetEqImplTest : public ::testing::Test {
+ protected:
+  NetEqImplTest() : clock_(0) { config_.sample_rate_hz = 8000; }
+
+  void CreateInstance(
+      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
+    ASSERT_TRUE(decoder_factory);
+    config_.enable_muted_state = enable_muted_state_;
+    NetEqImpl::Dependencies deps(config_, &clock_, decoder_factory,
+                                 DefaultNetEqControllerFactory());
+
+    // Get a local pointer to NetEq's TickTimer object.
+    tick_timer_ = deps.tick_timer.get();
+
+    if (use_mock_decoder_database_) {
+      std::unique_ptr<MockDecoderDatabase> mock(new MockDecoderDatabase);
+      mock_decoder_database_ = mock.get();
+      EXPECT_CALL(*mock_decoder_database_, GetActiveCngDecoder())
+          .WillOnce(ReturnNull());
+      deps.decoder_database = std::move(mock);
+    }
+    decoder_database_ = deps.decoder_database.get();
+
+    if (use_mock_dtmf_buffer_) {
+      std::unique_ptr<MockDtmfBuffer> mock(
+          new MockDtmfBuffer(config_.sample_rate_hz));
+      mock_dtmf_buffer_ = mock.get();
+      deps.dtmf_buffer = std::move(mock);
+    }
+    dtmf_buffer_ = deps.dtmf_buffer.get();
+
+    if (use_mock_dtmf_tone_generator_) {
+      std::unique_ptr<MockDtmfToneGenerator> mock(new MockDtmfToneGenerator);
+      mock_dtmf_tone_generator_ = mock.get();
+      deps.dtmf_tone_generator = std::move(mock);
+    }
+    dtmf_tone_generator_ = deps.dtmf_tone_generator.get();
+
+    if (use_mock_packet_buffer_) {
+      std::unique_ptr<MockPacketBuffer> mock(
+          new MockPacketBuffer(config_.max_packets_in_buffer, tick_timer_));
+      mock_packet_buffer_ = mock.get();
+      deps.packet_buffer = std::move(mock);
+    }
+    packet_buffer_ = deps.packet_buffer.get();
+
+    if (use_mock_neteq_controller_) {
+      std::unique_ptr<MockNetEqController> mock(new MockNetEqController());
+      mock_neteq_controller_ = mock.get();
+      deps.neteq_controller = std::move(mock);
+    } else {
+      deps.stats = std::make_unique<StatisticsCalculator>();
+      NetEqController::Config controller_config;
+      controller_config.tick_timer = tick_timer_;
+      controller_config.base_min_delay_ms = config_.min_delay_ms;
+      controller_config.allow_time_stretching = true;
+      controller_config.max_packets_in_buffer = config_.max_packets_in_buffer;
+      controller_config.clock = &clock_;
+      deps.neteq_controller =
+          std::make_unique<DecisionLogic>(std::move(controller_config));
+    }
+    neteq_controller_ = deps.neteq_controller.get();
+
+    if (use_mock_payload_splitter_) {
+      std::unique_ptr<MockRedPayloadSplitter> mock(new MockRedPayloadSplitter);
+      mock_payload_splitter_ = mock.get();
+      deps.red_payload_splitter = std::move(mock);
+    }
+    red_payload_splitter_ = deps.red_payload_splitter.get();
+
+    deps.timestamp_scaler = std::unique_ptr<TimestampScaler>(
+        new TimestampScaler(*deps.decoder_database.get()));
+
+    neteq_.reset(new NetEqImpl(config_, std::move(deps)));
+    ASSERT_TRUE(neteq_ != NULL);
+  }
+
+  void CreateInstance() { CreateInstance(CreateBuiltinAudioDecoderFactory()); }
+
+  void UseNoMocks() {
+    ASSERT_TRUE(neteq_ == NULL) << "Must call UseNoMocks before CreateInstance";
+    use_mock_decoder_database_ = false;
+    use_mock_neteq_controller_ = false;
+    use_mock_dtmf_buffer_ = false;
+    use_mock_dtmf_tone_generator_ = false;
+    use_mock_packet_buffer_ = false;
+    use_mock_payload_splitter_ = false;
+  }
+
+  virtual ~NetEqImplTest() {
+    if (use_mock_decoder_database_) {
+      EXPECT_CALL(*mock_decoder_database_, Die()).Times(1);
+    }
+    if (use_mock_neteq_controller_) {
+      EXPECT_CALL(*mock_neteq_controller_, Die()).Times(1);
+    }
+    if (use_mock_dtmf_buffer_) {
+      EXPECT_CALL(*mock_dtmf_buffer_, Die()).Times(1);
+    }
+    if (use_mock_dtmf_tone_generator_) {
+      EXPECT_CALL(*mock_dtmf_tone_generator_, Die()).Times(1);
+    }
+    if (use_mock_packet_buffer_) {
+      EXPECT_CALL(*mock_packet_buffer_, Die()).Times(1);
+    }
+  }
+
+  void TestDtmfPacket(int sample_rate_hz) {
+    const size_t kPayloadLength = 4;
+    const uint8_t kPayloadType = 110;
+    const int kSampleRateHz = 16000;
+    config_.sample_rate_hz = kSampleRateHz;
+    UseNoMocks();
+    CreateInstance();
+    // Event: 2, E bit, Volume: 17, Length: 4336.
+    uint8_t payload[kPayloadLength] = {0x02, 0x80 + 0x11, 0x10, 0xF0};
+    RTPHeader rtp_header;
+    rtp_header.payloadType = kPayloadType;
+    rtp_header.sequenceNumber = 0x1234;
+    rtp_header.timestamp = 0x12345678;
+    rtp_header.ssrc = 0x87654321;
+
+    EXPECT_TRUE(neteq_->RegisterPayloadType(
+        kPayloadType, SdpAudioFormat("telephone-event", sample_rate_hz, 1)));
+
+    // Insert first packet.
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+    // Pull audio once.
+    const size_t kMaxOutputSize =
+        static_cast<size_t>(10 * kSampleRateHz / 1000);
+    AudioFrame output;
+    bool muted;
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_FALSE(muted);
+    ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+
+    // DTMF packets are immediately consumed by `InsertPacket()` and won't be
+    // returned by `GetAudio()`.
+    EXPECT_THAT(output.packet_infos_, IsEmpty());
+
+    // Verify first 64 samples of actual output.
+    const std::vector<int16_t> kOutput(
+        {0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+         -1578, -2816, -3460, -3403, -2709, -1594, -363,  671,   1269,  1328,
+         908,   202,   -513,  -964,  -955,  -431,  504,   1617,  2602,  3164,
+         3101,  2364,  1073,  -511,  -2047, -3198, -3721, -3525, -2688, -1440,
+         -99,   1015,  1663,  1744,  1319,  588,   -171,  -680,  -747,  -315,
+         515,   1512,  2378,  2828,  2674,  1877,  568,   -986,  -2446, -3482,
+         -3864, -3516, -2534, -1163});
+    ASSERT_GE(kMaxOutputSize, kOutput.size());
+    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data()));
+  }
+
+  std::unique_ptr<NetEqImpl> neteq_;
+  NetEq::Config config_;
+  SimulatedClock clock_;
+  TickTimer* tick_timer_ = nullptr;
+  MockDecoderDatabase* mock_decoder_database_ = nullptr;
+  DecoderDatabase* decoder_database_ = nullptr;
+  bool use_mock_decoder_database_ = true;
+  MockNetEqController* mock_neteq_controller_ = nullptr;
+  NetEqController* neteq_controller_ = nullptr;
+  bool use_mock_neteq_controller_ = true;
+  MockDtmfBuffer* mock_dtmf_buffer_ = nullptr;
+  DtmfBuffer* dtmf_buffer_ = nullptr;
+  bool use_mock_dtmf_buffer_ = true;
+  MockDtmfToneGenerator* mock_dtmf_tone_generator_ = nullptr;
+  DtmfToneGenerator* dtmf_tone_generator_ = nullptr;
+  bool use_mock_dtmf_tone_generator_ = true;
+  MockPacketBuffer* mock_packet_buffer_ = nullptr;
+  PacketBuffer* packet_buffer_ = nullptr;
+  bool use_mock_packet_buffer_ = true;
+  MockRedPayloadSplitter* mock_payload_splitter_ = nullptr;
+  RedPayloadSplitter* red_payload_splitter_ = nullptr;
+  bool use_mock_payload_splitter_ = true;
+  bool enable_muted_state_ = false;
+};
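+
+// The pattern used throughout the tests below: UseNoMocks() swaps every mock
+// component for its real implementation, and individual use_mock_*_ flags can
+// be flipped back afterwards to keep a single component mocked. For example:
+//
+//   UseNoMocks();
+//   use_mock_neteq_controller_ = true;  // Keep only the controller mocked.
+//   CreateInstance();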
+
+// This tests the interface class NetEq.
+// TODO(hlundin): Move to separate file?
+TEST(NetEq, CreateAndDestroy) {
+  NetEq::Config config;
+  SimulatedClock clock(0);
+  auto decoder_factory = CreateBuiltinAudioDecoderFactory();
+  std::unique_ptr<NetEq> neteq =
+      DefaultNetEqFactory().CreateNetEq(config, decoder_factory, &clock);
+}
+
+TEST_F(NetEqImplTest, RegisterPayloadType) {
+  CreateInstance();
+  constexpr int rtp_payload_type = 0;
+  const SdpAudioFormat format("pcmu", 8000, 1);
+  EXPECT_CALL(*mock_decoder_database_,
+              RegisterPayload(rtp_payload_type, format));
+  neteq_->RegisterPayloadType(rtp_payload_type, format);
+}
+
+TEST_F(NetEqImplTest, RemovePayloadType) {
+  CreateInstance();
+  uint8_t rtp_payload_type = 0;
+  EXPECT_CALL(*mock_decoder_database_, Remove(rtp_payload_type))
+      .WillOnce(Return(DecoderDatabase::kDecoderNotFound));
+  // Check that kOK is returned when database returns kDecoderNotFound, because
+  // removing a payload type that was never registered is not an error.
+  EXPECT_EQ(NetEq::kOK, neteq_->RemovePayloadType(rtp_payload_type));
+}
+
+TEST_F(NetEqImplTest, RemoveAllPayloadTypes) {
+  CreateInstance();
+  EXPECT_CALL(*mock_decoder_database_, RemoveAll()).WillOnce(Return());
+  neteq_->RemoveAllPayloadTypes();
+}
+
+TEST_F(NetEqImplTest, InsertPacket) {
+  using ::testing::AllOf;
+  using ::testing::Field;
+  CreateInstance();
+  const size_t kPayloadLength = 100;
+  const uint8_t kPayloadType = 0;
+  const uint16_t kFirstSequenceNumber = 0x1234;
+  const uint32_t kFirstTimestamp = 0x12345678;
+  const uint32_t kSsrc = 0x87654321;
+  uint8_t payload[kPayloadLength] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = kFirstSequenceNumber;
+  rtp_header.timestamp = kFirstTimestamp;
+  rtp_header.ssrc = kSsrc;
+  Packet fake_packet;
+  fake_packet.payload_type = kPayloadType;
+  fake_packet.sequence_number = kFirstSequenceNumber;
+  fake_packet.timestamp = kFirstTimestamp;
+
+  auto mock_decoder_factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  EXPECT_CALL(*mock_decoder_factory, MakeAudioDecoderMock(_, _, _))
+      .WillOnce(Invoke([&](const SdpAudioFormat& format,
+                           absl::optional<AudioCodecPairId> codec_pair_id,
+                           std::unique_ptr<AudioDecoder>* dec) {
+        EXPECT_EQ("pcmu", format.name);
+
+        std::unique_ptr<MockAudioDecoder> mock_decoder(new MockAudioDecoder);
+        EXPECT_CALL(*mock_decoder, Channels()).WillRepeatedly(Return(1));
+        EXPECT_CALL(*mock_decoder, SampleRateHz()).WillRepeatedly(Return(8000));
+        EXPECT_CALL(*mock_decoder, Die()).Times(1);  // Called when deleted.
+
+        *dec = std::move(mock_decoder);
+      }));
+  DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1),
+                                    absl::nullopt, mock_decoder_factory.get());
+
+  // Expectations for decoder database.
+  EXPECT_CALL(*mock_decoder_database_, GetDecoderInfo(kPayloadType))
+      .WillRepeatedly(Return(&info));
+
+  // Expectations for packet buffer.
+  EXPECT_CALL(*mock_packet_buffer_, Empty())
+      .WillOnce(Return(false));  // Called once after first packet is inserted.
+  EXPECT_CALL(*mock_packet_buffer_, Flush(_)).Times(1);
+  EXPECT_CALL(*mock_packet_buffer_, InsertPacketList(_, _, _, _, _, _, _, _))
+      .Times(2)
+      .WillRepeatedly(DoAll(SetArgPointee<2>(kPayloadType),
+                            WithArg<0>(Invoke(DeletePacketsAndReturnOk))));
+  // SetArgPointee<2>(kPayloadType) means that the third argument (zero-based
+  // index) is a pointer, and the variable pointed to is set to kPayloadType.
+  // Also invoke the function DeletePacketsAndReturnOk to properly delete all
+  // packets in the list (to avoid memory leaks in the test).
+  EXPECT_CALL(*mock_packet_buffer_, PeekNextPacket())
+      .Times(1)
+      .WillOnce(Return(&fake_packet));
+
+  // Expectations for DTMF buffer.
+  EXPECT_CALL(*mock_dtmf_buffer_, Flush()).Times(1);
+
+  // Expectations for delay manager.
+  {
+    // All expectations within this block must be called in this specific
+    // order.
+    InSequence sequence;  // Dummy variable.
+    // Expectations when the first packet is inserted.
+    EXPECT_CALL(
+        *mock_neteq_controller_,
+        PacketArrived(
+            /*fs_hz*/ 8000,
+            /*should_update_stats*/ _,
+            /*info*/
+            AllOf(
+                Field(&NetEqController::PacketArrivedInfo::is_cng_or_dtmf,
+                      false),
+                Field(&NetEqController::PacketArrivedInfo::main_sequence_number,
+                      kFirstSequenceNumber),
+                Field(&NetEqController::PacketArrivedInfo::main_timestamp,
+                      kFirstTimestamp))));
+    EXPECT_CALL(
+        *mock_neteq_controller_,
+        PacketArrived(
+            /*fs_hz*/ 8000,
+            /*should_update_stats*/ _,
+            /*info*/
+            AllOf(
+                Field(&NetEqController::PacketArrivedInfo::is_cng_or_dtmf,
+                      false),
+                Field(&NetEqController::PacketArrivedInfo::main_sequence_number,
+                      kFirstSequenceNumber + 1),
+                Field(&NetEqController::PacketArrivedInfo::main_timestamp,
+                      kFirstTimestamp + 160))));
+  }
+
+  // Insert first packet.
+  neteq_->InsertPacket(rtp_header, payload);
+
+  // Insert second packet.
+  rtp_header.timestamp += 160;
+  rtp_header.sequenceNumber += 1;
+  neteq_->InsertPacket(rtp_header, payload);
+}
+
+TEST_F(NetEqImplTest, InsertPacketsUntilBufferIsFull) {
+  UseNoMocks();
+  CreateInstance();
+
+  const int kPayloadLengthSamples = 80;
+  const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples;  // PCM 16-bit.
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Insert packets. The buffer should not flush.
+  for (size_t i = 1; i <= config_.max_packets_in_buffer; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+    rtp_header.timestamp += kPayloadLengthSamples;
+    rtp_header.sequenceNumber += 1;
+    EXPECT_EQ(i, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  // Insert one more packet and make sure the buffer got flushed. That is, it
+  // should only hold one single packet.
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  EXPECT_EQ(1u, packet_buffer_->NumPacketsInBuffer());
+  const Packet* test_packet = packet_buffer_->PeekNextPacket();
+  EXPECT_EQ(rtp_header.timestamp, test_packet->timestamp);
+  EXPECT_EQ(rtp_header.sequenceNumber, test_packet->sequence_number);
+}
+
+TEST_F(NetEqImplTest, TestDtmfPacketAVT) {
+  TestDtmfPacket(8000);
+}
+
+TEST_F(NetEqImplTest, TestDtmfPacketAVT16kHz) {
+  TestDtmfPacket(16000);
+}
+
+TEST_F(NetEqImplTest, TestDtmfPacketAVT32kHz) {
+  TestDtmfPacket(32000);
+}
+
+TEST_F(NetEqImplTest, TestDtmfPacketAVT48kHz) {
+  TestDtmfPacket(48000);
+}
+
+// This test verifies that timestamps propagate from the incoming packets
+// through to the sync buffer and to the playout timestamp.
+TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+  rtp_header.numCSRCs = 3;
+  rtp_header.arrOfCSRCs[0] = 43;
+  rtp_header.arrOfCSRCs[1] = 65;
+  rtp_header.arrOfCSRCs[2] = 17;
+
+  // This is a dummy decoder that produces as many output samples as the input
+  // has bytes. The output is an increasing series, starting at 1 for the first
+  // sample, and then increasing by 1 for each sample.
+  class CountingSamplesDecoder : public AudioDecoder {
+   public:
+    CountingSamplesDecoder() : next_value_(1) {}
+
+    // Produce as many samples as input bytes (`encoded_len`).
+    int DecodeInternal(const uint8_t* encoded,
+                       size_t encoded_len,
+                       int /* sample_rate_hz */,
+                       int16_t* decoded,
+                       SpeechType* speech_type) override {
+      for (size_t i = 0; i < encoded_len; ++i) {
+        decoded[i] = next_value_++;
+      }
+      *speech_type = kSpeech;
+      return rtc::checked_cast<int>(encoded_len);
+    }
+
+    void Reset() override { next_value_ = 1; }
+
+    int SampleRateHz() const override { return kSampleRateHz; }
+
+    size_t Channels() const override { return 1; }
+
+    uint16_t next_value() const { return next_value_; }
+
+   private:
+    int16_t next_value_;
+  } decoder_;
+
+  auto decoder_factory =
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&decoder_);
+
+  UseNoMocks();
+  CreateInstance(decoder_factory);
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("L16", 8000, 1)));
+
+  // Insert one packet.
+  clock_.AdvanceTimeMilliseconds(123456);
+  Timestamp expected_receive_time = clock_.CurrentTime();
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  // Pull audio once.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  ASSERT_FALSE(muted);
+  ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+
+  // Verify `output.packet_infos_`.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), ElementsAre(43, 65, 17));
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_FALSE(packet_info.audio_level().has_value());
+    EXPECT_EQ(packet_info.receive_time(), expected_receive_time);
+  }
+
+  // Start with a simple check that the fake decoder is behaving as expected.
+  EXPECT_EQ(kPayloadLengthSamples,
+            static_cast<size_t>(decoder_.next_value() - 1));
+
+  // The value of the last of the output samples is the same as the number of
+  // samples played from the decoded packet. Thus, this number + the RTP
+  // timestamp should match the playout timestamp.
+  // Wrap the expected value in an absl::optional to compare them as such.
+  EXPECT_EQ(
+      absl::optional<uint32_t>(rtp_header.timestamp +
+                               output.data()[output.samples_per_channel_ - 1]),
+      neteq_->GetPlayoutTimestamp());
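+
+  // Concretely: 80 samples (10 ms at 8 kHz) were decoded, so the last output
+  // sample carries the value 80 and the expected playout timestamp is
+  // rtp_header.timestamp + 80.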
+
+  // Check the timestamp for the last value in the sync buffer. This should
+  // be one full frame length ahead of the RTP timestamp.
+  const SyncBuffer* sync_buffer = neteq_->sync_buffer_for_test();
+  ASSERT_TRUE(sync_buffer != NULL);
+  EXPECT_EQ(rtp_header.timestamp + kPayloadLengthSamples,
+            sync_buffer->end_timestamp());
+
+  // Check that the number of samples still to play from the sync buffer add
+  // up with what was already played out.
+  EXPECT_EQ(
+      kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1],
+      sync_buffer->FutureLength());
+}
+
+TEST_F(NetEqImplTest, ReorderedPacket) {
+  UseNoMocks();
+
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder));
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+  rtp_header.extension.hasAudioLevel = true;
+  rtp_header.extension.audioLevel = 42;
+
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, SampleRateHz())
+      .WillRepeatedly(Return(kSampleRateHz));
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, kPayloadLengthBytes))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples)));
+  int16_t dummy_output[kPayloadLengthSamples] = {0};
+  // The below expectation will make the mock decoder write
+  // `kPayloadLengthSamples` zeros to the output array, and mark it as speech.
+  EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(0), kPayloadLengthBytes,
+                                           kSampleRateHz, _, _))
+      .WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
+                                          dummy_output + kPayloadLengthSamples),
+                      SetArgPointee<4>(AudioDecoder::kSpeech),
+                      Return(rtc::checked_cast<int>(kPayloadLengthSamples))));
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("L16", 8000, 1)));
+
+  // Insert one packet.
+  clock_.AdvanceTimeMilliseconds(123456);
+  Timestamp expected_receive_time = clock_.CurrentTime();
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  // Pull audio once.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+
+  // Verify `output.packet_infos_`.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), IsEmpty());
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel);
+    EXPECT_EQ(packet_info.receive_time(), expected_receive_time);
+  }
+
+  // Insert two more packets. The first one is out of order, and is already too
+  // old, the second one is the expected next packet.
+  rtp_header.sequenceNumber -= 1;
+  rtp_header.timestamp -= kPayloadLengthSamples;
+  rtp_header.extension.audioLevel = 1;
+  payload[0] = 1;
+  clock_.AdvanceTimeMilliseconds(1000);
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  rtp_header.sequenceNumber += 2;
+  rtp_header.timestamp += 2 * kPayloadLengthSamples;
+  rtp_header.extension.audioLevel = 2;
+  payload[0] = 2;
+  clock_.AdvanceTimeMilliseconds(2000);
+  expected_receive_time = clock_.CurrentTime();
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  // Expect only the second packet to be decoded (the one with "2" as the first
+  // payload byte).
+  EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(2), kPayloadLengthBytes,
+                                           kSampleRateHz, _, _))
+      .WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
+                                          dummy_output + kPayloadLengthSamples),
+                      SetArgPointee<4>(AudioDecoder::kSpeech),
+                      Return(rtc::checked_cast<int>(kPayloadLengthSamples))));
+
+  // Pull audio once.
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+
+  // Now check the packet buffer, and make sure it is empty, since the
+  // out-of-order packet should have been discarded.
+  EXPECT_TRUE(packet_buffer_->Empty());
+
+  // NetEq `packets_discarded` should capture this packet discard.
+  EXPECT_EQ(1u, neteq_->GetLifetimeStatistics().packets_discarded);
+
+  // Verify `output.packet_infos_`. Expect to only see the second packet.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), IsEmpty());
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel);
+    EXPECT_EQ(packet_info.receive_time(), expected_receive_time);
+  }
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+// This test verifies that NetEq can handle the situation where the first
+// incoming packet is rejected.
+TEST_F(NetEqImplTest, FirstPacketUnknown) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  // Insert one packet. Note that we have not registered any payload type, so
+  // this packet will be rejected.
+  EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(rtp_header, payload));
+
+  // Pull audio once.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, IsEmpty());
+
+  // Register the payload type.
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Insert 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+    EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  // Pull audio repeatedly and make sure we get normal output, that is not PLC.
+  for (size_t i = 0; i < 3; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
+        << "NetEq did not decode the packets as expected.";
+    EXPECT_THAT(output.packet_infos_, SizeIs(1));
+  }
+}
+
+// This test verifies that audio interruption is not logged for the initial
+// PLC period before the first packet is decoded.
+// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
+// Make the test parametrized, so that we can test with different initial
+// sample rates in NetEq.
+class NetEqImplTestSampleRateParameter
+    : public NetEqImplTest,
+      public testing::WithParamInterface<int> {
+ protected:
+  NetEqImplTestSampleRateParameter()
+      : NetEqImplTest(), initial_sample_rate_hz_(GetParam()) {
+    config_.sample_rate_hz = initial_sample_rate_hz_;
+  }
+
+  const int initial_sample_rate_hz_;
+};
+
+class NetEqImplTestSdpFormatParameter
+    : public NetEqImplTest,
+      public testing::WithParamInterface<SdpAudioFormat> {
+ protected:
+  NetEqImplTestSdpFormatParameter()
+      : NetEqImplTest(), sdp_format_(GetParam()) {}
+  const SdpAudioFormat sdp_format_;
+};
+
+// This test does the following:
+// 0. Set up NetEq with initial sample rate given by test parameter, and a codec
+//    sample rate of 16000.
+// 1. Start calling GetAudio before inserting any encoded audio. The audio
+//    produced will be PLC.
+// 2. Insert a number of encoded audio packets.
+// 3. Keep calling GetAudio and verify that no audio interruption was logged.
+//    Call GetAudio until NetEq runs out of data again; PLC starts.
+// 4. Insert one more packet.
+// 5. Call GetAudio until that packet is decoded and the PLC ends.
+
+TEST_P(NetEqImplTestSampleRateParameter,
+       NoAudioInterruptionLoggedBeforeFirstDecode) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kPayloadSampleRateHz = 16000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kPayloadSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  // Register the payload type.
+  EXPECT_TRUE(neteq_->RegisterPayloadType(
+      kPayloadType, SdpAudioFormat("l16", kPayloadSampleRateHz, 1)));
+
+  // Pull audio several times. No packets have been inserted yet.
+  const size_t initial_output_size =
+      static_cast<size_t>(10 * initial_sample_rate_hz_ / 1000);  // 10 ms
+  AudioFrame output;
+  bool muted;
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    EXPECT_EQ(initial_output_size, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+    EXPECT_THAT(output.packet_infos_, IsEmpty());
+  }
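+
+  // For example, with initial_sample_rate_hz_ == 8000 each of these PLC pulls
+  // yields 80 samples per channel (10 ms); once the 16 kHz packets below are
+  // decoded, every GetAudio() call yields 160 samples instead.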
+
+  // Lambda for inserting packets.
+  auto insert_packet = [&]() {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  };
+  // Insert 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    insert_packet();
+    EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  // Pull audio repeatedly and make sure we get normal output, that is not PLC.
+  constexpr size_t kOutputSize =
+      static_cast<size_t>(10 * kPayloadSampleRateHz / 1000);  // 10 ms
+  for (size_t i = 0; i < 3; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    EXPECT_EQ(kOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
+        << "NetEq did not decode the packets as expected.";
+    EXPECT_THAT(output.packet_infos_, SizeIs(1));
+  }
+
+  // Verify that no interruption was logged.
+  auto lifetime_stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(0, lifetime_stats.interruption_count);
+
+  // Keep pulling audio data until a new PLC period is started.
+  size_t count_loops = 0;
+  while (output.speech_type_ == AudioFrame::kNormalSpeech) {
+    // Make sure we don't hang the test if we never go to PLC.
+    ASSERT_LT(++count_loops, 100u);
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  }
+
+  // Insert a few packets to avoid postpone decoding after expand.
+  for (size_t i = 0; i < 5; ++i) {
+    insert_packet();
+  }
+
+  // Pull audio until the newly inserted packet is decoded and the PLC ends.
+  while (output.speech_type_ != AudioFrame::kNormalSpeech) {
+    // Make sure we don't hang the test if we never go to PLC.
+    ASSERT_LT(++count_loops, 100u);
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  }
+
+  // Verify that no interruption was logged.
+  lifetime_stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(0, lifetime_stats.interruption_count);
+}
+
+// This test does the following:
+// 0. Set up NetEq with initial sample rate given by test parameter, and a codec
+//    sample rate of 16000.
+// 1. Insert a number of encoded audio packets.
+// 2. Call GetAudio and verify that decoded audio is produced.
+// 3. Keep calling GetAudio until NetEq runs out of data; PLC starts.
+// 4. Keep calling GetAudio until PLC has been produced for at least 150 ms.
+// 5. Insert one more packet.
+// 6. Call GetAudio until that packet is decoded and the PLC ends.
+// 7. Verify that an interruption was logged.
+
+TEST_P(NetEqImplTestSampleRateParameter, AudioInterruptionLogged) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kPayloadSampleRateHz = 16000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kPayloadSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  // Register the payload type.
+  EXPECT_TRUE(neteq_->RegisterPayloadType(
+      kPayloadType, SdpAudioFormat("l16", kPayloadSampleRateHz, 1)));
+
+  // Lambda for inserting packets.
+  auto insert_packet = [&]() {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  };
+  // Insert 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    insert_packet();
+    EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  AudioFrame output;
+  bool muted;
+  // Keep pulling audio data until a new PLC period is started.
+  size_t count_loops = 0;
+  do {
+    // Make sure we don't hang the test if we never go to PLC.
+    ASSERT_LT(++count_loops, 100u);
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  } while (output.speech_type_ == AudioFrame::kNormalSpeech);
+
+  // Pull audio 15 times, which produces 150 ms of output audio. This should
+  // all be produced as PLC. The total length of the gap will then be 150 ms
+  // plus an initial fraction of 10 ms at the start and the end of the PLC
+  // period. In total, less than 170 ms.
+  for (size_t i = 0; i < 15; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+  }
+
+  // Insert a few packets to avoid postpone decoding after expand.
+  for (size_t i = 0; i < 5; ++i) {
+    insert_packet();
+  }
+
+  // Pull audio until the newly inserted packet is decoded and the PLC ends.
+  while (output.speech_type_ != AudioFrame::kNormalSpeech) {
+    // Make sure we don't hang the test if we never go to PLC.
+    ASSERT_LT(++count_loops, 100u);
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  }
+
+  // Verify that the interruption was logged.
+  auto lifetime_stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(1, lifetime_stats.interruption_count);
+  EXPECT_GT(lifetime_stats.total_interruption_duration_ms, 150);
+  EXPECT_LT(lifetime_stats.total_interruption_duration_ms, 170);
+}
+
+INSTANTIATE_TEST_SUITE_P(SampleRates,
+                         NetEqImplTestSampleRateParameter,
+                         testing::Values(8000, 16000, 32000, 48000));
+
+TEST_P(NetEqImplTestSdpFormatParameter, GetNackListScaledTimestamp) {
+  UseNoMocks();
+  CreateInstance();
+
+  neteq_->EnableNack(128);
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kPayloadSampleRateHz = sdp_format_.clockrate_hz;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kPayloadSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  std::vector<uint8_t> payload(kPayloadLengthBytes, 0);
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, sdp_format_));
+
+  auto insert_packet = [&](bool lost = false) {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    if (!lost)
+      EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  };
+
+  // Insert and decode 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    insert_packet();
+  }
+  AudioFrame output;
+  size_t count_loops = 0;
+  do {
+    bool muted;
+    // Make sure we don't hang the test if we never go to PLC.
+    ASSERT_LT(++count_loops, 100u);
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  } while (output.speech_type_ == AudioFrame::kNormalSpeech);
+
+  insert_packet();
+
+  insert_packet(/*lost=*/true);
+
+  // Ensure packet gets marked as missing.
+  for (int i = 0; i < 5; ++i) {
+    insert_packet();
+  }
+
+  // Missing packet recoverable with 5ms RTT.
+  EXPECT_THAT(neteq_->GetNackList(5), Not(IsEmpty()));
+
+  // No packets should have TimeToPlay > 500ms.
+  EXPECT_THAT(neteq_->GetNackList(500), IsEmpty());
+}
+
+INSTANTIATE_TEST_SUITE_P(GetNackList,
+                         NetEqImplTestSdpFormatParameter,
+                         testing::Values(SdpAudioFormat("g722", 8000, 1),
+                                         SdpAudioFormat("opus", 48000, 2)));
+
+// This test verifies that NetEq can handle comfort noise and enters/quits codec
+// internal CNG mode properly.
+TEST_F(NetEqImplTest, CodecInternalCng) {
+  UseNoMocks();
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder));
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateKhz = 48;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(20 * kSampleRateKhz);  // 20 ms.
+  const size_t kPayloadLengthBytes = 10;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  int16_t dummy_output[kPayloadLengthSamples] = {0};
+
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, SampleRateHz())
+      .WillRepeatedly(Return(kSampleRateKhz * 1000));
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, kPayloadLengthBytes))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples)));
+  // Packet duration when asking the decoder for more CNG data (without a new
+  // packet).
+  EXPECT_CALL(mock_decoder, PacketDuration(nullptr, 0))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples)));
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("opus", 48000, 2)));
+
+  struct Packet {
+    int sequence_number_delta;
+    int timestamp_delta;
+    AudioDecoder::SpeechType decoder_output_type;
+  };
+  std::vector<Packet> packets = {
+      {0, 0, AudioDecoder::kSpeech},
+      {1, kPayloadLengthSamples, AudioDecoder::kComfortNoise},
+      {2, 2 * kPayloadLengthSamples, AudioDecoder::kSpeech},
+      {1, kPayloadLengthSamples, AudioDecoder::kSpeech}};
+
+  for (size_t i = 0; i < packets.size(); ++i) {
+    rtp_header.sequenceNumber += packets[i].sequence_number_delta;
+    rtp_header.timestamp += packets[i].timestamp_delta;
+    payload[0] = i;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+    // Pointee(x) verifies that first byte of the payload equals x, this makes
+    // it possible to verify that the correct payload is fed to Decode().
+    EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(i), kPayloadLengthBytes,
+                                             kSampleRateKhz * 1000, _, _))
+        .WillOnce(DoAll(SetArrayArgument<3>(
+                            dummy_output, dummy_output + kPayloadLengthSamples),
+                        SetArgPointee<4>(packets[i].decoder_output_type),
+                        Return(rtc::checked_cast<int>(kPayloadLengthSamples))));
+  }
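+
+  // The deltas above leave a gap in the sequence (numbers S, S+1, S+3, S+4),
+  // so after the kComfortNoise packet NetEq keeps asking the decoder for more
+  // comfort noise -- the null-payload DecodeInternal() expectation below --
+  // until the next speech packet is due.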
+
+  // Expect comfort noise to be returned by the decoder.
+  EXPECT_CALL(mock_decoder,
+              DecodeInternal(IsNull(), 0, kSampleRateKhz * 1000, _, _))
+      .WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
+                                          dummy_output + kPayloadLengthSamples),
+                      SetArgPointee<4>(AudioDecoder::kComfortNoise),
+                      Return(rtc::checked_cast<int>(kPayloadLengthSamples))));
+
+  std::vector<AudioFrame::SpeechType> expected_output = {
+      AudioFrame::kNormalSpeech, AudioFrame::kCNG, AudioFrame::kNormalSpeech};
+  size_t output_index = 0;
+
+  int timeout_counter = 0;
+  while (!packet_buffer_->Empty()) {
+    ASSERT_LT(timeout_counter++, 20) << "Test timed out";
+    AudioFrame output;
+    bool muted;
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    if (output_index + 1 < expected_output.size() &&
+        output.speech_type_ == expected_output[output_index + 1]) {
+      ++output_index;
+    } else {
+      EXPECT_EQ(output.speech_type_, expected_output[output_index]);
+    }
+  }
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+TEST_F(NetEqImplTest, UnsupportedDecoder) {
+  UseNoMocks();
+  ::testing::NiceMock<MockAudioDecoder> decoder;
+
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&decoder));
+  static const size_t kNetEqMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
+  static const size_t kChannels = 2;
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = 1;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  const uint8_t kFirstPayloadValue = 1;
+  const uint8_t kSecondPayloadValue = 2;
+
+  EXPECT_CALL(decoder,
+              PacketDuration(Pointee(kFirstPayloadValue), kPayloadLengthBytes))
+      .Times(AtLeast(1))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kNetEqMaxFrameSize + 1)));
+
+  EXPECT_CALL(decoder, DecodeInternal(Pointee(kFirstPayloadValue), _, _, _, _))
+      .Times(0);
+
+  EXPECT_CALL(decoder, DecodeInternal(Pointee(kSecondPayloadValue),
+                                      kPayloadLengthBytes, kSampleRateHz, _, _))
+      .Times(1)
+      .WillOnce(DoAll(
+          SetArrayArgument<3>(dummy_output,
+                              dummy_output + kPayloadLengthSamples * kChannels),
+          SetArgPointee<4>(AudioDecoder::kSpeech),
+          Return(static_cast<int>(kPayloadLengthSamples * kChannels))));
+
+  EXPECT_CALL(decoder,
+              PacketDuration(Pointee(kSecondPayloadValue), kPayloadLengthBytes))
+      .Times(AtLeast(1))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kNetEqMaxFrameSize)));
+
+  EXPECT_CALL(decoder, SampleRateHz()).WillRepeatedly(Return(kSampleRateHz));
+
+  EXPECT_CALL(decoder, Channels()).WillRepeatedly(Return(kChannels));
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("L16", 8000, 1)));
+
+  // Insert one packet.
+  payload[0] = kFirstPayloadValue;  // This will make Decode() fail.
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  // Insert another packet.
+  payload[0] = kSecondPayloadValue;  // This will make Decode() successful.
+  rtp_header.sequenceNumber++;
+  // The second timestamp needs to be at least 30 ms after the first to make
+  // the second packet get decoded.
+  rtp_header.timestamp += 3 * kPayloadLengthSamples;
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  AudioFrame output;
+  bool muted;
+  // First call to GetAudio will try to decode the "faulty" packet.
+  // Expect kFail return value.
+  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted));
+  // Output size and number of channels should be correct.
+  const size_t kExpectedOutputSize = 10 * (kSampleRateHz / 1000) * kChannels;
+  EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels);
+  EXPECT_EQ(kChannels, output.num_channels_);
+  EXPECT_THAT(output.packet_infos_, IsEmpty());
+
+  // Second call to GetAudio will decode the packet that is ok. No errors are
+  // expected.
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels);
+  EXPECT_EQ(kChannels, output.num_channels_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(1));
+
+  // Die isn't called through NiceMock (since it's called by the
+  // MockAudioDecoder destructor), so it needs to be mocked explicitly.
+  EXPECT_CALL(decoder, Die());
+}
+
+// This test inserts packets until the buffer is flushed. After that, it asks
+// NetEq for the network statistics. The purpose of the test is to make sure
+// that even though the buffer size increment is negative (which it becomes when
+// the packet causing a flush is inserted), the packet length stored in the
+// decision logic remains valid.
+TEST_F(NetEqImplTest, FloodBufferAndGetNetworkStats) {
+  UseNoMocks();
+  CreateInstance();
+
+  const size_t kPayloadLengthSamples = 80;
+  const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples;  // PCM 16-bit.
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Insert packets until the buffer flushes.
+  for (size_t i = 0; i <= config_.max_packets_in_buffer; ++i) {
+    EXPECT_EQ(i, packet_buffer_->NumPacketsInBuffer());
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+    rtp_header.timestamp += rtc::checked_cast<uint32_t>(kPayloadLengthSamples);
+    ++rtp_header.sequenceNumber;
+  }
+  EXPECT_EQ(1u, packet_buffer_->NumPacketsInBuffer());
+
+  // Ask for network statistics. This should not crash.
+  NetEqNetworkStatistics stats;
+  EXPECT_EQ(NetEq::kOK, neteq_->NetworkStatistics(&stats));
+}
+
+TEST_F(NetEqImplTest, DecodedPayloadTooShort) {
+  UseNoMocks();
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder));
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, SampleRateHz())
+      .WillRepeatedly(Return(kSampleRateHz));
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, _))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples)));
+  int16_t dummy_output[kPayloadLengthSamples] = {0};
+  // The below expectation will make the mock decoder write
+  // `kPayloadLengthSamples` - 5 zeros to the output array, and mark it as
+  // speech. That is, the decoded length is 5 samples shorter than expected.
+  EXPECT_CALL(mock_decoder,
+              DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _))
+      .WillOnce(
+          DoAll(SetArrayArgument<3>(dummy_output,
+                                    dummy_output + kPayloadLengthSamples - 5),
+                SetArgPointee<4>(AudioDecoder::kSpeech),
+                Return(rtc::checked_cast<int>(kPayloadLengthSamples - 5))));
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("L16", 8000, 1)));
+
+  // Insert one packet.
+  EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+
+  EXPECT_EQ(5u, neteq_->sync_buffer_for_test()->FutureLength());
+
+  // Pull audio once.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(1));
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+// This test checks the behavior of NetEq when the audio decoder fails.
+TEST_F(NetEqImplTest, DecodingError) {
+  UseNoMocks();
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder));
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const int kDecoderErrorCode = -97;  // Any negative number.
+
+  // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms.
+  const size_t kFrameLengthSamples =
+      static_cast<size_t>(5 * kSampleRateHz / 1000);
+
+  const size_t kPayloadLengthBytes = 1;  // This can be arbitrary.
+
+  uint8_t payload[kPayloadLengthBytes] = {0};
+
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, SampleRateHz())
+      .WillRepeatedly(Return(kSampleRateHz));
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, _))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kFrameLengthSamples)));
+  EXPECT_CALL(mock_decoder, ErrorCode()).WillOnce(Return(kDecoderErrorCode));
+  EXPECT_CALL(mock_decoder, HasDecodePlc()).WillOnce(Return(false));
+  int16_t dummy_output[kFrameLengthSamples] = {0};
+
+  {
+    InSequence sequence;  // Dummy variable.
+    // Mock decoder works normally the first time.
+    EXPECT_CALL(mock_decoder,
+                DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _))
+        .Times(3)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<3>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<4>(AudioDecoder::kSpeech),
+                  Return(rtc::checked_cast<int>(kFrameLengthSamples))))
+        .RetiresOnSaturation();
+
+    // Then the mock decoder fails. A common reason for failure is a buffer
+    // that is too short.
+    EXPECT_CALL(mock_decoder,
+                DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _))
+        .WillOnce(Return(-1))
+        .RetiresOnSaturation();
+
+    // Mock decoder finally returns to normal.
+    EXPECT_CALL(mock_decoder,
+                DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<3>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<4>(AudioDecoder::kSpeech),
+                  Return(rtc::checked_cast<int>(kFrameLengthSamples))));
+  }
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("L16", 8000, 1)));
+
+  // Insert packets.
+  for (int i = 0; i < 20; ++i) {
+    rtp_header.sequenceNumber += 1;
+    rtp_header.timestamp += kFrameLengthSamples;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  }
+
+  // Pull audio.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(2));  // 5 ms packets vs 10 ms output
+
+  // Pull audio again. Decoder fails.
+  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  // We are not expecting anything for output.speech_type_, since an error was
+  // returned.
+
+  // Pull audio again, should behave normally.
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(2));  // 5 ms packets vs 10 ms output
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+// This test checks the behavior of NetEq when the audio decoder fails during
+// CNG.
+TEST_F(NetEqImplTest, DecodingErrorDuringInternalCng) {
+  UseNoMocks();
+
+  // Create a mock decoder object.
+  MockAudioDecoder mock_decoder;
+  CreateInstance(
+      rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder));
+
+  const uint8_t kPayloadType = 17;  // Just an arbitrary number.
+  const int kSampleRateHz = 8000;
+  const int kDecoderErrorCode = -97;  // Any negative number.
+
+  // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms.
+  const size_t kFrameLengthSamples =
+      static_cast<size_t>(5 * kSampleRateHz / 1000);
+
+  const size_t kPayloadLengthBytes = 1;  // This can be arbitrary.
+
+  uint8_t payload[kPayloadLengthBytes] = {0};
+
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
+  EXPECT_CALL(mock_decoder, SampleRateHz())
+      .WillRepeatedly(Return(kSampleRateHz));
+  EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1));
+  EXPECT_CALL(mock_decoder, PacketDuration(_, _))
+      .WillRepeatedly(Return(rtc::checked_cast<int>(kFrameLengthSamples)));
+  EXPECT_CALL(mock_decoder, ErrorCode()).WillOnce(Return(kDecoderErrorCode));
+  int16_t dummy_output[kFrameLengthSamples] = {0};
+
+  {
+    InSequence sequence;  // Dummy variable.
+    // Mock decoder works normally the first 2 times.
+    EXPECT_CALL(mock_decoder,
+                DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<3>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<4>(AudioDecoder::kComfortNoise),
+                  Return(rtc::checked_cast<int>(kFrameLengthSamples))))
+        .RetiresOnSaturation();
+
+    // Then the mock decoder fails. A common reason for failure is a buffer
+    // that is too short.
+    EXPECT_CALL(mock_decoder, DecodeInternal(nullptr, 0, kSampleRateHz, _, _))
+        .WillOnce(Return(-1))
+        .RetiresOnSaturation();
+
+    // Mock decoder finally returns to normal.
+    EXPECT_CALL(mock_decoder, DecodeInternal(nullptr, 0, kSampleRateHz, _, _))
+        .Times(2)
+        .WillRepeatedly(
+            DoAll(SetArrayArgument<3>(dummy_output,
+                                      dummy_output + kFrameLengthSamples),
+                  SetArgPointee<4>(AudioDecoder::kComfortNoise),
+                  Return(rtc::checked_cast<int>(kFrameLengthSamples))));
+  }
+
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Insert 2 packets. This will put NetEq into codec-internal CNG mode.
+  for (int i = 0; i < 2; ++i) {
+    rtp_header.sequenceNumber += 1;
+    rtp_header.timestamp += kFrameLengthSamples;
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
+  }
+
+  // Pull audio.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kCNG, output.speech_type_);
+
+  // Pull audio again. The decoder fails.
+  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  // We are not expecting anything for output.speech_type_, since an error was
+  // returned.
+
+  // Pull audio again; codec-internal CNG should resume.
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+  EXPECT_EQ(1u, output.num_channels_);
+  EXPECT_EQ(AudioFrame::kCNG, output.speech_type_);
+
+  EXPECT_CALL(mock_decoder, Die());
+}
+
+// Tests that the return value from last_output_sample_rate_hz() is equal to
+// the configured initial sample rate.
+TEST_F(NetEqImplTest, InitialLastOutputSampleRate) { + UseNoMocks(); + config_.sample_rate_hz = 48000; + CreateInstance(); + EXPECT_EQ(48000, neteq_->last_output_sample_rate_hz()); +} + +TEST_F(NetEqImplTest, TickTimerIncrement) { + UseNoMocks(); + CreateInstance(); + ASSERT_TRUE(tick_timer_); + EXPECT_EQ(0u, tick_timer_->ticks()); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(1u, tick_timer_->ticks()); +} + +TEST_F(NetEqImplTest, SetBaseMinimumDelay) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + EXPECT_CALL(*mock_neteq_controller_, SetBaseMinimumDelay(_)) + .WillOnce(Return(true)) + .WillOnce(Return(false)); + + const int delay_ms = 200; + + EXPECT_EQ(true, neteq_->SetBaseMinimumDelayMs(delay_ms)); + EXPECT_EQ(false, neteq_->SetBaseMinimumDelayMs(delay_ms)); +} + +TEST_F(NetEqImplTest, GetBaseMinimumDelayMs) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + const int delay_ms = 200; + + EXPECT_CALL(*mock_neteq_controller_, GetBaseMinimumDelay()) + .WillOnce(Return(delay_ms)); + + EXPECT_EQ(delay_ms, neteq_->GetBaseMinimumDelayMs()); +} + +TEST_F(NetEqImplTest, TargetDelayMs) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + constexpr int kTargetLevelMs = 510; + EXPECT_CALL(*mock_neteq_controller_, TargetLevelMs()) + .WillOnce(Return(kTargetLevelMs)); + EXPECT_EQ(510, neteq_->TargetDelayMs()); +} + +TEST_F(NetEqImplTest, InsertEmptyPacket) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + RTPHeader rtp_header; + rtp_header.payloadType = 17; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(*mock_neteq_controller_, RegisterEmptyPacket()); + neteq_->InsertEmptyPacket(rtp_header); +} + +TEST_F(NetEqImplTest, NotifyControllerOfReorderedPacket) { + using ::testing::AllOf; + using ::testing::Field; + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kNormal)); + + const int kPayloadLengthSamples = 80; + const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples; // PCM 16-bit. + const uint8_t kPayloadType = 17; // Just an arbitrary number. + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + + // Insert second packet that was sent before the first packet. + rtp_header.sequenceNumber -= 1; + rtp_header.timestamp -= kPayloadLengthSamples; + EXPECT_CALL( + *mock_neteq_controller_, + PacketArrived( + /*fs_hz*/ 8000, + /*should_update_stats*/ true, + /*info*/ + AllOf( + Field(&NetEqController::PacketArrivedInfo::packet_length_samples, + kPayloadLengthSamples), + Field(&NetEqController::PacketArrivedInfo::main_sequence_number, + rtp_header.sequenceNumber), + Field(&NetEqController::PacketArrivedInfo::main_timestamp, + rtp_header.timestamp)))); + + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); +} + +// When using a codec with 1000 channels, there should be no crashes. 
+TEST_F(NetEqImplTest, NoCrashWith1000Channels) {
+  using ::testing::AllOf;
+  using ::testing::Field;
+  UseNoMocks();
+  use_mock_decoder_database_ = true;
+  enable_muted_state_ = true;
+  CreateInstance();
+  const size_t kPayloadLength = 100;
+  const uint8_t kPayloadType = 0;
+  const uint16_t kFirstSequenceNumber = 0x1234;
+  const uint32_t kFirstTimestamp = 0x12345678;
+  const uint32_t kSsrc = 0x87654321;
+  uint8_t payload[kPayloadLength] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = kFirstSequenceNumber;
+  rtp_header.timestamp = kFirstTimestamp;
+  rtp_header.ssrc = kSsrc;
+  Packet fake_packet;
+  fake_packet.payload_type = kPayloadType;
+  fake_packet.sequence_number = kFirstSequenceNumber;
+  fake_packet.timestamp = kFirstTimestamp;
+
+  AudioDecoder* decoder = nullptr;
+
+  auto mock_decoder_factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
+  EXPECT_CALL(*mock_decoder_factory, MakeAudioDecoderMock(_, _, _))
+      .WillOnce(Invoke([&](const SdpAudioFormat& format,
+                           absl::optional<AudioCodecPairId> codec_pair_id,
+                           std::unique_ptr<AudioDecoder>* dec) {
+        EXPECT_EQ("pcmu", format.name);
+        *dec = std::make_unique<AudioDecoderPcmU>(1000);
+        decoder = dec->get();
+      }));
+  DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1),
+                                    absl::nullopt, mock_decoder_factory.get());
+  // Expectations for the decoder database.
+  EXPECT_CALL(*mock_decoder_database_, GetDecoderInfo(kPayloadType))
+      .WillRepeatedly(Return(&info));
+  EXPECT_CALL(*mock_decoder_database_, GetActiveCngDecoder())
+      .WillRepeatedly(ReturnNull());
+  EXPECT_CALL(*mock_decoder_database_, GetActiveDecoder())
+      .WillRepeatedly(Return(decoder));
+  EXPECT_CALL(*mock_decoder_database_, SetActiveDecoder(_, _))
+      .WillOnce(Invoke([](uint8_t rtp_payload_type, bool* new_decoder) {
+        *new_decoder = true;
+        return 0;
+      }));
+
+  // Insert the first packet.
+  neteq_->InsertPacket(rtp_header, payload);
+
+  AudioFrame audio_frame;
+  bool muted;
+
+  // Repeat 40 times to ensure we enter the muted state.
+  for (int i = 0; i < 40; i++) {
+    // GetAudio should return an error, and not crash, even in muted state.
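+    // [Editor's note] The error is expected: 10 ms of audio for 1000
+    // channels is far more data than an AudioFrame can hold
+    // (AudioFrame::kMaxDataSizeSamples), so the call must fail cleanly
+    // instead of writing out of bounds.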
+ EXPECT_NE(0, neteq_->GetAudio(&audio_frame, &muted)); + } +} + +class Decoder120ms : public AudioDecoder { + public: + Decoder120ms(int sample_rate_hz, SpeechType speech_type) + : sample_rate_hz_(sample_rate_hz), + next_value_(1), + speech_type_(speech_type) {} + + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + EXPECT_EQ(sample_rate_hz_, sample_rate_hz); + size_t decoded_len = + rtc::CheckedDivExact(sample_rate_hz, 1000) * 120 * Channels(); + for (size_t i = 0; i < decoded_len; ++i) { + decoded[i] = next_value_++; + } + *speech_type = speech_type_; + return rtc::checked_cast(decoded_len); + } + + void Reset() override { next_value_ = 1; } + int SampleRateHz() const override { return sample_rate_hz_; } + size_t Channels() const override { return 2; } + + private: + int sample_rate_hz_; + int16_t next_value_; + SpeechType speech_type_; +}; + +class NetEqImplTest120ms : public NetEqImplTest { + protected: + NetEqImplTest120ms() : NetEqImplTest() {} + virtual ~NetEqImplTest120ms() {} + + void CreateInstanceNoMocks() { + UseNoMocks(); + CreateInstance(decoder_factory_); + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}))); + } + + void CreateInstanceWithDelayManagerMock() { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(decoder_factory_); + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}))); + } + + uint32_t timestamp_diff_between_packets() const { + return rtc::CheckedDivExact(kSamplingFreq_, 1000u) * 120; + } + + uint32_t first_timestamp() const { return 10u; } + + void GetFirstPacket() { + bool muted; + for (int i = 0; i < 12; i++) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_FALSE(muted); + } + } + + void InsertPacket(uint32_t timestamp) { + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = sequence_number_; + rtp_header.timestamp = timestamp; + rtp_header.ssrc = 15; + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. 
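+    // [Editor's note] For reference: at 48 kHz, one 120 ms packet spans
+    // 48 * 120 = 5760 timestamp ticks, which is exactly what
+    // timestamp_diff_between_packets() returns, and each GetAudio() call
+    // pulls 10 ms, so the 12 calls in GetFirstPacket() drain one packet.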
+ uint8_t payload[kPayloadLengthBytes] = {0}; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + sequence_number_++; + } + + void Register120msCodec(AudioDecoder::SpeechType speech_type) { + const uint32_t sampling_freq = kSamplingFreq_; + decoder_factory_ = rtc::make_ref_counted( + [sampling_freq, speech_type]() { + std::unique_ptr decoder = + std::make_unique(sampling_freq, speech_type); + RTC_CHECK_EQ(2, decoder->Channels()); + return decoder; + }); + } + + rtc::scoped_refptr decoder_factory_; + AudioFrame output_; + const uint32_t kPayloadType = 17; + const uint32_t kSamplingFreq_ = 48000; + uint16_t sequence_number_ = 1; +}; + +TEST_F(NetEqImplTest120ms, CodecInternalCng) { + Register120msCodec(AudioDecoder::kComfortNoise); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kCodecInternalCng, + neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Normal) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::Operation::kNormal, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Merge) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + EXPECT_CALL(*mock_neteq_controller_, CngOff()).WillRepeatedly(Return(true)); + InsertPacket(first_timestamp()); + + GetFirstPacket(); + bool muted; + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .WillOnce(Return(NetEq::Operation::kExpand)); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + + InsertPacket(first_timestamp() + 2 * timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .WillOnce(Return(NetEq::Operation::kMerge)); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kMerge, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Expand) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kExpand, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, FastAccelerate) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kFastAccelerate)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kFastAccelerate, + neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, PreemptiveExpand) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kPreemptiveExpand)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kPreemptiveExpand, + neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Accelerate) { + Register120msCodec(AudioDecoder::kSpeech); + 
CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kAccelerate)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kAccelerate, neteq_->last_operation_for_test()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc new file mode 100644 index 0000000000..a669ad727e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "absl/memory/memory.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "system_wrappers/include/clock.h" +#include "test/audio_decoder_proxy_factory.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +namespace { + +std::unique_ptr CreateNetEq( + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr& decoder_factory) { + return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); +} + +} // namespace + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; + +class MockAudioDecoder final : public AudioDecoder { + public: + static const int kPacketDuration = 960; // 48 kHz * 20 ms + + MockAudioDecoder(int sample_rate_hz, size_t num_channels) + : sample_rate_hz_(sample_rate_hz), + num_channels_(num_channels), + fec_enabled_(false) {} + ~MockAudioDecoder() override { Die(); } + MOCK_METHOD(void, Die, ()); + + MOCK_METHOD(void, Reset, (), (override)); + + class MockFrame : public AudioDecoder::EncodedAudioFrame { + public: + MockFrame(size_t num_channels) : num_channels_(num_channels) {} + + size_t Duration() const override { return kPacketDuration; } + + absl::optional Decode( + rtc::ArrayView decoded) const override { + const size_t output_size = + sizeof(int16_t) * kPacketDuration * num_channels_; + if (decoded.size() >= output_size) { + memset(decoded.data(), 0, + sizeof(int16_t) * kPacketDuration * num_channels_); + return DecodeResult{kPacketDuration * num_channels_, kSpeech}; + } else { + ADD_FAILURE() << "Expected decoded.size() to be >= output_size (" + << decoded.size() << " vs. 
" << output_size << ")"; + return absl::nullopt; + } + } + + private: + const size_t num_channels_; + }; + + std::vector ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override { + std::vector results; + if (fec_enabled_) { + std::unique_ptr fec_frame(new MockFrame(num_channels_)); + results.emplace_back(timestamp - kPacketDuration, 1, + std::move(fec_frame)); + } + + std::unique_ptr frame(new MockFrame(num_channels_)); + results.emplace_back(timestamp, 0, std::move(frame)); + return results; + } + + int PacketDuration(const uint8_t* encoded, + size_t encoded_len) const override { + ADD_FAILURE() << "Since going through ParsePayload, PacketDuration should " + "never get called."; + return kPacketDuration; + } + + bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override { + ADD_FAILURE() << "Since going through ParsePayload, PacketHasFec should " + "never get called."; + return fec_enabled_; + } + + int SampleRateHz() const override { return sample_rate_hz_; } + + size_t Channels() const override { return num_channels_; } + + void set_fec_enabled(bool enable_fec) { fec_enabled_ = enable_fec; } + + bool fec_enabled() const { return fec_enabled_; } + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + ADD_FAILURE() << "Since going through ParsePayload, DecodeInternal should " + "never get called."; + return -1; + } + + private: + const int sample_rate_hz_; + const size_t num_channels_; + bool fec_enabled_; +}; + +class NetEqNetworkStatsTest { + public: + static const int kPayloadSizeByte = 30; + static const int kFrameSizeMs = 20; + static const uint8_t kPayloadType = 95; + static const int kOutputLengthMs = 10; + + enum logic { + kIgnore, + kEqual, + kSmallerThan, + kLargerThan, + }; + + struct NetEqNetworkStatsCheck { + logic current_buffer_size_ms; + logic preferred_buffer_size_ms; + logic jitter_peaks_found; + logic packet_loss_rate; + logic expand_rate; + logic speech_expand_rate; + logic preemptive_rate; + logic accelerate_rate; + logic secondary_decoded_rate; + logic secondary_discarded_rate; + logic added_zero_samples; + NetEqNetworkStatistics stats_ref; + }; + + NetEqNetworkStatsTest(const SdpAudioFormat& format, MockAudioDecoder* decoder) + : decoder_(decoder), + decoder_factory_( + rtc::make_ref_counted(decoder)), + samples_per_ms_(format.clockrate_hz / 1000), + frame_size_samples_(kFrameSizeMs * samples_per_ms_), + rtp_generator_(new RtpGenerator(samples_per_ms_)), + last_lost_time_(0), + packet_loss_interval_(0xffffffff) { + NetEq::Config config; + config.sample_rate_hz = format.clockrate_hz; + neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory_); + neteq_->RegisterPayloadType(kPayloadType, format); + } + + bool Lost(uint32_t send_time) { + if (send_time - last_lost_time_ >= packet_loss_interval_) { + last_lost_time_ = send_time; + return true; + } + return false; + } + + void SetPacketLossRate(double loss_rate) { + packet_loss_interval_ = + (loss_rate >= 1e-3 ? 
static_cast<double>(kFrameSizeMs) / loss_rate
+                                : 0xffffffff);
+  }
+
+  // For each field x, `expects.x` selects the comparison against
+  // `expects.stats_ref.x`:
+  //   kIgnore      - do not check 'x';
+  //   kEqual       - 'x' in the current stats must equal the reference;
+  //   kSmallerThan - 'x' in the current stats must be smaller;
+  //   kLargerThan  - 'x' in the current stats must be larger.
+  void CheckNetworkStatistics(NetEqNetworkStatsCheck expects) {
+    NetEqNetworkStatistics stats;
+    neteq_->NetworkStatistics(&stats);
+
+#define CHECK_NETEQ_NETWORK_STATS(x)           \
+  switch (expects.x) {                         \
+    case kEqual:                               \
+      EXPECT_EQ(stats.x, expects.stats_ref.x); \
+      break;                                   \
+    case kSmallerThan:                         \
+      EXPECT_LT(stats.x, expects.stats_ref.x); \
+      break;                                   \
+    case kLargerThan:                          \
+      EXPECT_GT(stats.x, expects.stats_ref.x); \
+      break;                                   \
+    default:                                   \
+      break;                                   \
+  }
+
+    CHECK_NETEQ_NETWORK_STATS(current_buffer_size_ms);
+    CHECK_NETEQ_NETWORK_STATS(preferred_buffer_size_ms);
+    CHECK_NETEQ_NETWORK_STATS(jitter_peaks_found);
+    CHECK_NETEQ_NETWORK_STATS(expand_rate);
+    CHECK_NETEQ_NETWORK_STATS(speech_expand_rate);
+    CHECK_NETEQ_NETWORK_STATS(preemptive_rate);
+    CHECK_NETEQ_NETWORK_STATS(accelerate_rate);
+    CHECK_NETEQ_NETWORK_STATS(secondary_decoded_rate);
+    CHECK_NETEQ_NETWORK_STATS(secondary_discarded_rate);
+
+#undef CHECK_NETEQ_NETWORK_STATS
+  }
+
+  void RunTest(int num_loops, NetEqNetworkStatsCheck expects) {
+    uint32_t time_now;
+    uint32_t next_send_time;
+
+    // Initialize `last_lost_time_`.
+    time_now = next_send_time = last_lost_time_ = rtp_generator_->GetRtpHeader(
+        kPayloadType, frame_size_samples_, &rtp_header_);
+    for (int k = 0; k < num_loops; ++k) {
+      // Delay by one frame so that the FEC can come in.
+      while (time_now + kFrameSizeMs >= next_send_time) {
+        next_send_time = rtp_generator_->GetRtpHeader(
+            kPayloadType, frame_size_samples_, &rtp_header_);
+        if (!Lost(next_send_time)) {
+          static const uint8_t payload[kPayloadSizeByte] = {0};
+          ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header_, payload));
+        }
+      }
+      bool muted = true;
+      EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_frame_, &muted));
+      ASSERT_FALSE(muted);
+      EXPECT_EQ(decoder_->Channels(), output_frame_.num_channels_);
+      EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * samples_per_ms_),
+                output_frame_.samples_per_channel_);
+      EXPECT_EQ(48000, neteq_->last_output_sample_rate_hz());
+
+      time_now += kOutputLengthMs;
+    }
+    CheckNetworkStatistics(expects);
+    neteq_->FlushBuffers();
+  }
+
+  void DecodeFecTest() {
+    decoder_->set_fec_enabled(false);
+    NetEqNetworkStatsCheck expects = {kIgnore,  // current_buffer_size_ms
+                                      kIgnore,  // preferred_buffer_size_ms
+                                      kIgnore,  // jitter_peaks_found
+                                      kEqual,   // packet_loss_rate
+                                      kEqual,   // expand_rate
+                                      kEqual,   // speech_expand_rate
+                                      kIgnore,  // preemptive_rate
+                                      kEqual,   // accelerate_rate
+                                      kEqual,   // secondary_decoded_rate
+                                      kEqual,   // secondary_discarded_rate
+                                      kEqual,   // added_zero_samples
+                                      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
+    RunTest(50, expects);
+
+    // Next we introduce packet losses.
+    SetPacketLossRate(0.1);
+    expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 898;
+    RunTest(50, expects);
+
+    // Next we enable FEC.
+    decoder_->set_fec_enabled(true);
+    // If FEC fills in the lost packets, no packet loss will be counted.
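+    // [Editor's note] The rate fields are expressed in Q14, where 16384
+    // corresponds to 100%. For example, secondary_decoded_rate = 2006 means
+    // that about 2006 / 16384 ~= 12% of the output samples were decoded from
+    // FEC data, consistent with the 10% packet loss configured above.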
+ expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 0; + expects.stats_ref.secondary_decoded_rate = 2006; + expects.stats_ref.secondary_discarded_rate = 14336; + RunTest(50, expects); + } + + void NoiseExpansionTest() { + NetEqNetworkStatsCheck expects = {kIgnore, // current_buffer_size_ms + kIgnore, // preferred_buffer_size_ms + kIgnore, // jitter_peaks_found + kEqual, // packet_loss_rate + kEqual, // expand_rate + kEqual, // speech_expand_rate + kIgnore, // preemptive_rate + kEqual, // accelerate_rate + kEqual, // decoded_fec_rate + kEqual, // discard_fec_rate + kEqual, // added_zero_samples + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + RunTest(50, expects); + + SetPacketLossRate(1); + expects.stats_ref.expand_rate = 16384; + expects.stats_ref.speech_expand_rate = 5324; + RunTest(10, expects); // Lost 10 * 20ms in a row. + } + + private: + MockAudioDecoder* decoder_; + rtc::scoped_refptr decoder_factory_; + std::unique_ptr neteq_; + + const int samples_per_ms_; + const size_t frame_size_samples_; + std::unique_ptr rtp_generator_; + RTPHeader rtp_header_; + uint32_t last_lost_time_; + uint32_t packet_loss_interval_; + AudioFrame output_frame_; +}; + +TEST(NetEqNetworkStatsTest, DecodeFec) { + MockAudioDecoder decoder(48000, 1); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.DecodeFecTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +TEST(NetEqNetworkStatsTest, StereoDecodeFec) { + MockAudioDecoder decoder(48000, 2); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.DecodeFecTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +TEST(NetEqNetworkStatsTest, NoiseExpansionTest) { + MockAudioDecoder decoder(48000, 1); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.NoiseExpansionTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc new file mode 100644 index 0000000000..6fa56fd1c1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Test to verify correct stereo and multi-channel operation. + +#include +#include +#include +#include + +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +struct TestParameters { + int frame_size; + int sample_rate; + size_t num_channels; +}; + +// This is a parameterized test. 
The test parameters are supplied through a +// TestParameters struct, which is obtained through the GetParam() method. +// +// The objective of the test is to create a mono input signal and a +// multi-channel input signal, where each channel is identical to the mono +// input channel. The two input signals are processed through their respective +// NetEq instances. After that, the output signals are compared. The expected +// result is that each channel in the multi-channel output is identical to the +// mono output. +class NetEqStereoTest : public ::testing::TestWithParam { + protected: + static const int kTimeStepMs = 10; + static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz. + static const uint8_t kPayloadTypeMono = 95; + static const uint8_t kPayloadTypeMulti = 96; + + NetEqStereoTest() + : num_channels_(GetParam().num_channels), + sample_rate_hz_(GetParam().sample_rate), + samples_per_ms_(sample_rate_hz_ / 1000), + frame_size_ms_(GetParam().frame_size), + frame_size_samples_( + static_cast(frame_size_ms_ * samples_per_ms_)), + output_size_samples_(10 * samples_per_ms_), + clock_(0), + rtp_generator_mono_(samples_per_ms_), + rtp_generator_(samples_per_ms_), + payload_size_bytes_(0), + multi_payload_size_bytes_(0), + last_send_time_(0), + last_arrival_time_(0) { + NetEq::Config config; + config.sample_rate_hz = sample_rate_hz_; + DefaultNetEqFactory neteq_factory; + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + neteq_mono_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_); + neteq_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_); + input_ = new int16_t[frame_size_samples_]; + encoded_ = new uint8_t[2 * frame_size_samples_]; + input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_]; + encoded_multi_channel_ = + new uint8_t[frame_size_samples_ * 2 * num_channels_]; + } + + ~NetEqStereoTest() { + delete[] input_; + delete[] encoded_; + delete[] input_multi_channel_; + delete[] encoded_multi_channel_; + } + + virtual void SetUp() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + input_file_.reset(new test::InputAudioFile(file_name)); + RTC_CHECK_GE(num_channels_, 2); + ASSERT_TRUE(neteq_mono_->RegisterPayloadType( + kPayloadTypeMono, SdpAudioFormat("l16", sample_rate_hz_, 1))); + ASSERT_TRUE(neteq_->RegisterPayloadType( + kPayloadTypeMulti, + SdpAudioFormat("l16", sample_rate_hz_, num_channels_))); + } + + virtual void TearDown() {} + + int GetNewPackets() { + if (!input_file_->Read(frame_size_samples_, input_)) { + return -1; + } + payload_size_bytes_ = + WebRtcPcm16b_Encode(input_, frame_size_samples_, encoded_); + if (frame_size_samples_ * 2 != payload_size_bytes_) { + return -1; + } + int next_send_time = rtp_generator_mono_.GetRtpHeader( + kPayloadTypeMono, frame_size_samples_, &rtp_header_mono_); + MakeMultiChannelInput(); + multi_payload_size_bytes_ = WebRtcPcm16b_Encode( + input_multi_channel_, frame_size_samples_ * num_channels_, + encoded_multi_channel_); + if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) { + return -1; + } + rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_, + &rtp_header_); + return next_send_time; + } + + virtual void MakeMultiChannelInput() { + test::InputAudioFile::DuplicateInterleaved( + input_, frame_size_samples_, num_channels_, input_multi_channel_); + } + + virtual void VerifyOutput(size_t num_samples) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = 
output_multi_channel_.data(); + for (size_t i = 0; i < num_samples; ++i) { + for (size_t j = 0; j < num_channels_; ++j) { + ASSERT_EQ(output_data[i], + output_multi_channel_data[i * num_channels_ + j]) + << "Diff in sample " << i << ", channel " << j << "."; + } + } + } + + virtual int GetArrivalTime(int send_time) { + int arrival_time = last_arrival_time_ + (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + virtual bool Lost() { return false; } + + void RunTest(int num_loops) { + // Get next input packets (mono and multi-channel). + int next_send_time; + int next_arrival_time; + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + + int time_now = 0; + for (int k = 0; k < num_loops; ++k) { + while (time_now >= next_arrival_time) { + // Insert packet in mono instance. + ASSERT_EQ(NetEq::kOK, + neteq_mono_->InsertPacket( + rtp_header_mono_, rtc::ArrayView( + encoded_, payload_size_bytes_))); + // Insert packet in multi-channel instance. + ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket( + rtp_header_, rtc::ArrayView( + encoded_multi_channel_, + multi_payload_size_bytes_))); + // Get next input packets (mono and multi-channel). + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + } + // Get audio from mono instance. + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_mono_->GetAudio(&output_, &muted)); + ASSERT_FALSE(muted); + EXPECT_EQ(1u, output_.num_channels_); + EXPECT_EQ(output_size_samples_, output_.samples_per_channel_); + // Get audio from multi-channel instance. + ASSERT_EQ(NetEq::kOK, neteq_->GetAudio(&output_multi_channel_, &muted)); + ASSERT_FALSE(muted); + EXPECT_EQ(num_channels_, output_multi_channel_.num_channels_); + EXPECT_EQ(output_size_samples_, + output_multi_channel_.samples_per_channel_); + rtc::StringBuilder ss; + ss << "Lap number " << k << "."; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + // Compare mono and multi-channel. + ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_)); + + time_now += kTimeStepMs; + clock_.AdvanceTimeMilliseconds(kTimeStepMs); + } + } + + const size_t num_channels_; + const int sample_rate_hz_; + const int samples_per_ms_; + const int frame_size_ms_; + const size_t frame_size_samples_; + const size_t output_size_samples_; + SimulatedClock clock_; + std::unique_ptr neteq_mono_; + std::unique_ptr neteq_; + test::RtpGenerator rtp_generator_mono_; + test::RtpGenerator rtp_generator_; + int16_t* input_; + int16_t* input_multi_channel_; + uint8_t* encoded_; + uint8_t* encoded_multi_channel_; + AudioFrame output_; + AudioFrame output_multi_channel_; + RTPHeader rtp_header_mono_; + RTPHeader rtp_header_; + size_t payload_size_bytes_; + size_t multi_payload_size_bytes_; + int last_send_time_; + int last_arrival_time_; + std::unique_ptr input_file_; +}; + +class NetEqStereoTestNoJitter : public NetEqStereoTest { + protected: + NetEqStereoTestNoJitter() : NetEqStereoTest() { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. 
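+    // [Editor's note] With last_arrival_time_ = -100 and last_send_time_ = 0,
+    // GetArrivalTime() maps a packet sent at time t to an arrival at t - 100,
+    // so roughly 100 ms of audio is already buffered at the first GetAudio().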
+ last_arrival_time_ = -100; + } +}; + +TEST_P(NetEqStereoTestNoJitter, RunTest) { + RunTest(8); +} + +class NetEqStereoTestPositiveDrift : public NetEqStereoTest { + protected: + NetEqStereoTestPositiveDrift() : NetEqStereoTest(), drift_factor(0.9) { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. + last_arrival_time_ = -100; + } + virtual int GetArrivalTime(int send_time) { + int arrival_time = + last_arrival_time_ + drift_factor * (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + double drift_factor; +}; + +TEST_P(NetEqStereoTestPositiveDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift { + protected: + NetEqStereoTestNegativeDrift() : NetEqStereoTestPositiveDrift() { + drift_factor = 1.1; + last_arrival_time_ = 0; + } +}; + +TEST_P(NetEqStereoTestNegativeDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestDelays : public NetEqStereoTest { + protected: + static const int kDelayInterval = 10; + static const int kDelay = 1000; + NetEqStereoTestDelays() : NetEqStereoTest(), frame_index_(0) {} + + virtual int GetArrivalTime(int send_time) { + // Deliver immediately, unless we have a back-log. + int arrival_time = std::min(last_arrival_time_, send_time); + if (++frame_index_ % kDelayInterval == 0) { + // Delay this packet. + arrival_time += kDelay; + } + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestDelays, RunTest) { + RunTest(1000); +} + +class NetEqStereoTestLosses : public NetEqStereoTest { + protected: + static const int kLossInterval = 10; + NetEqStereoTestLosses() : NetEqStereoTest(), frame_index_(0) {} + + virtual bool Lost() { return (++frame_index_) % kLossInterval == 0; } + + // TODO(hlundin): NetEq is not giving bitexact results for these cases. + virtual void VerifyOutput(size_t num_samples) { + for (size_t i = 0; i < num_samples; ++i) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = output_multi_channel_.data(); + auto first_channel_sample = output_multi_channel_data[i * num_channels_]; + for (size_t j = 0; j < num_channels_; ++j) { + const int kErrorMargin = 200; + EXPECT_NEAR(output_data[i], + output_multi_channel_data[i * num_channels_ + j], + kErrorMargin) + << "Diff in sample " << i << ", channel " << j << "."; + EXPECT_EQ(first_channel_sample, + output_multi_channel_data[i * num_channels_ + j]); + } + } + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestLosses, RunTest) { + RunTest(100); +} + +class NetEqStereoTestSingleActiveChannelPlc : public NetEqStereoTestLosses { + protected: + NetEqStereoTestSingleActiveChannelPlc() : NetEqStereoTestLosses() {} + + virtual void MakeMultiChannelInput() override { + // Create a multi-channel input by copying the mono channel from file to the + // first channel, and setting the others to zero. + memset(input_multi_channel_, 0, + frame_size_samples_ * num_channels_ * sizeof(int16_t)); + for (size_t i = 0; i < frame_size_samples_; ++i) { + input_multi_channel_[i * num_channels_] = input_[i]; + } + } + + virtual void VerifyOutput(size_t num_samples) override { + // Simply verify that all samples in channels other than the first are zero. 
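+    // [Editor's note] The inactive channels are all-zero on input, so any
+    // non-zero output sample would mean that concealment leaked energy from
+    // the single active channel into the others.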
+ const int16_t* output_multi_channel_data = output_multi_channel_.data(); + for (size_t i = 0; i < num_samples; ++i) { + for (size_t j = 1; j < num_channels_; ++j) { + EXPECT_EQ(0, output_multi_channel_data[i * num_channels_ + j]) + << "Sample " << i << ", channel " << j << " is non-zero."; + } + } + } +}; + +TEST_P(NetEqStereoTestSingleActiveChannelPlc, RunTest) { + RunTest(100); +} + +// Creates a list of parameter sets. +std::list GetTestParameters() { + std::list l; + const int sample_rates[] = {8000, 16000, 32000}; + const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]); + // Loop through sample rates. + for (int rate_index = 0; rate_index < num_rates; ++rate_index) { + int sample_rate = sample_rates[rate_index]; + // Loop through all frame sizes between 10 and 60 ms. + for (int frame_size = 10; frame_size <= 60; frame_size += 10) { + TestParameters p; + p.frame_size = frame_size; + p.sample_rate = sample_rate; + p.num_channels = 2; + l.push_back(p); + if (sample_rate == 8000) { + // Add a five-channel test for 8000 Hz. + p.num_channels = 5; + l.push_back(p); + } + } + } + return l; +} + +// Pretty-printing the test parameters in case of an error. +void PrintTo(const TestParameters& p, ::std::ostream* os) { + *os << "{frame_size = " << p.frame_size + << ", num_channels = " << p.num_channels + << ", sample_rate = " << p.sample_rate << "}"; +} + +// Instantiate the tests. Each test is instantiated using the function above, +// so that all different parameter combinations are tested. +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestNoJitter, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestPositiveDrift, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestNegativeDrift, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestDelays, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestLosses, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + NetEqStereoTestSingleActiveChannelPlc, + ::testing::ValuesIn(GetTestParameters())); +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc new file mode 100644 index 0000000000..fff14bf830 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc @@ -0,0 +1,1009 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/neteq/neteq.h" + +#include +#include +#include // memset + +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "modules/audio_coding/neteq/test/neteq_decoding_test.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" +#include "modules/include/module_common_types_public.h" +#include "modules/rtp_rtcp/include/rtcp_statistics.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/message_digest.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/system/arch.h" +#include "test/field_trial.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +ABSL_FLAG(bool, gen_ref, false, "Generate reference files."); + +namespace webrtc { + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && \ + defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && \ + (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \ + defined(WEBRTC_CODEC_ILBC) +#define MAYBE_TestBitExactness TestBitExactness +#else +#define MAYBE_TestBitExactness DISABLED_TestBitExactness +#endif +TEST_F(NetEqDecodingTest, MAYBE_TestBitExactness) { + const std::string input_rtp_file = + webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"); + + const std::string output_checksum = + "dee7a10ab92526876a70a85bc48a4906901af3df"; + + const std::string network_stats_checksum = + "911dbf5fd97f48d25b8f0967286eb73c9d6f6158"; + + DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum, + absl::GetFlag(FLAGS_gen_ref)); +} + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && \ + defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && defined(WEBRTC_CODEC_OPUS) +#define MAYBE_TestOpusBitExactness TestOpusBitExactness +#else +#define MAYBE_TestOpusBitExactness DISABLED_TestOpusBitExactness +#endif +TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) { + const std::string input_rtp_file = + webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp"); + + const std::string output_checksum = + "fec6827bb9ee0b21770bbbb4a3a6f8823bf537dc|" + "3610cc7be4b3407b9c273b1299ab7f8f47cca96b"; + + const std::string network_stats_checksum = + "3d043e47e5f4bb81d37e7bce8c44bf802965c853|" + "076662525572dba753b11578330bd491923f7f5e"; + + DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum, + absl::GetFlag(FLAGS_gen_ref)); +} + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && \ + defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && defined(WEBRTC_CODEC_OPUS) +#define MAYBE_TestOpusDtxBitExactness TestOpusDtxBitExactness +#else +#define MAYBE_TestOpusDtxBitExactness DISABLED_TestOpusDtxBitExactness +#endif +TEST_F(NetEqDecodingTest, MAYBE_TestOpusDtxBitExactness) { + const std::string input_rtp_file = + webrtc::test::ResourcePath("audio_coding/neteq_opus_dtx", "rtp"); + + const std::string output_checksum = + "b3c4899eab5378ef5e54f2302948872149f6ad5e|" + "589e975ec31ea13f302457fea1425be9380ffb96"; + + const std::string network_stats_checksum = + "dc8447b9fee1a21fd5d1f4045d62b982a3fb0215"; + + DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum, + absl::GetFlag(FLAGS_gen_ref)); +} + +// Use fax mode to avoid 
time-scaling. This is to simplify the testing of +// packet waiting times in the packet buffer. +class NetEqDecodingTestFaxMode : public NetEqDecodingTest { + protected: + NetEqDecodingTestFaxMode() : NetEqDecodingTest() { + config_.for_test_no_time_stretching = true; + } + void TestJitterBufferDelay(bool apply_packet_loss); +}; + +TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) { + // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio. + size_t num_frames = 30; + const size_t kSamples = 10 * 16; + const size_t kPayloadBytes = kSamples * 2; + for (size_t i = 0; i < num_frames; ++i) { + const uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + rtp_info.sequenceNumber = rtc::checked_cast(i); + rtp_info.timestamp = rtc::checked_cast(i * kSamples); + rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info.payloadType = 94; // PCM16b WB codec. + rtp_info.markerBit = 0; + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + } + // Pull out all data. + for (size_t i = 0; i < num_frames; ++i) { + bool muted; + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + } + + NetEqNetworkStatistics stats; + EXPECT_EQ(0, neteq_->NetworkStatistics(&stats)); + // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms + // spacing (per definition), we expect the delay to increase with 10 ms for + // each packet. Thus, we are calculating the statistics for a series from 10 + // to 300, in steps of 10 ms. + EXPECT_EQ(155, stats.mean_waiting_time_ms); + EXPECT_EQ(155, stats.median_waiting_time_ms); + EXPECT_EQ(10, stats.min_waiting_time_ms); + EXPECT_EQ(300, stats.max_waiting_time_ms); + + // Check statistics again and make sure it's been reset. + EXPECT_EQ(0, neteq_->NetworkStatistics(&stats)); + EXPECT_EQ(-1, stats.mean_waiting_time_ms); + EXPECT_EQ(-1, stats.median_waiting_time_ms); + EXPECT_EQ(-1, stats.min_waiting_time_ms); + EXPECT_EQ(-1, stats.max_waiting_time_ms); +} + + +TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDrift) { + // Apply a clock drift of -25 ms / s (sender faster than receiver). + const double kDriftFactor = 1000.0 / (1000.0 + 25.0); + const double kNetworkFreezeTimeMs = 0.0; + const bool kGetAudioDuringFreezeRecovery = false; + const int kDelayToleranceMs = 20; + const int kMaxTimeToSpeechMs = 100; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDrift) { + // Apply a clock drift of +25 ms / s (sender slower than receiver). + const double kDriftFactor = 1000.0 / (1000.0 - 25.0); + const double kNetworkFreezeTimeMs = 0.0; + const bool kGetAudioDuringFreezeRecovery = false; + const int kDelayToleranceMs = 40; + const int kMaxTimeToSpeechMs = 100; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) { + // Apply a clock drift of -25 ms / s (sender faster than receiver). 
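+  // [Editor's note] The drift factor relates the two clocks:
+  // 1000.0 / (1000.0 + 25.0) ~= 0.9756, i.e. for every 1025 ms of audio the
+  // sender produces, only 1000 ms elapse at the receiver, so the sender is
+  // effectively about 2.5% fast.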
+ const double kDriftFactor = 1000.0 / (1000.0 + 25.0); + const double kNetworkFreezeTimeMs = 5000.0; + const bool kGetAudioDuringFreezeRecovery = false; + const int kDelayToleranceMs = 60; + const int kMaxTimeToSpeechMs = 200; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreeze) { + // Apply a clock drift of +25 ms / s (sender slower than receiver). + const double kDriftFactor = 1000.0 / (1000.0 - 25.0); + const double kNetworkFreezeTimeMs = 5000.0; + const bool kGetAudioDuringFreezeRecovery = false; + const int kDelayToleranceMs = 40; + const int kMaxTimeToSpeechMs = 100; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreezeExtraPull) { + // Apply a clock drift of +25 ms / s (sender slower than receiver). + const double kDriftFactor = 1000.0 / (1000.0 - 25.0); + const double kNetworkFreezeTimeMs = 5000.0; + const bool kGetAudioDuringFreezeRecovery = true; + const int kDelayToleranceMs = 40; + const int kMaxTimeToSpeechMs = 100; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, LongCngWithoutClockDrift) { + const double kDriftFactor = 1.0; // No drift. + const double kNetworkFreezeTimeMs = 0.0; + const bool kGetAudioDuringFreezeRecovery = false; + const int kDelayToleranceMs = 10; + const int kMaxTimeToSpeechMs = 50; + LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs, + kGetAudioDuringFreezeRecovery, kDelayToleranceMs, + kMaxTimeToSpeechMs); +} + +TEST_F(NetEqDecodingTest, UnknownPayloadType) { + const size_t kPayloadBytes = 100; + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + rtp_info.payloadType = 1; // Not registered as a decoder. + EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(rtp_info, payload)); +} + +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) +#define MAYBE_DecoderError DecoderError +#else +#define MAYBE_DecoderError DISABLED_DecoderError +#endif + +TEST_F(NetEqDecodingTest, MAYBE_DecoderError) { + const size_t kPayloadBytes = 100; + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + rtp_info.payloadType = 103; // iSAC, but the payload is invalid. + EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + // Set all of `out_data_` to 1, and verify that it was set to 0 by the call + // to GetAudio. + int16_t* out_frame_data = out_frame_.mutable_data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) { + out_frame_data[i] = 1; + } + bool muted; + EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_FALSE(muted); + + // Verify that the first 160 samples are set to 0. + static const int kExpectedOutputLength = 160; // 10 ms at 16 kHz sample rate. + const int16_t* const_out_frame_data = out_frame_.data(); + for (int i = 0; i < kExpectedOutputLength; ++i) { + rtc::StringBuilder ss; + ss << "i = " << i; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + EXPECT_EQ(0, const_out_frame_data[i]); + } +} + +TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) { + // Set all of `out_data_` to 1, and verify that it was set to 0 by the call + // to GetAudio. 
+  int16_t* out_frame_data = out_frame_.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
+    out_frame_data[i] = 1;
+  }
+  bool muted;
+  EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+  ASSERT_FALSE(muted);
+  // Verify that the first block of samples is set to 0.
+  static const int kExpectedOutputLength =
+      kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
+  for (int i = 0; i < kExpectedOutputLength; ++i) {
+    rtc::StringBuilder ss;
+    ss << "i = " << i;
+    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
+    EXPECT_EQ(0, const_out_frame_data[i]);
+  }
+  // Verify that the sample rate did not change from the initial configuration.
+  EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
+}
+
+class NetEqBgnTest : public NetEqDecodingTest {
+ protected:
+  void CheckBgn(int sampling_rate_hz) {
+    size_t expected_samples_per_channel = 0;
+    uint8_t payload_type = 0xFF;  // Invalid.
+    if (sampling_rate_hz == 8000) {
+      expected_samples_per_channel = kBlockSize8kHz;
+      payload_type = 93;  // PCM 16, 8 kHz.
+    } else if (sampling_rate_hz == 16000) {
+      expected_samples_per_channel = kBlockSize16kHz;
+      payload_type = 94;  // PCM 16, 16 kHz.
+    } else if (sampling_rate_hz == 32000) {
+      expected_samples_per_channel = kBlockSize32kHz;
+      payload_type = 95;  // PCM 16, 32 kHz.
+    } else {
+      ASSERT_TRUE(false);  // Unsupported test case.
+    }
+
+    AudioFrame output;
+    test::AudioLoop input;
+    // We are using the same 32 kHz input file for all tests, regardless of
+    // `sampling_rate_hz`. The output may sound weird, but the test is still
+    // valid.
+    ASSERT_TRUE(input.Init(
+        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
+        10 * sampling_rate_hz,  // Max 10 seconds loop length.
+        expected_samples_per_channel));
+
+    // Payload of 10 ms of PCM16 32 kHz.
+    uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
+    RTPHeader rtp_info;
+    PopulateRtpInfo(0, 0, &rtp_info);
+    rtp_info.payloadType = payload_type;
+
+    bool muted;
+    for (int n = 0; n < 10; ++n) {  // Insert a few packets and get audio.
+      auto block = input.GetNextBlock();
+      ASSERT_EQ(expected_samples_per_channel, block.size());
+      size_t enc_len_bytes =
+          WebRtcPcm16b_Encode(block.data(), block.size(), payload);
+      ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);
+
+      ASSERT_EQ(0,
+                neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
+                                                   payload, enc_len_bytes)));
+      output.Reset();
+      ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+      ASSERT_EQ(1u, output.num_channels_);
+      ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
+      ASSERT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+
+      // Next packet.
+      rtp_info.timestamp +=
+          rtc::checked_cast<uint32_t>(expected_samples_per_channel);
+      rtp_info.sequenceNumber++;
+    }
+
+    output.Reset();
+
+    // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull
+    // one frame without checking speech-type. This is the first frame pulled
+    // without inserting any packet, and might not be labeled as PLC.
+    ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+    ASSERT_EQ(1u, output.num_channels_);
+    ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
+
+    // To be able to test the fading of background noise we need to pull at
+    // least 611 frames.
+    const int kFadingThreshold = 611;
+
+    // Test several CNG-to-PLC packets for the expected behavior. The number 20
+    // is arbitrary, but sufficiently large to cover a sufficient number of
+    // frames.
+ const int kNumPlcToCngTestFrames = 20; + bool plc_to_cng = false; + for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) { + output.Reset(); + // Set to non-zero. + memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes); + ASSERT_EQ(0, neteq_->GetAudio(&output, &muted)); + ASSERT_FALSE(muted); + ASSERT_EQ(1u, output.num_channels_); + ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_); + if (output.speech_type_ == AudioFrame::kPLCCNG) { + plc_to_cng = true; + double sum_squared = 0; + const int16_t* output_data = output.data(); + for (size_t k = 0; + k < output.num_channels_ * output.samples_per_channel_; ++k) + sum_squared += output_data[k] * output_data[k]; + EXPECT_EQ(0, sum_squared); + } else { + EXPECT_EQ(AudioFrame::kPLC, output.speech_type_); + } + } + EXPECT_TRUE(plc_to_cng); // Just to be sure that PLC-to-CNG has occurred. + } +}; + +TEST_F(NetEqBgnTest, RunTest) { + CheckBgn(8000); + CheckBgn(16000); + CheckBgn(32000); +} + +TEST_F(NetEqDecodingTest, SequenceNumberWrap) { + // Start with a sequence number that will soon wrap. + std::set drop_seq_numbers; // Don't drop any packets. + WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false); +} + +TEST_F(NetEqDecodingTest, SequenceNumberWrapAndDrop) { + // Start with a sequence number that will soon wrap. + std::set drop_seq_numbers; + drop_seq_numbers.insert(0xFFFF); + drop_seq_numbers.insert(0x0); + WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false); +} + +TEST_F(NetEqDecodingTest, TimestampWrap) { + // Start with a timestamp that will soon wrap. + std::set drop_seq_numbers; + WrapTest(0, 0xFFFFFFFF - 3000, drop_seq_numbers, false, true); +} + +TEST_F(NetEqDecodingTest, TimestampAndSequenceNumberWrap) { + // Start with a timestamp and a sequence number that will wrap at the same + // time. + std::set drop_seq_numbers; + WrapTest(0xFFFF - 10, 0xFFFFFFFF - 5000, drop_seq_numbers, true, true); +} + +TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { + uint16_t seq_no = 0; + uint32_t timestamp = 0; + const int kFrameSizeMs = 10; + const int kSampleRateKhz = 16; + const int kSamples = kFrameSizeMs * kSampleRateKhz; + const size_t kPayloadBytes = kSamples * 2; + + const int algorithmic_delay_samples = + std::max(algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8); + // Insert three speech packets. Three are needed to get the frame length + // correct. + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + bool muted; + for (int i = 0; i < 3; ++i) { + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + ++seq_no; + timestamp += kSamples; + + // Pull audio once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + } + // Verify speech output. + EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_); + + // Insert same CNG packet twice. + const int kCngPeriodMs = 100; + const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz; + size_t payload_len; + PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); + // This is the first time this CNG packet is inserted. + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView( + payload, payload_len))); + + // Pull audio once and make sure CNG is played. 
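+  // [Editor's note] The timestamp check a few lines below works as follows:
+  // the end of the pulled frame (timestamp_ + samples_per_channel_) should
+  // trail the inserted packet's RTP timestamp by exactly the algorithmic
+  // delay that NetEq adds, hence the `timestamp - algorithmic_delay_samples`
+  // reference value.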
+ ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+ EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+ EXPECT_FALSE(
+ neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
+ EXPECT_EQ(timestamp - algorithmic_delay_samples,
+ out_frame_.timestamp_ + out_frame_.samples_per_channel_);
+
+ // Insert the same CNG packet again. Note that at this point it is old, since
+ // we have already decoded the first copy of it.
+ ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
+ payload, payload_len)));
+
+ // Pull audio until we have played `kCngPeriodMs` of CNG. Start at 10 ms since
+ // we have already pulled out CNG once.
+ for (int cng_time_ms = 10; cng_time_ms < kCngPeriodMs; cng_time_ms += 10) {
+ ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+ EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+ EXPECT_FALSE(
+ neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
+ EXPECT_EQ(timestamp - algorithmic_delay_samples,
+ out_frame_.timestamp_ + out_frame_.samples_per_channel_);
+ }
+
+ ++seq_no;
+ timestamp += kCngPeriodSamples;
+ uint32_t first_speech_timestamp = timestamp;
+ // Insert speech again.
+ for (int i = 0; i < 3; ++i) {
+ PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+ ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+ ++seq_no;
+ timestamp += kSamples;
+ }
+
+ // Pull audio once and verify that the output is speech again.
+ ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+ EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
+ absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
+ ASSERT_TRUE(playout_timestamp);
+ EXPECT_EQ(first_speech_timestamp + kSamples - algorithmic_delay_samples,
+ *playout_timestamp);
+}
+
+TEST_F(NetEqDecodingTest, CngFirst) {
+ uint16_t seq_no = 0;
+ uint32_t timestamp = 0;
+ const int kFrameSizeMs = 10;
+ const int kSampleRateKhz = 16;
+ const int kSamples = kFrameSizeMs * kSampleRateKhz;
+ const int kPayloadBytes = kSamples * 2;
+ const int kCngPeriodMs = 100;
+ const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
+ size_t payload_len;
+
+ uint8_t payload[kPayloadBytes] = {0};
+ RTPHeader rtp_info;
+
+ PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
+ ASSERT_EQ(NetEq::kOK,
+ neteq_->InsertPacket(
+ rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len)));
+ ++seq_no;
+ timestamp += kCngPeriodSamples;
+
+ // Pull audio once and make sure CNG is played.
+ bool muted;
+ ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+ EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+
+ // Insert some speech packets.
+ const uint32_t first_speech_timestamp = timestamp;
+ int timeout_counter = 0;
+ do {
+ ASSERT_LT(timeout_counter++, 20) << "Test timed out";
+ PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+ ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+ ++seq_no;
+ timestamp += kSamples;
+
+ // Pull audio once.
+ ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+ } while (!IsNewerTimestamp(out_frame_.timestamp_, first_speech_timestamp));
+ // Verify speech output.
+ EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
+}
+
+class NetEqDecodingTestWithMutedState : public NetEqDecodingTest {
+ public:
+ NetEqDecodingTestWithMutedState() : NetEqDecodingTest() {
+ config_.enable_muted_state = true;
+ }
+
+ protected:
+ static constexpr size_t kSamples = 10 * 16;
+ static constexpr size_t kPayloadBytes = kSamples * 2;
+
+ void InsertPacket(uint32_t rtp_timestamp) {
+ uint8_t payload[kPayloadBytes] = {0};
+ RTPHeader rtp_info;
+ PopulateRtpInfo(0, rtp_timestamp, &rtp_info);
+ EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+ }
+
+ void InsertCngPacket(uint32_t rtp_timestamp) {
+ uint8_t payload[kPayloadBytes] = {0};
+ RTPHeader rtp_info;
+ size_t payload_len;
+ PopulateCng(0, rtp_timestamp, &rtp_info, payload, &payload_len);
+ EXPECT_EQ(NetEq::kOK,
+ neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
+ payload, payload_len)));
+ }
+
+ bool GetAudioReturnMuted() {
+ bool muted;
+ EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ return muted;
+ }
+
+ void GetAudioUntilMuted() {
+ while (!GetAudioReturnMuted()) {
+ ASSERT_LT(counter_++, 1000) << "Test timed out";
+ }
+ }
+
+ void GetAudioUntilNormal() {
+ bool muted = false;
+ while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
+ EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_LT(counter_++, 1000) << "Test timed out";
+ }
+ EXPECT_FALSE(muted);
+ }
+
+ int counter_ = 0;
+};
+
+// Verifies that NetEq goes in and out of muted state as expected.
+TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
+ // Insert one speech packet.
+ InsertPacket(0);
+ // Pull out audio once and expect it not to be muted.
+ EXPECT_FALSE(GetAudioReturnMuted());
+ // Pull data until faded out.
+ GetAudioUntilMuted();
+ EXPECT_TRUE(out_frame_.muted());
+
+ // Verify that output audio is not written during muted mode. Other parameters
+ // should be correct, though.
+ AudioFrame new_frame;
+ int16_t* frame_data = new_frame.mutable_data();
+ for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+ frame_data[i] = 17;
+ }
+ bool muted;
+ EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
+ EXPECT_TRUE(muted);
+ EXPECT_TRUE(out_frame_.muted());
+ for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+ EXPECT_EQ(17, frame_data[i]);
+ }
+ EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
+ new_frame.timestamp_);
+ EXPECT_EQ(out_frame_.samples_per_channel_, new_frame.samples_per_channel_);
+ EXPECT_EQ(out_frame_.sample_rate_hz_, new_frame.sample_rate_hz_);
+ EXPECT_EQ(out_frame_.num_channels_, new_frame.num_channels_);
+ EXPECT_EQ(out_frame_.speech_type_, new_frame.speech_type_);
+ EXPECT_EQ(out_frame_.vad_activity_, new_frame.vad_activity_);
+
+ // Insert new data. Timestamp is corrected for the time elapsed since the last
+ // packet. Verify that normal operation resumes.
+ InsertPacket(kSamples * counter_);
+ GetAudioUntilNormal();
+ EXPECT_FALSE(out_frame_.muted());
+
+ NetEqNetworkStatistics stats;
+ EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
+ // NetEqNetworkStatistics::expand_rate tells the fraction of samples that were
+ // concealment samples, in Q14 (16384 = 100%). The vast majority should be
+ // concealment samples in this test.
+ EXPECT_GT(stats.expand_rate, 14000);
+ // And, it should be greater than the speech_expand_rate.
+ EXPECT_GT(stats.expand_rate, stats.speech_expand_rate);
+}
+
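Since the Q14 convention above is easy to misread, note that 16384 corresponds to 100%, so the 14000 threshold demands that roughly 85% of the emitted samples were concealment. A minimal conversion sketch (illustrative only, not part of the patch):

#include <cstdint>
#include <cstdio>

// Converts a Q14 fraction such as NetEqNetworkStatistics::expand_rate to
// percent; 1 << 14 == 16384 represents 100%.
double Q14ToPercent(uint16_t q14) {
  return 100.0 * q14 / (1 << 14);
}

int main() {
  std::printf("%.1f%%\n", Q14ToPercent(14000));  // Prints "85.4%".
  return 0;
}

+// Verifies that NetEq goes out of muted state when given a delayed packet.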
+TEST_F(NetEqDecodingTestWithMutedState, MutedStateDelayedPacket) {
+ // Insert one speech packet.
+ InsertPacket(0);
+ // Pull out audio once and expect it not to be muted.
+ EXPECT_FALSE(GetAudioReturnMuted());
+ // Pull data until faded out.
+ GetAudioUntilMuted();
+ // Insert new data. Timestamp is only corrected for half of the time elapsed
+ // since the last packet. That is, the new packet is delayed. Verify that
+ // normal operation resumes.
+ InsertPacket(kSamples * counter_ / 2);
+ GetAudioUntilNormal();
+}
+
+// Verifies that NetEq goes out of muted state when given a future packet.
+TEST_F(NetEqDecodingTestWithMutedState, MutedStateFuturePacket) {
+ // Insert one speech packet.
+ InsertPacket(0);
+ // Pull out audio once and expect it not to be muted.
+ EXPECT_FALSE(GetAudioReturnMuted());
+ // Pull data until faded out.
+ GetAudioUntilMuted();
+ // Insert new data. Timestamp is over-corrected for the time elapsed since the
+ // last packet. That is, the new packet is too early. Verify that normal
+ // operation resumes.
+ InsertPacket(kSamples * counter_ * 2);
+ GetAudioUntilNormal();
+}
+
+// Verifies that NetEq goes out of muted state when given an old packet.
+TEST_F(NetEqDecodingTestWithMutedState, MutedStateOldPacket) {
+ // Insert one speech packet.
+ InsertPacket(0);
+ // Pull out audio once and expect it not to be muted.
+ EXPECT_FALSE(GetAudioReturnMuted());
+ // Pull data until faded out.
+ GetAudioUntilMuted();
+
+ EXPECT_NE(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
+ // Insert a few packets which are older than the first packet.
+ for (int i = 0; i < 5; ++i) {
+ InsertPacket(kSamples * (i - 1000));
+ }
+ EXPECT_FALSE(GetAudioReturnMuted());
+ EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
+}
+
+// Verifies that NetEq doesn't enter muted state when CNG mode is active and the
+// packet stream is suspended for a long time.
+TEST_F(NetEqDecodingTestWithMutedState, DoNotMuteExtendedCngWithoutPackets) {
+ // Insert one CNG packet.
+ InsertCngPacket(0);
+
+ // Pull 10 seconds of audio (10 ms audio generated per lap).
+ for (int i = 0; i < 1000; ++i) {
+ bool muted;
+ EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ ASSERT_FALSE(muted);
+ }
+ EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+}
+
+// Verifies that NetEq goes back to normal after a long CNG period with the
+// packet stream suspended.
+TEST_F(NetEqDecodingTestWithMutedState, RecoverAfterExtendedCngWithoutPackets) {
+ // Insert one CNG packet.
+ InsertCngPacket(0);
+
+ // Pull 10 seconds of audio (10 ms audio generated per lap).
+ for (int i = 0; i < 1000; ++i) {
+ bool muted;
+ EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+ }
+
+ // Insert new data. Timestamp is corrected for the time elapsed since the last
+ // packet. Verify that normal operation resumes.
+ InsertPacket(kSamples * counter_); + GetAudioUntilNormal(); +} + +namespace { +::testing::AssertionResult AudioFramesEqualExceptData(const AudioFrame& a, + const AudioFrame& b) { + if (a.timestamp_ != b.timestamp_) + return ::testing::AssertionFailure() << "timestamp_ diff (" << a.timestamp_ + << " != " << b.timestamp_ << ")"; + if (a.sample_rate_hz_ != b.sample_rate_hz_) + return ::testing::AssertionFailure() + << "sample_rate_hz_ diff (" << a.sample_rate_hz_ + << " != " << b.sample_rate_hz_ << ")"; + if (a.samples_per_channel_ != b.samples_per_channel_) + return ::testing::AssertionFailure() + << "samples_per_channel_ diff (" << a.samples_per_channel_ + << " != " << b.samples_per_channel_ << ")"; + if (a.num_channels_ != b.num_channels_) + return ::testing::AssertionFailure() + << "num_channels_ diff (" << a.num_channels_ + << " != " << b.num_channels_ << ")"; + if (a.speech_type_ != b.speech_type_) + return ::testing::AssertionFailure() + << "speech_type_ diff (" << a.speech_type_ + << " != " << b.speech_type_ << ")"; + if (a.vad_activity_ != b.vad_activity_) + return ::testing::AssertionFailure() + << "vad_activity_ diff (" << a.vad_activity_ + << " != " << b.vad_activity_ << ")"; + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult AudioFramesEqual(const AudioFrame& a, + const AudioFrame& b) { + ::testing::AssertionResult res = AudioFramesEqualExceptData(a, b); + if (!res) + return res; + if (memcmp(a.data(), b.data(), + a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != + 0) { + return ::testing::AssertionFailure() << "data_ diff"; + } + return ::testing::AssertionSuccess(); +} + +} // namespace + +TEST_F(NetEqDecodingTestTwoInstances, CompareMutedStateOnOff) { + ASSERT_FALSE(config_.enable_muted_state); + config2_.enable_muted_state = true; + CreateSecondInstance(); + + // Insert one speech packet into both NetEqs. + const size_t kSamples = 10 * 16; + const size_t kPayloadBytes = kSamples * 2; + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload)); + + AudioFrame out_frame1, out_frame2; + bool muted; + for (int i = 0; i < 1000; ++i) { + rtc::StringBuilder ss; + ss << "i = " << i; + SCOPED_TRACE(ss.str()); // Print out the loop iterator on failure. + EXPECT_EQ(0, neteq_->GetAudio(&out_frame1, &muted)); + EXPECT_FALSE(muted); + EXPECT_EQ(0, neteq2_->GetAudio(&out_frame2, &muted)); + if (muted) { + EXPECT_TRUE(AudioFramesEqualExceptData(out_frame1, out_frame2)); + } else { + EXPECT_TRUE(AudioFramesEqual(out_frame1, out_frame2)); + } + } + EXPECT_TRUE(muted); + + // Insert new data. Timestamp is corrected for the time elapsed since the last + // packet. + for (int i = 0; i < 5; ++i) { + PopulateRtpInfo(0, kSamples * 1000 + kSamples * i, &rtp_info); + EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload)); + } + + int counter = 0; + while (out_frame1.speech_type_ != AudioFrame::kNormalSpeech) { + ASSERT_LT(counter++, 1000) << "Test timed out"; + rtc::StringBuilder ss; + ss << "counter = " << counter; + SCOPED_TRACE(ss.str()); // Print out the loop iterator on failure. 
+ EXPECT_EQ(0, neteq_->GetAudio(&out_frame1, &muted));
+ EXPECT_FALSE(muted);
+ EXPECT_EQ(0, neteq2_->GetAudio(&out_frame2, &muted));
+ if (muted) {
+ EXPECT_TRUE(AudioFramesEqualExceptData(out_frame1, out_frame2));
+ } else {
+ EXPECT_TRUE(AudioFramesEqual(out_frame1, out_frame2));
+ }
+ }
+ EXPECT_FALSE(muted);
+}
+
+TEST_F(NetEqDecodingTest, TestConcealmentEvents) {
+ const int kNumConcealmentEvents = 19;
+ const size_t kSamples = 10 * 16;
+ const size_t kPayloadBytes = kSamples * 2;
+ int seq_no = 0;
+ RTPHeader rtp_info;
+ rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC.
+ rtp_info.payloadType = 94; // PCM16b WB codec.
+ rtp_info.markerBit = 0;
+ const uint8_t payload[kPayloadBytes] = {0};
+ bool muted;
+
+ for (int i = 0; i < kNumConcealmentEvents; i++) {
+ // Insert some packets of 10 ms size.
+ for (int j = 0; j < 10; j++) {
+ rtp_info.sequenceNumber = seq_no++;
+ rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
+ neteq_->InsertPacket(rtp_info, payload);
+ neteq_->GetAudio(&out_frame_, &muted);
+ }
+
+ // Lose a number of packets.
+ int num_lost = 1 + i;
+ for (int j = 0; j < num_lost; j++) {
+ seq_no++;
+ neteq_->GetAudio(&out_frame_, &muted);
+ }
+ }
+
+ // Check number of concealment events.
+ NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
+ EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
+}
+
+// Test that the jitter buffer delay stat is computed correctly.
+void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
+ const int kNumPackets = 10;
+ const int kDelayInNumPackets = 2;
+ const int kPacketLenMs = 10; // All packets are of 10 ms size.
+ const size_t kSamples = kPacketLenMs * 16;
+ const size_t kPayloadBytes = kSamples * 2;
+ RTPHeader rtp_info;
+ rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC.
+ rtp_info.payloadType = 94; // PCM16b WB codec.
+ rtp_info.markerBit = 0;
+ const uint8_t payload[kPayloadBytes] = {0};
+ bool muted;
+ int packets_sent = 0;
+ int packets_received = 0;
+ int expected_delay = 0;
+ int expected_target_delay = 0;
+ uint64_t expected_emitted_count = 0;
+ while (packets_received < kNumPackets) {
+ // Insert packet.
+ if (packets_sent < kNumPackets) {
+ rtp_info.sequenceNumber = packets_sent++;
+ rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
+ neteq_->InsertPacket(rtp_info, payload);
+ }
+
+ // Get packet.
+ if (packets_sent > kDelayInNumPackets) {
+ neteq_->GetAudio(&out_frame_, &muted);
+ packets_received++;
+
+ // The delay reported by the jitter buffer never exceeds
+ // the number of samples previously fetched with GetAudio
+ // (hence the min()).
+ int packets_delay = std::min(packets_received, kDelayInNumPackets + 1);
+
+ // The increase of the expected delay is the product of
+ // the current delay of the jitter buffer in ms * the
+ // number of samples that are sent for play out.
+ int current_delay_ms = packets_delay * kPacketLenMs;
+ expected_delay += current_delay_ms * kSamples;
+ expected_target_delay += neteq_->TargetDelayMs() * kSamples;
+ expected_emitted_count += kSamples;
+ }
+ }
+
+ if (apply_packet_loss) {
+ // Extra call to GetAudio to cause concealment.
+ neteq_->GetAudio(&out_frame_, &muted);
+ }
+
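To make the unit of `expected_delay` concrete: the loop above adds, for every emitted frame, that frame's current delay in milliseconds multiplied by the number of emitted samples, matching how `jitter_buffer_delay_ms` is accumulated per emitted sample. A standalone recomputation of the same arithmetic, a sketch mirroring the constants above (kNumPackets = 10, kDelayInNumPackets = 2, 160 samples per 10 ms frame):

#include <algorithm>
#include <cassert>

int main() {
  const int kNumPackets = 10;
  const int kDelayInNumPackets = 2;
  const int kPacketLenMs = 10;
  const int kSamples = kPacketLenMs * 16;  // 160 samples at 16 kHz.
  int expected_delay = 0;
  for (int received = 1; received <= kNumPackets; ++received) {
    // The reported delay is capped at kDelayInNumPackets + 1 frames.
    const int packets_delay = std::min(received, kDelayInNumPackets + 1);
    expected_delay += packets_delay * kPacketLenMs * kSamples;
  }
  // (1 + 2 + 8 * 3) frames of delay * 10 ms * 160 samples = 43200.
  assert(expected_delay == 43200);
  return 0;
}

+ // Check jitter buffer delay.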
+ NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
+ EXPECT_EQ(expected_delay,
+ rtc::checked_cast<int>(stats.jitter_buffer_delay_ms));
+ EXPECT_EQ(expected_emitted_count, stats.jitter_buffer_emitted_count);
+ EXPECT_EQ(expected_target_delay,
+ rtc::checked_cast<int>(stats.jitter_buffer_target_delay_ms));
+}
+
+TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
+ TestJitterBufferDelay(false);
+}
+
+TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
+ TestJitterBufferDelay(true);
+}
+
+TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithAcceleration) {
+ const int kPacketLenMs = 10; // All packets are of 10 ms size.
+ const size_t kSamples = kPacketLenMs * 16;
+ const size_t kPayloadBytes = kSamples * 2;
+ RTPHeader rtp_info;
+ rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC.
+ rtp_info.payloadType = 94; // PCM16b WB codec.
+ rtp_info.markerBit = 0;
+ const uint8_t payload[kPayloadBytes] = {0};
+
+ int expected_target_delay = neteq_->TargetDelayMs() * kSamples;
+ neteq_->InsertPacket(rtp_info, payload);
+
+ bool muted;
+ neteq_->GetAudio(&out_frame_, &muted);
+
+ rtp_info.sequenceNumber += 1;
+ rtp_info.timestamp += kSamples;
+ neteq_->InsertPacket(rtp_info, payload);
+ rtp_info.sequenceNumber += 1;
+ rtp_info.timestamp += kSamples;
+ neteq_->InsertPacket(rtp_info, payload);
+
+ expected_target_delay += neteq_->TargetDelayMs() * 2 * kSamples;
+ // We have two packets in the buffer and kAccelerate operation will
+ // extract 20 ms of data.
+ neteq_->GetAudio(&out_frame_, &muted, nullptr, NetEq::Operation::kAccelerate);
+
+ // Check jitter buffer delay.
+ NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
+ EXPECT_EQ(10 * kSamples * 3, stats.jitter_buffer_delay_ms);
+ EXPECT_EQ(kSamples * 3, stats.jitter_buffer_emitted_count);
+ EXPECT_EQ(expected_target_delay,
+ rtc::checked_cast<int>(stats.jitter_buffer_target_delay_ms));
+}
+
+namespace test {
+TEST(NetEqNoTimeStretchingMode, RunTest) {
+ NetEq::Config config;
+ config.for_test_no_time_stretching = true;
+ auto codecs = NetEqTest::StandardDecoderMap();
+ NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = {
+ {1, kRtpExtensionAudioLevel},
+ {3, kRtpExtensionAbsoluteSendTime},
+ {5, kRtpExtensionTransportSequenceNumber},
+ {7, kRtpExtensionVideoContentType},
+ {8, kRtpExtensionVideoTiming}};
+ std::unique_ptr<NetEqInput> input(new NetEqRtpDumpInput(
+ webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"),
+ rtp_ext_map, absl::nullopt /*No SSRC filter*/));
+ std::unique_ptr<NetEqInput> input_time_limit(
+ new TimeLimitedNetEqInput(std::move(input), 20000));
+ std::unique_ptr<AudioSink> output(new VoidAudioSink);
+ NetEqTest::Callbacks callbacks;
+ NetEqTest test(config, CreateBuiltinAudioDecoderFactory(), codecs,
+ /*text_log=*/nullptr, /*neteq_factory=*/nullptr,
+ /*input=*/std::move(input_time_limit), std::move(output),
+ callbacks);
+ test.Run();
+ const auto stats = test.SimulationStats();
+ EXPECT_EQ(0, stats.accelerate_rate);
+ EXPECT_EQ(0, stats.preemptive_rate);
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto
new file mode 100644
index 0000000000..b4b4253c3d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto
@@ -0,0 +1,31 @@
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package webrtc.neteq_unittest;
+
+message NetEqNetworkStatistics {
+ // Next field number 18.
+ optional uint32 current_buffer_size_ms = 1;
+ optional uint32 preferred_buffer_size_ms = 2;
+ optional uint32 jitter_peaks_found = 3;
+ reserved 4; // Was packet_loss_rate.
+ reserved 5; // Was packet_discard_rate.
+ optional uint32 expand_rate = 6;
+ optional uint32 speech_expand_rate = 7;
+ optional uint32 preemptive_rate = 8;
+ optional uint32 accelerate_rate = 9;
+ optional uint32 secondary_decoded_rate = 10;
+ optional uint32 secondary_discarded_rate = 17;
+ optional int32 clockdrift_ppm = 11;
+ reserved 12; // Was added_zero_samples.
+ optional int32 mean_waiting_time_ms = 13;
+ optional int32 median_waiting_time_ms = 14;
+ optional int32 min_waiting_time_ms = 15;
+ optional int32 max_waiting_time_ms = 16;
+}
+
+message RtcpStatistics {
+ optional uint32 fraction_lost = 1;
+ optional uint32 cumulative_lost = 2;
+ optional uint32 extended_max_sequence_number = 3;
+ optional uint32 jitter = 4;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/normal.cc b/third_party/libwebrtc/modules/audio_coding/neteq/normal.cc
new file mode 100644
index 0000000000..461ee7fa4a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/normal.cc
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/normal.h"
+
+#include <string.h> // memset, memcpy
+
+#include <algorithm> // min
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+int Normal::Process(const int16_t* input,
+ size_t length,
+ NetEq::Mode last_mode,
+ AudioMultiVector* output) {
+ if (length == 0) {
+ // Nothing to process.
+ output->Clear();
+ return static_cast<int>(length);
+ }
+
+ RTC_DCHECK(output->Empty());
+ // Output should be empty at this point.
+ if (length % output->Channels() != 0) {
+ // The length does not match the number of channels.
+ output->Clear();
+ return 0;
+ }
+ output->PushBackInterleaved(rtc::ArrayView<const int16_t>(input, length));
+
+ const int fs_mult = fs_hz_ / 8000;
+ RTC_DCHECK_GT(fs_mult, 0);
+ // fs_shift = log2(fs_mult), rounded down.
+ // Note that `fs_shift` is not "exact" for 48 kHz.
+ // TODO(hlundin): Investigate this further.
+ const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);
+
+ // If the last call resulted in a CodecPlc we don't need to do cross-fading,
+ // but we need to report the end of the interruption once we are back to
+ // normal operation.
+ if (last_mode == NetEq::Mode::kCodecPlc) {
+ statistics_->EndExpandEvent(fs_hz_);
+ }
+
+ // Check if last RecOut call resulted in an Expand. If so, we have to take
+ // care of some cross-fading and unmuting.
+ if (last_mode == NetEq::Mode::kExpand) {
+ // Generate interpolation data using Expand.
+ // First, set Expand parameters to appropriate values.
+ expand_->SetParametersForNormalAfterExpand();
+
+ // Call Expand.
+ AudioMultiVector expanded(output->Channels());
+ expand_->Process(&expanded);
+ expand_->Reset();
+
+ size_t length_per_channel = length / output->Channels();
+ std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
+ for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
+ // Set muting factor to the same as expand muting factor.
+ int16_t mute_factor = expand_->MuteFactor(channel_ix);
+
+ (*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());
+
+ // Find largest absolute value in new data.
+ int16_t decoded_max =
+ WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
+ // Adjust muting factor if needed (to BGN level).
+ size_t energy_length =
+ std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
+ int scaling = 6 + fs_shift - WebRtcSpl_NormW32(decoded_max * decoded_max);
+ scaling = std::max(scaling, 0); // `scaling` should always be >= 0.
+ int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
+ energy_length, scaling);
+ int32_t scaled_energy_length =
+ static_cast<int32_t>(energy_length >> scaling);
+ if (scaled_energy_length > 0) {
+ energy = energy / scaled_energy_length;
+ } else {
+ energy = 0;
+ }
+
+ int local_mute_factor = 16384; // 1.0 in Q14.
+ if ((energy != 0) && (energy > background_noise_.Energy(channel_ix))) {
+ // Normalize new frame energy to 15 bits.
+ scaling = WebRtcSpl_NormW32(energy) - 16;
+ // We want background_noise_.energy() / energy in Q14.
+ int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
+ background_noise_.Energy(channel_ix), scaling + 14);
+ int16_t energy_scaled =
+ static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
+ int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
+ local_mute_factor =
+ std::min(local_mute_factor, WebRtcSpl_SqrtFloor(ratio << 14));
+ }
+ mute_factor = std::max<int16_t>(mute_factor, local_mute_factor);
+ RTC_DCHECK_LE(mute_factor, 16384);
+ RTC_DCHECK_GE(mute_factor, 0);
+
+ // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14),
+ // or as fast as it takes to come back to full gain within the frame
+ // length.
+ const int back_to_fullscale_inc =
+ static_cast<int>((16384 - mute_factor) / length_per_channel);
+ const int increment = std::max(64 / fs_mult, back_to_fullscale_inc);
+ for (size_t i = 0; i < length_per_channel; i++) {
+ // Scale with mute factor.
+ RTC_DCHECK_LT(channel_ix, output->Channels());
+ RTC_DCHECK_LT(i, output->Size());
+ int32_t scaled_signal = (*output)[channel_ix][i] * mute_factor;
+ // Shift 14 with proper rounding.
+ (*output)[channel_ix][i] =
+ static_cast<int16_t>((scaled_signal + 8192) >> 14);
+ // Increase mute_factor towards 16384.
+ mute_factor =
+ static_cast<int16_t>(std::min(mute_factor + increment, 16384));
+ }
+
+ // Interpolate the expanded data into the new vector.
+ // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
+ size_t win_length = samples_per_ms_;
+ int16_t win_slope_Q14 = default_win_slope_Q14_;
+ RTC_DCHECK_LT(channel_ix, output->Channels());
+ if (win_length > output->Size()) {
+ win_length = output->Size();
+ win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
+ }
+ int16_t win_up_Q14 = 0;
+ for (size_t i = 0; i < win_length; i++) {
+ win_up_Q14 += win_slope_Q14;
+ (*output)[channel_ix][i] =
+ (win_up_Q14 * (*output)[channel_ix][i] +
+ ((1 << 14) - win_up_Q14) * expanded[channel_ix][i] + (1 << 13)) >>
+ 14;
+ }
+ RTC_DCHECK_GT(win_up_Q14,
+ (1 << 14) - 32); // Worst case rounding is a length of 34
+ }
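The interpolation loop above applies a linear Q14 window: `win_up_Q14` ramps from 0 toward 16384 (1.0 in Q14), fading the expanded signal out while the newly decoded signal fades in, with `(1 << 13)` added for rounding before the shift by 14. A self-contained sketch of the same blend for a single channel (assumes non-empty inputs at the same rate; the helper is illustrative, not part of the patch):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Fades `from` out and `to` in over min(from.size(), to.size()) samples,
// using the same Q14 ramp and rounding as the loop above.
std::vector<int16_t> CrossfadeQ14(const std::vector<int16_t>& from,
                                  const std::vector<int16_t>& to) {
  const size_t win_length = std::min(from.size(), to.size());
  if (win_length == 0) {
    return {};
  }
  const int16_t win_slope_q14 =
      static_cast<int16_t>((1 << 14) / static_cast<int>(win_length));
  std::vector<int16_t> out(win_length);
  int16_t win_up_q14 = 0;
  for (size_t i = 0; i < win_length; ++i) {
    win_up_q14 += win_slope_q14;
    out[i] = static_cast<int16_t>(
        (win_up_q14 * to[i] + ((1 << 14) - win_up_q14) * from[i] +
         (1 << 13)) >> 14);
  }
  return out;
}

+ } else if (last_mode == NetEq::Mode::kRfc3389Cng) {
+ RTC_DCHECK_EQ(output->Channels(), 1); // Not adapted for multi-channel yet.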
+ static const size_t kCngLength = 48;
+ RTC_DCHECK_LE(8 * fs_mult, kCngLength);
+ int16_t cng_output[kCngLength];
+ ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
+
+ if (cng_decoder) {
+ // Generate long enough for 48 kHz.
+ if (!cng_decoder->Generate(cng_output, false)) {
+ // Error returned; set return vector to all zeros.
+ memset(cng_output, 0, sizeof(cng_output));
+ }
+ } else {
+ // If no CNG instance is defined, just copy from the decoded data.
+ // (This will result in interpolating the decoded with itself.)
+ (*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
+ }
+ // Interpolate the CNG into the new vector.
+ // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
+ size_t win_length = samples_per_ms_;
+ int16_t win_slope_Q14 = default_win_slope_Q14_;
+ if (win_length > kCngLength) {
+ win_length = kCngLength;
+ win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
+ }
+ int16_t win_up_Q14 = 0;
+ for (size_t i = 0; i < win_length; i++) {
+ win_up_Q14 += win_slope_Q14;
+ (*output)[0][i] =
+ (win_up_Q14 * (*output)[0][i] +
+ ((1 << 14) - win_up_Q14) * cng_output[i] + (1 << 13)) >>
+ 14;
+ }
+ RTC_DCHECK_GT(win_up_Q14,
+ (1 << 14) - 32); // Worst case rounding is a length of 34
+ }
+
+ return static_cast<int>(length);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/normal.h b/third_party/libwebrtc/modules/audio_coding/neteq/normal.h
new file mode 100644
index 0000000000..772293b605
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/normal.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
+#define MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
+
+#include <stdint.h>
+#include <string.h> // Access to size_t.
+
+#include "api/neteq/neteq.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+// Forward declarations.
+class AudioMultiVector;
+class BackgroundNoise;
+class DecoderDatabase;
+class Expand;
+
+// This class provides the "Normal" DSP operation, that is performed when
+// there is no data loss, no need to stretch the timing of the signal, and
+// no other "special circumstances" are at hand.
+class Normal {
+ public:
+ Normal(int fs_hz,
+ DecoderDatabase* decoder_database,
+ const BackgroundNoise& background_noise,
+ Expand* expand,
+ StatisticsCalculator* statistics)
+ : fs_hz_(fs_hz),
+ decoder_database_(decoder_database),
+ background_noise_(background_noise),
+ expand_(expand),
+ samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)),
+ default_win_slope_Q14_(
+ rtc::dchecked_cast<int16_t>((1 << 14) / samples_per_ms_)),
+ statistics_(statistics) {}
+
+ virtual ~Normal() {}
+
+ Normal(const Normal&) = delete;
+ Normal& operator=(const Normal&) = delete;
+
+ // Performs the "Normal" operation. The decoder data is supplied in `input`,
+ // having `length` samples in total for all channels (interleaved). The
+ // result is written to `output`. The number of channels allocated in
+ // `output` defines the number of channels that will be used when
+ // de-interleaving `input`.
+ // `last_mode` contains the mode used in the previous
+ // GetAudio call (i.e., not the current one).
+ int Process(const int16_t* input,
+ size_t length,
+ NetEq::Mode last_mode,
+ AudioMultiVector* output);
+
+ private:
+ int fs_hz_;
+ DecoderDatabase* decoder_database_;
+ const BackgroundNoise& background_noise_;
+ Expand* expand_;
+ const size_t samples_per_ms_;
+ const int16_t default_win_slope_Q14_;
+ StatisticsCalculator* const statistics_;
+};
+
+} // namespace webrtc
+#endif // MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc
new file mode 100644
index 0000000000..4554d79576
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for Normal class.
+
+#include "modules/audio_coding/neteq/normal.h"
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
+#include "modules/audio_coding/neteq/mock/mock_expand.h"
+#include "modules/audio_coding/neteq/random_vector.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "test/gtest.h"
+
+using ::testing::_;
+using ::testing::Invoke;
+
+namespace webrtc {
+
+namespace {
+
+int ExpandProcess120ms(AudioMultiVector* output) {
+ AudioMultiVector dummy_audio(1, 11520u);
+ dummy_audio.CopyTo(output);
+ return 0;
+}
+
+} // namespace
+
+TEST(Normal, CreateAndDestroy) {
+ MockDecoderDatabase db;
+ int fs = 8000;
+ size_t channels = 1;
+ BackgroundNoise bgn(channels);
+ SyncBuffer sync_buffer(1, 1000);
+ RandomVector random_vector;
+ StatisticsCalculator statistics;
+ Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
+ Normal normal(fs, &db, bgn, &expand, &statistics);
+ EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
+}
+
+TEST(Normal, AvoidDivideByZero) {
+ MockDecoderDatabase db;
+ int fs = 8000;
+ size_t channels = 1;
+ BackgroundNoise bgn(channels);
+ SyncBuffer sync_buffer(1, 1000);
+ RandomVector random_vector;
+ StatisticsCalculator statistics;
+ MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
+ channels);
+ Normal normal(fs, &db, bgn, &expand, &statistics);
+
+ int16_t input[1000] = {0};
+ AudioMultiVector output(channels);
+
+ // Zero input length.
+ EXPECT_EQ(0, normal.Process(input, 0, NetEq::Mode::kExpand, &output));
+ EXPECT_EQ(0u, output.Size());
+
+ // Try to make energy_length >> scaling = 0;
+ EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
+ EXPECT_CALL(expand, Process(_));
+ EXPECT_CALL(expand, Reset());
+ // If input_size_samples < 64, then energy_length in Normal::Process() will
+ // be equal to input_size_samples. Since the input is all zeros, decoded_max
+ // will be zero, and scaling will be >= 6. Thus, energy_length >> scaling = 0,
+ // and using this as a denominator would lead to problems.
+ int input_size_samples = 63;
+ EXPECT_EQ(input_size_samples, normal.Process(input, input_size_samples,
+ NetEq::Mode::kExpand, &output));
+
+ EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
+ EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
+}
+
+TEST(Normal, InputLengthAndChannelsDoNotMatch) {
+ MockDecoderDatabase db;
+ int fs = 8000;
+ size_t channels = 2;
+ BackgroundNoise bgn(channels);
+ SyncBuffer sync_buffer(channels, 1000);
+ RandomVector random_vector;
+ StatisticsCalculator statistics;
+ MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
+ channels);
+ Normal normal(fs, &db, bgn, &expand, &statistics);
+
+ int16_t input[1000] = {0};
+ AudioMultiVector output(channels);
+
+ // Let the number of samples be one sample less than 80 samples per channel.
+ size_t input_len = 80 * channels - 1;
+ EXPECT_EQ(0, normal.Process(input, input_len, NetEq::Mode::kExpand, &output));
+ EXPECT_EQ(0u, output.Size());
+
+ EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
+ EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
+}
+
+TEST(Normal, LastModeExpand120msPacket) {
+ MockDecoderDatabase db;
+ const int kFs = 48000;
+ const size_t kPacketsizeBytes = 11520u;
+ const size_t kChannels = 1;
+ BackgroundNoise bgn(kChannels);
+ SyncBuffer sync_buffer(kChannels, 1000);
+ RandomVector random_vector;
+ StatisticsCalculator statistics;
+ MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
+ kChannels);
+ Normal normal(kFs, &db, bgn, &expand, &statistics);
+
+ int16_t input[kPacketsizeBytes] = {0};
+ AudioMultiVector output(kChannels);
+
+ EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
+ EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
+ EXPECT_CALL(expand, Reset());
+ EXPECT_EQ(
+ static_cast<int>(kPacketsizeBytes),
+ normal.Process(input, kPacketsizeBytes, NetEq::Mode::kExpand, &output));
+
+ EXPECT_EQ(kPacketsizeBytes, output.Size());
+
+ EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
+ EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
+}
+
+// TODO(hlundin): Write more tests.
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc
new file mode 100644
index 0000000000..333f161229
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/packet.h"
+
+namespace webrtc {
+
+Packet::Packet() = default;
+Packet::Packet(Packet&& b) = default;
+
+Packet::~Packet() = default;
+
+Packet& Packet::operator=(Packet&& b) = default;
+
+Packet Packet::Clone() const {
+ RTC_CHECK(!frame);
+
+ Packet clone;
+ clone.timestamp = timestamp;
+ clone.sequence_number = sequence_number;
+ clone.payload_type = payload_type;
+ clone.payload.SetData(payload.data(), payload.size());
+ clone.priority = priority;
+ clone.packet_info = packet_info;
+
+ return clone;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet.h
new file mode 100644
index 0000000000..0c6f204edb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_H_
+#define MODULES_AUDIO_CODING_NETEQ_PACKET_H_
+
+#include <stdint.h>
+
+#include <list>
+#include <memory>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/neteq/tick_timer.h"
+#include "api/rtp_packet_info.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Struct for holding RTP packets.
+struct Packet {
+ struct Priority {
+ Priority() : codec_level(0), red_level(0) {}
+ Priority(int codec_level, int red_level)
+ : codec_level(codec_level), red_level(red_level) {
+ CheckInvariant();
+ }
+
+ int codec_level;
+ int red_level;
+
+ // Priorities are sorted low-to-high, first on the level the codec
+ // prioritizes it, then on the level of RED packet it is; i.e. if it is a
+ // primary or secondary payload of a RED packet. For example: with Opus, a
+ // FEC packet (which the decoder prioritizes lower than a regular packet)
+ // will not be used if there is _any_ RED payload for the same
+ // timeframe. The highest priority packet will have levels {0, 0}. Negative
+ // priorities are not allowed.
+ bool operator<(const Priority& b) const {
+ CheckInvariant();
+ b.CheckInvariant();
+ if (codec_level == b.codec_level)
+ return red_level < b.red_level;
+
+ return codec_level < b.codec_level;
+ }
+ bool operator==(const Priority& b) const {
+ CheckInvariant();
+ b.CheckInvariant();
+ return codec_level == b.codec_level && red_level == b.red_level;
+ }
+ bool operator!=(const Priority& b) const { return !(*this == b); }
+ bool operator>(const Priority& b) const { return b < *this; }
+ bool operator<=(const Priority& b) const { return !(b > *this); }
+ bool operator>=(const Priority& b) const { return !(b < *this); }
+
+ private:
+ void CheckInvariant() const {
+ RTC_DCHECK_GE(codec_level, 0);
+ RTC_DCHECK_GE(red_level, 0);
+ }
+ };
+
+ uint32_t timestamp;
+ uint16_t sequence_number;
+ uint8_t payload_type;
+ // Datagram excluding RTP header and header extension.
+ rtc::Buffer payload;
+ Priority priority;
+ RtpPacketInfo packet_info;
+ std::unique_ptr<TickTimer::Stopwatch> waiting_time;
+ std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame;
+
+ Packet();
+ Packet(Packet&& b);
+ ~Packet();
+
+ // Packets should generally be moved around but sometimes it's useful to make
+ // a copy, for example for testing purposes. NOTE: Will only work for
+ // un-parsed packets, i.e. `frame` must be unset. The payload will, however,
+ // be copied. `waiting_time` will also not be copied.
+ Packet Clone() const;
+
+ Packet& operator=(Packet&& b);
+
+ // Comparison operators. Establish a packet ordering based on (1) timestamp,
+ // (2) sequence number and (3) redundancy.
+ // Timestamp and sequence numbers are compared taking wrap-around into
+ // account. For two packets with the same sequence number and timestamp a
+ // primary payload is considered "smaller" than a secondary.
+ bool operator==(const Packet& rhs) const {
+ return (this->timestamp == rhs.timestamp &&
+ this->sequence_number == rhs.sequence_number &&
+ this->priority == rhs.priority);
+ }
+ bool operator!=(const Packet& rhs) const { return !operator==(rhs); }
+ bool operator<(const Packet& rhs) const {
+ if (this->timestamp == rhs.timestamp) {
+ if (this->sequence_number == rhs.sequence_number) {
+ // Timestamp and sequence numbers are identical - deem the left hand
+ // side to be "smaller" (i.e., "earlier") if it has higher priority.
+ return this->priority < rhs.priority;
+ }
+ return (static_cast<uint16_t>(rhs.sequence_number -
+ this->sequence_number) < 0xFFFF / 2);
+ }
+ return (static_cast<uint32_t>(rhs.timestamp - this->timestamp) <
+ 0xFFFFFFFF / 2);
+ }
+ bool operator>(const Packet& rhs) const { return rhs.operator<(*this); }
+ bool operator<=(const Packet& rhs) const { return !operator>(rhs); }
+ bool operator>=(const Packet& rhs) const { return !operator<(rhs); }
+
+ bool empty() const { return !frame && payload.empty(); }
+};
+
+// A list of packets.
+typedef std::list<Packet> PacketList;
+
+} // namespace webrtc
+#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc
new file mode 100644
index 0000000000..2077383f76
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/packet_arrival_history.h"
+
+#include <algorithm>
+
+#include "api/neteq/tick_timer.h"
+
+namespace webrtc {
+
+PacketArrivalHistory::PacketArrivalHistory(int window_size_ms)
+ : window_size_ms_(window_size_ms) {}
+
+void PacketArrivalHistory::Insert(uint32_t rtp_timestamp,
+ int64_t arrival_time_ms) {
+ RTC_DCHECK(sample_rate_khz_ > 0);
+ int64_t unwrapped_rtp_timestamp = timestamp_unwrapper_.Unwrap(rtp_timestamp);
+ if (!newest_rtp_timestamp_ ||
+ unwrapped_rtp_timestamp > *newest_rtp_timestamp_) {
+ newest_rtp_timestamp_ = unwrapped_rtp_timestamp;
+ }
+ history_.emplace_back(unwrapped_rtp_timestamp / sample_rate_khz_,
+ arrival_time_ms);
+ MaybeUpdateCachedArrivals(history_.back());
+ while (history_.front().rtp_timestamp_ms + window_size_ms_ <
+ unwrapped_rtp_timestamp / sample_rate_khz_) {
+ if (&history_.front() == min_packet_arrival_) {
+ min_packet_arrival_ = nullptr;
+ }
+ if (&history_.front() == max_packet_arrival_) {
+ max_packet_arrival_ = nullptr;
+ }
+ history_.pop_front();
+ }
+ if (!min_packet_arrival_ || !max_packet_arrival_) {
+ for (const PacketArrival& packet : history_) {
+ MaybeUpdateCachedArrivals(packet);
+ }
+ }
+}
+
+void PacketArrivalHistory::MaybeUpdateCachedArrivals(
+ const PacketArrival& packet_arrival) {
+ if (!min_packet_arrival_ || packet_arrival <= *min_packet_arrival_) {
+ min_packet_arrival_ = &packet_arrival;
+ }
+ if (!max_packet_arrival_ || packet_arrival >= *max_packet_arrival_) {
+ max_packet_arrival_ = &packet_arrival;
+ }
+}
+
+void PacketArrivalHistory::Reset() {
+ history_.clear();
+ min_packet_arrival_ = nullptr;
+ max_packet_arrival_ = nullptr;
+ timestamp_unwrapper_.Reset();
+ newest_rtp_timestamp_ = absl::nullopt;
+}
+
+int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp,
+ int64_t time_ms) const {
+ RTC_DCHECK(sample_rate_khz_ > 0);
+ int64_t unwrapped_rtp_timestamp_ms =
+ timestamp_unwrapper_.PeekUnwrap(rtp_timestamp) / sample_rate_khz_;
+ PacketArrival packet(unwrapped_rtp_timestamp_ms, time_ms);
+ return GetPacketArrivalDelayMs(packet);
+}
+
+int PacketArrivalHistory::GetMaxDelayMs() const {
+ if (!max_packet_arrival_) {
+ return 0;
+ }
+ return GetPacketArrivalDelayMs(*max_packet_arrival_);
+}
+
+bool PacketArrivalHistory::IsNewestRtpTimestamp(uint32_t rtp_timestamp) const {
+ if (!newest_rtp_timestamp_) {
+ return false;
+ }
+ int64_t unwrapped_rtp_timestamp =
+ timestamp_unwrapper_.PeekUnwrap(rtp_timestamp);
+ return unwrapped_rtp_timestamp == *newest_rtp_timestamp_;
+}
+
+int PacketArrivalHistory::GetPacketArrivalDelayMs(
+ const PacketArrival& packet_arrival) const {
+ if (!min_packet_arrival_) {
+ return 0;
+ }
+ return std::max(static_cast<int>(packet_arrival.arrival_time_ms -
+ min_packet_arrival_->arrival_time_ms -
+ (packet_arrival.rtp_timestamp_ms -
+ min_packet_arrival_->rtp_timestamp_ms)),
+ 0);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h
new file mode 100644
index 0000000000..cad362b469
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
+#define MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
+
+#include <cstdint>
+#include <deque>
+
+#include "absl/types/optional.h"
+#include "api/neteq/tick_timer.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+
+namespace webrtc {
+
+// Stores timing information about previously received packets.
+// The history has a fixed window size beyond which old data is automatically
+// pruned.
+class PacketArrivalHistory {
+ public:
+ explicit PacketArrivalHistory(int window_size_ms);
+
+ // Insert packet with `rtp_timestamp` and `arrival_time_ms` into the history.
+ void Insert(uint32_t rtp_timestamp, int64_t arrival_time_ms);
+
+ // The delay for `rtp_timestamp` at `time_ms` is calculated as
+ // `(time_ms - p.arrival_time_ms) - (rtp_timestamp - p.rtp_timestamp)`
+ // where `p` is chosen as the packet arrival in the history that maximizes the
+ // delay.
+ int GetDelayMs(uint32_t rtp_timestamp, int64_t time_ms) const;
+
+ // Get the maximum packet arrival delay observed in the history.
+ int GetMaxDelayMs() const;
+
+ bool IsNewestRtpTimestamp(uint32_t rtp_timestamp) const;
+
+ void Reset();
+
+ void set_sample_rate(int sample_rate) {
+ sample_rate_khz_ = sample_rate / 1000;
+ }
+
+ size_t size() const { return history_.size(); }
+
+ private:
+ struct PacketArrival {
+ PacketArrival(int64_t rtp_timestamp_ms, int64_t arrival_time_ms)
+ : rtp_timestamp_ms(rtp_timestamp_ms),
+ arrival_time_ms(arrival_time_ms) {}
+ int64_t rtp_timestamp_ms;
+ int64_t arrival_time_ms;
+ bool operator<=(const PacketArrival& other) const {
+ return arrival_time_ms - rtp_timestamp_ms <=
+ other.arrival_time_ms - other.rtp_timestamp_ms;
+ }
+ bool operator>=(const PacketArrival& other) const {
+ return arrival_time_ms - rtp_timestamp_ms >=
+ other.arrival_time_ms - other.rtp_timestamp_ms;
+ }
+ };
+ std::deque<PacketArrival> history_;
+ int GetPacketArrivalDelayMs(const PacketArrival& packet_arrival) const;
+ // Updates `min_packet_arrival_` and `max_packet_arrival_`.
+ void MaybeUpdateCachedArrivals(const PacketArrival& packet);
+ const PacketArrival* min_packet_arrival_ = nullptr;
+ const PacketArrival* max_packet_arrival_ = nullptr;
+ const int window_size_ms_;
+ RtpTimestampUnwrapper timestamp_unwrapper_;
+ absl::optional<int64_t> newest_rtp_timestamp_;
+ int sample_rate_khz_ = 0;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc
new file mode 100644
index 0000000000..539a318fe1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/packet_arrival_history.h"
+
+#include <cstdint>
+#include <limits>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kFs = 8000;
+constexpr int kFsKhz = kFs / 1000;
+constexpr int kFrameSizeMs = 20;
+constexpr int kWindowSizeMs = 1000;
+
+class PacketArrivalHistoryTest : public testing::Test {
+ public:
+ PacketArrivalHistoryTest() : history_(kWindowSizeMs) {
+ history_.set_sample_rate(kFs);
+ }
+ void IncrementTime(int delta_ms) { time_ms_ += delta_ms; }
+ int InsertPacketAndGetDelay(int timestamp_delta_ms) {
+ uint32_t timestamp = timestamp_ + timestamp_delta_ms * kFsKhz;
+ if (timestamp_delta_ms > 0) {
+ timestamp_ = timestamp;
+ }
+ history_.Insert(timestamp, time_ms_);
+ EXPECT_EQ(history_.IsNewestRtpTimestamp(timestamp),
+ timestamp_delta_ms >= 0);
+ return history_.GetDelayMs(timestamp, time_ms_);
+ }
+
+ protected:
+ int64_t time_ms_ = 0;
+ PacketArrivalHistory history_;
+ uint32_t timestamp_ = 0x12345678;
+};
+
+TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) {
+ EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
+
+ IncrementTime(kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
+
+ IncrementTime(2 * kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
+
+ // Reordered packet.
+ EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 60);
+
+ IncrementTime(2 * kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 40);
+
+ // Move reference packet forward.
+ EXPECT_EQ(InsertPacketAndGetDelay(4 * kFrameSizeMs), 0);
+
+ IncrementTime(2 * kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
+
+ // Earlier packet is now more delayed due to the new reference packet.
+ EXPECT_EQ(history_.GetMaxDelayMs(), 100);
+}
+
+TEST_F(PacketArrivalHistoryTest, ReorderedPackets) {
+ // Insert first packet.
+ EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
+
+ // Insert reordered packet.
+ EXPECT_EQ(InsertPacketAndGetDelay(-80), 80);
+
+ // Insert another reordered packet.
+ EXPECT_EQ(InsertPacketAndGetDelay(-kFrameSizeMs), 20);
+
+ // Insert the next packet in order and verify that the relative delay is
+ // estimated based on the first inserted packet.
+ IncrementTime(4 * kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 60);
+
+ EXPECT_EQ(history_.GetMaxDelayMs(), 80);
+}
+
+TEST_F(PacketArrivalHistoryTest, MaxHistorySize) {
+ EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
+
+ IncrementTime(2 * kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
+ EXPECT_EQ(history_.GetMaxDelayMs(), 20);
+
+ // Insert next packet with a timestamp difference larger than maximum history
+ // size. This removes the previously inserted packet from the history.
+ IncrementTime(kWindowSizeMs + kFrameSizeMs);
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs + kWindowSizeMs), 0);
+ EXPECT_EQ(history_.GetMaxDelayMs(), 0);
+}
+
+TEST_F(PacketArrivalHistoryTest, TimestampWraparound) {
+ timestamp_ = std::numeric_limits<uint32_t>::max();
+ EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
+
+ IncrementTime(2 * kFrameSizeMs);
+ // Insert timestamp that will wrap around.
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs);
+
+ // Insert reordered packet before the wraparound.
+ EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs);
+
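The wraparound tests work because PacketArrivalHistory unwraps the 32-bit RTP timestamp into a monotonically extended 64-bit value before converting to milliseconds. The real code uses webrtc's RtpTimestampUnwrapper; the following is only a minimal sketch of the underlying idea, not that class's actual implementation:

#include <cstdint>

// Interprets each new 32-bit timestamp as the 64-bit value closest to the
// previously seen one, so forward wraps keep increasing and reordered
// packets arriving just before a wrap map to slightly smaller values.
class SimpleUnwrapper {
 public:
  int64_t Unwrap(uint32_t ts) {
    if (!initialized_) {
      initialized_ = true;
      last_ = ts;
      return last_;
    }
    // The signed 32-bit difference picks the shortest distance around the
    // 2^32 circle.
    const int32_t delta =
        static_cast<int32_t>(ts - static_cast<uint32_t>(last_));
    last_ += delta;
    return last_;
  }

 private:
  bool initialized_ = false;
  int64_t last_ = 0;
};

+ // Insert another in-order packet after the wraparound.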
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
+
+ EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs);
+}
+
+TEST_F(PacketArrivalHistoryTest, TimestampWraparoundBackwards) {
+ timestamp_ = 0;
+ EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
+
+ IncrementTime(2 * kFrameSizeMs);
+ // Insert timestamp that will wrap around.
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs);
+
+ // Insert reordered packet before the wraparound.
+ EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs);
+
+ // Insert another in-order packet after the wraparound.
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
+
+ EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc
new file mode 100644
index 0000000000..f6b5a476c9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This is the implementation of the PacketBuffer class. It is mostly based on
+// an STL list. The list is kept sorted at all times so that the next packet to
+// decode is at the beginning of the list.
+
+#include "modules/audio_coding/neteq/packet_buffer.h"
+
+#include <algorithm>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+// Predicate used when inserting packets in the buffer list.
+// Operator() returns true when `packet` goes before `new_packet`.
+class NewTimestampIsLarger {
+ public:
+ explicit NewTimestampIsLarger(const Packet& new_packet)
+ : new_packet_(new_packet) {}
+ bool operator()(const Packet& packet) { return (new_packet_ >= packet); }
+
+ private:
+ const Packet& new_packet_;
+};
+
+// Returns true if both payload types are known to the decoder database, and
+// have the same sample rate.
+bool EqualSampleRates(uint8_t pt1,
+ uint8_t pt2,
+ const DecoderDatabase& decoder_database) {
+ auto* di1 = decoder_database.GetDecoderInfo(pt1);
+ auto* di2 = decoder_database.GetDecoderInfo(pt2);
+ return di1 && di2 && di1->SampleRateHz() == di2->SampleRateHz();
+}
+
+void LogPacketDiscarded(int codec_level, StatisticsCalculator* stats) {
+ RTC_CHECK(stats);
+ if (codec_level > 0) {
+ stats->SecondaryPacketsDiscarded(1);
+ } else {
+ stats->PacketsDiscarded(1);
+ }
+}
+
+absl::optional<SmartFlushingConfig> GetSmartflushingConfig() {
+ absl::optional<SmartFlushingConfig> result;
+ std::string field_trial_string =
+ field_trial::FindFullName("WebRTC-Audio-NetEqSmartFlushing");
+ result = SmartFlushingConfig();
+ bool enabled = false;
+ auto parser = StructParametersParser::Create(
+ "enabled", &enabled, "target_level_threshold_ms",
+ &result->target_level_threshold_ms, "target_level_multiplier",
+ &result->target_level_multiplier);
+ parser->Parse(field_trial_string);
+ if (!enabled) {
+ return absl::nullopt;
+ }
+ RTC_LOG(LS_INFO) << "Using smart flushing, target_level_threshold_ms: "
+ << result->target_level_threshold_ms
+ << ", target_level_multiplier: "
+ << result->target_level_multiplier;
+ return result;
+}
+
+} // namespace
+
+PacketBuffer::PacketBuffer(size_t max_number_of_packets,
+ const TickTimer* tick_timer)
+ : smart_flushing_config_(GetSmartflushingConfig()),
+ max_number_of_packets_(max_number_of_packets),
+ tick_timer_(tick_timer) {}
+
+// Destructor. All packets in the buffer will be destroyed.
+PacketBuffer::~PacketBuffer() {
+ buffer_.clear();
+}
+
+// Flush the buffer. All packets in the buffer will be destroyed.
+void PacketBuffer::Flush(StatisticsCalculator* stats) {
+ for (auto& p : buffer_) {
+ LogPacketDiscarded(p.priority.codec_level, stats);
+ }
+ buffer_.clear();
+ stats->FlushedPacketBuffer();
+}
+
+void PacketBuffer::PartialFlush(int target_level_ms,
+ size_t sample_rate,
+ size_t last_decoded_length,
+ StatisticsCalculator* stats) {
+ // Make sure that at least half the packet buffer capacity will be available
+ // after the flush. This is done to avoid getting stuck if the target level is
+ // very high.
+ int target_level_samples =
+ std::min(target_level_ms * sample_rate / 1000,
+ max_number_of_packets_ * last_decoded_length / 2);
+ // We should avoid flushing to very low levels.
+ target_level_samples = std::max(
+ target_level_samples, smart_flushing_config_->target_level_threshold_ms);
+ while (GetSpanSamples(last_decoded_length, sample_rate, true) >
+ static_cast<size_t>(target_level_samples) ||
+ buffer_.size() > max_number_of_packets_ / 2) {
+ LogPacketDiscarded(PeekNextPacket()->priority.codec_level, stats);
+ buffer_.pop_front();
+ }
+}
+
+bool PacketBuffer::Empty() const {
+ return buffer_.empty();
+}
+
+int PacketBuffer::InsertPacket(Packet&& packet,
+ StatisticsCalculator* stats,
+ size_t last_decoded_length,
+ size_t sample_rate,
+ int target_level_ms,
+ const DecoderDatabase& decoder_database) {
+ if (packet.empty()) {
+ RTC_LOG(LS_WARNING) << "InsertPacket invalid packet";
+ return kInvalidPacket;
+ }
+
+ RTC_DCHECK_GE(packet.priority.codec_level, 0);
+ RTC_DCHECK_GE(packet.priority.red_level, 0);
+
+ int return_val = kOK;
+
+ packet.waiting_time = tick_timer_->GetNewStopwatch();
+
+ // Perform a smart flush if the buffer size exceeds a multiple of the target
+ // level.
+ const size_t span_threshold =
+ smart_flushing_config_
+PacketBuffer::PacketBuffer(size_t max_number_of_packets,
+                           const TickTimer* tick_timer)
+    : smart_flushing_config_(GetSmartflushingConfig()),
+      max_number_of_packets_(max_number_of_packets),
+      tick_timer_(tick_timer) {}
+
+// Destructor. All packets in the buffer will be destroyed.
+PacketBuffer::~PacketBuffer() {
+  buffer_.clear();
+}
+
+// Flush the buffer. All packets in the buffer will be destroyed.
+void PacketBuffer::Flush(StatisticsCalculator* stats) {
+  for (auto& p : buffer_) {
+    LogPacketDiscarded(p.priority.codec_level, stats);
+  }
+  buffer_.clear();
+  stats->FlushedPacketBuffer();
+}
+
+void PacketBuffer::PartialFlush(int target_level_ms,
+                                size_t sample_rate,
+                                size_t last_decoded_length,
+                                StatisticsCalculator* stats) {
+  // Make sure that at least half the packet buffer capacity will be available
+  // after the flush. This is done to avoid getting stuck if the target level
+  // is very high.
+  int target_level_samples =
+      std::min(target_level_ms * sample_rate / 1000,
+               max_number_of_packets_ * last_decoded_length / 2);
+  // We should avoid flushing to very low levels.
+  target_level_samples = std::max(
+      target_level_samples, smart_flushing_config_->target_level_threshold_ms);
+  while (GetSpanSamples(last_decoded_length, sample_rate, true) >
+             static_cast<size_t>(target_level_samples) ||
+         buffer_.size() > max_number_of_packets_ / 2) {
+    LogPacketDiscarded(PeekNextPacket()->priority.codec_level, stats);
+    buffer_.pop_front();
+  }
+}
+
+bool PacketBuffer::Empty() const {
+  return buffer_.empty();
+}
+
+int PacketBuffer::InsertPacket(Packet&& packet,
+                               StatisticsCalculator* stats,
+                               size_t last_decoded_length,
+                               size_t sample_rate,
+                               int target_level_ms,
+                               const DecoderDatabase& decoder_database) {
+  if (packet.empty()) {
+    RTC_LOG(LS_WARNING) << "InsertPacket invalid packet";
+    return kInvalidPacket;
+  }
+
+  RTC_DCHECK_GE(packet.priority.codec_level, 0);
+  RTC_DCHECK_GE(packet.priority.red_level, 0);
+
+  int return_val = kOK;
+
+  packet.waiting_time = tick_timer_->GetNewStopwatch();
+
+  // Perform a smart flush if the buffer size exceeds a multiple of the target
+  // level.
+  const size_t span_threshold =
+      smart_flushing_config_
+          ? smart_flushing_config_->target_level_multiplier *
+                std::max(smart_flushing_config_->target_level_threshold_ms,
+                         target_level_ms) *
+                sample_rate / 1000
+          : 0;
+  const bool smart_flush =
+      smart_flushing_config_.has_value() &&
+      GetSpanSamples(last_decoded_length, sample_rate, true) >= span_threshold;
+  if (buffer_.size() >= max_number_of_packets_ || smart_flush) {
+    size_t buffer_size_before_flush = buffer_.size();
+    if (smart_flushing_config_.has_value()) {
+      // Flush down to the target level.
+      PartialFlush(target_level_ms, sample_rate, last_decoded_length, stats);
+      return_val = kPartialFlush;
+    } else {
+      // Buffer is full.
+      Flush(stats);
+      return_val = kFlushed;
+    }
+    RTC_LOG(LS_WARNING) << "Packet buffer flushed, "
+                        << (buffer_size_before_flush - buffer_.size())
+                        << " packets discarded.";
+  }
+
+  // Get an iterator pointing to the place in the buffer where the new packet
+  // should be inserted. The list is searched from the back, since the most
+  // likely case is that the new packet should be near the end of the list.
+  PacketList::reverse_iterator rit = std::find_if(
+      buffer_.rbegin(), buffer_.rend(), NewTimestampIsLarger(packet));
+
+  // The new packet is to be inserted to the right of `rit`. If it has the same
+  // timestamp as `rit`, which has a higher priority, do not insert the new
+  // packet to list.
+  if (rit != buffer_.rend() && packet.timestamp == rit->timestamp) {
+    LogPacketDiscarded(packet.priority.codec_level, stats);
+    return return_val;
+  }
+
+  // The new packet is to be inserted to the left of `it`. If it has the same
+  // timestamp as `it`, which has a lower priority, replace `it` with the new
+  // packet.
+  PacketList::iterator it = rit.base();
+  if (it != buffer_.end() && packet.timestamp == it->timestamp) {
+    LogPacketDiscarded(it->priority.codec_level, stats);
+    it = buffer_.erase(it);
+  }
+  buffer_.insert(it, std::move(packet));  // Insert the packet at that position.
+
+  return return_val;
+}
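A worked example of the span_threshold arithmetic above, with the numbers from the SmartFlushOverfillBuffer test in this patch: target_level_multiplier = 2, target_level_threshold_ms = 0, target_level_ms = 40 and sample_rate = 1000 give span_threshold = 2 * max(0, 40) * 1000 / 1000 = 80 samples. Ten buffered 10-sample packets span 90 timestamp units plus one last-decoded frame (10 samples), i.e. 100 >= 80, so the eleventh insert triggers PartialFlush() and InsertPacket() returns kPartialFlush.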
+
+int PacketBuffer::InsertPacketList(
+    PacketList* packet_list,
+    const DecoderDatabase& decoder_database,
+    absl::optional<uint8_t>* current_rtp_payload_type,
+    absl::optional<uint8_t>* current_cng_rtp_payload_type,
+    StatisticsCalculator* stats,
+    size_t last_decoded_length,
+    size_t sample_rate,
+    int target_level_ms) {
+  RTC_DCHECK(stats);
+  bool flushed = false;
+  for (auto& packet : *packet_list) {
+    if (decoder_database.IsComfortNoise(packet.payload_type)) {
+      if (*current_cng_rtp_payload_type &&
+          **current_cng_rtp_payload_type != packet.payload_type) {
+        // New CNG payload type implies new codec type.
+        *current_rtp_payload_type = absl::nullopt;
+        Flush(stats);
+        flushed = true;
+      }
+      *current_cng_rtp_payload_type = packet.payload_type;
+    } else if (!decoder_database.IsDtmf(packet.payload_type)) {
+      // This must be speech.
+      if ((*current_rtp_payload_type &&
+           **current_rtp_payload_type != packet.payload_type) ||
+          (*current_cng_rtp_payload_type &&
+           !EqualSampleRates(packet.payload_type,
+                             **current_cng_rtp_payload_type,
+                             decoder_database))) {
+        *current_cng_rtp_payload_type = absl::nullopt;
+        Flush(stats);
+        flushed = true;
+      }
+      *current_rtp_payload_type = packet.payload_type;
+    }
+    int return_val =
+        InsertPacket(std::move(packet), stats, last_decoded_length,
+                     sample_rate, target_level_ms, decoder_database);
+    if (return_val == kFlushed) {
+      // The buffer flushed, but this is not an error. We can still continue.
+      flushed = true;
+    } else if (return_val != kOK) {
+      // An error occurred. Delete remaining packets in list and return.
+      packet_list->clear();
+      return return_val;
+    }
+  }
+  packet_list->clear();
+  return flushed ? kFlushed : kOK;
+}
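A sketch of how a caller is expected to drive InsertPacketList(); the surrounding objects (buffer, decoder_database, stats) and the parameter values are illustrative assumptions, not an actual call site from this patch:

// Hypothetical caller, mirroring the unit tests further down.
PacketList list;                         // filled with parsed RTP packets
absl::optional<uint8_t> current_pt;      // active speech payload type, if any
absl::optional<uint8_t> current_cng_pt;  // active CNG payload type, if any
int ret = buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                  &current_cng_pt, &stats,
                                  /*last_decoded_length=*/480,
                                  /*sample_rate=*/48000,
                                  /*target_level_ms=*/80);
// kOK: everything inserted; kFlushed: inserted, but a flush happened along
// the way; any other code aborts and the remaining packets are dropped.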
+
+int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const {
+  if (Empty()) {
+    return kBufferEmpty;
+  }
+  if (!next_timestamp) {
+    return kInvalidPointer;
+  }
+  *next_timestamp = buffer_.front().timestamp;
+  return kOK;
+}
+
+int PacketBuffer::NextHigherTimestamp(uint32_t timestamp,
+                                      uint32_t* next_timestamp) const {
+  if (Empty()) {
+    return kBufferEmpty;
+  }
+  if (!next_timestamp) {
+    return kInvalidPointer;
+  }
+  PacketList::const_iterator it;
+  for (it = buffer_.begin(); it != buffer_.end(); ++it) {
+    if (it->timestamp >= timestamp) {
+      // Found a packet matching the search.
+      *next_timestamp = it->timestamp;
+      return kOK;
+    }
+  }
+  return kNotFound;
+}
+
+const Packet* PacketBuffer::PeekNextPacket() const {
+  return buffer_.empty() ? nullptr : &buffer_.front();
+}
+
+absl::optional<Packet> PacketBuffer::GetNextPacket() {
+  if (Empty()) {
+    // Buffer is empty.
+    return absl::nullopt;
+  }
+
+  absl::optional<Packet> packet(std::move(buffer_.front()));
+  // Assert that the packet sanity checks in the InsertPacket method work.
+  RTC_DCHECK(!packet->empty());
+  buffer_.pop_front();
+
+  return packet;
+}
+
+int PacketBuffer::DiscardNextPacket(StatisticsCalculator* stats) {
+  if (Empty()) {
+    return kBufferEmpty;
+  }
+  // Assert that the packet sanity checks in the InsertPacket method work.
+  const Packet& packet = buffer_.front();
+  RTC_DCHECK(!packet.empty());
+  LogPacketDiscarded(packet.priority.codec_level, stats);
+  buffer_.pop_front();
+  return kOK;
+}
+
+void PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit,
+                                     uint32_t horizon_samples,
+                                     StatisticsCalculator* stats) {
+  buffer_.remove_if([timestamp_limit, horizon_samples, stats](const Packet& p) {
+    if (timestamp_limit == p.timestamp ||
+        !IsObsoleteTimestamp(p.timestamp, timestamp_limit, horizon_samples)) {
+      return false;
+    }
+    LogPacketDiscarded(p.priority.codec_level, stats);
+    return true;
+  });
+}
+
+void PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit,
+                                        StatisticsCalculator* stats) {
+  DiscardOldPackets(timestamp_limit, 0, stats);
+}
+
+void PacketBuffer::DiscardPacketsWithPayloadType(uint8_t payload_type,
+                                                 StatisticsCalculator* stats) {
+  buffer_.remove_if([payload_type, stats](const Packet& p) {
+    if (p.payload_type != payload_type) {
+      return false;
+    }
+    LogPacketDiscarded(p.priority.codec_level, stats);
+    return true;
+  });
+}
+
+size_t PacketBuffer::NumPacketsInBuffer() const {
+  return buffer_.size();
+}
+
+size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
+  size_t num_samples = 0;
+  size_t last_duration = last_decoded_length;
+  for (const Packet& packet : buffer_) {
+    if (packet.frame) {
+      // TODO(hlundin): Verify that it's fine to count all packets and remove
+      // this check.
+      if (packet.priority != Packet::Priority(0, 0)) {
+        continue;
+      }
+      size_t duration = packet.frame->Duration();
+      if (duration > 0) {
+        last_duration = duration;  // Save the most up-to-date (valid) duration.
+      }
+    }
+    num_samples += last_duration;
+  }
+  return num_samples;
+}
+
+size_t PacketBuffer::GetSpanSamples(size_t last_decoded_length,
+                                    size_t sample_rate,
+                                    bool count_dtx_waiting_time) const {
+  if (buffer_.size() == 0) {
+    return 0;
+  }
+
+  size_t span = buffer_.back().timestamp - buffer_.front().timestamp;
+  if (buffer_.back().frame && buffer_.back().frame->Duration() > 0) {
+    size_t duration = buffer_.back().frame->Duration();
+    if (count_dtx_waiting_time && buffer_.back().frame->IsDtxPacket()) {
+      size_t waiting_time_samples = rtc::dchecked_cast<size_t>(
+          buffer_.back().waiting_time->ElapsedMs() * (sample_rate / 1000));
+      duration = std::max(duration, waiting_time_samples);
+    }
+    span += duration;
+  } else {
+    span += last_decoded_length;
+  }
+  return span;
+}
+
+bool PacketBuffer::ContainsDtxOrCngPacket(
+    const DecoderDatabase* decoder_database) const {
+  RTC_DCHECK(decoder_database);
+  for (const Packet& packet : buffer_) {
+    if ((packet.frame && packet.frame->IsDtxPacket()) ||
+        decoder_database->IsComfortNoise(packet.payload_type)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace webrtc
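One subtlety in GetSpanSamples() above: the front-to-back distance is computed in unsigned 32-bit arithmetic, so it remains correct across a timestamp wraparound. With the numbers from the GetSpanSamples unit test in this patch:

uint32_t front = 0xFFFFFFFE;       // timestamp of the first packet
uint32_t back = front + 10;        // one 10-sample frame later -> 0x00000008
uint32_t distance = back - front;  // == 10, although back < front numerically
// GetSpanSamples() then adds the back frame's duration (10) -> span == 20.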
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h
new file mode 100644
index 0000000000..c6fb47ffbf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h
@@ -0,0 +1,181 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
+#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/packet.h"
+#include "modules/include/module_common_types_public.h"  // IsNewerTimestamp
+
+namespace webrtc {
+
+class DecoderDatabase;
+class StatisticsCalculator;
+class TickTimer;
+struct SmartFlushingConfig {
+  // When calculating the flushing threshold, the maximum between the target
+  // level and this value is used.
+  int target_level_threshold_ms = 500;
+  // A smart flush is triggered when the packet buffer contains a multiple of
+  // the target level.
+  int target_level_multiplier = 3;
+};
+
+// This is the actual buffer holding the packets before decoding.
+class PacketBuffer {
+ public:
+  enum BufferReturnCodes {
+    kOK = 0,
+    kFlushed,
+    kPartialFlush,
+    kNotFound,
+    kBufferEmpty,
+    kInvalidPacket,
+    kInvalidPointer
+  };
+
+  // Constructor creates a buffer which can hold a maximum of
+  // `max_number_of_packets` packets.
+  PacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer);
+
+  // Deletes all packets in the buffer before destroying the buffer.
+  virtual ~PacketBuffer();
+
+  PacketBuffer(const PacketBuffer&) = delete;
+  PacketBuffer& operator=(const PacketBuffer&) = delete;
+
+  // Flushes the buffer and deletes all packets in it.
+  virtual void Flush(StatisticsCalculator* stats);
+
+  // Partial flush. Flush packets but leave some packets behind.
+  virtual void PartialFlush(int target_level_ms,
+                            size_t sample_rate,
+                            size_t last_decoded_length,
+                            StatisticsCalculator* stats);
+
+  // Returns true for an empty buffer.
+  virtual bool Empty() const;
+
+  // Inserts `packet` into the buffer. The buffer will take over ownership of
+  // the packet object.
+  // Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer
+  // was flushed due to overfilling.
+  virtual int InsertPacket(Packet&& packet,
+                           StatisticsCalculator* stats,
+                           size_t last_decoded_length,
+                           size_t sample_rate,
+                           int target_level_ms,
+                           const DecoderDatabase& decoder_database);
+
+  // Inserts a list of packets into the buffer. The buffer will take over
+  // ownership of the packet objects.
+  // Returns PacketBuffer::kOK if all packets were inserted successfully.
+  // If the buffer was flushed due to overfilling, only a subset of the list is
+  // inserted, and PacketBuffer::kFlushed is returned.
+  // The last three parameters are included for legacy compatibility.
+  // TODO(hlundin): Redesign to not use current_*_payload_type and
+  // decoder_database.
+  virtual int InsertPacketList(
+      PacketList* packet_list,
+      const DecoderDatabase& decoder_database,
+      absl::optional<uint8_t>* current_rtp_payload_type,
+      absl::optional<uint8_t>* current_cng_rtp_payload_type,
+      StatisticsCalculator* stats,
+      size_t last_decoded_length,
+      size_t sample_rate,
+      int target_level_ms);
+
+  // Gets the timestamp for the first packet in the buffer and writes it to the
+  // output variable `next_timestamp`.
+  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
+  // PacketBuffer::kOK otherwise.
+  virtual int NextTimestamp(uint32_t* next_timestamp) const;
+
+  // Gets the timestamp for the first packet in the buffer with a timestamp no
+  // lower than the input limit `timestamp`. The result is written to the
+  // output variable `next_timestamp`.
+  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
+  // PacketBuffer::kOK otherwise.
+  virtual int NextHigherTimestamp(uint32_t timestamp,
+                                  uint32_t* next_timestamp) const;
+
+  // Returns a (constant) pointer to the first packet in the buffer. Returns
+  // NULL if the buffer is empty.
+  virtual const Packet* PeekNextPacket() const;
+
+  // Extracts the first packet in the buffer and returns it.
+  // Returns an empty optional if the buffer is empty.
+  virtual absl::optional<Packet> GetNextPacket();
+
+  // Discards the first packet in the buffer. The packet is deleted.
+  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
+  // PacketBuffer::kOK otherwise.
+  virtual int DiscardNextPacket(StatisticsCalculator* stats);
+
+  // Discards all packets that are (strictly) older than timestamp_limit,
+  // but newer than timestamp_limit - horizon_samples. Setting horizon_samples
+  // to zero implies that the horizon is set to half the timestamp range. That
+  // is, if a packet is more than 2^31 timestamps into the future compared with
+  // timestamp_limit (including wrap-around), it is considered old.
+  virtual void DiscardOldPackets(uint32_t timestamp_limit,
+                                 uint32_t horizon_samples,
+                                 StatisticsCalculator* stats);
+
+  // Discards all packets that are (strictly) older than timestamp_limit.
+  virtual void DiscardAllOldPackets(uint32_t timestamp_limit,
+                                    StatisticsCalculator* stats);
+
+  // Removes all packets with a specific payload type from the buffer.
+  virtual void DiscardPacketsWithPayloadType(uint8_t payload_type,
+                                             StatisticsCalculator* stats);
+
+  // Returns the number of packets in the buffer, including duplicates and
+  // redundant packets.
+  virtual size_t NumPacketsInBuffer() const;
+
+  // Returns the number of samples in the buffer, including samples carried in
+  // duplicate and redundant packets.
+  virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
+
+  // Returns the total duration in samples that the packets in the buffer span.
+  virtual size_t GetSpanSamples(size_t last_decoded_length,
+                                size_t sample_rate,
+                                bool count_dtx_waiting_time) const;
+
+  // Returns true if the packet buffer contains any DTX or CNG packets.
+  virtual bool ContainsDtxOrCngPacket(
+      const DecoderDatabase* decoder_database) const;
+
+  // Static method returning true if `timestamp` is older than
+  // `timestamp_limit` but less than `horizon_samples` behind
+  // `timestamp_limit`. For instance, with timestamp_limit = 100 and
+  // horizon_samples = 10, a timestamp in the range (90, 100) is considered
+  // obsolete, and will yield true.
+  // Setting `horizon_samples` to 0 is the same as setting it to 2^31, i.e.,
+  // half the 32-bit timestamp range.
+  static bool IsObsoleteTimestamp(uint32_t timestamp,
+                                  uint32_t timestamp_limit,
+                                  uint32_t horizon_samples) {
+    return IsNewerTimestamp(timestamp_limit, timestamp) &&
+           (horizon_samples == 0 ||
+            IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples));
+  }
+
+ private:
+  absl::optional<SmartFlushingConfig> smart_flushing_config_;
+  size_t max_number_of_packets_;
+  PacketList buffer_;
+  const TickTimer* tick_timer_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
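To make the horizon semantics concrete, here are the documented cases of IsObsoleteTimestamp() written out (these agree with the IsObsoleteTimestamp unit tests in this patch):

PacketBuffer::IsObsoleteTimestamp(95, 100, 10);   // true: inside (90, 100)
PacketBuffer::IsObsoleteTimestamp(90, 100, 10);   // false: exactly horizon_samples behind
PacketBuffer::IsObsoleteTimestamp(100, 100, 10);  // false: on the limit itself
PacketBuffer::IsObsoleteTimestamp(99, 100, 0);    // true: zero horizon acts as 2^31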
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
new file mode 100644
index 0000000000..1a054daca3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc
@@ -0,0 +1,989 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for PacketBuffer class.
+
+#include "modules/audio_coding/neteq/packet_buffer.h"
+
+#include <memory>
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
+#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
+#include "modules/audio_coding/neteq/packet.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::_;
+using ::testing::InSequence;
+using ::testing::MockFunction;
+using ::testing::Return;
+using ::testing::StrictMock;
+
+namespace {
+class MockEncodedAudioFrame : public webrtc::AudioDecoder::EncodedAudioFrame {
+ public:
+  MOCK_METHOD(size_t, Duration, (), (const, override));
+
+  MOCK_METHOD(bool, IsDtxPacket, (), (const, override));
+
+  MOCK_METHOD(absl::optional<DecodeResult>,
+              Decode,
+              (rtc::ArrayView<int16_t> decoded),
+              (const, override));
+};
+
+// Helper class to generate packets. Packets must be deleted by the user.
+class PacketGenerator {
+ public:
+  PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
+  virtual ~PacketGenerator() {}
+  void Reset(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
+  webrtc::Packet NextPacket(
+      int payload_size_bytes,
+      std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame);
+
+  uint16_t seq_no_;
+  uint32_t ts_;
+  uint8_t pt_;
+  int frame_size_;
+};
+
+PacketGenerator::PacketGenerator(uint16_t seq_no,
+                                 uint32_t ts,
+                                 uint8_t pt,
+                                 int frame_size) {
+  Reset(seq_no, ts, pt, frame_size);
+}
+
+void PacketGenerator::Reset(uint16_t seq_no,
+                            uint32_t ts,
+                            uint8_t pt,
+                            int frame_size) {
+  seq_no_ = seq_no;
+  ts_ = ts;
+  pt_ = pt;
+  frame_size_ = frame_size;
+}
+
+webrtc::Packet PacketGenerator::NextPacket(
+    int payload_size_bytes,
+    std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame) {
+  webrtc::Packet packet;
+  packet.sequence_number = seq_no_;
+  packet.timestamp = ts_;
+  packet.payload_type = pt_;
+  packet.payload.SetSize(payload_size_bytes);
+  ++seq_no_;
+  ts_ += frame_size_;
+  packet.frame = std::move(audio_frame);
+  return packet;
+}
+
+struct PacketsToInsert {
+  uint16_t sequence_number;
+  uint32_t timestamp;
+  uint8_t payload_type;
+  bool primary;
+  // Order of this packet to appear upon extraction, after inserting a series
+  // of packets. A negative number means that it should have been discarded
+  // before extraction.
+  int extract_order;
+};
+
+}  // namespace
+
+namespace webrtc {
+
+// Start of test definitions.
+
+TEST(PacketBuffer, CreateAndDestroy) {
+  TickTimer tick_timer;
+  PacketBuffer* buffer = new PacketBuffer(10, &tick_timer);  // 10 packets.
+  EXPECT_TRUE(buffer->Empty());
+  delete buffer;
+}
+
+TEST(PacketBuffer, InsertPacket) {
+  TickTimer tick_timer;
+  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
+  PacketGenerator gen(17u, 4711u, 0, 10);
+  StrictMock<MockStatisticsCalculator> mock_stats;
+  MockDecoderDatabase decoder_database;
+
+  const int payload_len = 100;
+  const Packet packet = gen.NextPacket(payload_len, nullptr);
+  EXPECT_EQ(0, buffer.InsertPacket(/*packet=*/packet.Clone(),
+                                   /*stats=*/&mock_stats,
+                                   /*last_decoded_length=*/payload_len,
+                                   /*sample_rate=*/10000,
+                                   /*target_level_ms=*/60,
+                                   /*decoder_database=*/decoder_database));
+  uint32_t next_ts;
+  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
+  EXPECT_EQ(4711u, next_ts);
+  EXPECT_FALSE(buffer.Empty());
+  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
+  const Packet* next_packet = buffer.PeekNextPacket();
+  EXPECT_EQ(packet, *next_packet);  // Compare contents.
+  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
+
+  // Do not explicitly flush buffer or delete packet to test that it is deleted
+  // with the buffer. (Tested with Valgrind or similar tool.)
+}
+
+// Test to flush buffer.
+TEST(PacketBuffer, FlushBuffer) {
+  TickTimer tick_timer;
+  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
+  PacketGenerator gen(0, 0, 0, 10);
+  const int payload_len = 10;
+  StrictMock<MockStatisticsCalculator> mock_stats;
+  MockDecoderDatabase decoder_database;
+
+  // Insert 10 small packets; should be ok.
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_EQ(
+        PacketBuffer::kOK,
+        buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr),
+                            /*stats=*/&mock_stats,
+                            /*last_decoded_length=*/payload_len,
+                            /*sample_rate=*/1000,
+                            /*target_level_ms=*/60,
+                            /*decoder_database=*/decoder_database));
+  }
+  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
+  EXPECT_FALSE(buffer.Empty());
+
+  EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10);
+  buffer.Flush(&mock_stats);
+  // Buffer should delete the payloads itself.
+ EXPECT_EQ(0u, buffer.NumPacketsInBuffer()); + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test to fill the buffer over the limits, and verify that it flushes. +TEST(PacketBuffer, OverfillBuffer) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + StrictMock mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + const int payload_len = 10; + int i; + for (i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line. + + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10); + const Packet packet = gen.NextPacket(payload_len, nullptr); + // Insert 11th packet; should flush the buffer and insert it after flushing. + EXPECT_EQ(PacketBuffer::kFlushed, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + // Expect last inserted packet to be first in line. + EXPECT_EQ(packet.timestamp, next_ts); + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test a partial buffer flush. +TEST(PacketBuffer, PartialFlush) { + // Use a field trial to configure smart flushing. + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqSmartFlushing/enabled:true," + "target_level_threshold_ms:0,target_level_multiplier:2/"); + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + const int payload_len = 10; + StrictMock mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + for (int i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/100, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(7); + buffer.PartialFlush(/*target_level_ms=*/30, + /*sample_rate=*/1000, + /*last_decoded_length=*/payload_len, + /*stats=*/&mock_stats); + // There should still be some packets left in the buffer. + EXPECT_EQ(3u, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test to fill the buffer over the limits, and verify that the smart flush +// functionality works as expected. +TEST(PacketBuffer, SmartFlushOverfillBuffer) { + // Use a field trial to configure smart flushing. + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqSmartFlushing/enabled:true," + "target_level_threshold_ms:0,target_level_multiplier:2/"); + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. 
+ PacketGenerator gen(0, 0, 0, 10); + StrictMock mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + const int payload_len = 10; + int i; + for (i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/100, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line. + + const Packet packet = gen.NextPacket(payload_len, nullptr); + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(6); + // Insert 11th packet; should cause a partial flush and insert the packet + // after flushing. + EXPECT_EQ(PacketBuffer::kPartialFlush, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/40, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(5u, buffer.NumPacketsInBuffer()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a list of packets. +TEST(PacketBuffer, InsertPacketList) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + list.push_back(gen.NextPacket(payload_len, nullptr)); + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + + StrictMock mock_stats; + + absl::optional current_pt; + absl::optional current_cng_pt; + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + EXPECT_EQ(0, current_pt); // Current payload type changed to 0. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type not changed. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a list of packets. Last packet is of a different payload type. +// Expecting the buffer to flush. +// TODO(hlundin): Remove this test when legacy operation is no longer needed. +TEST(PacketBuffer, InsertPacketListChangePayloadType) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + list.push_back(gen.NextPacket(payload_len, nullptr)); + } + // Insert 11th packet of another payload type (not CNG). 
+ { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload_type = 1; + list.push_back(std::move(packet)); + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info0(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info0)); + const DecoderDatabase::DecoderInfo info1(SdpAudioFormat("pcma", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(1)) + .WillRepeatedly(Return(&info1)); + + StrictMock mock_stats; + + absl::optional current_pt; + absl::optional current_cng_pt; + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10); + EXPECT_EQ( + PacketBuffer::kFlushed, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); // Only the last packet. + EXPECT_EQ(1, current_pt); // Current payload type changed to 1. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type not changed. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, ExtractOrderRedundancy) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. + const int kPackets = 18; + const int kFrameSize = 10; + const int kPayloadLength = 10; + + PacketsToInsert packet_facts[kPackets] = { + {0xFFFD, 0xFFFFFFD7, 0, true, 0}, {0xFFFE, 0xFFFFFFE1, 0, true, 1}, + {0xFFFE, 0xFFFFFFD7, 1, false, -1}, {0xFFFF, 0xFFFFFFEB, 0, true, 2}, + {0xFFFF, 0xFFFFFFE1, 1, false, -1}, {0x0000, 0xFFFFFFF5, 0, true, 3}, + {0x0000, 0xFFFFFFEB, 1, false, -1}, {0x0001, 0xFFFFFFFF, 0, true, 4}, + {0x0001, 0xFFFFFFF5, 1, false, -1}, {0x0002, 0x0000000A, 0, true, 5}, + {0x0002, 0xFFFFFFFF, 1, false, -1}, {0x0003, 0x0000000A, 1, false, -1}, + {0x0004, 0x0000001E, 0, true, 7}, {0x0004, 0x00000014, 1, false, 6}, + {0x0005, 0x0000001E, 0, true, -1}, {0x0005, 0x00000014, 1, false, -1}, + {0x0006, 0x00000028, 0, true, 8}, {0x0006, 0x0000001E, 1, false, -1}, + }; + MockDecoderDatabase decoder_database; + + const size_t kExpectPacketsInBuffer = 9; + + std::vector expect_order(kExpectPacketsInBuffer); + + PacketGenerator gen(0, 0, 0, kFrameSize); + + StrictMock mock_stats; + + // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction + // check ensures that exactly one call to PacketsDiscarded happens in each + // DiscardNextPacket call. + InSequence s; + MockFunction check; + for (int i = 0; i < kPackets; ++i) { + gen.Reset(packet_facts[i].sequence_number, packet_facts[i].timestamp, + packet_facts[i].payload_type, kFrameSize); + Packet packet = gen.NextPacket(kPayloadLength, nullptr); + packet.priority.codec_level = packet_facts[i].primary ? 
0 : 1; + if (packet_facts[i].extract_order < 0) { + if (packet.priority.codec_level > 0) { + EXPECT_CALL(mock_stats, SecondaryPacketsDiscarded(1)); + } else { + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + } + } + EXPECT_CALL(check, Call(i)); + EXPECT_EQ(PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLength, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + if (packet_facts[i].extract_order >= 0) { + expect_order[packet_facts[i].extract_order] = std::move(packet); + } + check.Call(i); + } + + EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer()); + + for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) { + const absl::optional packet = buffer.GetNextPacket(); + EXPECT_EQ(packet, expect_order[i]); // Compare contents. + } + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, DiscardPackets) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + PacketList list; + const int payload_len = 10; + StrictMock mock_stats; + MockDecoderDatabase decoder_database; + + constexpr int kTotalPackets = 10; + // Insert 10 small packets. + for (int i = 0; i < kTotalPackets; ++i) { + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + + uint32_t current_ts = start_ts; + + // Discard them one by one and make sure that the right packets are at the + // front of the buffer. + constexpr int kDiscardPackets = 5; + + // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction + // check ensures that exactly one call to PacketsDiscarded happens in each + // DiscardNextPacket call. + InSequence s; + MockFunction check; + for (int i = 0; i < kDiscardPackets; ++i) { + uint32_t ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts)); + EXPECT_EQ(current_ts, ts); + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + EXPECT_CALL(check, Call(i)); + EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket(&mock_stats)); + current_ts += ts_increment; + check.Call(i); + } + + constexpr int kRemainingPackets = kTotalPackets - kDiscardPackets; + // This will discard all remaining packets but one. The oldest packet is older + // than the indicated horizon_samples, and will thus be left in the buffer. + constexpr size_t kSkipPackets = 1; + EXPECT_CALL(mock_stats, PacketsDiscarded(1)) + .Times(kRemainingPackets - kSkipPackets); + EXPECT_CALL(check, Call(17)); // Arbitrary id number. + buffer.DiscardOldPackets(start_ts + kTotalPackets * ts_increment, + kRemainingPackets * ts_increment, &mock_stats); + check.Call(17); // Same arbitrary id number. + + EXPECT_EQ(kSkipPackets, buffer.NumPacketsInBuffer()); + uint32_t ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts)); + EXPECT_EQ(current_ts, ts); + + // Discard all remaining packets. 
+ EXPECT_CALL(mock_stats, PacketsDiscarded(kSkipPackets)); + buffer.DiscardAllOldPackets(start_ts + kTotalPackets * ts_increment, + &mock_stats); + + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, Reordering) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + const int payload_len = 10; + + // Generate 10 small packets and insert them into a PacketList. Insert every + // odd packet to the front, and every even packet to the back, thus creating + // a (rather strange) reordering. + PacketList list; + for (int i = 0; i < 10; ++i) { + Packet packet = gen.NextPacket(payload_len, nullptr); + if (i % 2) { + list.push_front(std::move(packet)); + } else { + list.push_back(std::move(packet)); + } + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + absl::optional current_pt; + absl::optional current_cng_pt; + + StrictMock mock_stats; + + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + + // Extract them and make sure that come out in the right order. + uint32_t current_ts = start_ts; + for (int i = 0; i < 10; ++i) { + const absl::optional packet = buffer.GetNextPacket(); + ASSERT_TRUE(packet); + EXPECT_EQ(current_ts, packet->timestamp); + current_ts += ts_increment; + } + EXPECT_TRUE(buffer.Empty()); + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// The test first inserts a packet with narrow-band CNG, then a packet with +// wide-band speech. The expected behavior of the packet buffer is to detect a +// change in sample rate, even though no speech packet has been inserted before, +// and flush out the CNG packet. +TEST(PacketBuffer, CngFirstThenSpeechWithNewSampleRate) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + const uint8_t kCngPt = 13; + const int kPayloadLen = 10; + const uint8_t kSpeechPt = 100; + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info_cng(SdpAudioFormat("cn", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(kCngPt)) + .WillRepeatedly(Return(&info_cng)); + const DecoderDatabase::DecoderInfo info_speech( + SdpAudioFormat("l16", 16000, 1), absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(kSpeechPt)) + .WillRepeatedly(Return(&info_speech)); + + // Insert first packet, which is narrow-band CNG. 
+ PacketGenerator gen(0, 0, kCngPt, 10); + PacketList list; + list.push_back(gen.NextPacket(kPayloadLen, nullptr)); + absl::optional current_pt; + absl::optional current_cng_pt; + + StrictMock mock_stats; + + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLen, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + ASSERT_TRUE(buffer.PeekNextPacket()); + EXPECT_EQ(kCngPt, buffer.PeekNextPacket()->payload_type); + EXPECT_EQ(current_pt, absl::nullopt); // Current payload type not set. + EXPECT_EQ(kCngPt, current_cng_pt); // CNG payload type set. + + // Insert second packet, which is wide-band speech. + { + Packet packet = gen.NextPacket(kPayloadLen, nullptr); + packet.payload_type = kSpeechPt; + list.push_back(std::move(packet)); + } + // Expect the buffer to flush out the CNG packet, since it does not match the + // new speech sample rate. + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + EXPECT_EQ( + PacketBuffer::kFlushed, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLen, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + ASSERT_TRUE(buffer.PeekNextPacket()); + EXPECT_EQ(kSpeechPt, buffer.PeekNextPacket()->payload_type); + + EXPECT_EQ(kSpeechPt, current_pt); // Current payload type set. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type reset. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, Failures) { + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + int payload_len = 100; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + TickTimer tick_timer; + StrictMock mock_stats; + MockDecoderDatabase decoder_database; + + PacketBuffer* buffer = new PacketBuffer(100, &tick_timer); // 100 packets. + { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload.Clear(); + EXPECT_EQ(PacketBuffer::kInvalidPacket, + buffer->InsertPacket(/*packet=*/std::move(packet), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + } + // Buffer should still be empty. Test all empty-checks. + uint32_t temp_ts; + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts)); + EXPECT_EQ(PacketBuffer::kBufferEmpty, + buffer->NextHigherTimestamp(0, &temp_ts)); + EXPECT_EQ(NULL, buffer->PeekNextPacket()); + EXPECT_FALSE(buffer->GetNextPacket()); + + // Discarding packets will not invoke mock_stats.PacketDiscarded() because the + // packet buffer is empty. + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket(&mock_stats)); + buffer->DiscardAllOldPackets(0, &mock_stats); + + // Insert one packet to make the buffer non-empty. 
+ EXPECT_EQ( + PacketBuffer::kOK, + buffer->InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(NULL)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, + buffer->NextHigherTimestamp(0, NULL)); + delete buffer; + + // Insert packet list of three packets, where the second packet has an invalid + // payload. Expect first packet to be inserted, and the remaining two to be + // discarded. + buffer = new PacketBuffer(100, &tick_timer); // 100 packets. + PacketList list; + list.push_back(gen.NextPacket(payload_len, nullptr)); // Valid packet. + { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload.Clear(); // Invalid. + list.push_back(std::move(packet)); + } + list.push_back(gen.NextPacket(payload_len, nullptr)); // Valid packet. + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + absl::optional current_pt; + absl::optional current_cng_pt; + EXPECT_EQ( + PacketBuffer::kInvalidPacket, + buffer->InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/¤t_pt, + /*current_cng_rtp_payload_type=*/¤t_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1u, buffer->NumPacketsInBuffer()); + delete buffer; + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test packet comparison function. +// The function should return true if the first packet "goes before" the second. +TEST(PacketBuffer, ComparePackets) { + PacketGenerator gen(0, 0, 0, 10); + Packet a(gen.NextPacket(10, nullptr)); // SN = 0, TS = 0. + Packet b(gen.NextPacket(10, nullptr)); // SN = 1, TS = 10. + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Testing wrap-around case; 'a' is earlier but has a larger timestamp value. + a.timestamp = 0xFFFFFFFF - 10; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal packets. + EXPECT_TRUE(a == a); + EXPECT_FALSE(a != a); + EXPECT_FALSE(a < a); + EXPECT_FALSE(a > a); + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a >= a); + + // Test equal timestamps but different sequence numbers (0 and 1). + a.timestamp = b.timestamp; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal timestamps but different sequence numbers (32767 and 1). + a.sequence_number = 0xFFFF; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal timestamps and sequence numbers, but differing priorities. 
+  a.sequence_number = b.sequence_number;
+  a.priority = {1, 0};
+  b.priority = {0, 0};
+  // a after b
+  EXPECT_FALSE(a == b);
+  EXPECT_TRUE(a != b);
+  EXPECT_FALSE(a < b);
+  EXPECT_TRUE(a > b);
+  EXPECT_FALSE(a <= b);
+  EXPECT_TRUE(a >= b);
+
+  Packet c(gen.NextPacket(0, nullptr));  // SN = 2, TS = 20.
+  Packet d(gen.NextPacket(0, nullptr));  // SN = 3, TS = 20.
+  c.timestamp = b.timestamp;
+  d.timestamp = b.timestamp;
+  c.sequence_number = b.sequence_number;
+  d.sequence_number = b.sequence_number;
+  c.priority = {1, 1};
+  d.priority = {0, 1};
+  // c after d
+  EXPECT_FALSE(c == d);
+  EXPECT_TRUE(c != d);
+  EXPECT_FALSE(c < d);
+  EXPECT_TRUE(c > d);
+  EXPECT_FALSE(c <= d);
+  EXPECT_TRUE(c >= d);
+
+  // c after a
+  EXPECT_FALSE(c == a);
+  EXPECT_TRUE(c != a);
+  EXPECT_FALSE(c < a);
+  EXPECT_TRUE(c > a);
+  EXPECT_FALSE(c <= a);
+  EXPECT_TRUE(c >= a);
+
+  // c after b
+  EXPECT_FALSE(c == b);
+  EXPECT_TRUE(c != b);
+  EXPECT_FALSE(c < b);
+  EXPECT_TRUE(c > b);
+  EXPECT_FALSE(c <= b);
+  EXPECT_TRUE(c >= b);
+
+  // a after d
+  EXPECT_FALSE(a == d);
+  EXPECT_TRUE(a != d);
+  EXPECT_FALSE(a < d);
+  EXPECT_TRUE(a > d);
+  EXPECT_FALSE(a <= d);
+  EXPECT_TRUE(a >= d);
+
+  // d after b
+  EXPECT_FALSE(d == b);
+  EXPECT_TRUE(d != b);
+  EXPECT_FALSE(d < b);
+  EXPECT_TRUE(d > b);
+  EXPECT_FALSE(d <= b);
+  EXPECT_TRUE(d >= b);
+}
+
+TEST(PacketBuffer, GetSpanSamples) {
+  constexpr size_t kFrameSizeSamples = 10;
+  constexpr int kPayloadSizeBytes = 1;  // Does not matter to this test.
+  constexpr uint32_t kStartTimeStamp = 0xFFFFFFFE;  // Close to wrap around.
+  constexpr int kSampleRateHz = 48000;
+  constexpr bool kCountDtxWaitingTime = false;
+  TickTimer tick_timer;
+  PacketBuffer buffer(3, &tick_timer);
+  PacketGenerator gen(0, kStartTimeStamp, 0, kFrameSizeSamples);
+  StrictMock<MockStatisticsCalculator> mock_stats;
+  MockDecoderDatabase decoder_database;
+
+  Packet packet_1 = gen.NextPacket(kPayloadSizeBytes, nullptr);
+
+  std::unique_ptr<MockEncodedAudioFrame> mock_audio_frame =
+      std::make_unique<MockEncodedAudioFrame>();
+  EXPECT_CALL(*mock_audio_frame, Duration())
+      .WillRepeatedly(Return(kFrameSizeSamples));
+  Packet packet_2 =
+      gen.NextPacket(kPayloadSizeBytes, std::move(mock_audio_frame));
+
+  RTC_DCHECK_GT(packet_1.timestamp,
+                packet_2.timestamp);  // Timestamp wrapped around.
+
+  EXPECT_EQ(PacketBuffer::kOK,
+            buffer.InsertPacket(/*packet=*/std::move(packet_1),
+                                /*stats=*/&mock_stats,
+                                /*last_decoded_length=*/kFrameSizeSamples,
+                                /*sample_rate=*/1000,
+                                /*target_level_ms=*/60,
+                                /*decoder_database=*/decoder_database));
+
+  constexpr size_t kLastDecodedSizeSamples = 2;
+  // packet_1 has no access to duration, and relies on the last decoded
+  // duration as input.
+  EXPECT_EQ(kLastDecodedSizeSamples,
+            buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz,
+                                  kCountDtxWaitingTime));
+
+  EXPECT_EQ(PacketBuffer::kOK,
+            buffer.InsertPacket(/*packet=*/std::move(packet_2),
+                                /*stats=*/&mock_stats,
+                                /*last_decoded_length=*/kFrameSizeSamples,
+                                /*sample_rate=*/1000,
+                                /*target_level_ms=*/60,
+                                /*decoder_database=*/decoder_database));
+
+  EXPECT_EQ(kFrameSizeSamples * 2,
+            buffer.GetSpanSamples(0, kSampleRateHz, kCountDtxWaitingTime));
+
+  // packet_2 has access to duration, and ignores the last decoded duration as
+  // input.
+  EXPECT_EQ(kFrameSizeSamples * 2,
+            buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz,
+                                  kCountDtxWaitingTime));
+}
+
+namespace {
+void TestIsObsoleteTimestamp(uint32_t limit_timestamp) {
+  // Check with zero horizon, which implies that the horizon is at 2^31, i.e.,
+  // half the timestamp range.
+ static const uint32_t kZeroHorizon = 0; + static const uint32_t k2Pow31Minus1 = 0x7FFFFFFF; + // Timestamp on the limit is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp, limit_timestamp, kZeroHorizon)); + // 1 sample behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1, + limit_timestamp, kZeroHorizon)); + // 2^31 - 1 samples behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - k2Pow31Minus1, + limit_timestamp, kZeroHorizon)); + // 1 sample ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp + 1, limit_timestamp, kZeroHorizon)); + // If |t1-t2|=2^31 and t1>t2, t2 is older than t1 but not the opposite. + uint32_t other_timestamp = limit_timestamp + (1 << 31); + uint32_t lowest_timestamp = std::min(limit_timestamp, other_timestamp); + uint32_t highest_timestamp = std::max(limit_timestamp, other_timestamp); + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp( + lowest_timestamp, highest_timestamp, kZeroHorizon)); + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + highest_timestamp, lowest_timestamp, kZeroHorizon)); + + // Fixed horizon at 10 samples. + static const uint32_t kHorizon = 10; + // Timestamp on the limit is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp, + limit_timestamp, kHorizon)); + // 1 sample behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1, + limit_timestamp, kHorizon)); + // 9 samples behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 9, + limit_timestamp, kHorizon)); + // 10 samples behind is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 10, + limit_timestamp, kHorizon)); + // 2^31 - 1 samples behind is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp - k2Pow31Minus1, limit_timestamp, kHorizon)); + // 1 sample ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + 1, + limit_timestamp, kHorizon)); + // 2^31 samples ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + (1 << 31), + limit_timestamp, kHorizon)); +} +} // namespace + +// Test the IsObsoleteTimestamp method with different limit timestamps. +TEST(PacketBuffer, IsObsoleteTimestamp) { + TestIsObsoleteTimestamp(0); + TestIsObsoleteTimestamp(1); + TestIsObsoleteTimestamp(0xFFFFFFFF); // -1 in uint32_t. + TestIsObsoleteTimestamp(0x80000000); // 2^31. + TestIsObsoleteTimestamp(0x80000001); // 2^31 + 1. + TestIsObsoleteTimestamp(0x7FFFFFFF); // 2^31 - 1. +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc new file mode 100644 index 0000000000..9999d6764b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/neteq/post_decode_vad.h"
+
+namespace webrtc {
+
+PostDecodeVad::~PostDecodeVad() {
+  if (vad_instance_)
+    WebRtcVad_Free(vad_instance_);
+}
+
+void PostDecodeVad::Enable() {
+  if (!vad_instance_) {
+    // Create the instance.
+    vad_instance_ = WebRtcVad_Create();
+    if (vad_instance_ == nullptr) {
+      // Failed to create instance.
+      Disable();
+      return;
+    }
+  }
+  Init();
+  enabled_ = true;
+}
+
+void PostDecodeVad::Disable() {
+  enabled_ = false;
+  running_ = false;
+}
+
+void PostDecodeVad::Init() {
+  running_ = false;
+  if (vad_instance_) {
+    WebRtcVad_Init(vad_instance_);
+    WebRtcVad_set_mode(vad_instance_, kVadMode);
+    running_ = true;
+  }
+}
+
+void PostDecodeVad::Update(int16_t* signal,
+                           size_t length,
+                           AudioDecoder::SpeechType speech_type,
+                           bool sid_frame,
+                           int fs_hz) {
+  if (!vad_instance_ || !enabled_) {
+    return;
+  }
+
+  if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
+      fs_hz > 16000) {
+    // TODO(hlundin): Remove restriction on fs_hz.
+    running_ = false;
+    active_speech_ = true;
+    sid_interval_counter_ = 0;
+  } else if (!running_) {
+    ++sid_interval_counter_;
+  }
+
+  if (sid_interval_counter_ >= kVadAutoEnable) {
+    Init();
+  }
+
+  if (length > 0 && running_) {
+    size_t vad_sample_index = 0;
+    active_speech_ = false;
+    // Loop through frame sizes 30, 20, and 10 ms.
+    for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
+         vad_frame_size_ms -= 10) {
+      size_t vad_frame_size_samples =
+          static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
+      while (length - vad_sample_index >= vad_frame_size_samples) {
+        int vad_return =
+            WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index],
+                              vad_frame_size_samples);
+        active_speech_ |= (vad_return == 1);
+        vad_sample_index += vad_frame_size_samples;
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
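Note how Update() above consumes the decoded frame in 30, 20 and 10 ms chunks, so any length that is a multiple of 10 ms is fully processed. Worked example at fs_hz = 8000: a 60 ms frame (480 samples) is handled as two 30 ms chunks of 240 samples; a 35 ms frame (280 samples) is handled as one 30 ms chunk, with the trailing 40 samples (5 ms, shorter than the smallest VAD frame) left unprocessed.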
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h
new file mode 100644
index 0000000000..3bd91b9edb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
+#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "api/audio_codecs/audio_decoder.h"
+#include "common_audio/vad/include/webrtc_vad.h"
+
+namespace webrtc {
+
+class PostDecodeVad {
+ public:
+  PostDecodeVad()
+      : enabled_(false),
+        running_(false),
+        active_speech_(true),
+        sid_interval_counter_(0),
+        vad_instance_(NULL) {}
+
+  virtual ~PostDecodeVad();
+
+  PostDecodeVad(const PostDecodeVad&) = delete;
+  PostDecodeVad& operator=(const PostDecodeVad&) = delete;
+
+  // Enables post-decode VAD.
+  void Enable();
+
+  // Disables post-decode VAD.
+  void Disable();
+
+  // Initializes post-decode VAD.
+  void Init();
+
+  // Updates post-decode VAD with the audio data in `signal` having `length`
+  // samples. The data is of type `speech_type`, at the sample rate `fs_hz`.
+  void Update(int16_t* signal,
+              size_t length,
+              AudioDecoder::SpeechType speech_type,
+              bool sid_frame,
+              int fs_hz);
+
+  // Accessors.
+  bool enabled() const { return enabled_; }
+  bool running() const { return running_; }
+  bool active_speech() const { return active_speech_; }
+
+ private:
+  static const int kVadMode = 0;  // Sets aggressiveness to "Normal".
+  // Number of Update() calls without CNG/SID before re-enabling VAD.
+  static const int kVadAutoEnable = 3000;
+
+  bool enabled_;
+  bool running_;
+  bool active_speech_;
+  int sid_interval_counter_;
+  ::VadInst* vad_instance_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc
new file mode 100644
index 0000000000..da3e4e864e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc
@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for PostDecodeVad class.
+
+#include "modules/audio_coding/neteq/post_decode_vad.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(PostDecodeVad, CreateAndDestroy) {
+  PostDecodeVad vad;
+}
+
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc
new file mode 100644
index 0000000000..232170b177
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc
@@ -0,0 +1,117 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/preemptive_expand.h"
+
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/time_stretch.h"
+
+namespace webrtc {
+
+PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
+    const int16_t* input,
+    size_t input_length,
+    size_t old_data_length,
+    AudioMultiVector* output,
+    size_t* length_change_samples) {
+  old_data_length_per_channel_ = old_data_length;
+  // Input length must be (almost) 30 ms.
+  // Also, the new part must be at least `overlap_samples_` elements.
+  static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
+  if (num_channels_ == 0 ||
+      input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
+      old_data_length >= input_length / num_channels_ - overlap_samples_) {
+    // Length of input data too short to do preemptive expand. Simply move all
+    // data from input to output.
+    output->PushBackInterleaved(
+        rtc::ArrayView<const int16_t>(input, input_length));
+    return kError;
+  }
+  const bool kFastMode = false;  // Fast mode is not available for PE Expand.
+  return TimeStretch::Process(input, input_length, kFastMode, output,
+                              length_change_samples);
+}
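A worked example of the length criteria in Process() above, for 8 kHz mono where fs_mult_ = 1 and k15ms = 120 samples: the input must hold at least (2 * 120 - 1) * 1 = 239 samples per channel (just under 30 ms), and the old, already-emitted part must be small enough that more than overlap_samples_ of new data remain; otherwise the input is copied through unchanged and kError is returned.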
+  return TimeStretch::Process(input, input_length, kFastMode, output,
+                              length_change_samples);
+}
+
+void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
+                                                     int16_t* best_correlation,
+                                                     size_t* peak_index) const {
+  // When the signal does not contain any active speech, the correlation does
+  // not matter. Simply set it to zero.
+  *best_correlation = 0;
+
+  // For low energy expansion, the new data can be less than 15 ms,
+  // but we must ensure that `peak_index` is not larger than the length of
+  // the new data.
+  *peak_index = std::min(*peak_index, len - old_data_length_per_channel_);
+}
+
+PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
+    const int16_t* input,
+    size_t input_length,
+    size_t peak_index,
+    int16_t best_correlation,
+    bool active_speech,
+    bool /*fast_mode*/,
+    AudioMultiVector* output) const {
+  // Pre-calculate common multiplication with `fs_mult_`.
+  // 120 corresponds to 15 ms.
+  size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120);
+  // Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
+  // or passive speech.
+  if (((best_correlation > kCorrelationThreshold) &&
+       (old_data_length_per_channel_ <= fs_mult_120)) ||
+      !active_speech) {
+    // Do the preemptive expand operation by overlap add.
+
+    // Set length of the first part, not to be modified.
+    size_t unmodified_length =
+        std::max(old_data_length_per_channel_, fs_mult_120);
+    // Copy the first part, including the cross-fade region.
+    output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
+        input, (unmodified_length + peak_index) * num_channels_));
+    // Copy the last `peak_index` samples up to 15 ms to `temp_vector`.
+    AudioMultiVector temp_vector(num_channels_);
+    temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
+        &input[(unmodified_length - peak_index) * num_channels_],
+        peak_index * num_channels_));
+    // Cross-fade `temp_vector` onto the end of `output`.
+    output->CrossFade(temp_vector, peak_index);
+    // Copy the last unmodified part, 15 ms + pitch period until the end.
+    output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
+        &input[unmodified_length * num_channels_],
+        input_length - unmodified_length * num_channels_));
+
+    if (active_speech) {
+      return kSuccess;
+    } else {
+      return kSuccessLowEnergy;
+    }
+  } else {
+    // Preemptive expand is not allowed. Simply move all data from input to
+    // output.
+    output->PushBackInterleaved(
+        rtc::ArrayView<const int16_t>(input, input_length));
+    return kNoStretch;
+  }
+}
+
+PreemptiveExpand* PreemptiveExpandFactory::Create(
+    int sample_rate_hz,
+    size_t num_channels,
+    const BackgroundNoise& background_noise,
+    size_t overlap_samples) const {
+  return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise,
+                              overlap_samples);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h
new file mode 100644
index 0000000000..6338b993fd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
+#define MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "modules/audio_coding/neteq/time_stretch.h"
+
+namespace webrtc {
+
+class AudioMultiVector;
+class BackgroundNoise;
+
+// This class implements the PreemptiveExpand operation. Most of the work is
+// done in the base class TimeStretch, which is shared with the Accelerate
+// operation. This class implements only the parts that are specific to
+// PreemptiveExpand.
+class PreemptiveExpand : public TimeStretch {
+ public:
+  PreemptiveExpand(int sample_rate_hz,
+                   size_t num_channels,
+                   const BackgroundNoise& background_noise,
+                   size_t overlap_samples)
+      : TimeStretch(sample_rate_hz, num_channels, background_noise),
+        old_data_length_per_channel_(0),
+        overlap_samples_(overlap_samples) {}
+
+  PreemptiveExpand(const PreemptiveExpand&) = delete;
+  PreemptiveExpand& operator=(const PreemptiveExpand&) = delete;
+
+  // Performs the actual PreemptiveExpand operation. The samples are read from
+  // `input`, of length `input_length` elements, and are written to `output`.
+  // The number of samples added through time-stretching is provided in the
+  // output `length_change_samples`. The method returns the outcome of the
+  // operation as an enumerator value.
+  ReturnCodes Process(const int16_t* input,
+                      size_t input_length,
+                      size_t old_data_length,
+                      AudioMultiVector* output,
+                      size_t* length_change_samples);
+
+ protected:
+  // Sets the parameters `best_correlation` and `peak_index` to suitable
+  // values when the signal contains no active speech.
+  void SetParametersForPassiveSpeech(size_t input_length,
+                                     int16_t* best_correlation,
+                                     size_t* peak_index) const override;
+
+  // Checks the criteria for performing the time-stretching operation and,
+  // if possible, performs the time-stretching.
+  ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
+                                      size_t input_length,
+                                      size_t peak_index,
+                                      int16_t best_correlation,
+                                      bool active_speech,
+                                      bool /*fast_mode*/,
+                                      AudioMultiVector* output) const override;
+
+ private:
+  size_t old_data_length_per_channel_;
+  size_t overlap_samples_;
+};
+
+struct PreemptiveExpandFactory {
+  PreemptiveExpandFactory() {}
+  virtual ~PreemptiveExpandFactory() {}
+
+  virtual PreemptiveExpand* Create(int sample_rate_hz,
+                                   size_t num_channels,
+                                   const BackgroundNoise& background_noise,
+                                   size_t overlap_samples) const;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc
new file mode 100644
index 0000000000..ada175831c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/random_vector.h"
+
+namespace webrtc {
+
+const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = {
+    2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115,
+    9598, -10380, -4959, -1280, -21716, 7133, -1522, 13458, -3902,
+    2789, -675, 3441, 5016, -13599, -4003, -2739, 3922, -7209,
+    13352, -11617, -7241, 12905, -2314, 5426, 10121, -9702, 11207,
+    -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112, -613,
+    201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552,
+    -1650, -480, -1237, 18720, -11858, -8303, -8212, 865, -2890,
+    -16968, 12052, -5845, -5912, 9777, -5665, -6294, 5426, -4737,
+    -6335, 1652, 761, 3832, 641, -8552, -9084, -5753, 8146,
+    12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403, 11407,
+    6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212,
+    2891, -866, -404, -4807, -2324, -1917, -2388, -6470, -3895,
+    -10300, 5323, -5403, 2205, 4640, 7022, -21186, -6244, -882,
+    -10031, -3395, -12885, 7155, -5339, 5079, -2645, -9515, 6622,
+    14651, 15852, 359, 122, 8246, -3502, -6696, -3679, -13535,
+    -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219,
+    1141, 3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123,
+    -8969, 4152, 4117, 13792, 5742, 16168, 8661, -1609, -6095,
+    1881, 14380, -5588, 6758, -6425, -22969, -7269, 7031, 1119,
+    -1611, -5850, -11281, 3559, -8952, -10146, -4667, -16251, -1538,
+    2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559,
+    4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036,
+    13144, -1588, -5304, -2344, -449, -5705, -8894, 5205, -17904,
+    -11188, -1022, 4852, 10101, -5255, -4200, -752, 7941, -1543,
+    5959, 14719, 13346, 17045, -15605, -1678, -1600, -9230, 68,
+    23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947,
+    4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298,
+    2784, -3317, -6612, -20541, 4166, 4181, -8625, 3562, 12890,
+    4761, 3205, -12259, -8579};
+
+void RandomVector::Reset() {
+  seed_ = 777;
+  seed_increment_ = 1;
+}
+
+void RandomVector::Generate(size_t length, int16_t* output) {
+  for (size_t i = 0; i < length; i++) {
+    seed_ += seed_increment_;
+    size_t position = seed_ & (kRandomTableSize - 1);
+    output[i] = kRandomTable[position];
+  }
+}
+
+void RandomVector::IncreaseSeedIncrement(int16_t increase_by) {
+  seed_increment_ += increase_by;
+  seed_increment_ &= kRandomTableSize - 1;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h
new file mode 100644
index 0000000000..4a782f1116
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+// This class generates pseudo-random samples.
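+// The generator steps a seed through a fixed 256-entry table; growing the
+// seed increment over time varies the produced noise pattern. A minimal
+// usage sketch (illustrative only):
+//
+//   RandomVector rv;
+//   int16_t noise[80];
+//   rv.Generate(80, noise);       // Fills `noise` with table samples.
+//   rv.IncreaseSeedIncrement(3);  // Changes the stepping for later calls.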
+class RandomVector {
+ public:
+  static const size_t kRandomTableSize = 256;
+  static const int16_t kRandomTable[kRandomTableSize];
+
+  RandomVector() : seed_(777), seed_increment_(1) {}
+
+  RandomVector(const RandomVector&) = delete;
+  RandomVector& operator=(const RandomVector&) = delete;
+
+  void Reset();
+
+  void Generate(size_t length, int16_t* output);
+
+  void IncreaseSeedIncrement(int16_t increase_by);
+
+  // Accessors and mutators.
+  int16_t seed_increment() { return seed_increment_; }
+  void set_seed_increment(int16_t value) { seed_increment_ = value; }
+
+ private:
+  uint32_t seed_;
+  int16_t seed_increment_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc
new file mode 100644
index 0000000000..44479a6dd6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for RandomVector class.
+
+#include "modules/audio_coding/neteq/random_vector.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(RandomVector, CreateAndDestroy) {
+  RandomVector random_vector;
+}
+
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc
new file mode 100644
index 0000000000..cec9f2f8a0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/red_payload_splitter.h"
+
+#include <stddef.h>
+
+#include <cstdint>
+#include <list>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/packet.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+// The method loops through a list of packets {A, B, C, ...}. Each packet is
+// split into its corresponding RED payloads, {A1, A2, ...}, which are
+// temporarily held in the list `new_packets`.
+// When the first packet in `packet_list` has been processed, the original
+// packet is replaced by the new ones in `new_packets`, so that `packet_list`
+// becomes: {A1, A2, ..., B, C, ...}. The method then continues with B, and C,
+// until all the original packets have been replaced by their split payloads.
+bool RedPayloadSplitter::SplitRed(PacketList* packet_list) {
+  // Too many RED blocks indicates that something is wrong. Clamp it at some
+  // reasonable value.
+  const size_t kMaxRedBlocks = 32;
+  bool ret = true;
+  PacketList::iterator it = packet_list->begin();
+  while (it != packet_list->end()) {
+    const Packet& red_packet = *it;
+    RTC_DCHECK(!red_packet.payload.empty());
+    const uint8_t* payload_ptr = red_packet.payload.data();
+    size_t payload_length = red_packet.payload.size();
+
+    // Read RED headers (according to RFC 2198):
+    //
+    //  0                   1                   2                   3
+    //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    // |F|   block PT  |     timestamp offset      |   block length    |
+    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    // Last RED header:
+    //  0 1 2 3 4 5 6 7
+    // +-+-+-+-+-+-+-+-+
+    // |0|   Block PT  |
+    // +-+-+-+-+-+-+-+-+
+
+    struct RedHeader {
+      uint8_t payload_type;
+      uint32_t timestamp;
+      size_t payload_length;
+    };
+
+    std::vector<RedHeader> new_headers;
+    bool last_block = false;
+    size_t sum_length = 0;
+    while (!last_block) {
+      if (payload_length == 0) {
+        RTC_LOG(LS_WARNING) << "SplitRed header too short";
+        return false;
+      }
+      RedHeader new_header;
+      // Check the F bit. If F == 0, this was the last block.
+      last_block = ((*payload_ptr & 0x80) == 0);
+      // Bits 1 through 7 are payload type.
+      new_header.payload_type = payload_ptr[0] & 0x7F;
+      if (last_block) {
+        // No more header data to read.
+        sum_length += kRedLastHeaderLength;  // Account for RED header size.
+        new_header.timestamp = red_packet.timestamp;
+        new_header.payload_length = red_packet.payload.size() - sum_length;
+        payload_ptr += kRedLastHeaderLength;  // Advance to first payload byte.
+        payload_length -= kRedLastHeaderLength;
+      } else {
+        if (payload_length < kRedHeaderLength) {
+          RTC_LOG(LS_WARNING) << "SplitRed header too short";
+          return false;
+        }
+        // Bits 8 through 21 are timestamp offset.
+        int timestamp_offset =
+            (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
+        new_header.timestamp = red_packet.timestamp - timestamp_offset;
+        // Bits 22 through 31 are payload length.
+        new_header.payload_length =
+            ((payload_ptr[2] & 0x03) << 8) + payload_ptr[3];
+
+        sum_length += new_header.payload_length;
+        sum_length += kRedHeaderLength;  // Account for RED header size.
+
+        payload_ptr += kRedHeaderLength;  // Advance to next RED header.
+        payload_length -= kRedHeaderLength;
+      }
+      // Store in new list of packets.
+      if (new_header.payload_length > 0) {
+        new_headers.push_back(new_header);
+      }
+    }
+
+    if (new_headers.size() <= kMaxRedBlocks) {
+      // Populate the new packets with payload data.
+      // `payload_ptr` now points at the first payload byte.
+      PacketList new_packets;  // An empty list to store the split packets in.
+      for (size_t i = 0; i != new_headers.size(); ++i) {
+        const auto& new_header = new_headers[i];
+        size_t payload_length = new_header.payload_length;
+        if (payload_ptr + payload_length >
+            red_packet.payload.data() + red_packet.payload.size()) {
+          // The block lengths in the RED headers do not match the overall
+          // packet length. Something is corrupt. Discard this and the
+          // remaining payloads from this packet.
+          RTC_LOG(LS_WARNING) << "SplitRed length mismatch";
+          ret = false;
+          break;
+        }
+
+        Packet new_packet;
+        new_packet.timestamp = new_header.timestamp;
+        new_packet.payload_type = new_header.payload_type;
+        new_packet.sequence_number = red_packet.sequence_number;
+        new_packet.priority.red_level =
+            rtc::dchecked_cast<int>((new_headers.size() - 1) - i);
+        new_packet.payload.SetData(payload_ptr, payload_length);
+        new_packet.packet_info = RtpPacketInfo(
+            /*ssrc=*/red_packet.packet_info.ssrc(),
+            /*csrcs=*/std::vector<uint32_t>(),
+            /*rtp_timestamp=*/new_packet.timestamp,
+            /*receive_time=*/red_packet.packet_info.receive_time());
+        new_packet.packet_info.set_audio_level(
+            red_packet.packet_info.audio_level());
+        new_packets.push_front(std::move(new_packet));
+        payload_ptr += payload_length;
+      }
+      // Insert new packets into original list, before the element pointed to
+      // by iterator `it`.
+      packet_list->splice(it, std::move(new_packets));
+    } else {
+      RTC_LOG(LS_WARNING) << "SplitRed too many blocks: "
+                          << new_headers.size();
+      ret = false;
+    }
+    // Remove `it` from the packet list. This operation effectively moves the
+    // iterator `it` to the next packet in the list. Thus, we do not have to
+    // increment it manually.
+    it = packet_list->erase(it);
+  }
+  return ret;
+}
+
+void RedPayloadSplitter::CheckRedPayloads(
+    PacketList* packet_list,
+    const DecoderDatabase& decoder_database) {
+  int main_payload_type = -1;
+  for (auto it = packet_list->begin(); it != packet_list->end(); /* */) {
+    uint8_t this_payload_type = it->payload_type;
+    if (decoder_database.IsRed(this_payload_type)) {
+      it = packet_list->erase(it);
+      continue;
+    }
+    if (!decoder_database.IsDtmf(this_payload_type) &&
+        !decoder_database.IsComfortNoise(this_payload_type)) {
+      if (main_payload_type == -1) {
+        // This is the first packet in the list which is non-DTMF non-CNG.
+        main_payload_type = this_payload_type;
+      } else {
+        if (this_payload_type != main_payload_type) {
+          // We do not allow redundant payloads of a different type.
+          // Remove `it` from the packet list. This operation effectively
+          // moves the iterator `it` to the next packet in the list. Thus, we
+          // do not have to increment it manually.
+          it = packet_list->erase(it);
+          continue;
+        }
+      }
+    }
+    ++it;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h
new file mode 100644
index 0000000000..2f48e4b7d4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
+#define MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
+
+#include "modules/audio_coding/neteq/packet.h"
+
+namespace webrtc {
+
+class DecoderDatabase;
+
+static const size_t kRedHeaderLength = 4;  // 4 bytes RED header.
+static const size_t kRedLastHeaderLength =
+    1;  // Reduced size for the last RED header.
+
+// This class handles splitting of RED payloads into smaller parts.
+// Codec-specific packet splitting can be performed by
+// AudioDecoder::ParsePayload.
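+//
+// Example (illustrative): a RED packet carrying one redundant block followed
+// by the primary block is split into two Packet objects:
+//
+//   PacketList packets = ...;  // Contains a single RED packet.
+//   RedPayloadSplitter splitter;
+//   if (splitter.SplitRed(&packets)) {
+//     // `packets` now holds one Packet per RED block: the primary payload
+//     // first, followed by the redundant payloads in decreasing priority.
+//   }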
+class RedPayloadSplitter {
+ public:
+  RedPayloadSplitter() {}
+
+  virtual ~RedPayloadSplitter() {}
+
+  RedPayloadSplitter(const RedPayloadSplitter&) = delete;
+  RedPayloadSplitter& operator=(const RedPayloadSplitter&) = delete;
+
+  // Splits each packet in `packet_list` into its separate RED payloads. Each
+  // RED payload is packetized into a Packet. The original elements in
+  // `packet_list` are properly deleted and replaced by the new packets.
+  // Note that all packets in `packet_list` must be RED payloads, i.e., have
+  // RED headers according to RFC 2198 at the very beginning of the payload.
+  // Returns true on success and false if any packet could not be split.
+  virtual bool SplitRed(PacketList* packet_list);
+
+  // Checks all packets in `packet_list`. Packets that are DTMF events or
+  // comfort noise payloads are kept. Apart from those, only a single payload
+  // type is accepted; any packet with another payload type is discarded.
+  virtual void CheckRedPayloads(PacketList* packet_list,
+                                const DecoderDatabase& decoder_database);
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc
new file mode 100644
index 0000000000..a0ba5414ea
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for RedPayloadSplitter class.
+
+#include "modules/audio_coding/neteq/red_payload_splitter.h"
+
+#include <string.h>
+
+#include <utility>  // pair
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "modules/audio_coding/neteq/packet.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder_factory.h"
+
+using ::testing::Return;
+using ::testing::ReturnNull;
+
+namespace webrtc {
+
+static const int kRedPayloadType = 100;
+static const size_t kPayloadLength = 10;
+static const uint16_t kSequenceNumber = 0;
+static const uint32_t kBaseTimestamp = 0x12345678;
+
+// A possible Opus packet that contains FEC is the following.
+// The frame is 20 ms in duration.
+//
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |0|0|0|0|1|0|0|0|x|1|x|x|x|x|x|x|x|                              |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                              |
+// |                  Compressed frame 1 (N-2 bytes)...             :
+// :                                                                |
+// |                                                                |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+void CreateOpusFecPayload(uint8_t* payload,
+                          size_t payload_length,
+                          uint8_t payload_value) {
+  if (payload_length < 2) {
+    return;
+  }
+  payload[0] = 0x08;
+  payload[1] = 0x40;
+  memset(&payload[2], payload_value, payload_length - 2);
+}
+
+// RED headers (according to RFC 2198):
+//
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |F|   block PT  |     timestamp offset      |   block length    |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//
+// Last RED header:
+//  0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |0|   Block PT  |
+// +-+-+-+-+-+-+-+-+
+
+// Creates a RED packet, with `num_payloads` payloads, with payload types
+// given by the values in array `payload_types` (which must be of length
+// `num_payloads`). Each redundant payload is `timestamp_offset` samples
+// "behind" the previous payload.
+Packet CreateRedPayload(size_t num_payloads,
+                        uint8_t* payload_types,
+                        int timestamp_offset,
+                        bool embed_opus_fec = false) {
+  Packet packet;
+  packet.payload_type = kRedPayloadType;
+  packet.timestamp = kBaseTimestamp;
+  packet.sequence_number = kSequenceNumber;
+  packet.payload.SetSize((kPayloadLength + 1) +
+                         (num_payloads - 1) *
+                             (kPayloadLength + kRedHeaderLength));
+  uint8_t* payload_ptr = packet.payload.data();
+  for (size_t i = 0; i < num_payloads; ++i) {
+    // Write the RED headers.
+    if (i == num_payloads - 1) {
+      // Special case for last payload.
+      *payload_ptr = payload_types[i] & 0x7F;  // F = 0.
+      ++payload_ptr;
+      break;
+    }
+    *payload_ptr = payload_types[i] & 0x7F;
+    // Not the last block; set F = 1.
+    *payload_ptr |= 0x80;
+    ++payload_ptr;
+    int this_offset =
+        rtc::checked_cast<int>((num_payloads - i - 1) * timestamp_offset);
+    *payload_ptr = this_offset >> 6;
+    ++payload_ptr;
+    RTC_DCHECK_LE(kPayloadLength, 1023);  // Max length described by 10 bits.
+    *payload_ptr = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8);
+    ++payload_ptr;
+    *payload_ptr = kPayloadLength & 0xFF;
+    ++payload_ptr;
+  }
+  for (size_t i = 0; i < num_payloads; ++i) {
+    // Write `i` to all bytes in each payload.
+    if (embed_opus_fec) {
+      CreateOpusFecPayload(payload_ptr, kPayloadLength,
+                           static_cast<uint8_t>(i));
+    } else {
+      memset(payload_ptr, static_cast<int>(i), kPayloadLength);
+    }
+    payload_ptr += kPayloadLength;
+  }
+  return packet;
+}
+
+// Create a packet with all payload bytes set to `payload_value`.
+Packet CreatePacket(uint8_t payload_type,
+                    size_t payload_length,
+                    uint8_t payload_value,
+                    bool opus_fec = false) {
+  Packet packet;
+  packet.payload_type = payload_type;
+  packet.timestamp = kBaseTimestamp;
+  packet.sequence_number = kSequenceNumber;
+  packet.payload.SetSize(payload_length);
+  if (opus_fec) {
+    CreateOpusFecPayload(packet.payload.data(), packet.payload.size(),
+                         payload_value);
+  } else {
+    memset(packet.payload.data(), payload_value, packet.payload.size());
+  }
+  return packet;
+}
+
+// Checks that `packet` has the attributes given in the remaining parameters.
+void VerifyPacket(const Packet& packet,
+                  size_t payload_length,
+                  uint8_t payload_type,
+                  uint16_t sequence_number,
+                  uint32_t timestamp,
+                  uint8_t payload_value,
+                  Packet::Priority priority) {
+  EXPECT_EQ(payload_length, packet.payload.size());
+  EXPECT_EQ(payload_type, packet.payload_type);
+  EXPECT_EQ(sequence_number, packet.sequence_number);
+  EXPECT_EQ(timestamp, packet.timestamp);
+  EXPECT_EQ(priority, packet.priority);
+  ASSERT_FALSE(packet.payload.empty());
+  for (size_t i = 0; i < packet.payload.size(); ++i) {
+    ASSERT_EQ(payload_value, packet.payload.data()[i]);
+  }
+}
+
+void VerifyPacket(const Packet& packet,
+                  size_t payload_length,
+                  uint8_t payload_type,
+                  uint16_t sequence_number,
+                  uint32_t timestamp,
+                  uint8_t payload_value,
+                  bool primary) {
+  VerifyPacket(packet, payload_length, payload_type, sequence_number,
+               timestamp, payload_value,
+               Packet::Priority{0, primary ? 0 : 1});
+}
+
+// Start of test definitions.
+
+TEST(RedPayloadSplitter, CreateAndDestroy) {
+  RedPayloadSplitter* splitter = new RedPayloadSplitter;
+  delete splitter;
+}
+
+// Packet A is split into A1 and A2.
+TEST(RedPayloadSplitter, OnePacketTwoPayloads) {
+  uint8_t payload_types[] = {0, 0};
+  const int kTimestampOffset = 160;
+  PacketList packet_list;
+  packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
+  RedPayloadSplitter splitter;
+  EXPECT_TRUE(splitter.SplitRed(&packet_list));
+  ASSERT_EQ(2u, packet_list.size());
+  // Check first packet. The first in the list should always be the primary
+  // payload.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
+               kSequenceNumber, kBaseTimestamp, 1, true);
+  packet_list.pop_front();
+  // Check second packet.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber, kBaseTimestamp - kTimestampOffset, 0, false);
+}
+
+// Packets A and B are not split at all. Only the RED header in each packet is
+// removed.
+TEST(RedPayloadSplitter, TwoPacketsOnePayload) {
+  uint8_t payload_types[] = {0};
+  const int kTimestampOffset = 160;
+  // Create first packet, with a single RED payload.
+  PacketList packet_list;
+  packet_list.push_back(CreateRedPayload(1, payload_types, kTimestampOffset));
+  // Create second packet, with a single RED payload.
+  {
+    Packet packet = CreateRedPayload(1, payload_types, kTimestampOffset);
+    // Manually change timestamp and sequence number of second packet.
+    packet.timestamp += kTimestampOffset;
+    packet.sequence_number++;
+    packet_list.push_back(std::move(packet));
+  }
+  RedPayloadSplitter splitter;
+  EXPECT_TRUE(splitter.SplitRed(&packet_list));
+  ASSERT_EQ(2u, packet_list.size());
+  // Check first packet.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber, kBaseTimestamp, 0, true);
+  packet_list.pop_front();
+  // Check second packet.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0, true);
+}
+
+// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with
+// attributes as follows:
+//
+//                  A1*   A2    A3    B1*   B2    B3
+// Payload type     0     1     2     0     1     2
+// Timestamp        b     b-o   b-2o  b+o   b     b-o
+// Sequence number  0     0     0     1     1     1
+//
+// b = kBaseTimestamp, o = kTimestampOffset, * = primary.
+TEST(RedPayloadSplitter, TwoPacketsThreePayloads) {
+  uint8_t payload_types[] = {2, 1, 0};  // Primary is the last one.
+  const int kTimestampOffset = 160;
+  // Create first packet, with 3 RED payloads.
+  PacketList packet_list;
+  packet_list.push_back(CreateRedPayload(3, payload_types, kTimestampOffset));
+  // Create second packet, also with 3 RED payloads.
+  {
+    Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
+    // Manually change timestamp and sequence number of second packet.
+    packet.timestamp += kTimestampOffset;
+    packet.sequence_number++;
+    packet_list.push_back(std::move(packet));
+  }
+  RedPayloadSplitter splitter;
+  EXPECT_TRUE(splitter.SplitRed(&packet_list));
+  ASSERT_EQ(6u, packet_list.size());
+  // Check first packet, A1.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
+               kSequenceNumber, kBaseTimestamp, 2, {0, 0});
+  packet_list.pop_front();
+  // Check second packet, A2.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
+               kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1, {0, 1});
+  packet_list.pop_front();
+  // Check third packet, A3.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
+               {0, 2});
+  packet_list.pop_front();
+  // Check fourth packet, B1.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
+               kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 2,
+               {0, 0});
+  packet_list.pop_front();
+  // Check fifth packet, B2.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
+               kSequenceNumber + 1, kBaseTimestamp, 1, {0, 1});
+  packet_list.pop_front();
+  // Check sixth packet, B3.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset, 0,
+               {0, 2});
+}
+
+// Creates a list with 4 packets with these payload types:
+// 0 = CNGnb
+// 1 = PCMu
+// 2 = DTMF (AVT)
+// 3 = iLBC
+// We expect the method CheckRedPayloads to discard the iLBC packet, since it
+// is a non-CNG, non-DTMF payload of another type than the first speech
+// payload found in the list (which is PCMu).
+TEST(RedPayloadSplitter, CheckRedPayloads) {
+  PacketList packet_list;
+  for (uint8_t i = 0; i <= 3; ++i) {
+    // Create packet with payload type `i`, payload length 10 bytes, all 0.
+    packet_list.push_back(CreatePacket(i, 10, 0));
+  }
+
+  // Use a real DecoderDatabase object here instead of a mock, since it is
+  // easier to just register the payload types and let the actual
+  // implementation do its job.
+  DecoderDatabase decoder_database(
+      rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt);
+  decoder_database.RegisterPayload(0, SdpAudioFormat("cn", 8000, 1));
+  decoder_database.RegisterPayload(1, SdpAudioFormat("pcmu", 8000, 1));
+  decoder_database.RegisterPayload(2,
+                                   SdpAudioFormat("telephone-event", 8000, 1));
+  decoder_database.RegisterPayload(3, SdpAudioFormat("ilbc", 8000, 1));
+
+  RedPayloadSplitter splitter;
+  splitter.CheckRedPayloads(&packet_list, decoder_database);
+
+  ASSERT_EQ(3u, packet_list.size());  // Should have dropped the last packet.
+  // Verify packets. The loop verifies that payload types 0, 1, and 2 are in
+  // the list.
+  for (int i = 0; i <= 2; ++i) {
+    VerifyPacket(packet_list.front(), 10, i, kSequenceNumber, kBaseTimestamp,
+                 0, true);
+    packet_list.pop_front();
+  }
+  EXPECT_TRUE(packet_list.empty());
+}
+
+// This test creates a RED packet where the payloads also have the payload
+// type for RED. That is, some kind of weird nested RED packet. This is not
+// supported, and the splitter should discard all packets.
+TEST(RedPayloadSplitter, CheckRedPayloadsRecursiveRed) {
+  PacketList packet_list;
+  for (uint8_t i = 0; i <= 3; ++i) {
+    // Create packet with RED payload type, payload length 10 bytes, all 0.
+    packet_list.push_back(CreatePacket(kRedPayloadType, 10, 0));
+  }
+
+  // Use a real DecoderDatabase object here instead of a mock, since it is
+  // easier to just register the payload type and let the actual
+  // implementation do its job.
+  DecoderDatabase decoder_database(
+      rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt);
+  decoder_database.RegisterPayload(kRedPayloadType,
+                                   SdpAudioFormat("red", 8000, 1));
+
+  RedPayloadSplitter splitter;
+  splitter.CheckRedPayloads(&packet_list, decoder_database);
+
+  EXPECT_TRUE(packet_list.empty());  // Should have dropped all packets.
+}
+
+// Packet A is split into A1, A2 and A3. But the length parameter is off, so
+// the last payloads should be discarded.
+TEST(RedPayloadSplitter, WrongPayloadLength) {
+  uint8_t payload_types[] = {0, 0, 0};
+  const int kTimestampOffset = 160;
+  PacketList packet_list;
+  {
+    Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
+    // Manually tamper with the payload length of the packet.
+    // This is one byte too short for the second payload (out of three).
+    // We expect only the first payload to be returned.
+    packet.payload.SetSize(packet.payload.size() - (kPayloadLength + 1));
+    packet_list.push_back(std::move(packet));
+  }
+  RedPayloadSplitter splitter;
+  EXPECT_FALSE(splitter.SplitRed(&packet_list));
+  ASSERT_EQ(1u, packet_list.size());
+  // Check the remaining packet.
+  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
+               kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
+               {0, 2});
+  packet_list.pop_front();
+}
+
+// Test that we reject packets too short to contain a RED header.
+TEST(RedPayloadSplitter, RejectsIncompleteHeaders) {
+  RedPayloadSplitter splitter;
+
+  uint8_t payload_types[] = {0, 0};
+  const int kTimestampOffset = 160;
+
+  PacketList packet_list;
+
+  // Truncate the packet so that no data remains after the first RED header.
+  packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
+  packet_list.front().payload.SetSize(4);
+  EXPECT_FALSE(splitter.SplitRed(&packet_list));
+  EXPECT_FALSE(packet_list.empty());
+
+  // Truncate the packet so that not even the first RED header can be parsed.
+  packet_list.front().payload.SetSize(3);
+  EXPECT_FALSE(splitter.SplitRed(&packet_list));
+  EXPECT_FALSE(packet_list.empty());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc
new file mode 100644
index 0000000000..f6e073fc88
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/reorder_optimizer.h"
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kDelayBuckets = 100;
+constexpr int kBucketSizeMs = 20;
+
+}  // namespace
+
+ReorderOptimizer::ReorderOptimizer(int forget_factor,
+                                   int ms_per_loss_percent,
+                                   absl::optional<int> start_forget_weight)
+    : histogram_(kDelayBuckets, forget_factor, start_forget_weight),
+      ms_per_loss_percent_(ms_per_loss_percent) {}
+
+void ReorderOptimizer::Update(int relative_delay_ms,
+                              bool reordered,
+                              int base_delay_ms) {
+  const int index = reordered ? relative_delay_ms / kBucketSizeMs : 0;
+  if (index < histogram_.NumBuckets()) {
+    // Maximum delay to register is 2000 ms.
+    histogram_.Add(index);
+  }
+  int bucket_index = MinimizeCostFunction(base_delay_ms);
+  optimal_delay_ms_ = (1 + bucket_index) * kBucketSizeMs;
+}
+
+void ReorderOptimizer::Reset() {
+  histogram_.Reset();
+  optimal_delay_ms_.reset();
+}
+
+int ReorderOptimizer::MinimizeCostFunction(int base_delay_ms) const {
+  const std::vector<int>& buckets = histogram_.buckets();
+
+  // Values are calculated in Q30.
+  int64_t loss_probability = 1 << 30;
+  int64_t min_cost = std::numeric_limits<int64_t>::max();
+  int min_bucket = 0;
+  for (int i = 0; i < static_cast<int>(buckets.size()); ++i) {
+    loss_probability -= buckets[i];
+    int64_t delay_ms =
+        static_cast<int64_t>(std::max(0, i * kBucketSizeMs - base_delay_ms))
+        << 30;
+    int64_t cost = delay_ms + 100 * ms_per_loss_percent_ * loss_probability;
+
+    if (cost < min_cost) {
+      min_cost = cost;
+      min_bucket = i;
+    }
+    if (loss_probability == 0) {
+      break;
+    }
+  }
+
+  return min_bucket;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h
new file mode 100644
index 0000000000..06f6bc7e50
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_
+#define MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/histogram.h"
+
+namespace webrtc {
+
+// Calculates an optimal delay to reduce the chance of missing reordered
+// packets. The delay/loss trade-off can be tuned using the
+// `ms_per_loss_percent` parameter.
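+//
+// The optimal delay is chosen by minimizing, over the histogram buckets i
+// (one bucket per 20 ms of relative delay), the cost
+//
+//   cost(i) = max(0, i * 20 - base_delay_ms) + ms_per_loss_percent * loss%,
+//
+// where loss% is the estimated probability (in percent) that a packet
+// arrives with a relative delay above bucket i. The implementation evaluates
+// this in Q30 fixed point.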
+class ReorderOptimizer {
+ public:
+  ReorderOptimizer(int forget_factor,
+                   int ms_per_loss_percent,
+                   absl::optional<int> start_forget_weight);
+
+  void Update(int relative_delay_ms, bool reordered, int base_delay_ms);
+
+  absl::optional<int> GetOptimalDelayMs() const { return optimal_delay_ms_; }
+
+  void Reset();
+
+ private:
+  int MinimizeCostFunction(int base_delay_ms) const;
+
+  Histogram histogram_;
+  const int ms_per_loss_percent_;
+  absl::optional<int> optimal_delay_ms_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc
new file mode 100644
index 0000000000..aaa1062560
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/reorder_optimizer.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kForgetFactor = 32745;  // 0.9993 in Q15.
+constexpr int kMsPerLossPercent = 20;
+constexpr int kStartForgetWeight = 1;
+
+}  // namespace
+
+TEST(ReorderOptimizerTest, OnlyIncreaseDelayForReorderedPackets) {
+  ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
+                                     kStartForgetWeight);
+  EXPECT_FALSE(reorder_optimizer.GetOptimalDelayMs());
+
+  // Delay should not increase for in-order packets.
+  reorder_optimizer.Update(60, /*reordered=*/false, 0);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20);
+
+  reorder_optimizer.Update(100, /*reordered=*/false, 0);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20);
+
+  reorder_optimizer.Update(80, /*reordered=*/true, 0);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 100);
+}
+
+TEST(ReorderOptimizerTest, AvoidIncreasingDelayWhenProbabilityIsLow) {
+  ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
+                                     kStartForgetWeight);
+
+  reorder_optimizer.Update(40, /*reordered=*/true, 0);
+  reorder_optimizer.Update(40, /*reordered=*/true, 0);
+  reorder_optimizer.Update(40, /*reordered=*/true, 0);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
+
+  // The cost of the delay is too high relative to the probability.
+  reorder_optimizer.Update(600, /*reordered=*/true, 0);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
+}
+
+TEST(ReorderOptimizerTest, BaseDelayIsSubtractedFromCost) {
+  constexpr int kBaseDelayMs = 200;
+  ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
+                                     kStartForgetWeight);
+
+  reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
+  reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
+  reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
+
+  // Since the base delay is subtracted from the delay cost, the optimal delay
+  // increases this time.
+  reorder_optimizer.Update(600, /*reordered=*/true, kBaseDelayMs);
+  EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 620);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc
new file mode 100644
index 0000000000..52d3fa90f1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+
+#include <string.h>  // memset
+
+#include <algorithm>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/delay_manager.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+size_t AddIntToSizeTWithLowerCap(int a, size_t b) {
+  const size_t ret = b + a;
+  // If a + b is negative, resulting in a negative wrap, cap it to zero
+  // instead.
+  static_assert(sizeof(size_t) >= sizeof(int),
+                "int must not be wider than size_t for this to work");
+  return (a < 0 && ret > b) ? 0 : ret;
+}
+
+constexpr int kInterruptionLenMs = 150;
+}  // namespace
+
+// Allocating the static const so that it can be passed by reference to
+// RTC_DCHECK.
+const size_t StatisticsCalculator::kLenWaitingTimes;
+
+StatisticsCalculator::PeriodicUmaLogger::PeriodicUmaLogger(
+    absl::string_view uma_name,
+    int report_interval_ms,
+    int max_value)
+    : uma_name_(uma_name),
+      report_interval_ms_(report_interval_ms),
+      max_value_(max_value),
+      timer_(0) {}
+
+StatisticsCalculator::PeriodicUmaLogger::~PeriodicUmaLogger() = default;
+
+void StatisticsCalculator::PeriodicUmaLogger::AdvanceClock(int step_ms) {
+  timer_ += step_ms;
+  if (timer_ < report_interval_ms_) {
+    return;
+  }
+  LogToUma(Metric());
+  Reset();
+  timer_ -= report_interval_ms_;
+  RTC_DCHECK_GE(timer_, 0);
+}
+
+void StatisticsCalculator::PeriodicUmaLogger::LogToUma(int value) const {
+  RTC_HISTOGRAM_COUNTS_SPARSE(uma_name_, value, 1, max_value_, 50);
+}
+
+StatisticsCalculator::PeriodicUmaCount::PeriodicUmaCount(
+    absl::string_view uma_name,
+    int report_interval_ms,
+    int max_value)
+    : PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {}
+
+StatisticsCalculator::PeriodicUmaCount::~PeriodicUmaCount() {
+  // Log the count for the current (incomplete) interval.
+  LogToUma(Metric());
+}
+
+void StatisticsCalculator::PeriodicUmaCount::RegisterSample() {
+  ++counter_;
+}
+
+int StatisticsCalculator::PeriodicUmaCount::Metric() const {
+  return counter_;
+}
+
+void StatisticsCalculator::PeriodicUmaCount::Reset() {
+  counter_ = 0;
+}
+
+StatisticsCalculator::PeriodicUmaAverage::PeriodicUmaAverage(
+    absl::string_view uma_name,
+    int report_interval_ms,
+    int max_value)
+    : PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {}
+
+StatisticsCalculator::PeriodicUmaAverage::~PeriodicUmaAverage() {
+  // Log the average for the current (incomplete) interval.
+  LogToUma(Metric());
+}
+
+void StatisticsCalculator::PeriodicUmaAverage::RegisterSample(int value) {
+  sum_ += value;
+  ++counter_;
+}
+
+int StatisticsCalculator::PeriodicUmaAverage::Metric() const {
+  return counter_ == 0 ? 0 : static_cast<int>(sum_ / counter_);
+}
+
+void StatisticsCalculator::PeriodicUmaAverage::Reset() {
+  sum_ = 0.0;
+  counter_ = 0;
+}
+
+StatisticsCalculator::StatisticsCalculator()
+    : preemptive_samples_(0),
+      accelerate_samples_(0),
+      expanded_speech_samples_(0),
+      expanded_noise_samples_(0),
+      timestamps_since_last_report_(0),
+      secondary_decoded_samples_(0),
+      discarded_secondary_packets_(0),
+      delayed_packet_outage_counter_(
+          "WebRTC.Audio.DelayedPacketOutageEventsPerMinute",
+          60000,  // 60 seconds report interval.
+          100),
+      excess_buffer_delay_("WebRTC.Audio.AverageExcessBufferDelayMs",
+                           60000,  // 60 seconds report interval.
+                           1000),
+      buffer_full_counter_("WebRTC.Audio.JitterBufferFullPerMinute",
+                           60000,  // 60 seconds report interval.
+                           100) {}
+
+StatisticsCalculator::~StatisticsCalculator() = default;
+
+void StatisticsCalculator::Reset() {
+  preemptive_samples_ = 0;
+  accelerate_samples_ = 0;
+  expanded_speech_samples_ = 0;
+  expanded_noise_samples_ = 0;
+  secondary_decoded_samples_ = 0;
+  discarded_secondary_packets_ = 0;
+  waiting_times_.clear();
+}
+
+void StatisticsCalculator::ResetMcu() {
+  timestamps_since_last_report_ = 0;
+}
+
+void StatisticsCalculator::ExpandedVoiceSamples(size_t num_samples,
+                                                bool is_new_concealment_event) {
+  expanded_speech_samples_ += num_samples;
+  ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), true);
+  lifetime_stats_.concealment_events += is_new_concealment_event;
+}
+
+void StatisticsCalculator::ExpandedNoiseSamples(size_t num_samples,
+                                                bool is_new_concealment_event) {
+  expanded_noise_samples_ += num_samples;
+  ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), false);
+  lifetime_stats_.concealment_events += is_new_concealment_event;
+}
+
+void StatisticsCalculator::ExpandedVoiceSamplesCorrection(int num_samples) {
+  expanded_speech_samples_ =
+      AddIntToSizeTWithLowerCap(num_samples, expanded_speech_samples_);
+  ConcealedSamplesCorrection(num_samples, true);
+}
+
+void StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) {
+  expanded_noise_samples_ =
+      AddIntToSizeTWithLowerCap(num_samples, expanded_noise_samples_);
+  ConcealedSamplesCorrection(num_samples, false);
+}
+
+void StatisticsCalculator::DecodedOutputPlayed() {
+  decoded_output_played_ = true;
+}
+
+void StatisticsCalculator::EndExpandEvent(int fs_hz) {
+  RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
+                concealed_samples_at_event_end_);
+  const int event_duration_ms =
+      1000 *
+      (lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
+      fs_hz;
+  if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
+    lifetime_stats_.interruption_count++;
+    lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
+    RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AudioInterruptionMs", event_duration_ms,
+                         /*min=*/150, /*max=*/5000, /*bucket_count=*/50);
+  }
+  concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
+}
+
+void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
+                                                      bool is_voice) {
+  if (num_samples < 0) {
+    // Store negative correction to subtract from future positive additions.
+    // See also the function comment in the header file.
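+    // Note that `num_samples` is negative here, so subtracting it grows the
+    // stored correction values.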
+    concealed_samples_correction_ -= num_samples;
+    if (!is_voice) {
+      silent_concealed_samples_correction_ -= num_samples;
+    }
+    return;
+  }
+
+  const size_t canceled_out =
+      std::min(static_cast<size_t>(num_samples),
+               concealed_samples_correction_);
+  concealed_samples_correction_ -= canceled_out;
+  lifetime_stats_.concealed_samples += num_samples - canceled_out;
+
+  if (!is_voice) {
+    const size_t silent_canceled_out =
+        std::min(static_cast<size_t>(num_samples),
+                 silent_concealed_samples_correction_);
+    silent_concealed_samples_correction_ -= silent_canceled_out;
+    lifetime_stats_.silent_concealed_samples +=
+        num_samples - silent_canceled_out;
+  }
+}
+
+void StatisticsCalculator::PreemptiveExpandedSamples(size_t num_samples) {
+  preemptive_samples_ += num_samples;
+  operations_and_state_.preemptive_samples += num_samples;
+  lifetime_stats_.inserted_samples_for_deceleration += num_samples;
+}
+
+void StatisticsCalculator::AcceleratedSamples(size_t num_samples) {
+  accelerate_samples_ += num_samples;
+  operations_and_state_.accelerate_samples += num_samples;
+  lifetime_stats_.removed_samples_for_acceleration += num_samples;
+}
+
+void StatisticsCalculator::GeneratedNoiseSamples(size_t num_samples) {
+  lifetime_stats_.generated_noise_samples += num_samples;
+}
+
+void StatisticsCalculator::PacketsDiscarded(size_t num_packets) {
+  lifetime_stats_.packets_discarded += num_packets;
+}
+
+void StatisticsCalculator::SecondaryPacketsDiscarded(size_t num_packets) {
+  discarded_secondary_packets_ += num_packets;
+  lifetime_stats_.fec_packets_discarded += num_packets;
+}
+
+void StatisticsCalculator::SecondaryPacketsReceived(size_t num_packets) {
+  lifetime_stats_.fec_packets_received += num_packets;
+}
+
+void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) {
+  const int time_step_ms =
+      rtc::CheckedDivExact(static_cast<int>(1000 * num_samples), fs_hz);
+  delayed_packet_outage_counter_.AdvanceClock(time_step_ms);
+  excess_buffer_delay_.AdvanceClock(time_step_ms);
+  buffer_full_counter_.AdvanceClock(time_step_ms);
+  timestamps_since_last_report_ += static_cast<uint32_t>(num_samples);
+  if (timestamps_since_last_report_ >
+      static_cast<uint32_t>(fs_hz * kMaxReportPeriod)) {
+    timestamps_since_last_report_ = 0;
+  }
+  lifetime_stats_.total_samples_received += num_samples;
+}
+
+void StatisticsCalculator::JitterBufferDelay(
+    size_t num_samples,
+    uint64_t waiting_time_ms,
+    uint64_t target_delay_ms,
+    uint64_t unlimited_target_delay_ms) {
+  lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples;
+  lifetime_stats_.jitter_buffer_target_delay_ms +=
+      target_delay_ms * num_samples;
+  lifetime_stats_.jitter_buffer_minimum_delay_ms +=
+      unlimited_target_delay_ms * num_samples;
+  lifetime_stats_.jitter_buffer_emitted_count += num_samples;
+}
+
+void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) {
+  secondary_decoded_samples_ += num_samples;
+}
+
+void StatisticsCalculator::FlushedPacketBuffer() {
+  operations_and_state_.packet_buffer_flushes++;
+  buffer_full_counter_.RegisterSample();
+}
+
+void StatisticsCalculator::ReceivedPacket() {
+  ++lifetime_stats_.jitter_buffer_packets_received;
+}
+
+void StatisticsCalculator::RelativePacketArrivalDelay(size_t delay_ms) {
+  lifetime_stats_.relative_packet_arrival_delay_ms += delay_ms;
+}
+
+void StatisticsCalculator::LogDelayedPacketOutageEvent(int num_samples,
+                                                       int fs_hz) {
+  int outage_duration_ms = num_samples / (fs_hz / 1000);
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.DelayedPacketOutageEventMs",
+                       outage_duration_ms, 1 /* min */, 2000 /* max */,
+                       100 /* bucket count */);
+  delayed_packet_outage_counter_.RegisterSample();
+  lifetime_stats_.delayed_packet_outage_samples += num_samples;
+}
+
+void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) {
+  excess_buffer_delay_.RegisterSample(waiting_time_ms);
+  RTC_DCHECK_LE(waiting_times_.size(), kLenWaitingTimes);
+  if (waiting_times_.size() == kLenWaitingTimes) {
+    // Erase the first value.
+    waiting_times_.pop_front();
+  }
+  waiting_times_.push_back(waiting_time_ms);
+  operations_and_state_.last_waiting_time_ms = waiting_time_ms;
+}
+
+void StatisticsCalculator::GetNetworkStatistics(size_t samples_per_packet,
+                                                NetEqNetworkStatistics* stats) {
+  RTC_DCHECK(stats);
+
+  stats->accelerate_rate =
+      CalculateQ14Ratio(accelerate_samples_, timestamps_since_last_report_);
+
+  stats->preemptive_rate =
+      CalculateQ14Ratio(preemptive_samples_, timestamps_since_last_report_);
+
+  stats->expand_rate =
+      CalculateQ14Ratio(expanded_speech_samples_ + expanded_noise_samples_,
+                        timestamps_since_last_report_);
+
+  stats->speech_expand_rate = CalculateQ14Ratio(expanded_speech_samples_,
+                                                timestamps_since_last_report_);
+
+  stats->secondary_decoded_rate = CalculateQ14Ratio(
+      secondary_decoded_samples_, timestamps_since_last_report_);
+
+  const size_t discarded_secondary_samples =
+      discarded_secondary_packets_ * samples_per_packet;
+  stats->secondary_discarded_rate =
+      CalculateQ14Ratio(discarded_secondary_samples,
+                        static_cast<uint32_t>(discarded_secondary_samples +
+                                              secondary_decoded_samples_));
+
+  if (waiting_times_.size() == 0) {
+    stats->mean_waiting_time_ms = -1;
+    stats->median_waiting_time_ms = -1;
+    stats->min_waiting_time_ms = -1;
+    stats->max_waiting_time_ms = -1;
+  } else {
+    std::sort(waiting_times_.begin(), waiting_times_.end());
+    // Find the mid-point elements. If the size is odd, the two values
+    // `middle_left` and `middle_right` will both be the one middle element;
+    // if the size is even, they will be the two neighboring elements at the
+    // middle of the list.
+    const int middle_left = waiting_times_[(waiting_times_.size() - 1) / 2];
+    const int middle_right = waiting_times_[waiting_times_.size() / 2];
+    // Calculate the average of the two. (Works also for odd sizes.)
+    stats->median_waiting_time_ms = (middle_left + middle_right) / 2;
+    stats->min_waiting_time_ms = waiting_times_.front();
+    stats->max_waiting_time_ms = waiting_times_.back();
+    double sum = 0;
+    for (auto time : waiting_times_) {
+      sum += time;
+    }
+    stats->mean_waiting_time_ms =
+        static_cast<int>(sum / waiting_times_.size());
+  }
+
+  // Reset counters.
+  ResetMcu();
+  Reset();
+}
+
+NetEqLifetimeStatistics StatisticsCalculator::GetLifetimeStatistics() const {
+  return lifetime_stats_;
+}
+
+NetEqOperationsAndState StatisticsCalculator::GetOperationsAndState() const {
+  return operations_and_state_;
+}
+
+uint16_t StatisticsCalculator::CalculateQ14Ratio(size_t numerator,
+                                                 uint32_t denominator) {
+  if (numerator == 0) {
+    return 0;
+  } else if (numerator < denominator) {
+    // The ratio must be smaller than 1 in Q14.
+    RTC_DCHECK_LT((numerator << 14) / denominator, (1 << 14));
+    return static_cast<uint16_t>((numerator << 14) / denominator);
+  } else {
+    // Will not produce a ratio larger than 1, since this is probably an error.
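+    // Cap the ratio at 1.0 (in Q14, 1.0 is represented as 1 << 14 = 16384).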
+    return 1 << 14;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h
new file mode 100644
index 0000000000..33a22d02dd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_
+
+#include <deque>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/neteq/neteq.h"
+
+namespace webrtc {
+
+class DelayManager;
+
+// This class handles various network statistics in NetEq.
+class StatisticsCalculator {
+ public:
+  StatisticsCalculator();
+
+  virtual ~StatisticsCalculator();
+
+  StatisticsCalculator(const StatisticsCalculator&) = delete;
+  StatisticsCalculator& operator=(const StatisticsCalculator&) = delete;
+
+  // Resets most of the counters.
+  void Reset();
+
+  // Resets the counters that are not handled by Reset().
+  void ResetMcu();
+
+  // Reports that `num_samples` samples were produced through expansion, and
+  // that the expansion produced other than just noise samples.
+  void ExpandedVoiceSamples(size_t num_samples, bool is_new_concealment_event);
+
+  // Reports that `num_samples` samples were produced through expansion, and
+  // that the expansion produced only noise samples.
+  void ExpandedNoiseSamples(size_t num_samples, bool is_new_concealment_event);
+
+  // Corrects the statistics for number of samples produced through non-noise
+  // expansion by adding `num_samples` (negative or positive) to the current
+  // value. The result is capped to zero to avoid negative values.
+  void ExpandedVoiceSamplesCorrection(int num_samples);
+
+  // Same as ExpandedVoiceSamplesCorrection but for noise samples.
+  void ExpandedNoiseSamplesCorrection(int num_samples);
+
+  void DecodedOutputPlayed();
+
+  // Marks the end of an expand event; triggers some stats to be reported.
+  void EndExpandEvent(int fs_hz);
+
+  // Reports that `num_samples` samples were produced through preemptive
+  // expansion.
+  void PreemptiveExpandedSamples(size_t num_samples);
+
+  // Reports that `num_samples` samples were removed through accelerate.
+  void AcceleratedSamples(size_t num_samples);
+
+  // Reports that `num_samples` comfort noise samples were generated.
+  void GeneratedNoiseSamples(size_t num_samples);
+
+  // Reports that `num_packets` packets were discarded.
+  virtual void PacketsDiscarded(size_t num_packets);
+
+  // Reports that `num_packets` secondary (FEC) packets were discarded.
+  virtual void SecondaryPacketsDiscarded(size_t num_packets);
+
+  // Reports that `num_packets` secondary (FEC) packets were received.
+  virtual void SecondaryPacketsReceived(size_t num_packets);
+
+  // Increases the report interval counter with `num_samples` at a sample rate
+  // of `fs_hz`. This is how the StatisticsCalculator gets notified that
+  // current time is increasing.
+  void IncreaseCounter(size_t num_samples, int fs_hz);
+
+  // Updates the jitter buffer delay counters.
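+  // The delay arguments are weighted by `num_samples`, so dividing the
+  // resulting lifetime totals by `jitter_buffer_emitted_count` yields
+  // per-sample averages.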
+  void JitterBufferDelay(size_t num_samples,
+                         uint64_t waiting_time_ms,
+                         uint64_t target_delay_ms,
+                         uint64_t unlimited_target_delay_ms);
+
+  // Stores new packet waiting time in waiting time statistics.
+  void StoreWaitingTime(int waiting_time_ms);
+
+  // Reports that `num_samples` samples were decoded from secondary packets.
+  void SecondaryDecodedSamples(int num_samples);
+
+  // Reports that the packet buffer was flushed.
+  void FlushedPacketBuffer();
+
+  // Reports that the jitter buffer received a packet.
+  void ReceivedPacket();
+
+  // Reports that a received packet was delayed by `delay_ms` milliseconds.
+  virtual void RelativePacketArrivalDelay(size_t delay_ms);
+
+  // Logs a delayed packet outage event of `num_samples` expanded at a sample
+  // rate of `fs_hz`. A delayed packet outage event is defined as an expand
+  // period caused not by an actual packet loss, but by a delayed packet.
+  virtual void LogDelayedPacketOutageEvent(int num_samples, int fs_hz);
+
+  // Returns the current network statistics in `stats`. The number of samples
+  // per packet is `samples_per_packet`. The method does not populate
+  // `preferred_buffer_size_ms`, `jitter_peaks_found` or `clockdrift_ppm`; use
+  // the PopulateDelayManagerStats method for those.
+  void GetNetworkStatistics(size_t samples_per_packet,
+                            NetEqNetworkStatistics* stats);
+
+  // Returns a copy of this class's lifetime statistics. These statistics are
+  // never reset.
+  NetEqLifetimeStatistics GetLifetimeStatistics() const;
+
+  NetEqOperationsAndState GetOperationsAndState() const;
+
+ private:
+  static const int kMaxReportPeriod = 60;  // Seconds before auto-reset.
+  static const size_t kLenWaitingTimes = 100;
+
+  class PeriodicUmaLogger {
+   public:
+    PeriodicUmaLogger(absl::string_view uma_name,
+                      int report_interval_ms,
+                      int max_value);
+    virtual ~PeriodicUmaLogger();
+    void AdvanceClock(int step_ms);
+
+   protected:
+    void LogToUma(int value) const;
+    virtual int Metric() const = 0;
+    virtual void Reset() = 0;
+
+    const std::string uma_name_;
+    const int report_interval_ms_;
+    const int max_value_;
+    int timer_ = 0;
+  };
+
+  class PeriodicUmaCount final : public PeriodicUmaLogger {
+   public:
+    PeriodicUmaCount(absl::string_view uma_name,
+                     int report_interval_ms,
+                     int max_value);
+    ~PeriodicUmaCount() override;
+    void RegisterSample();
+
+   protected:
+    int Metric() const override;
+    void Reset() override;
+
+   private:
+    int counter_ = 0;
+  };
+
+  class PeriodicUmaAverage final : public PeriodicUmaLogger {
+   public:
+    PeriodicUmaAverage(absl::string_view uma_name,
+                       int report_interval_ms,
+                       int max_value);
+    ~PeriodicUmaAverage() override;
+    void RegisterSample(int value);
+
+   protected:
+    int Metric() const override;
+    void Reset() override;
+
+   private:
+    double sum_ = 0.0;
+    int counter_ = 0;
+  };
+
+  // Corrects the concealed samples counter in lifetime_stats_. The value of
+  // `num_samples` is added directly to the stat if the correction is positive.
+  // If the correction is negative, it is cached and will be subtracted against
+  // future additions to the counter. This is meant to be called from
+  // Expanded{Voice,Noise}Samples{Correction}.
+  void ConcealedSamplesCorrection(int num_samples, bool is_voice);
+
+  // Calculates numerator / denominator, and returns the value in Q14.
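+  // In Q14 fixed point, 1.0 is represented by 1 << 14 = 16384; for example, a
+  // ratio of 0.25 maps to 4096 (editorial note added for clarity; not in the
+  // original file).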
+  static uint16_t CalculateQ14Ratio(size_t numerator, uint32_t denominator);
+
+  NetEqLifetimeStatistics lifetime_stats_;
+  NetEqOperationsAndState operations_and_state_;
+  size_t concealed_samples_correction_ = 0;
+  size_t silent_concealed_samples_correction_ = 0;
+  size_t preemptive_samples_;
+  size_t accelerate_samples_;
+  size_t expanded_speech_samples_;
+  size_t expanded_noise_samples_;
+  size_t concealed_samples_at_event_end_ = 0;
+  uint32_t timestamps_since_last_report_;
+  std::deque<int> waiting_times_;
+  uint32_t secondary_decoded_samples_;
+  size_t discarded_secondary_packets_;
+  PeriodicUmaCount delayed_packet_outage_counter_;
+  PeriodicUmaAverage excess_buffer_delay_;
+  PeriodicUmaCount buffer_full_counter_;
+  bool decoded_output_played_ = false;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc
new file mode 100644
index 0000000000..491cd83dc4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(LifetimeStatistics, TotalSamplesReceived) {
+  StatisticsCalculator stats;
+  for (int i = 0; i < 10; ++i) {
+    stats.IncreaseCounter(480, 48000);  // 10 ms at 48 kHz.
+  }
+  EXPECT_EQ(10 * 480u, stats.GetLifetimeStatistics().total_samples_received);
+}
+
+TEST(LifetimeStatistics, SamplesConcealed) {
+  StatisticsCalculator stats;
+  stats.ExpandedVoiceSamples(100, false);
+  stats.ExpandedNoiseSamples(17, false);
+  EXPECT_EQ(100u + 17u, stats.GetLifetimeStatistics().concealed_samples);
+}
+
+// This test verifies that a negative correction of concealed_samples does not
+// result in a decrease in the stats value (because stats-consuming
+// applications would not expect the value to decrease). Instead, the
+// correction should be made to future increments to the stat.
+TEST(LifetimeStatistics, SamplesConcealedCorrection) {
+  StatisticsCalculator stats;
+  stats.ExpandedVoiceSamples(100, false);
+  EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples);
+  stats.ExpandedVoiceSamplesCorrection(-10);
+  // Do not subtract directly, but keep the correction for later.
+  EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples);
+  stats.ExpandedVoiceSamplesCorrection(20);
+  // The total correction is 20 - 10.
+  EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples);
+
+  // Also test correction done to the next ExpandedVoiceSamples call.
+  stats.ExpandedVoiceSamplesCorrection(-17);
+  EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples);
+  stats.ExpandedVoiceSamples(100, false);
+  EXPECT_EQ(110u + 100u - 17u, stats.GetLifetimeStatistics().concealed_samples);
+}
+
+// This test verifies that neither "accelerate" nor "pre-emptive expand"
+// results in a modification to concealed_samples stats.
+// Only PLC operations (i.e., "expand" and "merge") should affect the stat.
+TEST(LifetimeStatistics, NoUpdateOnTimeStretch) {
+  StatisticsCalculator stats;
+  stats.ExpandedVoiceSamples(100, false);
+  stats.AcceleratedSamples(4711);
+  stats.PreemptiveExpandedSamples(17);
+  stats.ExpandedVoiceSamples(100, false);
+  EXPECT_EQ(200u, stats.GetLifetimeStatistics().concealed_samples);
+}
+
+TEST(StatisticsCalculator, ExpandedSamplesCorrection) {
+  StatisticsCalculator stats;
+  NetEqNetworkStatistics stats_output;
+  constexpr int kSampleRateHz = 48000;
+  constexpr int k10MsSamples = kSampleRateHz / 100;
+  constexpr int kPacketSizeMs = 20;
+  constexpr size_t kSamplesPerPacket = kPacketSizeMs * kSampleRateHz / 1000;
+
+  // Advance time by 10 ms.
+  stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
+
+  stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
+
+  EXPECT_EQ(0u, stats_output.expand_rate);
+  EXPECT_EQ(0u, stats_output.speech_expand_rate);
+
+  // Correct with a negative value.
+  stats.ExpandedVoiceSamplesCorrection(-100);
+  stats.ExpandedNoiseSamplesCorrection(-100);
+  stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
+  stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
+  // Expect no change, since negative values are disallowed.
+  EXPECT_EQ(0u, stats_output.expand_rate);
+  EXPECT_EQ(0u, stats_output.speech_expand_rate);
+
+  // Correct with a positive value.
+  stats.ExpandedVoiceSamplesCorrection(50);
+  stats.ExpandedNoiseSamplesCorrection(200);
+  stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
+  stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
+  // Calculate expected rates in Q14. Expand rate is noise + voice, while
+  // speech expand rate is only voice.
+  EXPECT_EQ(((50u + 200u) << 14) / k10MsSamples, stats_output.expand_rate);
+  EXPECT_EQ((50u << 14) / k10MsSamples, stats_output.speech_expand_rate);
+}
+
+TEST(StatisticsCalculator, RelativePacketArrivalDelay) {
+  StatisticsCalculator stats;
+
+  stats.RelativePacketArrivalDelay(50);
+  NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics();
+  EXPECT_EQ(50u, stats_output.relative_packet_arrival_delay_ms);
+
+  stats.RelativePacketArrivalDelay(20);
+  stats_output = stats.GetLifetimeStatistics();
+  EXPECT_EQ(70u, stats_output.relative_packet_arrival_delay_ms);
+}
+
+TEST(StatisticsCalculator, ReceivedPacket) {
+  StatisticsCalculator stats;
+
+  stats.ReceivedPacket();
+  NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1u, stats_output.jitter_buffer_packets_received);
+
+  stats.ReceivedPacket();
+  stats_output = stats.GetLifetimeStatistics();
+  EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
+}
+
+TEST(StatisticsCalculator, InterruptionCounter) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+  stats.DecodedOutputPlayed();
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0, lts.interruption_count);
+  EXPECT_EQ(0, lts.total_interruption_duration_ms);
+
+  // Add an event that is shorter than 150 ms. Should not be logged.
+  stats.ExpandedVoiceSamples(10 * fs_khz, false);   // 10 ms.
+  stats.ExpandedNoiseSamples(139 * fs_khz, false);  // 139 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0, lts.interruption_count);
+
+  // Add an event that is longer than 150 ms. Should be logged.
+  stats.ExpandedVoiceSamples(140 * fs_khz, false);  // 140 ms.
+  stats.ExpandedNoiseSamples(11 * fs_khz, false);   // 11 ms.
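+  // 140 ms + 11 ms = 151 ms of total expand time, which crosses the 150 ms
+  // threshold noted above, so this event should be counted (worked example
+  // added in this edit; not in the upstream file).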
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1, lts.interruption_count);
+  EXPECT_EQ(151, lts.total_interruption_duration_ms);
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(100 * fs_khz, false);   // 100 ms.
+  stats.ExpandedNoiseSamples(5000 * fs_khz, false);  // 5000 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(2, lts.interruption_count);
+  EXPECT_EQ(5100 + 151, lts.total_interruption_duration_ms);
+}
+
+TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+
+  // Add an event that is longer than 150 ms. Should normally be logged, but we
+  // have not called DecodedOutputPlayed() yet, so it shouldn't this time.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0, lts.interruption_count);
+
+  // Call DecodedOutputPlayed(). Logging should happen after this.
+  stats.DecodedOutputPlayed();
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1, lts.interruption_count);
+}
+
+TEST(StatisticsCalculator, DiscardedPackets) {
+  StatisticsCalculator statistics_calculator;
+  EXPECT_EQ(0u,
+            statistics_calculator.GetLifetimeStatistics().packets_discarded);
+
+  statistics_calculator.PacketsDiscarded(1);
+  EXPECT_EQ(1u,
+            statistics_calculator.GetLifetimeStatistics().packets_discarded);
+
+  statistics_calculator.PacketsDiscarded(10);
+  EXPECT_EQ(11u,
+            statistics_calculator.GetLifetimeStatistics().packets_discarded);
+
+  // Calling `SecondaryPacketsDiscarded` does not modify `packets_discarded`.
+  statistics_calculator.SecondaryPacketsDiscarded(1);
+  EXPECT_EQ(11u,
+            statistics_calculator.GetLifetimeStatistics().packets_discarded);
+
+  // Calling `FlushedPacketBuffer` does not modify `packets_discarded`.
+  statistics_calculator.FlushedPacketBuffer();
+  EXPECT_EQ(11u,
+            statistics_calculator.GetLifetimeStatistics().packets_discarded);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc
new file mode 100644
index 0000000000..7d7cac7157
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/sync_buffer.h"
+
+#include <algorithm>  // Access to min.
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+size_t SyncBuffer::FutureLength() const {
+  return Size() - next_index_;
+}
+
+void SyncBuffer::PushBack(const AudioMultiVector& append_this) {
+  size_t samples_added = append_this.Size();
+  AudioMultiVector::PushBack(append_this);
+  AudioMultiVector::PopFront(samples_added);
+  if (samples_added <= next_index_) {
+    next_index_ -= samples_added;
+  } else {
+    // This means that we are pushing out future data that was never used.
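+    // Illustrative example (added in this edit, not upstream): with
+    // Size() == 100 and next_index_ == 90 (10 future samples), pushing 95
+    // samples would discard 5 future samples that were never played, and
+    // next_index_ is clamped to 0 below.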
+    // RTC_DCHECK_NOTREACHED();
+    // TODO(hlundin): This assert must be disabled to support 60 ms frames.
+    // This should not happen even for 60 ms frames, but it does. Investigate
+    // why.
+    next_index_ = 0;
+  }
+  dtmf_index_ -= std::min(dtmf_index_, samples_added);
+}
+
+void SyncBuffer::PushBackInterleaved(const rtc::BufferT<int16_t>& append_this) {
+  const size_t size_before_adding = Size();
+  AudioMultiVector::PushBackInterleaved(append_this);
+  const size_t samples_added_per_channel = Size() - size_before_adding;
+  RTC_DCHECK_EQ(samples_added_per_channel * Channels(), append_this.size());
+  AudioMultiVector::PopFront(samples_added_per_channel);
+  next_index_ -= std::min(next_index_, samples_added_per_channel);
+  dtmf_index_ -= std::min(dtmf_index_, samples_added_per_channel);
+}
+
+void SyncBuffer::PushFrontZeros(size_t length) {
+  InsertZerosAtIndex(length, 0);
+}
+
+void SyncBuffer::InsertZerosAtIndex(size_t length, size_t position) {
+  position = std::min(position, Size());
+  length = std::min(length, Size() - position);
+  AudioMultiVector::PopBack(length);
+  for (size_t channel = 0; channel < Channels(); ++channel) {
+    channels_[channel]->InsertZerosAt(length, position);
+  }
+  if (next_index_ >= position) {
+    // We are moving the `next_index_` sample.
+    set_next_index(next_index_ + length);  // Overflow handled by subfunction.
+  }
+  if (dtmf_index_ > 0 && dtmf_index_ >= position) {
+    // We are moving the `dtmf_index_` sample.
+    set_dtmf_index(dtmf_index_ + length);  // Overflow handled by subfunction.
+  }
+}
+
+void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this,
+                                size_t length,
+                                size_t position) {
+  position = std::min(position, Size());  // Cap `position` in the valid range.
+  length = std::min(length, Size() - position);
+  AudioMultiVector::OverwriteAt(insert_this, length, position);
+}
+
+void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this,
+                                size_t position) {
+  ReplaceAtIndex(insert_this, insert_this.Size(), position);
+}
+
+void SyncBuffer::GetNextAudioInterleaved(size_t requested_len,
+                                         AudioFrame* output) {
+  RTC_DCHECK(output);
+  const size_t samples_to_read = std::min(FutureLength(), requested_len);
+  output->ResetWithoutMuting();
+  const size_t tot_samples_read = ReadInterleavedFromIndex(
+      next_index_, samples_to_read, output->mutable_data());
+  const size_t samples_read_per_channel = tot_samples_read / Channels();
+  next_index_ += samples_read_per_channel;
+  output->num_channels_ = Channels();
+  output->samples_per_channel_ = samples_read_per_channel;
+}
+
+void SyncBuffer::IncreaseEndTimestamp(uint32_t increment) {
+  end_timestamp_ += increment;
+}
+
+void SyncBuffer::Flush() {
+  Zeros(Size());
+  next_index_ = Size();
+  end_timestamp_ = 0;
+  dtmf_index_ = 0;
+}
+
+void SyncBuffer::set_next_index(size_t value) {
+  // Cannot set `next_index_` larger than the size of the buffer.
+  next_index_ = std::min(value, Size());
+}
+
+void SyncBuffer::set_dtmf_index(size_t value) {
+  // Cannot set `dtmf_index_` larger than the size of the buffer.
+  dtmf_index_ = std::min(value, Size());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h
new file mode 100644
index 0000000000..cf56c432e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h
@@ -0,0 +1,110 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_
+#define MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "api/audio/audio_frame.h"
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+#include "modules/audio_coding/neteq/audio_vector.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class SyncBuffer : public AudioMultiVector {
+ public:
+  SyncBuffer(size_t channels, size_t length)
+      : AudioMultiVector(channels, length),
+        next_index_(length),
+        end_timestamp_(0),
+        dtmf_index_(0) {}
+
+  SyncBuffer(const SyncBuffer&) = delete;
+  SyncBuffer& operator=(const SyncBuffer&) = delete;
+
+  // Returns the number of samples yet to play out from the buffer.
+  size_t FutureLength() const;
+
+  // Adds the contents of `append_this` to the back of the SyncBuffer. Removes
+  // the same number of samples from the beginning of the SyncBuffer, to
+  // maintain a constant buffer size. The `next_index_` is updated to reflect
+  // the move of the beginning of "future" data.
+  void PushBack(const AudioMultiVector& append_this) override;
+
+  // Like PushBack, but reads the samples channel-interleaved from the input.
+  void PushBackInterleaved(const rtc::BufferT<int16_t>& append_this);
+
+  // Adds `length` zeros to the beginning of each channel. Removes
+  // the same number of samples from the end of the SyncBuffer, to
+  // maintain a constant buffer size. The `next_index_` is updated to reflect
+  // the move of the beginning of "future" data.
+  // Note that this operation may delete future samples that are waiting to
+  // be played.
+  void PushFrontZeros(size_t length);
+
+  // Inserts `length` zeros into each channel at index `position`. The size of
+  // the SyncBuffer is kept constant, which means that the last `length`
+  // elements in each channel will be purged.
+  virtual void InsertZerosAtIndex(size_t length, size_t position);
+
+  // Overwrites each channel in this SyncBuffer with values taken from
+  // `insert_this`. The values are taken from the beginning of `insert_this`
+  // and are inserted starting at `position`. `length` values are written into
+  // each channel. The size of the SyncBuffer is kept constant. That is, if
+  // `length` and `position` are selected such that the new data would extend
+  // beyond the end of the current SyncBuffer, the buffer is not extended.
+  // The `next_index_` is not updated.
+  virtual void ReplaceAtIndex(const AudioMultiVector& insert_this,
+                              size_t length,
+                              size_t position);
+
+  // Same as the above method, but where all of `insert_this` is written (with
+  // the same constraints as above, that the SyncBuffer is not extended).
+  virtual void ReplaceAtIndex(const AudioMultiVector& insert_this,
+                              size_t position);
+
+  // Reads `requested_len` samples from each channel and writes them
+  // interleaved into `output`. The `next_index_` is updated to point to the
+  // sample to read next time. The AudioFrame `output` is first reset, and the
+  // `data_`, `num_channels_`, and `samples_per_channel_` fields are updated.
+  void GetNextAudioInterleaved(size_t requested_len, AudioFrame* output);
+
+  // Adds `increment` to `end_timestamp_`.
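+  // For example, after appending 10 ms of audio at 48 kHz, the caller would
+  // typically pass an increment of 480 timestamp ticks (editorial note, not
+  // in the upstream file).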
+  void IncreaseEndTimestamp(uint32_t increment);
+
+  // Flushes the buffer. The buffer will contain only zeros after the flush,
+  // and `next_index_` will point to the end, like when the buffer was first
+  // created.
+  void Flush();
+
+  const AudioVector& Channel(size_t n) const { return *channels_[n]; }
+  AudioVector& Channel(size_t n) { return *channels_[n]; }
+
+  // Accessors and mutators.
+  size_t next_index() const { return next_index_; }
+  void set_next_index(size_t value);
+  uint32_t end_timestamp() const { return end_timestamp_; }
+  void set_end_timestamp(uint32_t value) { end_timestamp_ = value; }
+  size_t dtmf_index() const { return dtmf_index_; }
+  void set_dtmf_index(size_t value);
+
+ private:
+  size_t next_index_;
+  uint32_t end_timestamp_;  // The timestamp of the last sample in the buffer.
+  size_t dtmf_index_;       // Index to the first non-DTMF sample in the buffer.
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
new file mode 100644
index 0000000000..bdcd92446b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
@@ -0,0 +1,174 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/sync_buffer.h"
+
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(SyncBuffer, CreateAndDestroy) {
+  // Create a SyncBuffer with two channels and 10 samples each.
+  static const size_t kLen = 10;
+  static const size_t kChannels = 2;
+  SyncBuffer sync_buffer(kChannels, kLen);
+  EXPECT_EQ(kChannels, sync_buffer.Channels());
+  EXPECT_EQ(kLen, sync_buffer.Size());
+  // When the buffer is empty, the next index to play out is at the end.
+  EXPECT_EQ(kLen, sync_buffer.next_index());
+  // Verify that all elements are zero.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kLen; ++i) {
+      EXPECT_EQ(0, sync_buffer[channel][i]);
+    }
+  }
+}
+
+TEST(SyncBuffer, SetNextIndex) {
+  // Create a SyncBuffer with two channels and 100 samples each.
+  static const size_t kLen = 100;
+  static const size_t kChannels = 2;
+  SyncBuffer sync_buffer(kChannels, kLen);
+  sync_buffer.set_next_index(0);
+  EXPECT_EQ(0u, sync_buffer.next_index());
+  sync_buffer.set_next_index(kLen / 2);
+  EXPECT_EQ(kLen / 2, sync_buffer.next_index());
+  sync_buffer.set_next_index(kLen);
+  EXPECT_EQ(kLen, sync_buffer.next_index());
+  // Try to set larger than the buffer size; should cap at buffer size.
+  sync_buffer.set_next_index(kLen + 1);
+  EXPECT_EQ(kLen, sync_buffer.next_index());
+}
+
+TEST(SyncBuffer, PushBackAndFlush) {
+  // Create a SyncBuffer with two channels and 100 samples each.
+  static const size_t kLen = 100;
+  static const size_t kChannels = 2;
+  SyncBuffer sync_buffer(kChannels, kLen);
+  static const size_t kNewLen = 10;
+  AudioMultiVector new_data(kChannels, kNewLen);
+  // Populate `new_data`.
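+  // Each channel is filled with the ramp 0, 1, ..., kNewLen - 1 below so that
+  // reads can be verified positionally (editorial note, not in the upstream
+  // file).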
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kNewLen; ++i) {
+      new_data[channel][i] = rtc::checked_cast<int16_t>(i);
+    }
+  }
+  // Push back `new_data` into `sync_buffer`. This operation should pop out
+  // data from the front of `sync_buffer`, so that the size of the buffer
+  // remains the same. The `next_index_` should also move with the same length.
+  sync_buffer.PushBack(new_data);
+  ASSERT_EQ(kLen, sync_buffer.Size());
+  // Verify that `next_index_` moved accordingly.
+  EXPECT_EQ(kLen - kNewLen, sync_buffer.next_index());
+  // Verify the new contents.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kNewLen; ++i) {
+      EXPECT_EQ(new_data[channel][i],
+                sync_buffer[channel][sync_buffer.next_index() + i]);
+    }
+  }
+
+  // Now flush the buffer, and verify that it is all zeros, and that next_index
+  // points to the end.
+  sync_buffer.Flush();
+  ASSERT_EQ(kLen, sync_buffer.Size());
+  EXPECT_EQ(kLen, sync_buffer.next_index());
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kLen; ++i) {
+      EXPECT_EQ(0, sync_buffer[channel][i]);
+    }
+  }
+}
+
+TEST(SyncBuffer, PushFrontZeros) {
+  // Create a SyncBuffer with two channels and 100 samples each.
+  static const size_t kLen = 100;
+  static const size_t kChannels = 2;
+  SyncBuffer sync_buffer(kChannels, kLen);
+  static const size_t kNewLen = 10;
+  AudioMultiVector new_data(kChannels, kNewLen);
+  // Populate `new_data`.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kNewLen; ++i) {
+      new_data[channel][i] = rtc::checked_cast<int16_t>(1000 + i);
+    }
+  }
+  sync_buffer.PushBack(new_data);
+  EXPECT_EQ(kLen, sync_buffer.Size());
+
+  // Push `kNewLen` - 1 zeros into each channel in the front of the SyncBuffer.
+  sync_buffer.PushFrontZeros(kNewLen - 1);
+  EXPECT_EQ(kLen, sync_buffer.Size());  // Size should remain the same.
+  // Verify that `next_index_` moved accordingly. Should be at the end - 1.
+  EXPECT_EQ(kLen - 1, sync_buffer.next_index());
+  // Verify the zeros.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kNewLen - 1; ++i) {
+      EXPECT_EQ(0, sync_buffer[channel][i]);
+    }
+  }
+  // Verify that the correct data is at the end of the SyncBuffer.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    EXPECT_EQ(1000, sync_buffer[channel][sync_buffer.next_index()]);
+  }
+}
+
+TEST(SyncBuffer, GetNextAudioInterleaved) {
+  // Create a SyncBuffer with two channels and 100 samples each.
+  static const size_t kLen = 100;
+  static const size_t kChannels = 2;
+  SyncBuffer sync_buffer(kChannels, kLen);
+  static const size_t kNewLen = 10;
+  AudioMultiVector new_data(kChannels, kNewLen);
+  // Populate `new_data`.
+  for (size_t channel = 0; channel < kChannels; ++channel) {
+    for (size_t i = 0; i < kNewLen; ++i) {
+      new_data[channel][i] = rtc::checked_cast<int16_t>(i);
+    }
+  }
+  // Push back `new_data` into `sync_buffer`. This operation should pop out
+  // data from the front of `sync_buffer`, so that the size of the buffer
+  // remains the same. The `next_index_` should also move with the same length.
+  sync_buffer.PushBack(new_data);
+
+  // Read to interleaved output. Read in two batches, where each read operation
+  // should automatically update the `next_index_` in the SyncBuffer.
+  // Note that `samples_read` is the number of samples read from each channel.
+  // That is, the number of samples written to `output` is
+  // `samples_read` * `kChannels`.
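+  // For kChannels == 2, the interleaved layout written to the AudioFrame is
+  // L0 R0 L1 R1 ... (annotation added for clarity; not in the original file).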
+ AudioFrame output1; + sync_buffer.GetNextAudioInterleaved(kNewLen / 2, &output1); + EXPECT_EQ(kChannels, output1.num_channels_); + EXPECT_EQ(kNewLen / 2, output1.samples_per_channel_); + + AudioFrame output2; + sync_buffer.GetNextAudioInterleaved(kNewLen / 2, &output2); + EXPECT_EQ(kChannels, output2.num_channels_); + EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_); + + // Verify the data. + const int16_t* output_ptr = output1.data(); + for (size_t i = 0; i < kNewLen / 2; ++i) { + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(new_data[channel][i], *output_ptr); + ++output_ptr; + } + } + output_ptr = output2.data(); + for (size_t i = kNewLen / 2; i < kNewLen; ++i) { + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(new_data[channel][i], *output_ptr); + ++output_ptr; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m new file mode 100644 index 0000000000..031d8a39ee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m @@ -0,0 +1,201 @@ +% +% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function outStruct = parse_delay_file(file) + +fid = fopen(file, 'rb'); +if fid == -1 + error('Cannot open file %s', file); +end + +textline = fgetl(fid); +if ~strncmp(textline, '#!NetEQ_Delay_Logging', 21) + error('Wrong file format'); +end + +ver = sscanf(textline, '#!NetEQ_Delay_Logging%d.%d'); +if ~all(ver == [2; 0]) + error('Wrong version of delay logging function') +end + + +start_pos = ftell(fid); +fseek(fid, -12, 'eof'); +textline = fgetl(fid); +if ~strncmp(textline, 'End of file', 21) + error('File ending is not correct. Seems like the simulation ended abnormally.'); +end + +fseek(fid,-12-4, 'eof'); +Npackets = fread(fid, 1, 'int32'); +fseek(fid, start_pos, 'bof'); + +rtpts = zeros(Npackets, 1); +seqno = zeros(Npackets, 1); +pt = zeros(Npackets, 1); +plen = zeros(Npackets, 1); +recin_t = nan*ones(Npackets, 1); +decode_t = nan*ones(Npackets, 1); +playout_delay = zeros(Npackets, 1); +optbuf = zeros(Npackets, 1); + +fs_ix = 1; +clock = 0; +ts_ix = 1; +ended = 0; +late_packets = 0; +fs_now = 8000; +last_decode_k = 0; +tot_expand = 0; +tot_accelerate = 0; +tot_preemptive = 0; + +while not(ended) + signal = fread(fid, 1, '*int32'); + + switch signal + case 3 % NETEQ_DELAY_LOGGING_SIGNAL_CLOCK + clock = fread(fid, 1, '*float32'); + + % keep on reading batches of M until the signal is no longer "3" + % read int32 + float32 in one go + % this is to save execution time + temp = [3; 0]; + M = 120; + while all(temp(1,:) == 3) + fp = ftell(fid); + temp = fread(fid, [2 M], '*int32'); + end + + % back up to last clock event + fseek(fid, fp - ftell(fid) + ... 
+ (find(temp(1,:) ~= 3, 1 ) - 2) * 2 * 4 + 4, 'cof'); + % read the last clock value + clock = fread(fid, 1, '*float32'); + + case 1 % NETEQ_DELAY_LOGGING_SIGNAL_RECIN + temp_ts = fread(fid, 1, 'uint32'); + + if late_packets > 0 + temp_ix = ts_ix - 1; + while (temp_ix >= 1) && (rtpts(temp_ix) ~= temp_ts) + % TODO(hlundin): use matlab vector search instead? + temp_ix = temp_ix - 1; + end + + if temp_ix >= 1 + % the ts was found in the vector + late_packets = late_packets - 1; + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + + rtpts(temp_ix) = temp_ts; + seqno(temp_ix) = fread(fid, 1, 'uint16'); + pt(temp_ix) = fread(fid, 1, 'int32'); + plen(temp_ix) = fread(fid, 1, 'int16'); + recin_t(temp_ix) = clock; + + case 2 % NETEQ_DELAY_LOGGING_SIGNAL_FLUSH + % do nothing + + case 4 % NETEQ_DELAY_LOGGING_SIGNAL_EOF + ended = 1; + + case 5 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE + last_decode_ts = fread(fid, 1, 'uint32'); + temp_delay = fread(fid, 1, 'uint16'); + + k = find(rtpts(1:(ts_ix - 1))==last_decode_ts,1,'last'); + if ~isempty(k) + decode_t(k) = clock; + playout_delay(k) = temp_delay + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + end + + case 6 % NETEQ_DELAY_LOGGING_SIGNAL_CHANGE_FS + fsvec(fs_ix) = fread(fid, 1, 'uint16'); + fschange_ts(fs_ix) = last_decode_ts; + fs_now = fsvec(fs_ix); + fs_ix = fs_ix + 1; + + case 7 % NETEQ_DELAY_LOGGING_SIGNAL_MERGE_INFO + playout_delay(last_decode_k) = playout_delay(last_decode_k) ... + + fread(fid, 1, 'int32'); + + case 8 % NETEQ_DELAY_LOGGING_SIGNAL_EXPAND_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_expand = tot_expand + temp / (fs_now / 1000); + end + + case 9 % NETEQ_DELAY_LOGGING_SIGNAL_ACCELERATE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_accelerate = tot_accelerate + temp / (fs_now / 1000); + end + + case 10 % NETEQ_DELAY_LOGGING_SIGNAL_PREEMPTIVE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_preemptive = tot_preemptive + temp / (fs_now / 1000); + end + + case 11 % NETEQ_DELAY_LOGGING_SIGNAL_OPTBUF + optbuf(last_decode_k) = fread(fid, 1, 'int32'); + + case 12 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE_ONE_DESC + last_decode_ts = fread(fid, 1, 'uint32'); + k = ts_ix - 1; + + while (k >= 1) && (rtpts(k) ~= last_decode_ts) + % TODO(hlundin): use matlab vector search instead? + k = k - 1; + end + + if k < 1 + % packet not received yet + k = ts_ix; + rtpts(ts_ix) = last_decode_ts; + late_packets = late_packets + 1; + end + + decode_t(k) = clock; + playout_delay(k) = fread(fid, 1, 'uint16') + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + + end + +end + + +fclose(fid); + +outStruct = struct(... + 'ts', rtpts, ... + 'sn', seqno, ... + 'pt', pt,... + 'plen', plen,... + 'arrival', recin_t,... + 'decode', decode_t,... + 'fs', fsvec(:),... + 'fschange_ts', fschange_ts(:),... + 'playout_delay', playout_delay,... + 'tot_expand', tot_expand,... + 'tot_accelerate', tot_accelerate,... + 'tot_preemptive', tot_preemptive,... + 'optbuf', optbuf); diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m new file mode 100644 index 0000000000..86d533fbeb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m @@ -0,0 +1,197 @@ +% +% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [delay_struct, delayvalues] = plot_neteq_delay(delayfile, varargin) + +% InfoStruct = plot_neteq_delay(delayfile) +% InfoStruct = plot_neteq_delay(delayfile, 'skipdelay', skip_seconds) +% +% Henrik Lundin, 2006-11-17 +% Henrik Lundin, 2011-05-17 +% + +try + s = parse_delay_file(delayfile); +catch + error(lasterr); +end + +delayskip=0; +noplot=0; +arg_ptr=1; +delaypoints=[]; + +s.sn=unwrap_seqno(s.sn); + +while arg_ptr+1 <= nargin + switch lower(varargin{arg_ptr}) + case {'skipdelay', 'delayskip'} + % skip a number of seconds in the beginning when calculating delays + delayskip = varargin{arg_ptr+1}; + arg_ptr = arg_ptr + 2; + case 'noplot' + noplot=1; + arg_ptr = arg_ptr + 1; + case {'get_delay', 'getdelay'} + % return a vector of delay values for the points in the given vector + delaypoints = varargin{arg_ptr+1}; + arg_ptr = arg_ptr + 2; + otherwise + warning('Unknown switch %s\n', varargin{arg_ptr}); + arg_ptr = arg_ptr + 1; + end +end + +% find lost frames that were covered by one-descriptor decoding +one_desc_ix=find(isnan(s.arrival)); +for k=1:length(one_desc_ix) + ix=find(s.ts==max(s.ts(s.ts(one_desc_ix(k))>s.ts))); + s.sn(one_desc_ix(k))=s.sn(ix)+1; + s.pt(one_desc_ix(k))=s.pt(ix); + s.arrival(one_desc_ix(k))=s.arrival(ix)+s.decode(one_desc_ix(k))-s.decode(ix); +end + +% remove duplicate received frames that were never decoded (RED codec) +if length(unique(s.ts(isfinite(s.ts)))) < length(s.ts(isfinite(s.ts))) + ix=find(isfinite(s.decode)); + s.sn=s.sn(ix); + s.ts=s.ts(ix); + s.arrival=s.arrival(ix); + s.playout_delay=s.playout_delay(ix); + s.pt=s.pt(ix); + s.optbuf=s.optbuf(ix); + plen=plen(ix); + s.decode=s.decode(ix); +end + +% find non-unique sequence numbers +[~,un_ix]=unique(s.sn); +nonun_ix=setdiff(1:length(s.sn),un_ix); +if ~isempty(nonun_ix) + warning('RTP sequence numbers are in error'); +end + +% sort vectors +[s.sn,sort_ix]=sort(s.sn); +s.ts=s.ts(sort_ix); +s.arrival=s.arrival(sort_ix); +s.decode=s.decode(sort_ix); +s.playout_delay=s.playout_delay(sort_ix); +s.pt=s.pt(sort_ix); + +send_t=s.ts-s.ts(1); +if length(s.fs)<1 + warning('No info about sample rate found in file. 
Using default 8000.'); + s.fs(1)=8000; + s.fschange_ts(1)=min(s.ts); +elseif s.fschange_ts(1)>min(s.ts) + s.fschange_ts(1)=min(s.ts); +end + +end_ix=length(send_t); +for k=length(s.fs):-1:1 + start_ix=find(s.ts==s.fschange_ts(k)); + send_t(start_ix:end_ix)=send_t(start_ix:end_ix)/s.fs(k)*1000; + s.playout_delay(start_ix:end_ix)=s.playout_delay(start_ix:end_ix)/s.fs(k)*1000; + s.optbuf(start_ix:end_ix)=s.optbuf(start_ix:end_ix)/s.fs(k)*1000; + end_ix=start_ix-1; +end + +tot_time=max(send_t)-min(send_t); + +seq_ix=s.sn-min(s.sn)+1; +send_t=send_t+max(min(s.arrival-send_t),0); + +plot_send_t=nan*ones(max(seq_ix),1); +plot_send_t(seq_ix)=send_t; +plot_nw_delay=nan*ones(max(seq_ix),1); +plot_nw_delay(seq_ix)=s.arrival-send_t; + +cng_ix=find(s.pt~=13); % find those packets that are not CNG/SID + +if noplot==0 + h=plot(plot_send_t/1000,plot_nw_delay); + set(h,'color',0.75*[1 1 1]); + hold on + if any(s.optbuf~=0) + peak_ix=find(s.optbuf(cng_ix)<0); % peak mode is labeled with negative values + no_peak_ix=find(s.optbuf(cng_ix)>0); %setdiff(1:length(cng_ix),peak_ix); + h1=plot(send_t(cng_ix(peak_ix))/1000,... + s.arrival(cng_ix(peak_ix))+abs(s.optbuf(cng_ix(peak_ix)))-send_t(cng_ix(peak_ix)),... + 'r.'); + h2=plot(send_t(cng_ix(no_peak_ix))/1000,... + s.arrival(cng_ix(no_peak_ix))+abs(s.optbuf(cng_ix(no_peak_ix)))-send_t(cng_ix(no_peak_ix)),... + 'g.'); + set([h1, h2],'markersize',1) + end + %h=plot(send_t(seq_ix)/1000,s.decode+s.playout_delay-send_t(seq_ix)); + h=plot(send_t(cng_ix)/1000,s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix)); + set(h,'linew',1.5); + hold off + ax1=axis; + axis tight + ax2=axis; + axis([ax2(1:3) ax1(4)]) +end + + +% calculate delays and other parameters + +delayskip_ix = find(send_t-send_t(1)>=delayskip*1000, 1 ); + +use_ix = intersect(cng_ix,... % use those that are not CNG/SID frames... + intersect(find(isfinite(s.decode)),... % ... that did arrive ... + (delayskip_ix:length(s.decode))')); % ... and are sent after delayskip seconds + +mean_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-send_t(use_ix)); +neteq_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-s.arrival(use_ix)); + +Npack=max(s.sn(delayskip_ix:end))-min(s.sn(delayskip_ix:end))+1; +nw_lossrate=(Npack-length(s.sn(delayskip_ix:end)))/Npack; +neteq_lossrate=(length(s.sn(delayskip_ix:end))-length(use_ix))/Npack; + +delay_struct=struct('mean_delay',mean_delay,'neteq_delay',neteq_delay,... + 'nw_lossrate',nw_lossrate,'neteq_lossrate',neteq_lossrate,... + 'tot_expand',round(s.tot_expand),'tot_accelerate',round(s.tot_accelerate),... + 'tot_preemptive',round(s.tot_preemptive),'tot_time',tot_time,... + 'filename',delayfile,'units','ms','fs',unique(s.fs)); + +if not(isempty(delaypoints)) + delayvalues=interp1(send_t(cng_ix),... + s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix),... 
+ delaypoints,'nearest',NaN); +else + delayvalues=[]; +end + + + +% SUBFUNCTIONS % + +function y=unwrap_seqno(x) + +jumps=find(abs((diff(x)-1))>65000); + +while ~isempty(jumps) + n=jumps(1); + if x(n+1)-x(n) < 0 + % negative jump + x(n+1:end)=x(n+1:end)+65536; + else + % positive jump + x(n+1:end)=x(n+1:end)-65536; + end + + jumps=find(abs((diff(x(n+1:end))-1))>65000); +end + +y=x; + +return; diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc new file mode 100644 index 0000000000..e6c1809fb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/test/neteq_decoding_test.h" + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/rtp_headers.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/test/result_sink.h" +#include "rtc_base/strings/string_builder.h" +#include "test/testsupport/file_utils.h" + +#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h" +#else +#include "modules/audio_coding/neteq/neteq_unittest.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() +#endif + +namespace webrtc { + +namespace { + +void LoadDecoders(webrtc::NetEq* neteq) { + ASSERT_EQ(true, + neteq->RegisterPayloadType(0, SdpAudioFormat("pcmu", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(8, SdpAudioFormat("pcma", 8000, 1))); +#ifdef WEBRTC_CODEC_ILBC + ASSERT_EQ(true, + neteq->RegisterPayloadType(102, SdpAudioFormat("ilbc", 8000, 1))); +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + ASSERT_EQ(true, + neteq->RegisterPayloadType(103, SdpAudioFormat("isac", 16000, 1))); +#endif +#ifdef WEBRTC_CODEC_ISAC + ASSERT_EQ(true, + neteq->RegisterPayloadType(104, SdpAudioFormat("isac", 32000, 1))); +#endif +#ifdef WEBRTC_CODEC_OPUS + ASSERT_EQ(true, + neteq->RegisterPayloadType( + 111, SdpAudioFormat("opus", 48000, 2, {{"stereo", "0"}}))); +#endif + ASSERT_EQ(true, + neteq->RegisterPayloadType(93, SdpAudioFormat("L16", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(94, SdpAudioFormat("L16", 16000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(95, SdpAudioFormat("L16", 32000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(13, SdpAudioFormat("cn", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(98, SdpAudioFormat("cn", 16000, 1))); +} + +} // namespace + +const int NetEqDecodingTest::kTimeStepMs; +const size_t NetEqDecodingTest::kBlockSize8kHz; +const size_t NetEqDecodingTest::kBlockSize16kHz; +const size_t NetEqDecodingTest::kBlockSize32kHz; +const int NetEqDecodingTest::kInitSampleRateHz; + +NetEqDecodingTest::NetEqDecodingTest() + : clock_(0), + config_(), + output_sample_rate_(kInitSampleRateHz), + algorithmic_delay_ms_(0) { + config_.sample_rate_hz = kInitSampleRateHz; +} + +void 
NetEqDecodingTest::SetUp() {
+  auto decoder_factory = CreateBuiltinAudioDecoderFactory();
+  neteq_ = DefaultNetEqFactory().CreateNetEq(config_, decoder_factory, &clock_);
+  NetEqNetworkStatistics stat;
+  ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
+  algorithmic_delay_ms_ = stat.current_buffer_size_ms;
+  ASSERT_TRUE(neteq_);
+  LoadDecoders(neteq_.get());
+}
+
+void NetEqDecodingTest::TearDown() {}
+
+void NetEqDecodingTest::OpenInputFile(absl::string_view rtp_file) {
+  rtp_source_.reset(test::RtpFileSource::Create(rtp_file));
+}
+
+void NetEqDecodingTest::Process() {
+  // Check if time to receive.
+  while (packet_ && clock_.TimeInMilliseconds() >= packet_->time_ms()) {
+    if (packet_->payload_length_bytes() > 0) {
+#ifndef WEBRTC_CODEC_ISAC
+      // Ignore payload type 104 (iSAC-swb) if ISAC is not supported.
+      if (packet_->header().payloadType != 104)
+#endif
+        ASSERT_EQ(
+            0, neteq_->InsertPacket(
+                   packet_->header(),
+                   rtc::ArrayView<const uint8_t>(
+                       packet_->payload(), packet_->payload_length_bytes())));
+    }
+    // Get next packet.
+    packet_ = rtp_source_->NextPacket();
+  }
+
+  // Get audio from NetEq.
+  bool muted;
+  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+  ASSERT_FALSE(muted);
+  ASSERT_TRUE((out_frame_.samples_per_channel_ == kBlockSize8kHz) ||
+              (out_frame_.samples_per_channel_ == kBlockSize16kHz) ||
+              (out_frame_.samples_per_channel_ == kBlockSize32kHz) ||
+              (out_frame_.samples_per_channel_ == kBlockSize48kHz));
+  output_sample_rate_ = out_frame_.sample_rate_hz_;
+  EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz());
+
+  // Increase time.
+  clock_.AdvanceTimeMilliseconds(kTimeStepMs);
+}
+
+void NetEqDecodingTest::DecodeAndCompare(
+    absl::string_view rtp_file,
+    absl::string_view output_checksum,
+    absl::string_view network_stats_checksum,
+    bool gen_ref) {
+  OpenInputFile(rtp_file);
+
+  std::string ref_out_file =
+      gen_ref ? webrtc::test::OutputPath() + "neteq_universal_ref.pcm" : "";
+  ResultSink output(ref_out_file);
+
+  std::string stat_out_file =
+      gen_ref ? webrtc::test::OutputPath() + "neteq_network_stats.dat" : "";
+  ResultSink network_stats(stat_out_file);
+
+  packet_ = rtp_source_->NextPacket();
+  int i = 0;
+  uint64_t last_concealed_samples = 0;
+  uint64_t last_total_samples_received = 0;
+  while (packet_) {
+    rtc::StringBuilder ss;
+    ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
+    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
+    ASSERT_NO_FATAL_FAILURE(Process());
+    ASSERT_NO_FATAL_FAILURE(
+        output.AddResult(out_frame_.data(), out_frame_.samples_per_channel_));
+
+    // Query the network statistics API once per second.
+    if (clock_.TimeInMilliseconds() % 1000 == 0) {
+      // Process NetworkStatistics.
+      NetEqNetworkStatistics current_network_stats;
+      ASSERT_EQ(0, neteq_->NetworkStatistics(&current_network_stats));
+      ASSERT_NO_FATAL_FAILURE(network_stats.AddResult(current_network_stats));
+
+      // Verify that lifetime stats and network stats report similar loss
+      // concealment rates.
+      auto lifetime_stats = neteq_->GetLifetimeStatistics();
+      const uint64_t delta_concealed_samples =
+          lifetime_stats.concealed_samples - last_concealed_samples;
+      last_concealed_samples = lifetime_stats.concealed_samples;
+      const uint64_t delta_total_samples_received =
+          lifetime_stats.total_samples_received - last_total_samples_received;
+      last_total_samples_received = lifetime_stats.total_samples_received;
+      // The tolerance is 2% of full scale, expressed in Q14.
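+      // Illustrative arithmetic (added in this edit, not upstream): with a
+      // Q14 full scale of 1 << 14 = 16384, the allowed deviation below is
+      // (2 << 14) / 100.0 = 327.68, i.e. about 2% of unity.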
+      EXPECT_NEAR(
+          (delta_concealed_samples << 14) / delta_total_samples_received,
+          current_network_stats.expand_rate, (2 << 14) / 100.0);
+    }
+  }
+
+  SCOPED_TRACE("Check output audio.");
+  output.VerifyChecksum(output_checksum);
+  SCOPED_TRACE("Check network stats.");
+  network_stats.VerifyChecksum(network_stats_checksum);
+}
+
+void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
+                                        int timestamp,
+                                        RTPHeader* rtp_info) {
+  rtp_info->sequenceNumber = frame_index;
+  rtp_info->timestamp = timestamp;
+  rtp_info->ssrc = 0x1234;     // Just an arbitrary SSRC.
+  rtp_info->payloadType = 94;  // PCM16b WB codec.
+  rtp_info->markerBit = false;
+}
+
+void NetEqDecodingTest::PopulateCng(int frame_index,
+                                    int timestamp,
+                                    RTPHeader* rtp_info,
+                                    uint8_t* payload,
+                                    size_t* payload_len) {
+  rtp_info->sequenceNumber = frame_index;
+  rtp_info->timestamp = timestamp;
+  rtp_info->ssrc = 0x1234;     // Just an arbitrary SSRC.
+  rtp_info->payloadType = 98;  // WB CNG.
+  rtp_info->markerBit = false;
+  payload[0] = 64;   // Noise level -64 dBov, quite arbitrarily chosen.
+  *payload_len = 1;  // Only noise level, no spectral parameters.
+}
+
+void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
+                                 uint32_t start_timestamp,
+                                 const std::set<uint16_t>& drop_seq_numbers,
+                                 bool expect_seq_no_wrap,
+                                 bool expect_timestamp_wrap) {
+  uint16_t seq_no = start_seq_no;
+  uint32_t timestamp = start_timestamp;
+  const int kBlocksPerFrame = 3;  // Number of 10 ms blocks per frame.
+  const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs;
+  const int kSamples = kBlockSize16kHz * kBlocksPerFrame;
+  const size_t kPayloadBytes = kSamples * sizeof(int16_t);
+  double next_input_time_ms = 0.0;
+
+  // Insert speech for 2 seconds.
+  const int kSpeechDurationMs = 2000;
+  uint16_t last_seq_no;
+  uint32_t last_timestamp;
+  bool timestamp_wrapped = false;
+  bool seq_no_wrapped = false;
+  for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one 30 ms speech frame.
+      uint8_t payload[kPayloadBytes] = {0};
+      RTPHeader rtp_info;
+      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+      if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) {
+        // This sequence number was not in the set to drop. Insert it.
+        ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+      }
+      NetEqNetworkStatistics network_stats;
+      ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
+
+      EXPECT_LE(network_stats.preferred_buffer_size_ms, 80);
+      EXPECT_LE(network_stats.current_buffer_size_ms,
+                80 + algorithmic_delay_ms_);
+      last_seq_no = seq_no;
+      last_timestamp = timestamp;
+
+      ++seq_no;
+      timestamp += kSamples;
+      next_input_time_ms += static_cast<double>(kFrameSizeMs);
+
+      seq_no_wrapped |= seq_no < last_seq_no;
+      timestamp_wrapped |= timestamp < last_timestamp;
+    }
+    // Pull out data once.
+    AudioFrame output;
+    bool muted;
+    ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
+    ASSERT_EQ(kBlockSize16kHz, output.samples_per_channel_);
+    ASSERT_EQ(1u, output.num_channels_);
+
+    // Expect delay (in samples) to be less than 2 packets.
+    absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
+    ASSERT_TRUE(playout_timestamp);
+    EXPECT_LE(timestamp - *playout_timestamp,
+              static_cast<uint32_t>(kSamples * 2));
+  }
+  // Make sure we have actually tested wrap-around.
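+  // The wrap detection above relies on unsigned overflow: after `++seq_no`, a
+  // uint16_t that passed 65535 compares smaller than `last_seq_no`, and the
+  // same holds for the 32-bit RTP timestamp (editorial note, not upstream).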
+  ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped);
+  ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped);
+}
+
+void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
+                                              double network_freeze_ms,
+                                              bool pull_audio_during_freeze,
+                                              int delay_tolerance_ms,
+                                              int max_time_to_speech_ms) {
+  uint16_t seq_no = 0;
+  uint32_t timestamp = 0;
+  const int kFrameSizeMs = 30;
+  const size_t kSamples = kFrameSizeMs * 16;
+  const size_t kPayloadBytes = kSamples * 2;
+  double next_input_time_ms = 0.0;
+  double t_ms;
+  bool muted;
+
+  // Insert speech for 5 seconds.
+  const int kSpeechDurationMs = 5000;
+  for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one 30 ms speech frame.
+      uint8_t payload[kPayloadBytes] = {0};
+      RTPHeader rtp_info;
+      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+      ++seq_no;
+      timestamp += kSamples;
+      next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
+    }
+    // Pull out data once.
+    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+  }
+
+  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
+  absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
+  ASSERT_TRUE(playout_timestamp);
+  int32_t delay_before = timestamp - *playout_timestamp;
+
+  // Insert CNG for 1 minute (= 60000 ms).
+  const int kCngPeriodMs = 100;
+  const int kCngPeriodSamples = kCngPeriodMs * 16;  // Period in 16 kHz samples.
+  const int kCngDurationMs = 60000;
+  for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one CNG frame each 100 ms.
+      uint8_t payload[kPayloadBytes];
+      size_t payload_len;
+      RTPHeader rtp_info;
+      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
+      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
+                                                      payload, payload_len)));
+      ++seq_no;
+      timestamp += kCngPeriodSamples;
+      next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor;
+    }
+    // Pull out data once.
+    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+  }
+
+  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+
+  if (network_freeze_ms > 0) {
+    // First keep pulling audio for `network_freeze_ms` without inserting
+    // any data, then insert CNG data corresponding to `network_freeze_ms`
+    // without pulling any output audio.
+    const double loop_end_time = t_ms + network_freeze_ms;
+    for (; t_ms < loop_end_time; t_ms += 10) {
+      // Pull out data once.
+      ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+      ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+      EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+    }
+    bool pull_once = pull_audio_during_freeze;
+    // If `pull_once` is true, GetAudio will be called once half-way through
+    // the network recovery period.
+    double pull_time_ms = (t_ms + next_input_time_ms) / 2;
+    while (next_input_time_ms <= t_ms) {
+      if (pull_once && next_input_time_ms >= pull_time_ms) {
+        pull_once = false;
+        // Pull out data once.
+        ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+        ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+        EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
+        t_ms += 10;
+      }
+      // Insert one CNG frame each 100 ms.
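+      // At the 16 kHz rate used here, each CNG packet advances the RTP
+      // timestamp by kCngPeriodSamples = 100 * 16 = 1600 ticks (worked
+      // example added in this edit; not in the original file).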
+      uint8_t payload[kPayloadBytes];
+      size_t payload_len;
+      RTPHeader rtp_info;
+      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
+      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
+                                                      payload, payload_len)));
+      ++seq_no;
+      timestamp += kCngPeriodSamples;
+      next_input_time_ms += kCngPeriodMs * drift_factor;
+    }
+  }
+
+  // Insert speech again until output type is speech.
+  double speech_restart_time_ms = t_ms;
+  while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one 30 ms speech frame.
+      uint8_t payload[kPayloadBytes] = {0};
+      RTPHeader rtp_info;
+      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
+      ++seq_no;
+      timestamp += kSamples;
+      next_input_time_ms += kFrameSizeMs * drift_factor;
+    }
+    // Pull out data once.
+    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
+    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
+    // Increase clock.
+    t_ms += 10;
+  }
+
+  // Check that the speech starts again within reasonable time.
+  double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
+  EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
+  playout_timestamp = neteq_->GetPlayoutTimestamp();
+  ASSERT_TRUE(playout_timestamp);
+  int32_t delay_after = timestamp - *playout_timestamp;
+  // Compare delay before and after, and make sure the difference is within
+  // `delay_tolerance_ms` (converted to samples at 16 kHz).
+  EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
+  EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
+}
+
+void NetEqDecodingTestTwoInstances::SetUp() {
+  NetEqDecodingTest::SetUp();
+  config2_ = config_;
+}
+
+void NetEqDecodingTestTwoInstances::CreateSecondInstance() {
+  auto decoder_factory = CreateBuiltinAudioDecoderFactory();
+  neteq2_ =
+      DefaultNetEqFactory().CreateNetEq(config2_, decoder_factory, &clock_);
+  ASSERT_TRUE(neteq2_);
+  LoadDecoders(neteq2_.get());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h
new file mode 100644
index 0000000000..456c397fdd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_
+#define MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/audio_frame.h"
+#include "api/neteq/neteq.h"
+#include "api/rtp_headers.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+class NetEqDecodingTest : public ::testing::Test {
+ protected:
+  // NetEQ must be polled for data once every 10 ms.
+  // Thus, none of the constants below can be changed.
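+  // For instance, kBlockSize16kHz works out to 10 ms * 16 samples/ms = 160
+  // samples per pull (illustrative note added in this edit, not upstream).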
+  static constexpr int kTimeStepMs = 10;
+  static constexpr size_t kBlockSize8kHz = kTimeStepMs * 8;
+  static constexpr size_t kBlockSize16kHz = kTimeStepMs * 16;
+  static constexpr size_t kBlockSize32kHz = kTimeStepMs * 32;
+  static constexpr size_t kBlockSize48kHz = kTimeStepMs * 48;
+  static constexpr int kInitSampleRateHz = 8000;
+
+  NetEqDecodingTest();
+  virtual void SetUp();
+  virtual void TearDown();
+  void OpenInputFile(absl::string_view rtp_file);
+  void Process();
+
+  void DecodeAndCompare(absl::string_view rtp_file,
+                        absl::string_view output_checksum,
+                        absl::string_view network_stats_checksum,
+                        bool gen_ref);
+
+  static void PopulateRtpInfo(int frame_index,
+                              int timestamp,
+                              RTPHeader* rtp_info);
+  static void PopulateCng(int frame_index,
+                          int timestamp,
+                          RTPHeader* rtp_info,
+                          uint8_t* payload,
+                          size_t* payload_len);
+
+  void WrapTest(uint16_t start_seq_no,
+                uint32_t start_timestamp,
+                const std::set<uint16_t>& drop_seq_numbers,
+                bool expect_seq_no_wrap,
+                bool expect_timestamp_wrap);
+
+  void LongCngWithClockDrift(double drift_factor,
+                             double network_freeze_ms,
+                             bool pull_audio_during_freeze,
+                             int delay_tolerance_ms,
+                             int max_time_to_speech_ms);
+
+  SimulatedClock clock_;
+  std::unique_ptr<NetEq> neteq_;
+  NetEq::Config config_;
+  std::unique_ptr<test::RtpFileSource> rtp_source_;
+  std::unique_ptr<test::Packet> packet_;
+  AudioFrame out_frame_;
+  int output_sample_rate_;
+  int algorithmic_delay_ms_;
+};
+
+class NetEqDecodingTestTwoInstances : public NetEqDecodingTest {
+ public:
+  NetEqDecodingTestTwoInstances() : NetEqDecodingTest() {}
+
+  void SetUp() override;
+
+  void CreateSecondInstance();
+
+ protected:
+  std::unique_ptr<NetEq> neteq2_;
+  NetEq::Config config2_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc
new file mode 100644
index 0000000000..1004141f16
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "absl/flags/flag.h"
+#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h"
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds).");
+
+using ::testing::InitGoogleTest;
+
+namespace webrtc {
+namespace test {
+namespace {
+static const int kInputSampleRateKhz = 8;
+static const int kOutputSampleRateKhz = 8;
+}  // namespace
+
+class NetEqIlbcQualityTest : public NetEqQualityTest {
+ protected:
+  NetEqIlbcQualityTest()
+      : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms),
+                         kInputSampleRateKhz,
+                         kOutputSampleRateKhz,
+                         SdpAudioFormat("ilbc", 8000, 1)) {
+    // Flag validation
+    RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) == 20 ||
+              absl::GetFlag(FLAGS_frame_size_ms) == 30 ||
+              absl::GetFlag(FLAGS_frame_size_ms) == 40 ||
+              absl::GetFlag(FLAGS_frame_size_ms) == 60)
+        << "Invalid frame size, should be 20, 30, 40, or 60 ms.";
+  }
+
+  void SetUp() override {
+    ASSERT_EQ(1u, channels_) << "iLBC supports only mono audio.";
+    AudioEncoderIlbcConfig config;
+    config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms);
+    encoder_.reset(new AudioEncoderIlbcImpl(config, 102));
+    NetEqQualityTest::SetUp();
+  }
+
+  int EncodeBlock(int16_t* in_data,
+                  size_t block_size_samples,
+                  rtc::Buffer* payload,
+                  size_t max_bytes) override {
+    const size_t kFrameSizeSamples = 80;  // Samples per 10 ms.
+    size_t encoded_samples = 0;
+    uint32_t dummy_timestamp = 0;
+    AudioEncoder::EncodedInfo info;
+    do {
+      info = encoder_->Encode(dummy_timestamp,
+                              rtc::ArrayView<const int16_t>(
+                                  in_data + encoded_samples, kFrameSizeSamples),
+                              payload);
+      encoded_samples += kFrameSizeSamples;
+    } while (info.encoded_bytes == 0);
+    return rtc::checked_cast<int>(info.encoded_bytes);
+  }
+
+ private:
+  std::unique_ptr<AudioEncoderIlbcImpl> encoder_;
+};
+
+TEST_F(NetEqIlbcQualityTest, Test) {
+  Simulate();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc
new file mode 100644
index 0000000000..5a2df24ef6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "absl/flags/flag.h"
+#include "modules/audio_coding/codecs/opus/opus_inst.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+
+ABSL_FLAG(int, bit_rate_kbps, 32, "Target bit rate (kbps).");
+
+ABSL_FLAG(int,
+          complexity,
+          10,
+          "Complexity: 0 ~ 10 -- defined as in Opus "
+          "specification.");
+
+ABSL_FLAG(int, maxplaybackrate, 48000, "Maximum playback rate (Hz).");
+
+ABSL_FLAG(int, application, 0, "Application mode: 0 -- VOIP, 1 -- Audio.");
+
+ABSL_FLAG(int, reported_loss_rate, 10, "Reported percentile of packet loss.");
+
+ABSL_FLAG(bool, fec, false, "Enable FEC for encoding (-nofec to disable).");
+
+ABSL_FLAG(bool, dtx, false, "Enable DTX for encoding (-nodtx to disable).");
+
+ABSL_FLAG(int, sub_packets, 1, "Number of sub packets to repacketize.");
+
+using ::testing::InitGoogleTest;
+
+namespace webrtc {
+namespace test {
+namespace {
+
+static const int kOpusBlockDurationMs = 20;
+static const int kOpusSamplingKhz = 48;
+}  // namespace
+
+class NetEqOpusQualityTest : public NetEqQualityTest {
+ protected:
+  NetEqOpusQualityTest();
+  void SetUp() override;
+  void TearDown() override;
+  int EncodeBlock(int16_t* in_data,
+                  size_t block_size_samples,
+                  rtc::Buffer* payload,
+                  size_t max_bytes) override;
+
+ private:
+  WebRtcOpusEncInst* opus_encoder_;
+  OpusRepacketizer* repacketizer_;
+  size_t sub_block_size_samples_;
+  int bit_rate_kbps_;
+  bool fec_;
+  bool dtx_;
+  int complexity_;
+  int maxplaybackrate_;
+  int target_loss_rate_;
+  int sub_packets_;
+  int application_;
+};
+
+NetEqOpusQualityTest::NetEqOpusQualityTest()
+    : NetEqQualityTest(kOpusBlockDurationMs * absl::GetFlag(FLAGS_sub_packets),
+                       kOpusSamplingKhz,
+                       kOpusSamplingKhz,
+                       SdpAudioFormat("opus", 48000, 2)),
+      opus_encoder_(NULL),
+      repacketizer_(NULL),
+      sub_block_size_samples_(
+          static_cast<size_t>(kOpusBlockDurationMs * kOpusSamplingKhz)),
+      bit_rate_kbps_(absl::GetFlag(FLAGS_bit_rate_kbps)),
+      fec_(absl::GetFlag(FLAGS_fec)),
+      dtx_(absl::GetFlag(FLAGS_dtx)),
+      complexity_(absl::GetFlag(FLAGS_complexity)),
+      maxplaybackrate_(absl::GetFlag(FLAGS_maxplaybackrate)),
+      target_loss_rate_(absl::GetFlag(FLAGS_reported_loss_rate)),
+      sub_packets_(absl::GetFlag(FLAGS_sub_packets)) {
+  // Flag validation
+  RTC_CHECK(absl::GetFlag(FLAGS_bit_rate_kbps) >= 6 &&
+            absl::GetFlag(FLAGS_bit_rate_kbps) <= 510)
+      << "Invalid bit rate, should be between 6 and 510 kbps.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_complexity) >= -1 &&
+            absl::GetFlag(FLAGS_complexity) <= 10)
+      << "Invalid complexity setting, should be between 0 and 10.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_application) == 0 ||
+            absl::GetFlag(FLAGS_application) == 1)
+      << "Invalid application mode, should be 0 or 1.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_reported_loss_rate) >= 0 &&
+            absl::GetFlag(FLAGS_reported_loss_rate) <= 100)
+      << "Invalid packet loss percentile, should be between 0 and 100.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_sub_packets) >= 1 &&
+            absl::GetFlag(FLAGS_sub_packets) <= 3)
+      << "Invalid number of sub packets, should be between 1 and 3.";
+
+  // Redefine decoder type if input is stereo.
+  if (channels_ > 1) {
+    audio_format_ = SdpAudioFormat("opus", 48000, 2,
+                                   SdpAudioFormat::Parameters{{"stereo", "1"}});
+  }
+  application_ = absl::GetFlag(FLAGS_application);
+}
+
+void NetEqOpusQualityTest::SetUp() {
+  // Create encoder memory.
+  WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, application_, 48000);
+  ASSERT_TRUE(opus_encoder_);
+
+  // Create repacketizer.
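+  // The repacketizer merges `sub_packets_` consecutive 20 ms Opus frames
+  // into a single payload in EncodeBlock() below, mimicking senders that
+  // bundle several frames per RTP packet.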
+  repacketizer_ = opus_repacketizer_create();
+  ASSERT_TRUE(repacketizer_);
+
+  // Set bitrate.
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_kbps_ * 1000));
+  if (fec_) {
+    EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
+  }
+  if (dtx_) {
+    EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_));
+  }
+  EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity_));
+  EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, maxplaybackrate_));
+  EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, target_loss_rate_));
+  NetEqQualityTest::SetUp();
+}
+
+void NetEqOpusQualityTest::TearDown() {
+  // Free memory.
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  opus_repacketizer_destroy(repacketizer_);
+  NetEqQualityTest::TearDown();
+}
+
+int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data,
+                                      size_t block_size_samples,
+                                      rtc::Buffer* payload,
+                                      size_t max_bytes) {
+  EXPECT_EQ(block_size_samples, sub_block_size_samples_ * sub_packets_);
+  int16_t* pointer = in_data;
+  int value;
+  opus_repacketizer_init(repacketizer_);
+  for (int idx = 0; idx < sub_packets_; idx++) {
+    payload->AppendData(max_bytes, [&](rtc::ArrayView<uint8_t> payload) {
+      value = WebRtcOpus_Encode(opus_encoder_, pointer, sub_block_size_samples_,
+                                max_bytes, payload.data());
+
+      Log() << "Encoded a frame with Opus mode "
+            << (value == 0 ? 0 : payload[0] >> 3) << std::endl;
+
+      return (value >= 0) ? static_cast<size_t>(value) : 0;
+    });
+
+    if (OPUS_OK !=
+        opus_repacketizer_cat(repacketizer_, payload->data(), value)) {
+      opus_repacketizer_init(repacketizer_);
+      // If the repacketization fails, we discard this frame.
+      return 0;
+    }
+    pointer += sub_block_size_samples_ * channels_;
+  }
+  value = opus_repacketizer_out(repacketizer_, payload->data(),
+                                static_cast<opus_int32>(max_bytes));
+  EXPECT_GE(value, 0);
+  return value;
+}
+
+TEST_F(NetEqOpusQualityTest, Test) {
+  Simulate();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc
new file mode 100644
index 0000000000..c3e160cb66
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "absl/flags/flag.h"
+#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds).");
+
+using ::testing::InitGoogleTest;
+
+namespace webrtc {
+namespace test {
+namespace {
+static const int kInputSampleRateKhz = 48;
+static const int kOutputSampleRateKhz = 48;
+}  // namespace
+
+class NetEqPcm16bQualityTest : public NetEqQualityTest {
+ protected:
+  NetEqPcm16bQualityTest()
+      : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms),
+                         kInputSampleRateKhz,
+                         kOutputSampleRateKhz,
+                         SdpAudioFormat("l16", 48000, 1)) {
+    // Flag validation
+    RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) >= 10 &&
+              absl::GetFlag(FLAGS_frame_size_ms) <= 60 &&
+              (absl::GetFlag(FLAGS_frame_size_ms) % 10) == 0)
+        << "Invalid frame size, should be 10, 20, ..., 60 ms.";
+  }
+
+  void SetUp() override {
+    AudioEncoderPcm16B::Config config;
+    config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms);
+    config.sample_rate_hz = 48000;
+    config.num_channels = channels_;
+    encoder_.reset(new AudioEncoderPcm16B(config));
+    NetEqQualityTest::SetUp();
+  }
+
+  int EncodeBlock(int16_t* in_data,
+                  size_t block_size_samples,
+                  rtc::Buffer* payload,
+                  size_t max_bytes) override {
+    const size_t kFrameSizeSamples = 480;  // Samples per 10 ms.
+    size_t encoded_samples = 0;
+    uint32_t dummy_timestamp = 0;
+    AudioEncoder::EncodedInfo info;
+    do {
+      info = encoder_->Encode(dummy_timestamp,
+                              rtc::ArrayView<const int16_t>(
+                                  in_data + encoded_samples, kFrameSizeSamples),
+                              payload);
+      encoded_samples += kFrameSizeSamples;
+    } while (info.encoded_bytes == 0);
+    return rtc::checked_cast<int>(info.encoded_bytes);
+  }
+
+ private:
+  std::unique_ptr<AudioEncoderPcm16B> encoder_;
+};
+
+TEST_F(NetEqPcm16bQualityTest, Test) {
+  Simulate();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc
new file mode 100644
index 0000000000..d22170c623
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "absl/flags/flag.h"
+#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds).");
+
+using ::testing::InitGoogleTest;
+
+namespace webrtc {
+namespace test {
+namespace {
+static const int kInputSampleRateKhz = 8;
+static const int kOutputSampleRateKhz = 8;
+}  // namespace
+
+class NetEqPcmuQualityTest : public NetEqQualityTest {
+ protected:
+  NetEqPcmuQualityTest()
+      : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms),
+                         kInputSampleRateKhz,
+                         kOutputSampleRateKhz,
+                         SdpAudioFormat("pcmu", 8000, 1)) {
+    // Flag validation
+    RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) >= 10 &&
+              absl::GetFlag(FLAGS_frame_size_ms) <= 60 &&
+              (absl::GetFlag(FLAGS_frame_size_ms) % 10) == 0)
+        << "Invalid frame size, should be 10, 20, ..., 60 ms.";
+  }
+
+  void SetUp() override {
+    ASSERT_EQ(1u, channels_) << "PCMu supports only mono audio.";
+    AudioEncoderPcmU::Config config;
+    config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms);
+    encoder_.reset(new AudioEncoderPcmU(config));
+    NetEqQualityTest::SetUp();
+  }
+
+  int EncodeBlock(int16_t* in_data,
+                  size_t block_size_samples,
+                  rtc::Buffer* payload,
+                  size_t max_bytes) override {
+    const size_t kFrameSizeSamples = 80;  // Samples per 10 ms.
+    size_t encoded_samples = 0;
+    uint32_t dummy_timestamp = 0;
+    AudioEncoder::EncodedInfo info;
+    do {
+      info = encoder_->Encode(dummy_timestamp,
+                              rtc::ArrayView<const int16_t>(
+                                  in_data + encoded_samples, kFrameSizeSamples),
+                              payload);
+      encoded_samples += kFrameSizeSamples;
+    } while (info.encoded_bytes == 0);
+    return rtc::checked_cast<int>(info.encoded_bytes);
+  }
+
+ private:
+  std::unique_ptr<AudioEncoderPcmU> encoder_;
+};
+
+TEST_F(NetEqPcmuQualityTest, Test) {
+  Simulate();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc
new file mode 100644
index 0000000000..961f74ab66
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/test/metrics/global_metrics_logger_and_exporter.h"
+#include "api/test/metrics/metric.h"
+#include "modules/audio_coding/neteq/tools/neteq_performance_test.h"
+#include "system_wrappers/include/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::webrtc::test::GetGlobalMetricsLogger;
+using ::webrtc::test::ImprovementDirection;
+using ::webrtc::test::Unit;
+
+// Runs a test with 10% packet losses and 10% clock drift, to exercise
+// both loss concealment and time-stretching code.
+TEST(NetEqPerformanceTest, 10_Pl_10_Drift) {
+  const int kSimulationTimeMs = 10000000;
+  const int kQuickSimulationTimeMs = 100000;
+  const int kLossPeriod = 10;  // Drop every 10th packet.
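+  // kDriftFactor = 0.1 simulates roughly 10% clock drift between sender and
+  // receiver; the exact interpretation of the factor is defined by
+  // test::NetEqPerformanceTest::Run().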
+  const double kDriftFactor = 0.1;
+  int64_t runtime = test::NetEqPerformanceTest::Run(
+      field_trial::IsEnabled("WebRTC-QuickPerfTest") ? kQuickSimulationTimeMs
+                                                     : kSimulationTimeMs,
+      kLossPeriod, kDriftFactor);
+  ASSERT_GT(runtime, 0);
+  GetGlobalMetricsLogger()->LogSingleValueMetric(
+      "neteq_performance", "10_pl_10_drift", runtime, Unit::kMilliseconds,
+      ImprovementDirection::kNeitherIsBetter);
+}
+
+// Runs a test with neither packet losses nor clock drift, to put
+// emphasis on the "good-weather" code path, which is presumably much
+// more lightweight.
+TEST(NetEqPerformanceTest, 0_Pl_0_Drift) {
+  const int kSimulationTimeMs = 10000000;
+  const int kQuickSimulationTimeMs = 100000;
+  const int kLossPeriod = 0;  // No losses.
+  const double kDriftFactor = 0.0;  // No clock drift.
+  int64_t runtime = test::NetEqPerformanceTest::Run(
+      field_trial::IsEnabled("WebRTC-QuickPerfTest") ? kQuickSimulationTimeMs
+                                                     : kSimulationTimeMs,
+      kLossPeriod, kDriftFactor);
+  ASSERT_GT(runtime, 0);
+  GetGlobalMetricsLogger()->LogSingleValueMetric(
+      "neteq_performance", "0_pl_0_drift", runtime, Unit::kMilliseconds,
+      ImprovementDirection::kNeitherIsBetter);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc
new file mode 100644
index 0000000000..a72b2009eb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <iostream>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "modules/audio_coding/neteq/tools/neteq_performance_test.h"
+#include "rtc_base/checks.h"
+
+// Define command line flags.
+ABSL_FLAG(int, runtime_ms, 10000, "Simulated runtime in ms.");
+ABSL_FLAG(int, lossrate, 10, "Packet lossrate; drop every N packets.");
+ABSL_FLAG(float, drift, 0.1f, "Clockdrift factor.");
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  std::string program_name = args[0];
+  std::string usage =
+      "Tool for measuring the speed of NetEq.\n"
+      "Usage: " +
+      program_name +
+      " [options]\n\n"
+      "  --runtime_ms=N         runtime in ms; default is 10000 ms\n"
+      "  --lossrate=N           drop every N packets; default is 10\n"
+      "  --drift=F              clockdrift factor between 0.0 and 1.0; "
+      "default is 0.1\n";
+  if (args.size() != 1) {
+    printf("%s", usage.c_str());
+    return 1;
+  }
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_runtime_ms), 0);
+  RTC_CHECK_GE(absl::GetFlag(FLAGS_lossrate), 0);
+  RTC_CHECK(absl::GetFlag(FLAGS_drift) >= 0.0 &&
+            absl::GetFlag(FLAGS_drift) < 1.0);
+
+  int64_t result = webrtc::test::NetEqPerformanceTest::Run(
+      absl::GetFlag(FLAGS_runtime_ms), absl::GetFlag(FLAGS_lossrate),
+      absl::GetFlag(FLAGS_drift));
+  if (result <= 0) {
+    std::cout << "There was an error" << std::endl;
+    return -1;
+  }
+
+  std::cout << "Simulation done" << std::endl;
+  std::cout << "Runtime = " << result << " ms" << std::endl;
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc
new file mode 100644
index 0000000000..f5d50dc859
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc
@@ -0,0 +1,109 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/test/result_sink.h"
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/message_digest.h"
+#include "rtc_base/string_encode.h"
+#include "test/gtest.h"
+
+#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h"
+#else
+#include "modules/audio_coding/neteq/neteq_unittest.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+#endif
+
+namespace webrtc {
+
+#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
+void Convert(const webrtc::NetEqNetworkStatistics& stats_raw,
+             webrtc::neteq_unittest::NetEqNetworkStatistics* stats) {
+  stats->set_current_buffer_size_ms(stats_raw.current_buffer_size_ms);
+  stats->set_preferred_buffer_size_ms(stats_raw.preferred_buffer_size_ms);
+  stats->set_jitter_peaks_found(stats_raw.jitter_peaks_found);
+  stats->set_expand_rate(stats_raw.expand_rate);
+  stats->set_speech_expand_rate(stats_raw.speech_expand_rate);
+  stats->set_preemptive_rate(stats_raw.preemptive_rate);
+  stats->set_accelerate_rate(stats_raw.accelerate_rate);
+  stats->set_secondary_decoded_rate(stats_raw.secondary_decoded_rate);
+  stats->set_secondary_discarded_rate(stats_raw.secondary_discarded_rate);
+  stats->set_mean_waiting_time_ms(stats_raw.mean_waiting_time_ms);
+  stats->set_median_waiting_time_ms(stats_raw.median_waiting_time_ms);
+  stats->set_min_waiting_time_ms(stats_raw.min_waiting_time_ms);
+  stats->set_max_waiting_time_ms(stats_raw.max_waiting_time_ms);
+}
+
+void AddMessage(FILE* file,
+                rtc::MessageDigest* digest,
+                absl::string_view message) {
+  int32_t size = message.length();
+  if (file)
+    ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
+  digest->Update(&size, sizeof(size));
+
+  if (file)
+    ASSERT_EQ(static_cast<size_t>(size),
+              fwrite(message.data(), sizeof(char), size, file));
+  digest->Update(message.data(), sizeof(char) * size);
+}
+
+#endif  // WEBRTC_NETEQ_UNITTEST_BITEXACT
+
+ResultSink::ResultSink(absl::string_view output_file)
+    : output_fp_(nullptr),
+      digest_(rtc::MessageDigestFactory::Create(rtc::DIGEST_SHA_1)) {
+  if (!output_file.empty()) {
+    output_fp_ = fopen(std::string(output_file).c_str(), "wb");
+    EXPECT_TRUE(output_fp_ != NULL);
+  }
+}
+
+ResultSink::~ResultSink() {
+  if (output_fp_)
+    fclose(output_fp_);
+}
+
+void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
+#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
+  neteq_unittest::NetEqNetworkStatistics stats;
+  Convert(stats_raw, &stats);
+
+  std::string stats_string;
+  ASSERT_TRUE(stats.SerializeToString(&stats_string));
+  AddMessage(output_fp_, digest_.get(), stats_string);
+#else
+  FAIL() << "Writing to reference file requires Proto Buffer.";
+#endif  // WEBRTC_NETEQ_UNITTEST_BITEXACT
+}
+
+void ResultSink::VerifyChecksum(absl::string_view checksum) {
+  std::string buffer;
+  buffer.resize(digest_->Size());
+  digest_->Finish(buffer.data(), buffer.size());
+  const std::string result = rtc::hex_encode(buffer);
+  if (checksum.size() == result.size()) {
+    EXPECT_EQ(checksum, result);
+  } else {
+    // Check that the result is one of the '|'-separated checksums.
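+    // A reference entry may list several acceptable checksums joined by '|'
+    // (e.g. when the output is platform-dependent), so a substring match is
+    // used instead of strict equality.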
+    EXPECT_NE(checksum.find(result), absl::string_view::npos)
+        << result << " should be one of these:\n"
+        << checksum;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h
new file mode 100644
index 0000000000..c6923d7a7f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_
+#define MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_
+
+#include <cstdio>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/neteq/neteq.h"
+#include "rtc_base/message_digest.h"
+
+namespace webrtc {
+
+class ResultSink {
+ public:
+  explicit ResultSink(absl::string_view output_file);
+  ~ResultSink();
+
+  template <typename T>
+  void AddResult(const T* test_results, size_t length);
+
+  void AddResult(const NetEqNetworkStatistics& stats);
+
+  void VerifyChecksum(absl::string_view ref_check_sum);
+
+ private:
+  FILE* output_fp_;
+  std::unique_ptr<rtc::MessageDigest> digest_;
+};
+
+template <typename T>
+void ResultSink::AddResult(const T* test_results, size_t length) {
+  if (output_fp_) {
+    ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
+  }
+  digest_->Update(test_results, sizeof(T) * length);
+}
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc
new file mode 100644
index 0000000000..b89be0608d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc
@@ -0,0 +1,216 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/time_stretch.h"
+
+#include <algorithm>  // min, max
+#include <memory>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/cross_correlation.h"
+#include "modules/audio_coding/neteq/dsp_helper.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
+                                              size_t input_len,
+                                              bool fast_mode,
+                                              AudioMultiVector* output,
+                                              size_t* length_change_samples) {
+  // Pre-calculate common multiplication with `fs_mult_`.
+  size_t fs_mult_120 =
+      static_cast<size_t>(fs_mult_ * 120);  // Corresponds to 15 ms.
+
+  const int16_t* signal;
+  std::unique_ptr<int16_t[]> signal_array;
+  size_t signal_len;
+  if (num_channels_ == 1) {
+    signal = input;
+    signal_len = input_len;
+  } else {
+    // We want `signal` to be only the first channel of `input`, which is
+    // interleaved. Thus, we take the first sample, skip forward
+    // `num_channels` samples, and continue like that.
+    signal_len = input_len / num_channels_;
+    signal_array.reset(new int16_t[signal_len]);
+    signal = signal_array.get();
+    size_t j = kRefChannel;
+    for (size_t i = 0; i < signal_len; ++i) {
+      signal_array[i] = input[j];
+      j += num_channels_;
+    }
+  }
+
+  // Find maximum absolute value of input signal.
+  max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len);
+
+  // Downsample to 4 kHz sample rate and calculate auto-correlation.
+  DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,
+                              sample_rate_hz_, true /* compensate delay*/,
+                              downsampled_input_);
+  AutoCorrelation();
+
+  // Find the strongest correlation peak.
+  static const size_t kNumPeaks = 1;
+  size_t peak_index;
+  int16_t peak_value;
+  DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,
+                           fs_mult_, &peak_index, &peak_value);
+  // Assert that `peak_index` stays within boundaries.
+  RTC_DCHECK_LE(peak_index, (2 * kCorrelationLen - 1) * fs_mult_);
+
+  // Compensate peak_index for displaced starting position. The displacement
+  // happens in AutoCorrelation(). Here, `kMinLag` is in the down-sampled 4 kHz
+  // domain, while the `peak_index` is in the original sample rate; hence, the
+  // multiplication by fs_mult_ * 2.
+  peak_index += kMinLag * fs_mult_ * 2;
+  // Assert that `peak_index` stays within boundaries.
+  RTC_DCHECK_GE(peak_index, static_cast<size_t>(20 * fs_mult_));
+  RTC_DCHECK_LE(peak_index,
+                20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);
+
+  // Calculate scaling to ensure that `peak_index` samples can be square-summed
+  // without overflowing.
+  int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -
+                WebRtcSpl_NormW32(static_cast<int32_t>(peak_index));
+  scaling = std::max(0, scaling);
+
+  // `vec1` starts at 15 ms minus one pitch period.
+  const int16_t* vec1 = &signal[fs_mult_120 - peak_index];
+  // `vec2` starts at 15 ms.
+  const int16_t* vec2 = &signal[fs_mult_120];
+  // Calculate energies for `vec1` and `vec2`, assuming they both contain
+  // `peak_index` samples.
+  int32_t vec1_energy =
+      WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);
+  int32_t vec2_energy =
+      WebRtcSpl_DotProductWithScale(vec2, vec2, peak_index, scaling);
+
+  // Calculate cross-correlation between `vec1` and `vec2`.
+  int32_t cross_corr =
+      WebRtcSpl_DotProductWithScale(vec1, vec2, peak_index, scaling);
+
+  // Check if the signal seems to be active speech or not (simple VAD).
+  bool active_speech =
+      SpeechDetection(vec1_energy, vec2_energy, peak_index, scaling);
+
+  int16_t best_correlation;
+  if (!active_speech) {
+    SetParametersForPassiveSpeech(signal_len, &best_correlation, &peak_index);
+  } else {
+    // Calculate correlation:
+    // cross_corr / sqrt(vec1_energy * vec2_energy).
+
+    // Start with calculating scale values.
+    int energy1_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec1_energy));
+    int energy2_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec2_energy));
+
+    // Make sure total scaling is even (to simplify scale factor after sqrt).
+    if ((energy1_scale + energy2_scale) & 1) {
+      // The sum is odd.
+      energy1_scale += 1;
+    }
+
+    // Scale energies to int16_t.
+    int16_t vec1_energy_int16 =
+        static_cast<int16_t>(vec1_energy >> energy1_scale);
+    int16_t vec2_energy_int16 =
+        static_cast<int16_t>(vec2_energy >> energy2_scale);
+
+    // Calculate square-root of energy product.
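+    // Both energies carry a combined scale of energy1_scale + energy2_scale
+    // bits (made even above), so their square root carries half that scale;
+    // this is what the Q14 adjustment below compensates for.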
+    int16_t sqrt_energy_prod =
+        WebRtcSpl_SqrtFloor(vec1_energy_int16 * vec2_energy_int16);
+
+    // Calculate cross_corr / sqrt(en1*en2) in Q14.
+    int temp_scale = 14 - (energy1_scale + energy2_scale) / 2;
+    cross_corr = WEBRTC_SPL_SHIFT_W32(cross_corr, temp_scale);
+    cross_corr = std::max(0, cross_corr);  // Don't use if negative.
+    best_correlation = WebRtcSpl_DivW32W16(cross_corr, sqrt_energy_prod);
+    // Make sure `best_correlation` is no larger than 1 in Q14.
+    best_correlation = std::min(static_cast<int16_t>(16384), best_correlation);
+  }
+
+  // Check accelerate criteria and stretch the signal.
+  ReturnCodes return_value =
+      CheckCriteriaAndStretch(input, input_len, peak_index, best_correlation,
+                              active_speech, fast_mode, output);
+  switch (return_value) {
+    case kSuccess:
+      *length_change_samples = peak_index;
+      break;
+    case kSuccessLowEnergy:
+      *length_change_samples = peak_index;
+      break;
+    case kNoStretch:
+    case kError:
+      *length_change_samples = 0;
+      break;
+  }
+  return return_value;
+}
+
+void TimeStretch::AutoCorrelation() {
+  // Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain.
+  int32_t auto_corr[kCorrelationLen];
+  CrossCorrelationWithAutoShift(
+      &downsampled_input_[kMaxLag], &downsampled_input_[kMaxLag - kMinLag],
+      kCorrelationLen, kMaxLag - kMinLag, -1, auto_corr);
+
+  // Normalize correlation to 14 bits and write to `auto_correlation_`.
+  int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);
+  int scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));
+  WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,
+                                   auto_corr, scaling);
+}
+
+bool TimeStretch::SpeechDetection(int32_t vec1_energy,
+                                  int32_t vec2_energy,
+                                  size_t peak_index,
+                                  int scaling) const {
+  // Check if the signal seems to be active speech or not (simple VAD).
+  // If (vec1_energy + vec2_energy) / (2 * peak_index) <=
+  // 8 * background_noise_energy, then we say that the signal contains no
+  // active speech.
+  // Rewrite the inequality as:
+  // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.
+  // The two sides of the inequality will be denoted `left_side` and
+  // `right_side`.
+  int32_t left_side = rtc::saturated_cast<int32_t>(
+      (static_cast<int64_t>(vec1_energy) + vec2_energy) / 16);
+  int32_t right_side;
+  if (background_noise_.initialized()) {
+    right_side = background_noise_.Energy(kRefChannel);
+  } else {
+    // If noise parameters have not been estimated, use a fixed threshold.
+    right_side = 75000;
+  }
+  int right_scale = 16 - WebRtcSpl_NormW32(right_side);
+  right_scale = std::max(0, right_scale);
+  left_side = left_side >> right_scale;
+  right_side =
+      rtc::dchecked_cast<int32_t>(peak_index) * (right_side >> right_scale);
+
+  // Scale `left_side` properly before comparing with `right_side`.
+  // (`scaling` is the scale factor before energy calculation, thus the scale
+  // factor for the energy is 2 * scaling.)
+  if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {
+    // Cannot scale only `left_side`, must scale `right_side` too.
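+    // Split the required 2 * scaling shift between the two sides:
+    // `left_side` takes as many bits as it can without overflowing, and
+    // `right_side` absorbs the remainder as a right-shift.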
+    int temp_scale = WebRtcSpl_NormW32(left_side);
+    left_side = left_side << temp_scale;
+    right_side = right_side >> (2 * scaling - temp_scale);
+  } else {
+    left_side = left_side << 2 * scaling;
+  }
+  return left_side > right_side;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h
new file mode 100644
index 0000000000..f0ddaebeca
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
+#define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
+
+#include <string.h>  // memset, size_t
+
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+
+namespace webrtc {
+
+// Forward declarations.
+class BackgroundNoise;
+
+// This is the base class for Accelerate and PreemptiveExpand. This class
+// cannot be instantiated, but must be used through either of the derived
+// classes.
+class TimeStretch {
+ public:
+  enum ReturnCodes {
+    kSuccess = 0,
+    kSuccessLowEnergy = 1,
+    kNoStretch = 2,
+    kError = -1
+  };
+
+  TimeStretch(int sample_rate_hz,
+              size_t num_channels,
+              const BackgroundNoise& background_noise)
+      : sample_rate_hz_(sample_rate_hz),
+        fs_mult_(sample_rate_hz / 8000),
+        num_channels_(num_channels),
+        background_noise_(background_noise),
+        max_input_value_(0) {
+    RTC_DCHECK(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 ||
+               sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000);
+    RTC_DCHECK_GT(num_channels_, 0);
+    memset(auto_correlation_, 0, sizeof(auto_correlation_));
+  }
+
+  virtual ~TimeStretch() {}
+
+  TimeStretch(const TimeStretch&) = delete;
+  TimeStretch& operator=(const TimeStretch&) = delete;
+
+  // This method performs the processing common to both Accelerate and
+  // PreemptiveExpand.
+  ReturnCodes Process(const int16_t* input,
+                      size_t input_len,
+                      bool fast_mode,
+                      AudioMultiVector* output,
+                      size_t* length_change_samples);
+
+ protected:
+  // Sets the parameters `best_correlation` and `peak_index` to suitable
+  // values when the signal contains no active speech. This method must be
+  // implemented by the sub-classes.
+  virtual void SetParametersForPassiveSpeech(size_t input_length,
+                                             int16_t* best_correlation,
+                                             size_t* peak_index) const = 0;
+
+  // Checks the criteria for performing the time-stretching operation and,
+  // if possible, performs the time-stretching. This method must be implemented
+  // by the sub-classes.
+  virtual ReturnCodes CheckCriteriaAndStretch(
+      const int16_t* input,
+      size_t input_length,
+      size_t peak_index,
+      int16_t best_correlation,
+      bool active_speech,
+      bool fast_mode,
+      AudioMultiVector* output) const = 0;
+
+  static const size_t kCorrelationLen = 50;
+  static const size_t kLogCorrelationLen = 6;  // >= log2(kCorrelationLen).
+  static const size_t kMinLag = 10;
+  static const size_t kMaxLag = 60;
+  static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
+  static const int kCorrelationThreshold = 14746;  // 0.9 in Q14.
+  static constexpr size_t kRefChannel = 0;  // First channel is reference.
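+  // Note that the lag constants above are in the 4 kHz downsampled domain:
+  // lags of 10..60 samples correspond to pitch periods of 2.5..15 ms.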
+
+  const int sample_rate_hz_;
+  const int fs_mult_;  // Sample rate multiplier = sample_rate_hz_ / 8000.
+  const size_t num_channels_;
+  const BackgroundNoise& background_noise_;
+  int16_t max_input_value_;
+  int16_t downsampled_input_[kDownsampledLen];
+  // Adding 1 to the size of `auto_correlation_` because of how it is used
+  // by the peak-detection algorithm.
+  int16_t auto_correlation_[kCorrelationLen + 1];
+
+ private:
+  // Calculates the auto-correlation of `downsampled_input_` and writes the
+  // result to `auto_correlation_`.
+  void AutoCorrelation();
+
+  // Performs a simple voice-activity detection based on the input parameters.
+  bool SpeechDetection(int32_t vec1_energy,
+                       int32_t vec2_energy,
+                       size_t peak_index,
+                       int scaling) const;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc
new file mode 100644
index 0000000000..da3a98229a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for Accelerate and PreemptiveExpand classes.
+
+#include <map>
+#include <memory>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_coding/neteq/accelerate.h"
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/preemptive_expand.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+namespace {
+const size_t kNumChannels = 1;
+}
+
+TEST(TimeStretch, CreateAndDestroy) {
+  const int kSampleRate = 8000;
+  const int kOverlapSamples = 5 * kSampleRate / 8000;
+  BackgroundNoise bgn(kNumChannels);
+  Accelerate accelerate(kSampleRate, kNumChannels, bgn);
+  PreemptiveExpand preemptive_expand(kSampleRate, kNumChannels, bgn,
+                                     kOverlapSamples);
+}
+
+TEST(TimeStretch, CreateUsingFactory) {
+  const int kSampleRate = 8000;
+  const int kOverlapSamples = 5 * kSampleRate / 8000;
+  BackgroundNoise bgn(kNumChannels);
+
+  AccelerateFactory accelerate_factory;
+  Accelerate* accelerate =
+      accelerate_factory.Create(kSampleRate, kNumChannels, bgn);
+  EXPECT_TRUE(accelerate != NULL);
+  delete accelerate;
+
+  PreemptiveExpandFactory preemptive_expand_factory;
+  PreemptiveExpand* preemptive_expand = preemptive_expand_factory.Create(
+      kSampleRate, kNumChannels, bgn, kOverlapSamples);
+  EXPECT_TRUE(preemptive_expand != NULL);
+  delete preemptive_expand;
+}
+
+class TimeStretchTest : public ::testing::Test {
+ protected:
+  TimeStretchTest()
+      : input_file_(new test::InputAudioFile(
+            test::ResourcePath("audio_coding/testfile32kHz", "pcm"))),
+        sample_rate_hz_(32000),
+        block_size_(30 * sample_rate_hz_ / 1000),  // 30 ms
+        audio_(new int16_t[block_size_]),
+        background_noise_(kNumChannels) {}
+
+  const int16_t* Next30Ms() {
+    RTC_CHECK(input_file_->Read(block_size_, audio_.get()));
+    return audio_.get();
+  }
+
+  // Returns the total length change (in samples) that the accelerate
+  // operation resulted in during the run.
+  size_t TestAccelerate(size_t loops, bool fast_mode) {
+    Accelerate accelerate(sample_rate_hz_, kNumChannels, background_noise_);
+    size_t total_length_change = 0;
+    for (size_t i = 0; i < loops; ++i) {
+      AudioMultiVector output(kNumChannels);
+      size_t length_change;
+      UpdateReturnStats(accelerate.Process(Next30Ms(), block_size_, fast_mode,
+                                           &output, &length_change));
+      total_length_change += length_change;
+    }
+    return total_length_change;
+  }
+
+  void UpdateReturnStats(TimeStretch::ReturnCodes ret) {
+    switch (ret) {
+      case TimeStretch::kSuccess:
+      case TimeStretch::kSuccessLowEnergy:
+      case TimeStretch::kNoStretch:
+        ++return_stats_[ret];
+        break;
+      case TimeStretch::kError:
+        FAIL() << "Process returned an error";
+    }
+  }
+
+  std::unique_ptr<test::InputAudioFile> input_file_;
+  const int sample_rate_hz_;
+  const size_t block_size_;
+  std::unique_ptr<int16_t[]> audio_;
+  std::map<TimeStretch::ReturnCodes, int> return_stats_;
+  BackgroundNoise background_noise_;
+};
+
+TEST_F(TimeStretchTest, Accelerate) {
+  // TestAccelerate returns the total length change in samples.
+  EXPECT_EQ(15268U, TestAccelerate(100, false));
+  EXPECT_EQ(9, return_stats_[TimeStretch::kSuccess]);
+  EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]);
+  EXPECT_EQ(33, return_stats_[TimeStretch::kNoStretch]);
+}
+
+TEST_F(TimeStretchTest, AccelerateFastMode) {
+  // TestAccelerate returns the total length change in samples.
+  EXPECT_EQ(21400U, TestAccelerate(100, true));
+  EXPECT_EQ(31, return_stats_[TimeStretch::kSuccess]);
+  EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]);
+  EXPECT_EQ(11, return_stats_[TimeStretch::kNoStretch]);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc
new file mode 100644
index 0000000000..59177d027f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/timestamp_scaler.h"
+
+#include "api/audio_codecs/audio_format.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void TimestampScaler::Reset() {
+  first_packet_received_ = false;
+}
+
+void TimestampScaler::ToInternal(Packet* packet) {
+  if (!packet) {
+    return;
+  }
+  packet->timestamp = ToInternal(packet->timestamp, packet->payload_type);
+}
+
+void TimestampScaler::ToInternal(PacketList* packet_list) {
+  PacketList::iterator it;
+  for (it = packet_list->begin(); it != packet_list->end(); ++it) {
+    ToInternal(&(*it));
+  }
+}
+
+uint32_t TimestampScaler::ToInternal(uint32_t external_timestamp,
+                                     uint8_t rtp_payload_type) {
+  const DecoderDatabase::DecoderInfo* info =
+      decoder_database_.GetDecoderInfo(rtp_payload_type);
+  if (!info) {
+    // Payload type is unknown. Do not scale.
+    return external_timestamp;
+  }
+  if (!(info->IsComfortNoise() || info->IsDtmf())) {
+    // Do not change the timestamp scaling settings for DTMF or CNG.
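+    // For example, G.722 decodes at 16 kHz but, per RFC 3551, advertises an
+    // 8 kHz RTP clock, giving numerator_ / denominator_ = 16000 / 8000 = 2.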
+ numerator_ = info->SampleRateHz(); + if (info->GetFormat().clockrate_hz == 0) { + // If the clockrate is invalid (i.e. with an old-style external codec) + // we cannot do any timestamp scaling. + denominator_ = numerator_; + } else { + denominator_ = info->GetFormat().clockrate_hz; + } + } + if (numerator_ != denominator_) { + // We have a scale factor != 1. + if (!first_packet_received_) { + external_ref_ = external_timestamp; + internal_ref_ = external_timestamp; + first_packet_received_ = true; + } + const int64_t external_diff = int64_t{external_timestamp} - external_ref_; + RTC_DCHECK_GT(denominator_, 0); + external_ref_ = external_timestamp; + internal_ref_ += (external_diff * numerator_) / denominator_; + return internal_ref_; + } else { + // No scaling. + return external_timestamp; + } +} + +uint32_t TimestampScaler::ToExternal(uint32_t internal_timestamp) const { + if (!first_packet_received_ || (numerator_ == denominator_)) { + // Not initialized, or scale factor is 1. + return internal_timestamp; + } else { + const int64_t internal_diff = int64_t{internal_timestamp} - internal_ref_; + RTC_DCHECK_GT(numerator_, 0); + // Do not update references in this method. + // Switch `denominator_` and `numerator_` to convert the other way. + return external_ref_ + (internal_diff * denominator_) / numerator_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h new file mode 100644 index 0000000000..f42ce7207a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ +#define MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ + +#include "modules/audio_coding/neteq/packet.h" + +namespace webrtc { + +// Forward declaration. +class DecoderDatabase; + +// This class scales timestamps for codecs that need timestamp scaling. +// This is done for codecs where one RTP timestamp does not correspond to +// one sample. +class TimestampScaler { + public: + explicit TimestampScaler(const DecoderDatabase& decoder_database) + : first_packet_received_(false), + numerator_(1), + denominator_(1), + external_ref_(0), + internal_ref_(0), + decoder_database_(decoder_database) {} + + virtual ~TimestampScaler() {} + + TimestampScaler(const TimestampScaler&) = delete; + TimestampScaler& operator=(const TimestampScaler&) = delete; + + // Start over. + virtual void Reset(); + + // Scale the timestamp in `packet` from external to internal. + virtual void ToInternal(Packet* packet); + + // Scale the timestamp for all packets in `packet_list` from external to + // internal. + virtual void ToInternal(PacketList* packet_list); + + // Returns the internal equivalent of `external_timestamp`, given the + // RTP payload type `rtp_payload_type`. + virtual uint32_t ToInternal(uint32_t external_timestamp, + uint8_t rtp_payload_type); + + // Scales back to external timestamp. This is the inverse of ToInternal(). 
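+  // Note that the inverse only holds after the first packet has been seen;
+  // before that, and when the scale factor is 1, timestamps pass through
+  // unchanged.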
+ virtual uint32_t ToExternal(uint32_t internal_timestamp) const; + + private: + bool first_packet_received_; + int numerator_; + int denominator_; + uint32_t external_ref_; + uint32_t internal_ref_; + const DecoderDatabase& decoder_database_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc new file mode 100644 index 0000000000..c2bb4dd95f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/timestamp_scaler.h" + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/neteq/mock/mock_decoder_database.h" +#include "modules/audio_coding/neteq/packet.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::Return; +using ::testing::ReturnNull; + +namespace webrtc { + +TEST(TimestampScaler, TestNoScaling) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use PCMu, because it doesn't use scaled timestamps. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + for (uint32_t timestamp = 0xFFFFFFFF - 5; timestamp != 5; ++timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestNoScalingLargeStep) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use PCMu, because it doesn't use scaled timestamps. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 160; + uint32_t start_timestamp = 0; + // `external_timestamp` will be a large positive value. + start_timestamp = start_timestamp - 5 * kStep; + for (uint32_t timestamp = start_timestamp; timestamp != 5 * kStep; + timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. 
+ const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722LargeStep) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 320; + uint32_t external_timestamp = 0; + // `external_timestamp` will be a large positive value. + external_timestamp = external_timestamp - 5 * kStep; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5 * kStep; external_timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + // Internal timestamp should be incremented with twice the step. + internal_timestamp += 2 * kStep; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722WithCng) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info_g722(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + const DecoderDatabase::DecoderInfo info_cng(SdpAudioFormat("cn", 16000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadTypeG722 = 17; + static const uint8_t kRtpPayloadTypeCng = 13; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeG722)) + .WillRepeatedly(Return(&info_g722)); + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeCng)) + .WillRepeatedly(Return(&info_cng)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + bool next_is_cng = false; + for (; external_timestamp != 5; ++external_timestamp) { + // Alternate between G.722 and CNG every other packet. + if (next_is_cng) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeCng)); + next_is_cng = false; + } else { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeG722)); + next_is_cng = true; + } + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. 
+} + +// Make sure that the method ToInternal(Packet* packet) is wired up correctly. +// Since it is simply calling the other ToInternal method, we are not doing +// as many tests here. +TEST(TimestampScaler, TestG722Packet) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + Packet packet; + packet.payload_type = kRtpPayloadType; + for (; external_timestamp != 5; ++external_timestamp) { + packet.timestamp = external_timestamp; + // Scale to internal timestamp. + scaler.ToInternal(&packet); + EXPECT_EQ(internal_timestamp, packet.timestamp); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// Make sure that the method ToInternal(PacketList* packet_list) is wired up +// correctly. Since it is simply calling the ToInternal(Packet* packet) method, +// we are not doing as many tests here. +TEST(TimestampScaler, TestG722PacketList) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + PacketList packet_list; + { + Packet packet1; + packet1.payload_type = kRtpPayloadType; + packet1.timestamp = external_timestamp; + Packet packet2; + packet2.payload_type = kRtpPayloadType; + packet2.timestamp = external_timestamp + 10; + packet_list.push_back(std::move(packet1)); + packet_list.push_back(std::move(packet2)); + } + + scaler.ToInternal(&packet_list); + EXPECT_EQ(internal_timestamp, packet_list.front().timestamp); + packet_list.pop_front(); + EXPECT_EQ(internal_timestamp + 20, packet_list.front().timestamp); + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722Reset) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + // Reset the scaler. After this, we expect the internal and external to start + // over at the same value again. 
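+  // (Reset() clears the first-packet flag, so the next ToInternal() call
+  // re-anchors the internal reference at the incoming external timestamp.)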
+  scaler.Reset();
+  internal_timestamp = external_timestamp;
+  for (; external_timestamp != 15; ++external_timestamp) {
+    // Scale to internal timestamp.
+    EXPECT_EQ(internal_timestamp,
+              scaler.ToInternal(external_timestamp, kRtpPayloadType));
+    // Scale back.
+    EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp));
+    internal_timestamp += 2;
+  }
+
+  EXPECT_CALL(db, Die());  // Called when database object is deleted.
+}
+
+// TODO(minyue): This test has become trivial since Opus does not need a
+// timestamp scaler, so it may be removed in the future. There is no harm in
+// keeping it, though, since it serves as a test case for a trivial timestamp
+// scaler.
+TEST(TimestampScaler, TestOpusLargeStep) {
+  MockDecoderDatabase db;
+  auto factory = CreateBuiltinAudioDecoderFactory();
+  const DecoderDatabase::DecoderInfo info(SdpAudioFormat("opus", 48000, 2),
+                                          absl::nullopt, factory.get());
+  static const uint8_t kRtpPayloadType = 17;
+  EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType))
+      .WillRepeatedly(Return(&info));
+
+  TimestampScaler scaler(db);
+  // Test both sides of the timestamp wrap-around.
+  static const uint32_t kStep = 960;
+  uint32_t external_timestamp = 0;
+  // `external_timestamp` will be a large positive value.
+  external_timestamp = external_timestamp - 5 * kStep;
+  uint32_t internal_timestamp = external_timestamp;
+  for (; external_timestamp != 5 * kStep; external_timestamp += kStep) {
+    // Scale to internal timestamp.
+    EXPECT_EQ(internal_timestamp,
+              scaler.ToInternal(external_timestamp, kRtpPayloadType));
+    // Scale back.
+    EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp));
+    internal_timestamp += kStep;
+  }
+
+  EXPECT_CALL(db, Die());  // Called when database object is deleted.
+}
+
+TEST(TimestampScaler, Failures) {
+  static const uint8_t kRtpPayloadType = 17;
+  MockDecoderDatabase db;
+  EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType))
+      .WillOnce(ReturnNull());  // Return NULL to indicate unknown payload type.
+
+  TimestampScaler scaler(db);
+  uint32_t timestamp = 4711;  // Some number.
+  EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType));
+
+  Packet* packet = NULL;
+  scaler.ToInternal(packet);  // Should not crash. That's all we can test.
+
+  EXPECT_CALL(db, Die());  // Called when database object is deleted.
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS b/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS
new file mode 100644
index 0000000000..4db1e1d6e5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+logging/rtc_event_log",
+]
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md b/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md
new file mode 100644
index 0000000000..e7bd95c285
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md
@@ -0,0 +1,17 @@
+# NetEQ RTP Play tool
+
+## Testing of the command line arguments
+The command line tool `neteq_rtpplay` can be tested by running
+`neteq_rtpplay_test.sh`. The script is not used on try bots, but it can be run
+before submitting any CLs that may break the behavior of the command line
+arguments of `neteq_rtpplay`.
+
+Run `neteq_rtpplay_test.sh` as follows from the `src/` folder:
+```
+src$ ./modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh \
+  out/Default/neteq_rtpplay \
+  resources/audio_coding/neteq_opus.rtp \
+  resources/short_mixed_mono_48.pcm
+```
+
+You can replace the RTP and PCM files with any other compatible files.
+If you get an error using the files indicated above, try running `gclient sync`.
+
+Requirements: `awk` and `md5sum`.
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h
new file mode 100644
index 0000000000..42e3a3a3a0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/message_digest.h"
+#include "rtc_base/string_encode.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace test {
+
+class AudioChecksum : public AudioSink {
+ public:
+  AudioChecksum()
+      : checksum_(rtc::MessageDigestFactory::Create(rtc::DIGEST_MD5)),
+        checksum_result_(checksum_->Size()),
+        finished_(false) {}
+
+  AudioChecksum(const AudioChecksum&) = delete;
+  AudioChecksum& operator=(const AudioChecksum&) = delete;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    if (finished_)
+      return false;
+
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+#error "Big-endian gives a different checksum"
+#endif
+    checksum_->Update(audio, num_samples * sizeof(*audio));
+    return true;
+  }
+
+  // Finalizes the computations, and returns the checksum.
+  std::string Finish() {
+    if (!finished_) {
+      finished_ = true;
+      checksum_->Finish(checksum_result_.data(), checksum_result_.size());
+    }
+    return rtc::hex_encode(checksum_result_);
+  }
+
+ private:
+  std::unique_ptr<rtc::MessageDigest> checksum_;
+  rtc::Buffer checksum_result_;
+  bool finished_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_
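For orientation, here is a minimal, self-contained sketch (not part of the patch) of how the checksum sink can be driven. The 480-sample block of zeros and the use of `main` are arbitrary choices for illustration:
```
#include <cstdint>
#include <cstdio>
#include <vector>

#include "modules/audio_coding/neteq/tools/audio_checksum.h"

int main() {
  webrtc::test::AudioChecksum checksum;
  // One 10 ms block of silence at 48 kHz; contents are arbitrary here.
  std::vector<int16_t> block(480, 0);
  checksum.WriteArray(block.data(), block.size());
  // Finish() finalizes the digest; subsequent WriteArray() calls return false.
  std::printf("%s\n", checksum.Finish().c_str());
  return 0;
}
```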
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc
new file mode 100644
index 0000000000..514e6eb2ee
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+
+bool AudioLoop::Init(absl::string_view file_name,
+                     size_t max_loop_length_samples,
+                     size_t block_length_samples) {
+  FILE* fp = fopen(std::string(file_name).c_str(), "rb");
+  if (!fp)
+    return false;
+
+  audio_array_.reset(
+      new int16_t[max_loop_length_samples + block_length_samples]);
+  size_t samples_read =
+      fread(audio_array_.get(), sizeof(int16_t), max_loop_length_samples, fp);
+  fclose(fp);
+
+  // Block length must be shorter than the loop length.
+  if (block_length_samples > samples_read)
+    return false;
+
+  // Add an extra block length of samples to the end of the array, starting
+  // over again from the beginning of the array. This is done to simplify
+  // the reading process when reading over the end of the loop.
+  memcpy(&audio_array_[samples_read], audio_array_.get(),
+         block_length_samples * sizeof(int16_t));
+
+  loop_length_samples_ = samples_read;
+  block_length_samples_ = block_length_samples;
+  next_index_ = 0;
+  return true;
+}
+
+rtc::ArrayView<const int16_t> AudioLoop::GetNextBlock() {
+  // Check that the AudioLoop is initialized.
+  if (block_length_samples_ == 0)
+    return rtc::ArrayView<const int16_t>();
+
+  const int16_t* output_ptr = &audio_array_[next_index_];
+  next_index_ = (next_index_ + block_length_samples_) % loop_length_samples_;
+  return rtc::ArrayView<const int16_t>(output_ptr, block_length_samples_);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h
new file mode 100644
index 0000000000..f5f0b59011
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace test {
+
+// Class serving as an infinite source of audio, realized by looping an audio
+// clip.
+class AudioLoop {
+ public:
+  AudioLoop()
+      : next_index_(0), loop_length_samples_(0), block_length_samples_(0) {}
+
+  virtual ~AudioLoop() {}
+
+  AudioLoop(const AudioLoop&) = delete;
+  AudioLoop& operator=(const AudioLoop&) = delete;
+
+  // Initializes the AudioLoop by reading from `file_name`. The loop will be no
+  // longer than `max_loop_length_samples`, if the length of the file is
+  // greater. Otherwise, the loop length is the same as the file length.
+  // The audio will be delivered in blocks of `block_length_samples`.
+  // Returns false if the initialization failed, otherwise true.
+  bool Init(absl::string_view file_name,
+            size_t max_loop_length_samples,
+            size_t block_length_samples);
+
+  // Returns a (pointer,size) pair for the next block of audio. The size is
+  // equal to the `block_length_samples` Init() argument.
+  rtc::ArrayView<const int16_t> GetNextBlock();
+
+ private:
+  size_t next_index_;
+  size_t loop_length_samples_;
+  size_t block_length_samples_;
+  std::unique_ptr<int16_t[]> audio_array_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_
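A short usage sketch for the looping reader declared above; the file name `speech_48k.pcm` and the 10 s / 10 ms sizes are placeholders, not values from the patch:
```
#include "modules/audio_coding/neteq/tools/audio_loop.h"

#include "rtc_base/checks.h"

// Loops a 16-bit PCM file and pulls three 10 ms blocks (480 samples at
// 48 kHz). GetNextBlock() wraps around transparently at the loop end.
void LoopExample() {
  webrtc::test::AudioLoop loop;
  RTC_CHECK(loop.Init("speech_48k.pcm",
                      /*max_loop_length_samples=*/48000 * 10,
                      /*block_length_samples=*/480));
  for (int i = 0; i < 3; ++i) {
    rtc::ArrayView<const int16_t> block = loop.GetNextBlock();
    RTC_CHECK_EQ(block.size(), 480u);
  }
}
```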
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc
new file mode 100644
index 0000000000..7d7af7ef9f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc
@@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+
+namespace webrtc {
+namespace test {
+
+bool AudioSinkFork::WriteArray(const int16_t* audio, size_t num_samples) {
+  return left_sink_->WriteArray(audio, num_samples) &&
+         right_sink_->WriteArray(audio, num_samples);
+}
+
+bool VoidAudioSink::WriteArray(const int16_t* audio, size_t num_samples) {
+  return true;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h
new file mode 100644
index 0000000000..53729fa920
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_
+
+#include "api/audio/audio_frame.h"
+
+namespace webrtc {
+namespace test {
+
+// Interface class for an object receiving raw output audio from test
+// applications.
+class AudioSink {
+ public:
+  AudioSink() {}
+  virtual ~AudioSink() {}
+
+  AudioSink(const AudioSink&) = delete;
+  AudioSink& operator=(const AudioSink&) = delete;
+
+  // Writes `num_samples` from `audio` to the AudioSink. Returns true if
+  // successful, otherwise false.
+  virtual bool WriteArray(const int16_t* audio, size_t num_samples) = 0;
+
+  // Writes `audio_frame` to the AudioSink. Returns true if successful,
+  // otherwise false.
+  bool WriteAudioFrame(const AudioFrame& audio_frame) {
+    return WriteArray(audio_frame.data(), audio_frame.samples_per_channel_ *
+                                              audio_frame.num_channels_);
+  }
+};
+
+// Forks the output audio to two AudioSink objects.
+class AudioSinkFork : public AudioSink {
+ public:
+  AudioSinkFork(AudioSink* left, AudioSink* right)
+      : left_sink_(left), right_sink_(right) {}
+
+  AudioSinkFork(const AudioSinkFork&) = delete;
+  AudioSinkFork& operator=(const AudioSinkFork&) = delete;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override;
+
+ private:
+  AudioSink* left_sink_;
+  AudioSink* right_sink_;
+};
+
+// An AudioSink implementation that does nothing.
+class VoidAudioSink : public AudioSink {
+ public:
+  VoidAudioSink() = default;
+
+  VoidAudioSink(const VoidAudioSink&) = delete;
+  VoidAudioSink& operator=(const VoidAudioSink&) = delete;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_
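The fork makes it easy to tee audio into two sinks at once. A hedged example combining it with the checksum and void sinks defined above:
```
#include "modules/audio_coding/neteq/tools/audio_checksum.h"
#include "modules/audio_coding/neteq/tools/audio_sink.h"

// Sends the same block to a checksum sink and to a sink that discards it.
// Note that WriteArray() on the fork returns false if either sink fails,
// and the right sink is skipped when the left one fails.
void ForkExample(const int16_t* audio, size_t num_samples) {
  webrtc::test::AudioChecksum checksum;
  webrtc::test::VoidAudioSink discard;
  webrtc::test::AudioSinkFork fork(&checksum, &discard);
  fork.WriteArray(audio, num_samples);
}
```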
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc
new file mode 100644
index 0000000000..18a910365f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/constant_pcm_packet_source.h"
+
+#include <memory>
+
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+ConstantPcmPacketSource::ConstantPcmPacketSource(size_t payload_len_samples,
+                                                 int16_t sample_value,
+                                                 int sample_rate_hz,
+                                                 int payload_type)
+    : payload_len_samples_(payload_len_samples),
+      packet_len_bytes_(2 * payload_len_samples_ + kHeaderLenBytes),
+      samples_per_ms_(sample_rate_hz / 1000),
+      next_arrival_time_ms_(0.0),
+      payload_type_(payload_type),
+      seq_number_(0),
+      timestamp_(0),
+      payload_ssrc_(0xABCD1234) {
+  size_t encoded_len = WebRtcPcm16b_Encode(&sample_value, 1, encoded_sample_);
+  RTC_CHECK_EQ(2U, encoded_len);
+}
+
+std::unique_ptr<Packet> ConstantPcmPacketSource::NextPacket() {
+  RTC_CHECK_GT(packet_len_bytes_, kHeaderLenBytes);
+  rtc::CopyOnWriteBuffer packet_buffer(packet_len_bytes_);
+  uint8_t* packet_memory = packet_buffer.MutableData();
+  // Fill the payload part of the packet memory with the pre-encoded value.
+  for (unsigned i = 0; i < 2 * payload_len_samples_; ++i)
+    packet_memory[kHeaderLenBytes + i] = encoded_sample_[i % 2];
+  WriteHeader(packet_memory);
+  // `packet` takes ownership of the payload by moving `packet_buffer`.
+  auto packet =
+      std::make_unique<Packet>(std::move(packet_buffer), next_arrival_time_ms_);
+  next_arrival_time_ms_ += payload_len_samples_ / samples_per_ms_;
+  return packet;
+}
+
+void ConstantPcmPacketSource::WriteHeader(uint8_t* packet_memory) {
+  packet_memory[0] = 0x80;
+  packet_memory[1] = static_cast<uint8_t>(payload_type_);
+  packet_memory[2] = seq_number_ >> 8;
+  packet_memory[3] = seq_number_ & 0xFF;
+  packet_memory[4] = timestamp_ >> 24;
+  packet_memory[5] = (timestamp_ >> 16) & 0xFF;
+  packet_memory[6] = (timestamp_ >> 8) & 0xFF;
+  packet_memory[7] = timestamp_ & 0xFF;
+  packet_memory[8] = payload_ssrc_ >> 24;
+  packet_memory[9] = (payload_ssrc_ >> 16) & 0xFF;
+  packet_memory[10] = (payload_ssrc_ >> 8) & 0xFF;
+  packet_memory[11] = payload_ssrc_ & 0xFF;
+  ++seq_number_;
+  timestamp_ += static_cast<uint32_t>(payload_len_samples_);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h
new file mode 100644
index 0000000000..ab4f5c2281
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+
+namespace webrtc {
+namespace test {
+
+// This class implements a packet source that delivers PCM16b encoded packets
+// with a constant sample value. The payload length, constant sample value,
+// sample rate, and payload type are all set in the constructor.
+class ConstantPcmPacketSource : public PacketSource {
+ public:
+  ConstantPcmPacketSource(size_t payload_len_samples,
+                          int16_t sample_value,
+                          int sample_rate_hz,
+                          int payload_type);
+
+  ConstantPcmPacketSource(const ConstantPcmPacketSource&) = delete;
+  ConstantPcmPacketSource& operator=(const ConstantPcmPacketSource&) = delete;
+
+  std::unique_ptr<Packet> NextPacket() override;
+
+ private:
+  void WriteHeader(uint8_t* packet_memory);
+
+  const size_t kHeaderLenBytes = 12;
+  const size_t payload_len_samples_;
+  const size_t packet_len_bytes_;
+  uint8_t encoded_sample_[2];
+  const int samples_per_ms_;
+  double next_arrival_time_ms_;
+  const int payload_type_;
+  uint16_t seq_number_;
+  uint32_t timestamp_;
+  const uint32_t payload_ssrc_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_
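An illustrative instantiation of the packet source; the payload length, sample value, rate, and payload type 94 are arbitrary example values, not defaults from the class:
```
#include "modules/audio_coding/neteq/tools/constant_pcm_packet_source.h"

#include "rtc_base/checks.h"

// Produces 10 ms PCM16b packets (160 samples at 16 kHz) whose samples all
// carry the constant value 1000.
void ConstantSourceExample() {
  webrtc::test::ConstantPcmPacketSource source(
      /*payload_len_samples=*/160, /*sample_value=*/1000,
      /*sample_rate_hz=*/16000, /*payload_type=*/94);
  // Each packet is a 12-byte RTP header followed by a 320-byte payload.
  auto packet = source.NextPacket();
  RTC_CHECK(packet);
}
```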
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc
new file mode 100644
index 0000000000..87b987ddb6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/encode_neteq_input.h"
+
+#include <utility>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+namespace test {
+
+EncodeNetEqInput::EncodeNetEqInput(std::unique_ptr<Generator> generator,
+                                   std::unique_ptr<AudioEncoder> encoder,
+                                   int64_t input_duration_ms)
+    : generator_(std::move(generator)),
+      encoder_(std::move(encoder)),
+      input_duration_ms_(input_duration_ms) {
+  CreatePacket();
+}
+
+EncodeNetEqInput::~EncodeNetEqInput() = default;
+
+absl::optional<int64_t> EncodeNetEqInput::NextPacketTime() const {
+  RTC_DCHECK(packet_data_);
+  return static_cast<int64_t>(packet_data_->time_ms);
+}
+
+absl::optional<int64_t> EncodeNetEqInput::NextOutputEventTime() const {
+  return next_output_event_ms_;
+}
+
+std::unique_ptr<NetEqInput::PacketData> EncodeNetEqInput::PopPacket() {
+  RTC_DCHECK(packet_data_);
+  // Grab the packet to return...
+  std::unique_ptr<PacketData> packet_to_return = std::move(packet_data_);
+  // ... and line up the next packet for future use.
+  CreatePacket();
+
+  return packet_to_return;
+}
+
+void EncodeNetEqInput::AdvanceOutputEvent() {
+  next_output_event_ms_ += kOutputPeriodMs;
+}
+
+bool EncodeNetEqInput::ended() const {
+  return next_output_event_ms_ > input_duration_ms_;
+}
+
+absl::optional<RTPHeader> EncodeNetEqInput::NextHeader() const {
+  RTC_DCHECK(packet_data_);
+  return packet_data_->header;
+}
+
+void EncodeNetEqInput::CreatePacket() {
+  // Create a new PacketData object.
+  RTC_DCHECK(!packet_data_);
+  packet_data_.reset(new NetEqInput::PacketData);
+  RTC_DCHECK_EQ(packet_data_->payload.size(), 0);
+
+  // Loop until we get a packet.
+  AudioEncoder::EncodedInfo info;
+  RTC_DCHECK(!info.send_even_if_empty);
+  int num_blocks = 0;
+  while (packet_data_->payload.size() == 0 && !info.send_even_if_empty) {
+    const size_t num_samples = rtc::CheckedDivExact(
+        static_cast<int>(encoder_->SampleRateHz() * kOutputPeriodMs), 1000);
+
+    info = encoder_->Encode(rtp_timestamp_, generator_->Generate(num_samples),
+                            &packet_data_->payload);
+
+    rtp_timestamp_ += rtc::dchecked_cast<uint32_t>(
+        num_samples * encoder_->RtpTimestampRateHz() /
+        encoder_->SampleRateHz());
+    ++num_blocks;
+  }
+  packet_data_->header.timestamp = info.encoded_timestamp;
+  packet_data_->header.payloadType = info.payload_type;
+  packet_data_->header.sequenceNumber = sequence_number_++;
+  packet_data_->time_ms = next_packet_time_ms_;
+  next_packet_time_ms_ += num_blocks * kOutputPeriodMs;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h
new file mode 100644
index 0000000000..caa9ac76f4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_
+
+#include <memory>
+
+#include "api/audio_codecs/audio_encoder.h"
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+
+namespace webrtc {
+namespace test {
+
+// This class provides a NetEqInput that takes audio from a generator object
+// and encodes it using a given audio encoder.
+class EncodeNetEqInput : public NetEqInput {
+ public:
+  // Generator class, to be provided to the EncodeNetEqInput constructor.
+  class Generator {
+   public:
+    virtual ~Generator() = default;
+    // Returns the next num_samples values from the signal generator.
+    virtual rtc::ArrayView<const int16_t> Generate(size_t num_samples) = 0;
+  };
+
+  // The source will end after the given input duration.
+  EncodeNetEqInput(std::unique_ptr<Generator> generator,
+                   std::unique_ptr<AudioEncoder> encoder,
+                   int64_t input_duration_ms);
+  ~EncodeNetEqInput() override;
+
+  absl::optional<int64_t> NextPacketTime() const override;
+
+  absl::optional<int64_t> NextOutputEventTime() const override;
+
+  std::unique_ptr<PacketData> PopPacket() override;
+
+  void AdvanceOutputEvent() override;
+
+  bool ended() const override;
+
+  absl::optional<RTPHeader> NextHeader() const override;
+
+ private:
+  static constexpr int64_t kOutputPeriodMs = 10;
+
+  void CreatePacket();
+
+  std::unique_ptr<Generator> generator_;
+  std::unique_ptr<AudioEncoder> encoder_;
+  std::unique_ptr<PacketData> packet_data_;
+  uint32_t rtp_timestamp_ = 0;
+  int16_t sequence_number_ = 0;
+  int64_t next_packet_time_ms_ = 0;
+  int64_t next_output_event_ms_ = 0;
+  const int64_t input_duration_ms_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_
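EncodeNetEqInput leaves the audio source abstract behind the Generator interface. A minimal Generator implementation could look like the following sketch; `ZeroGenerator` is a hypothetical name and simply returns silence:
```
#include <vector>

#include "modules/audio_coding/neteq/tools/encode_neteq_input.h"

// The simplest possible input to EncodeNetEqInput: blocks of zeros.
class ZeroGenerator : public webrtc::test::EncodeNetEqInput::Generator {
 public:
  rtc::ArrayView<const int16_t> Generate(size_t num_samples) override {
    buffer_.assign(num_samples, 0);  // Reuse the buffer between calls.
    return buffer_;
  }

 private:
  std::vector<int16_t> buffer_;
};
```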
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc
new file mode 100644
index 0000000000..6c5e5ac2e4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc
@@ -0,0 +1,169 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
+
+#include "modules/rtp_rtcp/source/byte_io.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+class FakeEncodedFrame : public AudioDecoder::EncodedAudioFrame {
+ public:
+  FakeEncodedFrame(AudioDecoder* decoder, rtc::Buffer&& payload)
+      : decoder_(decoder), payload_(std::move(payload)) {}
+
+  size_t Duration() const override {
+    const int ret = decoder_->PacketDuration(payload_.data(), payload_.size());
+    return ret < 0 ? 0 : static_cast<size_t>(ret);
+  }
+
+  absl::optional<DecodeResult> Decode(
+      rtc::ArrayView<int16_t> decoded) const override {
+    auto speech_type = AudioDecoder::kSpeech;
+    const int ret = decoder_->Decode(
+        payload_.data(), payload_.size(), decoder_->SampleRateHz(),
+        decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
+    return ret < 0 ? absl::nullopt
+                   : absl::optional<DecodeResult>(
+                         {static_cast<size_t>(ret), speech_type});
+  }
+
+  // This is to mimic OpusFrame.
+  bool IsDtxPacket() const override {
+    uint32_t original_payload_size_bytes =
+        ByteReader<uint32_t>::ReadLittleEndian(&payload_.data()[8]);
+    return original_payload_size_bytes <= 2;
+  }
+
+ private:
+  AudioDecoder* const decoder_;
+  const rtc::Buffer payload_;
+};
+
+}  // namespace
+
+std::vector<AudioDecoder::ParseResult> FakeDecodeFromFile::ParsePayload(
+    rtc::Buffer&& payload,
+    uint32_t timestamp) {
+  std::vector<ParseResult> results;
+  std::unique_ptr<EncodedAudioFrame> frame(
+      new FakeEncodedFrame(this, std::move(payload)));
+  results.emplace_back(timestamp, 0, std::move(frame));
+  return results;
+}
+
+int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded,
+                                       size_t encoded_len,
+                                       int sample_rate_hz,
+                                       int16_t* decoded,
+                                       SpeechType* speech_type) {
+  RTC_DCHECK_EQ(sample_rate_hz, SampleRateHz());
+
+  const int samples_to_decode = PacketDuration(encoded, encoded_len);
+  const int total_samples_to_decode = samples_to_decode * (stereo_ ? 2 : 1);
+
+  if (encoded_len == 0) {
+    // Decoder is asked to produce codec-internal comfort noise.
+    RTC_DCHECK(!encoded);  // NetEq always sends nullptr in this case.
+    RTC_DCHECK(cng_mode_);
+    RTC_DCHECK_GT(total_samples_to_decode, 0);
+    std::fill_n(decoded, total_samples_to_decode, 0);
+    *speech_type = kComfortNoise;
+    return rtc::dchecked_cast<int>(total_samples_to_decode);
+  }
+
+  RTC_CHECK_GE(encoded_len, 12);
+  uint32_t timestamp_to_decode =
+      ByteReader<uint32_t>::ReadLittleEndian(encoded);
+
+  if (next_timestamp_from_input_ &&
+      timestamp_to_decode != *next_timestamp_from_input_) {
+    // A gap in the timestamp sequence is detected. Skip the same number of
+    // samples from the file.
+    uint32_t jump = timestamp_to_decode - *next_timestamp_from_input_;
+    RTC_CHECK(input_->Seek(jump));
+  }
+
+  next_timestamp_from_input_ = timestamp_to_decode + samples_to_decode;
+
+  uint32_t original_payload_size_bytes =
+      ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]);
+  if (original_payload_size_bytes <= 2) {
+    // This is a comfort noise payload.
+    RTC_DCHECK_GT(total_samples_to_decode, 0);
+    std::fill_n(decoded, total_samples_to_decode, 0);
+    *speech_type = kComfortNoise;
+    cng_mode_ = true;
+    return rtc::dchecked_cast<int>(total_samples_to_decode);
+  }
+
+  cng_mode_ = false;
+  RTC_CHECK(input_->Read(static_cast<size_t>(samples_to_decode), decoded));
+
+  if (stereo_) {
+    InputAudioFile::DuplicateInterleaved(decoded, samples_to_decode, 2,
+                                         decoded);
+  }
+
+  *speech_type = kSpeech;
+  last_decoded_length_ = samples_to_decode;
+  return rtc::dchecked_cast<int>(total_samples_to_decode);
+}
+
+int FakeDecodeFromFile::PacketDuration(const uint8_t* encoded,
+                                       size_t encoded_len) const {
+  const uint32_t original_payload_size_bytes =
+      encoded_len < 8 + sizeof(uint32_t)
+          ? 0
+          : ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]);
+  const uint32_t samples_to_decode =
+      encoded_len < 4 + sizeof(uint32_t)
+          ? 0
+          : ByteReader<uint32_t>::ReadLittleEndian(&encoded[4]);
+  if (encoded_len == 0) {
+    // Decoder is asked to produce codec-internal comfort noise.
+    return rtc::CheckedDivExact(SampleRateHz(), 100);
+  }
+  bool is_dtx_payload =
+      original_payload_size_bytes <= 2 || samples_to_decode == 0;
+  bool has_error_duration =
+      samples_to_decode % rtc::CheckedDivExact(SampleRateHz(), 100) != 0;
+  if (is_dtx_payload || has_error_duration) {
+    if (last_decoded_length_ > 0) {
+      // Use length of last decoded packet.
+      return rtc::dchecked_cast<int>(last_decoded_length_);
+    } else {
+      // This is the first packet to decode, and we do not know the length of
+      // it. Set it to 10 ms.
+      return rtc::CheckedDivExact(SampleRateHz(), 100);
+    }
+  }
+  return samples_to_decode;
+}
+
+void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp,
+                                        size_t samples,
+                                        size_t original_payload_size_bytes,
+                                        rtc::ArrayView<uint8_t> encoded) {
+  RTC_CHECK_GE(encoded.size(), 12);
+  ByteWriter<uint32_t>::WriteLittleEndian(&encoded[0], timestamp);
+  ByteWriter<uint32_t>::WriteLittleEndian(
+      &encoded[4], rtc::checked_cast<uint32_t>(samples));
+  ByteWriter<uint32_t>::WriteLittleEndian(
+      &encoded[8], rtc::checked_cast<uint32_t>(original_payload_size_bytes));
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h
new file mode 100644
index 0000000000..7b53653998
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio_codecs/audio_decoder.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+
+namespace webrtc {
+namespace test {
+
+// Provides an AudioDecoder implementation that delivers audio data from a
+// file. The "encoded" input should contain information about what RTP
+// timestamp the encoding represents, and how many samples the decoder should
+// produce for that encoding. A helper method PrepareEncoded is provided to
+// prepare such encodings. If packets are missing, as determined from the
+// timestamps, the file reading will skip forward to match the loss.
+class FakeDecodeFromFile : public AudioDecoder {
+ public:
+  FakeDecodeFromFile(std::unique_ptr<InputAudioFile> input,
+                     int sample_rate_hz,
+                     bool stereo)
+      : input_(std::move(input)),
+        sample_rate_hz_(sample_rate_hz),
+        stereo_(stereo) {}
+
+  ~FakeDecodeFromFile() = default;
+
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+
+  void Reset() override {}
+
+  int SampleRateHz() const override { return sample_rate_hz_; }
+
+  size_t Channels() const override { return stereo_ ? 2 : 1; }
+
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+
+  int PacketDuration(const uint8_t* encoded,
+                     size_t encoded_len) const override;
+
+  // Helper method. Writes `timestamp`, `samples` and
+  // `original_payload_size_bytes` to `encoded` in a format that the
+  // FakeDecodeFromFile decoder will understand. `encoded` must be at least 12
+  // bytes long.
+  static void PrepareEncoded(uint32_t timestamp,
+                             size_t samples,
+                             size_t original_payload_size_bytes,
+                             rtc::ArrayView<uint8_t> encoded);
+
+ private:
+  std::unique_ptr<InputAudioFile> input_;
+  absl::optional<uint32_t> next_timestamp_from_input_;
+  const int sample_rate_hz_;
+  const bool stereo_;
+  size_t last_decoded_length_ = 0;
+  bool cng_mode_ = false;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_
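A sketch of how PrepareEncoded can be used to fabricate one of these "encoded" frames; the timestamp, sample count, and payload size are example values chosen here for illustration:
```
#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"

#include "rtc_base/buffer.h"

// Builds a 12-byte fake frame telling the decoder to read 480 samples
// (10 ms at 48 kHz) for RTP timestamp 96000, pretending the original
// packet carried 100 bytes of payload (> 2 bytes, so not treated as DTX).
rtc::Buffer MakeFakeFrame() {
  rtc::Buffer encoded(12);
  webrtc::test::FakeDecodeFromFile::PrepareEncoded(
      /*timestamp=*/96000, /*samples=*/480,
      /*original_payload_size_bytes=*/100, encoded);
  return encoded;
}
```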
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc
new file mode 100644
index 0000000000..3c33aabf1c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h"
+
+#include <limits>
+#include <memory>
+#include <utility>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+InitialPacketInserterNetEqInput::InitialPacketInserterNetEqInput(
+    std::unique_ptr<NetEqInput> source,
+    int number_of_initial_packets,
+    int sample_rate_hz)
+    : source_(std::move(source)),
+      packets_to_insert_(number_of_initial_packets),
+      sample_rate_hz_(sample_rate_hz) {}
+
+absl::optional<int64_t> InitialPacketInserterNetEqInput::NextPacketTime()
+    const {
+  return source_->NextPacketTime();
+}
+
+absl::optional<int64_t> InitialPacketInserterNetEqInput::NextOutputEventTime()
+    const {
+  return source_->NextOutputEventTime();
+}
+
+std::unique_ptr<NetEqInput::PacketData>
+InitialPacketInserterNetEqInput::PopPacket() {
+  if (!first_packet_) {
+    first_packet_ = source_->PopPacket();
+    if (!first_packet_) {
+      // The source has no packets, so we should not insert any dummy packets.
+      packets_to_insert_ = 0;
+    }
+  }
+  if (packets_to_insert_ > 0) {
+    RTC_CHECK(first_packet_);
+    auto dummy_packet = std::unique_ptr<PacketData>(new PacketData());
+    dummy_packet->header = first_packet_->header;
+    dummy_packet->payload = rtc::Buffer(first_packet_->payload.data(),
+                                        first_packet_->payload.size());
+    dummy_packet->time_ms = first_packet_->time_ms;
+    dummy_packet->header.sequenceNumber -= packets_to_insert_;
+    // This assumes 20 ms per packet.
+    dummy_packet->header.timestamp -=
+        20 * sample_rate_hz_ * packets_to_insert_ / 1000;
+    packets_to_insert_--;
+    return dummy_packet;
+  }
+  return source_->PopPacket();
+}
+
+void InitialPacketInserterNetEqInput::AdvanceOutputEvent() {
+  source_->AdvanceOutputEvent();
+}
+
+bool InitialPacketInserterNetEqInput::ended() const {
+  return source_->ended();
+}
+
+absl::optional<RTPHeader> InitialPacketInserterNetEqInput::NextHeader() const {
+  return source_->NextHeader();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h
new file mode 100644
index 0000000000..bd20a7aecf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+
+namespace webrtc {
+namespace test {
+
+// Wrapper class that can insert a number of packets at the start of the
+// simulation.
+class InitialPacketInserterNetEqInput final : public NetEqInput {
+ public:
+  InitialPacketInserterNetEqInput(std::unique_ptr<NetEqInput> source,
+                                  int number_of_initial_packets,
+                                  int sample_rate_hz);
+  absl::optional<int64_t> NextPacketTime() const override;
+  absl::optional<int64_t> NextOutputEventTime() const override;
+  std::unique_ptr<PacketData> PopPacket() override;
+  void AdvanceOutputEvent() override;
+  bool ended() const override;
+  absl::optional<RTPHeader> NextHeader() const override;
+
+ private:
+  const std::unique_ptr<NetEqInput> source_;
+  int packets_to_insert_;
+  const int sample_rate_hz_;
+  std::unique_ptr<PacketData> first_packet_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_
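A hedged example of wrapping an existing input with the inserter declared above; the packet count of five and the 48 kHz rate are arbitrary example values:
```
#include <memory>
#include <utility>

#include "modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h"

// Prepends five copies of the stream's first packet, with sequence numbers
// and timestamps stepped backwards, before the real packets are delivered.
std::unique_ptr<webrtc::test::NetEqInput> WithWarmupPackets(
    std::unique_ptr<webrtc::test::NetEqInput> source) {
  return std::make_unique<webrtc::test::InitialPacketInserterNetEqInput>(
      std::move(source), /*number_of_initial_packets=*/5,
      /*sample_rate_hz=*/48000);
}
```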
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc
new file mode 100644
index 0000000000..b077dbff21
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+InputAudioFile::InputAudioFile(absl::string_view file_name, bool loop_at_end)
+    : loop_at_end_(loop_at_end) {
+  fp_ = fopen(std::string(file_name).c_str(), "rb");
+  RTC_DCHECK(fp_) << file_name << " could not be opened.";
+}
+
+InputAudioFile::~InputAudioFile() {
+  RTC_DCHECK(fp_);
+  fclose(fp_);
+}
+
+bool InputAudioFile::Read(size_t samples, int16_t* destination) {
+  if (!fp_) {
+    return false;
+  }
+  size_t samples_read = fread(destination, sizeof(int16_t), samples, fp_);
+  if (samples_read < samples) {
+    if (!loop_at_end_) {
+      return false;
+    }
+    // Rewind and read the missing samples.
+    rewind(fp_);
+    size_t missing_samples = samples - samples_read;
+    if (fread(destination + samples_read, sizeof(int16_t), missing_samples,
+              fp_) < missing_samples) {
+      // Could not read enough even after rewinding the file.
+      return false;
+    }
+  }
+  return true;
+}
+
+bool InputAudioFile::Seek(int samples) {
+  if (!fp_) {
+    return false;
+  }
+  // Find file boundaries.
+  const long current_pos = ftell(fp_);
+  RTC_CHECK_NE(EOF, current_pos)
+      << "Error returned when getting file position.";
+  RTC_CHECK_EQ(0, fseek(fp_, 0, SEEK_END));  // Move to end of file.
+  const long file_size = ftell(fp_);
+  RTC_CHECK_NE(EOF, file_size) << "Error returned when getting file position.";
+  // Find new position.
+  long new_pos = current_pos + sizeof(int16_t) * samples;  // Samples to bytes.
+  if (loop_at_end_) {
+    new_pos = new_pos % file_size;  // Wrap around the end of the file.
+    if (new_pos < 0) {
+      // For negative values of new_pos, new_pos % file_size will also be
+      // negative, so file_size must be added to get the correct result.
+      new_pos += file_size;
+    }
+  } else {
+    new_pos = new_pos > file_size ? file_size : new_pos;  // Don't loop.
+  }
+  RTC_CHECK_GE(new_pos, 0)
+      << "Trying to move to before the beginning of the file";
+  // Move to new position relative to the beginning of the file.
+  RTC_CHECK_EQ(0, fseek(fp_, new_pos, SEEK_SET));
+  return true;
+}
+
+void InputAudioFile::DuplicateInterleaved(const int16_t* source,
+                                          size_t samples,
+                                          size_t channels,
+                                          int16_t* destination) {
+  // Start from the end of `source` and `destination`, and work towards the
+  // beginning. This is to allow in-place interleaving of the same array
+  // (i.e., `source` and `destination` are the same array).
+  for (int i = static_cast<int>(samples - 1); i >= 0; --i) {
+    for (int j = static_cast<int>(channels - 1); j >= 0; --j) {
+      destination[i * channels + j] = source[i];
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h
new file mode 100644
index 0000000000..f538b295a3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_
+
+#include <stdio.h>
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+
+// Class for handling a looping input audio file.
+class InputAudioFile {
+ public:
+  explicit InputAudioFile(absl::string_view file_name,
+                          bool loop_at_end = true);
+
+  virtual ~InputAudioFile();
+
+  InputAudioFile(const InputAudioFile&) = delete;
+  InputAudioFile& operator=(const InputAudioFile&) = delete;
+
+  // Reads `samples` elements from source file to `destination`. Returns true
+  // if the read was successful, otherwise false. If the file end is reached,
+  // the file is rewound and reading continues from the beginning.
+  // The output `destination` must have the capacity to hold `samples`
+  // elements.
+  virtual bool Read(size_t samples, int16_t* destination);
+
+  // Fast-forwards (`samples` > 0) or rewinds (`samples` < 0) the file by the
+  // indicated number of samples. Just like Read(), Seek() starts over at the
+  // beginning of the file if the end is reached. However, seeking backwards
+  // past the beginning of the file is not possible.
+  virtual bool Seek(int samples);
+
+  // Creates a multi-channel signal from a mono signal. Each sample is
+  // repeated `channels` times to create an interleaved multi-channel signal
+  // where all channels are identical. The output `destination` must have the
+  // capacity to hold samples * channels elements. Note that `source` and
+  // `destination` can be the same array (i.e., point to the same address).
+  static void DuplicateInterleaved(const int16_t* source,
+                                   size_t samples,
+                                   size_t channels,
+                                   int16_t* destination);
+
+ private:
+  FILE* fp_;
+  const bool loop_at_end_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_
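A small usage sketch for the file reader; the file name and the block/seek sizes are placeholders:
```
#include <array>

#include "modules/audio_coding/neteq/tools/input_audio_file.h"

// Reads one 10 ms mono block (160 samples at 16 kHz) and then skips ahead
// 100 ms. With loop_at_end=true, both operations wrap around at file end.
void ReadAndSkip() {
  webrtc::test::InputAudioFile file("speech_16k.pcm", /*loop_at_end=*/true);
  std::array<int16_t, 160> block;
  if (file.Read(block.size(), block.data())) {
    file.Seek(1600);  // 100 ms forward.
  }
}
```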
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc
new file mode 100644
index 0000000000..52f7ea82a0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for the InputAudioFile test class.
+
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+TEST(TestInputAudioFile, DuplicateInterleaveSeparateSrcDst) {
+  static const size_t kSamples = 10;
+  static const size_t kChannels = 2;
+  int16_t input[kSamples];
+  for (size_t i = 0; i < kSamples; ++i) {
+    input[i] = rtc::checked_cast<int16_t>(i);
+  }
+  int16_t output[kSamples * kChannels];
+  InputAudioFile::DuplicateInterleaved(input, kSamples, kChannels, output);
+
+  // Verify output.
+  int16_t* output_ptr = output;
+  for (size_t i = 0; i < kSamples; ++i) {
+    for (size_t j = 0; j < kChannels; ++j) {
+      EXPECT_EQ(static_cast<int16_t>(i), *output_ptr++);
+    }
+  }
+}
+
+TEST(TestInputAudioFile, DuplicateInterleaveSameSrcDst) {
+  static const size_t kSamples = 10;
+  static const size_t kChannels = 5;
+  int16_t input[kSamples * kChannels];
+  for (size_t i = 0; i < kSamples; ++i) {
+    input[i] = rtc::checked_cast<int16_t>(i);
+  }
+  InputAudioFile::DuplicateInterleaved(input, kSamples, kChannels, input);
+
+  // Verify output.
+  int16_t* output_ptr = input;
+  for (size_t i = 0; i < kSamples; ++i) {
+    for (size_t j = 0; j < kChannels; ++j) {
+      EXPECT_EQ(static_cast<int16_t>(i), *output_ptr++);
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc
new file mode 100644
index 0000000000..9e77457775
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc
@@ -0,0 +1,307 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h"
+
+#include <algorithm>
+#include <fstream>
+#include <ios>
+#include <iterator>
+#include <limits>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+constexpr char kArrivalDelayX[] = "arrival_delay_x";
+constexpr char kArrivalDelayY[] = "arrival_delay_y";
+constexpr char kTargetDelayX[] = "target_delay_x";
+constexpr char kTargetDelayY[] = "target_delay_y";
+constexpr char kPlayoutDelayX[] = "playout_delay_x";
+constexpr char kPlayoutDelayY[] = "playout_delay_y";
+
+// Helper function for NetEqDelayAnalyzer::CreateGraphs. Returns the
+// interpolated value of a function at the point x. Vector x_vec contains the
+// sample points, and y_vec contains the function values at these points. The
+// return value is a linear interpolation between y_vec values.
+double LinearInterpolate(double x,
+                         const std::vector<int64_t>& x_vec,
+                         const std::vector<int64_t>& y_vec) {
+  // Find first element which is larger than x.
+  auto it = std::upper_bound(x_vec.begin(), x_vec.end(), x);
+  if (it == x_vec.end()) {
+    --it;
+  }
+  const size_t upper_ix = it - x_vec.begin();
+
+  size_t lower_ix;
+  if (upper_ix == 0 || x_vec[upper_ix] <= x) {
+    lower_ix = upper_ix;
+  } else {
+    lower_ix = upper_ix - 1;
+  }
+  double y;
+  if (lower_ix == upper_ix) {
+    y = y_vec[lower_ix];
+  } else {
+    RTC_DCHECK_NE(x_vec[lower_ix], x_vec[upper_ix]);
+    y = (x - x_vec[lower_ix]) * (y_vec[upper_ix] - y_vec[lower_ix]) /
+            (x_vec[upper_ix] - x_vec[lower_ix]) +
+        y_vec[lower_ix];
+  }
+  return y;
+}
+
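To make the interpolation concrete, here is a standalone mirror of the routine with a worked data point. This is illustration only, written for this note, and not the file-local function itself:
```
#include <cassert>
#include <vector>

// Simplified re-statement of the interpolation: clamp outside the sample
// range, otherwise interpolate linearly between the two bracketing points.
double Interpolate(double x,
                   const std::vector<int64_t>& x_vec,
                   const std::vector<int64_t>& y_vec) {
  if (x <= x_vec.front()) return y_vec.front();
  if (x >= x_vec.back()) return y_vec.back();
  size_t i = 1;
  while (x_vec[i] < x) ++i;
  return y_vec[i - 1] + (x - x_vec[i - 1]) * (y_vec[i] - y_vec[i - 1]) /
                            (x_vec[i] - x_vec[i - 1]);
}

void InterpolateExample() {
  const std::vector<int64_t> x = {0, 10, 20};
  const std::vector<int64_t> y = {0, 100, 300};
  // 100 + (15 - 10) * (300 - 100) / (20 - 10) = 200.
  assert(Interpolate(15, x, y) == 200.0);
}
```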
+void PrintDelays(const NetEqDelayAnalyzer::Delays& delays,
+                 int64_t ref_time_ms,
+                 absl::string_view var_name_x,
+                 absl::string_view var_name_y,
+                 std::ofstream& output,
+                 absl::string_view terminator = "") {
+  output << var_name_x << " = [ ";
+  for (const std::pair<int64_t, float>& delay : delays) {
+    output << (delay.first - ref_time_ms) / 1000.f << ", ";
+  }
+  output << "]" << terminator << std::endl;
+
+  output << var_name_y << " = [ ";
+  for (const std::pair<int64_t, float>& delay : delays) {
+    output << delay.second << ", ";
+  }
+  output << "]" << terminator << std::endl;
+}
+
+}  // namespace
+
+void NetEqDelayAnalyzer::AfterInsertPacket(
+    const test::NetEqInput::PacketData& packet,
+    NetEq* neteq) {
+  data_.insert(
+      std::make_pair(packet.header.timestamp, TimingData(packet.time_ms)));
+  ssrcs_.insert(packet.header.ssrc);
+  payload_types_.insert(packet.header.payloadType);
+}
+
+void NetEqDelayAnalyzer::BeforeGetAudio(NetEq* neteq) {
+  last_sync_buffer_ms_ = neteq->SyncBufferSizeMs();
+}
+
+void NetEqDelayAnalyzer::AfterGetAudio(int64_t time_now_ms,
+                                       const AudioFrame& audio_frame,
+                                       bool /*muted*/,
+                                       NetEq* neteq) {
+  get_audio_time_ms_.push_back(time_now_ms);
+  for (const RtpPacketInfo& info : audio_frame.packet_infos_) {
+    auto it = data_.find(info.rtp_timestamp());
+    if (it == data_.end()) {
+      // This is a packet that was split out from another packet. Skip it.
+      continue;
+    }
+    auto& it_timing = it->second;
+    RTC_CHECK(!it_timing.decode_get_audio_count)
+        << "Decode time already written";
+    it_timing.decode_get_audio_count = get_audio_count_;
+    RTC_CHECK(!it_timing.sync_delay_ms) << "Decode time already written";
+    it_timing.sync_delay_ms = last_sync_buffer_ms_;
+    it_timing.target_delay_ms = neteq->TargetDelayMs();
+    it_timing.current_delay_ms = neteq->FilteredCurrentDelayMs();
+  }
+  last_sample_rate_hz_ = audio_frame.sample_rate_hz_;
+  ++get_audio_count_;
+}
+
+void NetEqDelayAnalyzer::CreateGraphs(Delays* arrival_delay_ms,
+                                      Delays* corrected_arrival_delay_ms,
+                                      Delays* playout_delay_ms,
+                                      Delays* target_delay_ms) const {
+  if (get_audio_time_ms_.empty()) {
+    return;
+  }
+  // Create nominal_get_audio_time_ms, a vector starting at
+  // get_audio_time_ms_[0] and increasing by 10 for each element.
+  std::vector<int64_t> nominal_get_audio_time_ms(get_audio_time_ms_.size());
+  nominal_get_audio_time_ms[0] = get_audio_time_ms_[0];
+  std::transform(
+      nominal_get_audio_time_ms.begin(), nominal_get_audio_time_ms.end() - 1,
+      nominal_get_audio_time_ms.begin() + 1, [](int64_t& x) { return x + 10; });
+  RTC_DCHECK(
+      std::is_sorted(get_audio_time_ms_.begin(), get_audio_time_ms_.end()));
+
+  std::vector<double> rtp_timestamps_ms;
+  double offset = std::numeric_limits<double>::max();
+  RtpTimestampUnwrapper unwrapper;
+  // This loop traverses data_ and populates rtp_timestamps_ms as well as
+  // calculates the base offset.
+  for (auto& d : data_) {
+    rtp_timestamps_ms.push_back(
+        static_cast<double>(unwrapper.Unwrap(d.first)) /
+        rtc::CheckedDivExact(last_sample_rate_hz_, 1000));
+    offset =
+        std::min(offset, d.second.arrival_time_ms - rtp_timestamps_ms.back());
+  }
+
+  // This loop traverses the data again and populates the graph vectors. The
+  // reason for traversing twice is that the offset cannot be known until the
+  // first traversal is done, but it must already be known at the start of
+  // this second loop.
+  size_t i = 0;
+  for (const auto& data : data_) {
+    const double offset_send_time_ms = rtp_timestamps_ms[i++] + offset;
+    const auto& timing = data.second;
+    corrected_arrival_delay_ms->push_back(std::make_pair(
+        timing.arrival_time_ms,
+        LinearInterpolate(timing.arrival_time_ms, get_audio_time_ms_,
+                          nominal_get_audio_time_ms) -
+            offset_send_time_ms));
+    arrival_delay_ms->push_back(std::make_pair(
+        timing.arrival_time_ms, timing.arrival_time_ms - offset_send_time_ms));
+
+    if (timing.decode_get_audio_count) {
+      // This packet was decoded.
+      RTC_DCHECK(timing.sync_delay_ms);
+      const int64_t get_audio_time =
+          *timing.decode_get_audio_count * 10 + get_audio_time_ms_[0];
+      const float playout_ms =
+          get_audio_time + *timing.sync_delay_ms - offset_send_time_ms;
+      playout_delay_ms->push_back(std::make_pair(get_audio_time, playout_ms));
+      RTC_DCHECK(timing.target_delay_ms);
+      RTC_DCHECK(timing.current_delay_ms);
+      const float target =
+          playout_ms - *timing.current_delay_ms + *timing.target_delay_ms;
+      target_delay_ms->push_back(std::make_pair(get_audio_time, target));
+    }
+  }
+}
+
+void NetEqDelayAnalyzer::CreateMatlabScript(
+    absl::string_view script_name) const {
+  Delays arrival_delay_ms;
+  Delays corrected_arrival_delay_ms;
+  Delays playout_delay_ms;
+  Delays target_delay_ms;
+  CreateGraphs(&arrival_delay_ms, &corrected_arrival_delay_ms,
+               &playout_delay_ms, &target_delay_ms);
+
+  // It might be better to find the actual smallest timestamp, to make sure
+  // the x-axis does not start at a negative value.
+  const int64_t ref_time_ms = arrival_delay_ms.front().first;
+
+  // Create an output file stream to the Matlab script file.
+  std::ofstream output(std::string{script_name});
+
+  PrintDelays(corrected_arrival_delay_ms, ref_time_ms, kArrivalDelayX,
+              kArrivalDelayY, output, ";");
+
+  // PrintDelays(corrected_arrival_delay_x, kCorrectedArrivalDelayX,
+  //             kCorrectedArrivalDelayY, output);
+
+  PrintDelays(playout_delay_ms, ref_time_ms, kPlayoutDelayX, kPlayoutDelayY,
+              output, ";");
+
+  PrintDelays(target_delay_ms, ref_time_ms, kTargetDelayX, kTargetDelayY,
+              output, ";");
+
+  output << "h=plot(" << kArrivalDelayX << ", " << kArrivalDelayY << ", "
+         << kTargetDelayX << ", " << kTargetDelayY << ", 'g.', "
+         << kPlayoutDelayX << ", " << kPlayoutDelayY << ");" << std::endl;
+  output << "set(h(1),'color',0.75*[1 1 1]);" << std::endl;
+  output << "set(h(2),'markersize',6);" << std::endl;
+  output << "set(h(3),'linew',1.5);" << std::endl;
+  output << "ax1=axis;" << std::endl;
+  output << "axis tight" << std::endl;
+  output << "ax2=axis;" << std::endl;
+  output << "axis([ax2(1:3) ax1(4)])" << std::endl;
+  output << "xlabel('time [s]');" << std::endl;
+  output << "ylabel('relative delay [ms]');" << std::endl;
+  if (!ssrcs_.empty()) {
+    auto ssrc_it = ssrcs_.cbegin();
+    output << "title('SSRC: 0x" << std::hex
+           << static_cast<int64_t>(*ssrc_it++);
+    while (ssrc_it != ssrcs_.end()) {
+      output << ", 0x" << std::hex << static_cast<int64_t>(*ssrc_it++);
+    }
+    output << std::dec;
+    auto pt_it = payload_types_.cbegin();
+    output << "; Payload Types: " << *pt_it++;
+    while (pt_it != payload_types_.end()) {
+      output << ", " << *pt_it++;
+    }
+    output << "');" << std::endl;
+  }
+}
+
+void NetEqDelayAnalyzer::CreatePythonScript(
+    absl::string_view script_name) const {
+  Delays arrival_delay_ms;
+  Delays corrected_arrival_delay_ms;
+  Delays playout_delay_ms;
+  Delays target_delay_ms;
+  CreateGraphs(&arrival_delay_ms, &corrected_arrival_delay_ms,
+               &playout_delay_ms, &target_delay_ms);
+
+  // It might be better to find the actual smallest timestamp, to make sure
+  // the x-axis does not start at a negative value.
+  const int64_t ref_time_ms = arrival_delay_ms.front().first;
+
+  // Create an output file stream to the Python script file.
+  std::ofstream output(std::string{script_name});
+
+  // Necessary imports.
+  output << "import numpy as np" << std::endl;
+  output << "import matplotlib.pyplot as plt" << std::endl;
+
+  PrintDelays(corrected_arrival_delay_ms, ref_time_ms, kArrivalDelayX,
+              kArrivalDelayY, output);
+
+  // PrintDelays(corrected_arrival_delay_x, kCorrectedArrivalDelayX,
+  //             kCorrectedArrivalDelayY, output);
+
+  PrintDelays(playout_delay_ms, ref_time_ms, kPlayoutDelayX, kPlayoutDelayY,
+              output);
+
+  PrintDelays(target_delay_ms, ref_time_ms, kTargetDelayX, kTargetDelayY,
+              output);
+
+  output << "if __name__ == '__main__':" << std::endl;
+  output << "  h=plt.plot(" << kArrivalDelayX << ", " << kArrivalDelayY << ", "
+         << kTargetDelayX << ", " << kTargetDelayY << ", 'g.', "
+         << kPlayoutDelayX << ", " << kPlayoutDelayY << ")" << std::endl;
+  output << "  plt.setp(h[0],'color',[.75, .75, .75])" << std::endl;
+  output << "  plt.setp(h[1],'markersize',6)" << std::endl;
+  output << "  plt.setp(h[2],'linewidth',1.5)" << std::endl;
+  output << "  plt.axis('tight')" << std::endl;
+  output << "  plt.xlabel('time [s]')" << std::endl;
+  output << "  plt.ylabel('relative delay [ms]')" << std::endl;
+  if (!ssrcs_.empty()) {
+    auto ssrc_it = ssrcs_.cbegin();
+    output << "  plt.legend((\"arrival delay\", \"target delay\", \"playout "
+              "delay\"))"
+           << std::endl;
+    output << "  plt.title('SSRC: 0x" << std::hex
+           << static_cast<int64_t>(*ssrc_it++);
+    while (ssrc_it != ssrcs_.end()) {
+      output << ", 0x" << std::hex << static_cast<int64_t>(*ssrc_it++);
+    }
+    output << std::dec;
+    auto pt_it = payload_types_.cbegin();
+    output << "; Payload Types: " << *pt_it++;
+    while (pt_it != payload_types_.end()) {
+      output << ", " << *pt_it++;
+    }
+    output << "')" << std::endl;
+  }
+  output << "  plt.show()" << std::endl;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h
new file mode 100644
index 0000000000..ffcba5843f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h
@@ -0,0 +1,76 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h
new file mode 100644
index 0000000000..ffcba5843f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_
+
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+
+namespace webrtc {
+namespace test {
+
+class NetEqDelayAnalyzer : public test::NetEqPostInsertPacket,
+                           public test::NetEqGetAudioCallback {
+ public:
+  void AfterInsertPacket(const test::NetEqInput::PacketData& packet,
+                         NetEq* neteq) override;
+
+  void BeforeGetAudio(NetEq* neteq) override;
+
+  void AfterGetAudio(int64_t time_now_ms,
+                     const AudioFrame& audio_frame,
+                     bool muted,
+                     NetEq* neteq) override;
+
+  using Delays = std::vector<std::pair<int64_t, float>>;
+  void CreateGraphs(Delays* arrival_delay_ms,
+                    Delays* corrected_arrival_delay_ms,
+                    Delays* playout_delay_ms,
+                    Delays* target_delay_ms) const;
+
+  // Creates a matlab script with file name `script_name`. When executed in
+  // Matlab, the script will generate graphs with the same timing information
+  // as provided by CreateGraphs.
+  void CreateMatlabScript(absl::string_view script_name) const;
+
+  // Creates a python script with file name `script_name`. When executed in
+  // Python, the script will generate graphs with the same timing information
+  // as provided by CreateGraphs.
+  void CreatePythonScript(absl::string_view script_name) const;
+
+ private:
+  struct TimingData {
+    explicit TimingData(int64_t at) : arrival_time_ms(at) {}
+    int64_t arrival_time_ms;
+    absl::optional<int64_t> decode_get_audio_count;
+    absl::optional<int64_t> sync_delay_ms;
+    absl::optional<int> target_delay_ms;
+    absl::optional<int> current_delay_ms;
+  };
+  std::map<uint32_t, TimingData> data_;
+  std::vector<int64_t> get_audio_time_ms_;
+  size_t get_audio_count_ = 0;
+  size_t last_sync_buffer_ms_ = 0;
+  int last_sample_rate_hz_ = 0;
+  std::set<uint32_t> ssrcs_;
+  std::set<int> payload_types_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc
new file mode 100644
index 0000000000..0c1f27799a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_event_log_input.h"
+
+#include <limits>
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/rtc_event_log_source.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+NetEqEventLogInput* NetEqEventLogInput::CreateFromFile(
+    absl::string_view file_name,
+    absl::optional<uint32_t> ssrc_filter) {
+  auto event_log_src =
+      RtcEventLogSource::CreateFromFile(file_name, ssrc_filter);
+  if (!event_log_src) {
+    return nullptr;
+  }
+  return new NetEqEventLogInput(std::move(event_log_src));
+}
+
+NetEqEventLogInput* NetEqEventLogInput::CreateFromString(
+    absl::string_view file_contents,
+    absl::optional<uint32_t> ssrc_filter) {
+  auto event_log_src =
+      RtcEventLogSource::CreateFromString(file_contents, ssrc_filter);
+  if (!event_log_src) {
+    return nullptr;
+  }
+  return new NetEqEventLogInput(std::move(event_log_src));
+}
+
+absl::optional<int64_t> NetEqEventLogInput::NextOutputEventTime() const {
+  return next_output_event_ms_;
+}
+
+void NetEqEventLogInput::AdvanceOutputEvent() {
+  next_output_event_ms_ = source_->NextAudioOutputEventMs();
+  if (*next_output_event_ms_ == std::numeric_limits<int64_t>::max()) {
+    next_output_event_ms_ = absl::nullopt;
+  }
+}
+
+PacketSource* NetEqEventLogInput::source() {
+  return source_.get();
+}
+
+NetEqEventLogInput::NetEqEventLogInput(
+    std::unique_ptr<RtcEventLogSource> source)
+    : source_(std::move(source)) {
+  LoadNextPacket();
+  AdvanceOutputEvent();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h
new file mode 100644
index 0000000000..c947ee1fc0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+
+namespace webrtc {
+namespace test {
+
+class RtcEventLogSource;
+
+// Implementation of NetEqPacketSourceInput to be used with an
+// RtcEventLogSource.
+class NetEqEventLogInput final : public NetEqPacketSourceInput {
+ public:
+  static NetEqEventLogInput* CreateFromFile(
+      absl::string_view file_name,
+      absl::optional<uint32_t> ssrc_filter);
+  static NetEqEventLogInput* CreateFromString(
+      absl::string_view file_contents,
+      absl::optional<uint32_t> ssrc_filter);
+
+  absl::optional<int64_t> NextOutputEventTime() const override;
+  void AdvanceOutputEvent() override;
+
+ protected:
+  PacketSource* source() override;
+
+ private:
+  NetEqEventLogInput(std::unique_ptr<RtcEventLogSource> source);
+  std::unique_ptr<RtcEventLogSource> source_;
+};
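+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the upstream change): creating an input
+// from an RTC event log file; the file name is a made-up placeholder.
+//
+//   std::unique_ptr<NetEqInput> input(NetEqEventLogInput::CreateFromFile(
+//       "session.rtceventlog", /*ssrc_filter=*/absl::nullopt));
+//   RTC_CHECK(input);  // CreateFromFile() returns nullptr if the log source
+//                      // cannot be created (e.g., the file fails to parse).
+// ---------------------------------------------------------------------------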
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc
new file mode 100644
index 0000000000..de416348f1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+namespace test {
+
+NetEqInput::PacketData::PacketData() = default;
+NetEqInput::PacketData::~PacketData() = default;
+
+std::string NetEqInput::PacketData::ToString() const {
+  rtc::StringBuilder ss;
+  ss << "{"
+        "time_ms: "
+     << static_cast<int64_t>(time_ms)
+     << ", "
+        "header: {"
+        "pt: "
+     << static_cast<int>(header.payloadType)
+     << ", "
+        "sn: "
+     << header.sequenceNumber
+     << ", "
+        "ts: "
+     << header.timestamp
+     << ", "
+        "ssrc: "
+     << header.ssrc
+     << "}, "
+        "payload bytes: "
+     << payload.size() << "}";
+  return ss.Release();
+}
+
+TimeLimitedNetEqInput::TimeLimitedNetEqInput(std::unique_ptr<NetEqInput> input,
+                                             int64_t duration_ms)
+    : input_(std::move(input)),
+      start_time_ms_(input_->NextEventTime()),
+      duration_ms_(duration_ms) {}
+
+TimeLimitedNetEqInput::~TimeLimitedNetEqInput() = default;
+
+absl::optional<int64_t> TimeLimitedNetEqInput::NextPacketTime() const {
+  return ended_ ? absl::nullopt : input_->NextPacketTime();
+}
+
+absl::optional<int64_t> TimeLimitedNetEqInput::NextOutputEventTime() const {
+  return ended_ ? absl::nullopt : input_->NextOutputEventTime();
+}
+
+std::unique_ptr<NetEqInput::PacketData> TimeLimitedNetEqInput::PopPacket() {
+  if (ended_) {
+    return std::unique_ptr<PacketData>();
+  }
+  auto packet = input_->PopPacket();
+  MaybeSetEnded();
+  return packet;
+}
+
+void TimeLimitedNetEqInput::AdvanceOutputEvent() {
+  if (!ended_) {
+    input_->AdvanceOutputEvent();
+    MaybeSetEnded();
+  }
+}
+
+bool TimeLimitedNetEqInput::ended() const {
+  return ended_ || input_->ended();
+}
+
+absl::optional<RTPHeader> TimeLimitedNetEqInput::NextHeader() const {
+  return ended_ ? absl::nullopt : input_->NextHeader();
+}
+
+void TimeLimitedNetEqInput::MaybeSetEnded() {
+  if (NextEventTime() && start_time_ms_ &&
+      *NextEventTime() - *start_time_ms_ > duration_ms_) {
+    ended_ = true;
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h
new file mode 100644
index 0000000000..3a66264043
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_
+
+#include <algorithm>
+#include <memory>
+#include <string>
+
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+namespace test {
+
+// Interface class for input to the NetEqTest class.
+class NetEqInput {
+ public:
+  struct PacketData {
+    PacketData();
+    ~PacketData();
+    std::string ToString() const;
+
+    RTPHeader header;
+    rtc::Buffer payload;
+    int64_t time_ms;
+  };
+
+  virtual ~NetEqInput() = default;
+
+  // Returns at what time (in ms) NetEq::InsertPacket should be called next, or
+  // empty if the source is out of packets.
+  virtual absl::optional<int64_t> NextPacketTime() const = 0;
+
+  // Returns at what time (in ms) NetEq::GetAudio should be called next, or
+  // empty if no more output events are available.
+  virtual absl::optional<int64_t> NextOutputEventTime() const = 0;
+
+  // Returns the time (in ms) for the next event from either NextPacketTime()
+  // or NextOutputEventTime(), or empty if both are out of events.
+  absl::optional<int64_t> NextEventTime() const {
+    const auto a = NextPacketTime();
+    const auto b = NextOutputEventTime();
+    // Return the minimum of non-empty `a` and `b`, or empty if both are empty.
+    if (a) {
+      return b ? std::min(*a, *b) : a;
+    }
+    return b ? b : absl::nullopt;
+  }
+
+  // Returns the next packet to be inserted into NetEq. The packet following
+  // the returned one is pre-fetched in the NetEqInput object, such that future
+  // calls to NextPacketTime() or NextHeader() will return information from
+  // that packet.
+  virtual std::unique_ptr<PacketData> PopPacket() = 0;
+
+  // Move to the next output event. This will make NextOutputEventTime() return
+  // a new value (potentially the same if several output events share the same
+  // time).
+  virtual void AdvanceOutputEvent() = 0;
+
+  // Returns true if the source has come to an end. An implementation must
+  // eventually return true from this method, or the test will end up in an
+  // infinite loop.
+  virtual bool ended() const = 0;
+
+  // Returns the RTP header for the next packet, i.e., the packet that will be
+  // delivered next by PopPacket().
+  virtual absl::optional<RTPHeader> NextHeader() const = 0;
+};
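+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the upstream change): the event loop that
+// a simulation typically builds on top of this interface, always serving the
+// earliest pending event. The NetEq calls are elided.
+//
+//   while (!input->ended()) {
+//     absl::optional<int64_t> event_time = input->NextEventTime();
+//     if (!event_time)
+//       break;
+//     const auto packet_time = input->NextPacketTime();
+//     if (packet_time && *packet_time == *event_time) {
+//       std::unique_ptr<NetEqInput::PacketData> packet = input->PopPacket();
+//       // neteq->InsertPacket(packet->header, packet->payload);
+//     } else {
+//       input->AdvanceOutputEvent();
+//       // neteq->GetAudio(&audio_frame, &muted);
+//     }
+//   }
+// ---------------------------------------------------------------------------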
+
+// Wrapper class to impose a time limit on a NetEqInput object, typically
+// another time limit than what the object itself provides. For example, an
+// input taken from a file can be cut shorter by wrapping it in this class.
+class TimeLimitedNetEqInput : public NetEqInput {
+ public:
+  TimeLimitedNetEqInput(std::unique_ptr<NetEqInput> input, int64_t duration_ms);
+  ~TimeLimitedNetEqInput() override;
+  absl::optional<int64_t> NextPacketTime() const override;
+  absl::optional<int64_t> NextOutputEventTime() const override;
+  std::unique_ptr<PacketData> PopPacket() override;
+  void AdvanceOutputEvent() override;
+  bool ended() const override;
+  absl::optional<RTPHeader> NextHeader() const override;
+
+ private:
+  void MaybeSetEnded();
+
+  std::unique_ptr<NetEqInput> input_;
+  const absl::optional<int64_t> start_time_ms_;
+  const int64_t duration_ms_;
+  bool ended_ = false;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc
new file mode 100644
index 0000000000..55a5653238
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+NetEqPacketSourceInput::NetEqPacketSourceInput() : next_output_event_ms_(0) {}
+
+absl::optional<int64_t> NetEqPacketSourceInput::NextPacketTime() const {
+  return packet_
+             ? absl::optional<int64_t>(static_cast<int64_t>(packet_->time_ms()))
+             : absl::nullopt;
+}
+
+absl::optional<RTPHeader> NetEqPacketSourceInput::NextHeader() const {
+  return packet_ ? absl::optional<RTPHeader>(packet_->header())
+                 : absl::nullopt;
+}
+
+void NetEqPacketSourceInput::LoadNextPacket() {
+  packet_ = source()->NextPacket();
+}
+
+std::unique_ptr<NetEqInput::PacketData> NetEqPacketSourceInput::PopPacket() {
+  if (!packet_) {
+    return std::unique_ptr<PacketData>();
+  }
+  std::unique_ptr<PacketData> packet_data(new PacketData);
+  packet_data->header = packet_->header();
+  if (packet_->payload_length_bytes() == 0 &&
+      packet_->virtual_payload_length_bytes() > 0) {
+    // This is a header-only "dummy" packet. Set the payload to all zeros, with
+    // length according to the virtual length.
+    packet_data->payload.SetSize(packet_->virtual_payload_length_bytes());
+    std::fill_n(packet_data->payload.data(), packet_data->payload.size(), 0);
+  } else {
+    packet_data->payload.SetData(packet_->payload(),
+                                 packet_->payload_length_bytes());
+  }
+  packet_data->time_ms = packet_->time_ms();
+
+  LoadNextPacket();
+
+  return packet_data;
+}
+
+NetEqRtpDumpInput::NetEqRtpDumpInput(absl::string_view file_name,
+                                     const RtpHeaderExtensionMap& hdr_ext_map,
+                                     absl::optional<uint32_t> ssrc_filter)
+    : source_(RtpFileSource::Create(file_name, ssrc_filter)) {
+  for (const auto& ext_pair : hdr_ext_map) {
+    source_->RegisterRtpHeaderExtension(ext_pair.second, ext_pair.first);
+  }
+  LoadNextPacket();
+}
+
+absl::optional<int64_t> NetEqRtpDumpInput::NextOutputEventTime() const {
+  return next_output_event_ms_;
+}
+
+void NetEqRtpDumpInput::AdvanceOutputEvent() {
+  if (next_output_event_ms_) {
+    *next_output_event_ms_ += kOutputPeriodMs;
+  }
+  if (!NextPacketTime()) {
+    next_output_event_ms_ = absl::nullopt;
+  }
+}
+
+PacketSource* NetEqRtpDumpInput::source() {
+  return source_.get();
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h
new file mode 100644
index 0000000000..407fa491b1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+
+namespace webrtc {
+namespace test {
+
+class RtpFileSource;
+
+// An adapter class to dress up a PacketSource object as a NetEqInput.
+class NetEqPacketSourceInput : public NetEqInput {
+ public:
+  using RtpHeaderExtensionMap = std::map<int, webrtc::RTPExtensionType>;
+
+  NetEqPacketSourceInput();
+  absl::optional<int64_t> NextPacketTime() const override;
+  std::unique_ptr<PacketData> PopPacket() override;
+  absl::optional<RTPHeader> NextHeader() const override;
+  bool ended() const override { return !next_output_event_ms_; }
+
+ protected:
+  virtual PacketSource* source() = 0;
+  void LoadNextPacket();
+
+  absl::optional<int64_t> next_output_event_ms_;
+
+ private:
+  std::unique_ptr<Packet> packet_;
+};
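+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the upstream change): constructing the
+// NetEqRtpDumpInput declared below, with an audio-level header extension
+// mapped to extension ID 1. The file name is a made-up placeholder.
+//
+//   NetEqPacketSourceInput::RtpHeaderExtensionMap ext_map = {
+//       {1, webrtc::kRtpExtensionAudioLevel}};
+//   NetEqRtpDumpInput input("call.rtp", ext_map,
+//                           /*ssrc_filter=*/absl::nullopt);
+//   while (!input.ended()) { /* drive NetEq as usual */ }
+// ---------------------------------------------------------------------------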
+
+// Implementation of NetEqPacketSourceInput to be used with an RtpFileSource.
+class NetEqRtpDumpInput final : public NetEqPacketSourceInput {
+ public:
+  NetEqRtpDumpInput(absl::string_view file_name,
+                    const RtpHeaderExtensionMap& hdr_ext_map,
+                    absl::optional<uint32_t> ssrc_filter);
+
+  absl::optional<int64_t> NextOutputEventTime() const override;
+  void AdvanceOutputEvent() override;
+
+ protected:
+  PacketSource* source() override;
+
+ private:
+  static constexpr int64_t kOutputPeriodMs = 10;
+
+  std::unique_ptr<RtpFileSource> source_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc
new file mode 100644
index 0000000000..ccaa87b5e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_performance_test.h"
+
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/neteq/neteq.h"
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "modules/audio_coding/neteq/tools/rtp_generator.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/clock.h"
+#include "test/testsupport/file_utils.h"
+
+using webrtc::NetEq;
+using webrtc::test::AudioLoop;
+using webrtc::test::RtpGenerator;
+
+namespace webrtc {
+namespace test {
+
+int64_t NetEqPerformanceTest::Run(int runtime_ms,
+                                  int lossrate,
+                                  double drift_factor) {
+  const std::string kInputFileName =
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+  const int kSampRateHz = 32000;
+  const std::string kDecoderName = "pcm16-swb32";
+  const int kPayloadType = 95;
+
+  // Initialize NetEq instance.
+  NetEq::Config config;
+  config.sample_rate_hz = kSampRateHz;
+  webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock();
+  auto audio_decoder_factory = CreateBuiltinAudioDecoderFactory();
+  auto neteq =
+      DefaultNetEqFactory().CreateNetEq(config, audio_decoder_factory, clock);
+  // Register decoder in `neteq`.
+  if (!neteq->RegisterPayloadType(kPayloadType,
+                                  SdpAudioFormat("l16", kSampRateHz, 1)))
+    return -1;
+
+  // Set up AudioLoop object.
+  AudioLoop audio_loop;
+  const size_t kMaxLoopLengthSamples = kSampRateHz * 10;  // 10 second loop.
+  const size_t kInputBlockSizeSamples = 60 * kSampRateHz / 1000;  // 60 ms.
+  if (!audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
+                       kInputBlockSizeSamples))
+    return -1;
+
+  int32_t time_now_ms = 0;
+
+  // Get first input packet.
+  RTPHeader rtp_header;
+  RtpGenerator rtp_gen(kSampRateHz / 1000);
+  // Start with positive drift in the first half of the simulation.
+  rtp_gen.set_drift_factor(drift_factor);
+  bool drift_flipped = false;
+  int32_t packet_input_time_ms =
+      rtp_gen.GetRtpHeader(kPayloadType, kInputBlockSizeSamples, &rtp_header);
+  auto input_samples = audio_loop.GetNextBlock();
+  if (input_samples.empty())
+    exit(1);
+  uint8_t input_payload[kInputBlockSizeSamples * sizeof(int16_t)];
+  size_t payload_len = WebRtcPcm16b_Encode(input_samples.data(),
+                                           input_samples.size(), input_payload);
+  RTC_CHECK_EQ(sizeof(input_payload), payload_len);
+
+  // Main loop.
+  int64_t start_time_ms = clock->TimeInMilliseconds();
+  AudioFrame out_frame;
+  while (time_now_ms < runtime_ms) {
+    while (packet_input_time_ms <= time_now_ms) {
+      // Drop one out of every `lossrate` packets.
+      bool lost = false;
+      if (lossrate > 0) {
+        lost = ((rtp_header.sequenceNumber - 1) % lossrate) == 0;
+      }
+      if (!lost) {
+        // Insert packet.
+        int error = neteq->InsertPacket(rtp_header, input_payload);
+        if (error != NetEq::kOK)
+          return -1;
+      }
+
+      // Get next packet.
+      packet_input_time_ms = rtp_gen.GetRtpHeader(
+          kPayloadType, kInputBlockSizeSamples, &rtp_header);
+      input_samples = audio_loop.GetNextBlock();
+      if (input_samples.empty())
+        return -1;
+      payload_len = WebRtcPcm16b_Encode(input_samples.data(),
+                                        input_samples.size(), input_payload);
+      RTC_DCHECK_EQ(payload_len, kInputBlockSizeSamples * sizeof(int16_t));
+    }
+
+    // Get output audio, but don't do anything with it.
+    bool muted;
+    int error = neteq->GetAudio(&out_frame, &muted);
+    RTC_CHECK(!muted);
+    if (error != NetEq::kOK)
+      return -1;
+
+    RTC_DCHECK_EQ(out_frame.samples_per_channel_, (kSampRateHz * 10) / 1000);
+
+    static const int kOutputBlockSizeMs = 10;
+    time_now_ms += kOutputBlockSizeMs;
+    if (time_now_ms >= runtime_ms / 2 && !drift_flipped) {
+      // Apply negative drift in the second half of the simulation.
+      rtp_gen.set_drift_factor(-drift_factor);
+      drift_flipped = true;
+    }
+  }
+  int64_t end_time_ms = clock->TimeInMilliseconds();
+  return end_time_ms - start_time_ms;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h
new file mode 100644
index 0000000000..b5b4d91577
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+namespace test {
+
+class NetEqPerformanceTest {
+ public:
+  // Runs a performance test with parameters as follows:
+  //   `runtime_ms`: the simulation time, i.e., the duration of the audio data.
+  //   `lossrate`: drop one out of `lossrate` packets, e.g., one out of 10.
+  //   `drift_factor`: clock drift in [0, 1].
+  // Returns the runtime in ms.
+  static int64_t Run(int runtime_ms, int lossrate, double drift_factor);
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_
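+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the upstream change): a minimal caller,
+// assuming a 100 s simulation, one lost packet out of every 10, and a 0.1
+// drift factor.
+//
+//   int64_t wall_clock_ms = webrtc::test::NetEqPerformanceTest::Run(
+//       /*runtime_ms=*/100000, /*lossrate=*/10, /*drift_factor=*/0.1);
+//   RTC_CHECK_NE(wall_clock_ms, -1);  // -1 signals failure.
+// ---------------------------------------------------------------------------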
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
new file mode 100644
index 0000000000..18e6091f93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+
+#include <stdio.h>
+
+#include <cmath>
+
+#include "absl/flags/flag.h"
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+#include "modules/audio_coding/neteq/tools/neteq_quality_test.h"
+#include "modules/audio_coding/neteq/tools/output_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_wav_file.h"
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/string_encode.h"
+#include "system_wrappers/include/clock.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(std::string,
+          in_filename,
+          "audio_coding/speech_mono_16kHz.pcm",
+          "Path of the input file (relative to the resources/ directory) for "
+          "input audio (specify sample rate with --input_sample_rate, "
+          "and channels with --channels).");
+
+ABSL_FLAG(int, input_sample_rate, 16000, "Sample rate of input file in Hz.");
+
+ABSL_FLAG(int, channels, 1, "Number of channels in input audio.");
+
+ABSL_FLAG(std::string,
+          out_filename,
+          "neteq_quality_test_out.pcm",
+          "Name of output audio file, which will be saved in " +
+              ::webrtc::test::OutputPath());
+
+ABSL_FLAG(
+    int,
+    runtime_ms,
+    10000,
+    "Simulated runtime (milliseconds). -1 will consume the complete file.");
+
+ABSL_FLAG(int, packet_loss_rate, 10, "Percentage of packet loss.");
+
+ABSL_FLAG(int,
+          random_loss_mode,
+          ::webrtc::test::kUniformLoss,
+          "Random loss mode: 0--no loss, 1--uniform loss, 2--Gilbert Elliot "
+          "loss, 3--fixed loss.");
+
+ABSL_FLAG(int,
+          burst_length,
+          30,
+          "Burst length in milliseconds, only valid for Gilbert Elliot loss.");
+
+ABSL_FLAG(float, drift_factor, 0.0, "Time drift factor.");
+
+ABSL_FLAG(int,
+          preload_packets,
+          1,
+          "Preload the buffer with this many packets.");
+
+ABSL_FLAG(std::string,
+          loss_events,
+          "",
+          "List of loss events time and duration separated by comma: "
+          "<first_event_time_ms> <first_event_duration_ms>, "
+          "<second_event_time_ms> <second_event_duration_ms>, ...");
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+std::unique_ptr<NetEq> CreateNetEq(
+    const NetEq::Config& config,
+    Clock* clock,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
+  return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
+}
+
+const std::string& GetInFilenamePath(absl::string_view file_name) {
+  std::vector<absl::string_view> name_parts = rtc::split(file_name, '.');
+  RTC_CHECK_EQ(name_parts.size(), 2);
+  static const std::string path =
+      ::webrtc::test::ResourcePath(name_parts[0], name_parts[1]);
+  return path;
+}
+
+const std::string& GetOutFilenamePath(absl::string_view file_name) {
+  static const std::string path =
+      ::webrtc::test::OutputPath() + std::string(file_name);
+  return path;
+}
+
+}  // namespace
+
+const uint8_t kPayloadType = 95;
+const int kOutputSizeMs = 10;
+const int kInitSeed = 0x12345678;
+const int kPacketLossTimeUnitMs = 10;
+
+// Common validator for file names.
+static bool ValidateFilename(absl::string_view value, bool is_output) {
+  if (!is_output) {
+    RTC_CHECK_NE(value.substr(value.find_last_of('.') + 1), "wav")
+        << "WAV file input is not supported";
+  }
+  FILE* fid = is_output ? fopen(std::string(value).c_str(), "wb")
+                        : fopen(std::string(value).c_str(), "rb");
+  if (fid == nullptr)
+    return false;
+  fclose(fid);
+  return true;
+}
+
+// ProbTrans00Solver() calculates the transition probability from the no-loss
+// state to itself in a modified Gilbert Elliot packet loss model, such that
+// the target packet loss rate `loss_rate` is achieved. A packet is not lost
+// only if all `units` drawings within the duration of the packet result in
+// no-loss.
+static double ProbTrans00Solver(int units,
+                                double loss_rate,
+                                double prob_trans_10) {
+  if (units == 1)
+    return prob_trans_10 / (1.0f - loss_rate) - prob_trans_10;
+  // 0 == prob_trans_00 ^ (units - 1) + (1 - loss_rate) / prob_trans_10 *
+  //     prob_trans_00 - (1 - loss_rate) * (1 + 1 / prob_trans_10).
+  // There is a unique solution between 0.0 and 1.0, due to the monotonicity
+  // and the opposite signs at 0.0 and 1.0.
+  // For simplicity, we reformulate the equation as
+  //     f(x) = x ^ (units - 1) + a x + b.
+  // Its derivative is
+  //     f'(x) = (units - 1) x ^ (units - 2) + a.
+  // The derivative is strictly greater than 0 when x is between 0 and 1.
+  // We use Newton's method to solve the equation, where the iteration is
+  //     x(k+1) = x(k) - f(x) / f'(x).
+  const double kPrecision = 0.001f;
+  const int kIterations = 100;
+  const double a = (1.0f - loss_rate) / prob_trans_10;
+  const double b = (loss_rate - 1.0f) * (1.0f + 1.0f / prob_trans_10);
+  double x = 0.0;  // Starting point.
+  double f = b;
+  double f_p;
+  int iter = 0;
+  while ((f >= kPrecision || f <= -kPrecision) && iter < kIterations) {
+    f_p = (units - 1.0f) * std::pow(x, units - 2) + a;
+    x -= f / f_p;
+    if (x > 1.0f) {
+      x = 1.0f;
+    } else if (x < 0.0f) {
+      x = 0.0f;
+    }
+    f = std::pow(x, units - 1) + a * x + b;
+    iter++;
+  }
+  return x;
+}
+
+NetEqQualityTest::NetEqQualityTest(
+    int block_duration_ms,
+    int in_sampling_khz,
+    int out_sampling_khz,
+    const SdpAudioFormat& format,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory)
+    : audio_format_(format),
+      channels_(absl::GetFlag(FLAGS_channels)),
+      decoded_time_ms_(0),
+      decodable_time_ms_(0),
+      drift_factor_(absl::GetFlag(FLAGS_drift_factor)),
+      packet_loss_rate_(absl::GetFlag(FLAGS_packet_loss_rate)),
+      block_duration_ms_(block_duration_ms),
+      in_sampling_khz_(in_sampling_khz),
+      out_sampling_khz_(out_sampling_khz),
+      in_size_samples_(
+          static_cast<size_t>(in_sampling_khz_ * block_duration_ms_)),
+      payload_size_bytes_(0),
+      max_payload_bytes_(0),
+      in_file_(new ResampleInputAudioFile(
+          GetInFilenamePath(absl::GetFlag(FLAGS_in_filename)),
+          absl::GetFlag(FLAGS_input_sample_rate),
+          in_sampling_khz * 1000,
+          absl::GetFlag(FLAGS_runtime_ms) > 0)),
+      rtp_generator_(
+          new RtpGenerator(in_sampling_khz_, 0, 0, decodable_time_ms_)),
+      total_payload_size_bytes_(0) {
+  // Flag validation
+  RTC_CHECK(ValidateFilename(
+      GetInFilenamePath(absl::GetFlag(FLAGS_in_filename)), false))
+      << "Invalid input filename.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_input_sample_rate) == 8000 ||
+            absl::GetFlag(FLAGS_input_sample_rate) == 16000 ||
+            absl::GetFlag(FLAGS_input_sample_rate) == 32000 ||
+            absl::GetFlag(FLAGS_input_sample_rate) == 48000)
+      << "Invalid sample rate; should be 8000, 16000, 32000 or 48000 Hz.";
+
+  RTC_CHECK_EQ(absl::GetFlag(FLAGS_channels), 1)
+      << "Invalid number of channels; currently only 1 is supported.";
+
+  RTC_CHECK(ValidateFilename(
+      GetOutFilenamePath(absl::GetFlag(FLAGS_out_filename)), true))
+      << "Invalid output filename.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_packet_loss_rate) >= 0 &&
+            absl::GetFlag(FLAGS_packet_loss_rate) <= 100)
+      << "Invalid packet loss percentage; should be between 0 and 100.";
+
+  RTC_CHECK(absl::GetFlag(FLAGS_random_loss_mode) >= 0 &&
+            absl::GetFlag(FLAGS_random_loss_mode) < kLastLossMode)
+      << "Invalid random packet loss mode; should be between 0 and "
+      << kLastLossMode - 1 << ".";
+
+  RTC_CHECK_GE(absl::GetFlag(FLAGS_burst_length), kPacketLossTimeUnitMs)
+      << "Invalid burst length; should be greater than or equal to "
+      << kPacketLossTimeUnitMs << " ms.";
+
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_drift_factor), -0.1)
+      << "Invalid drift factor; should be greater than -0.1.";
+
+  RTC_CHECK_GE(absl::GetFlag(FLAGS_preload_packets), 0)
+      << "Invalid number of packets to preload; must be non-negative.";
+
+  const std::string out_filename =
+      GetOutFilenamePath(absl::GetFlag(FLAGS_out_filename));
+  const std::string log_filename = out_filename + ".log";
+  log_file_.open(log_filename.c_str(), std::ofstream::out);
+  RTC_CHECK(log_file_.is_open());
+
+  if (out_filename.size() >= 4 &&
+      out_filename.substr(out_filename.size() - 4) == ".wav") {
+    // Open a wav file.
+    output_.reset(
+        new webrtc::test::OutputWavFile(out_filename, 1000 * out_sampling_khz));
+  } else {
+    // Open a pcm file.
+    output_.reset(new webrtc::test::OutputAudioFile(out_filename));
+  }
+
+  NetEq::Config config;
+  config.sample_rate_hz = out_sampling_khz_ * 1000;
+  neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory);
+  max_payload_bytes_ = in_size_samples_ * channels_ * sizeof(int16_t);
+  in_data_.reset(new int16_t[in_size_samples_ * channels_]);
+}
+
+NetEqQualityTest::~NetEqQualityTest() {
+  log_file_.close();
+}
+
+bool NoLoss::Lost(int now_ms) {
+  return false;
+}
+
+UniformLoss::UniformLoss(double loss_rate) : loss_rate_(loss_rate) {}
+
+bool UniformLoss::Lost(int now_ms) {
+  int drop_this = rand();
+  return (drop_this < loss_rate_ * RAND_MAX);
+}
+
+GilbertElliotLoss::GilbertElliotLoss(double prob_trans_11, double prob_trans_01)
+    : prob_trans_11_(prob_trans_11),
+      prob_trans_01_(prob_trans_01),
+      lost_last_(false),
+      uniform_loss_model_(new UniformLoss(0)) {}
+
+GilbertElliotLoss::~GilbertElliotLoss() {}
+
+bool GilbertElliotLoss::Lost(int now_ms) {
+  // Simulate bursty channel (Gilbert model).
+  // (1st order) Markov chain model with memory of the previous/last
+  // packet state (lost or received).
+  if (lost_last_) {
+    // Previous packet was not received.
+    uniform_loss_model_->set_loss_rate(prob_trans_11_);
+    return lost_last_ = uniform_loss_model_->Lost(now_ms);
+  } else {
+    uniform_loss_model_->set_loss_rate(prob_trans_01_);
+    return lost_last_ = uniform_loss_model_->Lost(now_ms);
+  }
+}
+
+FixedLossModel::FixedLossModel(
+    std::set<FixedLossEvent, FixedLossEventCmp> loss_events)
+    : loss_events_(loss_events) {
+  loss_events_it_ = loss_events_.begin();
+}
+
+FixedLossModel::~FixedLossModel() {}
+
+bool FixedLossModel::Lost(int now_ms) {
+  if (loss_events_it_ != loss_events_.end() &&
+      now_ms > loss_events_it_->start_ms) {
+    if (now_ms <= loss_events_it_->start_ms + loss_events_it_->duration_ms) {
+      return true;
+    } else {
+      ++loss_events_it_;
+      return false;
+    }
+  }
+  return false;
+}
+
+void NetEqQualityTest::SetUp() {
+  ASSERT_TRUE(neteq_->RegisterPayloadType(kPayloadType, audio_format_));
+  rtp_generator_->set_drift_factor(drift_factor_);
+
+  int units = block_duration_ms_ / kPacketLossTimeUnitMs;
+  switch (absl::GetFlag(FLAGS_random_loss_mode)) {
+    case kUniformLoss: {
+      // `unit_loss_rate` is the packet loss rate for each unit time interval
+      // (kPacketLossTimeUnitMs). Since a packet loss event is generated if any
+      // of `block_duration_ms_ / kPacketLossTimeUnitMs` unit time intervals of
+      // a full packet duration is drawn with a loss, `unit_loss_rate` fulfills
+      // (1 - unit_loss_rate) ^ (block_duration_ms_ / kPacketLossTimeUnitMs) ==
+      // 1 - packet_loss_rate.
+      double unit_loss_rate =
+          (1.0 - std::pow(1.0 - 0.01 * packet_loss_rate_, 1.0 / units));
+      loss_model_.reset(new UniformLoss(unit_loss_rate));
+      break;
+    }
+    case kGilbertElliotLoss: {
+      // `FLAGS_burst_length` should be an integer multiple of
+      // kPacketLossTimeUnitMs.
+      ASSERT_EQ(0, absl::GetFlag(FLAGS_burst_length) % kPacketLossTimeUnitMs);
+
+      // We do not allow 100 percent packet loss in the Gilbert Elliot model,
+      // as it would make no sense.
+      ASSERT_GT(100, packet_loss_rate_);
+
+      // To guarantee the overall packet loss rate, transition probabilities
+      // need to satisfy:
+      //     pi_0 * (1 - prob_trans_01_) ^ units +
+      //         pi_1 * prob_trans_10_ ^ (units - 1) == 1 - loss_rate,
+      // where
+      //     pi_0 = prob_trans_10 / (prob_trans_10 + prob_trans_01_)
+      // is the stationary state probability of no-loss, and
+      //     pi_1 = prob_trans_01_ / (prob_trans_10 + prob_trans_01_)
+      // is the stationary state probability of loss.
+      // After a derivation, prob_trans_00 should satisfy:
+      //     prob_trans_00 ^ (units - 1) = (loss_rate - 1) / prob_trans_10 *
+      //         prob_trans_00 + (1 - loss_rate) * (1 + 1 / prob_trans_10).
+      double loss_rate = 0.01f * packet_loss_rate_;
+      double prob_trans_10 =
+          1.0f * kPacketLossTimeUnitMs / absl::GetFlag(FLAGS_burst_length);
+      double prob_trans_00 = ProbTrans00Solver(units, loss_rate, prob_trans_10);
+      loss_model_.reset(
+          new GilbertElliotLoss(1.0f - prob_trans_10, 1.0f - prob_trans_00));
+      break;
+    }
+    case kFixedLoss: {
+      std::istringstream loss_events_stream(absl::GetFlag(FLAGS_loss_events));
+      std::string loss_event_string;
+      std::set<FixedLossEvent, FixedLossEventCmp> loss_events;
+      while (std::getline(loss_events_stream, loss_event_string, ',')) {
+        std::vector<int> loss_event_params;
+        std::istringstream loss_event_params_stream(loss_event_string);
+        std::copy(std::istream_iterator<int>(loss_event_params_stream),
+                  std::istream_iterator<int>(),
+                  std::back_inserter(loss_event_params));
+        RTC_CHECK_EQ(loss_event_params.size(), 2);
+        auto result = loss_events.insert(
+            FixedLossEvent(loss_event_params[0], loss_event_params[1]));
+        RTC_CHECK(result.second);
+      }
+      RTC_CHECK_GT(loss_events.size(), 0);
+      loss_model_.reset(new FixedLossModel(loss_events));
+      break;
+    }
+    default: {
+      loss_model_.reset(new NoLoss);
+      break;
+    }
+  }
+
+  // Make sure that the packet loss profile is the same for all derived tests.
+  srand(kInitSeed);
+}
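+
+// ---------------------------------------------------------------------------
+// Illustrative numbers (not part of the upstream change), assuming 20 ms
+// blocks (units = 2) and packet_loss_rate_ = 10:
+//  - kUniformLoss: unit_loss_rate = 1 - (1 - 0.1)^(1/2) ~= 0.0513, so two
+//    independent 10 ms draws lose the whole packet with probability ~0.1.
+//  - kGilbertElliotLoss with burst_length = 30 ms: prob_trans_10 = 10/30, and
+//    prob_trans_00 follows numerically from ProbTrans00Solver(2, 0.1, 1.0/3).
+// ---------------------------------------------------------------------------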
+
+std::ofstream& NetEqQualityTest::Log() {
+  return log_file_;
+}
+
+bool NetEqQualityTest::PacketLost() {
+  int cycles = block_duration_ms_ / kPacketLossTimeUnitMs;
+
+  // The loop is to make sure that codecs with different block lengths share
+  // the same packet loss profile.
+  bool lost = false;
+  for (int idx = 0; idx < cycles; idx++) {
+    if (loss_model_->Lost(decoded_time_ms_)) {
+      // The packet will be lost if any of the drawings indicates a loss, but
+      // the loop has to go on to make sure that codecs with different block
+      // lengths keep the same pace.
+      lost = true;
+    }
+  }
+  return lost;
+}
+
+int NetEqQualityTest::Transmit() {
+  int packet_input_time_ms = rtp_generator_->GetRtpHeader(
+      kPayloadType, in_size_samples_, &rtp_header_);
+  Log() << "Packet of size " << payload_size_bytes_ << " bytes, for frame at "
+        << packet_input_time_ms << " ms ";
+  if (payload_size_bytes_ > 0) {
+    if (!PacketLost()) {
+      int ret = neteq_->InsertPacket(
+          rtp_header_,
+          rtc::ArrayView<const uint8_t>(payload_.data(), payload_size_bytes_));
+      if (ret != NetEq::kOK)
+        return -1;
+      Log() << "was sent.";
+    } else {
+      Log() << "was lost.";
+    }
+  }
+  Log() << std::endl;
+  return packet_input_time_ms;
+}
+
+int NetEqQualityTest::DecodeBlock() {
+  bool muted;
+  int ret = neteq_->GetAudio(&out_frame_, &muted);
+  RTC_CHECK(!muted);
+
+  if (ret != NetEq::kOK) {
+    return -1;
+  } else {
+    RTC_DCHECK_EQ(out_frame_.num_channels_, channels_);
+    RTC_DCHECK_EQ(out_frame_.samples_per_channel_,
+                  static_cast<size_t>(kOutputSizeMs * out_sampling_khz_));
+    RTC_CHECK(output_->WriteArray(
+        out_frame_.data(),
+        out_frame_.samples_per_channel_ * out_frame_.num_channels_));
+    return static_cast<int>(out_frame_.samples_per_channel_);
+  }
+}
+
+void NetEqQualityTest::Simulate() {
+  int audio_size_samples;
+  bool end_of_input = false;
+  int runtime_ms = absl::GetFlag(FLAGS_runtime_ms) >= 0
+                       ? absl::GetFlag(FLAGS_runtime_ms)
+                       : INT_MAX;
+
+  while (!end_of_input && decoded_time_ms_ < runtime_ms) {
+    // Preload the buffer if needed.
+    while (decodable_time_ms_ -
+               absl::GetFlag(FLAGS_preload_packets) * block_duration_ms_ <
+           decoded_time_ms_) {
+      if (!in_file_->Read(in_size_samples_ * channels_, &in_data_[0])) {
+        end_of_input = true;
+        ASSERT_TRUE(end_of_input && absl::GetFlag(FLAGS_runtime_ms) < 0);
+        break;
+      }
+      payload_.Clear();
+      payload_size_bytes_ = EncodeBlock(&in_data_[0], in_size_samples_,
+                                        &payload_, max_payload_bytes_);
+      total_payload_size_bytes_ += payload_size_bytes_;
+      decodable_time_ms_ = Transmit() + block_duration_ms_;
+    }
+    audio_size_samples = DecodeBlock();
+    if (audio_size_samples > 0) {
+      decoded_time_ms_ += audio_size_samples / out_sampling_khz_;
+    }
+  }
+  Log() << "Average bit rate was "
+        << 8.0f * total_payload_size_bytes_ / absl::GetFlag(FLAGS_runtime_ms)
+        << " kbps" << std::endl;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h
new file mode 100644
index 0000000000..edcb117748
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_
+
+#include <fstream>
+#include <memory>
+#include <set>
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/neteq/neteq.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/rtp_generator.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+enum LossModes {
+  kNoLoss,
+  kUniformLoss,
+  kGilbertElliotLoss,
+  kFixedLoss,
+  kLastLossMode
+};
+
+class LossModel {
+ public:
+  virtual ~LossModel() {}
+  virtual bool Lost(int now_ms) = 0;
+};
+
+class NoLoss : public LossModel {
+ public:
+  bool Lost(int now_ms) override;
+};
+
+class UniformLoss : public LossModel {
+ public:
+  UniformLoss(double loss_rate);
+  bool Lost(int now_ms) override;
+  void set_loss_rate(double loss_rate) { loss_rate_ = loss_rate; }
+
+ private:
+  double loss_rate_;
+};
+
+class GilbertElliotLoss : public LossModel {
+ public:
+  GilbertElliotLoss(double prob_trans_11, double prob_trans_01);
+  ~GilbertElliotLoss() override;
+  bool Lost(int now_ms) override;
+
+ private:
+  // Prob. of losing current packet, when previous packet is lost.
+  double prob_trans_11_;
+  // Prob. of losing current packet, when previous packet is not lost.
+  double prob_trans_01_;
+  bool lost_last_;
+  std::unique_ptr<UniformLoss> uniform_loss_model_;
+};
+
+struct FixedLossEvent {
+  int start_ms;
+  int duration_ms;
+  FixedLossEvent(int start_ms, int duration_ms)
+      : start_ms(start_ms), duration_ms(duration_ms) {}
+};
+
+struct FixedLossEventCmp {
+  bool operator()(const FixedLossEvent& l_event,
+                  const FixedLossEvent& r_event) const {
+    return l_event.start_ms < r_event.start_ms;
+  }
+};
+
+class FixedLossModel : public LossModel {
+ public:
+  FixedLossModel(std::set<FixedLossEvent, FixedLossEventCmp> loss_events);
+  ~FixedLossModel() override;
+  bool Lost(int now_ms) override;
+
+ private:
+  std::set<FixedLossEvent, FixedLossEventCmp> loss_events_;
+  std::set<FixedLossEvent, FixedLossEventCmp>::iterator loss_events_it_;
+};
+
+class NetEqQualityTest : public ::testing::Test {
+ protected:
+  NetEqQualityTest(
+      int block_duration_ms,
+      int in_sampling_khz,
+      int out_sampling_khz,
+      const SdpAudioFormat& format,
+      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory =
+          webrtc::CreateBuiltinAudioDecoderFactory());
+  ~NetEqQualityTest() override;
+
+  void SetUp() override;
+
+  // EncodeBlock(...) does the following:
+  // 1. encodes a block of audio, stored in `in_data`, with a length of
+  //    `block_size_samples` (samples per channel),
+  // 2. saves the bit stream to `payload`, which is `max_bytes` bytes in size,
+  // 3. returns the length of the payload (in bytes).
+  virtual int EncodeBlock(int16_t* in_data,
+                          size_t block_size_samples,
+                          rtc::Buffer* payload,
+                          size_t max_bytes) = 0;
+
+  // PacketLost(...) determines whether a packet sent at an indicated time gets
+  // lost or not.
+  bool PacketLost();
+
+  // DecodeBlock() decodes a block of audio using the payload stored in
+  // `payload_` with the length of `payload_size_bytes_` (bytes). The decoded
+  // audio is to be stored in `out_data_`.
+  int DecodeBlock();
+
+  // Transmit() uses `rtp_generator_` to generate a packet and passes it to
+  // `neteq_`.
+  int Transmit();
+
+  // Runs encoding / transmitting / decoding.
+  void Simulate();
+
+  // Write to log file. Usage: Log() << ...
+  std::ofstream& Log();
+
+  SdpAudioFormat audio_format_;
+  const size_t channels_;
+
+ private:
+  int decoded_time_ms_;
+  int decodable_time_ms_;
+  double drift_factor_;
+  int packet_loss_rate_;
+  const int block_duration_ms_;
+  const int in_sampling_khz_;
+  const int out_sampling_khz_;
+
+  // Number of samples per channel in a frame.
+  const size_t in_size_samples_;
+
+  size_t payload_size_bytes_;
+  size_t max_payload_bytes_;
+
+  std::unique_ptr<InputAudioFile> in_file_;
+  std::unique_ptr<AudioSink> output_;
+  std::ofstream log_file_;
+
+  std::unique_ptr<RtpGenerator> rtp_generator_;
+  std::unique_ptr<NetEq> neteq_;
+  std::unique_ptr<LossModel> loss_model_;
+
+  std::unique_ptr<int16_t[]> in_data_;
+  rtc::Buffer payload_;
+  AudioFrame out_frame_;
+  RTPHeader rtp_header_;
+
+  size_t total_payload_size_bytes_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_
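+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the upstream change): the smallest possible
+// derived test. `FakeEncodeBlock` is a made-up stand-in for a real encoder
+// call; an actual subclass would produce a real bit stream.
+//
+//   class L16QualityTest : public NetEqQualityTest {
+//    protected:
+//     L16QualityTest()
+//         : NetEqQualityTest(10, 16, 16, SdpAudioFormat("l16", 16000, 1)) {}
+//     int EncodeBlock(int16_t* in_data, size_t block_size_samples,
+//                     rtc::Buffer* payload, size_t max_bytes) override {
+//       return FakeEncodeBlock(in_data, block_size_samples, payload);
+//     }
+//   };
+//   TEST_F(L16QualityTest, Runs) { Simulate(); }
+// ---------------------------------------------------------------------------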
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc
new file mode 100644
index 0000000000..ffd114ae5b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_replacement_input.h"
+
+#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+NetEqReplacementInput::NetEqReplacementInput(
+    std::unique_ptr<NetEqInput> source,
+    uint8_t replacement_payload_type,
+    const std::set<uint8_t>& comfort_noise_types,
+    const std::set<uint8_t>& forbidden_types)
+    : source_(std::move(source)),
+      replacement_payload_type_(replacement_payload_type),
+      comfort_noise_types_(comfort_noise_types),
+      forbidden_types_(forbidden_types) {
+  RTC_CHECK(source_);
+  packet_ = source_->PopPacket();
+  ReplacePacket();
+}
+
+absl::optional<int64_t> NetEqReplacementInput::NextPacketTime() const {
+  return packet_
+             ? absl::optional<int64_t>(static_cast<int64_t>(packet_->time_ms))
+             : absl::nullopt;
+}
+
+absl::optional<int64_t> NetEqReplacementInput::NextOutputEventTime() const {
+  return source_->NextOutputEventTime();
+}
+
+std::unique_ptr<NetEqInput::PacketData> NetEqReplacementInput::PopPacket() {
+  std::unique_ptr<PacketData> to_return = std::move(packet_);
+  while (true) {
+    packet_ = source_->PopPacket();
+    if (!packet_)
+      break;
+    if (packet_->payload.size() > packet_->header.paddingLength) {
+      // Not a padding-only packet. Good to go. Otherwise, skip this packet.
+      break;
+    }
+  }
+  ReplacePacket();
+  return to_return;
+}
+
+void NetEqReplacementInput::AdvanceOutputEvent() {
+  source_->AdvanceOutputEvent();
+}
+
+bool NetEqReplacementInput::ended() const {
+  return source_->ended();
+}
+
+absl::optional<RTPHeader> NetEqReplacementInput::NextHeader() const {
+  return source_->NextHeader();
+}
+
+void NetEqReplacementInput::ReplacePacket() {
+  if (!source_->NextPacketTime()) {
+    // End of input. Cannot do proper replacement on the very last packet, so
+    // we delete it instead.
+    packet_.reset();
+    return;
+  }
+
+  RTC_DCHECK(packet_);
+
+  RTC_CHECK_EQ(forbidden_types_.count(packet_->header.payloadType), 0)
+      << "Payload type " << static_cast<int>(packet_->header.payloadType)
+      << " is forbidden.";
+
+  // Check if this packet is comfort noise.
+  if (comfort_noise_types_.count(packet_->header.payloadType) != 0) {
+    // If CNG, simply insert a zero-energy one-byte payload.
+    uint8_t cng_payload[1] = {127};  // Max attenuation of CNG.
+    packet_->payload.SetData(cng_payload);
+    return;
+  }
+
+  absl::optional<RTPHeader> next_hdr = source_->NextHeader();
+  RTC_DCHECK(next_hdr);
+  uint8_t payload[12];
+  RTC_DCHECK_LE(last_frame_size_timestamps_, 120 * 48);
+  uint32_t input_frame_size_timestamps = last_frame_size_timestamps_;
+  const uint32_t timestamp_diff =
+      next_hdr->timestamp - packet_->header.timestamp;
+  if (next_hdr->sequenceNumber == packet_->header.sequenceNumber + 1 &&
+      timestamp_diff <= 120 * 48) {
+    // Packets are in order and the timestamp diff is less than 5760 samples.
+    // Accept the timestamp diff as a valid frame size.
+    input_frame_size_timestamps = timestamp_diff;
+    last_frame_size_timestamps_ = input_frame_size_timestamps;
+  }
+  RTC_DCHECK_LE(input_frame_size_timestamps, 120 * 48);
+  FakeDecodeFromFile::PrepareEncoded(packet_->header.timestamp,
+                                     input_frame_size_timestamps,
+                                     packet_->payload.size(), payload);
+  packet_->payload.SetData(payload);
+  packet_->header.payloadType = replacement_payload_type_;
+  return;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h
new file mode 100644
index 0000000000..9ce9b9dc63
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_
+
+#include <memory>
+#include <set>
+
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+
+namespace webrtc {
+namespace test {
+
+// This class converts the packets from a NetEqInput to fake encodings to be
+// decoded by a FakeDecodeFromFile decoder.
+class NetEqReplacementInput : public NetEqInput {
+ public:
+  NetEqReplacementInput(std::unique_ptr<NetEqInput> source,
+                        uint8_t replacement_payload_type,
+                        const std::set<uint8_t>& comfort_noise_types,
+                        const std::set<uint8_t>& forbidden_types);
+
+  absl::optional<int64_t> NextPacketTime() const override;
+  absl::optional<int64_t> NextOutputEventTime() const override;
+  std::unique_ptr<PacketData> PopPacket() override;
+  void AdvanceOutputEvent() override;
+  bool ended() const override;
+  absl::optional<RTPHeader> NextHeader() const override;
+
+ private:
+  void ReplacePacket();
+
+  std::unique_ptr<NetEqInput> source_;
+  const uint8_t replacement_payload_type_;
+  const std::set<uint8_t> comfort_noise_types_;
+  const std::set<uint8_t> forbidden_types_;
+  std::unique_ptr<PacketData> packet_;  // The next packet to deliver.
+  uint32_t last_frame_size_timestamps_ = 960;  // Initial guess: 20 ms @ 48 kHz.
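+  // Illustrative example (not part of the upstream change): for 20 ms frames
+  // at 48 kHz, consecutive packets advance the RTP timestamp by 960 units, so
+  // ReplacePacket() accepts 960 as the frame size. After a sequence-number
+  // gap, or a timestamp jump larger than 120 ms (120 * 48 ticks), the
+  // previous estimate is kept instead.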
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc
new file mode 100644
index 0000000000..b274069bd4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <iostream>
+#include <string>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+#include "modules/audio_coding/neteq/tools/neteq_test_factory.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/field_trial.h"
+#include "test/field_trial.h"
+
+using TestConfig = webrtc::test::NetEqTestFactory::Config;
+
+ABSL_FLAG(bool,
+          codec_map,
+          false,
+          "Prints the mapping between RTP payload type and "
+          "codec");
+ABSL_FLAG(std::string,
+          force_fieldtrials,
+          "",
+          "Field trials control experimental feature code which can be forced. "
+          "E.g. running with --force_fieldtrials=WebRTC-FooFeature/Enable/"
+          " will assign the group Enable to field trial WebRTC-FooFeature.");
+ABSL_FLAG(int, pcmu, TestConfig::default_pcmu(), "RTP payload type for PCM-u");
+ABSL_FLAG(int, pcma, TestConfig::default_pcma(), "RTP payload type for PCM-a");
+ABSL_FLAG(int, ilbc, TestConfig::default_ilbc(), "RTP payload type for iLBC");
+ABSL_FLAG(int, isac, TestConfig::default_isac(), "RTP payload type for iSAC");
+ABSL_FLAG(int,
+          isac_swb,
+          TestConfig::default_isac_swb(),
+          "RTP payload type for iSAC-swb (32 kHz)");
+ABSL_FLAG(int, opus, TestConfig::default_opus(), "RTP payload type for Opus");
+ABSL_FLAG(int,
+          pcm16b,
+          TestConfig::default_pcm16b(),
+          "RTP payload type for PCM16b-nb (8 kHz)");
+ABSL_FLAG(int,
+          pcm16b_wb,
+          TestConfig::default_pcm16b_wb(),
+          "RTP payload type for PCM16b-wb (16 kHz)");
+ABSL_FLAG(int,
+          pcm16b_swb32,
+          TestConfig::default_pcm16b_swb32(),
+          "RTP payload type for PCM16b-swb32 (32 kHz)");
+ABSL_FLAG(int,
+          pcm16b_swb48,
+          TestConfig::default_pcm16b_swb48(),
+          "RTP payload type for PCM16b-swb48 (48 kHz)");
+ABSL_FLAG(int, g722, TestConfig::default_g722(), "RTP payload type for G.722");
+ABSL_FLAG(int,
+          avt,
+          TestConfig::default_avt(),
+          "RTP payload type for AVT/DTMF (8 kHz)");
+ABSL_FLAG(int,
+          avt_16,
+          TestConfig::default_avt_16(),
+          "RTP payload type for AVT/DTMF (16 kHz)");
+ABSL_FLAG(int,
+          avt_32,
+          TestConfig::default_avt_32(),
+          "RTP payload type for AVT/DTMF (32 kHz)");
+ABSL_FLAG(int,
+          avt_48,
+          TestConfig::default_avt_48(),
+          "RTP payload type for AVT/DTMF (48 kHz)");
+ABSL_FLAG(int,
+          red,
+          TestConfig::default_red(),
+          "RTP payload type for redundant audio (RED)");
+ABSL_FLAG(int,
+          cn_nb,
+          TestConfig::default_cn_nb(),
+          "RTP payload type for comfort noise (8 kHz)");
+ABSL_FLAG(int,
+          cn_wb,
+          TestConfig::default_cn_wb(),
+          "RTP payload type for comfort noise (16 kHz)");
+ABSL_FLAG(int,
+          cn_swb32,
+          TestConfig::default_cn_swb32(),
+          "RTP payload type for comfort noise (32 kHz)");
+ABSL_FLAG(int,
+          cn_swb48,
+          TestConfig::default_cn_swb48(),
+          "RTP payload type for comfort noise (48 kHz)");
+ABSL_FLAG(std::string,
+          replacement_audio_file,
+          "",
+          "A PCM file that will be used to populate dummy"
+          " RTP packets");
+ABSL_FLAG(std::string,
+          ssrc,
+          "",
+          "Only use packets with this SSRC (decimal or hex, the latter "
+          "starting with 0x)");
+ABSL_FLAG(int,
+          audio_level,
+          TestConfig::default_audio_level(),
+          "Extension ID for audio level (RFC 6464)");
+ABSL_FLAG(int,
+          abs_send_time,
+          TestConfig::default_abs_send_time(),
+          "Extension ID for absolute sender time");
+ABSL_FLAG(int,
+          transport_seq_no,
+          TestConfig::default_transport_seq_no(),
+          "Extension ID for transport sequence number");
+ABSL_FLAG(int,
+          video_content_type,
+          TestConfig::default_video_content_type(),
+          "Extension ID for video content type");
+ABSL_FLAG(int,
+          video_timing,
+          TestConfig::default_video_timing(),
+          "Extension ID for video timing");
+ABSL_FLAG(std::string,
+          output_files_base_name,
+          "",
+          "Custom path used as prefix for the output files - i.e., "
+          "matlab plot, python plot, text log.");
+ABSL_FLAG(bool,
+          matlabplot,
+          false,
+          "Generates a matlab script for plotting the delay profile");
+ABSL_FLAG(bool,
+          pythonplot,
+          false,
+          "Generates a python script for plotting the delay profile");
+ABSL_FLAG(bool,
+          textlog,
+          false,
+          "Generates a text log describing the simulation on a "
+          "step-by-step basis.");
+ABSL_FLAG(bool, concealment_events, false, "Prints concealment events");
+ABSL_FLAG(int,
+          max_nr_packets_in_buffer,
+          TestConfig::default_max_nr_packets_in_buffer(),
+          "Maximum allowed number of packets in the buffer");
+ABSL_FLAG(bool,
+          enable_fast_accelerate,
+          false,
+          "Enables jitter buffer fast accelerate");
+
+namespace {
+
+// Parses the input string for a valid SSRC (at the start of the string). If a
+// valid SSRC is found, it is written to the output variable `ssrc`, and true
+// is returned. An empty string is also accepted, and leaves `ssrc` untouched.
+// Otherwise, false is returned.
+bool ParseSsrc(absl::string_view str, uint32_t* ssrc) {
+  if (str.empty())
+    return true;
+  int base = 10;
+  // Look for "0x" or "0X" at the start and change base to 16 if found.
+  if ((str.compare(0, 2, "0x") == 0) || (str.compare(0, 2, "0X") == 0))
+    base = 16;
+  errno = 0;
+  char* end_ptr;
+  std::string str_str = std::string(str);
+  unsigned long value = strtoul(str_str.c_str(), &end_ptr, base);  // NOLINT
+  if (value == ULONG_MAX && errno == ERANGE)
+    return false;  // Value out of range for unsigned long.
+  if (sizeof(unsigned long) > sizeof(uint32_t) && value > 0xFFFFFFFF)  // NOLINT
+    return false;  // Value out of range for uint32_t.
+  if (end_ptr - str_str.c_str() < static_cast<ptrdiff_t>(str.length()))
+    return false;  // Part of the string was not parsed.
+  *ssrc = static_cast<uint32_t>(value);
+  return true;
+}
+
+static bool ValidateExtensionId(int value) {
+  if (value > 0 && value <= 255)  // Value is ok.
+    return true;
+  printf("Extension ID must be between 1 and 255, not %d\n",
+         static_cast<int>(value));
+  return false;
+}
+
+// Flag validators.
+bool ValidatePayloadType(int value) {
+  if (value >= 0 && value <= 127)  // Value is ok.
+    return true;
+  printf("Payload type must be between 0 and 127, not %d\n",
+         static_cast<int>(value));
+  return false;
+}
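+
+// Illustrative examples (not part of the upstream change) of what ParseSsrc
+// accepts:
+//   uint32_t ssrc = 0;
+//   ParseSsrc("3735928559", &ssrc);  // true; ssrc == 0xDEADBEEF.
+//   ParseSsrc("0xDEADBEEF", &ssrc);  // true; same value, hex form.
+//   ParseSsrc("", &ssrc);            // true; empty means "no filter".
+//   ParseSsrc("12ab", &ssrc);        // false; trailing characters remain.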
+    return true;
+  printf("Invalid SSRC: %.*s\n", static_cast<int>(str.size()), str.data());
+  return false;
+}
+
+void PrintCodecMappingEntry(absl::string_view codec, int flag) {
+  std::cout << codec << ": " << flag << std::endl;
+}
+
+void PrintCodecMapping() {
+  PrintCodecMappingEntry("PCM-u", absl::GetFlag(FLAGS_pcmu));
+  PrintCodecMappingEntry("PCM-a", absl::GetFlag(FLAGS_pcma));
+  PrintCodecMappingEntry("iLBC", absl::GetFlag(FLAGS_ilbc));
+  PrintCodecMappingEntry("iSAC", absl::GetFlag(FLAGS_isac));
+  PrintCodecMappingEntry("iSAC-swb (32 kHz)", absl::GetFlag(FLAGS_isac_swb));
+  PrintCodecMappingEntry("Opus", absl::GetFlag(FLAGS_opus));
+  PrintCodecMappingEntry("PCM16b-nb (8 kHz)", absl::GetFlag(FLAGS_pcm16b));
+  PrintCodecMappingEntry("PCM16b-wb (16 kHz)", absl::GetFlag(FLAGS_pcm16b_wb));
+  PrintCodecMappingEntry("PCM16b-swb32 (32 kHz)",
+                         absl::GetFlag(FLAGS_pcm16b_swb32));
+  PrintCodecMappingEntry("PCM16b-swb48 (48 kHz)",
+                         absl::GetFlag(FLAGS_pcm16b_swb48));
+  PrintCodecMappingEntry("G.722", absl::GetFlag(FLAGS_g722));
+  PrintCodecMappingEntry("AVT/DTMF (8 kHz)", absl::GetFlag(FLAGS_avt));
+  PrintCodecMappingEntry("AVT/DTMF (16 kHz)", absl::GetFlag(FLAGS_avt_16));
+  PrintCodecMappingEntry("AVT/DTMF (32 kHz)", absl::GetFlag(FLAGS_avt_32));
+  PrintCodecMappingEntry("AVT/DTMF (48 kHz)", absl::GetFlag(FLAGS_avt_48));
+  PrintCodecMappingEntry("redundant audio (RED)", absl::GetFlag(FLAGS_red));
+  PrintCodecMappingEntry("comfort noise (8 kHz)", absl::GetFlag(FLAGS_cn_nb));
+  PrintCodecMappingEntry("comfort noise (16 kHz)", absl::GetFlag(FLAGS_cn_wb));
+  PrintCodecMappingEntry("comfort noise (32 kHz)",
+                         absl::GetFlag(FLAGS_cn_swb32));
+  PrintCodecMappingEntry("comfort noise (48 kHz)",
+                         absl::GetFlag(FLAGS_cn_swb48));
+}
+
+bool ValidateOutputFilesOptions(bool textlog,
+                                bool plotting,
+                                absl::string_view output_files_base_name,
+                                absl::string_view output_audio_filename) {
+  bool output_files_base_name_specified = !output_files_base_name.empty();
+  if (!textlog && !plotting && output_files_base_name_specified) {
+    std::cout << "Error: --output_files_base_name cannot be used without at "
+                 "least one of the following flags: --textlog, --matlabplot, "
+                 "--pythonplot."
+              << std::endl;
+    return false;
+  }
+  // Without `output_audio_filename`, `output_files_base_name` is required
+  // when plot output files must be generated (in order to form a valid output
+  // file name).
+  if (output_audio_filename.empty() && plotting &&
+      !output_files_base_name_specified) {
+    std::cout << "Error: when no output audio file is specified and "
+                 "--matlabplot and/or --pythonplot are used, "
+                 "--output_files_base_name must also be used."
+              << std::endl;
+    return false;
+  }
+  return true;
+}
+
+absl::optional<std::string> CreateOptionalOutputFileName(
+    bool output_requested,
+    absl::string_view basename,
+    absl::string_view output_audio_filename,
+    absl::string_view suffix) {
+  if (!output_requested) {
+    return absl::nullopt;
+  }
+  if (!basename.empty()) {
+    // Override the automatic assignment.
+    rtc::StringBuilder sb(basename);
+    sb << suffix;
+    return sb.str();
+  }
+  if (!output_audio_filename.empty()) {
+    // Automatically assign name.
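+    // For example (illustrative): an output audio file "out.wav" combined
+    // with the suffix ".text_log.txt" yields the derived file name
+    // "out.wav.text_log.txt".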
+    rtc::StringBuilder sb(output_audio_filename);
+    sb << suffix;
+    return sb.str();
+  }
+  std::cout << "Error: invalid text log file parameters.";
+  return absl::nullopt;
+}
+
+}  // namespace
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  webrtc::test::NetEqTestFactory factory;
+  std::string usage =
+      "Tool for decoding an RTP dump file using NetEq.\n"
+      "Example usage:\n"
+      "./neteq_rtpplay input.rtp [output.{pcm, wav}]\n";
+  if (absl::GetFlag(FLAGS_codec_map)) {
+    PrintCodecMapping();
+    exit(0);
+  }
+  if (args.size() != 2 &&
+      args.size() != 3) {  // The output audio file is optional.
+    // Print usage information.
+    std::cout << usage;
+    exit(0);
+  }
+  const std::string output_audio_filename((args.size() == 3) ? args[2] : "");
+  const std::string output_files_base_name(
+      absl::GetFlag(FLAGS_output_files_base_name));
+  RTC_CHECK(ValidateOutputFilesOptions(
+      absl::GetFlag(FLAGS_textlog),
+      absl::GetFlag(FLAGS_matlabplot) || absl::GetFlag(FLAGS_pythonplot),
+      output_files_base_name, output_audio_filename));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcmu)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcma)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_ilbc)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_isac)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_isac_swb)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_opus)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_wb)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_swb32)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_swb48)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_g722)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_16)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_32)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_48)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_red)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_nb)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_wb)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_swb32)));
+  RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_swb48)));
+  RTC_CHECK(ValidateSsrcValue(absl::GetFlag(FLAGS_ssrc)));
+  RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_audio_level)));
+  RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_abs_send_time)));
+  RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_transport_seq_no)));
+  RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_video_content_type)));
+  RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_video_timing)));
+
+  // Keep force_fieldtrials alive for the entire program lifetime:
+  // absl::GetFlag returns a temporary string, and c_str() on a temporary
+  // would point to deallocated memory.
+  const std::string force_fieldtrials = absl::GetFlag(FLAGS_force_fieldtrials);
+  webrtc::field_trial::InitFieldTrialsFromString(force_fieldtrials.c_str());
+
+  webrtc::test::NetEqTestFactory::Config config;
+  config.pcmu = absl::GetFlag(FLAGS_pcmu);
+  config.pcma = absl::GetFlag(FLAGS_pcma);
+  config.ilbc = absl::GetFlag(FLAGS_ilbc);
+  config.isac = absl::GetFlag(FLAGS_isac);
+  config.isac_swb = absl::GetFlag(FLAGS_isac_swb);
+  config.opus = absl::GetFlag(FLAGS_opus);
+  config.pcm16b = absl::GetFlag(FLAGS_pcm16b);
+  config.pcm16b_wb = absl::GetFlag(FLAGS_pcm16b_wb);
+  config.pcm16b_swb32 = absl::GetFlag(FLAGS_pcm16b_swb32);
+  config.pcm16b_swb48 = absl::GetFlag(FLAGS_pcm16b_swb48);
+  config.g722 = absl::GetFlag(FLAGS_g722);
+  config.avt = absl::GetFlag(FLAGS_avt);
+  config.avt_16 = absl::GetFlag(FLAGS_avt_16);
+  config.avt_32 = absl::GetFlag(FLAGS_avt_32);
+  config.avt_48 = absl::GetFlag(FLAGS_avt_48);
+  config.red = absl::GetFlag(FLAGS_red);
+  config.cn_nb = absl::GetFlag(FLAGS_cn_nb);
+  config.cn_wb = absl::GetFlag(FLAGS_cn_wb);
+  config.cn_swb32 = absl::GetFlag(FLAGS_cn_swb32);
+  config.cn_swb48 = absl::GetFlag(FLAGS_cn_swb48);
+  config.replacement_audio_file = absl::GetFlag(FLAGS_replacement_audio_file);
+  config.audio_level = absl::GetFlag(FLAGS_audio_level);
+  config.abs_send_time = absl::GetFlag(FLAGS_abs_send_time);
+  config.transport_seq_no = absl::GetFlag(FLAGS_transport_seq_no);
+  config.video_content_type = absl::GetFlag(FLAGS_video_content_type);
+  config.video_timing = absl::GetFlag(FLAGS_video_timing);
+  config.matlabplot = absl::GetFlag(FLAGS_matlabplot);
+  config.pythonplot = absl::GetFlag(FLAGS_pythonplot);
+  config.concealment_events = absl::GetFlag(FLAGS_concealment_events);
+  config.max_nr_packets_in_buffer =
+      absl::GetFlag(FLAGS_max_nr_packets_in_buffer);
+  config.enable_fast_accelerate = absl::GetFlag(FLAGS_enable_fast_accelerate);
+  if (!output_audio_filename.empty()) {
+    config.output_audio_filename = output_audio_filename;
+  }
+  config.textlog = absl::GetFlag(FLAGS_textlog);
+  config.textlog_filename = CreateOptionalOutputFileName(
+      absl::GetFlag(FLAGS_textlog), output_files_base_name,
+      output_audio_filename, ".text_log.txt");
+  config.plot_scripts_basename = CreateOptionalOutputFileName(
+      absl::GetFlag(FLAGS_matlabplot) || absl::GetFlag(FLAGS_pythonplot),
+      output_files_base_name, output_audio_filename, "");
+
+  // Check if an SSRC value was provided.
+  if (absl::GetFlag(FLAGS_ssrc).size() > 0) {
+    uint32_t ssrc;
+    RTC_CHECK(ParseSsrc(absl::GetFlag(FLAGS_ssrc), &ssrc))
+        << "Flag verification has failed.";
+    config.ssrc_filter = absl::make_optional(ssrc);
+  }
+
+  std::unique_ptr<webrtc::test::NetEqTest> test =
+      factory.InitializeTestFromFile(/*input_filename=*/args[1],
+                                     /*factory=*/nullptr, config);
+  RTC_CHECK(test) << "ERROR: Unable to run test";
+  test->Run();
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh
new file mode 100755
index 0000000000..0a6bf16016
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+#
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+#
+
+# Aliases.
+BIN=$1
+TEST_RTC_EVENT_LOG=$2
+INPUT_PCM_FILE=$3
+
+# Check setup.
+if [ ! -f $BIN ]; then
+  echo "Cannot find neteq_rtpplay binary."
+  exit 99
+fi
+if [ ! -f $TEST_RTC_EVENT_LOG ]; then
+  echo "Cannot find RTC event log file."
+  exit 99
+fi
+if [ ! -f $INPUT_PCM_FILE ]; then
+  echo "Cannot find PCM file."
+  exit 99
+fi
+
+# Defines.
+
+TMP_DIR=$(mktemp -d /tmp/tmp_XXXXXXXXXX)
+PASS=0
+FAIL=1
+TEST_SUITE_RESULT=$PASS
+
+file_hash () {
+  md5sum $1 | awk '{ print $1 }'
+}
+
+test_passed () {
+  echo PASS
+}
+
+test_failed () {
+  echo "FAIL: $1"
+  TEST_SUITE_RESULT=$FAIL
+}
+
+test_file_checksums_match () {
+  if [ ! -f $1 ] || [ ! -f $2 ]; then
+    test_failed "Cannot compare hash values: file(s) not found."
+    return
+  fi
+  HASH1=$(file_hash $1)
+  HASH2=$(file_hash $2)
+  if [ "$HASH1" = "$HASH2" ]; then
+    test_passed
+  else
+    test_failed "$1 differs from $2"
+  fi
+}
+
+test_file_exists () {
+  if [ -f $1 ]; then
+    test_passed
+  else
+    test_failed "$1 does not exist"
+  fi
+}
+
+test_exit_code_0 () {
+  if [ $1 -eq 0 ]; then
+    test_passed
+  else
+    test_failed "$1 did not return 0"
+  fi
+}
+
+test_exit_code_not_0 () {
+  if [ $1 -eq 0 ]; then
+    test_failed "$1 returned 0"
+  else
+    test_passed
+  fi
+}
+
+# Generate test data.
+
+# Case 1. Pre-existing way.
+CASE1_WAV=$TMP_DIR/case1.wav
+$BIN $TEST_RTC_EVENT_LOG $CASE1_WAV \
+    --replacement_audio_file $INPUT_PCM_FILE \
+    --textlog --pythonplot --matlabplot \
+    > $TMP_DIR/case1.stdout 2> /dev/null
+CASE1_RETURN_CODE=$?
+CASE1_TEXTLOG=$TMP_DIR/case1.wav.text_log.txt
+CASE1_PYPLOT=$TMP_DIR/case1_wav.py
+CASE1_MATPLOT=$TMP_DIR/case1_wav.m
+
+# Case 2. No output files.
+$BIN $TEST_RTC_EVENT_LOG --replacement_audio_file $INPUT_PCM_FILE \
+    > $TMP_DIR/case2.stdout 2> /dev/null
+CASE2_RETURN_CODE=$?
+
+# Case 3. No output audio file.
+
+# Case 3.1 Without --output_files_base_name (won't run).
+$BIN $TEST_RTC_EVENT_LOG \
+    --replacement_audio_file $INPUT_PCM_FILE \
+    --textlog --pythonplot --matlabplot \
+    &> /dev/null
+CASE3_1_RETURN_CODE=$?
+
+# Case 3.2 With --output_files_base_name (runs).
+$BIN $TEST_RTC_EVENT_LOG \
+    --replacement_audio_file $INPUT_PCM_FILE \
+    --output_files_base_name $TMP_DIR/case3_2 \
+    --textlog --pythonplot --matlabplot \
+    > $TMP_DIR/case3_2.stdout 2> /dev/null
+CASE3_2_RETURN_CODE=$?
+CASE3_2_TEXTLOG=$TMP_DIR/case3_2.text_log.txt
+CASE3_2_PYPLOT=$TMP_DIR/case3_2.py
+CASE3_2_MATPLOT=$TMP_DIR/case3_2.m
+
+# Case 4. With output audio file and --output_files_base_name.
+CASE4_WAV=$TMP_DIR/case4.wav
+$BIN $TEST_RTC_EVENT_LOG $TMP_DIR/case4.wav \
+    --replacement_audio_file $INPUT_PCM_FILE \
+    --output_files_base_name $TMP_DIR/case4 \
+    --textlog --pythonplot --matlabplot \
+    > $TMP_DIR/case4.stdout 2> /dev/null
+CASE4_RETURN_CODE=$?
+CASE4_TEXTLOG=$TMP_DIR/case4.text_log.txt
+CASE4_PYPLOT=$TMP_DIR/case4.py
+CASE4_MATPLOT=$TMP_DIR/case4.m
+
+# Tests.
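+# The cases above cover the valid combinations of an output audio file and
+# --output_files_base_name: with both given (case 4) the base name determines
+# the derived file names, and with neither given while plotting flags are set
+# (case 3.1) neteq_rtpplay is expected to refuse to run.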
+
+echo Check exit codes
+test_exit_code_0 $CASE1_RETURN_CODE
+test_exit_code_0 $CASE2_RETURN_CODE
+test_exit_code_not_0 $CASE3_1_RETURN_CODE
+test_exit_code_0 $CASE3_2_RETURN_CODE
+test_exit_code_0 $CASE4_RETURN_CODE
+
+echo Check that the expected output files exist
+test_file_exists $CASE1_TEXTLOG
+test_file_exists $CASE3_2_TEXTLOG
+test_file_exists $CASE4_TEXTLOG
+test_file_exists $CASE1_PYPLOT
+test_file_exists $CASE3_2_PYPLOT
+test_file_exists $CASE4_PYPLOT
+test_file_exists $CASE1_MATPLOT
+test_file_exists $CASE3_2_MATPLOT
+test_file_exists $CASE4_MATPLOT
+
+echo Check that the same WAV file is produced
+test_file_checksums_match $CASE1_WAV $CASE4_WAV
+
+echo Check that the same text log is produced
+test_file_checksums_match $CASE1_TEXTLOG $CASE3_2_TEXTLOG
+test_file_checksums_match $CASE1_TEXTLOG $CASE4_TEXTLOG
+
+echo Check that the same python plot script is produced
+test_file_checksums_match $CASE1_PYPLOT $CASE3_2_PYPLOT
+test_file_checksums_match $CASE1_PYPLOT $CASE4_PYPLOT
+
+echo Check that the same matlab plot script is produced
+test_file_checksums_match $CASE1_MATPLOT $CASE3_2_MATPLOT
+test_file_checksums_match $CASE1_MATPLOT $CASE4_MATPLOT
+
+# Clean up
+rm -fr $TMP_DIR
+
+if [ $TEST_SUITE_RESULT -eq $PASS ]; then
+  echo All tests passed.
+  exit 0
+else
+  echo One or more tests failed.
+  exit 1
+fi
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc
new file mode 100644
index 0000000000..6738e494f6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc
@@ -0,0 +1,139 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h"
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace test {
+
+std::string NetEqStatsGetter::ConcealmentEvent::ToString() const {
+  char ss_buf[256];
+  rtc::SimpleStringBuilder ss(ss_buf);
+  ss << "ConcealmentEvent duration_ms:" << duration_ms
+     << " event_number:" << concealment_event_number
+     << " time_from_previous_event_end_ms:" << time_from_previous_event_end_ms;
+  return ss.str();
+}
+
+NetEqStatsGetter::NetEqStatsGetter(
+    std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer)
+    : delay_analyzer_(std::move(delay_analyzer)) {}
+
+void NetEqStatsGetter::BeforeGetAudio(NetEq* neteq) {
+  if (delay_analyzer_) {
+    delay_analyzer_->BeforeGetAudio(neteq);
+  }
+}
+
+void NetEqStatsGetter::AfterGetAudio(int64_t time_now_ms,
+                                     const AudioFrame& audio_frame,
+                                     bool muted,
+                                     NetEq* neteq) {
+  // TODO(minyue): Getting stats should better not be done as a callback after
+  // get audio. It is called independently from get audio in practice.
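+  // Network statistics are sampled at most once per stats_query_interval_ms_
+  // (1000 ms by default); each sample is stored together with its wallclock
+  // time so per-interval averages can be computed afterwards.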
+  const auto lifetime_stat = neteq->GetLifetimeStatistics();
+  if (last_stats_query_time_ms_ == 0 ||
+      rtc::TimeDiff(time_now_ms, last_stats_query_time_ms_) >=
+          stats_query_interval_ms_) {
+    NetEqNetworkStatistics stats;
+    RTC_CHECK_EQ(neteq->NetworkStatistics(&stats), 0);
+    stats_.push_back(std::make_pair(time_now_ms, stats));
+    lifetime_stats_.push_back(std::make_pair(time_now_ms, lifetime_stat));
+    last_stats_query_time_ms_ = time_now_ms;
+  }
+
+  const auto voice_concealed_samples =
+      lifetime_stat.concealed_samples - lifetime_stat.silent_concealed_samples;
+  if (current_concealment_event_ != lifetime_stat.concealment_events &&
+      voice_concealed_samples_until_last_event_ < voice_concealed_samples) {
+    if (last_event_end_time_ms_ > 0) {
+      // Do not account for the first event to avoid start of the call
+      // skewing.
+      ConcealmentEvent concealment_event;
+      uint64_t last_event_voice_concealed_samples =
+          voice_concealed_samples - voice_concealed_samples_until_last_event_;
+      RTC_CHECK_GT(last_event_voice_concealed_samples, 0);
+      concealment_event.duration_ms = last_event_voice_concealed_samples /
+                                      (audio_frame.sample_rate_hz_ / 1000);
+      concealment_event.concealment_event_number = current_concealment_event_;
+      concealment_event.time_from_previous_event_end_ms =
+          time_now_ms - last_event_end_time_ms_;
+      concealment_events_.emplace_back(concealment_event);
+      voice_concealed_samples_until_last_event_ = voice_concealed_samples;
+    }
+    last_event_end_time_ms_ = time_now_ms;
+    voice_concealed_samples_until_last_event_ = voice_concealed_samples;
+    current_concealment_event_ = lifetime_stat.concealment_events;
+  }
+
+  if (delay_analyzer_) {
+    delay_analyzer_->AfterGetAudio(time_now_ms, audio_frame, muted, neteq);
+  }
+}
+
+double NetEqStatsGetter::AverageSpeechExpandRate() const {
+  // NetEq reports speech_expand_rate in Q14 fixed point, so the sum is
+  // divided by 16384 (1 << 14) to normalize it to a fraction in [0, 1].
+  double sum_speech_expand = std::accumulate(
+      stats_.begin(), stats_.end(), double{0.0},
+      [](double a, std::pair<int64_t, NetEqNetworkStatistics> b) {
+        return a + static_cast<double>(b.second.speech_expand_rate);
+      });
+  return sum_speech_expand / 16384.0 / stats_.size();
+}
+
+NetEqStatsGetter::Stats NetEqStatsGetter::AverageStats() const {
+  Stats sum_stats = std::accumulate(
+      stats_.begin(), stats_.end(), Stats(),
+      [](Stats a, std::pair<int64_t, NetEqNetworkStatistics> bb) {
+        const auto& b = bb.second;
+        a.current_buffer_size_ms += b.current_buffer_size_ms;
+        a.preferred_buffer_size_ms += b.preferred_buffer_size_ms;
+        a.jitter_peaks_found += b.jitter_peaks_found;
+        a.expand_rate += b.expand_rate / 16384.0;
+        a.speech_expand_rate += b.speech_expand_rate / 16384.0;
+        a.preemptive_rate += b.preemptive_rate / 16384.0;
+        a.accelerate_rate += b.accelerate_rate / 16384.0;
+        a.secondary_decoded_rate += b.secondary_decoded_rate / 16384.0;
+        a.secondary_discarded_rate += b.secondary_discarded_rate / 16384.0;
+        a.mean_waiting_time_ms += b.mean_waiting_time_ms;
+        a.median_waiting_time_ms += b.median_waiting_time_ms;
+        a.min_waiting_time_ms = std::min(
+            a.min_waiting_time_ms, static_cast<double>(b.min_waiting_time_ms));
+        a.max_waiting_time_ms = std::max(
+            a.max_waiting_time_ms, static_cast<double>(b.max_waiting_time_ms));
+        return a;
+      });
+
+  sum_stats.current_buffer_size_ms /= stats_.size();
+  sum_stats.preferred_buffer_size_ms /= stats_.size();
+  sum_stats.jitter_peaks_found /= stats_.size();
+  sum_stats.packet_loss_rate /= stats_.size();
+  sum_stats.expand_rate /= stats_.size();
+  sum_stats.speech_expand_rate /= stats_.size();
+  sum_stats.preemptive_rate /= stats_.size();
+  sum_stats.accelerate_rate /= stats_.size();
+  sum_stats.secondary_decoded_rate /= stats_.size();
+  sum_stats.secondary_discarded_rate /= stats_.size();
+  sum_stats.added_zero_samples /= stats_.size();
+  sum_stats.mean_waiting_time_ms /= stats_.size();
+  sum_stats.median_waiting_time_ms /= stats_.size();
+
+  return sum_stats;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h
new file mode 100644
index 0000000000..b1b12bb1f8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h
@@ -0,0 +1,106 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+
+namespace webrtc {
+namespace test {
+
+class NetEqStatsGetter : public NetEqGetAudioCallback {
+ public:
+  // This struct is a replica of webrtc::NetEqNetworkStatistics, but with all
+  // values stored in double precision.
+  struct Stats {
+    double current_buffer_size_ms = 0.0;
+    double preferred_buffer_size_ms = 0.0;
+    double jitter_peaks_found = 0.0;
+    double packet_loss_rate = 0.0;
+    double expand_rate = 0.0;
+    double speech_expand_rate = 0.0;
+    double preemptive_rate = 0.0;
+    double accelerate_rate = 0.0;
+    double secondary_decoded_rate = 0.0;
+    double secondary_discarded_rate = 0.0;
+    double clockdrift_ppm = 0.0;
+    double added_zero_samples = 0.0;
+    double mean_waiting_time_ms = 0.0;
+    double median_waiting_time_ms = 0.0;
+    double min_waiting_time_ms = 0.0;
+    double max_waiting_time_ms = 0.0;
+  };
+
+  struct ConcealmentEvent {
+    uint64_t duration_ms;
+    size_t concealment_event_number;
+    int64_t time_from_previous_event_end_ms;
+    std::string ToString() const;
+  };
+
+  // Takes a pointer to another callback object, which will be invoked after
+  // this object finishes. This does not transfer ownership, and null is a
+  // valid value.
+  explicit NetEqStatsGetter(
+      std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer);
+
+  void set_stats_query_interval_ms(int64_t stats_query_interval_ms) {
+    stats_query_interval_ms_ = stats_query_interval_ms;
+  }
+
+  void BeforeGetAudio(NetEq* neteq) override;
+
+  void AfterGetAudio(int64_t time_now_ms,
+                     const AudioFrame& audio_frame,
+                     bool muted,
+                     NetEq* neteq) override;
+
+  double AverageSpeechExpandRate() const;
+
+  NetEqDelayAnalyzer* delay_analyzer() const { return delay_analyzer_.get(); }
+
+  const std::vector<ConcealmentEvent>& concealment_events() const {
+    // Do not account for the last concealment event to avoid potential end
+    // call skewing.
+    return concealment_events_;
+  }
+
+  const std::vector<std::pair<int64_t, NetEqNetworkStatistics>>* stats()
+      const {
+    return &stats_;
+  }
+
+  const std::vector<std::pair<int64_t, NetEqLifetimeStatistics>>*
+  lifetime_stats() const {
+    return &lifetime_stats_;
+  }
+
+  Stats AverageStats() const;
+
+ private:
+  std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer_;
+  int64_t stats_query_interval_ms_ = 1000;
+  int64_t last_stats_query_time_ms_ = 0;
+  std::vector<std::pair<int64_t, NetEqNetworkStatistics>> stats_;
+  std::vector<std::pair<int64_t, NetEqLifetimeStatistics>> lifetime_stats_;
+  size_t current_concealment_event_ = 1;
+  uint64_t voice_concealed_samples_until_last_event_ = 0;
+  std::vector<ConcealmentEvent> concealment_events_;
+  int64_t last_event_end_time_ms_ = 0;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
new file mode 100644
index 0000000000..162a4c9300
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_stats_plotter.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <algorithm>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+
+NetEqStatsPlotter::NetEqStatsPlotter(bool make_matlab_plot,
+                                     bool make_python_plot,
+                                     bool show_concealment_events,
+                                     absl::string_view base_file_name)
+    : make_matlab_plot_(make_matlab_plot),
+      make_python_plot_(make_python_plot),
+      show_concealment_events_(show_concealment_events),
+      base_file_name_(base_file_name) {
+  std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer;
+  if (make_matlab_plot || make_python_plot) {
+    delay_analyzer.reset(new NetEqDelayAnalyzer);
+  }
+  stats_getter_.reset(new NetEqStatsGetter(std::move(delay_analyzer)));
+}
+
+void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
+  if (make_matlab_plot_) {
+    auto matlab_script_name = base_file_name_;
+    std::replace(matlab_script_name.begin(), matlab_script_name.end(), '.',
+                 '_');
+    printf("Creating Matlab plot script %s.m\n", matlab_script_name.c_str());
+    stats_getter_->delay_analyzer()->CreateMatlabScript(matlab_script_name +
+                                                        ".m");
+  }
+  if (make_python_plot_) {
+    auto python_script_name = base_file_name_;
+    std::replace(python_script_name.begin(), python_script_name.end(), '.',
+                 '_');
+    printf("Creating Python plot script %s.py\n", python_script_name.c_str());
+    stats_getter_->delay_analyzer()->CreatePythonScript(python_script_name +
+                                                        ".py");
+  }
+
+  printf("Simulation statistics:\n");
+  printf("  output duration: %" PRId64 " ms\n", simulation_time_ms);
+  auto stats = stats_getter_->AverageStats();
+  printf("  packet_loss_rate: %f %%\n", 100.0 * stats.packet_loss_rate);
+  printf("  expand_rate: %f %%\n", 100.0 * stats.expand_rate);
+  printf("  speech_expand_rate: %f %%\n", 100.0 * stats.speech_expand_rate);
+  printf("  preemptive_rate: %f %%\n", 100.0 * stats.preemptive_rate);
+  printf("  accelerate_rate: %f %%\n", 100.0 * stats.accelerate_rate);
+  printf("  secondary_decoded_rate: %f %%\n",
+         100.0 * stats.secondary_decoded_rate);
+  printf("  secondary_discarded_rate: %f %%\n",
+         100.0 * stats.secondary_discarded_rate);
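+  // The rates printed above were already normalized from NetEq's Q14 fixed
+  // point representation to fractions in AverageStats(), so multiplying by
+  // 100 here yields percentages.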
printf(" clockdrift_ppm: %f ppm\n", stats.clockdrift_ppm); + printf(" mean_waiting_time_ms: %f ms\n", stats.mean_waiting_time_ms); + printf(" median_waiting_time_ms: %f ms\n", stats.median_waiting_time_ms); + printf(" min_waiting_time_ms: %f ms\n", stats.min_waiting_time_ms); + printf(" max_waiting_time_ms: %f ms\n", stats.max_waiting_time_ms); + printf(" current_buffer_size_ms: %f ms\n", stats.current_buffer_size_ms); + printf(" preferred_buffer_size_ms: %f ms\n", stats.preferred_buffer_size_ms); + if (show_concealment_events_) { + printf(" concealment_events_ms:\n"); + for (auto concealment_event : stats_getter_->concealment_events()) + printf("%s\n", concealment_event.ToString().c_str()); + printf(" end of concealment_events_ms\n"); + } + + const auto lifetime_stats_vector = stats_getter_->lifetime_stats(); + if (!lifetime_stats_vector->empty()) { + auto lifetime_stats = lifetime_stats_vector->back().second; + printf(" total_samples_received: %" PRIu64 "\n", + lifetime_stats.total_samples_received); + printf(" concealed_samples: %" PRIu64 "\n", + lifetime_stats.concealed_samples); + printf(" concealment_events: %" PRIu64 "\n", + lifetime_stats.concealment_events); + printf(" delayed_packet_outage_samples: %" PRIu64 "\n", + lifetime_stats.delayed_packet_outage_samples); + printf(" num_interruptions: %d\n", lifetime_stats.interruption_count); + printf(" sum_interruption_length_ms: %d ms\n", + lifetime_stats.total_interruption_duration_ms); + printf(" interruption_ratio: %f\n", + static_cast(lifetime_stats.total_interruption_duration_ms) / + simulation_time_ms); + printf(" removed_samples_for_acceleration: %" PRIu64 "\n", + lifetime_stats.removed_samples_for_acceleration); + printf(" inserted_samples_for_deceleration: %" PRIu64 "\n", + lifetime_stats.inserted_samples_for_deceleration); + printf(" generated_noise_samples: %" PRIu64 "\n", + lifetime_stats.generated_noise_samples); + printf(" packets_discarded: %" PRIu64 "\n", + lifetime_stats.packets_discarded); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h new file mode 100644 index 0000000000..11c16da9d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h"
+#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+
+namespace webrtc {
+namespace test {
+
+class NetEqStatsPlotter : public NetEqSimulationEndedCallback {
+ public:
+  NetEqStatsPlotter(bool make_matlab_plot,
+                    bool make_python_plot,
+                    bool show_concealment_events,
+                    absl::string_view base_file_name);
+
+  void SimulationEnded(int64_t simulation_time_ms) override;
+
+  NetEqStatsGetter* stats_getter() { return stats_getter_.get(); }
+
+ private:
+  std::unique_ptr<NetEqStatsGetter> stats_getter_;
+  const bool make_matlab_plot_;
+  const bool make_python_plot_;
+  const bool show_concealment_events_;
+  const std::string base_file_name_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc
new file mode 100644
index 0000000000..a567efe2de
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc
@@ -0,0 +1,345 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+
+#include <iomanip>
+#include <iostream>
+
+#include "modules/audio_coding/neteq/default_neteq_factory.h"
+#include "modules/rtp_rtcp/source/byte_io.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+absl::optional<NetEq::Operation> ActionToOperations(
+    absl::optional<NetEqSimulator::Action> a) {
+  if (!a) {
+    return absl::nullopt;
+  }
+  switch (*a) {
+    case NetEqSimulator::Action::kAccelerate:
+      return absl::make_optional(NetEq::Operation::kAccelerate);
+    case NetEqSimulator::Action::kExpand:
+      return absl::make_optional(NetEq::Operation::kExpand);
+    case NetEqSimulator::Action::kNormal:
+      return absl::make_optional(NetEq::Operation::kNormal);
+    case NetEqSimulator::Action::kPreemptiveExpand:
+      return absl::make_optional(NetEq::Operation::kPreemptiveExpand);
+  }
+}
+
+std::unique_ptr<NetEq> CreateNetEq(
+    const NetEq::Config& config,
+    Clock* clock,
+    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
+  return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
+}
+
+}  // namespace
+
+void DefaultNetEqTestErrorCallback::OnInsertPacketError(
+    const NetEqInput::PacketData& packet) {
+  std::cerr << "InsertPacket returned an error." << std::endl;
+  std::cerr << "Packet data: " << packet.ToString() << std::endl;
+  RTC_FATAL();
+}
+
+void DefaultNetEqTestErrorCallback::OnGetAudioError() {
+  std::cerr << "GetAudio returned an error." << std::endl;
+  RTC_FATAL();
+}
+
+NetEqTest::NetEqTest(const NetEq::Config& config,
+                     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
+                     const DecoderMap& codecs,
+                     std::unique_ptr<std::ofstream> text_log,
+                     NetEqFactory* neteq_factory,
+                     std::unique_ptr<NetEqInput> input,
+                     std::unique_ptr<AudioSink> output,
+                     Callbacks callbacks)
+    : input_(std::move(input)),
+      clock_(Timestamp::Millis(input_->NextEventTime().value_or(0))),
+      neteq_(neteq_factory
+                 ? neteq_factory->CreateNetEq(config, decoder_factory, &clock_)
+                 : CreateNetEq(config, &clock_, decoder_factory)),
+      output_(std::move(output)),
+      callbacks_(callbacks),
+      sample_rate_hz_(config.sample_rate_hz),
+      text_log_(std::move(text_log)) {
+  RTC_CHECK(!config.enable_muted_state)
+      << "The code does not handle enable_muted_state";
+  RegisterDecoders(codecs);
+}
+
+NetEqTest::~NetEqTest() = default;
+
+int64_t NetEqTest::Run() {
+  int64_t simulation_time = 0;
+  SimulationStepResult step_result;
+  do {
+    step_result = RunToNextGetAudio();
+    simulation_time += step_result.simulation_step_ms;
+  } while (!step_result.is_simulation_finished);
+  if (callbacks_.simulation_ended_callback) {
+    callbacks_.simulation_ended_callback->SimulationEnded(simulation_time);
+  }
+  return simulation_time;
+}
+
+NetEqTest::SimulationStepResult NetEqTest::RunToNextGetAudio() {
+  SimulationStepResult result;
+  const int64_t start_time_ms = *input_->NextEventTime();
+  int64_t time_now_ms = clock_.CurrentTime().ms();
+  current_state_.packet_iat_ms.clear();
+
+  while (!input_->ended()) {
+    // Advance time to next event.
+    RTC_DCHECK(input_->NextEventTime());
+    clock_.AdvanceTimeMilliseconds(*input_->NextEventTime() - time_now_ms);
+    time_now_ms = *input_->NextEventTime();
+    // Check if it is time to insert packet.
+    if (input_->NextPacketTime() && time_now_ms >= *input_->NextPacketTime()) {
+      std::unique_ptr<NetEqInput::PacketData> packet_data =
+          input_->PopPacket();
+      RTC_CHECK(packet_data);
+      const size_t payload_data_length =
+          packet_data->payload.size() - packet_data->header.paddingLength;
+      if (payload_data_length != 0) {
+        int error = neteq_->InsertPacket(
+            packet_data->header,
+            rtc::ArrayView<const uint8_t>(packet_data->payload));
+        if (error != NetEq::kOK && callbacks_.error_callback) {
+          callbacks_.error_callback->OnInsertPacketError(*packet_data);
+        }
+        if (callbacks_.post_insert_packet) {
+          callbacks_.post_insert_packet->AfterInsertPacket(*packet_data,
+                                                           neteq_.get());
+        }
+      } else {
+        neteq_->InsertEmptyPacket(packet_data->header);
+      }
+      if (last_packet_time_ms_) {
+        current_state_.packet_iat_ms.push_back(time_now_ms -
+                                               *last_packet_time_ms_);
+      }
+      if (text_log_) {
+        const auto ops_state = neteq_->GetOperationsAndState();
+        const auto delta_wallclock =
+            last_packet_time_ms_ ? (time_now_ms - *last_packet_time_ms_) : -1;
+        const auto delta_timestamp =
+            last_packet_timestamp_
+                ? (static_cast<int64_t>(packet_data->header.timestamp) -
+                   *last_packet_timestamp_) *
+                      1000 / sample_rate_hz_
+                : -1;
+        const auto packet_size_bytes =
+            packet_data->payload.size() == 12
+                ? ByteReader<uint32_t>::ReadLittleEndian(
+                      &packet_data->payload[8])
+                : -1;
+        *text_log_ << "Packet - wallclock: " << std::setw(5) << time_now_ms
+                   << ", delta wc: " << std::setw(4) << delta_wallclock
+                   << ", seq_no: " << packet_data->header.sequenceNumber
+                   << ", timestamp: " << std::setw(10)
+                   << packet_data->header.timestamp
+                   << ", delta ts: " << std::setw(4) << delta_timestamp
+                   << ", size: " << std::setw(5) << packet_size_bytes
+                   << ", frame size: " << std::setw(3)
+                   << ops_state.current_frame_size_ms
+                   << ", buffer size: " << std::setw(4)
+                   << ops_state.current_buffer_size_ms << std::endl;
+      }
+      last_packet_time_ms_ = absl::make_optional(time_now_ms);
+      last_packet_timestamp_ =
+          absl::make_optional(packet_data->header.timestamp);
+    }
+
+    // Check if it is time to get output audio.
+    if (input_->NextOutputEventTime() &&
+        time_now_ms >= *input_->NextOutputEventTime()) {
+      if (callbacks_.get_audio_callback) {
+        callbacks_.get_audio_callback->BeforeGetAudio(neteq_.get());
+      }
+      AudioFrame out_frame;
+      bool muted;
+      int error = neteq_->GetAudio(&out_frame, &muted, nullptr,
+                                   ActionToOperations(next_action_));
+      next_action_ = absl::nullopt;
+      RTC_CHECK(!muted) << "The code does not handle enable_muted_state";
+      if (error != NetEq::kOK) {
+        if (callbacks_.error_callback) {
+          callbacks_.error_callback->OnGetAudioError();
+        }
+      } else {
+        sample_rate_hz_ = out_frame.sample_rate_hz_;
+      }
+      if (callbacks_.get_audio_callback) {
+        callbacks_.get_audio_callback->AfterGetAudio(time_now_ms, out_frame,
+                                                     muted, neteq_.get());
+      }
+
+      if (output_) {
+        RTC_CHECK(output_->WriteArray(
+            out_frame.data(),
+            out_frame.samples_per_channel_ * out_frame.num_channels_));
+      }
+
+      input_->AdvanceOutputEvent();
+      result.simulation_step_ms =
+          input_->NextEventTime().value_or(time_now_ms) - start_time_ms;
+      const auto operations_state = neteq_->GetOperationsAndState();
+      current_state_.current_delay_ms =
+          operations_state.current_buffer_size_ms;
+      current_state_.packet_size_ms = operations_state.current_frame_size_ms;
+      current_state_.next_packet_available =
+          operations_state.next_packet_available;
+      current_state_.packet_buffer_flushed =
+          operations_state.packet_buffer_flushes >
+          prev_ops_state_.packet_buffer_flushes;
+      // TODO(ivoc): Add more accurate reporting by tracking the origin of
+      // samples in the sync buffer.
+      result.action_times_ms[Action::kExpand] = 0;
+      result.action_times_ms[Action::kAccelerate] = 0;
+      result.action_times_ms[Action::kPreemptiveExpand] = 0;
+      result.action_times_ms[Action::kNormal] = 0;
+
+      if (out_frame.speech_type_ == AudioFrame::SpeechType::kPLC ||
+          out_frame.speech_type_ == AudioFrame::SpeechType::kPLCCNG) {
+        // Consider the whole frame to be the result of expansion.
+        result.action_times_ms[Action::kExpand] = 10;
+      } else if (operations_state.accelerate_samples -
+                     prev_ops_state_.accelerate_samples >
+                 0) {
+        // Consider the whole frame to be the result of acceleration.
+        result.action_times_ms[Action::kAccelerate] = 10;
+      } else if (operations_state.preemptive_samples -
+                     prev_ops_state_.preemptive_samples >
+                 0) {
+        // Consider the whole frame to be the result of preemptive expansion.
+        result.action_times_ms[Action::kPreemptiveExpand] = 10;
+      } else {
+        // Consider the whole frame to be the result of normal playout.
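+        // Note: NetEq produces audio in 10 ms frames, which is why each
+        // branch in this chain attributes the full 10 ms to a single action.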
+ result.action_times_ms[Action::kNormal] = 10; + } + auto lifetime_stats = LifetimeStats(); + if (text_log_) { + const bool plc = + (out_frame.speech_type_ == AudioFrame::SpeechType::kPLC) || + (out_frame.speech_type_ == AudioFrame::SpeechType::kPLCCNG); + const bool cng = out_frame.speech_type_ == AudioFrame::SpeechType::kCNG; + const bool voice_concealed = + (lifetime_stats.concealed_samples - + lifetime_stats.silent_concealed_samples) > + (prev_lifetime_stats_.concealed_samples - + prev_lifetime_stats_.silent_concealed_samples); + *text_log_ << "GetAudio - wallclock: " << std::setw(5) << time_now_ms + << ", delta wc: " << std::setw(4) + << (input_->NextEventTime().value_or(time_now_ms) - + start_time_ms) + << ", CNG: " << cng << ", PLC: " << plc + << ", voice concealed: " << voice_concealed + << ", buffer size: " << std::setw(4) + << current_state_.current_delay_ms << std::endl; + if (lifetime_stats.packets_discarded > + prev_lifetime_stats_.packets_discarded) { + *text_log_ << "Discarded " + << (lifetime_stats.packets_discarded - + prev_lifetime_stats_.packets_discarded) + << " primary packets." << std::endl; + } + if (operations_state.packet_buffer_flushes > + prev_ops_state_.packet_buffer_flushes) { + *text_log_ << "Flushed packet buffer " + << (operations_state.packet_buffer_flushes - + prev_ops_state_.packet_buffer_flushes) + << " times." << std::endl; + } + } + prev_lifetime_stats_ = lifetime_stats; + const bool no_more_packets_to_decode = + !input_->NextPacketTime() && !operations_state.next_packet_available; + // End the simulation if the gap is too large. This indicates an issue + // with the event log file. + const bool simulation_step_too_large = result.simulation_step_ms > 1000; + if (simulation_step_too_large) { + // If we don't reset the step time, the large gap will be included in + // the simulation time, which can be a large distortion. 
+        result.simulation_step_ms = 10;
+      }
+      result.is_simulation_finished = simulation_step_too_large ||
+                                      no_more_packets_to_decode ||
+                                      input_->ended();
+      prev_ops_state_ = operations_state;
+      return result;
+    }
+  }
+  result.simulation_step_ms =
+      input_->NextEventTime().value_or(time_now_ms) - start_time_ms;
+  result.is_simulation_finished = true;
+  return result;
+}
+
+void NetEqTest::SetNextAction(NetEqTest::Action next_operation) {
+  next_action_ = absl::optional<Action>(next_operation);
+}
+
+NetEqTest::NetEqState NetEqTest::GetNetEqState() {
+  return current_state_;
+}
+
+NetEqNetworkStatistics NetEqTest::SimulationStats() {
+  NetEqNetworkStatistics stats;
+  RTC_CHECK_EQ(neteq_->NetworkStatistics(&stats), 0);
+  return stats;
+}
+
+NetEqLifetimeStatistics NetEqTest::LifetimeStats() const {
+  return neteq_->GetLifetimeStatistics();
+}
+
+NetEqTest::DecoderMap NetEqTest::StandardDecoderMap() {
+  DecoderMap codecs = {
+      {0, SdpAudioFormat("pcmu", 8000, 1)},
+      {8, SdpAudioFormat("pcma", 8000, 1)},
+#ifdef WEBRTC_CODEC_ILBC
+      {102, SdpAudioFormat("ilbc", 8000, 1)},
+#endif
+#ifdef WEBRTC_CODEC_OPUS
+      {111, SdpAudioFormat("opus", 48000, 2)},
+#endif
+      {93, SdpAudioFormat("l16", 8000, 1)},
+      {94, SdpAudioFormat("l16", 16000, 1)},
+      {95, SdpAudioFormat("l16", 32000, 1)},
+      {96, SdpAudioFormat("l16", 48000, 1)},
+      {9, SdpAudioFormat("g722", 8000, 1)},
+      {106, SdpAudioFormat("telephone-event", 8000, 1)},
+      {114, SdpAudioFormat("telephone-event", 16000, 1)},
+      {115, SdpAudioFormat("telephone-event", 32000, 1)},
+      {116, SdpAudioFormat("telephone-event", 48000, 1)},
+      {117, SdpAudioFormat("red", 8000, 1)},
+      {13, SdpAudioFormat("cn", 8000, 1)},
+      {98, SdpAudioFormat("cn", 16000, 1)},
+      {99, SdpAudioFormat("cn", 32000, 1)},
+      {100, SdpAudioFormat("cn", 48000, 1)}};
+  return codecs;
+}
+
+void NetEqTest::RegisterDecoders(const DecoderMap& codecs) {
+  for (const auto& c : codecs) {
+    RTC_CHECK(neteq_->RegisterPayloadType(c.first, c.second))
+        << "Cannot register " << c.second.name << " to payload type "
+        << c.first;
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h
new file mode 100644
index 0000000000..1d3eeda453
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_
+
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/neteq/neteq.h"
+#include "api/neteq/neteq_factory.h"
+#include "api/test/neteq_simulator.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "modules/audio_coding/neteq/tools/neteq_input.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+namespace test {
+
+class NetEqTestErrorCallback {
+ public:
+  virtual ~NetEqTestErrorCallback() = default;
+  virtual void OnInsertPacketError(const NetEqInput::PacketData& packet) {}
+  virtual void OnGetAudioError() {}
+};
+
+class DefaultNetEqTestErrorCallback : public NetEqTestErrorCallback {
+  void OnInsertPacketError(const NetEqInput::PacketData& packet) override;
+  void OnGetAudioError() override;
+};
+
+class NetEqPostInsertPacket {
+ public:
+  virtual ~NetEqPostInsertPacket() = default;
+  virtual void AfterInsertPacket(const NetEqInput::PacketData& packet,
+                                 NetEq* neteq) = 0;
+};
+
+class NetEqGetAudioCallback {
+ public:
+  virtual ~NetEqGetAudioCallback() = default;
+  virtual void BeforeGetAudio(NetEq* neteq) = 0;
+  virtual void AfterGetAudio(int64_t time_now_ms,
+                             const AudioFrame& audio_frame,
+                             bool muted,
+                             NetEq* neteq) = 0;
+};
+
+class NetEqSimulationEndedCallback {
+ public:
+  virtual ~NetEqSimulationEndedCallback() = default;
+  virtual void SimulationEnded(int64_t simulation_time_ms) = 0;
+};
+
+// Class that provides an input-output test for NetEq. The input (both packets
+// and output events) is provided by a NetEqInput object, while the output is
+// directed to an AudioSink object.
+class NetEqTest : public NetEqSimulator {
+ public:
+  using DecoderMap = std::map<int, SdpAudioFormat>;
+
+  struct Callbacks {
+    NetEqTestErrorCallback* error_callback = nullptr;
+    NetEqPostInsertPacket* post_insert_packet = nullptr;
+    NetEqGetAudioCallback* get_audio_callback = nullptr;
+    NetEqSimulationEndedCallback* simulation_ended_callback = nullptr;
+  };
+
+  // Sets up the test with given configuration, codec mappings, input, output,
+  // and callback objects for error reporting.
+  NetEqTest(const NetEq::Config& config,
+            rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
+            const DecoderMap& codecs,
+            std::unique_ptr<std::ofstream> text_log,
+            NetEqFactory* neteq_factory,
+            std::unique_ptr<NetEqInput> input,
+            std::unique_ptr<AudioSink> output,
+            Callbacks callbacks);
+
+  ~NetEqTest() override;
+
+  // Runs the test. Returns the duration of the produced audio in ms.
+  int64_t Run() override;
+  // Runs the simulation until we hit the next GetAudio event. If the
+  // simulation is finished, is_simulation_finished will be set to true in the
+  // returned SimulationStepResult.
+  SimulationStepResult RunToNextGetAudio() override;
+
+  void SetNextAction(Action next_operation) override;
+  NetEqState GetNetEqState() override;
+
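+  // Illustrative (hypothetical) driver code, assuming an input, output and
+  // decoder factory have been created elsewhere:
+  //
+  //   NetEqTest::Callbacks callbacks;  // All callback pointers are optional.
+  //   NetEqTest test(NetEq::Config(), CreateBuiltinAudioDecoderFactory(),
+  //                  NetEqTest::StandardDecoderMap(), /*text_log=*/nullptr,
+  //                  /*neteq_factory=*/nullptr, std::move(input),
+  //                  std::move(output), callbacks);
+  //   int64_t duration_ms = test.Run();
+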
+  // Returns the statistics from NetEq.
+  NetEqNetworkStatistics SimulationStats();
+  NetEqLifetimeStatistics LifetimeStats() const;
+
+  static DecoderMap StandardDecoderMap();
+
+ private:
+  void RegisterDecoders(const DecoderMap& codecs);
+  std::unique_ptr<NetEqInput> input_;
+  SimulatedClock clock_;
+  absl::optional<Action> next_action_;
+  absl::optional<int64_t> last_packet_time_ms_;
+  std::unique_ptr<NetEq> neteq_;
+  std::unique_ptr<AudioSink> output_;
+  Callbacks callbacks_;
+  int sample_rate_hz_;
+  NetEqState current_state_;
+  NetEqOperationsAndState prev_ops_state_;
+  NetEqLifetimeStatistics prev_lifetime_stats_;
+  absl::optional<uint32_t> last_packet_timestamp_;
+  std::unique_ptr<std::ofstream> text_log_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc
new file mode 100644
index 0000000000..6cd371406c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc
@@ -0,0 +1,342 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/neteq_test_factory.h"
+
+#include <errno.h>
+#include <limits.h>  // For ULONG_MAX returned by strtoul.
+#include <stdio.h>
+#include <stdlib.h>  // For strtoul.
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/neteq/neteq.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
+#include "modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h"
+#include "modules/audio_coding/neteq/tools/neteq_event_log_input.h"
+#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h"
+#include "modules/audio_coding/neteq/tools/neteq_replacement_input.h"
+#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h"
+#include "modules/audio_coding/neteq/tools/neteq_stats_plotter.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+#include "modules/audio_coding/neteq/tools/output_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_wav_file.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+#include "rtc_base/checks.h"
+#include "test/function_audio_decoder_factory.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+absl::optional<int> CodecSampleRate(
+    uint8_t payload_type,
+    webrtc::test::NetEqTestFactory::Config config) {
+  if (payload_type == config.pcmu || payload_type == config.pcma ||
+      payload_type == config.ilbc || payload_type == config.pcm16b ||
+      payload_type == config.cn_nb || payload_type == config.avt)
+    return 8000;
+  if (payload_type == config.isac || payload_type == config.pcm16b_wb ||
+      payload_type == config.g722 || payload_type == config.cn_wb ||
+      payload_type == config.avt_16)
+    return 16000;
+  if (payload_type == config.isac_swb || payload_type == config.pcm16b_swb32 ||
+      payload_type == config.cn_swb32 || payload_type == config.avt_32)
+    return 32000;
+  if (payload_type == config.opus || payload_type == config.pcm16b_swb48 ||
+      payload_type == config.cn_swb48 || payload_type == config.avt_48)
+    return 48000;
+  if (payload_type == config.red)
+    return 0;
+  return absl::nullopt;
+}
+
+}  // namespace
+
+// A callback class which prints whenever the inserted packet stream changes
+// the SSRC.
+class SsrcSwitchDetector : public NetEqPostInsertPacket {
+ public:
+  // Takes a pointer to another callback object, which will be invoked after
+  // this object finishes. This does not transfer ownership, and null is a
+  // valid value.
+  explicit SsrcSwitchDetector(NetEqPostInsertPacket* other_callback)
+      : other_callback_(other_callback) {}
+
+  void AfterInsertPacket(const NetEqInput::PacketData& packet,
+                         NetEq* neteq) override {
+    if (last_ssrc_ && packet.header.ssrc != *last_ssrc_) {
+      std::cout << "Changing streams from 0x" << std::hex << *last_ssrc_
+                << " to 0x" << std::hex << packet.header.ssrc << std::dec
+                << " (payload type "
+                << static_cast<int>(packet.header.payloadType) << ")"
+                << std::endl;
+    }
+    last_ssrc_ = packet.header.ssrc;
+    if (other_callback_) {
+      other_callback_->AfterInsertPacket(packet, neteq);
+    }
+  }
+
+ private:
+  NetEqPostInsertPacket* other_callback_;
+  absl::optional<uint32_t> last_ssrc_;
+};
+
+NetEqTestFactory::NetEqTestFactory() = default;
+NetEqTestFactory::~NetEqTestFactory() = default;
+
+NetEqTestFactory::Config::Config() = default;
+NetEqTestFactory::Config::Config(const Config& other) = default;
+NetEqTestFactory::Config::~Config() = default;
+
+std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTestFromString(
+    absl::string_view input_string,
+    NetEqFactory* factory,
+    const Config& config) {
+  std::unique_ptr<NetEqInput> input(
+      NetEqEventLogInput::CreateFromString(input_string, config.ssrc_filter));
+  if (!input) {
+    std::cerr << "Error: Cannot parse input string" << std::endl;
+    return nullptr;
+  }
+  return InitializeTest(std::move(input), factory, config);
+}
+
+std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTestFromFile(
+    absl::string_view input_file_name,
+    NetEqFactory* factory,
+    const Config& config) {
+  // Gather RTP header extensions in a map.
+  NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = {
+      {config.audio_level, kRtpExtensionAudioLevel},
+      {config.abs_send_time, kRtpExtensionAbsoluteSendTime},
+      {config.transport_seq_no, kRtpExtensionTransportSequenceNumber},
+      {config.video_content_type, kRtpExtensionVideoContentType},
+      {config.video_timing, kRtpExtensionVideoTiming}};
+
+  std::unique_ptr<NetEqInput> input;
+  if (RtpFileSource::ValidRtpDump(input_file_name) ||
+      RtpFileSource::ValidPcap(input_file_name)) {
+    input.reset(new NetEqRtpDumpInput(input_file_name, rtp_ext_map,
+                                      config.ssrc_filter));
+  } else {
+    input.reset(NetEqEventLogInput::CreateFromFile(input_file_name,
+                                                   config.ssrc_filter));
+  }
+
+  std::cout << "Input file: " << input_file_name << std::endl;
+  if (!input) {
+    std::cerr << "Error: Cannot open input file" << std::endl;
+    return nullptr;
+  }
+  return InitializeTest(std::move(input), factory, config);
+}
+
+std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTest(
+    std::unique_ptr<NetEqInput> input,
+    NetEqFactory* factory,
+    const Config& config) {
+  if (input->ended()) {
+    std::cerr << "Error: Input is empty" << std::endl;
+    return nullptr;
+  }
+
+  if (!config.field_trial_string.empty()) {
+    field_trials_ =
+        std::make_unique<ScopedFieldTrials>(config.field_trial_string);
+  }
+
+  // Skip some initial events/packets if requested.
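+  // Each skipped get_audio event advances the simulated output clock by one
+  // frame; packets that would have arrived before the first remaining output
+  // event are popped and discarded below.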
+  if (config.skip_get_audio_events > 0) {
+    std::cout << "Skipping " << config.skip_get_audio_events
+              << " get_audio events" << std::endl;
+    if (!input->NextPacketTime() || !input->NextOutputEventTime()) {
+      std::cerr << "No events found" << std::endl;
+      return nullptr;
+    }
+    for (int i = 0; i < config.skip_get_audio_events; i++) {
+      input->AdvanceOutputEvent();
+      if (!input->NextOutputEventTime()) {
+        std::cerr << "Not enough get_audio events found" << std::endl;
+        return nullptr;
+      }
+    }
+    while (*input->NextPacketTime() < *input->NextOutputEventTime()) {
+      input->PopPacket();
+      if (!input->NextPacketTime()) {
+        std::cerr << "Not enough incoming packets found" << std::endl;
+        return nullptr;
+      }
+    }
+  }
+
+  // Check the sample rate.
+  absl::optional<int> sample_rate_hz;
+  std::set<std::pair<int, uint32_t>> discarded_pt_and_ssrc;
+  while (absl::optional<RTPHeader> first_rtp_header = input->NextHeader()) {
+    RTC_DCHECK(first_rtp_header);
+    sample_rate_hz = CodecSampleRate(first_rtp_header->payloadType, config);
+    if (sample_rate_hz) {
+      std::cout << "Found valid packet with payload type "
+                << static_cast<int>(first_rtp_header->payloadType)
+                << " and SSRC 0x" << std::hex << first_rtp_header->ssrc
+                << std::dec << std::endl;
+      if (config.initial_dummy_packets > 0) {
+        std::cout << "Nr of initial dummy packets: "
+                  << config.initial_dummy_packets << std::endl;
+        input = std::make_unique<InitialPacketInserterNetEqInput>(
+            std::move(input), config.initial_dummy_packets, *sample_rate_hz);
+      }
+      break;
+    }
+    // Discard this packet and move to the next. Keep track of discarded
+    // payload types and SSRCs.
+    discarded_pt_and_ssrc.emplace(first_rtp_header->payloadType,
+                                  first_rtp_header->ssrc);
+    input->PopPacket();
+  }
+  if (!discarded_pt_and_ssrc.empty()) {
+    std::cout << "Discarded initial packets with the following payload types "
+                 "and SSRCs:"
+              << std::endl;
+    for (const auto& d : discarded_pt_and_ssrc) {
+      std::cout << "PT " << d.first << "; SSRC 0x" << std::hex
+                << static_cast<uint32_t>(d.second) << std::dec << std::endl;
+    }
+  }
+  if (!sample_rate_hz) {
+    std::cerr << "Cannot find any packets with known payload types"
+              << std::endl;
+    return nullptr;
+  }
+
+  // If an output file is requested, open it.
+  std::unique_ptr<AudioSink> output;
+  if (!config.output_audio_filename.has_value()) {
+    output = std::make_unique<VoidAudioSink>();
+    std::cout << "No output audio file" << std::endl;
+  } else if (config.output_audio_filename->size() >= 4 &&
+             config.output_audio_filename->substr(
+                 config.output_audio_filename->size() - 4) == ".wav") {
+    // Open a wav file with the known sample rate.
+    output = std::make_unique<OutputWavFile>(*config.output_audio_filename,
+                                             *sample_rate_hz);
+    std::cout << "Output WAV file: " << *config.output_audio_filename
+              << std::endl;
+  } else {
+    // Open a pcm file.
+    output = std::make_unique<OutputAudioFile>(*config.output_audio_filename);
+    std::cout << "Output PCM file: " << *config.output_audio_filename
+              << std::endl;
+  }
+
+  NetEqTest::DecoderMap codecs = NetEqTest::StandardDecoderMap();
+
+  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory =
+      CreateBuiltinAudioDecoderFactory();
+
+  // Check if a replacement audio file was provided.
+  if (config.replacement_audio_file.size() > 0) {
+    // Find largest unused payload type.
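+    // The search starts at 127 (the top of the dynamic RTP payload type
+    // range) and walks downward, so the replacement codec never collides
+    // with a payload type already present in the decoder map.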
+    int replacement_pt = 127;
+    while (codecs.find(replacement_pt) != codecs.end()) {
+      --replacement_pt;
+      if (replacement_pt <= 0) {
+        std::cerr << "Error: Unable to find available replacement payload type"
+                  << std::endl;
+        return nullptr;
+      }
+    }
+
+    auto std_set_int32_to_uint8 = [](const std::set<int32_t>& a) {
+      std::set<uint8_t> b;
+      for (auto& x : a) {
+        b.insert(static_cast<uint8_t>(x));
+      }
+      return b;
+    };
+
+    std::set<uint8_t> cn_types = std_set_int32_to_uint8(
+        {config.cn_nb, config.cn_wb, config.cn_swb32, config.cn_swb48});
+    std::set<uint8_t> forbidden_types =
+        std_set_int32_to_uint8({config.g722, config.red, config.avt,
+                                config.avt_16, config.avt_32, config.avt_48});
+    input.reset(new NetEqReplacementInput(std::move(input), replacement_pt,
+                                          cn_types, forbidden_types));
+
+    // Note that capture-by-copy implies that the lambda captures the value of
+    // decoder_factory before it's reassigned on the left-hand side.
+    decoder_factory = rtc::make_ref_counted<FunctionAudioDecoderFactory>(
+        [decoder_factory, config](
+            const SdpAudioFormat& format,
+            absl::optional<AudioCodecPairId> codec_pair_id) {
+          std::unique_ptr<AudioDecoder> decoder =
+              decoder_factory->MakeAudioDecoder(format, codec_pair_id);
+          if (!decoder && format.name == "replacement") {
+            decoder = std::make_unique<FakeDecodeFromFile>(
+                std::make_unique<InputAudioFile>(
+                    config.replacement_audio_file),
+                format.clockrate_hz, format.num_channels > 1);
+          }
+          return decoder;
+        });
+
+    if (!codecs
+             .insert({replacement_pt, SdpAudioFormat("replacement", 48000, 1)})
+             .second) {
+      std::cerr << "Error: Unable to insert replacement audio codec"
+                << std::endl;
+      return nullptr;
+    }
+  }
+
+  // Create a text log output stream if needed.
+  std::unique_ptr<std::ofstream> text_log;
+  if (config.textlog && config.textlog_filename.has_value()) {
+    // Write to file.
+    text_log = std::make_unique<std::ofstream>(*config.textlog_filename);
+  } else if (config.textlog) {
+    // Print to stdout.
+    text_log = std::make_unique<std::ofstream>();
+    text_log->basic_ios<char>::rdbuf(std::cout.rdbuf());
+  }
+
+  NetEqTest::Callbacks callbacks;
+  stats_plotter_ = std::make_unique<NetEqStatsPlotter>(
+      config.matlabplot, config.pythonplot, config.concealment_events,
+      config.plot_scripts_basename.value_or(""));
+
+  ssrc_switch_detector_.reset(
+      new SsrcSwitchDetector(stats_plotter_->stats_getter()->delay_analyzer()));
+  callbacks.post_insert_packet = ssrc_switch_detector_.get();
+  callbacks.get_audio_callback = stats_plotter_->stats_getter();
+  callbacks.simulation_ended_callback = stats_plotter_.get();
+  NetEq::Config neteq_config;
+  neteq_config.sample_rate_hz = *sample_rate_hz;
+  neteq_config.max_packets_in_buffer = config.max_nr_packets_in_buffer;
+  neteq_config.enable_fast_accelerate = config.enable_fast_accelerate;
+  return std::make_unique<NetEqTest>(
+      neteq_config, decoder_factory, codecs, std::move(text_log), factory,
+      std::move(input), std::move(output), callbacks);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h
new file mode 100644
index 0000000000..96ce0b4334
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h
@@ -0,0 +1,172 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+#include "test/field_trial.h"
+
+namespace webrtc {
+namespace test {
+
+class SsrcSwitchDetector;
+class NetEqStatsGetter;
+class NetEqStatsPlotter;
+
+// Note that the NetEqTestFactory needs to be alive when the NetEqTest object
+// is used for a simulation.
+class NetEqTestFactory {
+ public:
+  NetEqTestFactory();
+  ~NetEqTestFactory();
+  struct Config {
+    Config();
+    Config(const Config& other);
+    ~Config();
+    // RTP payload type for PCM-u.
+    static constexpr int default_pcmu() { return 0; }
+    int pcmu = default_pcmu();
+    // RTP payload type for PCM-a.
+    static constexpr int default_pcma() { return 8; }
+    int pcma = default_pcma();
+    // RTP payload type for iLBC.
+    static constexpr int default_ilbc() { return 102; }
+    int ilbc = default_ilbc();
+    // RTP payload type for iSAC.
+    static constexpr int default_isac() { return 103; }
+    int isac = default_isac();
+    // RTP payload type for iSAC-swb (32 kHz).
+    static constexpr int default_isac_swb() { return 104; }
+    int isac_swb = default_isac_swb();
+    // RTP payload type for Opus.
+    static constexpr int default_opus() { return 111; }
+    int opus = default_opus();
+    // RTP payload type for PCM16b-nb (8 kHz).
+    static constexpr int default_pcm16b() { return 93; }
+    int pcm16b = default_pcm16b();
+    // RTP payload type for PCM16b-wb (16 kHz).
+    static constexpr int default_pcm16b_wb() { return 94; }
+    int pcm16b_wb = default_pcm16b_wb();
+    // RTP payload type for PCM16b-swb32 (32 kHz).
+    static constexpr int default_pcm16b_swb32() { return 95; }
+    int pcm16b_swb32 = default_pcm16b_swb32();
+    // RTP payload type for PCM16b-swb48 (48 kHz).
+    static constexpr int default_pcm16b_swb48() { return 96; }
+    int pcm16b_swb48 = default_pcm16b_swb48();
+    // RTP payload type for G.722.
+    static constexpr int default_g722() { return 9; }
+    int g722 = default_g722();
+    // RTP payload type for AVT/DTMF (8 kHz).
+    static constexpr int default_avt() { return 106; }
+    int avt = default_avt();
+    // RTP payload type for AVT/DTMF (16 kHz).
+    static constexpr int default_avt_16() { return 114; }
+    int avt_16 = default_avt_16();
+    // RTP payload type for AVT/DTMF (32 kHz).
+    static constexpr int default_avt_32() { return 115; }
+    int avt_32 = default_avt_32();
+    // RTP payload type for AVT/DTMF (48 kHz).
+    static constexpr int default_avt_48() { return 116; }
+    int avt_48 = default_avt_48();
+    // RTP payload type for redundant audio (RED).
+    static constexpr int default_red() { return 117; }
+    int red = default_red();
+    // RTP payload type for comfort noise (8 kHz).
+    static constexpr int default_cn_nb() { return 13; }
+    int cn_nb = default_cn_nb();
+    // RTP payload type for comfort noise (16 kHz).
+    static constexpr int default_cn_wb() { return 98; }
+    int cn_wb = default_cn_wb();
+    // RTP payload type for comfort noise (32 kHz).
+    static constexpr int default_cn_swb32() { return 99; }
+    int cn_swb32 = default_cn_swb32();
+    // RTP payload type for comfort noise (48 kHz).
+    static constexpr int default_cn_swb48() { return 100; }
+    int cn_swb48 = default_cn_swb48();
+    // A PCM file that will be used to populate dummy RTP packets.
+    std::string replacement_audio_file;
+    // Only use packets with this SSRC.
+    absl::optional<uint32_t> ssrc_filter;
+    // Extension ID for audio level (RFC 6464).
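+    // (One-byte RTP header extensions may use IDs in the range 1-14, per
+    // RFC 5285; the defaults below follow common WebRTC assignments.)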
+    static constexpr int default_audio_level() { return 1; }
+    int audio_level = default_audio_level();
+    // Extension ID for absolute sender time.
+    static constexpr int default_abs_send_time() { return 3; }
+    int abs_send_time = default_abs_send_time();
+    // Extension ID for transport sequence number.
+    static constexpr int default_transport_seq_no() { return 5; }
+    int transport_seq_no = default_transport_seq_no();
+    // Extension ID for video content type.
+    static constexpr int default_video_content_type() { return 7; }
+    int video_content_type = default_video_content_type();
+    // Extension ID for video timing.
+    static constexpr int default_video_timing() { return 8; }
+    int video_timing = default_video_timing();
+    // Generates a matlab script for plotting the delay profile.
+    bool matlabplot = false;
+    // Generates a python script for plotting the delay profile.
+    bool pythonplot = false;
+    // Prints concealment events.
+    bool concealment_events = false;
+    // Maximum allowed number of packets in the buffer.
+    static constexpr int default_max_nr_packets_in_buffer() { return 200; }
+    int max_nr_packets_in_buffer = default_max_nr_packets_in_buffer();
+    // Number of dummy packets to put in the packet buffer at the start of the
+    // simulation.
+    static constexpr int default_initial_dummy_packets() { return 0; }
+    int initial_dummy_packets = default_initial_dummy_packets();
+    // Number of getAudio events to skip at the start of the simulation.
+    static constexpr int default_skip_get_audio_events() { return 0; }
+    int skip_get_audio_events = default_skip_get_audio_events();
+    // Enables jitter buffer fast accelerate.
+    bool enable_fast_accelerate = false;
+    // Dumps events that describe the simulation on a step-by-step basis.
+    bool textlog = false;
+    // If specified and `textlog` is true, the output of `textlog` is written
+    // to the specified file name.
+    absl::optional<std::string> textlog_filename;
+    // Base name for the output script files for plotting the delay profile.
+    absl::optional<std::string> plot_scripts_basename;
+    // Path to the output audio file.
+    absl::optional<std::string> output_audio_filename;
+    // Field trials to use during the simulation.
+    std::string field_trial_string;
+  };
+
+  std::unique_ptr<NetEqTest> InitializeTestFromFile(
+      absl::string_view input_filename,
+      NetEqFactory* neteq_factory,
+      const Config& config);
+  std::unique_ptr<NetEqTest> InitializeTestFromString(
+      absl::string_view input_string,
+      NetEqFactory* neteq_factory,
+      const Config& config);
+
+ private:
+  std::unique_ptr<NetEqTest> InitializeTest(std::unique_ptr<NetEqInput> input,
+                                            NetEqFactory* neteq_factory,
+                                            const Config& config);
+  std::unique_ptr<SsrcSwitchDetector> ssrc_switch_detector_;
+  std::unique_ptr<NetEqStatsPlotter> stats_plotter_;
+  // The field trials are stored in the test factory, because neteq_test is not
+  // in a testonly target, and therefore cannot use ScopedFieldTrials.
+  std::unique_ptr<ScopedFieldTrials> field_trials_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h
new file mode 100644
index 0000000000..25577fc882
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
+
+#include <stdio.h>
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+
+namespace webrtc {
+namespace test {
+
+class OutputAudioFile : public AudioSink {
+ public:
+  // Creates an OutputAudioFile, opening a file named `file_name` for writing.
+  // The file format is 16-bit signed host-endian PCM.
+  explicit OutputAudioFile(absl::string_view file_name) {
+    out_file_ = fopen(std::string(file_name).c_str(), "wb");
+  }
+
+  virtual ~OutputAudioFile() {
+    if (out_file_)
+      fclose(out_file_);
+  }
+
+  OutputAudioFile(const OutputAudioFile&) = delete;
+  OutputAudioFile& operator=(const OutputAudioFile&) = delete;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    RTC_DCHECK(out_file_);
+    return fwrite(audio, sizeof(*audio), num_samples, out_file_) ==
+           num_samples;
+  }
+
+ private:
+  FILE* out_file_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h
new file mode 100644
index 0000000000..20eedfb554
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+
+namespace webrtc {
+namespace test {
+
+class OutputWavFile : public AudioSink {
+ public:
+  // Creates an OutputWavFile, opening a file named `file_name` for writing.
+  // The output file is a PCM encoded wav file.
+  OutputWavFile(absl::string_view file_name,
+                int sample_rate_hz,
+                int num_channels = 1)
+      : wav_writer_(file_name, sample_rate_hz, num_channels) {}
+
+  OutputWavFile(const OutputWavFile&) = delete;
+  OutputWavFile& operator=(const OutputWavFile&) = delete;
+
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    wav_writer_.WriteSamples(audio, num_samples);
+    return true;
+  }
+
+ private:
+  WavWriter wav_writer_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc
new file mode 100644
index 0000000000..e540173f43
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc
@@ -0,0 +1,133 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+#include "api/array_view.h"
+#include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/copy_on_write_buffer.h"
+
+namespace webrtc {
+namespace test {
+
+Packet::Packet(rtc::CopyOnWriteBuffer packet,
+               size_t virtual_packet_length_bytes,
+               double time_ms,
+               const RtpHeaderExtensionMap* extension_map)
+    : packet_(std::move(packet)),
+      virtual_packet_length_bytes_(virtual_packet_length_bytes),
+      time_ms_(time_ms),
+      valid_header_(ParseHeader(extension_map)) {}
+
+Packet::Packet(const RTPHeader& header,
+               size_t virtual_packet_length_bytes,
+               size_t virtual_payload_length_bytes,
+               double time_ms)
+    : header_(header),
+      virtual_packet_length_bytes_(virtual_packet_length_bytes),
+      virtual_payload_length_bytes_(virtual_payload_length_bytes),
+      time_ms_(time_ms),
+      valid_header_(true) {}
+
+Packet::~Packet() = default;
+
+bool Packet::ExtractRedHeaders(std::list<RTPHeader*>* headers) const {
+  //
+  //  0                   1                   2                   3
+  //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |1|   block PT  |      timestamp offset     |    block length   |
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |1|     ...                                                     |
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |0|   block PT  |
+  // +-+-+-+-+-+-+-+-+
+  //
+
+  const uint8_t* payload_ptr = payload();
+  const uint8_t* payload_end_ptr = payload_ptr + payload_length_bytes();
+
+  // Find all RED headers with the extension bit set to 1. That is, all headers
+  // but the last one.
+  while ((payload_ptr < payload_end_ptr) && (*payload_ptr & 0x80)) {
+    RTPHeader* header = new RTPHeader;
+    CopyToHeader(header);
+    header->payloadType = payload_ptr[0] & 0x7F;
+    uint32_t offset = (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
+    header->timestamp -= offset;
+    headers->push_front(header);
+    payload_ptr += 4;
+  }
+  // Last header.
+  RTC_DCHECK_LT(payload_ptr, payload_end_ptr);
+  if (payload_ptr >= payload_end_ptr) {
+    return false;  // Payload too short.
+  }
+  RTPHeader* header = new RTPHeader;
+  CopyToHeader(header);
+  header->payloadType = payload_ptr[0] & 0x7F;
+  headers->push_front(header);
+  return true;
+}
+
+void Packet::DeleteRedHeaders(std::list<RTPHeader*>* headers) {
+  while (!headers->empty()) {
+    delete headers->front();
+    headers->pop_front();
+  }
+}
+
+bool Packet::ParseHeader(const RtpHeaderExtensionMap* extension_map) {
+  // Use RtpPacketReceived instead of RtpPacket, because the former already has
+  // a converter into the legacy RTPHeader.
+  webrtc::RtpPacketReceived rtp_packet(extension_map);
+
+  // Because of the special case of dummy packets that have padding marked in
+  // the RTP header, but do not carry an RTP payload containing the padding
+  // size, handle padding manually. The regular RTP packet parser reports a
+  // failure, but that is fine in this context.
+  bool padding = (packet_[0] & 0b0010'0000);
+  size_t padding_size = 0;
+  if (padding) {
+    // Clear the padding bit to prevent failure when the rtp payload is
+    // omitted.
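+    // (The padding flag is bit 5 of the first header byte: clearing it turns,
+    // e.g., a first byte of 0xA0 back into a plain version-2 byte 0x80, so
+    // that the parser accepts the payload-less packet.)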
+    rtc::CopyOnWriteBuffer packet(packet_);
+    packet.MutableData()[0] &= ~0b0010'0000;
+    if (!rtp_packet.Parse(std::move(packet))) {
+      return false;
+    }
+    if (rtp_packet.payload_size() > 0) {
+      padding_size = rtp_packet.data()[rtp_packet.size() - 1];
+    }
+    if (padding_size > rtp_packet.payload_size()) {
+      return false;
+    }
+  } else {
+    if (!rtp_packet.Parse(packet_)) {
+      return false;
+    }
+  }
+  rtp_payload_ = rtc::MakeArrayView(packet_.data() + rtp_packet.headers_size(),
+                                    rtp_packet.payload_size() - padding_size);
+  rtp_packet.GetHeader(&header_);
+
+  RTC_CHECK_GE(virtual_packet_length_bytes_, rtp_packet.size());
+  RTC_DCHECK_GE(virtual_packet_length_bytes_, rtp_packet.headers_size());
+  virtual_payload_length_bytes_ =
+      virtual_packet_length_bytes_ - rtp_packet.headers_size();
+  return true;
+}
+
+void Packet::CopyToHeader(RTPHeader* destination) const {
+  *destination = header_;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h
new file mode 100644
index 0000000000..96710907df
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h
@@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
+
+#include <list>
+
+#include "api/array_view.h"
+#include "api/rtp_headers.h"
+#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
+#include "rtc_base/copy_on_write_buffer.h"
+
+namespace webrtc {
+namespace test {
+
+// Class for handling RTP packets in test applications.
+class Packet {
+ public:
+  // Creates a packet, with the packet payload (including header bytes) in
+  // `packet`. The `time_ms` is an extra time associated with this packet,
+  // typically used to denote arrival time.
+  // `virtual_packet_length_bytes` is typically used when reading RTP dump
+  // files that only contain the RTP headers, and no payload (a.k.a. RTP dummy
+  // files or RTP light). The `virtual_packet_length_bytes` tells what size the
+  // packet had on the wire, including the now discarded payload.
+  Packet(rtc::CopyOnWriteBuffer packet,
+         size_t virtual_packet_length_bytes,
+         double time_ms,
+         const RtpHeaderExtensionMap* extension_map = nullptr);
+
+  Packet(rtc::CopyOnWriteBuffer packet,
+         double time_ms,
+         const RtpHeaderExtensionMap* extension_map = nullptr)
+      : Packet(packet, packet.size(), time_ms, extension_map) {}
+
+  // Same as above, but creates the packet from an already parsed RTPHeader.
+  // This is typically used when reading RTP dump files that only contain the
+  // RTP headers, and no payload. The `virtual_packet_length_bytes` tells what
+  // size the packet had on the wire, including the now discarded payload.
+  // The `virtual_payload_length_bytes` tells the size of the payload.
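+  // For example, a 172-byte packet captured in a header-only dump would be
+  // reconstructed here with virtual_packet_length_bytes = 172 and
+  // virtual_payload_length_bytes = 160, even though only the 12 header bytes
+  // are present in memory.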
+  Packet(const RTPHeader& header,
+         size_t virtual_packet_length_bytes,
+         size_t virtual_payload_length_bytes,
+         double time_ms);
+
+  virtual ~Packet();
+
+  Packet(const Packet&) = delete;
+  Packet& operator=(const Packet&) = delete;
+
+  // Parses the first bytes of the RTP payload, interpreting them as RED
+  // headers according to RFC 2198. The headers will be inserted into
+  // `headers`. The caller of the method assumes ownership of the objects in
+  // the list, and must delete them properly.
+  bool ExtractRedHeaders(std::list<RTPHeader*>* headers) const;
+
+  // Deletes all RTPHeader objects in `headers`, but does not delete `headers`
+  // itself.
+  static void DeleteRedHeaders(std::list<RTPHeader*>* headers);
+
+  const uint8_t* payload() const { return rtp_payload_.data(); }
+
+  size_t packet_length_bytes() const { return packet_.size(); }
+
+  size_t payload_length_bytes() const { return rtp_payload_.size(); }
+
+  size_t virtual_packet_length_bytes() const {
+    return virtual_packet_length_bytes_;
+  }
+
+  size_t virtual_payload_length_bytes() const {
+    return virtual_payload_length_bytes_;
+  }
+
+  const RTPHeader& header() const { return header_; }
+
+  double time_ms() const { return time_ms_; }
+  bool valid_header() const { return valid_header_; }
+
+ private:
+  bool ParseHeader(const RtpHeaderExtensionMap* extension_map);
+  void CopyToHeader(RTPHeader* destination) const;
+
+  RTPHeader header_;
+  const rtc::CopyOnWriteBuffer packet_;
+  rtc::ArrayView<const uint8_t> rtp_payload_;  // Empty for dummy RTP packets.
+  // Virtual lengths are used when parsing RTP header files (dummy RTP files).
+  const size_t virtual_packet_length_bytes_;
+  size_t virtual_payload_length_bytes_ = 0;
+  const double time_ms_;  // Used to denote a packet's arrival time.
+  const bool valid_header_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc
new file mode 100644
index 0000000000..598ae6edd4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc
@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+
+namespace webrtc {
+namespace test {
+
+PacketSource::PacketSource() = default;
+
+PacketSource::~PacketSource() = default;
+
+void PacketSource::FilterOutPayloadType(uint8_t payload_type) {
+  filter_.set(payload_type, true);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h
new file mode 100644
index 0000000000..be1705cae1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
+
+#include <bitset>
+#include <memory>
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+namespace webrtc {
+namespace test {
+
+// Interface class for an object delivering RTP packets to test applications.
+class PacketSource {
+ public:
+  PacketSource();
+  virtual ~PacketSource();
+
+  PacketSource(const PacketSource&) = delete;
+  PacketSource& operator=(const PacketSource&) = delete;
+
+  // Returns next packet. Returns nullptr if the source is depleted, or if an
+  // error occurred.
+  virtual std::unique_ptr<Packet> NextPacket() = 0;
+
+  virtual void FilterOutPayloadType(uint8_t payload_type);
+
+ protected:
+  std::bitset<128> filter_;  // Payload type is 7 bits in the RFC.
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc
new file mode 100644
index 0000000000..69cf56b529
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc
@@ -0,0 +1,226 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for test Packet class.
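+//
+// The tests below assemble raw RTP headers byte by byte with MakeRtpHeader()
+// and verify that Packet parses them back into the same field values.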
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+const int kHeaderLengthBytes = 12;
+
+void MakeRtpHeader(int payload_type,
+                   int seq_number,
+                   uint32_t timestamp,
+                   uint32_t ssrc,
+                   uint8_t* rtp_data) {
+  rtp_data[0] = 0x80;
+  rtp_data[1] = static_cast<uint8_t>(payload_type);
+  rtp_data[2] = (seq_number >> 8) & 0xFF;
+  rtp_data[3] = (seq_number)&0xFF;
+  rtp_data[4] = timestamp >> 24;
+  rtp_data[5] = (timestamp >> 16) & 0xFF;
+  rtp_data[6] = (timestamp >> 8) & 0xFF;
+  rtp_data[7] = timestamp & 0xFF;
+  rtp_data[8] = ssrc >> 24;
+  rtp_data[9] = (ssrc >> 16) & 0xFF;
+  rtp_data[10] = (ssrc >> 8) & 0xFF;
+  rtp_data[11] = ssrc & 0xFF;
+}
+}  // namespace
+
+TEST(TestPacket, RegularPacket) {
+  const size_t kPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+TEST(TestPacket, DummyPacket) {
+  const size_t kPacketLengthBytes = kHeaderLengthBytes;  // Only RTP header.
+  const size_t kVirtualPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kVirtualPacketLengthBytes,
+                kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+TEST(TestPacket, DummyPaddingPacket) {
+  const size_t kPacketLengthBytes = kHeaderLengthBytes;  // Only RTP header.
+  const size_t kVirtualPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  packet_memory.MutableData()[0] |= 0b0010'0000;  // Set the padding bit.
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kVirtualPacketLengthBytes,
+                kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+namespace {
+// Writes one RED block header starting at `rtp_data`, according to RFC 2198.
+// Returns the number of bytes written (1 or 4).
+//
+// Format if `last_payload` is false:
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |1|   block PT  |      timestamp offset     |    block length   |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//
+// Format if `last_payload` is true:
+//  0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |0|   Block PT  |
+// +-+-+-+-+-+-+-+-+
+
+int MakeRedHeader(int payload_type,
+                  uint32_t timestamp_offset,
+                  int block_length,
+                  bool last_payload,
+                  uint8_t* rtp_data) {
+  rtp_data[0] = 0x80 | (payload_type & 0x7F);  // Set the first bit to 1.
+  if (last_payload) {
+    rtp_data[0] &= 0x7F;  // Reset the first bit to 0 to indicate last block.
+    return 1;
+  }
+  rtp_data[1] = timestamp_offset >> 6;
+  rtp_data[2] = (timestamp_offset & 0x3F) << 2;
+  rtp_data[2] |= block_length >> 8;
+  rtp_data[3] = block_length & 0xFF;
+  return 4;
+}
+}  // namespace
+
+TEST(TestPacket, RED) {
+  const size_t kPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kRedPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kRedPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  // Create four RED headers.
+  // Payload types are just the same as the block index; the offset is 100
+  // times the block index.
+  const int kRedBlocks = 4;
+  uint8_t* payload_ptr = packet_memory.MutableData() +
+                         kHeaderLengthBytes;  // First byte after header.
+  for (int i = 0; i < kRedBlocks; ++i) {
+    int payload_type = i;
+    // Offset value is not used for the last block.
+    uint32_t timestamp_offset = 100 * i;
+    int block_length = 10 * i;
+    bool last_block = (i == kRedBlocks - 1) ? true : false;
+    payload_ptr += MakeRedHeader(payload_type, timestamp_offset, block_length,
+                                 last_block, payload_ptr);
+  }
+  const double kPacketTime = 1.0;
+  // Hand over ownership of `packet_memory` to `packet`.
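+  // (rtc::CopyOnWriteBuffer is reference counted, so passing it by value here
+  // only adds a reference; the underlying packet bytes are not deep-copied.)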
+  Packet packet(packet_memory, kPacketLengthBytes, kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kRedPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+  std::list<RTPHeader*> red_headers;
+  EXPECT_TRUE(packet.ExtractRedHeaders(&red_headers));
+  EXPECT_EQ(kRedBlocks, static_cast<int>(red_headers.size()));
+  int block_index = 0;
+  for (std::list<RTPHeader*>::reverse_iterator it = red_headers.rbegin();
+       it != red_headers.rend(); ++it) {
+    // Reading list from the back, since the extraction puts the main payload
+    // (which is the last one on wire) first.
+    RTPHeader* red_block = *it;
+    EXPECT_EQ(block_index, red_block->payloadType);
+    EXPECT_EQ(kSequenceNumber, red_block->sequenceNumber);
+    if (block_index == kRedBlocks - 1) {
+      // Last block has zero offset per definition.
+      EXPECT_EQ(kTimestamp, red_block->timestamp);
+    } else {
+      EXPECT_EQ(kTimestamp - 100 * block_index, red_block->timestamp);
+    }
+    EXPECT_EQ(kSsrc, red_block->ssrc);
+    EXPECT_EQ(0, red_block->numCSRCs);
+    ++block_index;
+  }
+  Packet::DeleteRedHeaders(&red_headers);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc
new file mode 100644
index 0000000000..5050e1fb17
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+
+#include <memory>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+bool ResampleInputAudioFile::Read(size_t samples,
+                                  int output_rate_hz,
+                                  int16_t* destination) {
+  const size_t samples_to_read = samples * file_rate_hz_ / output_rate_hz;
+  RTC_CHECK_EQ(samples_to_read * output_rate_hz, samples * file_rate_hz_)
+      << "Frame size and sample rates don't add up to an integer.";
+  std::unique_ptr<int16_t[]> temp_destination(new int16_t[samples_to_read]);
+  if (!InputAudioFile::Read(samples_to_read, temp_destination.get()))
+    return false;
+  resampler_.ResetIfNeeded(file_rate_hz_, output_rate_hz, 1);
+  size_t output_length = 0;
+  RTC_CHECK_EQ(resampler_.Push(temp_destination.get(), samples_to_read,
+                               destination, samples, output_length),
+               0);
+  RTC_CHECK_EQ(samples, output_length);
+  return true;
+}
+
+bool ResampleInputAudioFile::Read(size_t samples, int16_t* destination) {
+  RTC_CHECK_GT(output_rate_hz_, 0) << "Output rate not set.";
+  return Read(samples, output_rate_hz_, destination);
+}
+
+void ResampleInputAudioFile::set_output_rate_hz(int rate_hz) {
+  output_rate_hz_ = rate_hz;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h
new file mode 100644
index 0000000000..279fece616
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/resampler/include/resampler.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+
+namespace webrtc {
+namespace test {
+
+// Class for handling a looping input audio file with resampling.
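+//
+// Example use (with an assumed file name), reading an 8 kHz mono PCM file and
+// consuming it at 16 kHz:
+//
+//   ResampleInputAudioFile file("speech_8khz.pcm", 8000, 16000);
+//   int16_t out[160];     // 10 ms at 16 kHz.
+//   file.Read(160, out);  // Reads 80 source samples and outputs 160.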
+class ResampleInputAudioFile : public InputAudioFile {
+ public:
+  ResampleInputAudioFile(absl::string_view file_name,
+                         int file_rate_hz,
+                         bool loop_at_end = true)
+      : InputAudioFile(file_name, loop_at_end),
+        file_rate_hz_(file_rate_hz),
+        output_rate_hz_(-1) {}
+  ResampleInputAudioFile(absl::string_view file_name,
+                         int file_rate_hz,
+                         int output_rate_hz,
+                         bool loop_at_end = true)
+      : InputAudioFile(file_name, loop_at_end),
+        file_rate_hz_(file_rate_hz),
+        output_rate_hz_(output_rate_hz) {}
+
+  ResampleInputAudioFile(const ResampleInputAudioFile&) = delete;
+  ResampleInputAudioFile& operator=(const ResampleInputAudioFile&) = delete;
+
+  bool Read(size_t samples, int output_rate_hz, int16_t* destination);
+  bool Read(size_t samples, int16_t* destination) override;
+  void set_output_rate_hz(int rate_hz);
+
+ private:
+  const int file_rate_hz_;
+  int output_rate_hz_;
+  Resampler resampler_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc
new file mode 100644
index 0000000000..1407aab5f2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc
@@ -0,0 +1,168 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/rtc_event_log_source.h"
+
+#include <string.h>
+
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "logging/rtc_event_log/rtc_event_processor.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+bool ShouldSkipStream(ParsedRtcEventLog::MediaType media_type,
+                      uint32_t ssrc,
+                      absl::optional<uint32_t> ssrc_filter) {
+  if (media_type != ParsedRtcEventLog::MediaType::AUDIO)
+    return true;
+  if (ssrc_filter.has_value() && ssrc != *ssrc_filter)
+    return true;
+  return false;
+}
+}  // namespace
+
+std::unique_ptr<RtcEventLogSource> RtcEventLogSource::CreateFromFile(
+    absl::string_view file_name,
+    absl::optional<uint32_t> ssrc_filter) {
+  auto source = std::unique_ptr<RtcEventLogSource>(new RtcEventLogSource());
+  ParsedRtcEventLog parsed_log;
+  auto status = parsed_log.ParseFile(file_name);
+  if (!status.ok()) {
+    std::cerr << "Failed to parse event log: " << status.message()
+              << std::endl;
+    std::cerr << "Skipping log." << std::endl;
+    return nullptr;
+  }
+  if (!source->Initialize(parsed_log, ssrc_filter)) {
+    std::cerr << "Failed to initialize source from event log, skipping."
+              << std::endl;
+    return nullptr;
+  }
+  return source;
+}
+
+std::unique_ptr<RtcEventLogSource> RtcEventLogSource::CreateFromString(
+    absl::string_view file_contents,
+    absl::optional<uint32_t> ssrc_filter) {
+  auto source = std::unique_ptr<RtcEventLogSource>(new RtcEventLogSource());
+  ParsedRtcEventLog parsed_log;
+  auto status = parsed_log.ParseString(file_contents);
+  if (!status.ok()) {
+    std::cerr << "Failed to parse event log: " << status.message()
+              << std::endl;
+    std::cerr << "Skipping log." << std::endl;
+    return nullptr;
+  }
+  if (!source->Initialize(parsed_log, ssrc_filter)) {
+    std::cerr << "Failed to initialize source from event log, skipping."
+              << std::endl;
+    return nullptr;
+  }
+  return source;
+}
+
+RtcEventLogSource::~RtcEventLogSource() {}
+
+std::unique_ptr<Packet> RtcEventLogSource::NextPacket() {
+  if (rtp_packet_index_ >= rtp_packets_.size())
+    return nullptr;
+
+  std::unique_ptr<Packet> packet = std::move(rtp_packets_[rtp_packet_index_++]);
+  return packet;
+}
+
+int64_t RtcEventLogSource::NextAudioOutputEventMs() {
+  if (audio_output_index_ >= audio_outputs_.size())
+    return std::numeric_limits<int64_t>::max();
+
+  int64_t output_time_ms = audio_outputs_[audio_output_index_++];
+  return output_time_ms;
+}
+
+RtcEventLogSource::RtcEventLogSource() : PacketSource() {}
+
+bool RtcEventLogSource::Initialize(const ParsedRtcEventLog& parsed_log,
+                                   absl::optional<uint32_t> ssrc_filter) {
+  const auto first_log_end_time_us =
+      parsed_log.stop_log_events().empty()
+          ? std::numeric_limits<int64_t>::max()
+          : parsed_log.stop_log_events().front().log_time_us();
+
+  std::set<uint32_t> packet_ssrcs;
+  auto handle_rtp_packet =
+      [this, first_log_end_time_us,
+       &packet_ssrcs](const webrtc::LoggedRtpPacketIncoming& incoming) {
+        if (!filter_.test(incoming.rtp.header.payloadType) &&
+            incoming.log_time_us() < first_log_end_time_us) {
+          rtp_packets_.emplace_back(std::make_unique<Packet>(
+              incoming.rtp.header, incoming.rtp.total_length,
+              incoming.rtp.total_length - incoming.rtp.header_length,
+              static_cast<double>(incoming.log_time_ms())));
+          packet_ssrcs.insert(rtp_packets_.back()->header().ssrc);
+        }
+      };
+
+  std::set<uint32_t> ignored_ssrcs;
+  auto handle_audio_playout =
+      [this, first_log_end_time_us, &packet_ssrcs,
+       &ignored_ssrcs](const webrtc::LoggedAudioPlayoutEvent& audio_playout) {
+        if (audio_playout.log_time_us() < first_log_end_time_us) {
+          if (packet_ssrcs.count(audio_playout.ssrc) > 0) {
+            audio_outputs_.emplace_back(audio_playout.log_time_ms());
+          } else {
+            ignored_ssrcs.insert(audio_playout.ssrc);
+          }
+        }
+      };
+
+  // This wouldn't be needed if we knew that there was at most one audio
+  // stream.
+  webrtc::RtcEventProcessor event_processor;
+  for (const auto& rtp_packets : parsed_log.incoming_rtp_packets_by_ssrc()) {
+    ParsedRtcEventLog::MediaType media_type =
+        parsed_log.GetMediaType(rtp_packets.ssrc, webrtc::kIncomingPacket);
+    if (ShouldSkipStream(media_type, rtp_packets.ssrc, ssrc_filter)) {
+      continue;
+    }
+    event_processor.AddEvents(rtp_packets.incoming_packets, handle_rtp_packet);
+    // If no SSRC filter has been set, use the first SSRC only. The simulator
+    // does not work properly with interleaved packets from multiple SSRCs.
+    if (!ssrc_filter.has_value()) {
+      ssrc_filter = rtp_packets.ssrc;
+    }
+  }
+
+  for (const auto& audio_playouts : parsed_log.audio_playout_events()) {
+    if (ssrc_filter.has_value() && audio_playouts.first != *ssrc_filter)
+      continue;
+    event_processor.AddEvents(audio_playouts.second, handle_audio_playout);
+  }
+
+  // Fills in rtp_packets_ and audio_outputs_.
+  event_processor.ProcessEventsInOrder();
+
+  for (const auto& ssrc : ignored_ssrcs) {
+    std::cout << "Ignoring GetAudio events from SSRC 0x" << std::hex << ssrc
+              << " because no packets were found with a matching SSRC."
+              << std::endl;
+  }
+
+  return true;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h
new file mode 100644
index 0000000000..c67912a67d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "logging/rtc_event_log/rtc_event_log_parser.h"
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+
+namespace webrtc {
+
+class RtpHeaderParser;
+
+namespace test {
+
+class Packet;
+
+class RtcEventLogSource : public PacketSource {
+ public:
+  // Creates an RtcEventLogSource reading from `file_name`. If the file cannot
+  // be opened, or has the wrong format, NULL will be returned.
+  static std::unique_ptr<RtcEventLogSource> CreateFromFile(
+      absl::string_view file_name,
+      absl::optional<uint32_t> ssrc_filter);
+  // Same as above, but uses a string with the file contents.
+  static std::unique_ptr<RtcEventLogSource> CreateFromString(
+      absl::string_view file_contents,
+      absl::optional<uint32_t> ssrc_filter);
+
+  virtual ~RtcEventLogSource();
+
+  RtcEventLogSource(const RtcEventLogSource&) = delete;
+  RtcEventLogSource& operator=(const RtcEventLogSource&) = delete;
+
+  std::unique_ptr<Packet> NextPacket() override;
+
+  // Returns the timestamp of the next audio output event, in milliseconds. The
+  // maximum value of int64_t is returned if there are no more audio output
+  // events available.
+  int64_t NextAudioOutputEventMs();
+
+ private:
+  RtcEventLogSource();
+
+  bool Initialize(const ParsedRtcEventLog& parsed_log,
+                  absl::optional<uint32_t> ssrc_filter);
+
+  std::vector<std::unique_ptr<Packet>> rtp_packets_;
+  size_t rtp_packet_index_ = 0;
+  std::vector<int64_t> audio_outputs_;
+  size_t audio_output_index_ = 0;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc
new file mode 100644
index 0000000000..7ecf925ebb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc
@@ -0,0 +1,166 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <memory>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+
+ABSL_FLAG(int, red, 117, "RTP payload type for RED");
+ABSL_FLAG(int,
+          audio_level,
+          -1,
+          "Extension ID for audio level (RFC 6464); "
+          "-1 not to print audio level");
+ABSL_FLAG(int,
+          abs_send_time,
+          -1,
+          "Extension ID for absolute sender time; "
+          "-1 not to print absolute send time");
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  std::string usage =
+      "Tool for parsing an RTP dump file to text output.\n"
+      "Example usage:\n"
+      "./rtp_analyze input.rtp output.txt\n\n"
+      "Output is sent to stdout if no output file is given. "
+      "Note that this tool can read files with or without payloads.\n";
+  if (args.size() != 2 && args.size() != 3) {
+    printf("%s", usage.c_str());
+    return 1;
+  }
+
+  RTC_CHECK(absl::GetFlag(FLAGS_red) >= 0 &&
+            absl::GetFlag(FLAGS_red) <= 127);  // Payload type
+  RTC_CHECK(absl::GetFlag(FLAGS_audio_level) == -1 ||  // Default
+            (absl::GetFlag(FLAGS_audio_level) > 0 &&
+             absl::GetFlag(FLAGS_audio_level) <= 255));  // Extension ID
+  RTC_CHECK(absl::GetFlag(FLAGS_abs_send_time) == -1 ||  // Default
+            (absl::GetFlag(FLAGS_abs_send_time) > 0 &&
+             absl::GetFlag(FLAGS_abs_send_time) <= 255));  // Extension ID
+
+  printf("Input file: %s\n", args[1]);
+  std::unique_ptr<webrtc::test::RtpFileSource> file_source(
+      webrtc::test::RtpFileSource::Create(args[1]));
+  RTC_DCHECK(file_source.get());
+  // Set RTP extension IDs.
+  bool print_audio_level = false;
+  if (absl::GetFlag(FLAGS_audio_level) != -1) {
+    print_audio_level = true;
+    file_source->RegisterRtpHeaderExtension(webrtc::kRtpExtensionAudioLevel,
+                                            absl::GetFlag(FLAGS_audio_level));
+  }
+  bool print_abs_send_time = false;
+  if (absl::GetFlag(FLAGS_abs_send_time) != -1) {
+    print_abs_send_time = true;
+    file_source->RegisterRtpHeaderExtension(
+        webrtc::kRtpExtensionAbsoluteSendTime,
+        absl::GetFlag(FLAGS_abs_send_time));
+  }
+
+  FILE* out_file;
+  if (args.size() == 3) {
+    out_file = fopen(args[2], "wt");
+    if (!out_file) {
+      printf("Cannot open output file %s\n", args[2]);
+      return -1;
+    }
+    printf("Output file: %s\n\n", args[2]);
+  } else {
+    out_file = stdout;
+  }
+
+  // Print file header.
+  fprintf(out_file, "SeqNo  TimeStamp   SendTime  Size    PT  M       SSRC");
+  if (print_audio_level) {
+    fprintf(out_file, " AuLvl (V)");
+  }
+  if (print_abs_send_time) {
+    fprintf(out_file, " AbsSendTime");
+  }
+  fprintf(out_file, "\n");
+
+  uint32_t max_abs_send_time = 0;
+  int cycles = -1;
+  std::unique_ptr<webrtc::test::Packet> packet;
+  while (true) {
+    packet = file_source->NextPacket();
+    if (!packet.get()) {
+      // End of file reached.
+      break;
+    }
+    // Write packet data to file. Use virtual_packet_length_bytes so that the
+    // correct packet sizes are printed also for RTP header-only dumps.
+    fprintf(out_file, "%5u %10u %10u %5i %5i %2i %#08X",
+            packet->header().sequenceNumber, packet->header().timestamp,
+            static_cast<unsigned int>(packet->time_ms()),
+            static_cast<int>(packet->virtual_packet_length_bytes()),
+            packet->header().payloadType, packet->header().markerBit,
+            packet->header().ssrc);
+    if (print_audio_level && packet->header().extension.hasAudioLevel) {
+      fprintf(out_file, " %5u (%1i)", packet->header().extension.audioLevel,
+              packet->header().extension.voiceActivity);
+    }
+    if (print_abs_send_time && packet->header().extension.hasAbsoluteSendTime) {
+      if (cycles == -1) {
+        // Initialize.
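+        // (Absolute send time has 18 fractional bits, so one unit is
+        // 1/262144 s and the 24-bit counter wraps every 64 s; `cycles` counts
+        // those wrap-arounds.)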
+        max_abs_send_time = packet->header().extension.absoluteSendTime;
+        cycles = 0;
+      }
+      // Abs sender time is 24 bit 6.18 fixed point. Shift by 8 to normalize to
+      // 32 bits (unsigned). Calculate the difference between this packet's
+      // send time and the maximum observed. Cast to signed 32-bit to get the
+      // desired wrap-around behavior.
+      if (static_cast<int32_t>(
+              (packet->header().extension.absoluteSendTime << 8) -
+              (max_abs_send_time << 8)) >= 0) {
+        // The difference is non-negative, meaning that this packet is newer
+        // than the previously observed maximum absolute send time.
+        if (packet->header().extension.absoluteSendTime < max_abs_send_time) {
+          // Wrap detected.
+          cycles++;
+        }
+        max_abs_send_time = packet->header().extension.absoluteSendTime;
+      }
+      // Abs sender time is 24 bit 6.18 fixed point. Divide by 2^18 to convert
+      // to floating point representation.
+      double send_time_seconds =
+          static_cast<double>(packet->header().extension.absoluteSendTime) /
+              262144 +
+          64.0 * cycles;
+      fprintf(out_file, " %11f", send_time_seconds);
+    }
+    fprintf(out_file, "\n");
+
+    if (packet->header().payloadType == absl::GetFlag(FLAGS_red)) {
+      std::list<webrtc::RTPHeader*> red_headers;
+      packet->ExtractRedHeaders(&red_headers);
+      while (!red_headers.empty()) {
+        webrtc::RTPHeader* red = red_headers.front();
+        RTC_DCHECK(red);
+        fprintf(out_file, "* %5u %10u %10u %5i\n", red->sequenceNumber,
+                red->timestamp, static_cast<unsigned int>(packet->time_ms()),
+                red->payloadType);
+        red_headers.pop_front();
+        delete red;
+      }
+    }
+  }
+
+  fclose(out_file);
+
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc
new file mode 100644
index 0000000000..8adca927f0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc
@@ -0,0 +1,351 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+#if defined(WEBRTC_LINUX) || defined(WEBRTC_FUCHSIA)
+#include <netinet/in.h>
+#endif
+
+#include <iostream>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/memory/memory.h"
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/L16/audio_encoder_L16.h"
+#include "api/audio_codecs/g711/audio_encoder_g711.h"
+#include "api/audio_codecs/g722/audio_encoder_g722.h"
+#include "api/audio_codecs/ilbc/audio_encoder_ilbc.h"
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+ABSL_FLAG(bool, list_codecs, false, "Enumerate all codecs");
+ABSL_FLAG(std::string, codec, "opus", "Codec to use");
+ABSL_FLAG(int,
+          frame_len,
+          0,
+          "Frame length in ms; 0 indicates codec default value");
+ABSL_FLAG(int, bitrate, 0, "Bitrate in kbps; 0 indicates codec default value");
+ABSL_FLAG(int,
+          payload_type,
+          -1,
+          "RTP payload type; -1 indicates codec default value");
+ABSL_FLAG(int,
+          cng_payload_type,
+          -1,
+          "RTP payload type for CNG; -1 indicates default value");
+ABSL_FLAG(int, ssrc, 0, "SSRC to write to the RTP header");
+ABSL_FLAG(bool, dtx, false, "Use DTX/CNG");
+ABSL_FLAG(int, sample_rate, 48000, "Sample rate of the input file");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+// Add new codecs here, and to the map below.
+enum class CodecType {
+  kOpus,
+  kPcmU,
+  kPcmA,
+  kG722,
+  kPcm16b8,
+  kPcm16b16,
+  kPcm16b32,
+  kPcm16b48,
+  kIlbc,
+};
+
+struct CodecTypeAndInfo {
+  CodecType type;
+  int default_payload_type;
+  bool internal_dtx;
+};
+
+// List all supported codecs here. This map defines the command-line parameter
+// value (the key string) for selecting each codec, together with information
+// whether it is using internal or external DTX/CNG.
+const std::map<std::string, CodecTypeAndInfo>& CodecList() {
+  static const auto* const codec_list =
+      new std::map<std::string, CodecTypeAndInfo>{
+          {"opus", {CodecType::kOpus, 111, true}},
+          {"pcmu", {CodecType::kPcmU, 0, false}},
+          {"pcma", {CodecType::kPcmA, 8, false}},
+          {"g722", {CodecType::kG722, 9, false}},
+          {"pcm16b_8", {CodecType::kPcm16b8, 93, false}},
+          {"pcm16b_16", {CodecType::kPcm16b16, 94, false}},
+          {"pcm16b_32", {CodecType::kPcm16b32, 95, false}},
+          {"pcm16b_48", {CodecType::kPcm16b48, 96, false}},
+          {"ilbc", {CodecType::kIlbc, 102, false}}};
+  return *codec_list;
+}
+
+// This class will receive callbacks from ACM when a packet is ready, and write
+// it to the output file.
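+//
+// Each packet is written as one rtpplay dump record: a 16-bit record length, a
+// 16-bit original packet length and a 32-bit arrival-time offset in ms,
+// followed by the 12-byte RTP header and the encoded payload.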
+class Packetizer : public AudioPacketizationCallback {
+ public:
+  Packetizer(FILE* out_file, uint32_t ssrc, int timestamp_rate_hz)
+      : out_file_(out_file),
+        ssrc_(ssrc),
+        timestamp_rate_hz_(timestamp_rate_hz) {}
+
+  int32_t SendData(AudioFrameType frame_type,
+                   uint8_t payload_type,
+                   uint32_t timestamp,
+                   const uint8_t* payload_data,
+                   size_t payload_len_bytes,
+                   int64_t absolute_capture_timestamp_ms) override {
+    if (payload_len_bytes == 0) {
+      return 0;
+    }
+
+    constexpr size_t kRtpHeaderLength = 12;
+    constexpr size_t kRtpDumpHeaderLength = 8;
+    const uint16_t length = htons(rtc::checked_cast<uint16_t>(
+        kRtpHeaderLength + kRtpDumpHeaderLength + payload_len_bytes));
+    const uint16_t plen = htons(
+        rtc::checked_cast<uint16_t>(kRtpHeaderLength + payload_len_bytes));
+    const uint32_t offset = htonl(timestamp / (timestamp_rate_hz_ / 1000));
+    RTC_CHECK_EQ(fwrite(&length, sizeof(uint16_t), 1, out_file_), 1);
+    RTC_CHECK_EQ(fwrite(&plen, sizeof(uint16_t), 1, out_file_), 1);
+    RTC_CHECK_EQ(fwrite(&offset, sizeof(uint32_t), 1, out_file_), 1);
+
+    const uint8_t rtp_header[] = {0x80,
+                                  static_cast<uint8_t>(payload_type & 0x7F),
+                                  static_cast<uint8_t>(sequence_number_ >> 8),
+                                  static_cast<uint8_t>(sequence_number_),
+                                  static_cast<uint8_t>(timestamp >> 24),
+                                  static_cast<uint8_t>(timestamp >> 16),
+                                  static_cast<uint8_t>(timestamp >> 8),
+                                  static_cast<uint8_t>(timestamp),
+                                  static_cast<uint8_t>(ssrc_ >> 24),
+                                  static_cast<uint8_t>(ssrc_ >> 16),
+                                  static_cast<uint8_t>(ssrc_ >> 8),
+                                  static_cast<uint8_t>(ssrc_)};
+    static_assert(sizeof(rtp_header) == kRtpHeaderLength, "");
+    RTC_CHECK_EQ(
+        fwrite(rtp_header, sizeof(uint8_t), kRtpHeaderLength, out_file_),
+        kRtpHeaderLength);
+    ++sequence_number_;  // Intended to wrap on overflow.
+
+    RTC_CHECK_EQ(
+        fwrite(payload_data, sizeof(uint8_t), payload_len_bytes, out_file_),
+        payload_len_bytes);
+
+    return 0;
+  }
+
+ private:
+  FILE* const out_file_;
+  const uint32_t ssrc_;
+  const int timestamp_rate_hz_;
+  uint16_t sequence_number_ = 0;
+};
+
+void SetFrameLenIfFlagIsPositive(int* config_frame_len) {
+  if (absl::GetFlag(FLAGS_frame_len) > 0) {
+    *config_frame_len = absl::GetFlag(FLAGS_frame_len);
+  }
+}
+
+template <class T>
+typename T::Config GetCodecConfig() {
+  typename T::Config config;
+  SetFrameLenIfFlagIsPositive(&config.frame_size_ms);
+  RTC_CHECK(config.IsOk());
+  return config;
+}
+
+AudioEncoderL16::Config Pcm16bConfig(CodecType codec_type) {
+  auto config = GetCodecConfig<AudioEncoderL16>();
+  switch (codec_type) {
+    case CodecType::kPcm16b8:
+      config.sample_rate_hz = 8000;
+      return config;
+    case CodecType::kPcm16b16:
+      config.sample_rate_hz = 16000;
+      return config;
+    case CodecType::kPcm16b32:
+      config.sample_rate_hz = 32000;
+      return config;
+    case CodecType::kPcm16b48:
+      config.sample_rate_hz = 48000;
+      return config;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      return config;
+  }
+}
+
+std::unique_ptr<AudioEncoder> CreateEncoder(CodecType codec_type,
+                                            int payload_type) {
+  switch (codec_type) {
+    case CodecType::kOpus: {
+      AudioEncoderOpus::Config config = GetCodecConfig<AudioEncoderOpus>();
+      if (absl::GetFlag(FLAGS_bitrate) > 0) {
+        config.bitrate_bps = absl::GetFlag(FLAGS_bitrate);
+      }
+      config.dtx_enabled = absl::GetFlag(FLAGS_dtx);
+      RTC_CHECK(config.IsOk());
+      return AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
+    }
+
+    case CodecType::kPcmU:
+    case CodecType::kPcmA: {
+      AudioEncoderG711::Config config = GetCodecConfig<AudioEncoderG711>();
+      config.type = codec_type == CodecType::kPcmU
+                        ? AudioEncoderG711::Config::Type::kPcmU
+                        : AudioEncoderG711::Config::Type::kPcmA;
+      RTC_CHECK(config.IsOk());
+      return AudioEncoderG711::MakeAudioEncoder(config, payload_type);
+    }
+
+    case CodecType::kG722: {
+      return AudioEncoderG722::MakeAudioEncoder(
+          GetCodecConfig<AudioEncoderG722>(), payload_type);
+    }
+
+    case CodecType::kPcm16b8:
+    case CodecType::kPcm16b16:
+    case CodecType::kPcm16b32:
+    case CodecType::kPcm16b48: {
+      return AudioEncoderL16::MakeAudioEncoder(Pcm16bConfig(codec_type),
+                                               payload_type);
+    }
+
+    case CodecType::kIlbc: {
+      return AudioEncoderIlbc::MakeAudioEncoder(
+          GetCodecConfig<AudioEncoderIlbc>(), payload_type);
+    }
+  }
+  RTC_DCHECK_NOTREACHED();
+  return nullptr;
+}
+
+AudioEncoderCngConfig GetCngConfig(int sample_rate_hz) {
+  AudioEncoderCngConfig cng_config;
+  const auto default_payload_type = [&] {
+    switch (sample_rate_hz) {
+      case 8000:
+        return 13;
+      case 16000:
+        return 98;
+      case 32000:
+        return 99;
+      case 48000:
+        return 100;
+      default:
+        RTC_DCHECK_NOTREACHED();
+    }
+    return 0;
+  };
+  cng_config.payload_type = absl::GetFlag(FLAGS_cng_payload_type) != -1
+                                ? absl::GetFlag(FLAGS_cng_payload_type)
+                                : default_payload_type();
+  return cng_config;
+}
+
+int RunRtpEncode(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  const std::string usage =
+      "Tool for generating an RTP dump file from audio input.\n"
+      "Example usage:\n"
+      "./rtp_encode input.pcm output.rtp --codec=[codec] "
+      "--frame_len=[frame_len] --bitrate=[bitrate]\n\n";
+  if (!absl::GetFlag(FLAGS_list_codecs) && args.size() != 3) {
+    printf("%s", usage.c_str());
+    return 1;
+  }
+
+  if (absl::GetFlag(FLAGS_list_codecs)) {
+    printf("The following arguments are valid --codec parameters:\n");
+    for (const auto& c : CodecList()) {
+      printf("  %s\n", c.first.c_str());
+    }
+    return 0;
+  }
+
+  const auto codec_it = CodecList().find(absl::GetFlag(FLAGS_codec));
+  if (codec_it == CodecList().end()) {
+    printf("%s is not a valid codec name.\n",
+           absl::GetFlag(FLAGS_codec).c_str());
+    printf("Use argument --list_codecs to see all valid codec names.\n");
+    return 1;
+  }
+
+  // Create the codec.
+  const int payload_type = absl::GetFlag(FLAGS_payload_type) == -1
+                               ? codec_it->second.default_payload_type
+                               : absl::GetFlag(FLAGS_payload_type);
+  std::unique_ptr<AudioEncoder> codec =
+      CreateEncoder(codec_it->second.type, payload_type);
+
+  // Create an external VAD/CNG encoder if needed.
+  if (absl::GetFlag(FLAGS_dtx) && !codec_it->second.internal_dtx) {
+    AudioEncoderCngConfig cng_config = GetCngConfig(codec->SampleRateHz());
+    RTC_DCHECK(codec);
+    cng_config.speech_encoder = std::move(codec);
+    codec = CreateComfortNoiseEncoder(std::move(cng_config));
+  }
+  RTC_DCHECK(codec);
+
+  // Set up ACM.
+  const int timestamp_rate_hz = codec->RtpTimestampRateHz();
+  AudioCodingModule::Config config;
+  std::unique_ptr<AudioCodingModule> acm(AudioCodingModule::Create(config));
+  acm->SetEncoder(std::move(codec));
+
+  // Open files.
+  printf("Input file: %s\n", args[1]);
+  InputAudioFile input_file(args[1], false);  // Open input in non-looping mode.
+  FILE* out_file = fopen(args[2], "wb");
+  RTC_CHECK(out_file) << "Could not open file " << args[2] << " for writing";
+  printf("Output file: %s\n", args[2]);
+  fprintf(out_file, "#!rtpplay1.0 \n");
+  // Write 3 32-bit values followed by 2 16-bit values, all set to 0. This
+  // means a total of 16 bytes.
+  const uint8_t file_header[16] = {0};
+  RTC_CHECK_EQ(fwrite(file_header, sizeof(file_header), 1, out_file), 1);
+
+  // Create and register the packetizer, which will write the packets to file.
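+  // (The ACM calls Packetizer::SendData() whenever the encoder has finished a
+  // frame, so one output packet typically spans several of the 10 ms chunks
+  // fed in below.)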
+  Packetizer packetizer(out_file, absl::GetFlag(FLAGS_ssrc), timestamp_rate_hz);
+  RTC_DCHECK_EQ(acm->RegisterTransportCallback(&packetizer), 0);
+
+  AudioFrame audio_frame;
+  audio_frame.samples_per_channel_ =
+      absl::GetFlag(FLAGS_sample_rate) / 100;  // 10 ms
+  audio_frame.sample_rate_hz_ = absl::GetFlag(FLAGS_sample_rate);
+  audio_frame.num_channels_ = 1;
+
+  while (input_file.Read(audio_frame.samples_per_channel_,
+                         audio_frame.mutable_data())) {
+    RTC_CHECK_GE(acm->Add10MsData(audio_frame), 0);
+    audio_frame.timestamp_ += audio_frame.samples_per_channel_;
+  }
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::RunRtpEncode(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc
new file mode 100644
index 0000000000..a43c29638c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc
@@ -0,0 +1,100 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+
+#include <string.h>
+
+#include "absl/strings/string_view.h"
+#ifndef WIN32
+#include <netinet/in.h>
+#endif
+
+#include <memory>
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "rtc_base/checks.h"
+#include "test/rtp_file_reader.h"
+
+namespace webrtc {
+namespace test {
+
+RtpFileSource* RtpFileSource::Create(absl::string_view file_name,
+                                     absl::optional<uint32_t> ssrc_filter) {
+  RtpFileSource* source = new RtpFileSource(ssrc_filter);
+  RTC_CHECK(source->OpenFile(file_name));
+  return source;
+}
+
+bool RtpFileSource::ValidRtpDump(absl::string_view file_name) {
+  std::unique_ptr<RtpFileReader> temp_file(
+      RtpFileReader::Create(RtpFileReader::kRtpDump, file_name));
+  return !!temp_file;
+}
+
+bool RtpFileSource::ValidPcap(absl::string_view file_name) {
+  std::unique_ptr<RtpFileReader> temp_file(
+      RtpFileReader::Create(RtpFileReader::kPcap, file_name));
+  return !!temp_file;
+}
+
+RtpFileSource::~RtpFileSource() {}
+
+bool RtpFileSource::RegisterRtpHeaderExtension(RTPExtensionType type,
+                                               uint8_t id) {
+  return rtp_header_extension_map_.RegisterByType(id, type);
+}
+
+std::unique_ptr<Packet> RtpFileSource::NextPacket() {
+  while (true) {
+    RtpPacket temp_packet;
+    if (!rtp_reader_->NextPacket(&temp_packet)) {
+      return NULL;
+    }
+    if (temp_packet.original_length == 0) {
+      // May be an RTCP packet.
+      // Read the next one.
+      continue;
+    }
+    auto packet = std::make_unique<Packet>(
+        rtc::CopyOnWriteBuffer(temp_packet.data, temp_packet.length),
+        temp_packet.original_length, temp_packet.time_ms,
+        &rtp_header_extension_map_);
+    if (!packet->valid_header()) {
+      continue;
+    }
+    if (filter_.test(packet->header().payloadType) ||
+        (ssrc_filter_ && packet->header().ssrc != *ssrc_filter_)) {
+      // This payload type should be filtered out. Continue to the next packet.
+      continue;
+    }
+    return packet;
+  }
+}
+
+RtpFileSource::RtpFileSource(absl::optional<uint32_t> ssrc_filter)
+    : PacketSource(),
+      ssrc_filter_(ssrc_filter) {}
+
+bool RtpFileSource::OpenFile(absl::string_view file_name) {
+  rtp_reader_.reset(RtpFileReader::Create(RtpFileReader::kRtpDump, file_name));
+  if (rtp_reader_)
+    return true;
+  rtp_reader_.reset(RtpFileReader::Create(RtpFileReader::kPcap, file_name));
+  if (!rtp_reader_) {
+    RTC_FATAL()
+        << "Couldn't open input file as either a rtpdump or .pcap. Note "
+        << "that .pcapng is not supported.";
+  }
+  return true;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h
new file mode 100644
index 0000000000..55505be630
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_
+
+#include <stdio.h>
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+
+namespace webrtc {
+
+namespace test {
+
+class RtpFileReader;
+
+class RtpFileSource : public PacketSource {
+ public:
+  // Creates an RtpFileSource reading from `file_name`. If the file cannot be
+  // opened, or has the wrong format, NULL will be returned.
+  static RtpFileSource* Create(
+      absl::string_view file_name,
+      absl::optional<uint32_t> ssrc_filter = absl::nullopt);
+
+  // Checks whether a file is a valid RTP dump or PCAP (Wireshark) file.
+  static bool ValidRtpDump(absl::string_view file_name);
+  static bool ValidPcap(absl::string_view file_name);
+
+  ~RtpFileSource() override;
+
+  RtpFileSource(const RtpFileSource&) = delete;
+  RtpFileSource& operator=(const RtpFileSource&) = delete;
+
+  // Registers an RTP header extension and binds it to `id`.
+  virtual bool RegisterRtpHeaderExtension(RTPExtensionType type, uint8_t id);
+
+  std::unique_ptr<Packet> NextPacket() override;
+
+ private:
+  static const int kFirstLineLength = 40;
+  static const int kRtpFileHeaderSize = 4 + 4 + 4 + 2 + 2;
+  static const size_t kPacketHeaderSize = 8;
+
+  explicit RtpFileSource(absl::optional<uint32_t> ssrc_filter);
+
+  bool OpenFile(absl::string_view file_name);
+
+  std::unique_ptr<RtpFileReader> rtp_reader_;
+  const absl::optional<uint32_t> ssrc_filter_;
+  RtpHeaderExtensionMap rtp_header_extension_map_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc
new file mode 100644
index 0000000000..e883fc11d6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/rtp_generator.h"
+
+
+namespace webrtc {
+namespace test {
+
+uint32_t RtpGenerator::GetRtpHeader(uint8_t payload_type,
+                                    size_t payload_length_samples,
+                                    RTPHeader* rtp_header) {
+  RTC_DCHECK(rtp_header);
+  if (!rtp_header) {
+    return 0;
+  }
+  rtp_header->sequenceNumber = seq_number_++;
+  rtp_header->timestamp = timestamp_;
+  timestamp_ += static_cast<uint32_t>(payload_length_samples);
+  rtp_header->payloadType = payload_type;
+  rtp_header->markerBit = false;
+  rtp_header->ssrc = ssrc_;
+  rtp_header->numCSRCs = 0;
+
+  uint32_t this_send_time = next_send_time_ms_;
+  RTC_DCHECK_GT(samples_per_ms_, 0);
+  next_send_time_ms_ +=
+      ((1.0 + drift_factor_) * payload_length_samples) / samples_per_ms_;
+  return this_send_time;
+}
+
+void RtpGenerator::set_drift_factor(double factor) {
+  if (factor > -1.0) {
+    drift_factor_ = factor;
+  }
+}
+
+uint32_t TimestampJumpRtpGenerator::GetRtpHeader(uint8_t payload_type,
+                                                 size_t payload_length_samples,
+                                                 RTPHeader* rtp_header) {
+  uint32_t ret = RtpGenerator::GetRtpHeader(payload_type,
+                                            payload_length_samples, rtp_header);
+  if (timestamp_ - static_cast<uint32_t>(payload_length_samples) <=
+          jump_from_timestamp_ &&
+      timestamp_ > jump_from_timestamp_) {
+    // We just moved across the `jump_from_timestamp_` timestamp. Do the jump.
+    timestamp_ = jump_to_timestamp_;
+  }
+  return ret;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h
new file mode 100644
index 0000000000..2e615adec5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h
@@ -0,0 +1,83 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_
+
+#include "api/rtp_headers.h"
+
+namespace webrtc {
+namespace test {
+
+// Class for generating RTP headers.
+class RtpGenerator {
+ public:
+  RtpGenerator(int samples_per_ms,
+               uint16_t start_seq_number = 0,
+               uint32_t start_timestamp = 0,
+               uint32_t start_send_time_ms = 0,
+               uint32_t ssrc = 0x12345678)
+      : seq_number_(start_seq_number),
+        timestamp_(start_timestamp),
+        next_send_time_ms_(start_send_time_ms),
+        ssrc_(ssrc),
+        samples_per_ms_(samples_per_ms),
+        drift_factor_(0.0) {}
+
+  virtual ~RtpGenerator() {}
+
+  RtpGenerator(const RtpGenerator&) = delete;
+  RtpGenerator& operator=(const RtpGenerator&) = delete;
+
+  // Writes the next RTP header to `rtp_header`, which will be of type
+  // `payload_type`. Returns the send time for this packet (in ms). The value
+  // of `payload_length_samples` determines the send time for the next packet.
+  virtual uint32_t GetRtpHeader(uint8_t payload_type,
+                                size_t payload_length_samples,
+                                RTPHeader* rtp_header);
+
+  void set_drift_factor(double factor);
+
+ protected:
+  uint16_t seq_number_;
+  uint32_t timestamp_;
+  uint32_t next_send_time_ms_;
+  const uint32_t ssrc_;
+  const int samples_per_ms_;
+  double drift_factor_;
+};
+
+class TimestampJumpRtpGenerator : public RtpGenerator {
+ public:
+  TimestampJumpRtpGenerator(int samples_per_ms,
+                            uint16_t start_seq_number,
+                            uint32_t start_timestamp,
+                            uint32_t jump_from_timestamp,
+                            uint32_t jump_to_timestamp)
+      : RtpGenerator(samples_per_ms, start_seq_number, start_timestamp),
+        jump_from_timestamp_(jump_from_timestamp),
+        jump_to_timestamp_(jump_to_timestamp) {}
+
+  TimestampJumpRtpGenerator(const TimestampJumpRtpGenerator&) = delete;
+  TimestampJumpRtpGenerator& operator=(const TimestampJumpRtpGenerator&) =
+      delete;
+
+  uint32_t GetRtpHeader(uint8_t payload_type,
+                        size_t payload_length_samples,
+                        RTPHeader* rtp_header) override;
+
+ private:
+  uint32_t jump_from_timestamp_;
+  uint32_t jump_to_timestamp_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc
new file mode 100644
index 0000000000..cccaa9a3bb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc
@@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/rtp_rtcp/source/byte_io.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr size_t kRtpDumpHeaderLength = 8;
+
+// Returns the next packet or an empty buffer if end of file was encountered.
+rtc::Buffer ReadNextPacket(FILE* file) {
+  // Read the rtpdump header for the next packet.
+  rtc::Buffer buffer;
+  buffer.SetData(kRtpDumpHeaderLength, [&](rtc::ArrayView<uint8_t> x) {
+    return fread(x.data(), 1, x.size(), file);
+  });
+  if (buffer.size() != kRtpDumpHeaderLength) {
+    return rtc::Buffer();
+  }
+
+  // Get length field. This is the total length for this packet written to file,
+  // including the kRtpDumpHeaderLength bytes already read.
+  const uint16_t len = ByteReader<uint16_t>::ReadBigEndian(buffer.data());
+  RTC_CHECK_GE(len, kRtpDumpHeaderLength);
+
+  // Read remaining data from file directly into buffer.
+  buffer.AppendData(len - kRtpDumpHeaderLength, [&](rtc::ArrayView<uint8_t> x) {
+    return fread(x.data(), 1, x.size(), file);
+  });
+  if (buffer.size() != len) {
+    buffer.Clear();
+  }
+  return buffer;
+}
+
+struct PacketAndTime {
+  rtc::Buffer packet;
+  int time;
+};
+
+void WritePacket(const PacketAndTime& packet, FILE* file) {
+  // Write the first 4 bytes from the original packet.
+  const auto* payload_ptr = packet.packet.data();
+  RTC_CHECK_EQ(fwrite(payload_ptr, 4, 1, file), 1);
+  payload_ptr += 4;
+
+  // Convert the new time offset to network endian, and write to file.
+  uint8_t time[sizeof(uint32_t)];
+  ByteWriter<uint32_t>::WriteBigEndian(time, packet.time);
+  RTC_CHECK_EQ(fwrite(time, sizeof(uint32_t), 1, file), 1);
+  payload_ptr += 4;  // Skip the old time in the original payload.
+
+  // Write the remaining part of the payload.
+  RTC_DCHECK_EQ(payload_ptr - packet.packet.data(), kRtpDumpHeaderLength);
+  RTC_CHECK_EQ(
+      fwrite(payload_ptr, packet.packet.size() - kRtpDumpHeaderLength, 1, file),
+      1);
+}
+
+int RunRtpJitter(int argc, char* argv[]) {
+  const std::string program_name = argv[0];
+  const std::string usage =
+      "Tool for altering the arrival times in an RTP dump file.\n"
+      "Example usage:\n" +
+      program_name + " input.rtp arrival_times_ms.txt output.rtp\n\n";
+  if (argc != 4) {
+    printf("%s", usage.c_str());
+    return 1;
+  }
+
+  printf("Input RTP file: %s\n", argv[1]);
+  FILE* in_file = fopen(argv[1], "rb");
+  RTC_CHECK(in_file) << "Could not open file " << argv[1] << " for reading";
+  printf("Timing file: %s\n", argv[2]);
+  std::ifstream timing_file(argv[2]);
+  printf("Output file: %s\n", argv[3]);
+  FILE* out_file = fopen(argv[3], "wb");
+  RTC_CHECK(out_file) << "Could not open file " << argv[3] << " for writing";
+
+  // Copy the RTP file header to the output file.
+  char header_string[30];
+  RTC_CHECK(fgets(header_string, 30, in_file));
+  fprintf(out_file, "%s", header_string);
+  uint8_t file_header[16];
+  RTC_CHECK_EQ(fread(file_header, sizeof(file_header), 1, in_file), 1);
+  RTC_CHECK_EQ(fwrite(file_header, sizeof(file_header), 1, out_file), 1);
+
+  // Read all time values from the timing file. Store in a vector.
+  std::vector<int> new_arrival_times;
+  int new_time;
+  while (timing_file >> new_time) {
+    new_arrival_times.push_back(new_time);
+  }
+
+  // Read all packets from the input RTP file, but no more than the number of
+  // new time values. Store RTP packets together with new time values.
+  auto time_it = new_arrival_times.begin();
+  std::vector<PacketAndTime> packets;
+  while (1) {
+    auto packet = ReadNextPacket(in_file);
+    if (packet.empty() || time_it == new_arrival_times.end()) {
+      break;
+    }
+    packets.push_back({std::move(packet), *time_it});
+    ++time_it;
+  }
+
+  // Sort on new time values.
+  std::sort(packets.begin(), packets.end(),
+            [](const PacketAndTime& a, const PacketAndTime& b) {
+              return a.time < b.time;
+            });
+
+  // Write packets to output file.
+  for (const auto& p : packets) {
+    WritePacket(p, out_file);
+  }
+
+  fclose(in_file);
+  fclose(out_file);
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::RunRtpJitter(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc
new file mode 100644
index 0000000000..431de553ae
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "rtc_base/checks.h"
+#include "test/rtp_file_reader.h"
+#include "test/rtp_file_writer.h"
+
+using webrtc::test::RtpFileReader;
+using webrtc::test::RtpFileWriter;
+
+int main(int argc, char* argv[]) {
+  if (argc < 3) {
+    printf("Concatenates multiple rtpdump files into one.\n");
+    printf("Usage: rtpcat in1.rtp in2.rtp [...] out.rtp\n");
+    exit(1);
+  }
+
+  std::unique_ptr<RtpFileWriter> output(
+      RtpFileWriter::Create(RtpFileWriter::kRtpDump, argv[argc - 1]));
+  RTC_CHECK(output.get() != NULL) << "Cannot open output file.";
+  printf("Output RTP file: %s\n", argv[argc - 1]);
+
+  for (int i = 1; i < argc - 1; i++) {
+    std::unique_ptr<RtpFileReader> input(
+        RtpFileReader::Create(RtpFileReader::kRtpDump, argv[i]));
+    RTC_CHECK(input.get() != NULL) << "Cannot open input file " << argv[i];
+    printf("Input RTP file: %s\n", argv[i]);
+
+    webrtc::test::RtpPacket packet;
+    while (input->NextPacket(&packet))
+      RTC_CHECK(output->WritePacket(&packet));
+  }
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc
new file mode 100644
index 0000000000..baed812327
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/underrun_optimizer.h"
+
+#include <algorithm>
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kDelayBuckets = 100;
+constexpr int kBucketSizeMs = 20;
+
+}  // namespace
+
+UnderrunOptimizer::UnderrunOptimizer(const TickTimer* tick_timer,
+                                     int histogram_quantile,
+                                     int forget_factor,
+                                     absl::optional<double> start_forget_weight,
+                                     absl::optional<int> resample_interval_ms)
+    : tick_timer_(tick_timer),
+      histogram_(kDelayBuckets, forget_factor, start_forget_weight),
+      histogram_quantile_(histogram_quantile),
+      resample_interval_ms_(resample_interval_ms) {}
+
+void UnderrunOptimizer::Update(int relative_delay_ms) {
+  absl::optional<int> histogram_update;
+  if (resample_interval_ms_) {
+    if (!resample_stopwatch_) {
+      resample_stopwatch_ = tick_timer_->GetNewStopwatch();
+    }
+    if (static_cast<int>(resample_stopwatch_->ElapsedMs()) >
+        *resample_interval_ms_) {
+      histogram_update = max_delay_in_interval_ms_;
+      resample_stopwatch_ = tick_timer_->GetNewStopwatch();
+      max_delay_in_interval_ms_ = 0;
+    }
+    max_delay_in_interval_ms_ =
+        std::max(max_delay_in_interval_ms_, relative_delay_ms);
+  } else {
+    histogram_update = relative_delay_ms;
+  }
+  if (!histogram_update) {
+    return;
+  }
+
+  const int index = *histogram_update / kBucketSizeMs;
+  if (index < histogram_.NumBuckets()) {
+    // Maximum delay to register is 2000 ms.
+    histogram_.Add(index);
+  }
+  int bucket_index = histogram_.Quantile(histogram_quantile_);
+  optimal_delay_ms_ = (1 + bucket_index) * kBucketSizeMs;
+}
+
+void UnderrunOptimizer::Reset() {
+  histogram_.Reset();
+  resample_stopwatch_.reset();
+  max_delay_in_interval_ms_ = 0;
+  optimal_delay_ms_.reset();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h
new file mode 100644
index 0000000000..b37ce18795
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_
+#define MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/histogram.h"
+
+namespace webrtc {
+
+// Estimates probability of buffer underrun due to late packet arrival.
+// The optimal delay is decided such that the probability of underrun is lower
+// than 1 - `histogram_quantile`.
+class UnderrunOptimizer {
+ public:
+  UnderrunOptimizer(const TickTimer* tick_timer,
+                    int histogram_quantile,
+                    int forget_factor,
+                    absl::optional<double> start_forget_weight,
+                    absl::optional<int> resample_interval_ms);
+
+  void Update(int relative_delay_ms);
+
+  absl::optional<int> GetOptimalDelayMs() const { return optimal_delay_ms_; }
+
+  void Reset();
+
+ private:
+  const TickTimer* tick_timer_;
+  Histogram histogram_;
+  const int histogram_quantile_;  // In Q30.
+  const absl::optional<int> resample_interval_ms_;
+  std::unique_ptr<TickTimer::Stopwatch> resample_stopwatch_;
+  int max_delay_in_interval_ms_ = 0;
+  absl::optional<int> optimal_delay_ms_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc
new file mode 100644
index 0000000000..a86e9cf107
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/underrun_optimizer.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kDefaultHistogramQuantile = 1020054733;  // 0.95 in Q30.
+constexpr int kForgetFactor = 32745;  // 0.9993 in Q15.
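+// (Q30 and Q15 are fixed-point formats scaled by 2^30 and 2^15 respectively,
+// so 0.95 * 2^30 ~= 1020054733 and 0.9993 * 2^15 ~= 32745.)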
+ +} // namespace + +TEST(UnderrunOptimizerTest, ResamplePacketDelays) { + TickTimer tick_timer; + constexpr int kResampleIntervalMs = 500; + UnderrunOptimizer underrun_optimizer(&tick_timer, kDefaultHistogramQuantile, + kForgetFactor, absl::nullopt, + kResampleIntervalMs); + + // The histogram should be updated once with the maximum delay observed for + // the following sequence of updates. + for (int i = 0; i < 500; i += 20) { + underrun_optimizer.Update(i); + EXPECT_FALSE(underrun_optimizer.GetOptimalDelayMs()); + } + tick_timer.Increment(kResampleIntervalMs / tick_timer.ms_per_tick() + 1); + underrun_optimizer.Update(0); + EXPECT_EQ(underrun_optimizer.GetOptimalDelayMs(), 500); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build new file mode 100644 index 0000000000..823aea6dda --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build @@ -0,0 +1,267 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/normal.cc", + 
"/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc", + "/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" 
+ DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("neteq_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/pcm16b_c_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/pcm16b_c_gn/moz.build new file mode 100644 index 0000000000..54863e95ae --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/pcm16b_c_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if 
CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("pcm16b_c_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/pcm16b_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/pcm16b_gn/moz.build new file mode 100644 index 0000000000..afee108cb3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/pcm16b_gn/moz.build @@ -0,0 +1,227 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + 
DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("pcm16b_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/red_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/red_gn/moz.build new file mode 100644 index 0000000000..c4fa5c8bc4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/red_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("red_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/test/Channel.cc b/third_party/libwebrtc/modules/audio_coding/test/Channel.cc new file mode 100644 index 0000000000..35aa6cb6b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/Channel.cc @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/test/Channel.h" + +#include + +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +int32_t Channel::SendData(AudioFrameType frameType, + uint8_t payloadType, + uint32_t timeStamp, + const uint8_t* payloadData, + size_t payloadSize, + int64_t absolute_capture_timestamp_ms) { + RTPHeader rtp_header; + int32_t status; + size_t payloadDataSize = payloadSize; + + rtp_header.markerBit = false; + rtp_header.ssrc = 0; + rtp_header.sequenceNumber = + (external_sequence_number_ < 0) + ? _seqNo++ + : static_cast(external_sequence_number_); + rtp_header.payloadType = payloadType; + rtp_header.timestamp = (external_send_timestamp_ < 0) + ? 
timeStamp + : static_cast(external_send_timestamp_); + + if (frameType == AudioFrameType::kEmptyFrame) { + // When frame is empty, we should not transmit it. The frame size of the + // next non-empty frame will be based on the previous frame size. + _useLastFrameSize = _lastFrameSizeSample > 0; + return 0; + } + + memcpy(_payloadData, payloadData, payloadDataSize); + if (_isStereo) { + if (_leftChannel) { + _rtp_header = rtp_header; + _leftChannel = false; + } else { + rtp_header = _rtp_header; + _leftChannel = true; + } + } + + _channelCritSect.Lock(); + if (_saveBitStream) { + // fwrite(payloadData, sizeof(uint8_t), payloadSize, _bitStreamFile); + } + + if (!_isStereo) { + CalcStatistics(rtp_header, payloadSize); + } + _useLastFrameSize = false; + _lastInTimestamp = timeStamp; + _totalBytes += payloadDataSize; + _channelCritSect.Unlock(); + + if (_useFECTestWithPacketLoss) { + _packetLoss += 1; + if (_packetLoss == 3) { + _packetLoss = 0; + return 0; + } + } + + if (num_packets_to_drop_ > 0) { + num_packets_to_drop_--; + return 0; + } + + status = + _receiverACM->IncomingPacket(_payloadData, payloadDataSize, rtp_header); + + return status; +} + +// TODO(turajs): rewite this method. +void Channel::CalcStatistics(const RTPHeader& rtp_header, size_t payloadSize) { + int n; + if ((rtp_header.payloadType != _lastPayloadType) && + (_lastPayloadType != -1)) { + // payload-type is changed. + // we have to terminate the calculations on the previous payload type + // we ignore the last packet in that payload type just to make things + // easier. + for (n = 0; n < MAX_NUM_PAYLOADS; n++) { + if (_lastPayloadType == _payloadStats[n].payloadType) { + _payloadStats[n].newPacket = true; + break; + } + } + } + _lastPayloadType = rtp_header.payloadType; + + bool newPayload = true; + ACMTestPayloadStats* currentPayloadStr = NULL; + for (n = 0; n < MAX_NUM_PAYLOADS; n++) { + if (rtp_header.payloadType == _payloadStats[n].payloadType) { + newPayload = false; + currentPayloadStr = &_payloadStats[n]; + break; + } + } + + if (!newPayload) { + if (!currentPayloadStr->newPacket) { + if (!_useLastFrameSize) { + _lastFrameSizeSample = + (uint32_t)((uint32_t)rtp_header.timestamp - + (uint32_t)currentPayloadStr->lastTimestamp); + } + RTC_DCHECK_GT(_lastFrameSizeSample, 0); + int k = 0; + for (; k < MAX_NUM_FRAMESIZES; ++k) { + if ((currentPayloadStr->frameSizeStats[k].frameSizeSample == + _lastFrameSizeSample) || + (currentPayloadStr->frameSizeStats[k].frameSizeSample == 0)) { + break; + } + } + if (k == MAX_NUM_FRAMESIZES) { + // New frame size found but no space to count statistics on it. Skip it. + printf("No memory to store statistics for payload %d : frame size %d\n", + _lastPayloadType, _lastFrameSizeSample); + return; + } + ACMTestFrameSizeStats* currentFrameSizeStats = + &(currentPayloadStr->frameSizeStats[k]); + currentFrameSizeStats->frameSizeSample = (int16_t)_lastFrameSizeSample; + + // increment the number of encoded samples. + currentFrameSizeStats->totalEncodedSamples += _lastFrameSizeSample; + // increment the number of recveived packets + currentFrameSizeStats->numPackets++; + // increment the total number of bytes (this is based on + // the previous payload we don't know the frame-size of + // the current payload. + currentFrameSizeStats->totalPayloadLenByte += + currentPayloadStr->lastPayloadLenByte; + // store the maximum payload-size (this is based on + // the previous payload we don't know the frame-size of + // the current payload. 
+      if (currentFrameSizeStats->maxPayloadLen <
+          currentPayloadStr->lastPayloadLenByte) {
+        currentFrameSizeStats->maxPayloadLen =
+            currentPayloadStr->lastPayloadLenByte;
+      }
+      // Store the current values for the next time.
+      currentPayloadStr->lastTimestamp = rtp_header.timestamp;
+      currentPayloadStr->lastPayloadLenByte = payloadSize;
+    } else {
+      currentPayloadStr->newPacket = false;
+      currentPayloadStr->lastPayloadLenByte = payloadSize;
+      currentPayloadStr->lastTimestamp = rtp_header.timestamp;
+      currentPayloadStr->payloadType = rtp_header.payloadType;
+      memset(currentPayloadStr->frameSizeStats, 0,
+             MAX_NUM_FRAMESIZES * sizeof(ACMTestFrameSizeStats));
+    }
+  } else {
+    n = 0;
+    while (_payloadStats[n].payloadType != -1) {
+      n++;
+    }
+    // First packet.
+    _payloadStats[n].newPacket = false;
+    _payloadStats[n].lastPayloadLenByte = payloadSize;
+    _payloadStats[n].lastTimestamp = rtp_header.timestamp;
+    _payloadStats[n].payloadType = rtp_header.payloadType;
+    memset(_payloadStats[n].frameSizeStats, 0,
+           MAX_NUM_FRAMESIZES * sizeof(ACMTestFrameSizeStats));
+  }
+}
+
+Channel::Channel(int16_t chID)
+    : _receiverACM(NULL),
+      _seqNo(0),
+      _bitStreamFile(NULL),
+      _saveBitStream(false),
+      _lastPayloadType(-1),
+      _isStereo(false),
+      _leftChannel(true),
+      _lastInTimestamp(0),
+      _useLastFrameSize(false),
+      _lastFrameSizeSample(0),
+      _packetLoss(0),
+      _useFECTestWithPacketLoss(false),
+      _beginTime(rtc::TimeMillis()),
+      _totalBytes(0),
+      external_send_timestamp_(-1),
+      external_sequence_number_(-1),
+      num_packets_to_drop_(0) {
+  int n;
+  int k;
+  for (n = 0; n < MAX_NUM_PAYLOADS; n++) {
+    _payloadStats[n].payloadType = -1;
+    _payloadStats[n].newPacket = true;
+    for (k = 0; k < MAX_NUM_FRAMESIZES; k++) {
+      _payloadStats[n].frameSizeStats[k].frameSizeSample = 0;
+      _payloadStats[n].frameSizeStats[k].maxPayloadLen = 0;
+      _payloadStats[n].frameSizeStats[k].numPackets = 0;
+      _payloadStats[n].frameSizeStats[k].totalPayloadLenByte = 0;
+      _payloadStats[n].frameSizeStats[k].totalEncodedSamples = 0;
+    }
+  }
+  if (chID >= 0) {
+    _saveBitStream = true;
+    rtc::StringBuilder ss;
+    ss.AppendFormat("bitStream_%d.dat", chID);
+    _bitStreamFile = fopen(ss.str().c_str(), "wb");
+  } else {
+    _saveBitStream = false;
+  }
+}
+
+Channel::~Channel() {}
+
+void Channel::RegisterReceiverACM(AudioCodingModule* acm) {
+  _receiverACM = acm;
+  return;
+}
+
+void Channel::ResetStats() {
+  int n;
+  int k;
+  _channelCritSect.Lock();
+  _lastPayloadType = -1;
+  for (n = 0; n < MAX_NUM_PAYLOADS; n++) {
+    _payloadStats[n].payloadType = -1;
+    _payloadStats[n].newPacket = true;
+    for (k = 0; k < MAX_NUM_FRAMESIZES; k++) {
+      _payloadStats[n].frameSizeStats[k].frameSizeSample = 0;
+      _payloadStats[n].frameSizeStats[k].maxPayloadLen = 0;
+      _payloadStats[n].frameSizeStats[k].numPackets = 0;
+      _payloadStats[n].frameSizeStats[k].totalPayloadLenByte = 0;
+      _payloadStats[n].frameSizeStats[k].totalEncodedSamples = 0;
+    }
+  }
+  _beginTime = rtc::TimeMillis();
+  _totalBytes = 0;
+  _channelCritSect.Unlock();
+}
+
+uint32_t Channel::LastInTimestamp() {
+  uint32_t timestamp;
+  _channelCritSect.Lock();
+  timestamp = _lastInTimestamp;
+  _channelCritSect.Unlock();
+  return timestamp;
+}
+
+double Channel::BitRate() {
+  double rate;
+  uint64_t currTime = rtc::TimeMillis();
+  _channelCritSect.Lock();
+  rate = ((double)_totalBytes * 8.0) / (double)(currTime - _beginTime);
+  _channelCritSect.Unlock();
+  return rate;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/Channel.h b/third_party/libwebrtc/modules/audio_coding/test/Channel.h
b/third_party/libwebrtc/modules/audio_coding/test/Channel.h new file mode 100644 index 0000000000..7a8829e1d2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/Channel.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_TEST_CHANNEL_H_ +#define MODULES_AUDIO_CODING_TEST_CHANNEL_H_ + +#include <stdio.h> + +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/synchronization/mutex.h" + +namespace webrtc { + +#define MAX_NUM_PAYLOADS 50 +#define MAX_NUM_FRAMESIZES 6 + +// TODO(turajs): Write constructor for this structure. +struct ACMTestFrameSizeStats { + uint16_t frameSizeSample; + size_t maxPayloadLen; + uint32_t numPackets; + uint64_t totalPayloadLenByte; + uint64_t totalEncodedSamples; + double rateBitPerSec; + double usageLenSec; +}; + +// TODO(turajs): Write constructor for this structure. +struct ACMTestPayloadStats { + bool newPacket; + int16_t payloadType; + size_t lastPayloadLenByte; + uint32_t lastTimestamp; + ACMTestFrameSizeStats frameSizeStats[MAX_NUM_FRAMESIZES]; +}; + +class Channel : public AudioPacketizationCallback { + public: + Channel(int16_t chID = -1); + ~Channel() override; + + int32_t SendData(AudioFrameType frameType, + uint8_t payloadType, + uint32_t timeStamp, + const uint8_t* payloadData, + size_t payloadSize, + int64_t absolute_capture_timestamp_ms) override; + + void RegisterReceiverACM(AudioCodingModule* acm); + + void ResetStats(); + + void SetIsStereo(bool isStereo) { _isStereo = isStereo; } + + uint32_t LastInTimestamp(); + + void SetFECTestWithPacketLoss(bool usePacketLoss) { + _useFECTestWithPacketLoss = usePacketLoss; + } + + double BitRate(); + + void set_send_timestamp(uint32_t new_send_ts) { + external_send_timestamp_ = new_send_ts; + } + + void set_sequence_number(uint16_t new_sequence_number) { + external_sequence_number_ = new_sequence_number; + } + + void set_num_packets_to_drop(int new_num_packets_to_drop) { + num_packets_to_drop_ = new_num_packets_to_drop; + } + + private: + void CalcStatistics(const RTPHeader& rtp_header, size_t payloadSize); + + AudioCodingModule* _receiverACM; + uint16_t _seqNo; + // 60msec * 32 sample(max)/msec * 2 description (maybe) * 2 bytes/sample + uint8_t _payloadData[60 * 32 * 2 * 2]; + + Mutex _channelCritSect; + FILE* _bitStreamFile; + bool _saveBitStream; + int16_t _lastPayloadType; + ACMTestPayloadStats _payloadStats[MAX_NUM_PAYLOADS]; + bool _isStereo; + RTPHeader _rtp_header; + bool _leftChannel; + uint32_t _lastInTimestamp; + bool _useLastFrameSize; + uint32_t _lastFrameSizeSample; + // FEC Test variables + int16_t _packetLoss; + bool _useFECTestWithPacketLoss; + uint64_t _beginTime; + uint64_t _totalBytes; + + // External timing info, defaulted to -1. Only used if they are + // non-negative.
+ int64_t external_send_timestamp_; + int32_t external_sequence_number_; + int num_packets_to_drop_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_CHANNEL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.cc b/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.cc new file mode 100644 index 0000000000..9f9c4aa74c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.cc @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/test/EncodeDecodeTest.h" + +#include +#include + +#include + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { +// Buffer size for stereo 48 kHz audio. +constexpr size_t kWebRtc10MsPcmAudio = 960; + +} // namespace + +TestPacketization::TestPacketization(RTPStream* rtpStream, uint16_t frequency) + : _rtpStream(rtpStream), _frequency(frequency), _seqNo(0) {} + +TestPacketization::~TestPacketization() {} + +int32_t TestPacketization::SendData(const AudioFrameType /* frameType */, + const uint8_t payloadType, + const uint32_t timeStamp, + const uint8_t* payloadData, + const size_t payloadSize, + int64_t absolute_capture_timestamp_ms) { + _rtpStream->Write(payloadType, timeStamp, _seqNo++, payloadData, payloadSize, + _frequency); + return 1; +} + +Sender::Sender() + : _acm(NULL), _pcmFile(), _audioFrame(), _packetization(NULL) {} + +void Sender::Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view in_file_name, + int in_sample_rate, + int payload_type, + SdpAudioFormat format) { + // Open input file + const std::string file_name = webrtc::test::ResourcePath(in_file_name, "pcm"); + _pcmFile.Open(file_name, in_sample_rate, "rb"); + if (format.num_channels == 2) { + _pcmFile.ReadStereo(true); + } + // Set test length to 500 ms (50 blocks of 10 ms each). + _pcmFile.SetNum10MsBlocksToRead(50); + // Fast-forward 1 second (100 blocks) since the file starts with silence. + _pcmFile.FastForward(100); + + acm->SetEncoder(CreateBuiltinAudioEncoderFactory()->MakeAudioEncoder( + payload_type, format, absl::nullopt)); + _packetization = new TestPacketization(rtpStream, format.clockrate_hz); + EXPECT_EQ(0, acm->RegisterTransportCallback(_packetization)); + + _acm = acm; +} + +void Sender::Teardown() { + _pcmFile.Close(); + delete _packetization; +} + +bool Sender::Add10MsData() { + if (!_pcmFile.EndOfFile()) { + EXPECT_GT(_pcmFile.Read10MsData(_audioFrame), 0); + int32_t ok = _acm->Add10MsData(_audioFrame); + EXPECT_GE(ok, 0); + return ok >= 0 ? 
true : false; + } + return false; +} + +void Sender::Run() { + while (true) { + if (!Add10MsData()) { + break; + } + } +} + +Receiver::Receiver() + : _playoutLengthSmpls(kWebRtc10MsPcmAudio), + _payloadSizeBytes(MAX_INCOMING_PAYLOAD) {} + +void Receiver::Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view out_file_name, + size_t channels, + int file_num) { + EXPECT_EQ(0, acm->InitializeReceiver()); + + if (channels == 1) { + acm->SetReceiveCodecs({{107, {"L16", 8000, 1}}, + {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, + {0, {"PCMU", 8000, 1}}, + {8, {"PCMA", 8000, 1}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {120, {"OPUS", 48000, 2}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); + } else { + ASSERT_EQ(channels, 2u); + acm->SetReceiveCodecs({{111, {"L16", 8000, 2}}, + {112, {"L16", 16000, 2}}, + {113, {"L16", 32000, 2}}, + {110, {"PCMU", 8000, 2}}, + {118, {"PCMA", 8000, 2}}, + {119, {"G722", 8000, 2}}, + {120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}}); + } + + int playSampFreq; + std::string file_name; + rtc::StringBuilder file_stream; + file_stream << webrtc::test::OutputPath() << out_file_name << file_num + << ".pcm"; + file_name = file_stream.str(); + _rtpStream = rtpStream; + + playSampFreq = 32000; + _pcmFile.Open(file_name, 32000, "wb+"); + + _realPayloadSizeBytes = 0; + _playoutBuffer = new int16_t[kWebRtc10MsPcmAudio]; + _frequency = playSampFreq; + _acm = acm; + _firstTime = true; +} + +void Receiver::Teardown() { + delete[] _playoutBuffer; + _pcmFile.Close(); +} + +bool Receiver::IncomingPacket() { + if (!_rtpStream->EndOfFile()) { + if (_firstTime) { + _firstTime = false; + _realPayloadSizeBytes = _rtpStream->Read(&_rtpHeader, _incomingPayload, + _payloadSizeBytes, &_nextTime); + if (_realPayloadSizeBytes == 0) { + if (_rtpStream->EndOfFile()) { + _firstTime = true; + return true; + } else { + return false; + } + } + } + + EXPECT_EQ(0, _acm->IncomingPacket(_incomingPayload, _realPayloadSizeBytes, + _rtpHeader)); + _realPayloadSizeBytes = _rtpStream->Read(&_rtpHeader, _incomingPayload, + _payloadSizeBytes, &_nextTime); + if (_realPayloadSizeBytes == 0 && _rtpStream->EndOfFile()) { + _firstTime = true; + } + } + return true; +} + +bool Receiver::PlayoutData() { + AudioFrame audioFrame; + bool muted; + int32_t ok = _acm->PlayoutData10Ms(_frequency, &audioFrame, &muted); + if (muted) { + ADD_FAILURE(); + return false; + } + EXPECT_EQ(0, ok); + if (ok < 0) { + return false; + } + if (_playoutLengthSmpls == 0) { + return false; + } + _pcmFile.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_ * + audioFrame.num_channels_); + return true; +} + +void Receiver::Run() { + uint8_t counter500Ms = 50; + uint32_t clock = 0; + + while (counter500Ms > 0) { + if (clock == 0 || clock >= _nextTime) { + EXPECT_TRUE(IncomingPacket()); + if (clock == 0) { + clock = _nextTime; + } + } + if ((clock % 10) == 0) { + if (!PlayoutData()) { + clock++; + continue; + } + } + if (_rtpStream->EndOfFile()) { + counter500Ms--; + } + clock++; + } +} + +EncodeDecodeTest::EncodeDecodeTest() = default; + +void EncodeDecodeTest::Perform() { + const std::map<int, SdpAudioFormat> send_codecs = { + {107, {"L16", 8000, 1}}, {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, {0, {"PCMU", 8000, 1}}, + {8, {"PCMA", 8000, 1}}, +#ifdef WEBRTC_CODEC_ILBC + {102, {"ILBC", 8000, 1}}, +#endif + {9, {"G722", 8000, 1}}}; + int file_num = 0; + for (const auto& send_codec : send_codecs) { + RTPFile rtpFile; + std::unique_ptr<AudioCodingModule> acm(AudioCodingModule::Create(
AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))); + + std::string fileName = webrtc::test::TempFilename( + webrtc::test::OutputPath(), "encode_decode_rtp"); + rtpFile.Open(fileName.c_str(), "wb+"); + rtpFile.WriteHeader(); + Sender sender; + sender.Setup(acm.get(), &rtpFile, "audio_coding/testfile32kHz", 32000, + send_codec.first, send_codec.second); + sender.Run(); + sender.Teardown(); + rtpFile.Close(); + + rtpFile.Open(fileName.c_str(), "rb"); + rtpFile.ReadHeader(); + Receiver receiver; + receiver.Setup(acm.get(), &rtpFile, "encodeDecode_out", 1, file_num); + receiver.Run(); + receiver.Teardown(); + rtpFile.Close(); + + file_num++; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.h b/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.h new file mode 100644 index 0000000000..89b76440ef --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/EncodeDecodeTest.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_TEST_ENCODEDECODETEST_H_ +#define MODULES_AUDIO_CODING_TEST_ENCODEDECODETEST_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/test/PCMFile.h" +#include "modules/audio_coding/test/RTPFile.h" +#include "modules/include/module_common_types.h" + +namespace webrtc { + +#define MAX_INCOMING_PAYLOAD 8096 + +// TestPacketization callback which writes the encoded payloads to file +class TestPacketization : public AudioPacketizationCallback { + public: + TestPacketization(RTPStream* rtpStream, uint16_t frequency); + ~TestPacketization(); + int32_t SendData(AudioFrameType frameType, + uint8_t payloadType, + uint32_t timeStamp, + const uint8_t* payloadData, + size_t payloadSize, + int64_t absolute_capture_timestamp_ms) override; + + private: + static void MakeRTPheader(uint8_t* rtpHeader, + uint8_t payloadType, + int16_t seqNo, + uint32_t timeStamp, + uint32_t ssrc); + RTPStream* _rtpStream; + int32_t _frequency; + int16_t _seqNo; +}; + +class Sender { + public: + Sender(); + void Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view in_file_name, + int in_sample_rate, + int payload_type, + SdpAudioFormat format); + void Teardown(); + void Run(); + bool Add10MsData(); + + protected: + AudioCodingModule* _acm; + + private: + PCMFile _pcmFile; + AudioFrame _audioFrame; + TestPacketization* _packetization; +}; + +class Receiver { + public: + Receiver(); + virtual ~Receiver() {} + void Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view out_file_name, + size_t channels, + int file_num); + void Teardown(); + void Run(); + virtual bool IncomingPacket(); + bool PlayoutData(); + + private: + PCMFile _pcmFile; + int16_t* _playoutBuffer; + uint16_t _playoutLengthSmpls; + int32_t _frequency; + bool _firstTime; + + protected: + AudioCodingModule* _acm; + uint8_t _incomingPayload[MAX_INCOMING_PAYLOAD]; + RTPStream* _rtpStream; + RTPHeader _rtpHeader; + size_t _realPayloadSizeBytes; + size_t _payloadSizeBytes; + uint32_t _nextTime; +}; + +class 
EncodeDecodeTest { + public: + EncodeDecodeTest(); + void Perform(); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_ENCODEDECODETEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/PCMFile.cc b/third_party/libwebrtc/modules/audio_coding/test/PCMFile.cc new file mode 100644 index 0000000000..e069a42de1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/PCMFile.cc @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/test/PCMFile.h" + +#include <ctype.h> +#include <stdio.h> +#include <string.h> + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +#define MAX_FILE_NAME_LENGTH_BYTE 500 + +PCMFile::PCMFile() + : pcm_file_(NULL), + samples_10ms_(160), + frequency_(16000), + end_of_file_(false), + auto_rewind_(false), + rewinded_(false), + read_stereo_(false), + save_stereo_(false) { + timestamp_ = + (((uint32_t)rand() & 0x0000FFFF) << 16) | ((uint32_t)rand() & 0x0000FFFF); +} + +PCMFile::PCMFile(uint32_t timestamp) + : pcm_file_(NULL), + samples_10ms_(160), + frequency_(16000), + end_of_file_(false), + auto_rewind_(false), + rewinded_(false), + read_stereo_(false), + save_stereo_(false) { + timestamp_ = timestamp; +} + +PCMFile::~PCMFile() { + if (pcm_file_) { + fclose(pcm_file_); + } +} + +int16_t PCMFile::ChooseFile(std::string* file_name, + int16_t max_len, + uint16_t* frequency_hz) { + char tmp_name[MAX_FILE_NAME_LENGTH_BYTE]; + + EXPECT_TRUE(fgets(tmp_name, MAX_FILE_NAME_LENGTH_BYTE, stdin) != NULL); + tmp_name[MAX_FILE_NAME_LENGTH_BYTE - 1] = '\0'; + int16_t n = 0; + + // Remove leading whitespace and control characters. + while ((isspace(static_cast<unsigned char>(tmp_name[n])) || + iscntrl(static_cast<unsigned char>(tmp_name[n]))) && + (static_cast<unsigned char>(tmp_name[n]) != 0) && + (n < MAX_FILE_NAME_LENGTH_BYTE)) { + n++; + } + if (n > 0) { + memmove(tmp_name, &tmp_name[n], MAX_FILE_NAME_LENGTH_BYTE - n); + } + + // Remove trailing whitespace and control characters.
+ n = (int16_t)(strlen(tmp_name) - 1); + if (n >= 0) { + while ((isspace(static_cast<unsigned char>(tmp_name[n])) || + iscntrl(static_cast<unsigned char>(tmp_name[n]))) && + (n >= 0)) { + n--; + } + } + if (n >= 0) { + tmp_name[n + 1] = '\0'; + } + + int16_t len = (int16_t)strlen(tmp_name); + if (len > max_len) { + return -1; + } + if (len > 0) { + std::string tmp_string(tmp_name, len + 1); + *file_name = tmp_string; + } + printf("Enter the sampling frequency (in Hz) of the above file [%u]: ", + *frequency_hz); + EXPECT_TRUE(fgets(tmp_name, 10, stdin) != NULL); + uint16_t tmp_frequency = (uint16_t)atoi(tmp_name); + if (tmp_frequency > 0) { + *frequency_hz = tmp_frequency; + } + return 0; +} + +void PCMFile::Open(absl::string_view file_name, + uint16_t frequency, + absl::string_view mode, + bool auto_rewind) { + if ((pcm_file_ = fopen(std::string(file_name).c_str(), + std::string(mode).c_str())) == NULL) { + printf("Cannot open file %s.\n", std::string(file_name).c_str()); + ADD_FAILURE() << "Unable to read file"; + } + frequency_ = frequency; + samples_10ms_ = (uint16_t)(frequency_ / 100); + auto_rewind_ = auto_rewind; + end_of_file_ = false; + rewinded_ = false; +} + +int32_t PCMFile::SamplingFrequency() const { + return frequency_; +} + +uint16_t PCMFile::PayloadLength10Ms() const { + return samples_10ms_; +} + +int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) { + uint16_t channels = 1; + if (read_stereo_) { + channels = 2; + } + + int32_t payload_size = + (int32_t)fread(audio_frame.mutable_data(), sizeof(uint16_t), + samples_10ms_ * channels, pcm_file_); + if (payload_size < samples_10ms_ * channels) { + int16_t* frame_data = audio_frame.mutable_data(); + for (int k = payload_size; k < samples_10ms_ * channels; k++) { + frame_data[k] = 0; + } + if (auto_rewind_) { + rewind(pcm_file_); + rewinded_ = true; + } else { + end_of_file_ = true; + } + } + audio_frame.samples_per_channel_ = samples_10ms_; + audio_frame.sample_rate_hz_ = frequency_; + audio_frame.num_channels_ = channels; + audio_frame.timestamp_ = timestamp_; + timestamp_ += samples_10ms_; + ++blocks_read_; + if (num_10ms_blocks_to_read_ && blocks_read_ >= *num_10ms_blocks_to_read_) + end_of_file_ = true; + return samples_10ms_; +} + +void PCMFile::Write10MsData(const AudioFrame& audio_frame) { + if (audio_frame.num_channels_ == 1) { + if (!save_stereo_) { + if (fwrite(audio_frame.data(), sizeof(uint16_t), + audio_frame.samples_per_channel_, pcm_file_) != + static_cast<size_t>(audio_frame.samples_per_channel_)) { + return; + } + } else { + const int16_t* frame_data = audio_frame.data(); + int16_t* stereo_audio = new int16_t[2 * audio_frame.samples_per_channel_]; + for (size_t k = 0; k < audio_frame.samples_per_channel_; k++) { + stereo_audio[k << 1] = frame_data[k]; + stereo_audio[(k << 1) + 1] = frame_data[k]; + } + if (fwrite(stereo_audio, sizeof(int16_t), + 2 * audio_frame.samples_per_channel_, pcm_file_) != + static_cast<size_t>(2 * audio_frame.samples_per_channel_)) { + return; + } + delete[] stereo_audio; + } + } else { + if (fwrite(audio_frame.data(), sizeof(int16_t), + audio_frame.num_channels_ * audio_frame.samples_per_channel_, + pcm_file_) != + static_cast<size_t>(audio_frame.num_channels_ * + audio_frame.samples_per_channel_)) { + return; + } + } +} + +void PCMFile::Write10MsData(const int16_t* playout_buffer, + size_t length_smpls) { + if (fwrite(playout_buffer, sizeof(uint16_t), length_smpls, pcm_file_) != + length_smpls) { + return; + } +} + +void PCMFile::Close() { + fclose(pcm_file_); + pcm_file_ = NULL; + blocks_read_ = 0; +} + +void PCMFile::FastForward(int
num_10ms_blocks) { + const int channels = read_stereo_ ? 2 : 1; + long num_bytes_to_move = + num_10ms_blocks * sizeof(int16_t) * samples_10ms_ * channels; + int error = fseek(pcm_file_, num_bytes_to_move, SEEK_CUR); + RTC_DCHECK_EQ(error, 0); +} + +void PCMFile::Rewind() { + rewind(pcm_file_); + end_of_file_ = false; + blocks_read_ = 0; +} + +bool PCMFile::Rewinded() { + return rewinded_; +} + +void PCMFile::SaveStereo(bool is_stereo) { + save_stereo_ = is_stereo; +} + +void PCMFile::ReadStereo(bool is_stereo) { + read_stereo_ = is_stereo; +} + +void PCMFile::SetNum10MsBlocksToRead(int value) { + num_10ms_blocks_to_read_ = value; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/test/PCMFile.h b/third_party/libwebrtc/modules/audio_coding/test/PCMFile.h new file mode 100644 index 0000000000..5320aa63d0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/PCMFile.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_TEST_PCMFILE_H_ +#define MODULES_AUDIO_CODING_TEST_PCMFILE_H_ + +#include <stdio.h> +#include <stdlib.h> + +#include <string> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/audio/audio_frame.h" + +namespace webrtc { + +class PCMFile { + public: + PCMFile(); + PCMFile(uint32_t timestamp); + ~PCMFile(); + + void Open(absl::string_view filename, + uint16_t frequency, + absl::string_view mode, + bool auto_rewind = false); + + int32_t Read10MsData(AudioFrame& audio_frame); + + void Write10MsData(const int16_t* playout_buffer, size_t length_smpls); + void Write10MsData(const AudioFrame& audio_frame); + + uint16_t PayloadLength10Ms() const; + int32_t SamplingFrequency() const; + void Close(); + bool EndOfFile() const { return end_of_file_; } + // Moves forward the specified number of 10 ms blocks. If a limit has been set + // with SetNum10MsBlocksToRead, fast-forwarding does not count towards this + // limit. + void FastForward(int num_10ms_blocks); + void Rewind(); + static int16_t ChooseFile(std::string* file_name, + int16_t max_len, + uint16_t* frequency_hz); + bool Rewinded(); + void SaveStereo(bool is_stereo = true); + void ReadStereo(bool is_stereo = true); + // If set, the reading will stop after the specified number of blocks have + // been read. When that has happened, EndOfFile() will return true. Calling + // Rewind() will reset the counter and start over. + void SetNum10MsBlocksToRead(int value); + + private: + FILE* pcm_file_; + uint16_t samples_10ms_; + int32_t frequency_; + bool end_of_file_; + bool auto_rewind_; + bool rewinded_; + uint32_t timestamp_; + bool read_stereo_; + bool save_stereo_; + absl::optional<int> num_10ms_blocks_to_read_; + int blocks_read_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_PCMFILE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.cc b/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.cc new file mode 100644 index 0000000000..799e9c5b1f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.cc @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/test/PacketLossTest.h" + +#include <memory> + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +ReceiverWithPacketLoss::ReceiverWithPacketLoss() + : loss_rate_(0), + burst_length_(1), + packet_counter_(0), + lost_packet_counter_(0), + burst_lost_counter_(burst_length_) {} + +void ReceiverWithPacketLoss::Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view out_file_name, + int channels, + int file_num, + int loss_rate, + int burst_length) { + loss_rate_ = loss_rate; + burst_length_ = burst_length; + burst_lost_counter_ = burst_length_; // To prevent the first packet from being lost. + rtc::StringBuilder ss; + ss << out_file_name << "_" << loss_rate_ << "_" << burst_length_ << "_"; + Receiver::Setup(acm, rtpStream, ss.str(), channels, file_num); +} + +bool ReceiverWithPacketLoss::IncomingPacket() { + if (!_rtpStream->EndOfFile()) { + if (packet_counter_ == 0) { + _realPayloadSizeBytes = _rtpStream->Read(&_rtpHeader, _incomingPayload, + _payloadSizeBytes, &_nextTime); + if (_realPayloadSizeBytes == 0) { + if (_rtpStream->EndOfFile()) { + packet_counter_ = 0; + return true; + } else { + return false; + } + } + } + + if (!PacketLost()) { + _acm->IncomingPacket(_incomingPayload, _realPayloadSizeBytes, _rtpHeader); + } + packet_counter_++; + _realPayloadSizeBytes = _rtpStream->Read(&_rtpHeader, _incomingPayload, + _payloadSizeBytes, &_nextTime); + if (_realPayloadSizeBytes == 0 && _rtpStream->EndOfFile()) { + packet_counter_ = 0; + lost_packet_counter_ = 0; + } + } + return true; +} + +bool ReceiverWithPacketLoss::PacketLost() { + if (burst_lost_counter_ < burst_length_) { + lost_packet_counter_++; + burst_lost_counter_++; + return true; + } + + if (lost_packet_counter_ * 100 < loss_rate_ * packet_counter_) { + lost_packet_counter_++; + burst_lost_counter_ = 1; + return true; + } + return false; +} + +SenderWithFEC::SenderWithFEC() : expected_loss_rate_(0) {} + +void SenderWithFEC::Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view in_file_name, + int payload_type, + SdpAudioFormat format, + int expected_loss_rate) { + Sender::Setup(acm, rtpStream, in_file_name, format.clockrate_hz, payload_type, + format); + EXPECT_TRUE(SetFEC(true)); + EXPECT_TRUE(SetPacketLossRate(expected_loss_rate)); +} + +bool SenderWithFEC::SetFEC(bool enable_fec) { + bool success = false; + _acm->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) { + if (*enc && (*enc)->SetFec(enable_fec)) { + success = true; + } + }); + return success; +} + +bool SenderWithFEC::SetPacketLossRate(int expected_loss_rate) { + if (_acm->SetPacketLossRate(expected_loss_rate) == 0) { + expected_loss_rate_ = expected_loss_rate; + return true; + } + return false; +} + +PacketLossTest::PacketLossTest(int channels, + int expected_loss_rate, + int actual_loss_rate, + int burst_length) + : channels_(channels), + in_file_name_(channels_ == 1 ?
"audio_coding/testfile32kHz" + : "audio_coding/teststereo32kHz"), + sample_rate_hz_(32000), + expected_loss_rate_(expected_loss_rate), + actual_loss_rate_(actual_loss_rate), + burst_length_(burst_length) {} + +void PacketLossTest::Perform() { +#ifndef WEBRTC_CODEC_OPUS + return; +#else + RTPFile rtpFile; + std::unique_ptr acm(AudioCodingModule::Create( + AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))); + SdpAudioFormat send_format = SdpAudioFormat("opus", 48000, 2); + if (channels_ == 2) { + send_format.parameters = {{"stereo", "1"}}; + } + + std::string fileName = webrtc::test::TempFilename(webrtc::test::OutputPath(), + "packet_loss_test"); + rtpFile.Open(fileName.c_str(), "wb+"); + rtpFile.WriteHeader(); + SenderWithFEC sender; + sender.Setup(acm.get(), &rtpFile, in_file_name_, 120, send_format, + expected_loss_rate_); + sender.Run(); + sender.Teardown(); + rtpFile.Close(); + + rtpFile.Open(fileName.c_str(), "rb"); + rtpFile.ReadHeader(); + ReceiverWithPacketLoss receiver; + receiver.Setup(acm.get(), &rtpFile, "packetLoss_out", channels_, 15, + actual_loss_rate_, burst_length_); + receiver.Run(); + receiver.Teardown(); + rtpFile.Close(); +#endif +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.h b/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.h new file mode 100644 index 0000000000..d841d65a1b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/PacketLossTest.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_TEST_PACKETLOSSTEST_H_ +#define MODULES_AUDIO_CODING_TEST_PACKETLOSSTEST_H_ + +#include <string> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/test/EncodeDecodeTest.h" + +namespace webrtc { + +class ReceiverWithPacketLoss : public Receiver { + public: + ReceiverWithPacketLoss(); + void Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view out_file_name, + int channels, + int file_num, + int loss_rate, + int burst_length); + bool IncomingPacket() override; + + protected: + bool PacketLost(); + int loss_rate_; + int burst_length_; + int packet_counter_; + int lost_packet_counter_; + int burst_lost_counter_; +}; + +class SenderWithFEC : public Sender { + public: + SenderWithFEC(); + void Setup(AudioCodingModule* acm, + RTPStream* rtpStream, + absl::string_view in_file_name, + int payload_type, + SdpAudioFormat format, + int expected_loss_rate); + bool SetPacketLossRate(int expected_loss_rate); + bool SetFEC(bool enable_fec); + + protected: + int expected_loss_rate_; +}; + +class PacketLossTest { + public: + PacketLossTest(int channels, + int expected_loss_rate, + int actual_loss_rate, + int burst_length); + void Perform(); + + protected: + int channels_; + std::string in_file_name_; + int sample_rate_hz_; + int expected_loss_rate_; + int actual_loss_rate_; + int burst_length_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_PACKETLOSSTEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/RTPFile.cc b/third_party/libwebrtc/modules/audio_coding/test/RTPFile.cc new file mode 100644 index 0000000000..0c2ab3c443 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/RTPFile.cc @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "RTPFile.h" + +#include <stdlib.h> + +#include <limits> + +#include "absl/strings/string_view.h" + +#ifdef WIN32 +#include <Winsock2.h> +#else +#include <arpa/inet.h> +#endif + +// TODO(tlegrand): Consider removing usage of gtest.
+#include "test/gtest.h" + +namespace webrtc { + +void RTPStream::ParseRTPHeader(RTPHeader* rtp_header, + const uint8_t* rtpHeader) { + rtp_header->payloadType = rtpHeader[1]; + rtp_header->sequenceNumber = + (static_cast(rtpHeader[2]) << 8) | rtpHeader[3]; + rtp_header->timestamp = (static_cast(rtpHeader[4]) << 24) | + (static_cast(rtpHeader[5]) << 16) | + (static_cast(rtpHeader[6]) << 8) | + rtpHeader[7]; + rtp_header->ssrc = (static_cast(rtpHeader[8]) << 24) | + (static_cast(rtpHeader[9]) << 16) | + (static_cast(rtpHeader[10]) << 8) | + rtpHeader[11]; +} + +void RTPStream::MakeRTPheader(uint8_t* rtpHeader, + uint8_t payloadType, + int16_t seqNo, + uint32_t timeStamp, + uint32_t ssrc) { + rtpHeader[0] = 0x80; + rtpHeader[1] = payloadType; + rtpHeader[2] = (seqNo >> 8) & 0xFF; + rtpHeader[3] = seqNo & 0xFF; + rtpHeader[4] = timeStamp >> 24; + rtpHeader[5] = (timeStamp >> 16) & 0xFF; + rtpHeader[6] = (timeStamp >> 8) & 0xFF; + rtpHeader[7] = timeStamp & 0xFF; + rtpHeader[8] = ssrc >> 24; + rtpHeader[9] = (ssrc >> 16) & 0xFF; + rtpHeader[10] = (ssrc >> 8) & 0xFF; + rtpHeader[11] = ssrc & 0xFF; +} + +RTPPacket::RTPPacket(uint8_t payloadType, + uint32_t timeStamp, + int16_t seqNo, + const uint8_t* payloadData, + size_t payloadSize, + uint32_t frequency) + : payloadType(payloadType), + timeStamp(timeStamp), + seqNo(seqNo), + payloadSize(payloadSize), + frequency(frequency) { + if (payloadSize > 0) { + this->payloadData = new uint8_t[payloadSize]; + memcpy(this->payloadData, payloadData, payloadSize); + } +} + +RTPPacket::~RTPPacket() { + delete[] payloadData; +} + +void RTPBuffer::Write(const uint8_t payloadType, + const uint32_t timeStamp, + const int16_t seqNo, + const uint8_t* payloadData, + const size_t payloadSize, + uint32_t frequency) { + RTPPacket* packet = new RTPPacket(payloadType, timeStamp, seqNo, payloadData, + payloadSize, frequency); + MutexLock lock(&mutex_); + _rtpQueue.push(packet); +} + +size_t RTPBuffer::Read(RTPHeader* rtp_header, + uint8_t* payloadData, + size_t payloadSize, + uint32_t* offset) { + RTPPacket* packet; + { + MutexLock lock(&mutex_); + packet = _rtpQueue.front(); + _rtpQueue.pop(); + } + rtp_header->markerBit = 1; + rtp_header->payloadType = packet->payloadType; + rtp_header->sequenceNumber = packet->seqNo; + rtp_header->ssrc = 0; + rtp_header->timestamp = packet->timeStamp; + if (packet->payloadSize > 0 && payloadSize >= packet->payloadSize) { + memcpy(payloadData, packet->payloadData, packet->payloadSize); + } else { + return 0; + } + *offset = (packet->timeStamp / (packet->frequency / 1000)); + + return packet->payloadSize; +} + +bool RTPBuffer::EndOfFile() const { + MutexLock lock(&mutex_); + return _rtpQueue.empty(); +} + +void RTPFile::Open(absl::string_view filename, absl::string_view mode) { + std::string filename_str = std::string(filename); + if ((_rtpFile = fopen(filename_str.c_str(), std::string(mode).c_str())) == + NULL) { + printf("Cannot write file %s.\n", filename_str.c_str()); + ADD_FAILURE() << "Unable to write file"; + exit(1); + } +} + +void RTPFile::Close() { + if (_rtpFile != NULL) { + fclose(_rtpFile); + _rtpFile = NULL; + } +} + +void RTPFile::WriteHeader() { + // Write data in a format that NetEQ and RTP Play can parse + fprintf(_rtpFile, "#!RTPencode%s\n", "1.0"); + uint32_t dummy_variable = 0; + // should be converted to network endian format, but does not matter when 0 + EXPECT_EQ(1u, fwrite(&dummy_variable, 4, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&dummy_variable, 4, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&dummy_variable, 4, 1, 
_rtpFile)); + EXPECT_EQ(1u, fwrite(&dummy_variable, 2, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&dummy_variable, 2, 1, _rtpFile)); + fflush(_rtpFile); +} + +void RTPFile::ReadHeader() { + uint32_t start_sec, start_usec, source; + uint16_t port, padding; + char fileHeader[40]; + EXPECT_TRUE(fgets(fileHeader, 40, _rtpFile) != 0); + EXPECT_EQ(1u, fread(&start_sec, 4, 1, _rtpFile)); + start_sec = ntohl(start_sec); + EXPECT_EQ(1u, fread(&start_usec, 4, 1, _rtpFile)); + start_usec = ntohl(start_usec); + EXPECT_EQ(1u, fread(&source, 4, 1, _rtpFile)); + source = ntohl(source); + EXPECT_EQ(1u, fread(&port, 2, 1, _rtpFile)); + port = ntohs(port); + EXPECT_EQ(1u, fread(&padding, 2, 1, _rtpFile)); + padding = ntohs(padding); +} + +void RTPFile::Write(const uint8_t payloadType, + const uint32_t timeStamp, + const int16_t seqNo, + const uint8_t* payloadData, + const size_t payloadSize, + uint32_t frequency) { + /* write RTP packet to file */ + uint8_t rtpHeader[12]; + MakeRTPheader(rtpHeader, payloadType, seqNo, timeStamp, 0); + ASSERT_LE(12 + payloadSize + 8, std::numeric_limits<uint16_t>::max()); + uint16_t lengthBytes = htons(static_cast<uint16_t>(12 + payloadSize + 8)); + uint16_t plen = htons(static_cast<uint16_t>(12 + payloadSize)); + uint32_t offsetMs; + + offsetMs = (timeStamp / (frequency / 1000)); + offsetMs = htonl(offsetMs); + EXPECT_EQ(1u, fwrite(&lengthBytes, 2, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&plen, 2, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&offsetMs, 4, 1, _rtpFile)); + EXPECT_EQ(1u, fwrite(&rtpHeader, 12, 1, _rtpFile)); + EXPECT_EQ(payloadSize, fwrite(payloadData, 1, payloadSize, _rtpFile)); +} + +size_t RTPFile::Read(RTPHeader* rtp_header, + uint8_t* payloadData, + size_t payloadSize, + uint32_t* offset) { + uint16_t lengthBytes; + uint16_t plen; + uint8_t rtpHeader[12]; + size_t read_len = fread(&lengthBytes, 2, 1, _rtpFile); + /* Check if we have reached end of file. */ + if ((read_len == 0) && feof(_rtpFile)) { + _rtpEOF = true; + return 0; + } + EXPECT_EQ(1u, fread(&plen, 2, 1, _rtpFile)); + EXPECT_EQ(1u, fread(offset, 4, 1, _rtpFile)); + lengthBytes = ntohs(lengthBytes); + plen = ntohs(plen); + *offset = ntohl(*offset); + EXPECT_GT(plen, 11); + + EXPECT_EQ(1u, fread(rtpHeader, 12, 1, _rtpFile)); + ParseRTPHeader(rtp_header, rtpHeader); + EXPECT_EQ(lengthBytes, plen + 8); + + if (plen == 0) { + return 0; + } + if (lengthBytes < 20) { + return 0; + } + if (payloadSize < static_cast<size_t>((lengthBytes - 20))) { + return 0; + } + lengthBytes -= 20; + EXPECT_EQ(lengthBytes, fread(payloadData, 1, lengthBytes, _rtpFile)); + return lengthBytes; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/test/RTPFile.h b/third_party/libwebrtc/modules/audio_coding/test/RTPFile.h new file mode 100644 index 0000000000..b796491da9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/RTPFile.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_CODING_TEST_RTPFILE_H_ +#define MODULES_AUDIO_CODING_TEST_RTPFILE_H_ + +#include <stdio.h> + +#include <queue> + +#include "absl/strings/string_view.h" +#include "api/rtp_headers.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class RTPStream { + public: + virtual ~RTPStream() {} + + virtual void Write(uint8_t payloadType, + uint32_t timeStamp, + int16_t seqNo, + const uint8_t* payloadData, + size_t payloadSize, + uint32_t frequency) = 0; + + // Returns the packet's payload size. Zero should be treated as an + // end-of-stream (in the case that EndOfFile() is true) or an error. + virtual size_t Read(RTPHeader* rtp_Header, + uint8_t* payloadData, + size_t payloadSize, + uint32_t* offset) = 0; + virtual bool EndOfFile() const = 0; + + protected: + void MakeRTPheader(uint8_t* rtpHeader, + uint8_t payloadType, + int16_t seqNo, + uint32_t timeStamp, + uint32_t ssrc); + + void ParseRTPHeader(RTPHeader* rtp_header, const uint8_t* rtpHeader); +}; + +class RTPPacket { + public: + RTPPacket(uint8_t payloadType, + uint32_t timeStamp, + int16_t seqNo, + const uint8_t* payloadData, + size_t payloadSize, + uint32_t frequency); + + ~RTPPacket(); + + uint8_t payloadType; + uint32_t timeStamp; + int16_t seqNo; + uint8_t* payloadData; + size_t payloadSize; + uint32_t frequency; +}; + +class RTPBuffer : public RTPStream { + public: + RTPBuffer() = default; + + ~RTPBuffer() = default; + + void Write(uint8_t payloadType, + uint32_t timeStamp, + int16_t seqNo, + const uint8_t* payloadData, + size_t payloadSize, + uint32_t frequency) override; + + size_t Read(RTPHeader* rtp_header, + uint8_t* payloadData, + size_t payloadSize, + uint32_t* offset) override; + + bool EndOfFile() const override; + + private: + mutable Mutex mutex_; + std::queue<RTPPacket*> _rtpQueue RTC_GUARDED_BY(&mutex_); +}; + +class RTPFile : public RTPStream { + public: + ~RTPFile() {} + + RTPFile() : _rtpFile(NULL), _rtpEOF(false) {} + + void Open(absl::string_view outFilename, absl::string_view mode); + + void Close(); + + void WriteHeader(); + + void ReadHeader(); + + void Write(uint8_t payloadType, + uint32_t timeStamp, + int16_t seqNo, + const uint8_t* payloadData, + size_t payloadSize, + uint32_t frequency) override; + + size_t Read(RTPHeader* rtp_header, + uint8_t* payloadData, + size_t payloadSize, + uint32_t* offset) override; + + bool EndOfFile() const override { return _rtpEOF; } + + private: + FILE* _rtpFile; + bool _rtpEOF; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_RTPFILE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.cc b/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.cc new file mode 100644 index 0000000000..b44037d732 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.cc @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/test/TestAllCodecs.h" + +#include <string.h> +#include <limits> +#include <string> + +#include "absl/strings/match.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/logging.h" +#include "rtc_base/string_encode.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +// Description of the test: +// In this test we set up a one-way communication channel from a participant +// called "a" to a participant called "b". +// a -> channel_a_to_b -> b +// +// The test loops through all available mono codecs, encodes at "a", sends +// over the channel, and decodes at "b". + +#define CHECK_ERROR(f) \ + do { \ + EXPECT_GE(f, 0) << "Error Calling API"; \ + } while (0) + +namespace { +const size_t kVariableSize = std::numeric_limits<size_t>::max(); +} + +namespace webrtc { + +// Class for simulating packet handling. +TestPack::TestPack() + : receiver_acm_(NULL), + sequence_number_(0), + timestamp_diff_(0), + last_in_timestamp_(0), + total_bytes_(0), + payload_size_(0) {} + +TestPack::~TestPack() {} + +void TestPack::RegisterReceiverACM(AudioCodingModule* acm) { + receiver_acm_ = acm; + return; +} + +int32_t TestPack::SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_size, + int64_t absolute_capture_timestamp_ms) { + RTPHeader rtp_header; + int32_t status; + + rtp_header.markerBit = false; + rtp_header.ssrc = 0; + rtp_header.sequenceNumber = sequence_number_++; + rtp_header.payloadType = payload_type; + rtp_header.timestamp = timestamp; + + if (frame_type == AudioFrameType::kEmptyFrame) { + // Skip this frame. + return 0; + } + + // Only run mono for all test cases.
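+ // The payload is copied into a local buffer and handed synchronously to + // the receiving ACM below, so this test channel behaves as a lossless, + // zero-delay transport.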
+ memcpy(payload_data_, payload_data, payload_size); + + status = + receiver_acm_->IncomingPacket(payload_data_, payload_size, rtp_header); + + payload_size_ = payload_size; + timestamp_diff_ = timestamp - last_in_timestamp_; + last_in_timestamp_ = timestamp; + total_bytes_ += payload_size; + return status; +} + +size_t TestPack::payload_size() { + return payload_size_; +} + +uint32_t TestPack::timestamp_diff() { + return timestamp_diff_; +} + +void TestPack::reset_payload_size() { + payload_size_ = 0; +} + +TestAllCodecs::TestAllCodecs() + : acm_a_(AudioCodingModule::Create( + AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))), + acm_b_(AudioCodingModule::Create( + AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))), + channel_a_to_b_(NULL), + test_count_(0), + packet_size_samples_(0), + packet_size_bytes_(0) {} + +TestAllCodecs::~TestAllCodecs() { + if (channel_a_to_b_ != NULL) { + delete channel_a_to_b_; + channel_a_to_b_ = NULL; + } +} + +void TestAllCodecs::Perform() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + infile_a_.Open(file_name, 32000, "rb"); + + acm_a_->InitializeReceiver(); + acm_b_->InitializeReceiver(); + + acm_b_->SetReceiveCodecs({{107, {"L16", 8000, 1}}, + {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, + {111, {"L16", 8000, 2}}, + {112, {"L16", 16000, 2}}, + {113, {"L16", 32000, 2}}, + {0, {"PCMU", 8000, 1}}, + {110, {"PCMU", 8000, 2}}, + {8, {"PCMA", 8000, 1}}, + {118, {"PCMA", 8000, 2}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {119, {"G722", 8000, 2}}, + {120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); + + // Create and connect the channel + channel_a_to_b_ = new TestPack; + acm_a_->RegisterTransportCallback(channel_a_to_b_); + channel_a_to_b_->RegisterReceiverACM(acm_b_.get()); + + // All codecs are tested for all allowed sampling frequencies, rates and + // packet sizes. 
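+ // Each block below follows the same pattern: bump the test counter, open + // a fresh output file, then register and run one (codec, rate, packet + // size) combination at a time.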
+ test_count_++; + OpenOutFile(test_count_); + char codec_g722[] = "G722"; + RegisterSendCodec('A', codec_g722, 16000, 64000, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 320, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 480, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 640, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 800, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 960, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); +#ifdef WEBRTC_CODEC_ILBC + test_count_++; + OpenOutFile(test_count_); + char codec_ilbc[] = "ILBC"; + RegisterSendCodec('A', codec_ilbc, 8000, 13300, 240, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_ilbc, 8000, 13300, 480, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_ilbc, 8000, 15200, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_ilbc, 8000, 15200, 320, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); +#endif + test_count_++; + OpenOutFile(test_count_); + char codec_l16[] = "L16"; + RegisterSendCodec('A', codec_l16, 8000, 128000, 80, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 8000, 128000, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 8000, 128000, 240, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 8000, 128000, 320, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); + + test_count_++; + OpenOutFile(test_count_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 320, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 480, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 640, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); + + test_count_++; + OpenOutFile(test_count_); + RegisterSendCodec('A', codec_l16, 32000, 512000, 320, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_l16, 32000, 512000, 640, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); + + test_count_++; + OpenOutFile(test_count_); + char codec_pcma[] = "PCMA"; + RegisterSendCodec('A', codec_pcma, 8000, 64000, 80, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 240, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 320, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 400, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 480, 0); + Run(channel_a_to_b_); + + char codec_pcmu[] = "PCMU"; + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 80, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 160, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 240, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 320, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 400, 0); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 480, 0); + Run(channel_a_to_b_); + outfile_b_.Close(); +#ifdef WEBRTC_CODEC_OPUS + test_count_++; + OpenOutFile(test_count_); + char codec_opus[] = "OPUS"; + RegisterSendCodec('A', codec_opus, 48000, 6000, 480, kVariableSize); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_opus, 48000, 20000, 480 * 2, kVariableSize); + Run(channel_a_to_b_); + 
RegisterSendCodec('A', codec_opus, 48000, 32000, 480 * 4, kVariableSize); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_opus, 48000, 48000, 480, kVariableSize); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_opus, 48000, 64000, 480 * 4, kVariableSize); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_opus, 48000, 96000, 480 * 6, kVariableSize); + Run(channel_a_to_b_); + RegisterSendCodec('A', codec_opus, 48000, 500000, 480 * 2, kVariableSize); + Run(channel_a_to_b_); + outfile_b_.Close(); +#endif +} + +// Register Codec to use in the test +// +// Input: side - which ACM to use, 'A' or 'B' +// codec_name - name to use when registering the codec +// sampling_freq_hz - sampling frequency in Hertz +// rate - bitrate in bits per second +// packet_size - packet size in samples +// extra_byte - extra bytes needed on top of the bitrate +// used when registering (e.g. an internal header); +// set to kVariableSize if the codec is a variable +// rate codec +void TestAllCodecs::RegisterSendCodec(char side, + char* codec_name, + int32_t sampling_freq_hz, + int rate, + int packet_size, + size_t extra_byte) { + // Store packet-size in samples, used to validate the received packet. + // If G.722, store half the size to compensate for the timestamp bug in the + // RFC for G.722. + int clockrate_hz = sampling_freq_hz; + size_t num_channels = 1; + if (absl::EqualsIgnoreCase(codec_name, "G722")) { + packet_size_samples_ = packet_size / 2; + clockrate_hz = sampling_freq_hz / 2; + } else if (absl::EqualsIgnoreCase(codec_name, "OPUS")) { + packet_size_samples_ = packet_size; + num_channels = 2; + } else { + packet_size_samples_ = packet_size; + } + + // Store the expected packet size in bytes, used to validate the received + // packet. If variable rate codec (extra_byte == kVariableSize), set to + // kVariableSize. + if (extra_byte != kVariableSize) { + // Add 0.875 to always round up to a whole byte + packet_size_bytes_ = + static_cast<size_t>(static_cast<float>(packet_size * rate) / + static_cast<float>(sampling_freq_hz * 8) + + 0.875) + + extra_byte; + } else { + // Packets will have a variable size. + packet_size_bytes_ = kVariableSize; + } + + // Set a pointer to the ACM in which to register the codec. + AudioCodingModule* my_acm = NULL; + switch (side) { + case 'A': { + my_acm = acm_a_.get(); + break; + } + case 'B': { + my_acm = acm_b_.get(); + break; + } + default: { + break; + } + } + ASSERT_TRUE(my_acm != NULL); + + auto factory = CreateBuiltinAudioEncoderFactory(); + constexpr int payload_type = 17; + SdpAudioFormat format = {codec_name, clockrate_hz, num_channels}; + format.parameters["ptime"] = rtc::ToString(rtc::CheckedDivExact( + packet_size, rtc::CheckedDivExact(sampling_freq_hz, 1000))); + my_acm->SetEncoder( + factory->MakeAudioEncoder(payload_type, format, absl::nullopt)); +} + +void TestAllCodecs::Run(TestPack* channel) { + AudioFrame audio_frame; + + int32_t out_freq_hz = outfile_b_.SamplingFrequency(); + size_t receive_size; + uint32_t timestamp_diff; + channel->reset_payload_size(); + int error_count = 0; + int counter = 0; + // Set test length to 500 ms (50 blocks of 10 ms each). + infile_a_.SetNum10MsBlocksToRead(50); + // Fast-forward 1 second (100 blocks) since the file starts with silence. + infile_a_.FastForward(100); + + while (!infile_a_.EndOfFile()) { + // Add 10 msec to ACM. + infile_a_.Read10MsData(audio_frame); + CHECK_ERROR(acm_a_->Add10MsData(audio_frame)); + + // Verify that the received packet size matches the settings.
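+ // For variable-rate codecs packet_size_bytes_ was stored as + // kVariableSize, which disables the fixed-size comparison below.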
+ receive_size = channel->payload_size(); + if (receive_size) { + if ((receive_size != packet_size_bytes_) && + (packet_size_bytes_ != kVariableSize)) { + error_count++; + } + + // Verify that the timestamp is updated with expected length. The counter + // is used to avoid problems when switching codec or frame size in the + // test. + timestamp_diff = channel->timestamp_diff(); + if ((counter > 10) && + (static_cast<int>(timestamp_diff) != packet_size_samples_) && + (packet_size_samples_ > -1)) + error_count++; + } + + // Run received side of ACM. + bool muted; + CHECK_ERROR(acm_b_->PlayoutData10Ms(out_freq_hz, &audio_frame, &muted)); + ASSERT_FALSE(muted); + + // Write output speech to file. + outfile_b_.Write10MsData(audio_frame.data(), + audio_frame.samples_per_channel_); + + // Update loop counter + counter++; + } + + EXPECT_EQ(0, error_count); + + if (infile_a_.EndOfFile()) { + infile_a_.Rewind(); + } +} + +void TestAllCodecs::OpenOutFile(int test_number) { + std::string filename = webrtc::test::OutputPath(); + rtc::StringBuilder test_number_str; + test_number_str << test_number; + filename += "testallcodecs_out_"; + filename += test_number_str.str(); + filename += ".pcm"; + outfile_b_.Open(filename, 32000, "wb"); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.h b/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.h new file mode 100644 index 0000000000..0c276414e4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/TestAllCodecs.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_TEST_TESTALLCODECS_H_ +#define MODULES_AUDIO_CODING_TEST_TESTALLCODECS_H_ + +#include <memory> + +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/test/PCMFile.h" + +namespace webrtc { + +class TestPack : public AudioPacketizationCallback { + public: + TestPack(); + ~TestPack(); + + void RegisterReceiverACM(AudioCodingModule* acm); + + int32_t SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_size, + int64_t absolute_capture_timestamp_ms) override; + + size_t payload_size(); + uint32_t timestamp_diff(); + void reset_payload_size(); + + private: + AudioCodingModule* receiver_acm_; + uint16_t sequence_number_; + uint8_t payload_data_[60 * 32 * 2 * 2]; + uint32_t timestamp_diff_; + uint32_t last_in_timestamp_; + uint64_t total_bytes_; + size_t payload_size_; +}; + +class TestAllCodecs { + public: + TestAllCodecs(); + ~TestAllCodecs(); + + void Perform(); + + private: + // The default value of '-1' indicates that the registration is based only on + // codec name, and a sampling frequency matching is not required. + // This is useful for codecs which support several sampling frequencies. + // Note! Only mono mode is tested in this test.
+ void RegisterSendCodec(char side, + char* codec_name, + int32_t sampling_freq_hz, + int rate, + int packet_size, + size_t extra_byte); + + void Run(TestPack* channel); + void OpenOutFile(int test_number); + + std::unique_ptr<AudioCodingModule> acm_a_; + std::unique_ptr<AudioCodingModule> acm_b_; + TestPack* channel_a_to_b_; + PCMFile infile_a_; + PCMFile outfile_b_; + int test_count_; + int packet_size_samples_; + size_t packet_size_bytes_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_TEST_TESTALLCODECS_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.cc b/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.cc new file mode 100644 index 0000000000..fff48b27bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/test/TestRedFec.h" + +#include <memory> +#include <utility> + +#include "absl/strings/match.h" +#include "api/audio_codecs/L16/audio_decoder_L16.h" +#include "api/audio_codecs/L16/audio_encoder_L16.h" +#include "api/audio_codecs/audio_decoder_factory_template.h" +#include "api/audio_codecs/audio_encoder_factory_template.h" +#include "api/audio_codecs/g711/audio_decoder_g711.h" +#include "api/audio_codecs/g711/audio_encoder_g711.h" +#include "api/audio_codecs/g722/audio_decoder_g722.h" +#include "api/audio_codecs/g722/audio_encoder_g722.h" +#include "api/audio_codecs/opus/audio_decoder_opus.h" +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" +#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h" +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TestRedFec::TestRedFec() + : encoder_factory_(CreateAudioEncoderFactory<AudioEncoderG711, AudioEncoderG722, AudioEncoderL16, AudioEncoderOpus>()), + decoder_factory_(CreateAudioDecoderFactory<AudioDecoderG711, AudioDecoderG722, AudioDecoderL16, AudioDecoderOpus>()), + _acmA(AudioCodingModule::Create( + AudioCodingModule::Config(decoder_factory_))), + _acmB(AudioCodingModule::Create( + AudioCodingModule::Config(decoder_factory_))), + _channelA2B(NULL), + _testCntr(0) {} + +TestRedFec::~TestRedFec() { + if (_channelA2B != NULL) { + delete _channelA2B; + _channelA2B = NULL; + } +} + +void TestRedFec::Perform() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + _inFileA.Open(file_name, 32000, "rb"); + + ASSERT_EQ(0, _acmA->InitializeReceiver()); + ASSERT_EQ(0, _acmB->InitializeReceiver()); + + // Create and connect the channel + _channelA2B = new Channel; + _acmA->RegisterTransportCallback(_channelA2B); + _channelA2B->RegisterReceiverACM(_acmB.get()); + + RegisterSendCodec(_acmA, {"L16", 8000, 1}, Vad::kVadAggressive, true); + + OpenOutFile(_testCntr); + Run(); + _outFileB.Close(); + + // Switch to another 8 kHz codec; RED should remain switched on. + RegisterSendCodec(_acmA, {"PCMU", 8000, 1}, Vad::kVadAggressive, true); + OpenOutFile(_testCntr); + Run(); + _outFileB.Close(); + + // Switch to a 16 kHz codec; RED should be switched off.
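+ // Note that G722 is registered with an 8000 Hz clock rate although it + // samples at 16 kHz; this matches the G.722 RTP timestamp convention + // that TestAllCodecs also compensates for.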
+ RegisterSendCodec(_acmA, {"G722", 8000, 1}, Vad::kVadAggressive, false);
+
+ OpenOutFile(_testCntr);
+ RegisterSendCodec(_acmA, {"G722", 8000, 1}, Vad::kVadAggressive, false);
+ Run();
+ RegisterSendCodec(_acmA, {"G722", 8000, 1}, Vad::kVadAggressive, false);
+ Run();
+ _outFileB.Close();
+
+ _channelA2B->SetFECTestWithPacketLoss(true);
+ // The following tests are run under packet losses.
+
+ // Switch to a 16 kHz codec; RED should be switched off.
+ RegisterSendCodec(_acmA, {"G722", 8000, 1}, Vad::kVadAggressive, false);
+
+ OpenOutFile(_testCntr);
+ Run();
+ _outFileB.Close();
+
+ RegisterSendCodec(_acmA, {"opus", 48000, 2}, absl::nullopt, false);
+
+ // _channelA2B imposes 25% packet loss rate.
+ EXPECT_EQ(0, _acmA->SetPacketLossRate(25));
+
+ _acmA->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) {
+ EXPECT_EQ(true, (*enc)->SetFec(true));
+ });
+
+ OpenOutFile(_testCntr);
+ Run();
+
+ // Switch to L16 with RED.
+ RegisterSendCodec(_acmA, {"L16", 8000, 1}, absl::nullopt, true);
+ Run();
+
+ // Switch to Opus again.
+ RegisterSendCodec(_acmA, {"opus", 48000, 2}, absl::nullopt, false);
+ _acmA->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) {
+ EXPECT_EQ(true, (*enc)->SetFec(false));
+ });
+ Run();
+
+ _acmA->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) {
+ EXPECT_EQ(true, (*enc)->SetFec(true));
+ });
+ _outFileB.Close();
+}
+
+void TestRedFec::RegisterSendCodec(
+ const std::unique_ptr<AudioCodingModule>& acm,
+ const SdpAudioFormat& codec_format,
+ absl::optional<Vad::Aggressiveness> vad_mode,
+ bool use_red) {
+ constexpr int payload_type = 17, cn_payload_type = 27, red_payload_type = 37;
+ const auto& other_acm = &acm == &_acmA ? _acmB : _acmA;
+
+ auto encoder = encoder_factory_->MakeAudioEncoder(payload_type, codec_format,
+ absl::nullopt);
+ EXPECT_NE(encoder, nullptr);
+ std::map<int, SdpAudioFormat> receive_codecs = {{payload_type, codec_format}};
+ if (!absl::EqualsIgnoreCase(codec_format.name, "opus")) {
+ if (vad_mode.has_value()) {
+ AudioEncoderCngConfig config;
+ config.speech_encoder = std::move(encoder);
+ config.num_channels = 1;
+ config.payload_type = cn_payload_type;
+ config.vad_mode = vad_mode.value();
+ encoder = CreateComfortNoiseEncoder(std::move(config));
+ receive_codecs.emplace(std::make_pair(
+ cn_payload_type, SdpAudioFormat("CN", codec_format.clockrate_hz, 1)));
+ }
+ if (use_red) {
+ AudioEncoderCopyRed::Config config;
+ config.payload_type = red_payload_type;
+ config.speech_encoder = std::move(encoder);
+ encoder = std::make_unique<AudioEncoderCopyRed>(std::move(config),
+ field_trials_);
+ receive_codecs.emplace(
+ std::make_pair(red_payload_type,
+ SdpAudioFormat("red", codec_format.clockrate_hz, 1)));
+ }
+ }
+ acm->SetEncoder(std::move(encoder));
+ other_acm->SetReceiveCodecs(receive_codecs);
+}
+
+void TestRedFec::Run() {
+ AudioFrame audioFrame;
+ int32_t outFreqHzB = _outFileB.SamplingFrequency();
+ // Set test length to 500 ms (50 blocks of 10 ms each).
+ _inFileA.SetNum10MsBlocksToRead(50);
+ // Fast-forward 1 second (100 blocks) since the file starts with silence.
+ _inFileA.FastForward(100);
+
+ while (!_inFileA.EndOfFile()) {
+ EXPECT_GT(_inFileA.Read10MsData(audioFrame), 0);
+ EXPECT_GE(_acmA->Add10MsData(audioFrame), 0);
+ bool muted;
+ EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
+ ASSERT_FALSE(muted);
+ _outFileB.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_);
+ }
+ _inFileA.Rewind();
+}
+
+void TestRedFec::OpenOutFile(int16_t test_number) {
+ std::string file_name;
+ rtc::StringBuilder file_stream;
+ file_stream << webrtc::test::OutputPath();
+ file_stream << "TestRedFec_outFile_";
+ file_stream << test_number << ".pcm";
+ file_name = file_stream.str();
+ _outFileB.Open(file_name, 16000, "wb");
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.h b/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.h
new file mode 100644
index 0000000000..dbadd88487
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TestRedFec.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_TEST_TESTREDFEC_H_
+#define MODULES_AUDIO_CODING_TEST_TESTREDFEC_H_
+
+#include <memory>
+#include <string>
+
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/audio_codecs/audio_encoder_factory.h"
+#include "common_audio/vad/include/vad.h"
+#include "modules/audio_coding/test/Channel.h"
+#include "modules/audio_coding/test/PCMFile.h"
+#include "test/scoped_key_value_config.h"
+
+namespace webrtc {
+
+class TestRedFec {
+ public:
+ explicit TestRedFec();
+ ~TestRedFec();
+
+ void Perform();
+
+ private:
+ void RegisterSendCodec(const std::unique_ptr<AudioCodingModule>& acm,
+ const SdpAudioFormat& codec_format,
+ absl::optional<Vad::Aggressiveness> vad_mode,
+ bool use_red);
+ void Run();
+ void OpenOutFile(int16_t testNumber);
+
+ test::ScopedKeyValueConfig field_trials_;
+ const rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
+ const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
+ std::unique_ptr<AudioCodingModule> _acmA;
+ std::unique_ptr<AudioCodingModule> _acmB;
+
+ Channel* _channelA2B;
+
+ PCMFile _inFileA;
+ PCMFile _outFileB;
+ int16_t _testCntr;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_CODING_TEST_TESTREDFEC_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestStereo.cc b/third_party/libwebrtc/modules/audio_coding/test/TestStereo.cc
new file mode 100644
index 0000000000..599fafb602
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TestStereo.cc
@@ -0,0 +1,599 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/test/TestStereo.h"
+
+#include <string>
+
+#include "absl/strings/match.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/include/module_common_types.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+// Class for simulating packet handling
+TestPackStereo::TestPackStereo()
+ : receiver_acm_(NULL),
+ seq_no_(0),
+ timestamp_diff_(0),
+ last_in_timestamp_(0),
+ total_bytes_(0),
+ payload_size_(0),
+ lost_packet_(false) {}
+
+TestPackStereo::~TestPackStereo() {}
+
+void TestPackStereo::RegisterReceiverACM(AudioCodingModule* acm) {
+ receiver_acm_ = acm;
+ return;
+}
+
+int32_t TestPackStereo::SendData(const AudioFrameType frame_type,
+ const uint8_t payload_type,
+ const uint32_t timestamp,
+ const uint8_t* payload_data,
+ const size_t payload_size,
+ int64_t absolute_capture_timestamp_ms) {
+ RTPHeader rtp_header;
+ int32_t status = 0;
+
+ rtp_header.markerBit = false;
+ rtp_header.ssrc = 0;
+ rtp_header.sequenceNumber = seq_no_++;
+ rtp_header.payloadType = payload_type;
+ rtp_header.timestamp = timestamp;
+ if (frame_type == AudioFrameType::kEmptyFrame) {
+ // Skip this frame
+ return 0;
+ }
+
+ if (lost_packet_ == false) {
+ status =
+ receiver_acm_->IncomingPacket(payload_data, payload_size, rtp_header);
+
+ if (frame_type != AudioFrameType::kAudioFrameCN) {
+ payload_size_ = static_cast<int>(payload_size);
+ } else {
+ payload_size_ = -1;
+ }
+
+ timestamp_diff_ = timestamp - last_in_timestamp_;
+ last_in_timestamp_ = timestamp;
+ total_bytes_ += payload_size;
+ }
+ return status;
+}
+
+uint16_t TestPackStereo::payload_size() {
+ return static_cast<uint16_t>(payload_size_);
+}
+
+uint32_t TestPackStereo::timestamp_diff() {
+ return timestamp_diff_;
+}
+
+void TestPackStereo::reset_payload_size() {
+ payload_size_ = 0;
+}
+
+void TestPackStereo::set_codec_mode(enum StereoMonoMode mode) {
+ codec_mode_ = mode;
+}
+
+void TestPackStereo::set_lost_packet(bool lost) {
+ lost_packet_ = lost;
+}
+
+TestStereo::TestStereo()
+ : acm_a_(AudioCodingModule::Create(
+ AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))),
+ acm_b_(AudioCodingModule::Create(
+ AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))),
+ channel_a2b_(NULL),
+ test_cntr_(0),
+ pack_size_samp_(0),
+ pack_size_bytes_(0),
+ counter_(0) {}
+
+TestStereo::~TestStereo() {
+ if (channel_a2b_ != NULL) {
+ delete channel_a2b_;
+ channel_a2b_ = NULL;
+ }
+}
+
+void TestStereo::Perform() {
+ uint16_t frequency_hz;
+ int audio_channels;
+ int codec_channels;
+
+ // Open both mono and stereo test files in 32 kHz.
+ const std::string file_name_stereo =
+ webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm");
+ const std::string file_name_mono =
+ webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+ frequency_hz = 32000;
+ in_file_stereo_ = new PCMFile();
+ in_file_mono_ = new PCMFile();
+ in_file_stereo_->Open(file_name_stereo, frequency_hz, "rb");
+ in_file_stereo_->ReadStereo(true);
+ in_file_mono_->Open(file_name_mono, frequency_hz, "rb");
+ in_file_mono_->ReadStereo(false);
+
+ // Create and initialize two ACMs, one for each side of a one-to-one call.
+ ASSERT_TRUE((acm_a_.get() != NULL) && (acm_b_.get() != NULL)); + EXPECT_EQ(0, acm_a_->InitializeReceiver()); + EXPECT_EQ(0, acm_b_->InitializeReceiver()); + + acm_b_->SetReceiveCodecs({{103, {"ISAC", 16000, 1}}, + {104, {"ISAC", 32000, 1}}, + {107, {"L16", 8000, 1}}, + {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, + {111, {"L16", 8000, 2}}, + {112, {"L16", 16000, 2}}, + {113, {"L16", 32000, 2}}, + {0, {"PCMU", 8000, 1}}, + {110, {"PCMU", 8000, 2}}, + {8, {"PCMA", 8000, 1}}, + {118, {"PCMA", 8000, 2}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {119, {"G722", 8000, 2}}, + {120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); + + // Create and connect the channel. + channel_a2b_ = new TestPackStereo; + EXPECT_EQ(0, acm_a_->RegisterTransportCallback(channel_a2b_)); + channel_a2b_->RegisterReceiverACM(acm_b_.get()); + + char codec_pcma_temp[] = "PCMA"; + RegisterSendCodec('A', codec_pcma_temp, 8000, 64000, 80, 2); + + // + // Test Stereo-To-Stereo for all codecs. + // + audio_channels = 2; + codec_channels = 2; + + // All codecs are tested for all allowed sampling frequencies, rates and + // packet sizes. + channel_a2b_->set_codec_mode(kStereo); + test_cntr_++; + OpenOutFile(test_cntr_); + char codec_g722[] = "G722"; + RegisterSendCodec('A', codec_g722, 16000, 64000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_g722, 16000, 64000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_g722, 16000, 64000, 480, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_g722, 16000, 64000, 640, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_g722, 16000, 64000, 800, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_g722, 16000, 64000, 960, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + channel_a2b_->set_codec_mode(kStereo); + test_cntr_++; + OpenOutFile(test_cntr_); + char codec_l16[] = "L16"; + RegisterSendCodec('A', codec_l16, 8000, 128000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 8000, 128000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 8000, 128000, 240, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 8000, 128000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 16000, 256000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 16000, 256000, 480, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 16000, 256000, 640, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 32000, 512000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_l16, 32000, 512000, 640, codec_channels); + 
Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#ifdef PCMA_AND_PCMU + channel_a2b_->set_codec_mode(kStereo); + audio_channels = 2; + codec_channels = 2; + test_cntr_++; + OpenOutFile(test_cntr_); + char codec_pcma[] = "PCMA"; + RegisterSendCodec('A', codec_pcma, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 240, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 400, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 480, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + char codec_pcmu[] = "PCMU"; + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 240, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 400, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 480, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#endif +#ifdef WEBRTC_CODEC_OPUS + channel_a2b_->set_codec_mode(kStereo); + audio_channels = 2; + codec_channels = 2; + test_cntr_++; + OpenOutFile(test_cntr_); + + char codec_opus[] = "opus"; + // Run Opus with 10 ms frame size. + RegisterSendCodec('A', codec_opus, 48000, 64000, 480, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + // Run Opus with 20 ms frame size. + RegisterSendCodec('A', codec_opus, 48000, 64000, 480 * 2, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + // Run Opus with 40 ms frame size. + RegisterSendCodec('A', codec_opus, 48000, 64000, 480 * 4, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + // Run Opus with 60 ms frame size. + RegisterSendCodec('A', codec_opus, 48000, 64000, 480 * 6, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + // Run Opus with 20 ms frame size and different bitrates. + RegisterSendCodec('A', codec_opus, 48000, 40000, 960, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_opus, 48000, 510000, 960, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#endif + // + // Test Mono-To-Stereo for all codecs. 
+ // + audio_channels = 1; + codec_channels = 2; + + test_cntr_++; + channel_a2b_->set_codec_mode(kStereo); + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + channel_a2b_->set_codec_mode(kStereo); + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 8000, 128000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 32000, 512000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#ifdef PCMA_AND_PCMU + test_cntr_++; + channel_a2b_->set_codec_mode(kStereo); + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#endif +#ifdef WEBRTC_CODEC_OPUS + // Keep encode and decode in stereo. + test_cntr_++; + channel_a2b_->set_codec_mode(kStereo); + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_opus, 48000, 64000, 960, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + + // Encode in mono, decode in stereo mode. + RegisterSendCodec('A', codec_opus, 48000, 64000, 960, 1); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#endif + + // + // Test Stereo-To-Mono for all codecs. + // + audio_channels = 2; + codec_channels = 1; + channel_a2b_->set_codec_mode(kMono); + + // Run stereo audio and mono codec. + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_g722, 16000, 64000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 8000, 128000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 16000, 256000, 160, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); + + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_l16, 32000, 512000, 320, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#ifdef PCMA_AND_PCMU + test_cntr_++; + OpenOutFile(test_cntr_); + RegisterSendCodec('A', codec_pcmu, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + RegisterSendCodec('A', codec_pcma, 8000, 64000, 80, codec_channels); + Run(channel_a2b_, audio_channels, codec_channels); + out_file_.Close(); +#endif +#ifdef WEBRTC_CODEC_OPUS + test_cntr_++; + OpenOutFile(test_cntr_); + // Encode and decode in mono. + RegisterSendCodec('A', codec_opus, 48000, 32000, 960, codec_channels); + acm_b_->SetReceiveCodecs({{120, {"OPUS", 48000, 2}}}); + Run(channel_a2b_, audio_channels, codec_channels); + + // Encode in stereo, decode in mono. + RegisterSendCodec('A', codec_opus, 48000, 32000, 960, 2); + Run(channel_a2b_, audio_channels, codec_channels); + + out_file_.Close(); + + // Test switching between decoding mono and stereo for Opus. 
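+ // The switch happens purely on the receive side: re-calling
+ // SetReceiveCodecs() with or without the "stereo" fmtp parameter toggles
+ // the decoder between stereo and mono output while the encoder keeps
+ // sending the same stream.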
+
+ // Decode in mono.
+ test_cntr_++;
+ OpenOutFile(test_cntr_);
+ Run(channel_a2b_, audio_channels, codec_channels);
+ out_file_.Close();
+ // Decode in stereo.
+ test_cntr_++;
+ OpenOutFile(test_cntr_);
+ acm_b_->SetReceiveCodecs({{120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}});
+ Run(channel_a2b_, audio_channels, 2);
+ out_file_.Close();
+ // Decode in mono.
+ test_cntr_++;
+ OpenOutFile(test_cntr_);
+ acm_b_->SetReceiveCodecs({{120, {"OPUS", 48000, 2}}});
+ Run(channel_a2b_, audio_channels, codec_channels);
+ out_file_.Close();
+#endif
+
+ // Delete the file pointers.
+ delete in_file_stereo_;
+ delete in_file_mono_;
+}
+
+// Register codec to use in the test
+//
+// Input: side - which ACM to use, 'A' or 'B'
+// codec_name - name to use when registering the codec
+// sampling_freq_hz - sampling frequency in Hertz
+// rate - bitrate in bits per second
+// pack_size - packet size in samples
+// channels - number of channels; 1 for mono, 2 for stereo
+void TestStereo::RegisterSendCodec(char side,
+ char* codec_name,
+ int32_t sampling_freq_hz,
+ int rate,
+ int pack_size,
+ int channels) {
+ // Store packet size in samples, used to validate the received packet
+ pack_size_samp_ = pack_size;
+
+ // Store the expected packet size in bytes, used to validate the received
+ // packet. Add 0.875 to always round up to a whole byte.
+ pack_size_bytes_ = (uint16_t)(static_cast<float>(pack_size * rate) /
+ static_cast<float>(sampling_freq_hz * 8) +
+ 0.875);
+
+ // Set the pointer to the ACM where the codec will be registered.
+ AudioCodingModule* my_acm = NULL;
+ switch (side) {
+ case 'A': {
+ my_acm = acm_a_.get();
+ break;
+ }
+ case 'B': {
+ my_acm = acm_b_.get();
+ break;
+ }
+ default:
+ break;
+ }
+ ASSERT_TRUE(my_acm != NULL);
+
+ auto encoder_factory = CreateBuiltinAudioEncoderFactory();
+ const int clockrate_hz = absl::EqualsIgnoreCase(codec_name, "g722")
+ ? sampling_freq_hz / 2
+ : sampling_freq_hz;
+ const std::string ptime = rtc::ToString(rtc::CheckedDivExact(
+ pack_size, rtc::CheckedDivExact(sampling_freq_hz, 1000)));
+ SdpAudioFormat::Parameters params = {{"ptime", ptime}};
+ RTC_CHECK(channels == 1 || channels == 2);
+ if (absl::EqualsIgnoreCase(codec_name, "opus")) {
+ if (channels == 2) {
+ params["stereo"] = "1";
+ }
+ channels = 2;
+ params["maxaveragebitrate"] = rtc::ToString(rate);
+ }
+ constexpr int payload_type = 17;
+ auto encoder = encoder_factory->MakeAudioEncoder(
+ payload_type, SdpAudioFormat(codec_name, clockrate_hz, channels, params),
+ absl::nullopt);
+ EXPECT_NE(nullptr, encoder);
+ my_acm->SetEncoder(std::move(encoder));
+
+ send_codec_name_ = codec_name;
+}
+
+void TestStereo::Run(TestPackStereo* channel,
+ int in_channels,
+ int out_channels,
+ int percent_loss) {
+ AudioFrame audio_frame;
+
+ int32_t out_freq_hz_b = out_file_.SamplingFrequency();
+ uint16_t rec_size;
+ uint32_t time_stamp_diff;
+ channel->reset_payload_size();
+ int error_count = 0;
+ int variable_bytes = 0;
+ int variable_packets = 0;
+ // Set test length to 500 ms (50 blocks of 10 ms each).
+ in_file_mono_->SetNum10MsBlocksToRead(50);
+ in_file_stereo_->SetNum10MsBlocksToRead(50);
+ // Fast-forward 1 second (100 blocks) since the files start with silence.
+ in_file_stereo_->FastForward(100);
+ in_file_mono_->FastForward(100);
+
+ while (true) {
+ // Simulate packet loss by setting `lost_packet_` to "true" in
+ // `percent_loss` percent of the loops.
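+ // E.g. with `percent_loss` == 10, floor((100 / 10) + 0.5) == 10, so every
+ // tenth packet is dropped.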
+ if (percent_loss > 0) {
+ if (counter_ == floor((100 / percent_loss) + 0.5)) {
+ counter_ = 0;
+ channel->set_lost_packet(true);
+ } else {
+ channel->set_lost_packet(false);
+ }
+ counter_++;
+ }
+
+ // Add 10 msec to ACM
+ if (in_channels == 1) {
+ if (in_file_mono_->EndOfFile()) {
+ break;
+ }
+ in_file_mono_->Read10MsData(audio_frame);
+ } else {
+ if (in_file_stereo_->EndOfFile()) {
+ break;
+ }
+ in_file_stereo_->Read10MsData(audio_frame);
+ }
+ EXPECT_GE(acm_a_->Add10MsData(audio_frame), 0);
+
+ // Verify that the received packet size matches the settings.
+ rec_size = channel->payload_size();
+ if ((0 < rec_size) && (rec_size < 65535)) {
+ if (strcmp(send_codec_name_, "opus") == 0) {
+ // Opus is a variable rate codec, hence calculate the average packet
+ // size, and later make sure the average is in the right range.
+ variable_bytes += rec_size;
+ variable_packets++;
+ } else {
+ // For fixed rate codecs, check that packet size is correct.
+ if ((rec_size != pack_size_bytes_ * out_channels) &&
+ (pack_size_bytes_ < 65535)) {
+ error_count++;
+ }
+ }
+ // Verify that the timestamp is updated with expected length
+ time_stamp_diff = channel->timestamp_diff();
+ if ((counter_ > 10) && (time_stamp_diff != pack_size_samp_)) {
+ error_count++;
+ }
+ }
+
+ // Run receive side of ACM
+ bool muted;
+ EXPECT_EQ(0, acm_b_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted));
+ ASSERT_FALSE(muted);
+
+ // Write output speech to file
+ out_file_.Write10MsData(
+ audio_frame.data(),
+ audio_frame.samples_per_channel_ * audio_frame.num_channels_);
+ }
+
+ EXPECT_EQ(0, error_count);
+
+ // Check that packet size is in the right range for variable rate codecs,
+ // such as Opus.
+ if (variable_packets > 0) {
+ variable_bytes /= variable_packets;
+ EXPECT_NEAR(variable_bytes, pack_size_bytes_, 18);
+ }
+
+ if (in_file_mono_->EndOfFile()) {
+ in_file_mono_->Rewind();
+ }
+ if (in_file_stereo_->EndOfFile()) {
+ in_file_stereo_->Rewind();
+ }
+ // Reset in case we ended with a lost packet
+ channel->set_lost_packet(false);
+}
+
+void TestStereo::OpenOutFile(int16_t test_number) {
+ std::string file_name;
+ rtc::StringBuilder file_stream;
+ file_stream << webrtc::test::OutputPath() << "teststereo_out_" << test_number
+ << ".pcm";
+ file_name = file_stream.str();
+ out_file_.Open(file_name, 32000, "wb");
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestStereo.h b/third_party/libwebrtc/modules/audio_coding/test/TestStereo.h
new file mode 100644
index 0000000000..4c50a4b555
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TestStereo.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_TEST_TESTSTEREO_H_
+#define MODULES_AUDIO_CODING_TEST_TESTSTEREO_H_
+
+#include <math.h>
+
+#include <memory>
+
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/test/PCMFile.h"
+
+#define PCMA_AND_PCMU
+
+namespace webrtc {
+
+enum StereoMonoMode { kNotSet, kMono, kStereo };
+
+class TestPackStereo : public AudioPacketizationCallback {
+ public:
+ TestPackStereo();
+ ~TestPackStereo();
+
+ void RegisterReceiverACM(AudioCodingModule* acm);
+
+ int32_t SendData(AudioFrameType frame_type,
+ uint8_t payload_type,
+ uint32_t timestamp,
+ const uint8_t* payload_data,
+ size_t payload_size,
+ int64_t absolute_capture_timestamp_ms) override;
+
+ uint16_t payload_size();
+ uint32_t timestamp_diff();
+ void reset_payload_size();
+ void set_codec_mode(StereoMonoMode mode);
+ void set_lost_packet(bool lost);
+
+ private:
+ AudioCodingModule* receiver_acm_;
+ int16_t seq_no_;
+ uint32_t timestamp_diff_;
+ uint32_t last_in_timestamp_;
+ uint64_t total_bytes_;
+ int payload_size_;
+ StereoMonoMode codec_mode_;
+ // Simulate packet losses
+ bool lost_packet_;
+};
+
+class TestStereo {
+ public:
+ TestStereo();
+ ~TestStereo();
+
+ void Perform();
+
+ private:
+ // The default value of '-1' indicates that the registration is based only on
+ // codec name, and a sampling frequency match is not required. This is useful
+ // for codecs that support several sampling frequencies.
+ void RegisterSendCodec(char side,
+ char* codec_name,
+ int32_t samp_freq_hz,
+ int rate,
+ int pack_size,
+ int channels);
+
+ void Run(TestPackStereo* channel,
+ int in_channels,
+ int out_channels,
+ int percent_loss = 0);
+ void OpenOutFile(int16_t test_number);
+
+ std::unique_ptr<AudioCodingModule> acm_a_;
+ std::unique_ptr<AudioCodingModule> acm_b_;
+
+ TestPackStereo* channel_a2b_;
+
+ PCMFile* in_file_stereo_;
+ PCMFile* in_file_mono_;
+ PCMFile out_file_;
+ int16_t test_cntr_;
+ uint16_t pack_size_samp_;
+ uint16_t pack_size_bytes_;
+ int counter_;
+ char* send_codec_name_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_CODING_TEST_TESTSTEREO_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.cc b/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.cc
new file mode 100644
index 0000000000..de26cafb68
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/test/TestVADDTX.h"
+
+#include <string.h>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_decoder_factory_template.h"
+#include "api/audio_codecs/audio_encoder_factory_template.h"
+#include "api/audio_codecs/ilbc/audio_decoder_ilbc.h"
+#include "api/audio_codecs/ilbc/audio_encoder_ilbc.h"
+#include "api/audio_codecs/opus/audio_decoder_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+#include "modules/audio_coding/test/PCMFile.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+MonitoringAudioPacketizationCallback::MonitoringAudioPacketizationCallback(
+ AudioPacketizationCallback* next)
+ : next_(next) {
+ ResetStatistics();
+}
+
+int32_t MonitoringAudioPacketizationCallback::SendData(
+ AudioFrameType frame_type,
+ uint8_t payload_type,
+ uint32_t timestamp,
+ const uint8_t* payload_data,
+ size_t payload_len_bytes,
+ int64_t absolute_capture_timestamp_ms) {
+ counter_[static_cast<int>(frame_type)]++;
+ return next_->SendData(frame_type, payload_type, timestamp, payload_data,
+ payload_len_bytes, absolute_capture_timestamp_ms);
+}
+
+void MonitoringAudioPacketizationCallback::PrintStatistics() {
+ printf("\n");
+ printf("kEmptyFrame %u\n",
+ counter_[static_cast<int>(AudioFrameType::kEmptyFrame)]);
+ printf("kAudioFrameSpeech %u\n",
+ counter_[static_cast<int>(AudioFrameType::kAudioFrameSpeech)]);
+ printf("kAudioFrameCN %u\n",
+ counter_[static_cast<int>(AudioFrameType::kAudioFrameCN)]);
+ printf("\n\n");
+}
+
+void MonitoringAudioPacketizationCallback::ResetStatistics() {
+ memset(counter_, 0, sizeof(counter_));
+}
+
+void MonitoringAudioPacketizationCallback::GetStatistics(uint32_t* counter) {
+ memcpy(counter, counter_, sizeof(counter_));
+}
+
+TestVadDtx::TestVadDtx()
+ : encoder_factory_(
+ CreateAudioEncoderFactory<AudioEncoderIlbc, AudioEncoderOpus>()),
+ decoder_factory_(
+ CreateAudioDecoderFactory<AudioDecoderIlbc, AudioDecoderOpus>()),
+ acm_send_(AudioCodingModule::Create(
+ AudioCodingModule::Config(decoder_factory_))),
+ acm_receive_(AudioCodingModule::Create(
+ AudioCodingModule::Config(decoder_factory_))),
+ channel_(std::make_unique<Channel>()),
+ packetization_callback_(
+ std::make_unique<MonitoringAudioPacketizationCallback>(
+ channel_.get())) {
+ EXPECT_EQ(
+ 0, acm_send_->RegisterTransportCallback(packetization_callback_.get()));
+ channel_->RegisterReceiverACM(acm_receive_.get());
+}
+
+bool TestVadDtx::RegisterCodec(const SdpAudioFormat& codec_format,
+ absl::optional<Vad::Aggressiveness> vad_mode) {
+ constexpr int payload_type = 17, cn_payload_type = 117;
+ bool added_comfort_noise = false;
+
+ auto encoder = encoder_factory_->MakeAudioEncoder(payload_type, codec_format,
+ absl::nullopt);
+ if (vad_mode.has_value() &&
+ !absl::EqualsIgnoreCase(codec_format.name, "opus")) {
+ AudioEncoderCngConfig config;
+ config.speech_encoder = std::move(encoder);
+ config.num_channels = 1;
+ config.payload_type = cn_payload_type;
+ config.vad_mode = vad_mode.value();
+ encoder = CreateComfortNoiseEncoder(std::move(config));
+ added_comfort_noise = true;
+ }
+ channel_->SetIsStereo(encoder->NumChannels() > 1);
+ acm_send_->SetEncoder(std::move(encoder));
+
+ std::map<int, SdpAudioFormat> receive_codecs = {{payload_type, codec_format}};
+ acm_receive_->SetReceiveCodecs(receive_codecs);
+
+ return added_comfort_noise;
+}
+
+// Encode a file and check whether the occurrences of the various packet types
+// follow the expectation.
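+// For example, expects = {0, 1, 1} asserts that no kEmptyFrame packets were
+// sent, while at least one speech packet and at least one CN packet were.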
+void TestVadDtx::Run(absl::string_view in_filename,
+ int frequency,
+ int channels,
+ absl::string_view out_filename,
+ bool append,
+ const int* expects) {
+ packetization_callback_->ResetStatistics();
+
+ PCMFile in_file;
+ in_file.Open(in_filename, frequency, "rb");
+ in_file.ReadStereo(channels > 1);
+ // Set test length to 1000 ms (100 blocks of 10 ms each).
+ in_file.SetNum10MsBlocksToRead(100);
+ // Fast-forward the file 500 ms (50 blocks). The first second of the file is
+ // silence, but we keep half of it to test silence periods.
+ in_file.FastForward(50);
+
+ PCMFile out_file;
+ if (append) {
+ out_file.Open(out_filename, kOutputFreqHz, "ab");
+ } else {
+ out_file.Open(out_filename, kOutputFreqHz, "wb");
+ }
+
+ uint16_t frame_size_samples = in_file.PayloadLength10Ms();
+ AudioFrame audio_frame;
+ while (!in_file.EndOfFile()) {
+ in_file.Read10MsData(audio_frame);
+ audio_frame.timestamp_ = time_stamp_;
+ time_stamp_ += frame_size_samples;
+ EXPECT_GE(acm_send_->Add10MsData(audio_frame), 0);
+ bool muted;
+ acm_receive_->PlayoutData10Ms(kOutputFreqHz, &audio_frame, &muted);
+ ASSERT_FALSE(muted);
+ out_file.Write10MsData(audio_frame);
+ }
+
+ in_file.Close();
+ out_file.Close();
+
+#ifdef PRINT_STAT
+ packetization_callback_->PrintStatistics();
+#endif
+
+ uint32_t stats[3];
+ packetization_callback_->GetStatistics(stats);
+ packetization_callback_->ResetStatistics();
+
+ for (const auto& st : stats) {
+ int i = &st - stats; // Calculate the current position in stats.
+ switch (expects[i]) {
+ case 0: {
+ EXPECT_EQ(0u, st) << "stats[" << i << "] error.";
+ break;
+ }
+ case 1: {
+ EXPECT_GT(st, 0u) << "stats[" << i << "] error.";
+ break;
+ }
+ }
+ }
+}
+
+// Following is the implementation of TestWebRtcVadDtx.
+TestWebRtcVadDtx::TestWebRtcVadDtx() : output_file_num_(0) {}
+
+void TestWebRtcVadDtx::Perform() {
+ RunTestCases({"ILBC", 8000, 1});
+ RunTestCases({"opus", 48000, 2});
+}
+
+// Test various configurations on VAD/DTX.
+void TestWebRtcVadDtx::RunTestCases(const SdpAudioFormat& codec_format) {
+ Test(/*new_outfile=*/true,
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, absl::nullopt));
+
+ Test(/*new_outfile=*/false,
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadAggressive));
+
+ Test(/*new_outfile=*/false,
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadLowBitrate));
+
+ Test(/*new_outfile=*/false, /*expect_dtx_enabled=*/RegisterCodec(
+ codec_format, Vad::kVadVeryAggressive));
+
+ Test(/*new_outfile=*/false,
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadNormal));
+}
+
+// Set the expectation and run the test.
+void TestWebRtcVadDtx::Test(bool new_outfile, bool expect_dtx_enabled) {
+ int expects[] = {-1, 1, expect_dtx_enabled, 0, 0};
+ if (new_outfile) {
+ output_file_num_++;
+ }
+ rtc::StringBuilder out_filename;
+ out_filename << webrtc::test::OutputPath() << "testWebRtcVadDtx_outFile_"
+ << output_file_num_ << ".pcm";
+ Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1,
+ out_filename.str(), !new_outfile, expects);
+}
+
+// Following is the implementation of TestOpusDtx.
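+// Unlike the WebRTC VAD/DTX path above, Opus DTX runs inside the codec: with
+// SetDtx(true) the encoder itself suppresses packets during silence, which
+// this test observes as kEmptyFrame and kAudioFrameCN packets.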
+void TestOpusDtx::Perform() {
+ int expects[] = {0, 1, 0, 0, 0};
+
+ // Register Opus as send codec
+ std::string out_filename =
+ webrtc::test::OutputPath() + "testOpusDtx_outFile_mono.pcm";
+ RegisterCodec({"opus", 48000, 2}, absl::nullopt);
+ acm_send_->ModifyEncoder([](std::unique_ptr<AudioEncoder>* encoder_ptr) {
+ (*encoder_ptr)->SetDtx(false);
+ });
+
+ Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1,
+ out_filename, false, expects);
+
+ acm_send_->ModifyEncoder([](std::unique_ptr<AudioEncoder>* encoder_ptr) {
+ (*encoder_ptr)->SetDtx(true);
+ });
+ expects[static_cast<int>(AudioFrameType::kEmptyFrame)] = 1;
+ expects[static_cast<int>(AudioFrameType::kAudioFrameCN)] = 1;
+ Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1,
+ out_filename, true, expects);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.h b/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.h
new file mode 100644
index 0000000000..d81ae28beb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TestVADDTX.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_TEST_TESTVADDTX_H_
+#define MODULES_AUDIO_CODING_TEST_TESTVADDTX_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_decoder_factory.h"
+#include "api/audio_codecs/audio_encoder_factory.h"
+#include "common_audio/vad/include/vad.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/audio_coding/test/Channel.h"
+
+namespace webrtc {
+
+// This class records the frame type, and delegates actual sending to the
+// `next_` AudioPacketizationCallback.
+class MonitoringAudioPacketizationCallback : public AudioPacketizationCallback {
+ public:
+ explicit MonitoringAudioPacketizationCallback(
+ AudioPacketizationCallback* next);
+
+ int32_t SendData(AudioFrameType frame_type,
+ uint8_t payload_type,
+ uint32_t timestamp,
+ const uint8_t* payload_data,
+ size_t payload_len_bytes,
+ int64_t absolute_capture_timestamp_ms) override;
+
+ void PrintStatistics();
+ void ResetStatistics();
+ void GetStatistics(uint32_t* stats);
+
+ private:
+ // 0 - kEmptyFrame
+ // 1 - kAudioFrameSpeech
+ // 2 - kAudioFrameCN
+ uint32_t counter_[3];
+ AudioPacketizationCallback* const next_;
+};
+
+// TestVadDtx verifies that VAD/DTX perform as they should. It runs through
+// an audio file and checks whether the occurrence of various packet types
+// follows expectation. TestVadDtx needs its derived classes to implement
+// Perform() to put the test together.
+class TestVadDtx {
+ public:
+ static const int kOutputFreqHz = 16000;
+
+ TestVadDtx();
+
+ protected:
+ // Returns true iff CN was added.
+ bool RegisterCodec(const SdpAudioFormat& codec_format,
+ absl::optional<Vad::Aggressiveness> vad_mode);
+
+ // Encodes a file and checks whether the occurrences of the various packet
+ // types follow the expectation. Saves the result to a file.
+ // expects[x] means
+ // -1 : do not care,
+ // 0 : there have been no packets of type `x`,
+ // 1 : there have been packets of type `x`,
+ // where `x` indicates one of the following packet types:
+ // 0 - kEmptyFrame
+ // 1 - kAudioFrameSpeech
+ // 2 - kAudioFrameCN
+ void Run(absl::string_view in_filename,
+ int frequency,
+ int channels,
+ absl::string_view out_filename,
+ bool append,
+ const int* expects);
+
+ const rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
+ const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
+ std::unique_ptr<AudioCodingModule> acm_send_;
+ std::unique_ptr<AudioCodingModule> acm_receive_;
+ std::unique_ptr<Channel> channel_;
+ std::unique_ptr<MonitoringAudioPacketizationCallback> packetization_callback_;
+ uint32_t time_stamp_ = 0x12345678;
+};
+
+// TestWebRtcVadDtx verifies that the WebRTC VAD/DTX perform as they should.
+class TestWebRtcVadDtx final : public TestVadDtx {
+ public:
+ TestWebRtcVadDtx();
+
+ void Perform();
+
+ private:
+ void RunTestCases(const SdpAudioFormat& codec_format);
+ void Test(bool new_outfile, bool expect_dtx_enabled);
+
+ int output_file_num_;
+};
+
+// TestOpusDtx verifies that the Opus DTX performs as it should.
+class TestOpusDtx final : public TestVadDtx {
+ public:
+ void Perform();
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_CODING_TEST_TESTVADDTX_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/test/Tester.cc b/third_party/libwebrtc/modules/audio_coding/test/Tester.cc
new file mode 100644
index 0000000000..7612aa43a3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/Tester.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include 
+
+#include 
+#include 
+
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/test/EncodeDecodeTest.h"
+#include "modules/audio_coding/test/PacketLossTest.h"
+#include "modules/audio_coding/test/TestAllCodecs.h"
+#include "modules/audio_coding/test/TestRedFec.h"
+#include "modules/audio_coding/test/TestStereo.h"
+#include "modules/audio_coding/test/TestVADDTX.h"
+#include "modules/audio_coding/test/TwoWayCommunication.h"
+#include "modules/audio_coding/test/opus_test.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+TEST(AudioCodingModuleTest, TestAllCodecs) {
+ webrtc::TestAllCodecs().Perform();
+}
+
+#if defined(WEBRTC_ANDROID)
+TEST(AudioCodingModuleTest, DISABLED_TestEncodeDecode) {
+#else
+TEST(AudioCodingModuleTest, TestEncodeDecode) {
+#endif
+ webrtc::EncodeDecodeTest().Perform();
+}
+
+TEST(AudioCodingModuleTest, TestRedFec) {
+ webrtc::TestRedFec().Perform();
+}
+
+// Disabled on ios as flaky, see https://crbug.com/webrtc/7057
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+TEST(AudioCodingModuleTest, DISABLED_TestStereo) {
+#else
+TEST(AudioCodingModuleTest, TestStereo) {
+#endif
+ webrtc::TestStereo().Perform();
+}
+
+TEST(AudioCodingModuleTest, TestWebRtcVadDtx) {
+ webrtc::TestWebRtcVadDtx().Perform();
+}
+
+TEST(AudioCodingModuleTest, TestOpusDtx) {
+ webrtc::TestOpusDtx().Perform();
+}
+
+// Disabled on ios as flaky, see https://crbug.com/webrtc/7057
+#if defined(WEBRTC_IOS)
+TEST(AudioCodingModuleTest, DISABLED_TestOpus) {
+#else
+TEST(AudioCodingModuleTest, TestOpus) {
+#endif
+ webrtc::OpusTest().Perform();
+}
+
+TEST(AudioCodingModuleTest, TestPacketLoss) {
+ webrtc::PacketLossTest(1, 10, 10, 1).Perform();
+}
+
+TEST(AudioCodingModuleTest, TestPacketLossBurst) {
+ webrtc::PacketLossTest(1, 10, 10, 2).Perform();
+}
+
+// Disabled on ios as flaky, see https://crbug.com/webrtc/7057
+#if defined(WEBRTC_IOS)
+TEST(AudioCodingModuleTest, DISABLED_TestPacketLossStereo) {
+#else
+TEST(AudioCodingModuleTest, TestPacketLossStereo) {
+#endif
+ webrtc::PacketLossTest(2, 10, 10, 1).Perform();
+}
+
+// Disabled on ios as flaky, see https://crbug.com/webrtc/7057
+#if defined(WEBRTC_IOS)
+TEST(AudioCodingModuleTest, DISABLED_TestPacketLossStereoBurst) {
+#else
+TEST(AudioCodingModuleTest, TestPacketLossStereoBurst) {
+#endif
+ webrtc::PacketLossTest(2, 10, 10, 2).Perform();
+}
+
+// The full API test is too long to run automatically on bots, but can be used
+// for offline testing. User interaction is needed.
+#ifdef ACM_TEST_FULL_API
+TEST(AudioCodingModuleTest, TestAPI) {
+ webrtc::APITest().Perform();
+}
+#endif
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.cc b/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.cc
new file mode 100644
index 0000000000..b42415a21a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.cc
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "TwoWayCommunication.h" + +#include +#include + +#include + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "modules/audio_coding/test/PCMFile.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +#define MAX_FILE_NAME_LENGTH_BYTE 500 + +TwoWayCommunication::TwoWayCommunication() + : _acmA(AudioCodingModule::Create( + AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))), + _acmRefA(AudioCodingModule::Create( + AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))) { + AudioCodingModule::Config config; + // The clicks will be more obvious if time-stretching is not allowed. + // TODO(henrik.lundin) Really? + config.neteq_config.for_test_no_time_stretching = true; + config.decoder_factory = CreateBuiltinAudioDecoderFactory(); + _acmB.reset(AudioCodingModule::Create(config)); + _acmRefB.reset(AudioCodingModule::Create(config)); +} + +TwoWayCommunication::~TwoWayCommunication() { + delete _channel_A2B; + delete _channel_B2A; + delete _channelRef_A2B; + delete _channelRef_B2A; + _inFileA.Close(); + _inFileB.Close(); + _outFileA.Close(); + _outFileB.Close(); + _outFileRefA.Close(); + _outFileRefB.Close(); +} + +void TwoWayCommunication::SetUpAutotest( + AudioEncoderFactory* const encoder_factory, + const SdpAudioFormat& format1, + const int payload_type1, + const SdpAudioFormat& format2, + const int payload_type2) { + //--- Set A codecs + _acmA->SetEncoder( + encoder_factory->MakeAudioEncoder(payload_type1, format1, absl::nullopt)); + _acmA->SetReceiveCodecs({{payload_type2, format2}}); + + //--- Set ref-A codecs + _acmRefA->SetEncoder( + encoder_factory->MakeAudioEncoder(payload_type1, format1, absl::nullopt)); + _acmRefA->SetReceiveCodecs({{payload_type2, format2}}); + + //--- Set B codecs + _acmB->SetEncoder( + encoder_factory->MakeAudioEncoder(payload_type2, format2, absl::nullopt)); + _acmB->SetReceiveCodecs({{payload_type1, format1}}); + + //--- Set ref-B codecs + _acmRefB->SetEncoder( + encoder_factory->MakeAudioEncoder(payload_type2, format2, absl::nullopt)); + _acmRefB->SetReceiveCodecs({{payload_type1, format1}}); + + uint16_t frequencyHz; + + //--- Input A and B + std::string in_file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + frequencyHz = 16000; + _inFileA.Open(in_file_name, frequencyHz, "rb"); + _inFileB.Open(in_file_name, frequencyHz, "rb"); + + //--- Output A + std::string output_file_a = webrtc::test::OutputPath() + "outAutotestA.pcm"; + frequencyHz = 16000; + _outFileA.Open(output_file_a, frequencyHz, "wb"); + std::string output_ref_file_a = + webrtc::test::OutputPath() + "ref_outAutotestA.pcm"; + _outFileRefA.Open(output_ref_file_a, frequencyHz, "wb"); + + //--- Output B + std::string output_file_b = webrtc::test::OutputPath() + "outAutotestB.pcm"; + frequencyHz = 16000; + _outFileB.Open(output_file_b, frequencyHz, "wb"); + std::string output_ref_file_b = + webrtc::test::OutputPath() + "ref_outAutotestB.pcm"; + _outFileRefB.Open(output_ref_file_b, frequencyHz, "wb"); + + //--- Set A-to-B channel + _channel_A2B = new Channel; + _acmA->RegisterTransportCallback(_channel_A2B); + _channel_A2B->RegisterReceiverACM(_acmB.get()); + //--- Do the same for the reference + _channelRef_A2B = new Channel; + _acmRefA->RegisterTransportCallback(_channelRef_A2B); + _channelRef_A2B->RegisterReceiverACM(_acmRefB.get()); + + //--- Set B-to-A channel + _channel_B2A = new Channel; + 
_acmB->RegisterTransportCallback(_channel_B2A);
+ _channel_B2A->RegisterReceiverACM(_acmA.get());
+ //--- Do the same for reference
+ _channelRef_B2A = new Channel;
+ _acmRefB->RegisterTransportCallback(_channelRef_B2A);
+ _channelRef_B2A->RegisterReceiverACM(_acmRefA.get());
+}
+
+void TwoWayCommunication::Perform() {
+ const SdpAudioFormat format1("ISAC", 16000, 1);
+ const SdpAudioFormat format2("L16", 8000, 1);
+ constexpr int payload_type1 = 17, payload_type2 = 18;
+
+ auto encoder_factory = CreateBuiltinAudioEncoderFactory();
+
+ SetUpAutotest(encoder_factory.get(), format1, payload_type1, format2,
+ payload_type2);
+
+ unsigned int msecPassed = 0;
+ unsigned int secPassed = 0;
+
+ int32_t outFreqHzA = _outFileA.SamplingFrequency();
+ int32_t outFreqHzB = _outFileB.SamplingFrequency();
+
+ AudioFrame audioFrame;
+
+ // In the following loop we test that the code can handle misuse of the APIs.
+ // In the middle of a session with data flowing between two sides, called A
+ // and B, APIs will be called, and the code should continue to run and be
+ // able to recover.
+ while (!_inFileA.EndOfFile() && !_inFileB.EndOfFile()) {
+ msecPassed += 10;
+ EXPECT_GT(_inFileA.Read10MsData(audioFrame), 0);
+ EXPECT_GE(_acmA->Add10MsData(audioFrame), 0);
+ EXPECT_GE(_acmRefA->Add10MsData(audioFrame), 0);
+
+ EXPECT_GT(_inFileB.Read10MsData(audioFrame), 0);
+
+ EXPECT_GE(_acmB->Add10MsData(audioFrame), 0);
+ EXPECT_GE(_acmRefB->Add10MsData(audioFrame), 0);
+ bool muted;
+ EXPECT_EQ(0, _acmA->PlayoutData10Ms(outFreqHzA, &audioFrame, &muted));
+ ASSERT_FALSE(muted);
+ _outFileA.Write10MsData(audioFrame);
+ EXPECT_EQ(0, _acmRefA->PlayoutData10Ms(outFreqHzA, &audioFrame, &muted));
+ ASSERT_FALSE(muted);
+ _outFileRefA.Write10MsData(audioFrame);
+ EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
+ ASSERT_FALSE(muted);
+ _outFileB.Write10MsData(audioFrame);
+ EXPECT_EQ(0, _acmRefB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
+ ASSERT_FALSE(muted);
+ _outFileRefB.Write10MsData(audioFrame);
+
+ // Update time counters each time a second of data has passed.
+ if (msecPassed >= 1000) {
+ msecPassed = 0;
+ secPassed++;
+ }
+ // Re-register send codec on side B.
+ if (((secPassed % 5) == 4) && (msecPassed >= 990)) {
+ _acmB->SetEncoder(encoder_factory->MakeAudioEncoder(
+ payload_type2, format2, absl::nullopt));
+ }
+ // Initialize receiver on side A.
+ if (((secPassed % 7) == 6) && (msecPassed == 0)) {
+ EXPECT_EQ(0, _acmA->InitializeReceiver());
+ }
+ // Re-register codec on side A.
+ if (((secPassed % 7) == 6) && (msecPassed >= 990)) {
+ _acmA->SetReceiveCodecs({{payload_type2, format2}});
+ }
+ }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.h b/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.h
new file mode 100644
index 0000000000..b7eb9e5583
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/TwoWayCommunication.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_TEST_TWOWAYCOMMUNICATION_H_
+#define MODULES_AUDIO_CODING_TEST_TWOWAYCOMMUNICATION_H_
+
+#include <memory>
+
+#include "api/audio_codecs/audio_encoder_factory.h"
+#include "api/audio_codecs/audio_format.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/audio_coding/test/Channel.h"
+#include "modules/audio_coding/test/PCMFile.h"
+
+namespace webrtc {
+
+class TwoWayCommunication {
+ public:
+ TwoWayCommunication();
+ ~TwoWayCommunication();
+
+ void Perform();
+
+ private:
+ void SetUpAutotest(AudioEncoderFactory* const encoder_factory,
+ const SdpAudioFormat& format1,
+ int payload_type1,
+ const SdpAudioFormat& format2,
+ int payload_type2);
+
+ std::unique_ptr<AudioCodingModule> _acmA;
+ std::unique_ptr<AudioCodingModule> _acmB;
+
+ std::unique_ptr<AudioCodingModule> _acmRefA;
+ std::unique_ptr<AudioCodingModule> _acmRefB;
+
+ Channel* _channel_A2B;
+ Channel* _channel_B2A;
+
+ Channel* _channelRef_A2B;
+ Channel* _channelRef_B2A;
+
+ PCMFile _inFileA;
+ PCMFile _inFileB;
+
+ PCMFile _outFileA;
+ PCMFile _outFileB;
+
+ PCMFile _outFileRefA;
+ PCMFile _outFileRefB;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_CODING_TEST_TWOWAYCOMMUNICATION_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/test/opus_test.cc b/third_party/libwebrtc/modules/audio_coding/test/opus_test.cc
new file mode 100644
index 0000000000..6822bc3d72
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/opus_test.cc
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/test/opus_test.h"
+
+#include <string>
+
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/audio_coding/test/TestStereo.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+OpusTest::OpusTest()
+ : acm_receiver_(AudioCodingModule::Create(
+ AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))),
+ channel_a2b_(NULL),
+ counter_(0),
+ payload_type_(255),
+ rtp_timestamp_(0) {}
+
+OpusTest::~OpusTest() {
+ if (channel_a2b_ != NULL) {
+ delete channel_a2b_;
+ channel_a2b_ = NULL;
+ }
+ if (opus_mono_encoder_ != NULL) {
+ WebRtcOpus_EncoderFree(opus_mono_encoder_);
+ opus_mono_encoder_ = NULL;
+ }
+ if (opus_stereo_encoder_ != NULL) {
+ WebRtcOpus_EncoderFree(opus_stereo_encoder_);
+ opus_stereo_encoder_ = NULL;
+ }
+ if (opus_mono_decoder_ != NULL) {
+ WebRtcOpus_DecoderFree(opus_mono_decoder_);
+ opus_mono_decoder_ = NULL;
+ }
+ if (opus_stereo_decoder_ != NULL) {
+ WebRtcOpus_DecoderFree(opus_stereo_decoder_);
+ opus_stereo_decoder_ = NULL;
+ }
+}
+
+void OpusTest::Perform() {
+#ifndef WEBRTC_CODEC_OPUS
+ // Opus isn't defined, exit.
+ return;
+#else
+ uint16_t frequency_hz;
+ size_t audio_channels;
+ int16_t test_cntr = 0;
+
+ // Open both mono and stereo test files in 32 kHz.
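+ // The 32 kHz input is resampled to 48 kHz in Run() before Opus encoding.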
+ const std::string file_name_stereo = + webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"); + const std::string file_name_mono = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + frequency_hz = 32000; + in_file_stereo_.Open(file_name_stereo, frequency_hz, "rb"); + in_file_stereo_.ReadStereo(true); + in_file_mono_.Open(file_name_mono, frequency_hz, "rb"); + in_file_mono_.ReadStereo(false); + + // Create Opus encoders for mono and stereo. + ASSERT_GT(WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1, 0, 48000), -1); + ASSERT_GT(WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2, 1, 48000), -1); + + // Create Opus decoders for mono and stereo for stand-alone testing of Opus. + ASSERT_GT(WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1, 48000), -1); + ASSERT_GT(WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2, 48000), -1); + WebRtcOpus_DecoderInit(opus_mono_decoder_); + WebRtcOpus_DecoderInit(opus_stereo_decoder_); + + ASSERT_TRUE(acm_receiver_.get() != NULL); + EXPECT_EQ(0, acm_receiver_->InitializeReceiver()); + + // Register Opus stereo as receiving codec. + constexpr int kOpusPayloadType = 120; + const SdpAudioFormat kOpusFormatStereo("opus", 48000, 2, {{"stereo", "1"}}); + payload_type_ = kOpusPayloadType; + acm_receiver_->SetReceiveCodecs({{kOpusPayloadType, kOpusFormatStereo}}); + + // Create and connect the channel. + channel_a2b_ = new TestPackStereo; + channel_a2b_->RegisterReceiverACM(acm_receiver_.get()); + + // + // Test Stereo. + // + + channel_a2b_->set_codec_mode(kStereo); + audio_channels = 2; + test_cntr++; + OpenOutFile(test_cntr); + + // Run Opus with 2.5 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 120); + + // Run Opus with 5 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 240); + + // Run Opus with 10 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 480); + + // Run Opus with 20 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 960); + + // Run Opus with 40 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 1920); + + // Run Opus with 60 ms frame size. + Run(channel_a2b_, audio_channels, 64000, 2880); + + out_file_.Close(); + out_file_standalone_.Close(); + + // + // Test Opus stereo with packet-losses. + // + + test_cntr++; + OpenOutFile(test_cntr); + + // Run Opus with 20 ms frame size, 1% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 1); + + // Run Opus with 20 ms frame size, 5% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 5); + + // Run Opus with 20 ms frame size, 10% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 10); + + out_file_.Close(); + out_file_standalone_.Close(); + + // + // Test Mono. + // + channel_a2b_->set_codec_mode(kMono); + audio_channels = 1; + test_cntr++; + OpenOutFile(test_cntr); + + // Register Opus mono as receiving codec. + const SdpAudioFormat kOpusFormatMono("opus", 48000, 2); + acm_receiver_->SetReceiveCodecs({{kOpusPayloadType, kOpusFormatMono}}); + + // Run Opus with 2.5 ms frame size. + Run(channel_a2b_, audio_channels, 32000, 120); + + // Run Opus with 5 ms frame size. + Run(channel_a2b_, audio_channels, 32000, 240); + + // Run Opus with 10 ms frame size. + Run(channel_a2b_, audio_channels, 32000, 480); + + // Run Opus with 20 ms frame size. + Run(channel_a2b_, audio_channels, 32000, 960); + + // Run Opus with 40 ms frame size. + Run(channel_a2b_, audio_channels, 32000, 1920); + + // Run Opus with 60 ms frame size. 
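+ // Frame lengths are given in 48 kHz samples, so 2880 samples == 60 ms.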
+ Run(channel_a2b_, audio_channels, 32000, 2880); + + out_file_.Close(); + out_file_standalone_.Close(); + + // + // Test Opus mono with packet-losses. + // + test_cntr++; + OpenOutFile(test_cntr); + + // Run Opus with 20 ms frame size, 1% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 1); + + // Run Opus with 20 ms frame size, 5% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 5); + + // Run Opus with 20 ms frame size, 10% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 10); + + // Close the files. + in_file_stereo_.Close(); + in_file_mono_.Close(); + out_file_.Close(); + out_file_standalone_.Close(); +#endif +} + +void OpusTest::Run(TestPackStereo* channel, + size_t channels, + int bitrate, + size_t frame_length, + int percent_loss) { + AudioFrame audio_frame; + int32_t out_freq_hz_b = out_file_.SamplingFrequency(); + const size_t kBufferSizeSamples = 480 * 12 * 2; // 120 ms stereo audio. + int16_t audio[kBufferSizeSamples]; + int16_t out_audio[kBufferSizeSamples]; + int16_t audio_type; + size_t written_samples = 0; + size_t read_samples = 0; + size_t decoded_samples = 0; + bool first_packet = true; + uint32_t start_time_stamp = 0; + + channel->reset_payload_size(); + counter_ = 0; + + // Set encoder rate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_mono_encoder_, bitrate)); + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_stereo_encoder_, bitrate)); + +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_ARCH_ARM) + // If we are on Android, iOS and/or ARM, use a lower complexity setting as + // default. + const int kOpusComplexity5 = 5; + EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_mono_encoder_, kOpusComplexity5)); + EXPECT_EQ(0, + WebRtcOpus_SetComplexity(opus_stereo_encoder_, kOpusComplexity5)); +#endif + + // Fast-forward 1 second (100 blocks) since the files start with silence. + in_file_stereo_.FastForward(100); + in_file_mono_.FastForward(100); + + // Limit the runtime to 1000 blocks of 10 ms each. + for (size_t audio_length = 0; audio_length < 1000; audio_length += 10) { + bool lost_packet = false; + + // Get 10 msec of audio. + if (channels == 1) { + if (in_file_mono_.EndOfFile()) { + break; + } + in_file_mono_.Read10MsData(audio_frame); + } else { + if (in_file_stereo_.EndOfFile()) { + break; + } + in_file_stereo_.Read10MsData(audio_frame); + } + + // If input audio is sampled at 32 kHz, resampling to 48 kHz is required. + EXPECT_EQ(480, resampler_.Resample10Msec( + audio_frame.data(), audio_frame.sample_rate_hz_, 48000, + channels, kBufferSizeSamples - written_samples, + &audio[written_samples])); + written_samples += 480 * channels; + + // Sometimes we need to loop over the audio vector to produce the right + // number of packets. + size_t loop_encode = + (written_samples - read_samples) / (channels * frame_length); + + if (loop_encode > 0) { + const size_t kMaxBytes = 1000; // Maximum number of bytes for one packet. + size_t bitstream_len_byte; + uint8_t bitstream[kMaxBytes]; + for (size_t i = 0; i < loop_encode; i++) { + int bitstream_len_byte_int = WebRtcOpus_Encode( + (channels == 1) ? opus_mono_encoder_ : opus_stereo_encoder_, + &audio[read_samples], frame_length, kMaxBytes, bitstream); + ASSERT_GE(bitstream_len_byte_int, 0); + bitstream_len_byte = static_cast(bitstream_len_byte_int); + + // Simulate packet loss by setting `packet_loss_` to "true" in + // `percent_loss` percent of the loops. + // TODO(tlegrand): Move handling of loss simulation to TestPackStereo. 
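+        // With a loss rate of `percent_loss` percent, roughly every
+        // (100 / percent_loss)-th packet is flagged as lost below, e.g.
+        // every 10th packet for 10% loss.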
+ if (percent_loss > 0) { + if (counter_ == floor((100 / percent_loss) + 0.5)) { + counter_ = 0; + lost_packet = true; + channel->set_lost_packet(true); + } else { + lost_packet = false; + channel->set_lost_packet(false); + } + counter_++; + } + + // Run stand-alone Opus decoder, or decode PLC. + if (channels == 1) { + if (!lost_packet) { + decoded_samples += WebRtcOpus_Decode( + opus_mono_decoder_, bitstream, bitstream_len_byte, + &out_audio[decoded_samples * channels], &audio_type); + } else { + // Call decoder PLC. + constexpr int kPlcDurationMs = 10; + constexpr int kPlcSamples = 48 * kPlcDurationMs; + size_t total_plc_samples = 0; + while (total_plc_samples < frame_length) { + int ret = WebRtcOpus_Decode( + opus_mono_decoder_, NULL, 0, + &out_audio[decoded_samples * channels], &audio_type); + EXPECT_EQ(ret, kPlcSamples); + decoded_samples += ret; + total_plc_samples += ret; + } + EXPECT_EQ(total_plc_samples, frame_length); + } + } else { + if (!lost_packet) { + decoded_samples += WebRtcOpus_Decode( + opus_stereo_decoder_, bitstream, bitstream_len_byte, + &out_audio[decoded_samples * channels], &audio_type); + } else { + // Call decoder PLC. + constexpr int kPlcDurationMs = 10; + constexpr int kPlcSamples = 48 * kPlcDurationMs; + size_t total_plc_samples = 0; + while (total_plc_samples < frame_length) { + int ret = WebRtcOpus_Decode( + opus_stereo_decoder_, NULL, 0, + &out_audio[decoded_samples * channels], &audio_type); + EXPECT_EQ(ret, kPlcSamples); + decoded_samples += ret; + total_plc_samples += ret; + } + EXPECT_EQ(total_plc_samples, frame_length); + } + } + + // Send data to the channel. "channel" will handle the loss simulation. + channel->SendData(AudioFrameType::kAudioFrameSpeech, payload_type_, + rtp_timestamp_, bitstream, bitstream_len_byte, 0); + if (first_packet) { + first_packet = false; + start_time_stamp = rtp_timestamp_; + } + rtp_timestamp_ += static_cast(frame_length); + read_samples += frame_length * channels; + } + if (read_samples == written_samples) { + read_samples = 0; + written_samples = 0; + } + } + + // Run received side of ACM. + bool muted; + ASSERT_EQ( + 0, acm_receiver_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted)); + ASSERT_FALSE(muted); + + // Write output speech to file. + out_file_.Write10MsData( + audio_frame.data(), + audio_frame.samples_per_channel_ * audio_frame.num_channels_); + + // Write stand-alone speech to file. + out_file_standalone_.Write10MsData(out_audio, decoded_samples * channels); + + if (audio_frame.timestamp_ > start_time_stamp) { + // Number of channels should be the same for both stand-alone and + // ACM-decoding. + EXPECT_EQ(audio_frame.num_channels_, channels); + } + + decoded_samples = 0; + } + + if (in_file_mono_.EndOfFile()) { + in_file_mono_.Rewind(); + } + if (in_file_stereo_.EndOfFile()) { + in_file_stereo_.Rewind(); + } + // Reset in case we ended with a lost packet. 
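+  // Otherwise the next call to Run() would start with the channel still in
+  // a lost-packet state.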
+  channel->set_lost_packet(false);
+}
+
+void OpusTest::OpenOutFile(int test_number) {
+  std::string file_name;
+  std::stringstream file_stream;
+  file_stream << webrtc::test::OutputPath() << "opustest_out_" << test_number
+              << ".pcm";
+  file_name = file_stream.str();
+  out_file_.Open(file_name, 48000, "wb");
+  file_stream.str("");
+  file_stream << webrtc::test::OutputPath() << "opusstandalone_out_"
+              << test_number << ".pcm";
+  file_name = file_stream.str();
+  out_file_standalone_.Open(file_name, 48000, "wb");
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/test/opus_test.h b/third_party/libwebrtc/modules/audio_coding/test/opus_test.h
new file mode 100644
index 0000000000..c69f922adb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/opus_test.h
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_TEST_OPUS_TEST_H_
+#define MODULES_AUDIO_CODING_TEST_OPUS_TEST_H_
+
+#include <math.h>
+
+#include <memory>
+
+#include "modules/audio_coding/acm2/acm_resampler.h"
+#include "modules/audio_coding/codecs/opus/opus_interface.h"
+#include "modules/audio_coding/test/PCMFile.h"
+#include "modules/audio_coding/test/TestStereo.h"
+
+namespace webrtc {
+
+class OpusTest {
+ public:
+  OpusTest();
+  ~OpusTest();
+
+  void Perform();
+
+ private:
+  void Run(TestPackStereo* channel,
+           size_t channels,
+           int bitrate,
+           size_t frame_length,
+           int percent_loss = 0);
+
+  void OpenOutFile(int test_number);
+
+  std::unique_ptr<AudioCodingModule> acm_receiver_;
+  TestPackStereo* channel_a2b_;
+  PCMFile in_file_stereo_;
+  PCMFile in_file_mono_;
+  PCMFile out_file_;
+  PCMFile out_file_standalone_;
+  int counter_;
+  uint8_t payload_type_;
+  uint32_t rtp_timestamp_;
+  acm2::ACMResampler resampler_;
+  WebRtcOpusEncInst* opus_mono_encoder_;
+  WebRtcOpusEncInst* opus_stereo_encoder_;
+  WebRtcOpusDecInst* opus_mono_decoder_;
+  WebRtcOpusDecInst* opus_stereo_decoder_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_TEST_OPUS_TEST_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/test/target_delay_unittest.cc b/third_party/libwebrtc/modules/audio_coding/test/target_delay_unittest.cc
new file mode 100644
index 0000000000..5eccdcf8eb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/test/target_delay_unittest.cc
@@ -0,0 +1,161 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include
+
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/rtp_headers.h"
+#include "modules/audio_coding/acm2/acm_receiver.h"
+#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+class TargetDelayTest : public ::testing::Test {
+ protected:
+  TargetDelayTest()
+      : receiver_(
+            AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory())) {}
+
+  ~TargetDelayTest() {}
+
+  void SetUp() {
+    constexpr int pltype = 108;
+    std::map<int, SdpAudioFormat> receive_codecs = {
+        {pltype, {"L16", kSampleRateHz, 1}}};
+    receiver_.SetCodecs(receive_codecs);
+
+    rtp_header_.payloadType = pltype;
+    rtp_header_.timestamp = 0;
+    rtp_header_.ssrc = 0x12345678;
+    rtp_header_.markerBit = false;
+    rtp_header_.sequenceNumber = 0;
+
+    int16_t audio[kFrameSizeSamples];
+    const int kRange = 0x7FF;  // 2047, easy for masking.
+    for (size_t n = 0; n < kFrameSizeSamples; ++n)
+      audio[n] = (rand() & kRange) - kRange / 2;
+    WebRtcPcm16b_Encode(audio, kFrameSizeSamples, payload_);
+  }
+
+  void OutOfRangeInput() {
+    EXPECT_EQ(-1, SetMinimumDelay(-1));
+    EXPECT_EQ(-1, SetMinimumDelay(10001));
+  }
+
+  void TargetDelayBufferMinMax() {
+    const int kTargetMinDelayMs = kNum10msPerFrame * 10;
+    ASSERT_EQ(0, SetMinimumDelay(kTargetMinDelayMs));
+    for (int m = 0; m < 30; ++m)  // Run enough iterations to fill the buffer.
+      Run(true);
+    int clean_optimal_delay = GetCurrentOptimalDelayMs();
+    EXPECT_EQ(kTargetMinDelayMs, clean_optimal_delay);
+
+    const int kTargetMaxDelayMs = 2 * (kNum10msPerFrame * 10);
+    ASSERT_EQ(0, SetMaximumDelay(kTargetMaxDelayMs));
+    for (int n = 0; n < 30; ++n)  // Run enough iterations to fill the buffer.
+      Run(false);
+
+    int capped_optimal_delay = GetCurrentOptimalDelayMs();
+    EXPECT_EQ(kTargetMaxDelayMs, capped_optimal_delay);
+  }
+
+ private:
+  static const int kSampleRateHz = 16000;
+  static const int kNum10msPerFrame = 2;
+  static const size_t kFrameSizeSamples = 320;  // 20 ms @ 16 kHz.
+  // payload-len = frame-samples * 2 bytes/sample.
+  static const int kPayloadLenBytes = 320 * 2;
+  // Inter-arrival time in number of packets in a jittery channel. One is no
+  // jitter.
+  static const int kInterarrivalJitterPacket = 2;
+
+  void Push() {
+    rtp_header_.timestamp += kFrameSizeSamples;
+    rtp_header_.sequenceNumber++;
+    ASSERT_EQ(0, receiver_.InsertPacket(rtp_header_,
+                                        rtc::ArrayView<const uint8_t>(
+                                            payload_, kFrameSizeSamples * 2)));
+  }
+
+  // Pull audio equivalent to the amount of audio in one RTP packet.
+  void Pull() {
+    AudioFrame frame;
+    bool muted;
+    for (int k = 0; k < kNum10msPerFrame; ++k) {  // Pull one frame.
+      ASSERT_EQ(0, receiver_.GetAudio(-1, &frame, &muted));
+      ASSERT_FALSE(muted);
+      // Had to use ASSERT_TRUE, ASSERT_EQ generated error.
+      ASSERT_TRUE(kSampleRateHz == frame.sample_rate_hz_);
+      ASSERT_EQ(1u, frame.num_channels_);
+      ASSERT_TRUE(kSampleRateHz / 100 == frame.samples_per_channel_);
+    }
+  }
+
+  void Run(bool clean) {
+    for (int n = 0; n < 10; ++n) {
+      for (int m = 0; m < 5; ++m) {
+        Push();
+        Pull();
+      }
+
+      if (!clean) {
+        for (int m = 0; m < 10; ++m) {  // Long enough to trigger delay change.
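+          // Each iteration inserts one packet but pulls
+          // kInterarrivalJitterPacket packets' worth of audio, which makes
+          // packets appear to arrive late relative to playout.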
+ Push(); + for (int n = 0; n < kInterarrivalJitterPacket; ++n) + Pull(); + } + } + } + } + + int SetMinimumDelay(int delay_ms) { + return receiver_.SetMinimumDelay(delay_ms); + } + + int SetMaximumDelay(int delay_ms) { + return receiver_.SetMaximumDelay(delay_ms); + } + + int GetCurrentOptimalDelayMs() { + NetworkStatistics stats; + receiver_.GetNetworkStatistics(&stats); + return stats.preferredBufferSize; + } + + acm2::AcmReceiver receiver_; + RTPHeader rtp_header_; + uint8_t payload_[kPayloadLenBytes]; +}; + +// Flaky on iOS: webrtc:7057. +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_OutOfRangeInput DISABLED_OutOfRangeInput +#else +#define MAYBE_OutOfRangeInput OutOfRangeInput +#endif +TEST_F(TargetDelayTest, MAYBE_OutOfRangeInput) { + OutOfRangeInput(); +} + +// Flaky on iOS: webrtc:7057. +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_TargetDelayBufferMinMax DISABLED_TargetDelayBufferMinMax +#else +#define MAYBE_TargetDelayBufferMinMax TargetDelayBufferMinMax +#endif +TEST_F(TargetDelayTest, MAYBE_TargetDelayBufferMinMax) { + TargetDelayBufferMinMax(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/webrtc_cng_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/webrtc_cng_gn/moz.build new file mode 100644 index 0000000000..4a9c24069e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/webrtc_cng_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + 
DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == 
"x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("webrtc_cng_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/webrtc_multiopus_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/webrtc_multiopus_gn/moz.build new file mode 100644 index 0000000000..99ea47c13a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/webrtc_multiopus_gn/moz.build @@ -0,0 +1,230 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_CODEC_ILBC"] = True +DEFINES["WEBRTC_CODEC_OPUS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_OPUS_SUPPORT_120MS_PTIME"] = "1" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/media/libopus/include/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + 
DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("webrtc_multiopus_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/webrtc_opus_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/webrtc_opus_gn/moz.build new file mode 100644 index 0000000000..49fb552b0d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/webrtc_opus_gn/moz.build @@ -0,0 +1,237 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_CODEC_ILBC"] = True +DEFINES["WEBRTC_CODEC_OPUS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_OPUS_SUPPORT_120MS_PTIME"] = "1" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/media/libopus/include/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc", + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + 
DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("webrtc_opus_gn") diff --git a/third_party/libwebrtc/modules/audio_coding/webrtc_opus_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/webrtc_opus_wrapper_gn/moz.build new file mode 100644 index 0000000000..20e1d37306 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/webrtc_opus_wrapper_gn/moz.build @@ -0,0 +1,229 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_CODEC_ILBC"] = True +DEFINES["WEBRTC_CODEC_OPUS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_OPUS_SUPPORT_120MS_PTIME"] = "1" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/media/libopus/include/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] 
= "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("webrtc_opus_wrapper_gn") diff --git a/third_party/libwebrtc/modules/audio_device/BUILD.gn b/third_party/libwebrtc/modules/audio_device/BUILD.gn new file mode 100644 index 0000000000..61cd531edd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/BUILD.gn @@ -0,0 +1,504 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +if (is_android) { + import("//build/config/android/config.gni") + import("//build/config/android/rules.gni") +} + +config("audio_device_warnings_config") { + if (is_win && is_clang) { + cflags = [ + # Disable warnings failing when compiling with Clang on Windows. + # https://bugs.chromium.org/p/webrtc/issues/detail?id=5366 + "-Wno-microsoft-goto", + ] + } +} + +rtc_source_set("audio_device_default") { + visibility = [ "*" ] + sources = [ "include/audio_device_default.h" ] + deps = [ ":audio_device_api" ] +} + +rtc_source_set("audio_device") { +if (!build_with_mozilla) { # See Bug 1820869. + visibility = [ "*" ] + public_deps = [ + ":audio_device_api", + + # Deprecated. + # TODO(webrtc:7452): Remove this public dep. 
audio_device_impl should + # be depended on directly if needed. + ":audio_device_impl", + ] +} +} + +rtc_source_set("audio_device_api") { + visibility = [ "*" ] + sources = [ + "include/audio_device.h", + "include/audio_device_defines.h", + ] + deps = [ + "../../api:scoped_refptr", + "../../api/task_queue", + "../../rtc_base:checks", + "../../rtc_base:refcount", + "../../rtc_base:stringutils", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_device_buffer") { +if (!build_with_mozilla) { # See Bug 1820869. + sources = [ + "audio_device_buffer.cc", + "audio_device_buffer.h", + "audio_device_config.h", + "fine_audio_buffer.cc", + "fine_audio_buffer.h", + ] + deps = [ + ":audio_device_api", + "../../api:array_view", + "../../api:sequence_checker", + "../../api/task_queue", + "../../common_audio:common_audio_c", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:rtc_task_queue", + "../../rtc_base:safe_conversions", + "../../rtc_base:timestamp_aligner", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../system_wrappers", + "../../system_wrappers:metrics", + ] +} +} + +rtc_library("audio_device_generic") { + sources = [ + "audio_device_generic.cc", + "audio_device_generic.h", + ] + deps = [ + ":audio_device_api", + ":audio_device_buffer", + "../../rtc_base:logging", + ] +} + +rtc_library("audio_device_name") { + sources = [ + "audio_device_name.cc", + "audio_device_name.h", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("windows_core_audio_utility") { + if (is_win && !build_with_chromium) { + sources = [ + "win/core_audio_utility_win.cc", + "win/core_audio_utility_win.h", + ] + + deps = [ + ":audio_device_api", + ":audio_device_name", + "../../api/units:time_delta", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread_types", + "../../rtc_base:stringutils", + "../../rtc_base/win:windows_version", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings:strings" ] + + libs = [ "oleaut32.lib" ] + } +} + +# An ADM with a dedicated factory method which does not depend on the +# audio_device_impl target. The goal is to use this new structure and +# gradually phase out the old design. +# TODO(henrika): currently only supported on Windows. 
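+# On Windows, the separate input and output implementations below are built
+# on the Core Audio (WASAPI) utility target above and combined into a single
+# ADM by the factory declared in include/audio_device_factory.h.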
+rtc_source_set("audio_device_module_from_input_and_output") { + visibility = [ "*" ] + if (is_win && !build_with_chromium) { + sources = [ + "include/audio_device_factory.cc", + "include/audio_device_factory.h", + ] + sources += [ + "win/audio_device_module_win.cc", + "win/audio_device_module_win.h", + "win/core_audio_base_win.cc", + "win/core_audio_base_win.h", + "win/core_audio_input_win.cc", + "win/core_audio_input_win.h", + "win/core_audio_output_win.cc", + "win/core_audio_output_win.h", + ] + + deps = [ + ":audio_device_api", + ":audio_device_buffer", + ":windows_core_audio_utility", + "../../api:make_ref_counted", + "../../api:scoped_refptr", + "../../api:sequence_checker", + "../../api/task_queue", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../rtc_base/win:scoped_com_initializer", + "../../rtc_base/win:windows_version", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings:strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } +} + +# Contains default implementations of webrtc::AudioDeviceModule for Windows, +# Linux, Mac, iOS and Android. +rtc_library("audio_device_impl") { +if (!build_with_mozilla) { # See Bug 1820869. + visibility = [ "*" ] + deps = [ + ":audio_device_api", + ":audio_device_buffer", + ":audio_device_default", + ":audio_device_generic", + "../../api:array_view", + "../../api:make_ref_counted", + "../../api:refcountedbase", + "../../api:scoped_refptr", + "../../api:sequence_checker", + "../../api/task_queue", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread", + "../../rtc_base:random", + "../../rtc_base:rtc_event", + "../../rtc_base:rtc_task_queue", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:arch", + "../../rtc_base/system:file_wrapper", + "../../rtc_base/task_utils:repeating_task", + "../../system_wrappers", + "../../system_wrappers:field_trial", + "../../system_wrappers:metrics", + "../utility", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/strings:strings", + ] + if (rtc_include_internal_audio_device && is_ios) { + deps += [ "../../sdk:audio_device" ] + } + + sources = [ + "dummy/audio_device_dummy.cc", + "dummy/audio_device_dummy.h", + "dummy/file_audio_device.cc", + "dummy/file_audio_device.h", + "include/fake_audio_device.h", + "include/test_audio_device.cc", + "include/test_audio_device.h", + ] + + if (build_with_mozilla) { + sources -= [ + "include/test_audio_device.cc", + "include/test_audio_device.h", + ] + } + + defines = [] + cflags = [] + if (rtc_audio_device_plays_sinus_tone) { + defines += [ "AUDIO_DEVICE_PLAYS_SINUS_TONE" ] + } + if (rtc_enable_android_aaudio) { + defines += [ "WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO" ] + } + if (rtc_include_internal_audio_device) { + sources += [ + "audio_device_data_observer.cc", + "audio_device_impl.cc", + "audio_device_impl.h", + "include/audio_device_data_observer.h", + ] + if (is_android) { + sources += [ + "android/audio_common.h", + "android/audio_device_template.h", + "android/audio_manager.cc", + "android/audio_manager.h", + "android/audio_record_jni.cc", + 
"android/audio_record_jni.h", + "android/audio_track_jni.cc", + "android/audio_track_jni.h", + "android/build_info.cc", + "android/build_info.h", + "android/opensles_common.cc", + "android/opensles_common.h", + "android/opensles_player.cc", + "android/opensles_player.h", + "android/opensles_recorder.cc", + "android/opensles_recorder.h", + ] + libs = [ + "log", + "OpenSLES", + ] + if (rtc_enable_android_aaudio) { + sources += [ + "android/aaudio_player.cc", + "android/aaudio_player.h", + "android/aaudio_recorder.cc", + "android/aaudio_recorder.h", + "android/aaudio_wrapper.cc", + "android/aaudio_wrapper.h", + ] + libs += [ "aaudio" ] + } + + if (build_with_mozilla) { + include_dirs += [ + "/config/external/nspr", + "/nsprpub/lib/ds", + "/nsprpub/pr/include", + ] + } + } + if (rtc_use_dummy_audio_file_devices) { + defines += [ "WEBRTC_DUMMY_FILE_DEVICES" ] + } else { + if (is_linux || is_chromeos) { + sources += [ + "linux/alsasymboltable_linux.cc", + "linux/alsasymboltable_linux.h", + "linux/audio_device_alsa_linux.cc", + "linux/audio_device_alsa_linux.h", + "linux/audio_mixer_manager_alsa_linux.cc", + "linux/audio_mixer_manager_alsa_linux.h", + "linux/latebindingsymboltable_linux.cc", + "linux/latebindingsymboltable_linux.h", + ] + defines += [ "WEBRTC_ENABLE_LINUX_ALSA" ] + libs = [ "dl" ] + if (rtc_use_x11) { + libs += [ "X11" ] + defines += [ "WEBRTC_USE_X11" ] + } + if (rtc_include_pulse_audio) { + defines += [ "WEBRTC_ENABLE_LINUX_PULSE" ] + } + sources += [ + "linux/audio_device_pulse_linux.cc", + "linux/audio_device_pulse_linux.h", + "linux/audio_mixer_manager_pulse_linux.cc", + "linux/audio_mixer_manager_pulse_linux.h", + "linux/pulseaudiosymboltable_linux.cc", + "linux/pulseaudiosymboltable_linux.h", + ] + } + if (is_mac) { + sources += [ + "mac/audio_device_mac.cc", + "mac/audio_device_mac.h", + "mac/audio_mixer_manager_mac.cc", + "mac/audio_mixer_manager_mac.h", + ] + deps += [ + ":audio_device_impl_frameworks", + "../third_party/portaudio:mac_portaudio", + ] + } + if (is_win) { + sources += [ + "win/audio_device_core_win.cc", + "win/audio_device_core_win.h", + ] + libs = [ + # Required for the built-in WASAPI AEC. + "dmoguids.lib", + "wmcodecdspuuid.lib", + "amstrmid.lib", + "msdmo.lib", + "oleaut32.lib", + ] + deps += [ + "../../rtc_base:win32", + "../../rtc_base/win:scoped_com_initializer", + ] + } + configs += [ ":audio_device_warnings_config" ] + } + } else { + defines = [ "WEBRTC_DUMMY_AUDIO_BUILD" ] + } + + if (!build_with_chromium) { + sources += [ + # Do not link these into Chrome since they contain static data. + "dummy/file_audio_device_factory.cc", + "dummy/file_audio_device_factory.h", + ] + } +} +} + +if (is_mac) { + rtc_source_set("audio_device_impl_frameworks") { + visibility = [ ":*" ] + frameworks = [ + # Needed for CoreGraphics: + "ApplicationServices.framework", + + "AudioToolbox.framework", + "CoreAudio.framework", + + # Needed for CGEventSourceKeyState in audio_device_mac.cc: + "CoreGraphics.framework", + ] + } +} + +if (!build_with_mozilla) { # See Bug 1820869. +rtc_source_set("mock_audio_device") { + visibility = [ "*" ] + testonly = true + sources = [ + "include/mock_audio_device.h", + "include/mock_audio_transport.h", + "mock_audio_device_buffer.h", + ] + deps = [ + ":audio_device", + ":audio_device_buffer", + ":audio_device_impl", + "../../api:make_ref_counted", + "../../test:test_support", + ] +} +} + +# See Bug 1820869 for !build_with_mozilla. 
+if (rtc_include_tests && !build_with_chromium && !build_with_mozilla) { + rtc_library("audio_device_unittests") { + testonly = true + + sources = [ + "fine_audio_buffer_unittest.cc", + "include/test_audio_device_unittest.cc", + ] + deps = [ + ":audio_device", + ":audio_device_buffer", + ":audio_device_impl", + ":mock_audio_device", + "../../api:array_view", + "../../api:scoped_refptr", + "../../api:sequence_checker", + "../../api/task_queue", + "../../api/task_queue:default_task_queue_factory", + "../../common_audio", + "../../rtc_base:buffer", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:race_checker", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + if (is_linux || is_chromeos || is_mac || is_win) { + sources += [ "audio_device_unittest.cc" ] + } + if (is_win) { + sources += [ "win/core_audio_utility_win_unittest.cc" ] + deps += [ + ":audio_device_module_from_input_and_output", + ":windows_core_audio_utility", + "../../rtc_base/win:scoped_com_initializer", + "../../rtc_base/win:windows_version", + ] + } + if (is_android) { + sources += [ + "android/audio_device_unittest.cc", + "android/audio_manager_unittest.cc", + "android/ensure_initialized.cc", + "android/ensure_initialized.h", + ] + deps += [ + "../../sdk/android:internal_jni", + "../../sdk/android:libjingle_peerconnection_java", + "../../sdk/android:native_api_jni", + "../../sdk/android:native_test_jni_onload", + "../utility", + ] + } + if (!rtc_include_internal_audio_device) { + defines = [ "WEBRTC_DUMMY_AUDIO_BUILD" ] + } + } +} + +if ((!build_with_chromium && !build_with_mozilla) && is_android) { + rtc_android_library("audio_device_java") { + sources = [ + "android/java/src/org/webrtc/voiceengine/BuildInfo.java", + "android/java/src/org/webrtc/voiceengine/WebRtcAudioEffects.java", + "android/java/src/org/webrtc/voiceengine/WebRtcAudioManager.java", + "android/java/src/org/webrtc/voiceengine/WebRtcAudioRecord.java", + "android/java/src/org/webrtc/voiceengine/WebRtcAudioTrack.java", + "android/java/src/org/webrtc/voiceengine/WebRtcAudioUtils.java", + ] + deps = [ + "../../rtc_base:base_java", + "//third_party/androidx:androidx_annotation_annotation_java", + ] + } +} diff --git a/third_party/libwebrtc/modules/audio_device/DEPS b/third_party/libwebrtc/modules/audio_device/DEPS new file mode 100644 index 0000000000..9cc627d330 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/DEPS @@ -0,0 +1,13 @@ +include_rules = [ + "+common_audio", + "+system_wrappers", +] + +specific_include_rules = { + "ensure_initialized\.cc": [ + "+sdk/android", + ], + "audio_device_impl\.cc": [ + "+sdk/objc", + ], +} diff --git a/third_party/libwebrtc/modules/audio_device/OWNERS b/third_party/libwebrtc/modules/audio_device/OWNERS new file mode 100644 index 0000000000..22d03d552b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/OWNERS @@ -0,0 +1,2 @@ +henrika@webrtc.org +tkchin@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_player.cc b/third_party/libwebrtc/modules/audio_device/android/aaudio_player.cc new file mode 100644 index 0000000000..81e5bf5427 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_device/android/aaudio_player.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/android/aaudio_player.h" + +#include + +#include "api/array_view.h" +#include "api/task_queue/task_queue_base.h" +#include "modules/audio_device/android/audio_manager.h" +#include "modules/audio_device/fine_audio_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +AAudioPlayer::AAudioPlayer(AudioManager* audio_manager) + : main_thread_(TaskQueueBase::Current()), + aaudio_(audio_manager, AAUDIO_DIRECTION_OUTPUT, this) { + RTC_LOG(LS_INFO) << "ctor"; + thread_checker_aaudio_.Detach(); +} + +AAudioPlayer::~AAudioPlayer() { + RTC_LOG(LS_INFO) << "dtor"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + Terminate(); + RTC_LOG(LS_INFO) << "#detected underruns: " << underrun_count_; +} + +int AAudioPlayer::Init() { + RTC_LOG(LS_INFO) << "Init"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + if (aaudio_.audio_parameters().channels() == 2) { + RTC_DLOG(LS_WARNING) << "Stereo mode is enabled"; + } + return 0; +} + +int AAudioPlayer::Terminate() { + RTC_LOG(LS_INFO) << "Terminate"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + StopPlayout(); + return 0; +} + +int AAudioPlayer::InitPlayout() { + RTC_LOG(LS_INFO) << "InitPlayout"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!playing_); + if (!aaudio_.Init()) { + return -1; + } + initialized_ = true; + return 0; +} + +bool AAudioPlayer::PlayoutIsInitialized() const { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + return initialized_; +} + +int AAudioPlayer::StartPlayout() { + RTC_LOG(LS_INFO) << "StartPlayout"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + RTC_DCHECK(!playing_); + if (!initialized_) { + RTC_DLOG(LS_WARNING) + << "Playout can not start since InitPlayout must succeed first"; + return 0; + } + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetPlayout(); + } + if (!aaudio_.Start()) { + return -1; + } + underrun_count_ = aaudio_.xrun_count(); + first_data_callback_ = true; + playing_ = true; + return 0; +} + +int AAudioPlayer::StopPlayout() { + RTC_LOG(LS_INFO) << "StopPlayout"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + if (!initialized_ || !playing_) { + return 0; + } + if (!aaudio_.Stop()) { + RTC_LOG(LS_ERROR) << "StopPlayout failed"; + return -1; + } + thread_checker_aaudio_.Detach(); + initialized_ = false; + playing_ = false; + return 0; +} + +bool AAudioPlayer::Playing() const { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + return playing_; +} + +void AAudioPlayer::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + RTC_DLOG(LS_INFO) << "AttachAudioBuffer"; + RTC_DCHECK_RUN_ON(&main_thread_checker_); + audio_device_buffer_ = audioBuffer; + const AudioParameters audio_parameters = aaudio_.audio_parameters(); + audio_device_buffer_->SetPlayoutSampleRate(audio_parameters.sample_rate()); + audio_device_buffer_->SetPlayoutChannels(audio_parameters.channels()); + RTC_CHECK(audio_device_buffer_); + // Create a modified audio buffer class which allows us to ask for any number + // of samples (and not only multiple of 10ms) to 
+  // buffer size per callback used by AAudio.
+  fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+}
+
+int AAudioPlayer::SpeakerVolumeIsAvailable(bool& available) {
+  available = false;
+  return 0;
+}
+
+void AAudioPlayer::OnErrorCallback(aaudio_result_t error) {
+  RTC_LOG(LS_ERROR) << "OnErrorCallback: " << AAudio_convertResultToText(error);
+  // TODO(henrika): investigate if we can use a thread checker here. Initial
+  // tests show that this callback can sometimes be called on a unique thread
+  // but according to the documentation it should be on the same thread as the
+  // data callback.
+  // RTC_DCHECK_RUN_ON(&thread_checker_aaudio_);
+  if (aaudio_.stream_state() == AAUDIO_STREAM_STATE_DISCONNECTED) {
+    // The stream is disconnected and any attempt to use it will return
+    // AAUDIO_ERROR_DISCONNECTED.
+    RTC_LOG(LS_WARNING) << "Output stream disconnected";
+    // AAudio documentation states: "You should not close or reopen the stream
+    // from the callback, use another thread instead". A message is therefore
+    // sent to the main thread to do the restart operation.
+    RTC_DCHECK(main_thread_);
+    main_thread_->PostTask([this] { HandleStreamDisconnected(); });
+  }
+}
+
+aaudio_data_callback_result_t AAudioPlayer::OnDataCallback(void* audio_data,
+                                                           int32_t num_frames) {
+  RTC_DCHECK_RUN_ON(&thread_checker_aaudio_);
+  // Log device id in first data callback to ensure that a valid device is
+  // utilized.
+  if (first_data_callback_) {
+    RTC_LOG(LS_INFO) << "--- First output data callback: "
+                        "device id="
+                     << aaudio_.device_id();
+    first_data_callback_ = false;
+  }
+
+  // Check if the underrun count has increased. If it has, increase the buffer
+  // size by adding the size of a burst. It will reduce the risk of underruns
+  // at the expense of an increased latency.
+  // TODO(henrika): enable possibility to disable and/or tune the algorithm.
+  const int32_t underrun_count = aaudio_.xrun_count();
+  if (underrun_count > underrun_count_) {
+    RTC_LOG(LS_ERROR) << "Underrun detected: " << underrun_count;
+    underrun_count_ = underrun_count;
+    aaudio_.IncreaseOutputBufferSize();
+  }
+
+  // Estimate latency between writing an audio frame to the output stream and
+  // the time that same frame is played out on the output audio device.
+  latency_millis_ = aaudio_.EstimateLatencyMillis();
+  // TODO(henrika): use for development only.
+  if (aaudio_.frames_written() % (1000 * aaudio_.frames_per_burst()) == 0) {
+    RTC_DLOG(LS_INFO) << "output latency: " << latency_millis_
+                      << ", num_frames: " << num_frames;
+  }
+
+  // Read audio data from the WebRTC source using the FineAudioBuffer object
+  // and write that data into `audio_data` to be played out by AAudio.
+  // Prime output with zeros during a short initial phase to avoid distortion.
+  // TODO(henrika): do more work to figure out if the initial forced silence
+  // period is really needed.
+  if (aaudio_.frames_written() < 50 * aaudio_.frames_per_burst()) {
+    const size_t num_bytes =
+        sizeof(int16_t) * aaudio_.samples_per_frame() * num_frames;
+    memset(audio_data, 0, num_bytes);
+  } else {
+    fine_audio_buffer_->GetPlayoutData(
+        rtc::MakeArrayView(static_cast<int16_t*>(audio_data),
+                           aaudio_.samples_per_frame() * num_frames),
+        static_cast<int>(latency_millis_ + 0.5));
+  }
+
+  // TODO(henrika): possibly add trace here to be included in systrace.
+  // See https://developer.android.com/studio/profile/systrace-commandline.html.
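+  // Returning AAUDIO_CALLBACK_RESULT_CONTINUE keeps this data callback
+  // running; AAUDIO_CALLBACK_RESULT_STOP would ask AAudio to stop calling
+  // back into this object instead.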
+  return AAUDIO_CALLBACK_RESULT_CONTINUE;
+}
+
+void AAudioPlayer::HandleStreamDisconnected() {
+  RTC_DCHECK_RUN_ON(&main_thread_checker_);
+  RTC_DLOG(LS_INFO) << "HandleStreamDisconnected";
+  if (!initialized_ || !playing_) {
+    return;
+  }
+  // Perform a restart by first closing the disconnected stream and then start
+  // a new stream; this time using the new (preferred) audio output device.
+  StopPlayout();
+  InitPlayout();
+  StartPlayout();
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_player.h b/third_party/libwebrtc/modules/audio_device/android/aaudio_player.h
new file mode 100644
index 0000000000..ea5d578092
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/aaudio_player.h
@@ -0,0 +1,141 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_PLAYER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_PLAYER_H_
+
+#include <aaudio/AAudio.h>
+
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "api/task_queue/task_queue_base.h"
+#include "modules/audio_device/android/aaudio_wrapper.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+class FineAudioBuffer;
+class AudioManager;
+
+// Implements low-latency 16-bit mono PCM audio output support for Android
+// using the C based AAudio API.
+//
+// An instance must be created and destroyed on one and the same thread.
+// All public methods must also be called on the same thread. A thread checker
+// will DCHECK if any method is called on an invalid thread. Audio buffers
+// are requested on a dedicated high-priority thread owned by AAudio.
+//
+// The existing design forces the user to call InitPlayout() after
+// StopPlayout() to be able to call StartPlayout() again. This is in line with
+// how the Java-based implementation works.
+//
+// An audio stream can be disconnected, e.g. when an audio device is removed.
+// This implementation will restart the audio stream using the new preferred
+// device if such an event happens.
+//
+// Also supports automatic buffer-size adjustment based on underrun detections
+// where the internal AAudio buffer can be increased when needed. It will
+// reduce the risk of underruns (~glitches) at the expense of an increased
+// latency.
+class AAudioPlayer final : public AAudioObserverInterface {
+ public:
+  explicit AAudioPlayer(AudioManager* audio_manager);
+  ~AAudioPlayer();
+
+  int Init();
+  int Terminate();
+
+  int InitPlayout();
+  bool PlayoutIsInitialized() const;
+
+  int StartPlayout();
+  int StopPlayout();
+  bool Playing() const;
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer);
+
+  // Not implemented in AAudio.
+  int SpeakerVolumeIsAvailable(bool& available);  // NOLINT
+  int SetSpeakerVolume(uint32_t volume) { return -1; }
+  int SpeakerVolume(uint32_t& volume) const { return -1; }        // NOLINT
+  int MaxSpeakerVolume(uint32_t& maxVolume) const { return -1; }  // NOLINT
+  int MinSpeakerVolume(uint32_t& minVolume) const { return -1; }  // NOLINT
+
+ protected:
+  // AAudioObserverInterface implementation.
+
+  // For an output stream, this function should render and write `num_frames`
+  // of data in the stream's current data format to the `audio_data` buffer.
+  // Called on a real-time thread owned by AAudio.
+  aaudio_data_callback_result_t OnDataCallback(void* audio_data,
+                                               int32_t num_frames) override;
+  // AAudio calls this function if any error occurs on a callback thread.
+  // Called on a real-time thread owned by AAudio.
+  void OnErrorCallback(aaudio_result_t error) override;
+
+ private:
+  // Closes the existing stream and starts a new stream.
+  void HandleStreamDisconnected();
+
+  // Ensures that methods are called from the same thread as this object is
+  // created on.
+  SequenceChecker main_thread_checker_;
+
+  // Stores thread ID in first call to AAudioPlayer::OnDataCallback from a
+  // real-time thread owned by AAudio. Detached during construction of this
+  // object.
+  SequenceChecker thread_checker_aaudio_;
+
+  // The task queue on which this object is created.
+  TaskQueueBase* main_thread_;
+
+  // Wraps all AAudio resources. Contains an output stream using the default
+  // output audio device. Can be accessed on both the main thread and the
+  // real-time thread owned by AAudio. See separate AAudio documentation about
+  // thread safety.
+  AAudioWrapper aaudio_;
+
+  // FineAudioBuffer takes an AudioDeviceBuffer which delivers audio data
+  // in chunks of 10ms. It then allows for this data to be pulled in
+  // a finer or coarser granularity. I.e. interacting with this class instead
+  // of directly with the AudioDeviceBuffer one can ask for any number of
+  // audio data samples.
+  // Example: native buffer size can be 192 audio frames at 48kHz sample rate.
+  // WebRTC will provide 480 audio frames per 10ms but AAudio asks for 192
+  // in each callback (once every 4th ms). This class can then ask for 192 and
+  // the FineAudioBuffer will ask WebRTC for new data approximately only every
+  // second callback and also cache non-utilized audio.
+  std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+
+  // Counts number of detected underrun events reported by AAudio.
+  int32_t underrun_count_ = 0;
+
+  // True only for the first data callback in each audio session.
+  bool first_data_callback_ = true;
+
+  // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the
+  // AudioDeviceModuleImpl class and set by AudioDeviceModule::Create().
+  AudioDeviceBuffer* audio_device_buffer_
+      RTC_GUARDED_BY(main_thread_checker_) = nullptr;
+
+  bool initialized_ RTC_GUARDED_BY(main_thread_checker_) = false;
+  bool playing_ RTC_GUARDED_BY(main_thread_checker_) = false;
+
+  // Estimated latency between writing an audio frame to the output stream and
+  // the time that same frame is played out on the output audio device.
+  double latency_millis_ RTC_GUARDED_BY(thread_checker_aaudio_) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_PLAYER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.cc b/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.cc
new file mode 100644
index 0000000000..21e5dd8a74
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.cc
@@ -0,0 +1,205 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/aaudio_recorder.h"
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "api/task_queue/task_queue_base.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/fine_audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+
+AAudioRecorder::AAudioRecorder(AudioManager* audio_manager)
+    : main_thread_(TaskQueueBase::Current()),
+      aaudio_(audio_manager, AAUDIO_DIRECTION_INPUT, this) {
+  RTC_LOG(LS_INFO) << "ctor";
+  thread_checker_aaudio_.Detach();
+}
+
+AAudioRecorder::~AAudioRecorder() {
+  RTC_LOG(LS_INFO) << "dtor";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  Terminate();
+  RTC_LOG(LS_INFO) << "detected overflows: " << overflow_count_;
+}
+
+int AAudioRecorder::Init() {
+  RTC_LOG(LS_INFO) << "Init";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (aaudio_.audio_parameters().channels() == 2) {
+    RTC_DLOG(LS_WARNING) << "Stereo mode is enabled";
+  }
+  return 0;
+}
+
+int AAudioRecorder::Terminate() {
+  RTC_LOG(LS_INFO) << "Terminate";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  StopRecording();
+  return 0;
+}
+
+int AAudioRecorder::InitRecording() {
+  RTC_LOG(LS_INFO) << "InitRecording";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(!initialized_);
+  RTC_DCHECK(!recording_);
+  if (!aaudio_.Init()) {
+    return -1;
+  }
+  initialized_ = true;
+  return 0;
+}
+
+int AAudioRecorder::StartRecording() {
+  RTC_LOG(LS_INFO) << "StartRecording";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(initialized_);
+  RTC_DCHECK(!recording_);
+  if (fine_audio_buffer_) {
+    // Reset the recording side of the buffer before a new session starts.
+    fine_audio_buffer_->ResetRecord();
+  }
+  if (!aaudio_.Start()) {
+    return -1;
+  }
+  overflow_count_ = aaudio_.xrun_count();
+  first_data_callback_ = true;
+  recording_ = true;
+  return 0;
+}
+
+int AAudioRecorder::StopRecording() {
+  RTC_LOG(LS_INFO) << "StopRecording";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (!initialized_ || !recording_) {
+    return 0;
+  }
+  if (!aaudio_.Stop()) {
+    return -1;
+  }
+  thread_checker_aaudio_.Detach();
+  initialized_ = false;
+  recording_ = false;
+  return 0;
+}
+
+void AAudioRecorder::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+  RTC_LOG(LS_INFO) << "AttachAudioBuffer";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  audio_device_buffer_ = audioBuffer;
+  const AudioParameters audio_parameters = aaudio_.audio_parameters();
+  audio_device_buffer_->SetRecordingSampleRate(audio_parameters.sample_rate());
+  audio_device_buffer_->SetRecordingChannels(audio_parameters.channels());
+  RTC_CHECK(audio_device_buffer_);
+  // Create a modified audio buffer class which allows us to deliver any number
+  // of samples (and not only multiples of 10ms which WebRTC uses) to match the
+  // native AAudio buffer size.
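+  // Illustrative numbers (device dependent): at 48 kHz a native burst may be
+  // 192 frames (4 ms) while WebRTC consumes fixed 480-frame (10 ms) chunks;
+  // the FineAudioBuffer accumulates the callback-sized reads until a full
+  // 10 ms chunk can be delivered to the WebRTC layer.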
+  fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+}
+
+int AAudioRecorder::EnableBuiltInAEC(bool enable) {
+  RTC_LOG(LS_INFO) << "EnableBuiltInAEC: " << enable;
+  RTC_LOG(LS_ERROR) << "Not implemented";
+  return -1;
+}
+
+int AAudioRecorder::EnableBuiltInAGC(bool enable) {
+  RTC_LOG(LS_INFO) << "EnableBuiltInAGC: " << enable;
+  RTC_LOG(LS_ERROR) << "Not implemented";
+  return -1;
+}
+
+int AAudioRecorder::EnableBuiltInNS(bool enable) {
+  RTC_LOG(LS_INFO) << "EnableBuiltInNS: " << enable;
+  RTC_LOG(LS_ERROR) << "Not implemented";
+  return -1;
+}
+
+void AAudioRecorder::OnErrorCallback(aaudio_result_t error) {
+  RTC_LOG(LS_ERROR) << "OnErrorCallback: " << AAudio_convertResultToText(error);
+  // RTC_DCHECK(thread_checker_aaudio_.IsCurrent());
+  if (aaudio_.stream_state() == AAUDIO_STREAM_STATE_DISCONNECTED) {
+    // The stream is disconnected and any attempt to use it will return
+    // AAUDIO_ERROR_DISCONNECTED.
+    RTC_LOG(LS_WARNING) << "Input stream disconnected => restart is required";
+    // AAudio documentation states: "You should not close or reopen the stream
+    // from the callback, use another thread instead". A message is therefore
+    // sent to the main thread to do the restart operation.
+    RTC_DCHECK(main_thread_);
+    main_thread_->PostTask([this] { HandleStreamDisconnected(); });
+  }
+}
+
+// Read and process `num_frames` of data from the `audio_data` buffer.
+// TODO(henrika): possibly add trace here to be included in systrace.
+// See https://developer.android.com/studio/profile/systrace-commandline.html.
+aaudio_data_callback_result_t AAudioRecorder::OnDataCallback(
+    void* audio_data,
+    int32_t num_frames) {
+  // TODO(henrika): figure out why we sometimes hit this one.
+  // RTC_DCHECK(thread_checker_aaudio_.IsCurrent());
+  // RTC_LOG(LS_INFO) << "OnDataCallback: " << num_frames;
+  // Drain the input buffer at first callback to ensure that it does not
+  // contain any old data. Will also ensure that the lowest possible latency
+  // is obtained.
+  if (first_data_callback_) {
+    RTC_LOG(LS_INFO) << "--- First input data callback: "
+                        "device id="
+                     << aaudio_.device_id();
+    aaudio_.ClearInputStream(audio_data, num_frames);
+    first_data_callback_ = false;
+  }
+  // Check if the overflow counter has increased and if so log a warning.
+  // TODO(henrika): possibly add UMA stat or capacity extension.
+  const int32_t overflow_count = aaudio_.xrun_count();
+  if (overflow_count > overflow_count_) {
+    RTC_LOG(LS_ERROR) << "Overflow detected: " << overflow_count;
+    overflow_count_ = overflow_count;
+  }
+  // Estimate the time between when an audio frame is recorded by the input
+  // device and when it can be read on the input stream.
+  latency_millis_ = aaudio_.EstimateLatencyMillis();
+  // TODO(henrika): use for development only.
+  if (aaudio_.frames_read() % (1000 * aaudio_.frames_per_burst()) == 0) {
+    RTC_DLOG(LS_INFO) << "input latency: " << latency_millis_
+                      << ", num_frames: " << num_frames;
+  }
+  // Copy recorded audio in `audio_data` to the WebRTC sink using the
+  // FineAudioBuffer object.
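+  // `latency_millis_ + 0.5` rounds the double estimate to the nearest whole
+  // millisecond, the granularity the delivery call below expects.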
+  fine_audio_buffer_->DeliverRecordedData(
+      rtc::MakeArrayView(static_cast<const int16_t*>(audio_data),
+                         aaudio_.samples_per_frame() * num_frames),
+      static_cast<int>(latency_millis_ + 0.5));
+
+  return AAUDIO_CALLBACK_RESULT_CONTINUE;
+}
+
+void AAudioRecorder::HandleStreamDisconnected() {
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  RTC_LOG(LS_INFO) << "HandleStreamDisconnected";
+  if (!initialized_ || !recording_) {
+    return;
+  }
+  // Perform a restart by first closing the disconnected stream and then start
+  // a new stream; this time using the new (preferred) audio input device.
+  // TODO(henrika): resolve issue where a one restart attempt leads to a long
+  // sequence of new calls to OnErrorCallback().
+  // See b/73148976 for details.
+  StopRecording();
+  InitRecording();
+  StartRecording();
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.h b/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.h
new file mode 100644
index 0000000000..6df7eed076
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/aaudio_recorder.h
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_RECORDER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_RECORDER_H_
+
+#include <aaudio/AAudio.h>
+
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "api/task_queue/task_queue_base.h"
+#include "modules/audio_device/android/aaudio_wrapper.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+class FineAudioBuffer;
+class AudioManager;
+
+// Implements low-latency 16-bit mono PCM audio input support for Android
+// using the C based AAudio API.
+//
+// An instance must be created and destroyed on one and the same thread.
+// All public methods must also be called on the same thread. A thread checker
+// will RTC_DCHECK if any method is called on an invalid thread. Audio buffers
+// are delivered on a dedicated high-priority thread owned by AAudio.
+//
+// The existing design forces the user to call InitRecording() after
+// StopRecording() to be able to call StartRecording() again. This is in line
+// with how the Java-based implementation works.
+//
+// TODO(henrika): add comments about device changes and adaptive buffer
+// management.
+class AAudioRecorder : public AAudioObserverInterface {
+ public:
+  explicit AAudioRecorder(AudioManager* audio_manager);
+  ~AAudioRecorder();
+
+  int Init();
+  int Terminate();
+
+  int InitRecording();
+  bool RecordingIsInitialized() const { return initialized_; }
+
+  int StartRecording();
+  int StopRecording();
+  bool Recording() const { return recording_; }
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer);
+
+  double latency_millis() const { return latency_millis_; }
+
+  // TODO(henrika): add support using AAudio APIs when available.
+  int EnableBuiltInAEC(bool enable);
+  int EnableBuiltInAGC(bool enable);
+  int EnableBuiltInNS(bool enable);
+
+ protected:
+  // AAudioObserverInterface implementation.
+
+  // For an input stream, this function should read `num_frames` of recorded
+  // data, in the stream's current data format, from the `audio_data` buffer.
+  // Called on a real-time thread owned by AAudio.
+  aaudio_data_callback_result_t OnDataCallback(void* audio_data,
+                                               int32_t num_frames) override;
+
+  // AAudio calls this function if any error occurs on a callback thread.
+  // Called on a real-time thread owned by AAudio.
+  void OnErrorCallback(aaudio_result_t error) override;
+
+ private:
+  // Closes the existing stream and starts a new stream.
+  void HandleStreamDisconnected();
+
+  // Ensures that methods are called from the same thread as this object is
+  // created on.
+  SequenceChecker thread_checker_;
+
+  // Stores thread ID in first call to AAudioRecorder::OnDataCallback from a
+  // real-time thread owned by AAudio. Detached during construction of this
+  // object.
+  SequenceChecker thread_checker_aaudio_;
+
+  // The thread on which this object is created.
+  TaskQueueBase* main_thread_;
+
+  // Wraps all AAudio resources. Contains an input stream using the default
+  // input audio device.
+  AAudioWrapper aaudio_;
+
+  // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the
+  // AudioDeviceModuleImpl class and set by AudioDeviceModule::Create().
+  AudioDeviceBuffer* audio_device_buffer_ = nullptr;
+
+  bool initialized_ = false;
+  bool recording_ = false;
+
+  // Consumes audio of native buffer size and feeds the WebRTC layer with 10ms
+  // chunks of audio.
+  std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+
+  // Counts number of detected overflow events reported by AAudio.
+  int32_t overflow_count_ = 0;
+
+  // Estimate of the time between when an audio frame is recorded by the input
+  // device and when it can be read on the input stream.
+  double latency_millis_ = 0;
+
+  // True only for the first data callback in each audio session.
+  bool first_data_callback_ = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_RECORDER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.cc b/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.cc
new file mode 100644
index 0000000000..3d824b5c57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.cc
@@ -0,0 +1,499 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/aaudio_wrapper.h"
+
+#include "modules/audio_device/android/audio_manager.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/time_utils.h"
+
+#define LOG_ON_ERROR(op)                                                      \
+  do {                                                                        \
+    aaudio_result_t result = (op);                                            \
+    if (result != AAUDIO_OK) {                                                \
+      RTC_LOG(LS_ERROR) << #op << ": " << AAudio_convertResultToText(result); \
+    }                                                                         \
+  } while (0)
+
+#define RETURN_ON_ERROR(op, ...)                                              \
+  do {                                                                        \
+    aaudio_result_t result = (op);                                            \
+    if (result != AAUDIO_OK) {                                                \
+      RTC_LOG(LS_ERROR) << #op << ": " << AAudio_convertResultToText(result); \
+      return __VA_ARGS__;                                                     \
+    }                                                                         \
+  } while (0)
+
+namespace webrtc {
+
+namespace {
+
+const char* DirectionToString(aaudio_direction_t direction) {
+  switch (direction) {
+    case AAUDIO_DIRECTION_OUTPUT:
+      return "OUTPUT";
+    case AAUDIO_DIRECTION_INPUT:
+      return "INPUT";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+const char* SharingModeToString(aaudio_sharing_mode_t mode) {
+  switch (mode) {
+    case AAUDIO_SHARING_MODE_EXCLUSIVE:
+      return "EXCLUSIVE";
+    case AAUDIO_SHARING_MODE_SHARED:
+      return "SHARED";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+const char* PerformanceModeToString(aaudio_performance_mode_t mode) {
+  switch (mode) {
+    case AAUDIO_PERFORMANCE_MODE_NONE:
+      return "NONE";
+    case AAUDIO_PERFORMANCE_MODE_POWER_SAVING:
+      return "POWER_SAVING";
+    case AAUDIO_PERFORMANCE_MODE_LOW_LATENCY:
+      return "LOW_LATENCY";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+const char* FormatToString(int32_t id) {
+  switch (id) {
+    case AAUDIO_FORMAT_INVALID:
+      return "INVALID";
+    case AAUDIO_FORMAT_UNSPECIFIED:
+      return "UNSPECIFIED";
+    case AAUDIO_FORMAT_PCM_I16:
+      return "PCM_I16";
+    case AAUDIO_FORMAT_PCM_FLOAT:
+      return "FLOAT";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+void ErrorCallback(AAudioStream* stream,
+                   void* user_data,
+                   aaudio_result_t error) {
+  RTC_DCHECK(user_data);
+  AAudioWrapper* aaudio_wrapper = reinterpret_cast<AAudioWrapper*>(user_data);
+  RTC_LOG(LS_WARNING) << "ErrorCallback: "
+                      << DirectionToString(aaudio_wrapper->direction());
+  RTC_DCHECK(aaudio_wrapper->observer());
+  aaudio_wrapper->observer()->OnErrorCallback(error);
+}
+
+aaudio_data_callback_result_t DataCallback(AAudioStream* stream,
+                                           void* user_data,
+                                           void* audio_data,
+                                           int32_t num_frames) {
+  RTC_DCHECK(user_data);
+  RTC_DCHECK(audio_data);
+  AAudioWrapper* aaudio_wrapper = reinterpret_cast<AAudioWrapper*>(user_data);
+  RTC_DCHECK(aaudio_wrapper->observer());
+  return aaudio_wrapper->observer()->OnDataCallback(audio_data, num_frames);
+}
+
+// Wraps the stream builder object to ensure that it is released properly when
+// the stream builder goes out of scope.
+class ScopedStreamBuilder {
+ public:
+  ScopedStreamBuilder() {
+    LOG_ON_ERROR(AAudio_createStreamBuilder(&builder_));
+    RTC_DCHECK(builder_);
+  }
+  ~ScopedStreamBuilder() {
+    if (builder_) {
+      LOG_ON_ERROR(AAudioStreamBuilder_delete(builder_));
+    }
+  }
+
+  AAudioStreamBuilder* get() const { return builder_; }
+
+ private:
+  AAudioStreamBuilder* builder_ = nullptr;
+};
+
+}  // namespace
+
+AAudioWrapper::AAudioWrapper(AudioManager* audio_manager,
+                             aaudio_direction_t direction,
+                             AAudioObserverInterface* observer)
+    : direction_(direction), observer_(observer) {
+  RTC_LOG(LS_INFO) << "ctor";
+  RTC_DCHECK(observer_);
+  direction_ == AAUDIO_DIRECTION_OUTPUT
+      ? audio_parameters_ = audio_manager->GetPlayoutAudioParameters()
+      : audio_parameters_ = audio_manager->GetRecordAudioParameters();
+  aaudio_thread_checker_.Detach();
+  RTC_LOG(LS_INFO) << audio_parameters_.ToString();
+}
+
+AAudioWrapper::~AAudioWrapper() {
+  RTC_LOG(LS_INFO) << "dtor";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(!stream_);
+}
+
+bool AAudioWrapper::Init() {
+  RTC_LOG(LS_INFO) << "Init";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // Creates a stream builder which can be used to open an audio stream.
+  ScopedStreamBuilder builder;
+  // Configures the stream builder using audio parameters given at
+  // construction.
+  SetStreamConfiguration(builder.get());
+  // Opens a stream based on options in the stream builder.
+  if (!OpenStream(builder.get())) {
+    return false;
+  }
+  // Ensures that the opened stream could activate the requested settings.
+  if (!VerifyStreamConfiguration()) {
+    return false;
+  }
+  // Optimizes the buffer scheme for lowest possible latency and creates
+  // additional buffer logic to match the 10ms buffer size used in WebRTC.
+  if (!OptimizeBuffers()) {
+    return false;
+  }
+  LogStreamState();
+  return true;
+}
+
+bool AAudioWrapper::Start() {
+  RTC_LOG(LS_INFO) << "Start";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // TODO(henrika): this state check might not be needed.
+  aaudio_stream_state_t current_state = AAudioStream_getState(stream_);
+  if (current_state != AAUDIO_STREAM_STATE_OPEN) {
+    RTC_LOG(LS_ERROR) << "Invalid state: "
+                      << AAudio_convertStreamStateToText(current_state);
+    return false;
+  }
+  // Asynchronous request for the stream to start.
+  RETURN_ON_ERROR(AAudioStream_requestStart(stream_), false);
+  LogStreamState();
+  return true;
+}
+
+bool AAudioWrapper::Stop() {
+  RTC_LOG(LS_INFO) << "Stop: " << DirectionToString(direction());
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // Asynchronous request for the stream to stop.
+  RETURN_ON_ERROR(AAudioStream_requestStop(stream_), false);
+  CloseStream();
+  aaudio_thread_checker_.Detach();
+  return true;
+}
+
+double AAudioWrapper::EstimateLatencyMillis() const {
+  RTC_DCHECK(stream_);
+  double latency_millis = 0.0;
+  if (direction() == AAUDIO_DIRECTION_INPUT) {
+    // For input streams, the best guess we can do is to use the current burst
+    // size as delay estimate.
+    latency_millis = static_cast<double>(frames_per_burst()) / sample_rate() *
+                     rtc::kNumMillisecsPerSec;
+  } else {
+    int64_t existing_frame_index;
+    int64_t existing_frame_presentation_time;
+    // Get the time at which a particular frame was presented to audio
+    // hardware.
+    aaudio_result_t result = AAudioStream_getTimestamp(
+        stream_, CLOCK_MONOTONIC, &existing_frame_index,
+        &existing_frame_presentation_time);
+    // Results are only valid when the stream is in
+    // AAUDIO_STREAM_STATE_STARTED.
+    if (result == AAUDIO_OK) {
+      // Get write index for next audio frame.
+      int64_t next_frame_index = frames_written();
+      // Number of frames between next frame and the existing frame.
+      int64_t frame_index_delta = next_frame_index - existing_frame_index;
+      // Assume the next frame will be written now.
+      int64_t next_frame_write_time = rtc::TimeNanos();
+      // Calculate time when next frame will be presented to the hardware,
+      // taking sample rate into account.
+      int64_t frame_time_delta =
+          (frame_index_delta * rtc::kNumNanosecsPerSec) / sample_rate();
+      int64_t next_frame_presentation_time =
+          existing_frame_presentation_time + frame_time_delta;
+      // Derive a latency estimate given results above.
+      latency_millis = static_cast<double>(next_frame_presentation_time -
+                                           next_frame_write_time) /
+                       rtc::kNumNanosecsPerMillisec;
+    }
+  }
+  return latency_millis;
+}
+
+// Returns true if the buffer size was increased, false if it could not be
+// increased (e.g. because the maximum buffer capacity was already reached).
+bool AAudioWrapper::IncreaseOutputBufferSize() {
+  RTC_LOG(LS_INFO) << "IncreaseBufferSize";
+  RTC_DCHECK(stream_);
+  RTC_DCHECK(aaudio_thread_checker_.IsCurrent());
+  RTC_DCHECK_EQ(direction(), AAUDIO_DIRECTION_OUTPUT);
+  aaudio_result_t buffer_size = AAudioStream_getBufferSizeInFrames(stream_);
+  // Try to increase size of buffer with one burst to reduce risk of underrun.
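+  // Example (illustrative numbers): with a 192-frame burst and a current
+  // buffer size of 192 frames, the new requested size becomes 384 frames,
+  // i.e. roughly one extra burst (~4 ms at 48 kHz) of output latency.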
+ buffer_size += frames_per_burst(); + // Verify that the new buffer size is not larger than max capacity. + // TODO(henrika): keep track of case when we reach the capacity limit. + const int32_t max_buffer_size = buffer_capacity_in_frames(); + if (buffer_size > max_buffer_size) { + RTC_LOG(LS_ERROR) << "Required buffer size (" << buffer_size + << ") is higher than max: " << max_buffer_size; + return false; + } + RTC_LOG(LS_INFO) << "Updating buffer size to: " << buffer_size + << " (max=" << max_buffer_size << ")"; + buffer_size = AAudioStream_setBufferSizeInFrames(stream_, buffer_size); + if (buffer_size < 0) { + RTC_LOG(LS_ERROR) << "Failed to change buffer size: " + << AAudio_convertResultToText(buffer_size); + return false; + } + RTC_LOG(LS_INFO) << "Buffer size changed to: " << buffer_size; + return true; +} + +void AAudioWrapper::ClearInputStream(void* audio_data, int32_t num_frames) { + RTC_LOG(LS_INFO) << "ClearInputStream"; + RTC_DCHECK(stream_); + RTC_DCHECK(aaudio_thread_checker_.IsCurrent()); + RTC_DCHECK_EQ(direction(), AAUDIO_DIRECTION_INPUT); + aaudio_result_t cleared_frames = 0; + do { + cleared_frames = AAudioStream_read(stream_, audio_data, num_frames, 0); + } while (cleared_frames > 0); +} + +AAudioObserverInterface* AAudioWrapper::observer() const { + return observer_; +} + +AudioParameters AAudioWrapper::audio_parameters() const { + return audio_parameters_; +} + +int32_t AAudioWrapper::samples_per_frame() const { + RTC_DCHECK(stream_); + return AAudioStream_getSamplesPerFrame(stream_); +} + +int32_t AAudioWrapper::buffer_size_in_frames() const { + RTC_DCHECK(stream_); + return AAudioStream_getBufferSizeInFrames(stream_); +} + +int32_t AAudioWrapper::buffer_capacity_in_frames() const { + RTC_DCHECK(stream_); + return AAudioStream_getBufferCapacityInFrames(stream_); +} + +int32_t AAudioWrapper::device_id() const { + RTC_DCHECK(stream_); + return AAudioStream_getDeviceId(stream_); +} + +int32_t AAudioWrapper::xrun_count() const { + RTC_DCHECK(stream_); + return AAudioStream_getXRunCount(stream_); +} + +int32_t AAudioWrapper::format() const { + RTC_DCHECK(stream_); + return AAudioStream_getFormat(stream_); +} + +int32_t AAudioWrapper::sample_rate() const { + RTC_DCHECK(stream_); + return AAudioStream_getSampleRate(stream_); +} + +int32_t AAudioWrapper::channel_count() const { + RTC_DCHECK(stream_); + return AAudioStream_getChannelCount(stream_); +} + +int32_t AAudioWrapper::frames_per_callback() const { + RTC_DCHECK(stream_); + return AAudioStream_getFramesPerDataCallback(stream_); +} + +aaudio_sharing_mode_t AAudioWrapper::sharing_mode() const { + RTC_DCHECK(stream_); + return AAudioStream_getSharingMode(stream_); +} + +aaudio_performance_mode_t AAudioWrapper::performance_mode() const { + RTC_DCHECK(stream_); + return AAudioStream_getPerformanceMode(stream_); +} + +aaudio_stream_state_t AAudioWrapper::stream_state() const { + RTC_DCHECK(stream_); + return AAudioStream_getState(stream_); +} + +int64_t AAudioWrapper::frames_written() const { + RTC_DCHECK(stream_); + return AAudioStream_getFramesWritten(stream_); +} + +int64_t AAudioWrapper::frames_read() const { + RTC_DCHECK(stream_); + return AAudioStream_getFramesRead(stream_); +} + +void AAudioWrapper::SetStreamConfiguration(AAudioStreamBuilder* builder) { + RTC_LOG(LS_INFO) << "SetStreamConfiguration"; + RTC_DCHECK(builder); + RTC_DCHECK(thread_checker_.IsCurrent()); + // Request usage of default primary output/input device. + // TODO(henrika): verify that default device follows Java APIs. 
+  // https://developer.android.com/reference/android/media/AudioDeviceInfo.html.
+  AAudioStreamBuilder_setDeviceId(builder, AAUDIO_UNSPECIFIED);
+  // Use preferred sample rate given by the audio parameters.
+  AAudioStreamBuilder_setSampleRate(builder, audio_parameters().sample_rate());
+  // Use preferred channel configuration given by the audio parameters.
+  AAudioStreamBuilder_setChannelCount(builder, audio_parameters().channels());
+  // Always use 16-bit PCM audio sample format.
+  AAudioStreamBuilder_setFormat(builder, AAUDIO_FORMAT_PCM_I16);
+  // TODO(henrika): investigate effect of using AAUDIO_SHARING_MODE_EXCLUSIVE.
+  // Exclusive mode would give the lowest possible latency, but shared mode is
+  // requested here; if exclusive mode were requested and not available,
+  // AAudio would fall back to shared mode.
+  AAudioStreamBuilder_setSharingMode(builder, AAUDIO_SHARING_MODE_SHARED);
+  // Use the direction that was given at construction.
+  AAudioStreamBuilder_setDirection(builder, direction_);
+  // TODO(henrika): investigate performance using different performance modes.
+  AAudioStreamBuilder_setPerformanceMode(builder,
+                                         AAUDIO_PERFORMANCE_MODE_LOW_LATENCY);
+  // Given that WebRTC applications require low latency, our audio stream uses
+  // an asynchronous callback function to transfer data to and from the
+  // application. AAudio executes the callback in a higher-priority thread that
+  // has better performance.
+  AAudioStreamBuilder_setDataCallback(builder, DataCallback, this);
+  // Request that AAudio calls this function if any error occurs on a callback
+  // thread.
+  AAudioStreamBuilder_setErrorCallback(builder, ErrorCallback, this);
+}
+
+bool AAudioWrapper::OpenStream(AAudioStreamBuilder* builder) {
+  RTC_LOG(LS_INFO) << "OpenStream";
+  RTC_DCHECK(builder);
+  AAudioStream* stream = nullptr;
+  RETURN_ON_ERROR(AAudioStreamBuilder_openStream(builder, &stream), false);
+  stream_ = stream;
+  LogStreamConfiguration();
+  return true;
+}
+
+void AAudioWrapper::CloseStream() {
+  RTC_LOG(LS_INFO) << "CloseStream";
+  RTC_DCHECK(stream_);
+  LOG_ON_ERROR(AAudioStream_close(stream_));
+  stream_ = nullptr;
+}
+
+void AAudioWrapper::LogStreamConfiguration() {
+  RTC_DCHECK(stream_);
+  char ss_buf[1024];
+  rtc::SimpleStringBuilder ss(ss_buf);
+  ss << "Stream Configuration: ";
+  ss << "sample rate=" << sample_rate() << ", channels=" << channel_count();
+  ss << ", samples per frame=" << samples_per_frame();
+  ss << ", format=" << FormatToString(format());
+  ss << ", sharing mode=" << SharingModeToString(sharing_mode());
+  ss << ", performance mode=" << PerformanceModeToString(performance_mode());
+  ss << ", direction=" << DirectionToString(direction());
+  ss << ", device id=" << AAudioStream_getDeviceId(stream_);
+  ss << ", frames per callback=" << frames_per_callback();
+  RTC_LOG(LS_INFO) << ss.str();
+}
+
+void AAudioWrapper::LogStreamState() {
+  RTC_LOG(LS_INFO) << "AAudio stream state: "
+                   << AAudio_convertStreamStateToText(stream_state());
+}
+
+bool AAudioWrapper::VerifyStreamConfiguration() {
+  RTC_LOG(LS_INFO) << "VerifyStreamConfiguration";
+  RTC_DCHECK(stream_);
+  // TODO(henrika): should we verify device ID as well?
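+  // The builder settings above are requests, not guarantees; the opened
+  // stream may have been granted different values, so each one is checked
+  // here against what was asked for.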
+  if (AAudioStream_getSampleRate(stream_) != audio_parameters().sample_rate()) {
+    RTC_LOG(LS_ERROR) << "Stream unable to use requested sample rate";
+    return false;
+  }
+  if (AAudioStream_getChannelCount(stream_) !=
+      static_cast<int32_t>(audio_parameters().channels())) {
+    RTC_LOG(LS_ERROR) << "Stream unable to use requested channel count";
+    return false;
+  }
+  if (AAudioStream_getFormat(stream_) != AAUDIO_FORMAT_PCM_I16) {
+    RTC_LOG(LS_ERROR) << "Stream unable to use requested format";
+    return false;
+  }
+  if (AAudioStream_getSharingMode(stream_) != AAUDIO_SHARING_MODE_SHARED) {
+    RTC_LOG(LS_ERROR) << "Stream unable to use requested sharing mode";
+    return false;
+  }
+  if (AAudioStream_getPerformanceMode(stream_) !=
+      AAUDIO_PERFORMANCE_MODE_LOW_LATENCY) {
+    RTC_LOG(LS_ERROR) << "Stream unable to use requested performance mode";
+    return false;
+  }
+  if (AAudioStream_getDirection(stream_) != direction()) {
+    RTC_LOG(LS_ERROR) << "Stream direction could not be set";
+    return false;
+  }
+  if (AAudioStream_getSamplesPerFrame(stream_) !=
+      static_cast<int32_t>(audio_parameters().channels())) {
+    RTC_LOG(LS_ERROR) << "Invalid number of samples per frame";
+    return false;
+  }
+  return true;
+}
+
+bool AAudioWrapper::OptimizeBuffers() {
+  RTC_LOG(LS_INFO) << "OptimizeBuffers";
+  RTC_DCHECK(stream_);
+  // Maximum number of frames that can be filled without blocking.
+  RTC_LOG(LS_INFO) << "max buffer capacity in frames: "
+                   << buffer_capacity_in_frames();
+  // Query the number of frames that the application should read or write at
+  // one time for optimal performance.
+  int32_t frames_per_burst = AAudioStream_getFramesPerBurst(stream_);
+  RTC_LOG(LS_INFO) << "frames per burst for optimal performance: "
+                   << frames_per_burst;
+  frames_per_burst_ = frames_per_burst;
+  if (direction() == AAUDIO_DIRECTION_INPUT) {
+    // There is no point in calling setBufferSizeInFrames() for input streams
+    // since it has no effect on the performance (latency in this case).
+    return true;
+  }
+  // Set buffer size to same as burst size to guarantee lowest possible
+  // latency. This size might change for output streams if underruns are
+  // detected and automatic buffer adjustment is enabled.
+  AAudioStream_setBufferSizeInFrames(stream_, frames_per_burst);
+  int32_t buffer_size = AAudioStream_getBufferSizeInFrames(stream_);
+  if (buffer_size != frames_per_burst) {
+    RTC_LOG(LS_ERROR) << "Failed to use optimal buffer burst size";
+    return false;
+  }
+  // Maximum number of frames that can be filled without blocking.
+  RTC_LOG(LS_INFO) << "buffer burst size in frames: " << buffer_size;
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.h b/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.h
new file mode 100644
index 0000000000..1f925b96d3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/aaudio_wrapper.h
@@ -0,0 +1,127 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_WRAPPER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_WRAPPER_H_
+
+#include <aaudio/AAudio.h>
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+
+namespace webrtc {
+
+class AudioManager;
+
+// AAudio callback interface for audio transport to/from the AAudio stream.
+// The interface also contains an error callback method for notifications of
+// e.g. device changes.
+class AAudioObserverInterface {
+ public:
+  // Audio data will be passed in or out of this function depending on the
+  // direction of the audio stream. This callback function will be called on a
+  // real-time thread owned by AAudio.
+  virtual aaudio_data_callback_result_t OnDataCallback(void* audio_data,
+                                                       int32_t num_frames) = 0;
+  // AAudio will call this function if any error occurs on a callback thread.
+  // In response, this function could signal or launch another thread to reopen
+  // a stream on another device. Do not reopen the stream in this callback.
+  virtual void OnErrorCallback(aaudio_result_t error) = 0;
+
+ protected:
+  virtual ~AAudioObserverInterface() {}
+};
+
+// Utility class which wraps the C-based AAudio API into a more handy C++ class
+// where the underlying resources (AAudioStreamBuilder and AAudioStream) are
+// encapsulated. User must set the direction (in or out) at construction since
+// it defines the stream type and the direction of the data flow in the
+// AAudioObserverInterface.
+//
+// AAudio is a new Android C API introduced in the Android O (26) release.
+// It is designed for high-performance audio applications that require low
+// latency. Applications communicate with AAudio by reading and writing data
+// to streams.
+//
+// Each stream is attached to a single audio device, where each audio device
+// has a unique ID. The ID can be used to bind an audio stream to a specific
+// audio device but this implementation lets AAudio choose the default primary
+// device instead (device selection takes place in Java). A stream can only
+// move data in one direction. When a stream is opened, Android checks to
+// ensure that the audio device and stream direction agree.
+class AAudioWrapper {
+ public:
+  AAudioWrapper(AudioManager* audio_manager,
+                aaudio_direction_t direction,
+                AAudioObserverInterface* observer);
+  ~AAudioWrapper();
+
+  bool Init();
+  bool Start();
+  bool Stop();
+
+  // For output streams: estimates latency between writing an audio frame to
+  // the output stream and the time that same frame is played out on the output
+  // audio device.
+  // For input streams: estimates latency between reading an audio frame from
+  // the input stream and the time that same frame was recorded on the input
+  // audio device.
+  double EstimateLatencyMillis() const;
+
+  // Increases the internal buffer size for output streams by one burst size to
+  // reduce the risk of underruns. Can be used while a stream is active.
+  bool IncreaseOutputBufferSize();
+
+  // Drains the recording stream of any existing data by reading from it until
+  // it's empty. Can be used to clear out old data before starting a new audio
+  // session.
+  void ClearInputStream(void* audio_data, int32_t num_frames);
+
+  AAudioObserverInterface* observer() const;
+  AudioParameters audio_parameters() const;
+  int32_t samples_per_frame() const;
+  int32_t buffer_size_in_frames() const;
+  int32_t buffer_capacity_in_frames() const;
+  int32_t device_id() const;
+  int32_t xrun_count() const;
+  int32_t format() const;
+  int32_t sample_rate() const;
+  int32_t channel_count() const;
+  int32_t frames_per_callback() const;
+  aaudio_sharing_mode_t sharing_mode() const;
+  aaudio_performance_mode_t performance_mode() const;
+  aaudio_stream_state_t stream_state() const;
+  int64_t frames_written() const;
+  int64_t frames_read() const;
+  aaudio_direction_t direction() const { return direction_; }
+  AAudioStream* stream() const { return stream_; }
+  int32_t frames_per_burst() const { return frames_per_burst_; }
+
+ private:
+  void SetStreamConfiguration(AAudioStreamBuilder* builder);
+  bool OpenStream(AAudioStreamBuilder* builder);
+  void CloseStream();
+  void LogStreamConfiguration();
+  void LogStreamState();
+  bool VerifyStreamConfiguration();
+  bool OptimizeBuffers();
+
+  SequenceChecker thread_checker_;
+  SequenceChecker aaudio_thread_checker_;
+  AudioParameters audio_parameters_;
+  const aaudio_direction_t direction_;
+  AAudioObserverInterface* observer_ = nullptr;
+  AAudioStream* stream_ = nullptr;
+  int32_t frames_per_burst_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AAUDIO_WRAPPER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_common.h b/third_party/libwebrtc/modules/audio_device/android/audio_common.h
new file mode 100644
index 0000000000..81ea733aa4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_common.h
@@ -0,0 +1,28 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_COMMON_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_COMMON_H_
+
+namespace webrtc {
+
+const int kDefaultSampleRate = 44100;
+// Delay estimates for the two different supported modes. These values are
+// based on real-time round-trip delay estimates on a large set of devices and
+// they are lower bounds since the filter length is 128 ms, so the AEC works
+// for delays in the range [50, ~170] ms and [150, ~270] ms. Note that, in
+// most cases, the lowest delay estimate will not be utilized since devices
+// that support low-latency output audio often support HW AEC as well.
+const int kLowLatencyModeDelayEstimateInMilliseconds = 50;
+const int kHighLatencyModeDelayEstimateInMilliseconds = 150;
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_device_template.h b/third_party/libwebrtc/modules/audio_device/android/audio_device_template.h
new file mode 100644
index 0000000000..999c5878c6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_device_template.h
@@ -0,0 +1,435 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_DEVICE_TEMPLATE_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_DEVICE_TEMPLATE_H_
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+// InputType/OutputType can be any class that implements the capturing/
+// rendering part of the AudioDeviceGeneric API.
+// Construction and destruction must be done on one and the same thread. Each
+// internal implementation of InputType and OutputType will RTC_DCHECK if that
+// is not the case. All implemented methods must also be called on the same
+// thread. See comments in each InputType/OutputType class for more info.
+// It is possible to call the two static methods (SetAndroidAudioDeviceObjects
+// and ClearAndroidAudioDeviceObjects) from a different thread but both will
+// RTC_CHECK that the calling thread is attached to a Java VM.
+
+template <class InputType, class OutputType>
+class AudioDeviceTemplate : public AudioDeviceGeneric {
+ public:
+  AudioDeviceTemplate(AudioDeviceModule::AudioLayer audio_layer,
+                      AudioManager* audio_manager)
+      : audio_layer_(audio_layer),
+        audio_manager_(audio_manager),
+        output_(audio_manager_),
+        input_(audio_manager_),
+        initialized_(false) {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_CHECK(audio_manager);
+    audio_manager_->SetActiveAudioLayer(audio_layer);
+  }
+
+  virtual ~AudioDeviceTemplate() { RTC_LOG(LS_INFO) << __FUNCTION__; }
+
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    audioLayer = audio_layer_;
+    return 0;
+  }
+
+  InitStatus Init() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK(thread_checker_.IsCurrent());
+    RTC_DCHECK(!initialized_);
+    if (!audio_manager_->Init()) {
+      return InitStatus::OTHER_ERROR;
+    }
+    if (output_.Init() != 0) {
+      audio_manager_->Close();
+      return InitStatus::PLAYOUT_ERROR;
+    }
+    if (input_.Init() != 0) {
+      output_.Terminate();
+      audio_manager_->Close();
+      return InitStatus::RECORDING_ERROR;
+    }
+    initialized_ = true;
+    return InitStatus::OK;
+  }
+
+  int32_t Terminate() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK(thread_checker_.IsCurrent());
+    int32_t err = input_.Terminate();
+    err |= output_.Terminate();
+    err |= !audio_manager_->Close();
+    initialized_ = false;
+    RTC_DCHECK_EQ(err, 0);
+    return err;
+  }
+
+  bool Initialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK(thread_checker_.IsCurrent());
+    return initialized_;
+  }
+
+  int16_t PlayoutDevices() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 1;
+  }
+
+  int16_t RecordingDevices() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 1;
+  }
+
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t SetPlayoutDevice(uint16_t index) override {
+    // OK to use but it has no effect currently since device selection is
+    // done using Android APIs instead.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 0;
+  }
+
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t SetRecordingDevice(uint16_t index) override {
+    // OK to use but it has no effect currently since device selection is
+    // done using Android APIs instead.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 0;
+  }
+
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t PlayoutIsAvailable(bool& available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    available = true;
+    return 0;
+  }
+
+  int32_t InitPlayout() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.InitPlayout();
+  }
+
+  bool PlayoutIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.PlayoutIsInitialized();
+  }
+
+  int32_t RecordingIsAvailable(bool& available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    available = true;
+    return 0;
+  }
+
+  int32_t InitRecording() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return input_.InitRecording();
+  }
+
+  bool RecordingIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return input_.RecordingIsInitialized();
+  }
+
+  int32_t StartPlayout() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    if (!audio_manager_->IsCommunicationModeEnabled()) {
+      RTC_LOG(LS_WARNING)
+          << "The application should use MODE_IN_COMMUNICATION audio mode!";
+    }
+    return output_.StartPlayout();
+  }
+
+  int32_t StopPlayout() override {
+    // Avoid using audio manager (JNI/Java cost) if playout was inactive.
+    if (!Playing())
+      return 0;
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    int32_t err = output_.StopPlayout();
+    return err;
+  }
+
+  bool Playing() const override {
+    RTC_LOG(LS_INFO) << __FUNCTION__;
+    return output_.Playing();
+  }
+
+  int32_t StartRecording() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    if (!audio_manager_->IsCommunicationModeEnabled()) {
+      RTC_LOG(LS_WARNING)
+          << "The application should use MODE_IN_COMMUNICATION audio mode!";
+    }
+    return input_.StartRecording();
+  }
+
+  int32_t StopRecording() override {
+    // Avoid using audio manager (JNI/Java cost) if recording was inactive.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    if (!Recording())
+      return 0;
+    int32_t err = input_.StopRecording();
+    return err;
+  }
+
+  bool Recording() const override { return input_.Recording(); }
+
+  int32_t InitSpeaker() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 0;
+  }
+
+  bool SpeakerIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return true;
+  }
+
+  int32_t InitMicrophone() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return 0;
+  }
+
+  bool MicrophoneIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return true;
+  }
+
+  int32_t SpeakerVolumeIsAvailable(bool& available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.SpeakerVolumeIsAvailable(available);
+  }
+
+  int32_t SetSpeakerVolume(uint32_t volume) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.SetSpeakerVolume(volume);
+  }
+
+  int32_t SpeakerVolume(uint32_t& volume) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.SpeakerVolume(volume);
+  }
+
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.MaxSpeakerVolume(maxVolume);
+  }
+
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return output_.MinSpeakerVolume(minVolume);
+  }
+
+  int32_t MicrophoneVolumeIsAvailable(bool& available) override {
+    available = false;
+    return -1;
+  }
+
+  int32_t SetMicrophoneVolume(uint32_t volume) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t MicrophoneVolume(uint32_t& volume) const override {
+    RTC_CHECK_NOTREACHED();
+    return -1;
+  }
+
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t MinMicrophoneVolume(uint16_t& minVolume) const override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t SpeakerMuteIsAvailable(bool& available) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t SetSpeakerMute(bool enable) override { RTC_CHECK_NOTREACHED(); }
+
+  int32_t SpeakerMute(bool& enabled) const override { RTC_CHECK_NOTREACHED(); }
+
+  int32_t MicrophoneMuteIsAvailable(bool& available) override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  int32_t SetMicrophoneMute(bool enable) override { RTC_CHECK_NOTREACHED(); }
+
+  int32_t MicrophoneMute(bool& enabled) const override {
+    RTC_CHECK_NOTREACHED();
+  }
+
+  // Returns true if the audio manager has been configured to support stereo
+  // and false otherwise. Default is mono.
+  int32_t StereoPlayoutIsAvailable(bool& available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    available = audio_manager_->IsStereoPlayoutSupported();
+    return 0;
+  }
+
+  int32_t SetStereoPlayout(bool enable) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    bool available = audio_manager_->IsStereoPlayoutSupported();
+    // Android does not support changes between mono and stereo on the fly.
+    // Instead, the native audio layer is configured via the audio manager
+    // to either support mono or stereo. Calling this method is only allowed
+    // if it does not modify that configured state.
+    return (enable == available) ? 0 : -1;
+  }
+
+  int32_t StereoPlayout(bool& enabled) const override {
+    enabled = audio_manager_->IsStereoPlayoutSupported();
+    return 0;
+  }
+
+  int32_t StereoRecordingIsAvailable(bool& available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    available = audio_manager_->IsStereoRecordSupported();
+    return 0;
+  }
+
+  int32_t SetStereoRecording(bool enable) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    bool available = audio_manager_->IsStereoRecordSupported();
+    // Android does not support changes between mono and stereo on the fly.
+    // Instead, the native audio layer is configured via the audio manager
+    // to either support mono or stereo. Calling this method is only allowed
+    // if it does not modify that configured state.
+    return (enable == available) ? 0 : -1;
+  }
+
+  int32_t StereoRecording(bool& enabled) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    enabled = audio_manager_->IsStereoRecordSupported();
+    return 0;
+  }
+
+  int32_t PlayoutDelay(uint16_t& delay_ms) const override {
+    // Best guess we can do is to use half of the estimated total delay.
+    delay_ms = audio_manager_->GetDelayEstimateInMilliseconds() / 2;
+    RTC_DCHECK_GT(delay_ms, 0);
+    return 0;
+  }
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    output_.AttachAudioBuffer(audioBuffer);
+    input_.AttachAudioBuffer(audioBuffer);
+  }
+
+  // Returns true if the device both supports built in AEC and the device
+  // is not blacklisted.
+  // Currently, if OpenSL ES is used in both directions, this method will still
+  // report the correct value and it has the correct effect. As an example:
+  // a device supports built in AEC and this method returns true. Libjingle
+  // will then disable the WebRTC based AEC and that will work for all devices
+  // (mainly Nexus) even when OpenSL ES is used for input since our current
+  // implementation will enable built-in AEC by default also for OpenSL ES.
+  // The only "bad" thing that happens today is that when Libjingle calls
+  // OpenSLESRecorder::EnableBuiltInAEC() it will not have any real effect and
+  // a "Not Implemented" log will be filed. This non-perfect state will remain
+  // until I have added full support for audio effects based on OpenSL ES APIs.
+  bool BuiltInAECIsAvailable() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return audio_manager_->IsAcousticEchoCancelerSupported();
+  }
+
+  // TODO(henrika): add implementation for OpenSL ES based audio as well.
+  int32_t EnableBuiltInAEC(bool enable) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")";
+    RTC_CHECK(BuiltInAECIsAvailable()) << "HW AEC is not available";
+    return input_.EnableBuiltInAEC(enable);
+  }
+
+  // Returns true if the device both supports built in AGC and the device
+  // is not blacklisted.
+  // TODO(henrika): add implementation for OpenSL ES based audio as well.
+  // In addition, see comments for BuiltInAECIsAvailable().
+  bool BuiltInAGCIsAvailable() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return audio_manager_->IsAutomaticGainControlSupported();
+  }
+
+  // TODO(henrika): add implementation for OpenSL ES based audio as well.
+  int32_t EnableBuiltInAGC(bool enable) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")";
+    RTC_CHECK(BuiltInAGCIsAvailable()) << "HW AGC is not available";
+    return input_.EnableBuiltInAGC(enable);
+  }
+
+  // Returns true if the device both supports built in NS and the device
+  // is not blacklisted.
+  // Returns true if the device both supports built-in NS and is not
+  // blacklisted.
+  // TODO(henrika): add implementation for OpenSL ES based audio as well.
+  // In addition, see comments for BuiltInAECIsAvailable().
+  bool BuiltInNSIsAvailable() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    return audio_manager_->IsNoiseSuppressorSupported();
+  }
+
+  // TODO(henrika): add implementation for OpenSL ES based audio as well.
+  int32_t EnableBuiltInNS(bool enable) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")";
+    RTC_CHECK(BuiltInNSIsAvailable()) << "HW NS is not available";
+    return input_.EnableBuiltInNS(enable);
+  }
+
+ private:
+  SequenceChecker thread_checker_;
+
+  // Local copy of the audio layer set during construction of the
+  // AudioDeviceModuleImpl instance. Read-only value.
+  const AudioDeviceModule::AudioLayer audio_layer_;
+
+  // Non-owning raw pointer to the AudioManager instance given to us at
+  // construction. The real object is owned by AudioDeviceModuleImpl and its
+  // lifetime is the same as that of the AudioDeviceModuleImpl, hence there
+  // is no risk of reading a NULL pointer at any time in this class.
+  AudioManager* const audio_manager_;
+
+  OutputType output_;
+
+  InputType input_;
+
+  bool initialized_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_DEVICE_TEMPLATE_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_device_unittest.cc b/third_party/libwebrtc/modules/audio_device/android/audio_device_unittest.cc
new file mode 100644
index 0000000000..d9d52cdcdc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_device_unittest.cc
@@ -0,0 +1,1018 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/include/audio_device.h"
+
+#include <algorithm>
+#include <limits>
+#include <list>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/scoped_refptr.h"
+#include "api/task_queue/default_task_queue_factory.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/android/build_info.h"
+#include "modules/audio_device/android/ensure_initialized.h"
+#include "modules/audio_device/audio_device_impl.h"
+#include "modules/audio_device/include/mock_audio_transport.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/event.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/time_utils.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using std::cout;
+using std::endl;
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Gt;
+using ::testing::Invoke;
+using ::testing::NiceMock;
+using ::testing::NotNull;
+using ::testing::Return;
+
+// #define ENABLE_DEBUG_PRINTF
+#ifdef ENABLE_DEBUG_PRINTF
+#define PRINTD(...) fprintf(stderr, __VA_ARGS__);
+#else
+#define PRINTD(...) ((void)0)
+#endif
+#define PRINT(...) fprintf(stderr, __VA_ARGS__);
+
+namespace webrtc {
+
+// Number of callbacks (input or output) the test waits for before we set
+// an event indicating that the test was OK.
+static const size_t kNumCallbacks = 10; +// Max amount of time we wait for an event to be set while counting callbacks. +static constexpr TimeDelta kTestTimeOut = TimeDelta::Seconds(10); +// Average number of audio callbacks per second assuming 10ms packet size. +static const size_t kNumCallbacksPerSecond = 100; +// Play out a test file during this time (unit is in seconds). +static const int kFilePlayTimeInSec = 5; +static const size_t kBitsPerSample = 16; +static const size_t kBytesPerSample = kBitsPerSample / 8; +// Run the full-duplex test during this time (unit is in seconds). +// Note that first `kNumIgnoreFirstCallbacks` are ignored. +static constexpr TimeDelta kFullDuplexTime = TimeDelta::Seconds(5); +// Wait for the callback sequence to stabilize by ignoring this amount of the +// initial callbacks (avoids initial FIFO access). +// Only used in the RunPlayoutAndRecordingInFullDuplex test. +static const size_t kNumIgnoreFirstCallbacks = 50; +// Sets the number of impulses per second in the latency test. +static const int kImpulseFrequencyInHz = 1; +// Length of round-trip latency measurements. Number of transmitted impulses +// is kImpulseFrequencyInHz * kMeasureLatencyTime - 1. +static constexpr TimeDelta kMeasureLatencyTime = TimeDelta::Seconds(11); +// Utilized in round-trip latency measurements to avoid capturing noise samples. +static const int kImpulseThreshold = 1000; +static const char kTag[] = "[..........] "; + +enum TransportType { + kPlayout = 0x1, + kRecording = 0x2, +}; + +// Interface for processing the audio stream. Real implementations can e.g. +// run audio in loopback, read audio from a file or perform latency +// measurements. +class AudioStreamInterface { + public: + virtual void Write(const void* source, size_t num_frames) = 0; + virtual void Read(void* destination, size_t num_frames) = 0; + + protected: + virtual ~AudioStreamInterface() {} +}; + +// Reads audio samples from a PCM file where the file is stored in memory at +// construction. +class FileAudioStream : public AudioStreamInterface { + public: + FileAudioStream(size_t num_callbacks, + absl::string_view file_name, + int sample_rate) + : file_size_in_bytes_(0), sample_rate_(sample_rate), file_pos_(0) { + file_size_in_bytes_ = test::GetFileSize(file_name); + sample_rate_ = sample_rate; + EXPECT_GE(file_size_in_callbacks(), num_callbacks) + << "Size of test file is not large enough to last during the test."; + const size_t num_16bit_samples = + test::GetFileSize(file_name) / kBytesPerSample; + file_.reset(new int16_t[num_16bit_samples]); + FILE* audio_file = fopen(std::string(file_name).c_str(), "rb"); + EXPECT_NE(audio_file, nullptr); + size_t num_samples_read = + fread(file_.get(), sizeof(int16_t), num_16bit_samples, audio_file); + EXPECT_EQ(num_samples_read, num_16bit_samples); + fclose(audio_file); + } + + // AudioStreamInterface::Write() is not implemented. + void Write(const void* source, size_t num_frames) override {} + + // Read samples from file stored in memory (at construction) and copy + // `num_frames` (<=> 10ms) to the `destination` byte buffer. 
+  void Read(void* destination, size_t num_frames) override {
+    memcpy(destination, static_cast<int16_t*>(&file_[file_pos_]),
+           num_frames * sizeof(int16_t));
+    file_pos_ += num_frames;
+  }
+
+  int file_size_in_seconds() const {
+    return static_cast<int>(file_size_in_bytes_ /
+                            (kBytesPerSample * sample_rate_));
+  }
+  size_t file_size_in_callbacks() const {
+    return file_size_in_seconds() * kNumCallbacksPerSecond;
+  }
+
+ private:
+  size_t file_size_in_bytes_;
+  int sample_rate_;
+  std::unique_ptr<int16_t[]> file_;
+  size_t file_pos_;
+};
+
+// Simple first-in first-out (FIFO) class that wraps a list of 16-bit audio
+// buffers of fixed size and allows Write and Read operations. The idea is to
+// store recorded audio buffers (using Write) and then read (using Read) these
+// stored buffers with as short a delay as possible when the audio layer needs
+// data to play out. The number of buffers in the FIFO will stabilize under
+// normal conditions since there will be a balance between Write and Read
+// calls. The container is a std::list and access is protected with a lock
+// since each side (playout and recording) is driven by its own thread.
+class FifoAudioStream : public AudioStreamInterface {
+ public:
+  explicit FifoAudioStream(size_t frames_per_buffer)
+      : frames_per_buffer_(frames_per_buffer),
+        bytes_per_buffer_(frames_per_buffer_ * sizeof(int16_t)),
+        fifo_(new AudioBufferList),
+        largest_size_(0),
+        total_written_elements_(0),
+        write_count_(0) {
+    EXPECT_NE(fifo_.get(), nullptr);
+  }
+
+  ~FifoAudioStream() { Flush(); }
+
+  // Allocate new memory, copy `num_frames` samples from `source` into memory
+  // and add a pointer to the memory location to the end of the list.
+  // Increases the size of the FIFO by one element.
+  void Write(const void* source, size_t num_frames) override {
+    ASSERT_EQ(num_frames, frames_per_buffer_);
+    PRINTD("+");
+    if (write_count_++ < kNumIgnoreFirstCallbacks) {
+      return;
+    }
+    int16_t* memory = new int16_t[frames_per_buffer_];
+    memcpy(static_cast<int16_t*>(&memory[0]), source, bytes_per_buffer_);
+    MutexLock lock(&lock_);
+    fifo_->push_back(memory);
+    const size_t size = fifo_->size();
+    if (size > largest_size_) {
+      largest_size_ = size;
+      PRINTD("(%zu)", largest_size_);
+    }
+    total_written_elements_ += size;
+  }
+
+  // Read a pointer to a data buffer from the front of the list, copy
+  // `num_frames` of stored data into `destination` and delete the utilized
+  // memory allocation. Decreases the size of the FIFO by one element.
+  void Read(void* destination, size_t num_frames) override {
+    ASSERT_EQ(num_frames, frames_per_buffer_);
+    PRINTD("-");
+    MutexLock lock(&lock_);
+    if (fifo_->empty()) {
+      memset(destination, 0, bytes_per_buffer_);
+    } else {
+      int16_t* memory = fifo_->front();
+      fifo_->pop_front();
+      memcpy(destination, static_cast<int16_t*>(&memory[0]), bytes_per_buffer_);
+      delete[] memory;
+    }
+  }
+
+  size_t size() const { return fifo_->size(); }
+
+  size_t largest_size() const { return largest_size_; }
+
+  size_t average_size() const {
+    return (total_written_elements_ == 0)
+               ? 0.0
+               : 0.5 + static_cast<double>(total_written_elements_) /
+                           (write_count_ - kNumIgnoreFirstCallbacks);
+  }
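+
+  // Worked example (illustration only): with kNumCallbacksPerSecond = 100,
+  // kFullDuplexTime = 5 s and kNumIgnoreFirstCallbacks = 50, roughly
+  // 500 Write() calls are made and the first 50 are ignored, so the average
+  // is taken over ~450 samples of the FIFO size. Each buffered element
+  // corresponds to 10 ms of audio, so an average_size() of 6 would mean that
+  // playout lags recording by about 60 ms on average.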
+
+ private:
+  void Flush() {
+    for (auto it = fifo_->begin(); it != fifo_->end(); ++it) {
+      delete[] *it;
+    }
+    fifo_->clear();
+  }
+
+  using AudioBufferList = std::list<int16_t*>;
+  Mutex lock_;
+  const size_t frames_per_buffer_;
+  const size_t bytes_per_buffer_;
+  std::unique_ptr<AudioBufferList> fifo_;
+  size_t largest_size_;
+  size_t total_written_elements_;
+  size_t write_count_;
+};
+
+// Inserts periodic impulses and measures the latency between the time of
+// transmission and the time of receiving the same impulse.
+// Usage requires a special piece of hardware called an Audio Loopback Dongle.
+// See http://source.android.com/devices/audio/loopback.html for details.
+class LatencyMeasuringAudioStream : public AudioStreamInterface {
+ public:
+  explicit LatencyMeasuringAudioStream(size_t frames_per_buffer)
+      : frames_per_buffer_(frames_per_buffer),
+        bytes_per_buffer_(frames_per_buffer_ * sizeof(int16_t)),
+        play_count_(0),
+        rec_count_(0),
+        pulse_time_(0) {}
+
+  // Insert periodic impulses in the first two samples of `destination`.
+  void Read(void* destination, size_t num_frames) override {
+    ASSERT_EQ(num_frames, frames_per_buffer_);
+    if (play_count_ == 0) {
+      PRINT("[");
+    }
+    play_count_++;
+    memset(destination, 0, bytes_per_buffer_);
+    if (play_count_ % (kNumCallbacksPerSecond / kImpulseFrequencyInHz) == 0) {
+      if (pulse_time_ == 0) {
+        pulse_time_ = rtc::TimeMillis();
+      }
+      PRINT(".");
+      const int16_t impulse = std::numeric_limits<int16_t>::max();
+      int16_t* ptr16 = static_cast<int16_t*>(destination);
+      for (size_t i = 0; i < 2; ++i) {
+        ptr16[i] = impulse;
+      }
+    }
+  }
+
+  // Detect received impulses in `source`, derive the time between
+  // transmission and detection and add the calculated delay to the list of
+  // latencies.
+  void Write(const void* source, size_t num_frames) override {
+    ASSERT_EQ(num_frames, frames_per_buffer_);
+    rec_count_++;
+    if (pulse_time_ == 0) {
+      // Avoid detection of a new impulse response until a new impulse has
+      // been transmitted (sets `pulse_time_` to a value larger than zero).
+      return;
+    }
+    const int16_t* ptr16 = static_cast<const int16_t*>(source);
+    std::vector<int16_t> vec(ptr16, ptr16 + num_frames);
+    // Find the max value in the audio buffer.
+    int max = *std::max_element(vec.begin(), vec.end());
+    // Find the index (element position in the vector) of the max element.
+    int index_of_max =
+        std::distance(vec.begin(), std::find(vec.begin(), vec.end(), max));
+    if (max > kImpulseThreshold) {
+      PRINTD("(%d,%d)", max, index_of_max);
+      int64_t now_time = rtc::TimeMillis();
+      int extra_delay = IndexToMilliseconds(static_cast<double>(index_of_max));
+      PRINTD("[%d]", static_cast<int>(now_time - pulse_time_));
+      PRINTD("[%d]", extra_delay);
+      // The total latency is the difference between the transmit time and the
+      // detection time plus the extra delay within the buffer in which we
+      // detected the received impulse. The impulse is transmitted at sample 0
+      // but can be received at sample N where N > 0. The term `extra_delay`
+      // accounts for N and is a value between 0 and 10 ms.
+      latencies_.push_back(now_time - pulse_time_ + extra_delay);
+      pulse_time_ = 0;
+    } else {
+      PRINTD("-");
+    }
+  }
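+
+  // Worked example (illustration only): assume 48 kHz and 10 ms buffers, so
+  // frames_per_buffer_ = 480. If an impulse sent at t0 is detected 250 ms
+  // later at index 96 of the incoming buffer, then
+  //   extra_delay = IndexToMilliseconds(96) = int(10.0 * 96 / 480 + 0.5) = 2
+  // and the stored round-trip latency becomes 250 + 2 = 252 ms.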
+
+  size_t num_latency_values() const { return latencies_.size(); }
+
+  int min_latency() const {
+    if (latencies_.empty())
+      return 0;
+    return *std::min_element(latencies_.begin(), latencies_.end());
+  }
+
+  int max_latency() const {
+    if (latencies_.empty())
+      return 0;
+    return *std::max_element(latencies_.begin(), latencies_.end());
+  }
+
+  int average_latency() const {
+    if (latencies_.empty())
+      return 0;
+    return 0.5 + static_cast<double>(std::accumulate(latencies_.begin(),
+                                                     latencies_.end(), 0)) /
+                     latencies_.size();
+  }
+
+  void PrintResults() const {
+    PRINT("] ");
+    for (auto it = latencies_.begin(); it != latencies_.end(); ++it) {
+      PRINT("%d ", *it);
+    }
+    PRINT("\n");
+    PRINT("%s[min, max, avg]=[%d, %d, %d] ms\n", kTag, min_latency(),
+          max_latency(), average_latency());
+  }
+
+  int IndexToMilliseconds(double index) const {
+    return static_cast<int>(10.0 * (index / frames_per_buffer_) + 0.5);
+  }
+
+ private:
+  const size_t frames_per_buffer_;
+  const size_t bytes_per_buffer_;
+  size_t play_count_;
+  size_t rec_count_;
+  int64_t pulse_time_;
+  std::vector<int> latencies_;
+};
+
+// Mocks the AudioTransport object and proxies actions for the two callbacks
+// (RecordedDataIsAvailable and NeedMorePlayData) to different implementations
+// of AudioStreamInterface.
+class MockAudioTransportAndroid : public test::MockAudioTransport {
+ public:
+  explicit MockAudioTransportAndroid(int type)
+      : num_callbacks_(0),
+        type_(type),
+        play_count_(0),
+        rec_count_(0),
+        audio_stream_(nullptr) {}
+
+  virtual ~MockAudioTransportAndroid() {}
+
+  // Set the default actions of the mock object. We are delegating to fake
+  // implementations (of AudioStreamInterface) here.
+  void HandleCallbacks(rtc::Event* test_is_done,
+                       AudioStreamInterface* audio_stream,
+                       int num_callbacks) {
+    test_is_done_ = test_is_done;
+    audio_stream_ = audio_stream;
+    num_callbacks_ = num_callbacks;
+    if (play_mode()) {
+      ON_CALL(*this, NeedMorePlayData(_, _, _, _, _, _, _, _))
+          .WillByDefault(
+              Invoke(this, &MockAudioTransportAndroid::RealNeedMorePlayData));
+    }
+    if (rec_mode()) {
+      ON_CALL(*this, RecordedDataIsAvailable(_, _, _, _, _, _, _, _, _, _))
+          .WillByDefault(Invoke(
+              this, &MockAudioTransportAndroid::RealRecordedDataIsAvailable));
+    }
+  }
+
+  int32_t RealRecordedDataIsAvailable(const void* audioSamples,
+                                      const size_t nSamples,
+                                      const size_t nBytesPerSample,
+                                      const size_t nChannels,
+                                      const uint32_t samplesPerSec,
+                                      const uint32_t totalDelayMS,
+                                      const int32_t clockDrift,
+                                      const uint32_t currentMicLevel,
+                                      const bool keyPressed,
+                                      uint32_t& newMicLevel) {  // NOLINT
+    EXPECT_TRUE(rec_mode()) << "No test is expecting these callbacks.";
+    rec_count_++;
+    // Process the recorded audio stream if an AudioStreamInterface
+    // implementation exists.
+    if (audio_stream_) {
+      audio_stream_->Write(audioSamples, nSamples);
+    }
+    if (ReceivedEnoughCallbacks()) {
+      test_is_done_->Set();
+    }
+    return 0;
+  }
+
+  int32_t RealNeedMorePlayData(const size_t nSamples,
+                               const size_t nBytesPerSample,
+                               const size_t nChannels,
+                               const uint32_t samplesPerSec,
+                               void* audioSamples,
+                               size_t& nSamplesOut,  // NOLINT
+                               int64_t* elapsed_time_ms,
+                               int64_t* ntp_time_ms) {
+    EXPECT_TRUE(play_mode()) << "No test is expecting these callbacks.";
+    play_count_++;
+    nSamplesOut = nSamples;
+    // Read (possibly processed) audio stream samples to be played out if an
+    // AudioStreamInterface implementation exists.
+    if (audio_stream_) {
+      audio_stream_->Read(audioSamples, nSamples);
+    }
+    if (ReceivedEnoughCallbacks()) {
+      test_is_done_->Set();
+    }
+    return 0;
+  }
+
+  bool ReceivedEnoughCallbacks() {
+    bool recording_done = false;
+    if (rec_mode())
+      recording_done = rec_count_ >= num_callbacks_;
+    else
+      recording_done = true;
+
+    bool playout_done = false;
+    if (play_mode())
+      playout_done = play_count_ >= num_callbacks_;
+    else
+      playout_done = true;
+
+    return recording_done && playout_done;
+  }
+
+  bool play_mode() const { return type_ & kPlayout; }
+  bool rec_mode() const { return type_ & kRecording; }
+
+ private:
+  rtc::Event* test_is_done_;
+  size_t num_callbacks_;
+  int type_;
+  size_t play_count_;
+  size_t rec_count_;
+  AudioStreamInterface* audio_stream_;
+  std::unique_ptr<LatencyMeasuringAudioStream> latency_audio_stream_;
+};
+
+// AudioDeviceTest test fixture.
+class AudioDeviceTest : public ::testing::Test {
+ protected:
+  AudioDeviceTest() : task_queue_factory_(CreateDefaultTaskQueueFactory()) {
+    // One-time initialization of JVM and application context. Ensures that we
+    // can make calls between C++ and Java. Initializes both the Java and
+    // OpenSL ES implementations.
+    webrtc::audiodevicemodule::EnsureInitialized();
+    // Creates an audio device using a default audio layer.
+    audio_device_ = CreateAudioDevice(AudioDeviceModule::kPlatformDefaultAudio);
+    EXPECT_NE(audio_device_.get(), nullptr);
+    EXPECT_EQ(0, audio_device_->Init());
+    playout_parameters_ = audio_manager()->GetPlayoutAudioParameters();
+    record_parameters_ = audio_manager()->GetRecordAudioParameters();
+    build_info_.reset(new BuildInfo());
+  }
+  virtual ~AudioDeviceTest() { EXPECT_EQ(0, audio_device_->Terminate()); }
+
+  int playout_sample_rate() const { return playout_parameters_.sample_rate(); }
+  int record_sample_rate() const { return record_parameters_.sample_rate(); }
+  size_t playout_channels() const { return playout_parameters_.channels(); }
+  size_t record_channels() const { return record_parameters_.channels(); }
+  size_t playout_frames_per_10ms_buffer() const {
+    return playout_parameters_.frames_per_10ms_buffer();
+  }
+  size_t record_frames_per_10ms_buffer() const {
+    return record_parameters_.frames_per_10ms_buffer();
+  }
+
+  int total_delay_ms() const {
+    return audio_manager()->GetDelayEstimateInMilliseconds();
+  }
+
+  rtc::scoped_refptr<AudioDeviceModule> audio_device() const {
+    return audio_device_;
+  }
+
+  AudioDeviceModuleImpl* audio_device_impl() const {
+    return static_cast<AudioDeviceModuleImpl*>(audio_device_.get());
+  }
+
+  AudioManager* audio_manager() const {
+    return audio_device_impl()->GetAndroidAudioManagerForTest();
+  }
+
+  AudioManager* GetAudioManager(AudioDeviceModule* adm) const {
+    return static_cast<AudioDeviceModuleImpl*>(adm)
+        ->GetAndroidAudioManagerForTest();
+  }
+
+  AudioDeviceBuffer* audio_device_buffer() const {
+    return audio_device_impl()->GetAudioDeviceBuffer();
+  }
+
+  rtc::scoped_refptr<AudioDeviceModule> CreateAudioDevice(
+      AudioDeviceModule::AudioLayer audio_layer) {
+    rtc::scoped_refptr<AudioDeviceModule> module(
+        AudioDeviceModule::Create(audio_layer, task_queue_factory_.get()));
+    return module;
+  }
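+
+  // Example (hypothetical standalone usage, illustration only): outside of
+  // this fixture, an ADM with an explicit audio layer can be created in the
+  // same way, provided EnsureInitialized() has registered the JVM first:
+  //
+  //   auto factory = webrtc::CreateDefaultTaskQueueFactory();
+  //   rtc::scoped_refptr<webrtc::AudioDeviceModule> adm(
+  //       webrtc::AudioDeviceModule::Create(
+  //           webrtc::AudioDeviceModule::kAndroidOpenSLESAudio,
+  //           factory.get()));
+  //   if (adm && adm->Init() == 0) {
+  //     // Ready for RegisterAudioCallback()/StartPlayout() etc.
+  //   }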
+
+  // Returns a file name relative to the resource root given a sample rate.
+  std::string GetFileName(int sample_rate) {
+    EXPECT_TRUE(sample_rate == 48000 || sample_rate == 44100);
+    char fname[64];
+    snprintf(fname, sizeof(fname), "audio_device/audio_short%d",
+             sample_rate / 1000);
+    std::string file_name(webrtc::test::ResourcePath(fname, "pcm"));
+    EXPECT_TRUE(test::FileExists(file_name));
+#ifdef ENABLE_PRINTF
+    PRINT("file name: %s\n", file_name.c_str());
+    const size_t bytes = test::GetFileSize(file_name);
+    PRINT("file size: %zu [bytes]\n", bytes);
+    PRINT("file size: %zu [samples]\n", bytes / kBytesPerSample);
+    const int seconds =
+        static_cast<int>(bytes / (sample_rate * kBytesPerSample));
+    PRINT("file size: %d [secs]\n", seconds);
+    PRINT("file size: %zu [callbacks]\n", seconds * kNumCallbacksPerSecond);
+#endif
+    return file_name;
+  }
+
+  AudioDeviceModule::AudioLayer GetActiveAudioLayer() const {
+    AudioDeviceModule::AudioLayer audio_layer;
+    EXPECT_EQ(0, audio_device()->ActiveAudioLayer(&audio_layer));
+    return audio_layer;
+  }
+
+  int TestDelayOnAudioLayer(
+      const AudioDeviceModule::AudioLayer& layer_to_test) {
+    rtc::scoped_refptr<AudioDeviceModule> audio_device;
+    audio_device = CreateAudioDevice(layer_to_test);
+    EXPECT_NE(audio_device.get(), nullptr);
+    AudioManager* audio_manager = GetAudioManager(audio_device.get());
+    EXPECT_NE(audio_manager, nullptr);
+    return audio_manager->GetDelayEstimateInMilliseconds();
+  }
+
+  AudioDeviceModule::AudioLayer TestActiveAudioLayer(
+      const AudioDeviceModule::AudioLayer& layer_to_test) {
+    rtc::scoped_refptr<AudioDeviceModule> audio_device;
+    audio_device = CreateAudioDevice(layer_to_test);
+    EXPECT_NE(audio_device.get(), nullptr);
+    AudioDeviceModule::AudioLayer active;
+    EXPECT_EQ(0, audio_device->ActiveAudioLayer(&active));
+    return active;
+  }
+
+  bool DisableTestForThisDevice(absl::string_view model) {
+    return (build_info_->GetDeviceModel() == model);
+  }
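+
+  // Worked example (illustration only): GetFileName(48000) builds the string
+  // "audio_device/audio_short48" and, via test::ResourcePath(), resolves to
+  // a platform-specific path ending in "audio_device/audio_short48.pcm";
+  // 44100 maps to "audio_short44" since 44100 / 1000 truncates to 44.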
+
+  // Volume control is currently only supported for the Java output audio
+  // layer. For OpenSL ES, the internal stream volume is always at the max
+  // level and there is no need for this test to set it to max.
+  bool AudioLayerSupportsVolumeControl() const {
+    return GetActiveAudioLayer() == AudioDeviceModule::kAndroidJavaAudio;
+  }
+
+  void SetMaxPlayoutVolume() {
+    if (!AudioLayerSupportsVolumeControl())
+      return;
+    uint32_t max_volume;
+    EXPECT_EQ(0, audio_device()->MaxSpeakerVolume(&max_volume));
+    EXPECT_EQ(0, audio_device()->SetSpeakerVolume(max_volume));
+  }
+
+  void DisableBuiltInAECIfAvailable() {
+    if (audio_device()->BuiltInAECIsAvailable()) {
+      EXPECT_EQ(0, audio_device()->EnableBuiltInAEC(false));
+    }
+  }
+
+  void StartPlayout() {
+    EXPECT_FALSE(audio_device()->PlayoutIsInitialized());
+    EXPECT_FALSE(audio_device()->Playing());
+    EXPECT_EQ(0, audio_device()->InitPlayout());
+    EXPECT_TRUE(audio_device()->PlayoutIsInitialized());
+    EXPECT_EQ(0, audio_device()->StartPlayout());
+    EXPECT_TRUE(audio_device()->Playing());
+  }
+
+  void StopPlayout() {
+    EXPECT_EQ(0, audio_device()->StopPlayout());
+    EXPECT_FALSE(audio_device()->Playing());
+    EXPECT_FALSE(audio_device()->PlayoutIsInitialized());
+  }
+
+  void StartRecording() {
+    EXPECT_FALSE(audio_device()->RecordingIsInitialized());
+    EXPECT_FALSE(audio_device()->Recording());
+    EXPECT_EQ(0, audio_device()->InitRecording());
+    EXPECT_TRUE(audio_device()->RecordingIsInitialized());
+    EXPECT_EQ(0, audio_device()->StartRecording());
+    EXPECT_TRUE(audio_device()->Recording());
+  }
+
+  void StopRecording() {
+    EXPECT_EQ(0, audio_device()->StopRecording());
+    EXPECT_FALSE(audio_device()->Recording());
+  }
+
+  int GetMaxSpeakerVolume() const {
+    uint32_t max_volume(0);
+    EXPECT_EQ(0, audio_device()->MaxSpeakerVolume(&max_volume));
+    return max_volume;
+  }
+
+  int GetMinSpeakerVolume() const {
+    uint32_t min_volume(0);
+    EXPECT_EQ(0, audio_device()->MinSpeakerVolume(&min_volume));
+    return min_volume;
+  }
+
+  int GetSpeakerVolume() const {
+    uint32_t volume(0);
+    EXPECT_EQ(0, audio_device()->SpeakerVolume(&volume));
+    return volume;
+  }
+
+  rtc::Event test_is_done_;
+  std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+  rtc::scoped_refptr<AudioDeviceModule> audio_device_;
+  AudioParameters playout_parameters_;
+  AudioParameters record_parameters_;
+  std::unique_ptr<BuildInfo> build_info_;
+};
+
+TEST_F(AudioDeviceTest, ConstructDestruct) {
+  // Using the test fixture to create and destruct the audio device module.
+}
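+
+// Summary (derived from the test below, for illustration): the expected
+// default layer selection can be read as a small decision table:
+//
+//   AAudio supported      -> kAndroidAAudioAudio
+//   low-latency out + in  -> kAndroidOpenSLESAudio
+//   low-latency out only  -> kAndroidJavaInputAndOpenSLESOutputAudio
+//   otherwise             -> kAndroidJavaAudio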
+
+// We always ask for a default audio layer when the ADM is constructed. But
+// the ADM will then internally set the best suitable combination of audio
+// layers, for input and output, based on whether low-latency output and/or
+// input audio in combination with OpenSL ES is supported or not. This test
+// ensures that the correct selection is done.
+TEST_F(AudioDeviceTest, VerifyDefaultAudioLayer) {
+  const AudioDeviceModule::AudioLayer audio_layer = GetActiveAudioLayer();
+  bool low_latency_output = audio_manager()->IsLowLatencyPlayoutSupported();
+  bool low_latency_input = audio_manager()->IsLowLatencyRecordSupported();
+  bool aaudio = audio_manager()->IsAAudioSupported();
+  AudioDeviceModule::AudioLayer expected_audio_layer;
+  if (aaudio) {
+    expected_audio_layer = AudioDeviceModule::kAndroidAAudioAudio;
+  } else if (low_latency_output && low_latency_input) {
+    expected_audio_layer = AudioDeviceModule::kAndroidOpenSLESAudio;
+  } else if (low_latency_output && !low_latency_input) {
+    expected_audio_layer =
+        AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio;
+  } else {
+    expected_audio_layer = AudioDeviceModule::kAndroidJavaAudio;
+  }
+  EXPECT_EQ(expected_audio_layer, audio_layer);
+}
+
+// Verify that it is possible to explicitly create the different types of
+// supported ADMs. These tests override the default selection of native audio
+// layer by ignoring whether the device supports low-latency output or not.
+TEST_F(AudioDeviceTest, CorrectAudioLayerIsUsedForCombinedJavaOpenSLCombo) {
+  AudioDeviceModule::AudioLayer expected_layer =
+      AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio;
+  AudioDeviceModule::AudioLayer active_layer =
+      TestActiveAudioLayer(expected_layer);
+  EXPECT_EQ(expected_layer, active_layer);
+}
+
+TEST_F(AudioDeviceTest, CorrectAudioLayerIsUsedForJavaInBothDirections) {
+  AudioDeviceModule::AudioLayer expected_layer =
+      AudioDeviceModule::kAndroidJavaAudio;
+  AudioDeviceModule::AudioLayer active_layer =
+      TestActiveAudioLayer(expected_layer);
+  EXPECT_EQ(expected_layer, active_layer);
+}
+
+TEST_F(AudioDeviceTest, CorrectAudioLayerIsUsedForOpenSLInBothDirections) {
+  AudioDeviceModule::AudioLayer expected_layer =
+      AudioDeviceModule::kAndroidOpenSLESAudio;
+  AudioDeviceModule::AudioLayer active_layer =
+      TestActiveAudioLayer(expected_layer);
+  EXPECT_EQ(expected_layer, active_layer);
+}
+
+// TODO(bugs.webrtc.org/8914)
+#if !defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+#define MAYBE_CorrectAudioLayerIsUsedForAAudioInBothDirections \
+  DISABLED_CorrectAudioLayerIsUsedForAAudioInBothDirections
+#else
+#define MAYBE_CorrectAudioLayerIsUsedForAAudioInBothDirections \
+  CorrectAudioLayerIsUsedForAAudioInBothDirections
+#endif
+TEST_F(AudioDeviceTest,
+       MAYBE_CorrectAudioLayerIsUsedForAAudioInBothDirections) {
+  AudioDeviceModule::AudioLayer expected_layer =
+      AudioDeviceModule::kAndroidAAudioAudio;
+  AudioDeviceModule::AudioLayer active_layer =
+      TestActiveAudioLayer(expected_layer);
+  EXPECT_EQ(expected_layer, active_layer);
+}
+
+// TODO(bugs.webrtc.org/8914)
+#if !defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+#define MAYBE_CorrectAudioLayerIsUsedForCombinedJavaAAudioCombo \
+  DISABLED_CorrectAudioLayerIsUsedForCombinedJavaAAudioCombo
+#else
+#define MAYBE_CorrectAudioLayerIsUsedForCombinedJavaAAudioCombo \
+  CorrectAudioLayerIsUsedForCombinedJavaAAudioCombo
+#endif
+TEST_F(AudioDeviceTest,
+       MAYBE_CorrectAudioLayerIsUsedForCombinedJavaAAudioCombo) {
+  AudioDeviceModule::AudioLayer expected_layer =
+      AudioDeviceModule::kAndroidJavaInputAndAAudioOutputAudio;
+  AudioDeviceModule::AudioLayer active_layer =
+      TestActiveAudioLayer(expected_layer);
+  EXPECT_EQ(expected_layer, active_layer);
+}
+
+// The Android ADM supports two different delay reporting modes: one for the
+// low-latency output path (in combination with OpenSL ES), and one for the
+// high-latency output path (Java backends in both directions). These two
+// tests verify that the audio manager reports the correct delay estimate
+// given the selected audio layer. Note that this delay estimate will only be
+// utilized if the HW AEC is disabled.
+TEST_F(AudioDeviceTest, UsesCorrectDelayEstimateForHighLatencyOutputPath) {
+  EXPECT_EQ(kHighLatencyModeDelayEstimateInMilliseconds,
+            TestDelayOnAudioLayer(AudioDeviceModule::kAndroidJavaAudio));
+}
+
+TEST_F(AudioDeviceTest, UsesCorrectDelayEstimateForLowLatencyOutputPath) {
+  EXPECT_EQ(kLowLatencyModeDelayEstimateInMilliseconds,
+            TestDelayOnAudioLayer(
+                AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio));
+}
+
+// Ensure that the ADM internal audio device buffer is configured to use the
+// correct set of parameters.
+TEST_F(AudioDeviceTest, VerifyAudioDeviceBufferParameters) {
+  EXPECT_EQ(playout_parameters_.sample_rate(),
+            static_cast<int>(audio_device_buffer()->PlayoutSampleRate()));
+  EXPECT_EQ(record_parameters_.sample_rate(),
+            static_cast<int>(audio_device_buffer()->RecordingSampleRate()));
+  EXPECT_EQ(playout_parameters_.channels(),
+            audio_device_buffer()->PlayoutChannels());
+  EXPECT_EQ(record_parameters_.channels(),
+            audio_device_buffer()->RecordingChannels());
+}
+
+TEST_F(AudioDeviceTest, InitTerminate) {
+  // Initialization is part of the test fixture.
+  EXPECT_TRUE(audio_device()->Initialized());
+  EXPECT_EQ(0, audio_device()->Terminate());
+  EXPECT_FALSE(audio_device()->Initialized());
+}
+
+TEST_F(AudioDeviceTest, Devices) {
+  // Device enumeration is not supported. Verify fixed values only.
+  EXPECT_EQ(1, audio_device()->PlayoutDevices());
+  EXPECT_EQ(1, audio_device()->RecordingDevices());
+}
+
+TEST_F(AudioDeviceTest, SpeakerVolumeShouldBeAvailable) {
+  // The OpenSL ES output audio path does not support volume control.
+  if (!AudioLayerSupportsVolumeControl())
+    return;
+  bool available;
+  EXPECT_EQ(0, audio_device()->SpeakerVolumeIsAvailable(&available));
+  EXPECT_TRUE(available);
+}
+
+TEST_F(AudioDeviceTest, MaxSpeakerVolumeIsPositive) {
+  // The OpenSL ES output audio path does not support volume control.
+  if (!AudioLayerSupportsVolumeControl())
+    return;
+  StartPlayout();
+  EXPECT_GT(GetMaxSpeakerVolume(), 0);
+  StopPlayout();
+}
+
+TEST_F(AudioDeviceTest, MinSpeakerVolumeIsZero) {
+  // The OpenSL ES output audio path does not support volume control.
+  if (!AudioLayerSupportsVolumeControl())
+    return;
+  EXPECT_EQ(GetMinSpeakerVolume(), 0);
+}
+
+TEST_F(AudioDeviceTest, DefaultSpeakerVolumeIsWithinMinMax) {
+  // The OpenSL ES output audio path does not support volume control.
+  if (!AudioLayerSupportsVolumeControl())
+    return;
+  const int default_volume = GetSpeakerVolume();
+  EXPECT_GE(default_volume, GetMinSpeakerVolume());
+  EXPECT_LE(default_volume, GetMaxSpeakerVolume());
+}
+
+TEST_F(AudioDeviceTest, SetSpeakerVolumeActuallySetsVolume) {
+  // The OpenSL ES output audio path does not support volume control.
+  if (!AudioLayerSupportsVolumeControl())
+    return;
+  const int default_volume = GetSpeakerVolume();
+  const int max_volume = GetMaxSpeakerVolume();
+  EXPECT_EQ(0, audio_device()->SetSpeakerVolume(max_volume));
+  int new_volume = GetSpeakerVolume();
+  EXPECT_EQ(new_volume, max_volume);
+  EXPECT_EQ(0, audio_device()->SetSpeakerVolume(default_volume));
+}
+
+// Tests that playout can be initiated, started and stopped. No audio callback
+// is registered in this test.
+TEST_F(AudioDeviceTest, StartStopPlayout) { + StartPlayout(); + StopPlayout(); + StartPlayout(); + StopPlayout(); +} + +// Tests that recording can be initiated, started and stopped. No audio callback +// is registered in this test. +TEST_F(AudioDeviceTest, StartStopRecording) { + StartRecording(); + StopRecording(); + StartRecording(); + StopRecording(); +} + +// Verify that calling StopPlayout() will leave us in an uninitialized state +// which will require a new call to InitPlayout(). This test does not call +// StartPlayout() while being uninitialized since doing so will hit a +// RTC_DCHECK and death tests are not supported on Android. +TEST_F(AudioDeviceTest, StopPlayoutRequiresInitToRestart) { + EXPECT_EQ(0, audio_device()->InitPlayout()); + EXPECT_EQ(0, audio_device()->StartPlayout()); + EXPECT_EQ(0, audio_device()->StopPlayout()); + EXPECT_FALSE(audio_device()->PlayoutIsInitialized()); +} + +// Verify that calling StopRecording() will leave us in an uninitialized state +// which will require a new call to InitRecording(). This test does not call +// StartRecording() while being uninitialized since doing so will hit a +// RTC_DCHECK and death tests are not supported on Android. +TEST_F(AudioDeviceTest, StopRecordingRequiresInitToRestart) { + EXPECT_EQ(0, audio_device()->InitRecording()); + EXPECT_EQ(0, audio_device()->StartRecording()); + EXPECT_EQ(0, audio_device()->StopRecording()); + EXPECT_FALSE(audio_device()->RecordingIsInitialized()); +} + +// Start playout and verify that the native audio layer starts asking for real +// audio samples to play out using the NeedMorePlayData callback. +TEST_F(AudioDeviceTest, StartPlayoutVerifyCallbacks) { + MockAudioTransportAndroid mock(kPlayout); + mock.HandleCallbacks(&test_is_done_, nullptr, kNumCallbacks); + EXPECT_CALL(mock, NeedMorePlayData(playout_frames_per_10ms_buffer(), + kBytesPerSample, playout_channels(), + playout_sample_rate(), NotNull(), _, _, _)) + .Times(AtLeast(kNumCallbacks)); + EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock)); + StartPlayout(); + test_is_done_.Wait(kTestTimeOut); + StopPlayout(); +} + +// Start recording and verify that the native audio layer starts feeding real +// audio samples via the RecordedDataIsAvailable callback. +// TODO(henrika): investigate if it is possible to perform a sanity check of +// delay estimates as well (argument #6). +TEST_F(AudioDeviceTest, StartRecordingVerifyCallbacks) { + MockAudioTransportAndroid mock(kRecording); + mock.HandleCallbacks(&test_is_done_, nullptr, kNumCallbacks); + EXPECT_CALL( + mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(), + kBytesPerSample, record_channels(), + record_sample_rate(), _, 0, 0, false, _, _)) + .Times(AtLeast(kNumCallbacks)); + + EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock)); + StartRecording(); + test_is_done_.Wait(kTestTimeOut); + StopRecording(); +} + +// Start playout and recording (full-duplex audio) and verify that audio is +// active in both directions. 
+TEST_F(AudioDeviceTest, StartPlayoutAndRecordingVerifyCallbacks) {
+  MockAudioTransportAndroid mock(kPlayout | kRecording);
+  mock.HandleCallbacks(&test_is_done_, nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, NeedMorePlayData(playout_frames_per_10ms_buffer(),
+                                     kBytesPerSample, playout_channels(),
+                                     playout_sample_rate(), NotNull(), _, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_CALL(
+      mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
+                                    kBytesPerSample, record_channels(),
+                                    record_sample_rate(), _, 0, 0, false, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartPlayout();
+  StartRecording();
+  test_is_done_.Wait(kTestTimeOut);
+  StopRecording();
+  StopPlayout();
+}
+
+// Start playout and read audio from an external PCM file when the audio layer
+// asks for data to play out. Real audio is played out in this test but it
+// does not contain any explicit verification that the audio quality is
+// perfect.
+TEST_F(AudioDeviceTest, RunPlayoutWithFileAsSource) {
+  // TODO(henrika): extend test when mono output is supported.
+  EXPECT_EQ(1u, playout_channels());
+  NiceMock<MockAudioTransportAndroid> mock(kPlayout);
+  const int num_callbacks = kFilePlayTimeInSec * kNumCallbacksPerSecond;
+  std::string file_name = GetFileName(playout_sample_rate());
+  std::unique_ptr<FileAudioStream> file_audio_stream(
+      new FileAudioStream(num_callbacks, file_name, playout_sample_rate()));
+  mock.HandleCallbacks(&test_is_done_, file_audio_stream.get(), num_callbacks);
+  // SetMaxPlayoutVolume();
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartPlayout();
+  test_is_done_.Wait(kTestTimeOut);
+  StopPlayout();
+}
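+
+// Worked numbers for the disabled full-duplex test below (illustration
+// only): kFullDuplexTime = 5 s at kNumCallbacksPerSecond = 100 gives ~500
+// callbacks per direction, of which the first kNumIgnoreFirstCallbacks = 50
+// writes are skipped. The pass thresholds (average <= 60, largest <= 70
+// buffers of 10 ms each) thus allow up to roughly 600-700 ms of buffered
+// audio to accumulate between the recording and playout sides.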
+
+// Start playout and recording and store recorded data in an intermediate FIFO
+// buffer from which the playout side then reads its samples in the same order
+// as they were stored. Under ideal circumstances, a callback sequence would
+// look like: ...+-+-+-+-+-+-+-..., where '+' means 'packet recorded' and '-'
+// means 'packet played'. Under such conditions, the FIFO would contain only
+// one packet on average. However, under more realistic conditions, the size
+// of the FIFO will vary more due to an unbalance between the two sides.
+// This test tries to verify that the device maintains a balanced callback
+// sequence by running in loopback for ten seconds while measuring the size
+// (max and average) of the FIFO. The size of the FIFO is increased by the
+// recording side and decreased by the playout side.
+// TODO(henrika): tune the final test parameters after running tests on
+// several different devices.
+// Disabling this test on bots since it is difficult to come up with a robust
+// test condition that works as intended on all devices. The main issue is
+// that, when swarming is used, an initial latency can be built up when the
+// two sides start at different times. Hence, the test can fail even if audio
+// works as intended. Keeping the test so it can be enabled manually.
+// http://bugs.webrtc.org/7744
+TEST_F(AudioDeviceTest, DISABLED_RunPlayoutAndRecordingInFullDuplex) {
+  EXPECT_EQ(record_channels(), playout_channels());
+  EXPECT_EQ(record_sample_rate(), playout_sample_rate());
+  NiceMock<MockAudioTransportAndroid> mock(kPlayout | kRecording);
+  std::unique_ptr<FifoAudioStream> fifo_audio_stream(
+      new FifoAudioStream(playout_frames_per_10ms_buffer()));
+  mock.HandleCallbacks(&test_is_done_, fifo_audio_stream.get(),
+                       kFullDuplexTime.seconds() * kNumCallbacksPerSecond);
+  SetMaxPlayoutVolume();
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartRecording();
+  StartPlayout();
+  test_is_done_.Wait(std::max(kTestTimeOut, kFullDuplexTime));
+  StopPlayout();
+  StopRecording();
+
+  // These thresholds are set rather high to accommodate differences in
+  // hardware in several devices, so this test can be used in swarming.
+  // See http://bugs.webrtc.org/6464
+  EXPECT_LE(fifo_audio_stream->average_size(), 60u);
+  EXPECT_LE(fifo_audio_stream->largest_size(), 70u);
+}
+
+// Measures loopback latency and reports the min, max and average values for
+// a full duplex audio session.
+// The latency is measured like so:
+// - Insert impulses periodically on the output side.
+// - Detect the impulses on the input side.
+// - Measure the time difference between the transmit time and receive time.
+// - Store time differences in a vector and calculate min, max and average.
+// This test requires a special piece of hardware called an Audio Loopback
+// Dongle. See http://source.android.com/devices/audio/loopback.html for
+// details.
+TEST_F(AudioDeviceTest, DISABLED_MeasureLoopbackLatency) {
+  EXPECT_EQ(record_channels(), playout_channels());
+  EXPECT_EQ(record_sample_rate(), playout_sample_rate());
+  NiceMock<MockAudioTransportAndroid> mock(kPlayout | kRecording);
+  std::unique_ptr<LatencyMeasuringAudioStream> latency_audio_stream(
+      new LatencyMeasuringAudioStream(playout_frames_per_10ms_buffer()));
+  mock.HandleCallbacks(&test_is_done_, latency_audio_stream.get(),
+                       kMeasureLatencyTime.seconds() * kNumCallbacksPerSecond);
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  SetMaxPlayoutVolume();
+  DisableBuiltInAECIfAvailable();
+  StartRecording();
+  StartPlayout();
+  test_is_done_.Wait(std::max(kTestTimeOut, kMeasureLatencyTime));
+  StopPlayout();
+  StopRecording();
+  // Verify that the correct number of transmitted impulses are detected.
+  EXPECT_EQ(latency_audio_stream->num_latency_values(),
+            static_cast<size_t>(
+                kImpulseFrequencyInHz * kMeasureLatencyTime.seconds() - 1));
+  latency_audio_stream->PrintResults();
+}
+
+}  // namespace webrtc
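+
+// A minimal end-to-end sketch (hypothetical, illustration only) of what the
+// fixtures above automate: wiring a transport into an ADM for full-duplex
+// audio. Error handling is omitted and `MyTransport` is an assumed class
+// implementing webrtc::AudioTransport.
+//
+//   webrtc::audiodevicemodule::EnsureInitialized();
+//   auto factory = webrtc::CreateDefaultTaskQueueFactory();
+//   rtc::scoped_refptr<webrtc::AudioDeviceModule> adm(
+//       webrtc::AudioDeviceModule::Create(
+//           webrtc::AudioDeviceModule::kPlatformDefaultAudio, factory.get()));
+//   MyTransport transport;
+//   adm->Init();
+//   adm->RegisterAudioCallback(&transport);
+//   adm->InitRecording();
+//   adm->InitPlayout();
+//   adm->StartRecording();
+//   adm->StartPlayout();
+//   // ... run the session ...
+//   adm->StopRecording();
+//   adm->StopPlayout();
+//   adm->Terminate();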
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_manager.cc b/third_party/libwebrtc/modules/audio_device/android/audio_manager.cc
new file mode 100644
index 0000000000..0b55496619
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_manager.cc
@@ -0,0 +1,318 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/audio_manager.h"
+
+#include <utility>
+
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/utility/include/helpers_android.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+
+namespace webrtc {
+
+// AudioManager::JavaAudioManager implementation
+AudioManager::JavaAudioManager::JavaAudioManager(
+    NativeRegistration* native_reg,
+    std::unique_ptr<GlobalRef> audio_manager)
+    : audio_manager_(std::move(audio_manager)),
+      init_(native_reg->GetMethodId("init", "()Z")),
+      dispose_(native_reg->GetMethodId("dispose", "()V")),
+      is_communication_mode_enabled_(
+          native_reg->GetMethodId("isCommunicationModeEnabled", "()Z")),
+      is_device_blacklisted_for_open_sles_usage_(
+          native_reg->GetMethodId("isDeviceBlacklistedForOpenSLESUsage",
+                                  "()Z")) {
+  RTC_LOG(LS_INFO) << "JavaAudioManager::ctor";
+}
+
+AudioManager::JavaAudioManager::~JavaAudioManager() {
+  RTC_LOG(LS_INFO) << "JavaAudioManager::~dtor";
+}
+
+bool AudioManager::JavaAudioManager::Init() {
+  return audio_manager_->CallBooleanMethod(init_);
+}
+
+void AudioManager::JavaAudioManager::Close() {
+  audio_manager_->CallVoidMethod(dispose_);
+}
+
+bool AudioManager::JavaAudioManager::IsCommunicationModeEnabled() {
+  return audio_manager_->CallBooleanMethod(is_communication_mode_enabled_);
+}
+
+bool AudioManager::JavaAudioManager::IsDeviceBlacklistedForOpenSLESUsage() {
+  return audio_manager_->CallBooleanMethod(
+      is_device_blacklisted_for_open_sles_usage_);
+}
+
+// AudioManager implementation
+AudioManager::AudioManager()
+    : j_environment_(JVM::GetInstance()->environment()),
+      audio_layer_(AudioDeviceModule::kPlatformDefaultAudio),
+      initialized_(false),
+      hardware_aec_(false),
+      hardware_agc_(false),
+      hardware_ns_(false),
+      low_latency_playout_(false),
+      low_latency_record_(false),
+      delay_estimate_in_milliseconds_(0) {
+  RTC_LOG(LS_INFO) << "ctor";
+  RTC_CHECK(j_environment_);
+  JNINativeMethod native_methods[] = {
+      {"nativeCacheAudioParameters", "(IIIZZZZZZZIIJ)V",
+       reinterpret_cast<void*>(&webrtc::AudioManager::CacheAudioParameters)}};
+  j_native_registration_ = j_environment_->RegisterNatives(
+      "org/webrtc/voiceengine/WebRtcAudioManager", native_methods,
+      arraysize(native_methods));
+  j_audio_manager_.reset(
+      new JavaAudioManager(j_native_registration_.get(),
+                           j_native_registration_->NewObject(
+                               "<init>", "(J)V", PointerTojlong(this))));
+}
+
+AudioManager::~AudioManager() {
+  RTC_LOG(LS_INFO) << "dtor";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  Close();
+}
+
+void AudioManager::SetActiveAudioLayer(
+    AudioDeviceModule::AudioLayer audio_layer) {
+  RTC_LOG(LS_INFO) << "SetActiveAudioLayer: " << audio_layer;
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(!initialized_);
+  // Store the currently utilized audio layer.
+  audio_layer_ = audio_layer;
+  // The delay estimate can take one of two fixed values depending on if the
+  // device supports low-latency output or not. However, it is also possible
+  // that the user explicitly selects the high-latency audio path, hence we
+  // use the selected `audio_layer` here to set the delay estimate.
+  delay_estimate_in_milliseconds_ =
+      (audio_layer == AudioDeviceModule::kAndroidJavaAudio)
+          ? kHighLatencyModeDelayEstimateInMilliseconds
+          : kLowLatencyModeDelayEstimateInMilliseconds;
+  RTC_LOG(LS_INFO) << "delay_estimate_in_milliseconds: "
+                   << delay_estimate_in_milliseconds_;
+}
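+
+// Note on the JNI registration above (derived from the parameter list, for
+// illustration): the signature "(IIIZZZZZZZIIJ)V" decodes as three ints
+// (sample_rate, output_channels, input_channels), seven booleans (the
+// hardware_*, low_latency_*, pro_audio and a_audio flags), two ints (the
+// buffer sizes), and one long (the native AudioManager pointer), returning
+// void.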
+
+SLObjectItf AudioManager::GetOpenSLEngine() {
+  RTC_LOG(LS_INFO) << "GetOpenSLEngine";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // Only allow usage of OpenSL ES if such an audio layer has been specified.
+  if (audio_layer_ != AudioDeviceModule::kAndroidOpenSLESAudio &&
+      audio_layer_ !=
+          AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio) {
+    RTC_LOG(LS_INFO)
+        << "Unable to create OpenSL engine for the current audio layer: "
+        << audio_layer_;
+    return nullptr;
+  }
+  // OpenSL ES for Android only supports a single engine per application.
+  // If one has already been created, return the existing object instead of
+  // creating a new one.
+  if (engine_object_.Get() != nullptr) {
+    RTC_LOG(LS_WARNING)
+        << "The OpenSL ES engine object has already been created";
+    return engine_object_.Get();
+  }
+  // Create the engine object in thread-safe mode.
+  const SLEngineOption option[] = {
+      {SL_ENGINEOPTION_THREADSAFE, static_cast<SLuint32>(SL_BOOLEAN_TRUE)}};
+  SLresult result =
+      slCreateEngine(engine_object_.Receive(), 1, option, 0, NULL, NULL);
+  if (result != SL_RESULT_SUCCESS) {
+    RTC_LOG(LS_ERROR) << "slCreateEngine() failed: "
+                      << GetSLErrorString(result);
+    engine_object_.Reset();
+    return nullptr;
+  }
+  // Realize the SL Engine in synchronous mode.
+  result = engine_object_->Realize(engine_object_.Get(), SL_BOOLEAN_FALSE);
+  if (result != SL_RESULT_SUCCESS) {
+    RTC_LOG(LS_ERROR) << "Realize() failed: " << GetSLErrorString(result);
+    engine_object_.Reset();
+    return nullptr;
+  }
+  // Finally return the SLObjectItf interface of the engine object.
+  return engine_object_.Get();
+}
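+
+// Example (hypothetical caller, illustration only): a client that receives
+// the engine object from GetOpenSLEngine() typically extracts the
+// SLEngineItf interface before creating players or recorders:
+//
+//   SLObjectItf engine_object = audio_manager->GetOpenSLEngine();
+//   SLEngineItf engine = nullptr;
+//   if (engine_object != nullptr) {
+//     SLresult result = (*engine_object)
+//         ->GetInterface(engine_object, SL_IID_ENGINE, &engine);
+//     // On SL_RESULT_SUCCESS, `engine` exposes CreateAudioPlayer() etc.
+//   }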
+
+bool AudioManager::Init() {
+  RTC_LOG(LS_INFO) << "Init";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(!initialized_);
+  RTC_DCHECK_NE(audio_layer_, AudioDeviceModule::kPlatformDefaultAudio);
+  if (!j_audio_manager_->Init()) {
+    RTC_LOG(LS_ERROR) << "Init() failed";
+    return false;
+  }
+  initialized_ = true;
+  return true;
+}
+
+bool AudioManager::Close() {
+  RTC_LOG(LS_INFO) << "Close";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (!initialized_)
+    return true;
+  j_audio_manager_->Close();
+  initialized_ = false;
+  return true;
+}
+
+bool AudioManager::IsCommunicationModeEnabled() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return j_audio_manager_->IsCommunicationModeEnabled();
+}
+
+bool AudioManager::IsAcousticEchoCancelerSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return hardware_aec_;
+}
+
+bool AudioManager::IsAutomaticGainControlSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return hardware_agc_;
+}
+
+bool AudioManager::IsNoiseSuppressorSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return hardware_ns_;
+}
+
+bool AudioManager::IsLowLatencyPlayoutSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // Some devices are blacklisted for usage of OpenSL ES even if they report
+  // that low-latency playout is supported. See b/21485703 for details.
+  return j_audio_manager_->IsDeviceBlacklistedForOpenSLESUsage()
+             ? false
+             : low_latency_playout_;
+}
+
+bool AudioManager::IsLowLatencyRecordSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return low_latency_record_;
+}
+
+bool AudioManager::IsProAudioSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  // TODO(henrika): return the state independently of if OpenSL ES is
+  // blacklisted or not for now. We could use the same approach as in
+  // IsLowLatencyPlayoutSupported() but I can't see the need for it yet.
+  return pro_audio_;
+}
+
+// TODO(henrika): improve comments...
+bool AudioManager::IsAAudioSupported() const {
+#if defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+  return a_audio_;
+#else
+  return false;
+#endif
+}
+
+bool AudioManager::IsStereoPlayoutSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (playout_parameters_.channels() == 2);
+}
+
+bool AudioManager::IsStereoRecordSupported() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (record_parameters_.channels() == 2);
+}
+
+int AudioManager::GetDelayEstimateInMilliseconds() const {
+  return delay_estimate_in_milliseconds_;
+}
+
+JNI_FUNCTION_ALIGN
+void JNICALL AudioManager::CacheAudioParameters(JNIEnv* env,
+                                                jobject obj,
+                                                jint sample_rate,
+                                                jint output_channels,
+                                                jint input_channels,
+                                                jboolean hardware_aec,
+                                                jboolean hardware_agc,
+                                                jboolean hardware_ns,
+                                                jboolean low_latency_output,
+                                                jboolean low_latency_input,
+                                                jboolean pro_audio,
+                                                jboolean a_audio,
+                                                jint output_buffer_size,
+                                                jint input_buffer_size,
+                                                jlong native_audio_manager) {
+  webrtc::AudioManager* this_object =
+      reinterpret_cast<webrtc::AudioManager*>(native_audio_manager);
+  this_object->OnCacheAudioParameters(
+      env, sample_rate, output_channels, input_channels, hardware_aec,
+      hardware_agc, hardware_ns, low_latency_output, low_latency_input,
+      pro_audio, a_audio, output_buffer_size, input_buffer_size);
+}
+
+void AudioManager::OnCacheAudioParameters(JNIEnv* env,
+                                          jint sample_rate,
+                                          jint output_channels,
+                                          jint input_channels,
+                                          jboolean hardware_aec,
+                                          jboolean hardware_agc,
+                                          jboolean hardware_ns,
+                                          jboolean low_latency_output,
+                                          jboolean low_latency_input,
+                                          jboolean pro_audio,
+                                          jboolean a_audio,
+                                          jint output_buffer_size,
+                                          jint input_buffer_size) {
+  RTC_LOG(LS_INFO)
+      << "OnCacheAudioParameters: "
+         "hardware_aec: "
+      << static_cast<bool>(hardware_aec)
+      << ", hardware_agc: " << static_cast<bool>(hardware_agc)
+      << ", hardware_ns: " << static_cast<bool>(hardware_ns)
+      << ", low_latency_output: " << static_cast<bool>(low_latency_output)
+      << ", low_latency_input: " << static_cast<bool>(low_latency_input)
+      << ", pro_audio: " << static_cast<bool>(pro_audio)
+      << ", a_audio: " << static_cast<bool>(a_audio)
+      << ", sample_rate: " << static_cast<int>(sample_rate)
+      << ", output_channels: " << static_cast<int>(output_channels)
+      << ", input_channels: " << static_cast<int>(input_channels)
+      << ", output_buffer_size: " << static_cast<int>(output_buffer_size)
+      << ", input_buffer_size: " << static_cast<int>(input_buffer_size);
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  hardware_aec_ = hardware_aec;
+  hardware_agc_ = hardware_agc;
+  hardware_ns_ = hardware_ns;
+  low_latency_playout_ = low_latency_output;
+  low_latency_record_ = low_latency_input;
+  pro_audio_ = pro_audio;
+  a_audio_ = a_audio;
+  playout_parameters_.reset(sample_rate, static_cast<size_t>(output_channels),
+                            static_cast<size_t>(output_buffer_size));
+  record_parameters_.reset(sample_rate, static_cast<size_t>(input_channels),
+                           static_cast<size_t>(input_buffer_size));
+}
+
+const AudioParameters& AudioManager::GetPlayoutAudioParameters() {
+  RTC_CHECK(playout_parameters_.is_valid());
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return playout_parameters_;
+}
+
+const AudioParameters& AudioManager::GetRecordAudioParameters() {
+  RTC_CHECK(record_parameters_.is_valid());
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return record_parameters_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_manager.h b/third_party/libwebrtc/modules/audio_device/android/audio_manager.h
new file mode 100644
index 0000000000..900fc78a68
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_manager.h
@@ -0,0 +1,225 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_MANAGER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_MANAGER_H_
+
+#include <SLES/OpenSLES.h>
+#include <jni.h>
+
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/opensles_common.h"
+#include "modules/audio_device/audio_device_config.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "modules/utility/include/helpers_android.h"
+#include "modules/utility/include/jvm_android.h"
+
+namespace webrtc {
+
+// Implements support for functions in the WebRTC audio stack for Android that
+// rely on the AudioManager in android.media. It also populates an
+// AudioParameter structure with native audio parameters detected at
+// construction. This class does not make any audio-related modifications
+// unless Init() is called. Caching audio parameters makes no changes but only
+// reads data from the Java side.
+class AudioManager {
+ public:
+  // Wraps the Java-specific parts of the AudioManager into one helper class.
+  // Stores method IDs for all supported methods at construction and then
+  // allows calls like JavaAudioManager::Close() while hiding the Java/JNI
+  // parts that are associated with this call.
+  class JavaAudioManager {
+   public:
+    JavaAudioManager(NativeRegistration* native_registration,
+                     std::unique_ptr<GlobalRef> audio_manager);
+    ~JavaAudioManager();
+
+    bool Init();
+    void Close();
+    bool IsCommunicationModeEnabled();
+    bool IsDeviceBlacklistedForOpenSLESUsage();
+
+   private:
+    std::unique_ptr<GlobalRef> audio_manager_;
+    jmethodID init_;
+    jmethodID dispose_;
+    jmethodID is_communication_mode_enabled_;
+    jmethodID is_device_blacklisted_for_open_sles_usage_;
+  };
+
+  AudioManager();
+  ~AudioManager();
+
+  // Sets the currently active audio layer combination. Must be called before
+  // Init().
+  void SetActiveAudioLayer(AudioDeviceModule::AudioLayer audio_layer);
+
+  // Creates and realizes the main (global) Open SL engine object and returns
+  // a reference to it. The engine object is only created at the first call
+  // since OpenSL ES for Android only supports a single engine per application.
+  // Subsequent calls return the already created engine. The SL engine object
+  // is destroyed when the AudioManager object is deleted. It means that the
+  // engine object will be the first OpenSL ES object to be created and the
+  // last object to be destroyed.
+  // Note that NULL will be returned unless the audio layer is specified as
+  // AudioDeviceModule::kAndroidOpenSLESAudio or
+  // AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio.
+  SLObjectItf GetOpenSLEngine();
+
+  // Initializes the audio manager and stores the current audio mode.
+  bool Init();
+  // Reverts any setting done by Init().
+  bool Close();
+
+  // Returns true if current audio mode is AudioManager.MODE_IN_COMMUNICATION.
+  bool IsCommunicationModeEnabled() const;
+
+  // Native audio parameters stored during construction.
+  const AudioParameters& GetPlayoutAudioParameters();
+  const AudioParameters& GetRecordAudioParameters();
+
+  // Returns true if the device supports built-in audio effects for AEC, AGC
+  // and NS. Some devices can also be blacklisted for use in combination with
+  // platform effects and these devices will return false.
+  // Can currently only be used in combination with a Java based audio backend
+  // for the recording side (i.e. using the android.media.AudioRecord API).
+  bool IsAcousticEchoCancelerSupported() const;
+  bool IsAutomaticGainControlSupported() const;
+  bool IsNoiseSuppressorSupported() const;
+
+  // Returns true if the device supports the low-latency audio paths in
+  // combination with OpenSL ES.
+  bool IsLowLatencyPlayoutSupported() const;
+  bool IsLowLatencyRecordSupported() const;
+
+  // Returns true if the device supports (and has been configured for) stereo.
+  // Call the Java API WebRtcAudioManager.setStereoOutput/Input() with true as
+  // parameter to enable stereo. Default is mono in both directions and the
+  // setting is set once and for all when the audio manager object is created.
+  // TODO(henrika): stereo is not supported in combination with OpenSL ES.
+  bool IsStereoPlayoutSupported() const;
+  bool IsStereoRecordSupported() const;
+
+  // Returns true if the device supports pro-audio features in combination
+  // with OpenSL ES.
+  bool IsProAudioSupported() const;
+
+  // Returns true if the device supports AAudio.
+  bool IsAAudioSupported() const;
+
+  // Returns the estimated total delay of this device. The unit is in
+  // milliseconds. The value is set once at construction and never changes
+  // after that. Possible values are
+  // webrtc::kLowLatencyModeDelayEstimateInMilliseconds and
+  // webrtc::kHighLatencyModeDelayEstimateInMilliseconds.
+  int GetDelayEstimateInMilliseconds() const;
+
+ private:
+  // Called from the Java side so we can cache the native audio parameters.
+  // This method will be called by the WebRtcAudioManager constructor, i.e.,
+  // on the same thread that this object is created on.
+  static void JNICALL CacheAudioParameters(JNIEnv* env,
+                                           jobject obj,
+                                           jint sample_rate,
+                                           jint output_channels,
+                                           jint input_channels,
+                                           jboolean hardware_aec,
+                                           jboolean hardware_agc,
+                                           jboolean hardware_ns,
+                                           jboolean low_latency_output,
+                                           jboolean low_latency_input,
+                                           jboolean pro_audio,
+                                           jboolean a_audio,
+                                           jint output_buffer_size,
+                                           jint input_buffer_size,
+                                           jlong native_audio_manager);
+  void OnCacheAudioParameters(JNIEnv* env,
+                              jint sample_rate,
+                              jint output_channels,
+                              jint input_channels,
+                              jboolean hardware_aec,
+                              jboolean hardware_agc,
+                              jboolean hardware_ns,
+                              jboolean low_latency_output,
+                              jboolean low_latency_input,
+                              jboolean pro_audio,
+                              jboolean a_audio,
+                              jint output_buffer_size,
+                              jint input_buffer_size);
+
+  // Stores the thread ID in the constructor.
+  // We can then use RTC_DCHECK_RUN_ON(&thread_checker_) to ensure that
+  // other methods are called from the same thread.
+  SequenceChecker thread_checker_;
+
+  // Calls JavaVM::AttachCurrentThread() if this thread is not attached at
+  // construction.
+  // Also ensures that DetachCurrentThread() is called at destruction.
+  JvmThreadConnector attach_thread_if_needed_;
+
+  // Wraps the JNI interface pointer and methods associated with it.
+  std::unique_ptr<JNIEnvironment> j_environment_;
+
+  // Contains factory method for creating the Java object.
+  std::unique_ptr<NativeRegistration> j_native_registration_;
+
+  // Wraps the Java specific parts of the AudioManager.
+  std::unique_ptr<JavaAudioManager> j_audio_manager_;
+
+  // Contains the selected audio layer specified by the AudioLayer enumerator
+  // in the AudioDeviceModule class.
+  AudioDeviceModule::AudioLayer audio_layer_;
+
+  // This object is the global entry point of the OpenSL ES API.
+  // After creating the engine object, the application can obtain this object's
+  // SLEngineItf interface. This interface contains creation methods for all
+  // the other object types in the API. None of these interfaces are realized
+  // by this class. It only provides access to the global engine object.
+  webrtc::ScopedSLObjectItf engine_object_;
+
+  // Set to true by Init() and false by Close().
+  bool initialized_;
+
+  // True if device supports hardware (or built-in) AEC.
+  bool hardware_aec_;
+  // True if device supports hardware (or built-in) AGC.
+  bool hardware_agc_;
+  // True if device supports hardware (or built-in) NS.
+  bool hardware_ns_;
+
+  // True if device supports the low-latency OpenSL ES audio path for output.
+  bool low_latency_playout_;
+
+  // True if device supports the low-latency OpenSL ES audio path for input.
+  bool low_latency_record_;
+
+  // True if device supports the low-latency OpenSL ES pro-audio path.
+  bool pro_audio_;
+
+  // True if device supports the low-latency AAudio audio path.
+  bool a_audio_;
+
+  // The delay estimate can take one of two fixed values depending on if the
+  // device supports low-latency output or not.
+  int delay_estimate_in_milliseconds_;
+
+  // Contains native parameters (e.g. sample rate, channel configuration).
+  // Set at construction in OnCacheAudioParameters() which is called from
+  // Java on the same thread as this object is created on.
+  AudioParameters playout_parameters_;
+  AudioParameters record_parameters_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_MANAGER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_manager_unittest.cc b/third_party/libwebrtc/modules/audio_device/android/audio_manager_unittest.cc
new file mode 100644
index 0000000000..093eddd2e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_manager_unittest.cc
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/audio_manager.h"
+
+#include <SLES/OpenSLES.h>
+
+#include "modules/audio_device/android/build_info.h"
+#include "modules/audio_device/android/ensure_initialized.h"
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+#define PRINT(...) \
fprintf(stderr, __VA_ARGS__);
+
+namespace webrtc {
+
+static const char kTag[] = "  ";
+
+class AudioManagerTest : public ::testing::Test {
+ protected:
+  AudioManagerTest() {
+    // One-time initialization of JVM and application context. Ensures that we
+    // can do calls between C++ and Java.
+    webrtc::audiodevicemodule::EnsureInitialized();
+    audio_manager_.reset(new AudioManager());
+    SetActiveAudioLayer();
+    playout_parameters_ = audio_manager()->GetPlayoutAudioParameters();
+    record_parameters_ = audio_manager()->GetRecordAudioParameters();
+  }
+
+  AudioManager* audio_manager() const { return audio_manager_.get(); }
+
+  // A valid audio layer must always be set before calling Init(), hence we
+  // might as well make it a part of the test fixture.
+  void SetActiveAudioLayer() {
+    EXPECT_EQ(0, audio_manager()->GetDelayEstimateInMilliseconds());
+    audio_manager()->SetActiveAudioLayer(AudioDeviceModule::kAndroidJavaAudio);
+    EXPECT_NE(0, audio_manager()->GetDelayEstimateInMilliseconds());
+  }
+
+  // One way to ensure that the engine object is valid is to create an
+  // SL Engine interface since it exposes creation methods of all the OpenSL ES
+  // object types and it is only supported on the engine object. This method
+  // also verifies that the engine interface supports at least one interface.
+  // Note that the test below is not a full test of the SLEngineItf object
+  // but only a simple sanity test to check that the global engine object is OK.
+  void ValidateSLEngine(SLObjectItf engine_object) {
+    EXPECT_NE(nullptr, engine_object);
+    // Get the SL Engine interface which is exposed by the engine object.
+    SLEngineItf engine;
+    SLresult result =
+        (*engine_object)->GetInterface(engine_object, SL_IID_ENGINE, &engine);
+    EXPECT_EQ(result, SL_RESULT_SUCCESS) << "GetInterface() on engine failed";
+    // Ensure that the SL Engine interface exposes at least one interface.
+    SLuint32 object_id = SL_OBJECTID_ENGINE;
+    SLuint32 num_supported_interfaces = 0;
+    result = (*engine)->QueryNumSupportedInterfaces(engine, object_id,
+                                                    &num_supported_interfaces);
+    EXPECT_EQ(result, SL_RESULT_SUCCESS)
+        << "QueryNumSupportedInterfaces() failed";
+    EXPECT_GE(num_supported_interfaces, 1u);
+  }
+
+  std::unique_ptr<AudioManager> audio_manager_;
+  AudioParameters playout_parameters_;
+  AudioParameters record_parameters_;
+};
+
+TEST_F(AudioManagerTest, ConstructDestruct) {}
+
+// It should not be possible to create an OpenSL engine object if Java based
+// audio is requested in both directions.
+TEST_F(AudioManagerTest, GetOpenSLEngineShouldFailForJavaAudioLayer) {
+  audio_manager()->SetActiveAudioLayer(AudioDeviceModule::kAndroidJavaAudio);
+  SLObjectItf engine_object = audio_manager()->GetOpenSLEngine();
+  EXPECT_EQ(nullptr, engine_object);
+}
+
+// It should be possible to create an OpenSL engine object if OpenSL ES based
+// audio is requested in any direction.
+TEST_F(AudioManagerTest, GetOpenSLEngineShouldSucceedForOpenSLESAudioLayer) {
+  // List of supported audio layers that use OpenSL ES audio.
+  const AudioDeviceModule::AudioLayer opensles_audio[] = {
+      AudioDeviceModule::kAndroidOpenSLESAudio,
+      AudioDeviceModule::kAndroidJavaInputAndOpenSLESOutputAudio};
+  // Verify that the global (singleton) OpenSL Engine can be acquired for all
+  // audio layers that use OpenSL ES. Note that the engine is only created once.
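+  // Each iteration below switches the active audio layer and then asks for
+  // the engine; every call is expected to return the same (non-null) global
+  // engine object.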
+  for (const AudioDeviceModule::AudioLayer audio_layer : opensles_audio) {
+    audio_manager()->SetActiveAudioLayer(audio_layer);
+    SLObjectItf engine_object = audio_manager()->GetOpenSLEngine();
+    EXPECT_NE(nullptr, engine_object);
+    // Perform a simple sanity check of the created engine object.
+    ValidateSLEngine(engine_object);
+  }
+}
+
+TEST_F(AudioManagerTest, InitClose) {
+  EXPECT_TRUE(audio_manager()->Init());
+  EXPECT_TRUE(audio_manager()->Close());
+}
+
+TEST_F(AudioManagerTest, IsAcousticEchoCancelerSupported) {
+  PRINT("%sAcoustic Echo Canceler support: %s\n", kTag,
+        audio_manager()->IsAcousticEchoCancelerSupported() ? "Yes" : "No");
+}
+
+TEST_F(AudioManagerTest, IsAutomaticGainControlSupported) {
+  EXPECT_FALSE(audio_manager()->IsAutomaticGainControlSupported());
+}
+
+TEST_F(AudioManagerTest, IsNoiseSuppressorSupported) {
+  PRINT("%sNoise Suppressor support: %s\n", kTag,
+        audio_manager()->IsNoiseSuppressorSupported() ? "Yes" : "No");
+}
+
+TEST_F(AudioManagerTest, IsLowLatencyPlayoutSupported) {
+  PRINT("%sLow latency output support: %s\n", kTag,
+        audio_manager()->IsLowLatencyPlayoutSupported() ? "Yes" : "No");
+}
+
+TEST_F(AudioManagerTest, IsLowLatencyRecordSupported) {
+  PRINT("%sLow latency input support: %s\n", kTag,
+        audio_manager()->IsLowLatencyRecordSupported() ? "Yes" : "No");
+}
+
+TEST_F(AudioManagerTest, IsProAudioSupported) {
+  PRINT("%sPro audio support: %s\n", kTag,
+        audio_manager()->IsProAudioSupported() ? "Yes" : "No");
+}
+
+// Verify that playout side is configured for mono by default.
+TEST_F(AudioManagerTest, IsStereoPlayoutSupported) {
+  EXPECT_FALSE(audio_manager()->IsStereoPlayoutSupported());
+}
+
+// Verify that recording side is configured for mono by default.
+TEST_F(AudioManagerTest, IsStereoRecordSupported) {
+  EXPECT_FALSE(audio_manager()->IsStereoRecordSupported());
+}
+
+TEST_F(AudioManagerTest, ShowAudioParameterInfo) {
+  const bool low_latency_out = audio_manager()->IsLowLatencyPlayoutSupported();
+  const bool low_latency_in = audio_manager()->IsLowLatencyRecordSupported();
+  PRINT("PLAYOUT:\n");
+  PRINT("%saudio layer: %s\n", kTag,
+        low_latency_out ? "Low latency OpenSL" : "Java/JNI based AudioTrack");
+  PRINT("%ssample rate: %d Hz\n", kTag, playout_parameters_.sample_rate());
+  PRINT("%schannels: %zu\n", kTag, playout_parameters_.channels());
+  PRINT("%sframes per buffer: %zu <=> %.2f ms\n", kTag,
+        playout_parameters_.frames_per_buffer(),
+        playout_parameters_.GetBufferSizeInMilliseconds());
+  PRINT("RECORD: \n");
+  PRINT("%saudio layer: %s\n", kTag,
+        low_latency_in ? "Low latency OpenSL" : "Java/JNI based AudioRecord");
+  PRINT("%ssample rate: %d Hz\n", kTag, record_parameters_.sample_rate());
+  PRINT("%schannels: %zu\n", kTag, record_parameters_.channels());
+  PRINT("%sframes per buffer: %zu <=> %.2f ms\n", kTag,
+        record_parameters_.frames_per_buffer(),
+        record_parameters_.GetBufferSizeInMilliseconds());
+}
+
+// The audio device module only supports the same sample rate in both
+// directions. In addition, in full-duplex low-latency mode (OpenSL ES), both
+// input and output must use the same native buffer size to allow for usage of
+// the fast audio track in Android.
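+// The test below therefore checks both properties: equal sample rates in all
+// cases, and equal native buffer sizes whenever both directions support the
+// low-latency path.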
+TEST_F(AudioManagerTest, VerifyAudioParameters) {
+  const bool low_latency_out = audio_manager()->IsLowLatencyPlayoutSupported();
+  const bool low_latency_in = audio_manager()->IsLowLatencyRecordSupported();
+  EXPECT_EQ(playout_parameters_.sample_rate(),
+            record_parameters_.sample_rate());
+  if (low_latency_out && low_latency_in) {
+    EXPECT_EQ(playout_parameters_.frames_per_buffer(),
+              record_parameters_.frames_per_buffer());
+  }
+}
+
+// Add device-specific information to the test for logging purposes.
+TEST_F(AudioManagerTest, ShowDeviceInfo) {
+  BuildInfo build_info;
+  PRINT("%smodel: %s\n", kTag, build_info.GetDeviceModel().c_str());
+  PRINT("%sbrand: %s\n", kTag, build_info.GetBrand().c_str());
+  PRINT("%smanufacturer: %s\n", kTag,
+        build_info.GetDeviceManufacturer().c_str());
+}
+
+// Add Android build information to the test for logging purposes.
+TEST_F(AudioManagerTest, ShowBuildInfo) {
+  BuildInfo build_info;
+  PRINT("%sbuild release: %s\n", kTag, build_info.GetBuildRelease().c_str());
+  PRINT("%sbuild id: %s\n", kTag, build_info.GetAndroidBuildId().c_str());
+  PRINT("%sbuild type: %s\n", kTag, build_info.GetBuildType().c_str());
+  PRINT("%sSDK version: %d\n", kTag, build_info.GetSdkVersion());
+}
+
+// Basic test of the AudioParameters class using default construction where
+// all members are set to zero.
+TEST_F(AudioManagerTest, AudioParametersWithDefaultConstruction) {
+  AudioParameters params;
+  EXPECT_FALSE(params.is_valid());
+  EXPECT_EQ(0, params.sample_rate());
+  EXPECT_EQ(0U, params.channels());
+  EXPECT_EQ(0U, params.frames_per_buffer());
+  EXPECT_EQ(0U, params.frames_per_10ms_buffer());
+  EXPECT_EQ(0U, params.GetBytesPerFrame());
+  EXPECT_EQ(0U, params.GetBytesPerBuffer());
+  EXPECT_EQ(0U, params.GetBytesPer10msBuffer());
+  EXPECT_EQ(0.0f, params.GetBufferSizeInMilliseconds());
+}
+
+// Basic test of the AudioParameters class using non-default construction.
+TEST_F(AudioManagerTest, AudioParametersWithNonDefaultConstruction) {
+  const int kSampleRate = 48000;
+  const size_t kChannels = 1;
+  const size_t kFramesPerBuffer = 480;
+  const size_t kFramesPer10msBuffer = 480;
+  const size_t kBytesPerFrame = 2;
+  const float kBufferSizeInMs = 10.0f;
+  AudioParameters params(kSampleRate, kChannels, kFramesPerBuffer);
+  EXPECT_TRUE(params.is_valid());
+  EXPECT_EQ(kSampleRate, params.sample_rate());
+  EXPECT_EQ(kChannels, params.channels());
+  EXPECT_EQ(kFramesPerBuffer, params.frames_per_buffer());
+  EXPECT_EQ(static_cast<size_t>(kSampleRate / 100),
+            params.frames_per_10ms_buffer());
+  EXPECT_EQ(kBytesPerFrame, params.GetBytesPerFrame());
+  EXPECT_EQ(kBytesPerFrame * kFramesPerBuffer, params.GetBytesPerBuffer());
+  EXPECT_EQ(kBytesPerFrame * kFramesPer10msBuffer,
+            params.GetBytesPer10msBuffer());
+  EXPECT_EQ(kBufferSizeInMs, params.GetBufferSizeInMilliseconds());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.cc b/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.cc
new file mode 100644
index 0000000000..919eabb983
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/audio_record_jni.h"
+
+#include <string>
+#include <utility>
+
+#include "modules/audio_device/android/audio_common.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/time_utils.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+// Scoped class which logs its time of life as a UMA statistic. It generates
+// a histogram which measures the time it takes for a method/scope to execute.
+class ScopedHistogramTimer {
+ public:
+  explicit ScopedHistogramTimer(const std::string& name)
+      : histogram_name_(name), start_time_ms_(rtc::TimeMillis()) {}
+  ~ScopedHistogramTimer() {
+    const int64_t life_time_ms = rtc::TimeSince(start_time_ms_);
+    RTC_HISTOGRAM_COUNTS_1000(histogram_name_, life_time_ms);
+    RTC_LOG(LS_INFO) << histogram_name_ << ": " << life_time_ms;
+  }
+
+ private:
+  const std::string histogram_name_;
+  int64_t start_time_ms_;
+};
+}  // namespace
+
+// AudioRecordJni::JavaAudioRecord implementation.
+AudioRecordJni::JavaAudioRecord::JavaAudioRecord(
+    NativeRegistration* native_reg,
+    std::unique_ptr<GlobalRef> audio_record)
+    : audio_record_(std::move(audio_record)),
+      init_recording_(native_reg->GetMethodId("initRecording", "(II)I")),
+      start_recording_(native_reg->GetMethodId("startRecording", "()Z")),
+      stop_recording_(native_reg->GetMethodId("stopRecording", "()Z")),
+      enable_built_in_aec_(native_reg->GetMethodId("enableBuiltInAEC", "(Z)Z")),
+      enable_built_in_ns_(native_reg->GetMethodId("enableBuiltInNS", "(Z)Z")) {}
+
+AudioRecordJni::JavaAudioRecord::~JavaAudioRecord() {}
+
+int AudioRecordJni::JavaAudioRecord::InitRecording(int sample_rate,
+                                                   size_t channels) {
+  return audio_record_->CallIntMethod(init_recording_,
+                                      static_cast<jint>(sample_rate),
+                                      static_cast<jint>(channels));
+}
+
+bool AudioRecordJni::JavaAudioRecord::StartRecording() {
+  return audio_record_->CallBooleanMethod(start_recording_);
+}
+
+bool AudioRecordJni::JavaAudioRecord::StopRecording() {
+  return audio_record_->CallBooleanMethod(stop_recording_);
+}
+
+bool AudioRecordJni::JavaAudioRecord::EnableBuiltInAEC(bool enable) {
+  return audio_record_->CallBooleanMethod(enable_built_in_aec_,
+                                          static_cast<jboolean>(enable));
+}
+
+bool AudioRecordJni::JavaAudioRecord::EnableBuiltInNS(bool enable) {
+  return audio_record_->CallBooleanMethod(enable_built_in_ns_,
+                                          static_cast<jboolean>(enable));
+}
+
+// AudioRecordJni implementation.
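+// The constructor below wires the C++ callbacks into the Java class through
+// JNI RegisterNatives. As an illustration, the JNI signature strings map to
+// the Java method shapes as follows:
+//   "(Ljava/nio/ByteBuffer;J)V"  ->  void f(ByteBuffer, long)
+//   "(IJ)V"                      ->  void f(int, long)
+// where the trailing jlong carries the C++ `this` pointer back into native
+// code.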
+AudioRecordJni::AudioRecordJni(AudioManager* audio_manager) + : j_environment_(JVM::GetInstance()->environment()), + audio_manager_(audio_manager), + audio_parameters_(audio_manager->GetRecordAudioParameters()), + total_delay_in_milliseconds_(0), + direct_buffer_address_(nullptr), + direct_buffer_capacity_in_bytes_(0), + frames_per_buffer_(0), + initialized_(false), + recording_(false), + audio_device_buffer_(nullptr) { + RTC_LOG(LS_INFO) << "ctor"; + RTC_DCHECK(audio_parameters_.is_valid()); + RTC_CHECK(j_environment_); + JNINativeMethod native_methods[] = { + {"nativeCacheDirectBufferAddress", "(Ljava/nio/ByteBuffer;J)V", + reinterpret_cast( + &webrtc::AudioRecordJni::CacheDirectBufferAddress)}, + {"nativeDataIsRecorded", "(IJ)V", + reinterpret_cast(&webrtc::AudioRecordJni::DataIsRecorded)}}; + j_native_registration_ = j_environment_->RegisterNatives( + "org/webrtc/voiceengine/WebRtcAudioRecord", native_methods, + arraysize(native_methods)); + j_audio_record_.reset( + new JavaAudioRecord(j_native_registration_.get(), + j_native_registration_->NewObject( + "", "(J)V", PointerTojlong(this)))); + // Detach from this thread since we want to use the checker to verify calls + // from the Java based audio thread. + thread_checker_java_.Detach(); +} + +AudioRecordJni::~AudioRecordJni() { + RTC_LOG(LS_INFO) << "dtor"; + RTC_DCHECK(thread_checker_.IsCurrent()); + Terminate(); +} + +int32_t AudioRecordJni::Init() { + RTC_LOG(LS_INFO) << "Init"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return 0; +} + +int32_t AudioRecordJni::Terminate() { + RTC_LOG(LS_INFO) << "Terminate"; + RTC_DCHECK(thread_checker_.IsCurrent()); + StopRecording(); + return 0; +} + +int32_t AudioRecordJni::InitRecording() { + RTC_LOG(LS_INFO) << "InitRecording"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!recording_); + ScopedHistogramTimer timer("WebRTC.Audio.InitRecordingDurationMs"); + int frames_per_buffer = j_audio_record_->InitRecording( + audio_parameters_.sample_rate(), audio_parameters_.channels()); + if (frames_per_buffer < 0) { + direct_buffer_address_ = nullptr; + RTC_LOG(LS_ERROR) << "InitRecording failed"; + return -1; + } + frames_per_buffer_ = static_cast(frames_per_buffer); + RTC_LOG(LS_INFO) << "frames_per_buffer: " << frames_per_buffer_; + const size_t bytes_per_frame = audio_parameters_.channels() * sizeof(int16_t); + RTC_CHECK_EQ(direct_buffer_capacity_in_bytes_, + frames_per_buffer_ * bytes_per_frame); + RTC_CHECK_EQ(frames_per_buffer_, audio_parameters_.frames_per_10ms_buffer()); + initialized_ = true; + return 0; +} + +int32_t AudioRecordJni::StartRecording() { + RTC_LOG(LS_INFO) << "StartRecording"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!recording_); + if (!initialized_) { + RTC_DLOG(LS_WARNING) + << "Recording can not start since InitRecording must succeed first"; + return 0; + } + ScopedHistogramTimer timer("WebRTC.Audio.StartRecordingDurationMs"); + if (!j_audio_record_->StartRecording()) { + RTC_LOG(LS_ERROR) << "StartRecording failed"; + return -1; + } + recording_ = true; + return 0; +} + +int32_t AudioRecordJni::StopRecording() { + RTC_LOG(LS_INFO) << "StopRecording"; + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!initialized_ || !recording_) { + return 0; + } + if (!j_audio_record_->StopRecording()) { + RTC_LOG(LS_ERROR) << "StopRecording failed"; + return -1; + } + // If we don't detach here, we will hit a RTC_DCHECK in OnDataIsRecorded() + // next time StartRecording() is called since it will create a new Java + // thread. 
+  thread_checker_java_.Detach();
+  initialized_ = false;
+  recording_ = false;
+  direct_buffer_address_ = nullptr;
+  return 0;
+}
+
+void AudioRecordJni::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+  RTC_LOG(LS_INFO) << "AttachAudioBuffer";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  audio_device_buffer_ = audioBuffer;
+  const int sample_rate_hz = audio_parameters_.sample_rate();
+  RTC_LOG(LS_INFO) << "SetRecordingSampleRate(" << sample_rate_hz << ")";
+  audio_device_buffer_->SetRecordingSampleRate(sample_rate_hz);
+  const size_t channels = audio_parameters_.channels();
+  RTC_LOG(LS_INFO) << "SetRecordingChannels(" << channels << ")";
+  audio_device_buffer_->SetRecordingChannels(channels);
+  total_delay_in_milliseconds_ =
+      audio_manager_->GetDelayEstimateInMilliseconds();
+  RTC_DCHECK_GT(total_delay_in_milliseconds_, 0);
+  RTC_LOG(LS_INFO) << "total_delay_in_milliseconds: "
+                   << total_delay_in_milliseconds_;
+}
+
+int32_t AudioRecordJni::EnableBuiltInAEC(bool enable) {
+  RTC_LOG(LS_INFO) << "EnableBuiltInAEC(" << enable << ")";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return j_audio_record_->EnableBuiltInAEC(enable) ? 0 : -1;
+}
+
+int32_t AudioRecordJni::EnableBuiltInAGC(bool enable) {
+  // TODO(henrika): possibly remove when no longer used by any client.
+  RTC_CHECK_NOTREACHED();
+}
+
+int32_t AudioRecordJni::EnableBuiltInNS(bool enable) {
+  RTC_LOG(LS_INFO) << "EnableBuiltInNS(" << enable << ")";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return j_audio_record_->EnableBuiltInNS(enable) ? 0 : -1;
+}
+
+JNI_FUNCTION_ALIGN
+void JNICALL AudioRecordJni::CacheDirectBufferAddress(JNIEnv* env,
+                                                      jobject obj,
+                                                      jobject byte_buffer,
+                                                      jlong nativeAudioRecord) {
+  webrtc::AudioRecordJni* this_object =
+      reinterpret_cast<webrtc::AudioRecordJni*>(nativeAudioRecord);
+  this_object->OnCacheDirectBufferAddress(env, byte_buffer);
+}
+
+void AudioRecordJni::OnCacheDirectBufferAddress(JNIEnv* env,
+                                                jobject byte_buffer) {
+  RTC_LOG(LS_INFO) << "OnCacheDirectBufferAddress";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  RTC_DCHECK(!direct_buffer_address_);
+  direct_buffer_address_ = env->GetDirectBufferAddress(byte_buffer);
+  jlong capacity = env->GetDirectBufferCapacity(byte_buffer);
+  RTC_LOG(LS_INFO) << "direct buffer capacity: " << capacity;
+  direct_buffer_capacity_in_bytes_ = static_cast<size_t>(capacity);
+}
+
+JNI_FUNCTION_ALIGN
+void JNICALL AudioRecordJni::DataIsRecorded(JNIEnv* env,
+                                            jobject obj,
+                                            jint length,
+                                            jlong nativeAudioRecord) {
+  webrtc::AudioRecordJni* this_object =
+      reinterpret_cast<webrtc::AudioRecordJni*>(nativeAudioRecord);
+  this_object->OnDataIsRecorded(length);
+}
+
+// This method is called on a high-priority thread from Java. The name of
+// the thread is 'AudioRecordThread'.
+void AudioRecordJni::OnDataIsRecorded(int length) {
+  RTC_DCHECK(thread_checker_java_.IsCurrent());
+  if (!audio_device_buffer_) {
+    RTC_LOG(LS_ERROR) << "AttachAudioBuffer has not been called";
+    return;
+  }
+  audio_device_buffer_->SetRecordedBuffer(direct_buffer_address_,
+                                          frames_per_buffer_);
+  // We provide one (combined) fixed delay estimate for the APM and use the
+  // `playDelayMs` parameter only. Components like the AEC only see the sum
+  // of `playDelayMs` and `recDelayMs`, hence the distribution does not matter.
+ audio_device_buffer_->SetVQEData(total_delay_in_milliseconds_, 0); + if (audio_device_buffer_->DeliverRecordedData() == -1) { + RTC_LOG(LS_INFO) << "AudioDeviceBuffer::DeliverRecordedData failed"; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.h b/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.h new file mode 100644 index 0000000000..66a6a89f41 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/audio_record_jni.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_RECORD_JNI_H_ +#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_RECORD_JNI_H_ + +#include + +#include + +#include "api/sequence_checker.h" +#include "modules/audio_device/android/audio_manager.h" +#include "modules/audio_device/audio_device_generic.h" +#include "modules/audio_device/include/audio_device_defines.h" +#include "modules/utility/include/helpers_android.h" +#include "modules/utility/include/jvm_android.h" + +namespace webrtc { + +// Implements 16-bit mono PCM audio input support for Android using the Java +// AudioRecord interface. Most of the work is done by its Java counterpart in +// WebRtcAudioRecord.java. This class is created and lives on a thread in +// C++-land, but recorded audio buffers are delivered on a high-priority +// thread managed by the Java class. +// +// The Java class makes use of AudioEffect features (mainly AEC) which are +// first available in Jelly Bean. If it is instantiated running against earlier +// SDKs, the AEC provided by the APM in WebRTC must be used and enabled +// separately instead. +// +// An instance must be created and destroyed on one and the same thread. +// All public methods must also be called on the same thread. A thread checker +// will RTC_DCHECK if any method is called on an invalid thread. +// +// This class uses JvmThreadConnector to attach to a Java VM if needed +// and detach when the object goes out of scope. Additional thread checking +// guarantees that no other (possibly non attached) thread is used. +class AudioRecordJni { + public: + // Wraps the Java specific parts of the AudioRecordJni into one helper class. 
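+  // As a rough usage sketch (hypothetical caller code, error handling
+  // omitted), a recording session driven through this wrapper looks like:
+  //
+  //   if (record.InitRecording(48000, 1) >= 0 && record.StartRecording()) {
+  //     ...  // audio is delivered via DataIsRecorded() callbacks
+  //     record.StopRecording();
+  //   }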
+  class JavaAudioRecord {
+   public:
+    JavaAudioRecord(NativeRegistration* native_registration,
+                    std::unique_ptr<GlobalRef> audio_track);
+    ~JavaAudioRecord();
+
+    int InitRecording(int sample_rate, size_t channels);
+    bool StartRecording();
+    bool StopRecording();
+    bool EnableBuiltInAEC(bool enable);
+    bool EnableBuiltInNS(bool enable);
+
+   private:
+    std::unique_ptr<GlobalRef> audio_record_;
+    jmethodID init_recording_;
+    jmethodID start_recording_;
+    jmethodID stop_recording_;
+    jmethodID enable_built_in_aec_;
+    jmethodID enable_built_in_ns_;
+  };
+
+  explicit AudioRecordJni(AudioManager* audio_manager);
+  ~AudioRecordJni();
+
+  int32_t Init();
+  int32_t Terminate();
+
+  int32_t InitRecording();
+  bool RecordingIsInitialized() const { return initialized_; }
+
+  int32_t StartRecording();
+  int32_t StopRecording();
+  bool Recording() const { return recording_; }
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer);
+
+  int32_t EnableBuiltInAEC(bool enable);
+  int32_t EnableBuiltInAGC(bool enable);
+  int32_t EnableBuiltInNS(bool enable);
+
+ private:
+  // Called from Java side so we can cache the address of the Java-managed
+  // `byte_buffer` in `direct_buffer_address_`. The size of the buffer
+  // is also stored in `direct_buffer_capacity_in_bytes_`.
+  // This method will be called by the WebRtcAudioRecord constructor, i.e.,
+  // on the same thread that this object is created on.
+  static void JNICALL CacheDirectBufferAddress(JNIEnv* env,
+                                               jobject obj,
+                                               jobject byte_buffer,
+                                               jlong nativeAudioRecord);
+  void OnCacheDirectBufferAddress(JNIEnv* env, jobject byte_buffer);
+
+  // Called periodically by the Java based WebRtcAudioRecord object when
+  // recording has started. Each call indicates that there are `length` new
+  // bytes recorded in the memory area `direct_buffer_address_` and it is
+  // now time to send these to the consumer.
+  // This method is called on a high-priority thread from Java. The name of
+  // the thread is 'AudioRecordThread'.
+  static void JNICALL DataIsRecorded(JNIEnv* env,
+                                     jobject obj,
+                                     jint length,
+                                     jlong nativeAudioRecord);
+  void OnDataIsRecorded(int length);
+
+  // Stores thread ID in constructor.
+  SequenceChecker thread_checker_;
+
+  // Stores thread ID in first call to OnDataIsRecorded() from high-priority
+  // thread in Java. Detached during construction of this object.
+  SequenceChecker thread_checker_java_;
+
+  // Calls JavaVM::AttachCurrentThread() if this thread is not attached at
+  // construction.
+  // Also ensures that DetachCurrentThread() is called at destruction.
+  JvmThreadConnector attach_thread_if_needed_;
+
+  // Wraps the JNI interface pointer and methods associated with it.
+  std::unique_ptr<JNIEnvironment> j_environment_;
+
+  // Contains factory method for creating the Java object.
+  std::unique_ptr<NativeRegistration> j_native_registration_;
+
+  // Wraps the Java specific parts of the AudioRecordJni class.
+  std::unique_ptr<JavaAudioRecord> j_audio_record_;
+
+  // Raw pointer to the audio manager.
+  const AudioManager* audio_manager_;
+
+  // Contains audio parameters provided to this class at construction by the
+  // AudioManager.
+  const AudioParameters audio_parameters_;
+
+  // Delay estimate of the total round-trip delay (input + output).
+  // Fixed value set once in AttachAudioBuffer() and it can take one out of two
+  // possible values. See audio_common.h for details.
+  int total_delay_in_milliseconds_;
+
+  // Cached copy of address to direct audio buffer owned by `j_audio_record_`.
+  void* direct_buffer_address_;
+
+  // Number of bytes in the direct audio buffer owned by `j_audio_record_`.
+ size_t direct_buffer_capacity_in_bytes_; + + // Number audio frames per audio buffer. Each audio frame corresponds to + // one sample of PCM mono data at 16 bits per sample. Hence, each audio + // frame contains 2 bytes (given that the Java layer only supports mono). + // Example: 480 for 48000 Hz or 441 for 44100 Hz. + size_t frames_per_buffer_; + + bool initialized_; + + bool recording_; + + // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the + // AudioDeviceModuleImpl class and called by AudioDeviceModule::Create(). + AudioDeviceBuffer* audio_device_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_RECORD_JNI_H_ diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.cc b/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.cc new file mode 100644 index 0000000000..5afa1ec252 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.cc @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/android/audio_track_jni.h" + +#include + +#include "modules/audio_device/android/audio_manager.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/platform_thread.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +// AudioTrackJni::JavaAudioTrack implementation. +AudioTrackJni::JavaAudioTrack::JavaAudioTrack( + NativeRegistration* native_reg, + std::unique_ptr audio_track) + : audio_track_(std::move(audio_track)), + init_playout_(native_reg->GetMethodId("initPlayout", "(IID)I")), + start_playout_(native_reg->GetMethodId("startPlayout", "()Z")), + stop_playout_(native_reg->GetMethodId("stopPlayout", "()Z")), + set_stream_volume_(native_reg->GetMethodId("setStreamVolume", "(I)Z")), + get_stream_max_volume_( + native_reg->GetMethodId("getStreamMaxVolume", "()I")), + get_stream_volume_(native_reg->GetMethodId("getStreamVolume", "()I")), + get_buffer_size_in_frames_( + native_reg->GetMethodId("getBufferSizeInFrames", "()I")) {} + +AudioTrackJni::JavaAudioTrack::~JavaAudioTrack() {} + +bool AudioTrackJni::JavaAudioTrack::InitPlayout(int sample_rate, int channels) { + double buffer_size_factor = + strtod(webrtc::field_trial::FindFullName( + "WebRTC-AudioDevicePlayoutBufferSizeFactor") + .c_str(), + nullptr); + if (buffer_size_factor == 0) + buffer_size_factor = 1.0; + int requested_buffer_size_bytes = audio_track_->CallIntMethod( + init_playout_, sample_rate, channels, buffer_size_factor); + // Update UMA histograms for both the requested and actual buffer size. + if (requested_buffer_size_bytes >= 0) { + // To avoid division by zero, we assume the sample rate is 48k if an invalid + // value is found. + sample_rate = sample_rate <= 0 ? 48000 : sample_rate; + // This calculation assumes that audio is mono. 
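+    // As a worked example: at 48000 Hz, a reported buffer of 3840 bytes
+    // corresponds to (3840 * 1000) / (2 * 48000) = 40 ms.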
+ const int requested_buffer_size_ms = + (requested_buffer_size_bytes * 1000) / (2 * sample_rate); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AndroidNativeRequestedAudioBufferSizeMs", + requested_buffer_size_ms, 0, 1000, 100); + int actual_buffer_size_frames = + audio_track_->CallIntMethod(get_buffer_size_in_frames_); + if (actual_buffer_size_frames >= 0) { + const int actual_buffer_size_ms = + actual_buffer_size_frames * 1000 / sample_rate; + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AndroidNativeAudioBufferSizeMs", + actual_buffer_size_ms, 0, 1000, 100); + } + return true; + } + return false; +} + +bool AudioTrackJni::JavaAudioTrack::StartPlayout() { + return audio_track_->CallBooleanMethod(start_playout_); +} + +bool AudioTrackJni::JavaAudioTrack::StopPlayout() { + return audio_track_->CallBooleanMethod(stop_playout_); +} + +bool AudioTrackJni::JavaAudioTrack::SetStreamVolume(int volume) { + return audio_track_->CallBooleanMethod(set_stream_volume_, volume); +} + +int AudioTrackJni::JavaAudioTrack::GetStreamMaxVolume() { + return audio_track_->CallIntMethod(get_stream_max_volume_); +} + +int AudioTrackJni::JavaAudioTrack::GetStreamVolume() { + return audio_track_->CallIntMethod(get_stream_volume_); +} + +// TODO(henrika): possible extend usage of AudioManager and add it as member. +AudioTrackJni::AudioTrackJni(AudioManager* audio_manager) + : j_environment_(JVM::GetInstance()->environment()), + audio_parameters_(audio_manager->GetPlayoutAudioParameters()), + direct_buffer_address_(nullptr), + direct_buffer_capacity_in_bytes_(0), + frames_per_buffer_(0), + initialized_(false), + playing_(false), + audio_device_buffer_(nullptr) { + RTC_LOG(LS_INFO) << "ctor"; + RTC_DCHECK(audio_parameters_.is_valid()); + RTC_CHECK(j_environment_); + JNINativeMethod native_methods[] = { + {"nativeCacheDirectBufferAddress", "(Ljava/nio/ByteBuffer;J)V", + reinterpret_cast( + &webrtc::AudioTrackJni::CacheDirectBufferAddress)}, + {"nativeGetPlayoutData", "(IJ)V", + reinterpret_cast(&webrtc::AudioTrackJni::GetPlayoutData)}}; + j_native_registration_ = j_environment_->RegisterNatives( + "org/webrtc/voiceengine/WebRtcAudioTrack", native_methods, + arraysize(native_methods)); + j_audio_track_.reset( + new JavaAudioTrack(j_native_registration_.get(), + j_native_registration_->NewObject( + "", "(J)V", PointerTojlong(this)))); + // Detach from this thread since we want to use the checker to verify calls + // from the Java based audio thread. 
+ thread_checker_java_.Detach(); +} + +AudioTrackJni::~AudioTrackJni() { + RTC_LOG(LS_INFO) << "dtor"; + RTC_DCHECK(thread_checker_.IsCurrent()); + Terminate(); +} + +int32_t AudioTrackJni::Init() { + RTC_LOG(LS_INFO) << "Init"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return 0; +} + +int32_t AudioTrackJni::Terminate() { + RTC_LOG(LS_INFO) << "Terminate"; + RTC_DCHECK(thread_checker_.IsCurrent()); + StopPlayout(); + return 0; +} + +int32_t AudioTrackJni::InitPlayout() { + RTC_LOG(LS_INFO) << "InitPlayout"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!playing_); + if (!j_audio_track_->InitPlayout(audio_parameters_.sample_rate(), + audio_parameters_.channels())) { + RTC_LOG(LS_ERROR) << "InitPlayout failed"; + return -1; + } + initialized_ = true; + return 0; +} + +int32_t AudioTrackJni::StartPlayout() { + RTC_LOG(LS_INFO) << "StartPlayout"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!playing_); + if (!initialized_) { + RTC_DLOG(LS_WARNING) + << "Playout can not start since InitPlayout must succeed first"; + return 0; + } + if (!j_audio_track_->StartPlayout()) { + RTC_LOG(LS_ERROR) << "StartPlayout failed"; + return -1; + } + playing_ = true; + return 0; +} + +int32_t AudioTrackJni::StopPlayout() { + RTC_LOG(LS_INFO) << "StopPlayout"; + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!initialized_ || !playing_) { + return 0; + } + if (!j_audio_track_->StopPlayout()) { + RTC_LOG(LS_ERROR) << "StopPlayout failed"; + return -1; + } + // If we don't detach here, we will hit a RTC_DCHECK in OnDataIsRecorded() + // next time StartRecording() is called since it will create a new Java + // thread. + thread_checker_java_.Detach(); + initialized_ = false; + playing_ = false; + direct_buffer_address_ = nullptr; + return 0; +} + +int AudioTrackJni::SpeakerVolumeIsAvailable(bool& available) { + available = true; + return 0; +} + +int AudioTrackJni::SetSpeakerVolume(uint32_t volume) { + RTC_LOG(LS_INFO) << "SetSpeakerVolume(" << volume << ")"; + RTC_DCHECK(thread_checker_.IsCurrent()); + return j_audio_track_->SetStreamVolume(volume) ? 0 : -1; +} + +int AudioTrackJni::MaxSpeakerVolume(uint32_t& max_volume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + max_volume = j_audio_track_->GetStreamMaxVolume(); + return 0; +} + +int AudioTrackJni::MinSpeakerVolume(uint32_t& min_volume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + min_volume = 0; + return 0; +} + +int AudioTrackJni::SpeakerVolume(uint32_t& volume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + volume = j_audio_track_->GetStreamVolume(); + RTC_LOG(LS_INFO) << "SpeakerVolume: " << volume; + return 0; +} + +// TODO(henrika): possibly add stereo support. 
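+// AttachAudioBuffer() below forwards the native sample rate and channel count
+// to the AudioDeviceBuffer, so that the rest of the audio pipeline can
+// resample and remix against the device's native format.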
+void AudioTrackJni::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + RTC_LOG(LS_INFO) << "AttachAudioBuffer"; + RTC_DCHECK(thread_checker_.IsCurrent()); + audio_device_buffer_ = audioBuffer; + const int sample_rate_hz = audio_parameters_.sample_rate(); + RTC_LOG(LS_INFO) << "SetPlayoutSampleRate(" << sample_rate_hz << ")"; + audio_device_buffer_->SetPlayoutSampleRate(sample_rate_hz); + const size_t channels = audio_parameters_.channels(); + RTC_LOG(LS_INFO) << "SetPlayoutChannels(" << channels << ")"; + audio_device_buffer_->SetPlayoutChannels(channels); +} + +JNI_FUNCTION_ALIGN +void JNICALL AudioTrackJni::CacheDirectBufferAddress(JNIEnv* env, + jobject obj, + jobject byte_buffer, + jlong nativeAudioTrack) { + webrtc::AudioTrackJni* this_object = + reinterpret_cast(nativeAudioTrack); + this_object->OnCacheDirectBufferAddress(env, byte_buffer); +} + +void AudioTrackJni::OnCacheDirectBufferAddress(JNIEnv* env, + jobject byte_buffer) { + RTC_LOG(LS_INFO) << "OnCacheDirectBufferAddress"; + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!direct_buffer_address_); + direct_buffer_address_ = env->GetDirectBufferAddress(byte_buffer); + jlong capacity = env->GetDirectBufferCapacity(byte_buffer); + RTC_LOG(LS_INFO) << "direct buffer capacity: " << capacity; + direct_buffer_capacity_in_bytes_ = static_cast(capacity); + const size_t bytes_per_frame = audio_parameters_.channels() * sizeof(int16_t); + frames_per_buffer_ = direct_buffer_capacity_in_bytes_ / bytes_per_frame; + RTC_LOG(LS_INFO) << "frames_per_buffer: " << frames_per_buffer_; +} + +JNI_FUNCTION_ALIGN +void JNICALL AudioTrackJni::GetPlayoutData(JNIEnv* env, + jobject obj, + jint length, + jlong nativeAudioTrack) { + webrtc::AudioTrackJni* this_object = + reinterpret_cast(nativeAudioTrack); + this_object->OnGetPlayoutData(static_cast(length)); +} + +// This method is called on a high-priority thread from Java. The name of +// the thread is 'AudioRecordTrack'. +void AudioTrackJni::OnGetPlayoutData(size_t length) { + RTC_DCHECK(thread_checker_java_.IsCurrent()); + const size_t bytes_per_frame = audio_parameters_.channels() * sizeof(int16_t); + RTC_DCHECK_EQ(frames_per_buffer_, length / bytes_per_frame); + if (!audio_device_buffer_) { + RTC_LOG(LS_ERROR) << "AttachAudioBuffer has not been called"; + return; + } + // Pull decoded data (in 16-bit PCM format) from jitter buffer. + int samples = audio_device_buffer_->RequestPlayoutData(frames_per_buffer_); + if (samples <= 0) { + RTC_LOG(LS_ERROR) << "AudioDeviceBuffer::RequestPlayoutData failed"; + return; + } + RTC_DCHECK_EQ(samples, frames_per_buffer_); + // Copy decoded data into common byte buffer to ensure that it can be + // written to the Java based audio track. + samples = audio_device_buffer_->GetPlayoutData(direct_buffer_address_); + RTC_DCHECK_EQ(length, bytes_per_frame * samples); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.h b/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.h new file mode 100644 index 0000000000..7eb69082b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/audio_track_jni.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_DEVICE_ANDROID_AUDIO_TRACK_JNI_H_ +#define MODULES_AUDIO_DEVICE_ANDROID_AUDIO_TRACK_JNI_H_ + +#include + +#include + +#include "api/sequence_checker.h" +#include "modules/audio_device/android/audio_common.h" +#include "modules/audio_device/android/audio_manager.h" +#include "modules/audio_device/audio_device_generic.h" +#include "modules/audio_device/include/audio_device_defines.h" +#include "modules/utility/include/helpers_android.h" +#include "modules/utility/include/jvm_android.h" + +namespace webrtc { + +// Implements 16-bit mono PCM audio output support for Android using the Java +// AudioTrack interface. Most of the work is done by its Java counterpart in +// WebRtcAudioTrack.java. This class is created and lives on a thread in +// C++-land, but decoded audio buffers are requested on a high-priority +// thread managed by the Java class. +// +// An instance must be created and destroyed on one and the same thread. +// All public methods must also be called on the same thread. A thread checker +// will RTC_DCHECK if any method is called on an invalid thread. +// +// This class uses JvmThreadConnector to attach to a Java VM if needed +// and detach when the object goes out of scope. Additional thread checking +// guarantees that no other (possibly non attached) thread is used. +class AudioTrackJni { + public: + // Wraps the Java specific parts of the AudioTrackJni into one helper class. + class JavaAudioTrack { + public: + JavaAudioTrack(NativeRegistration* native_registration, + std::unique_ptr audio_track); + ~JavaAudioTrack(); + + bool InitPlayout(int sample_rate, int channels); + bool StartPlayout(); + bool StopPlayout(); + bool SetStreamVolume(int volume); + int GetStreamMaxVolume(); + int GetStreamVolume(); + + private: + std::unique_ptr audio_track_; + jmethodID init_playout_; + jmethodID start_playout_; + jmethodID stop_playout_; + jmethodID set_stream_volume_; + jmethodID get_stream_max_volume_; + jmethodID get_stream_volume_; + jmethodID get_buffer_size_in_frames_; + }; + + explicit AudioTrackJni(AudioManager* audio_manager); + ~AudioTrackJni(); + + int32_t Init(); + int32_t Terminate(); + + int32_t InitPlayout(); + bool PlayoutIsInitialized() const { return initialized_; } + + int32_t StartPlayout(); + int32_t StopPlayout(); + bool Playing() const { return playing_; } + + int SpeakerVolumeIsAvailable(bool& available); + int SetSpeakerVolume(uint32_t volume); + int SpeakerVolume(uint32_t& volume) const; + int MaxSpeakerVolume(uint32_t& max_volume) const; + int MinSpeakerVolume(uint32_t& min_volume) const; + + void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer); + + private: + // Called from Java side so we can cache the address of the Java-manged + // `byte_buffer` in `direct_buffer_address_`. The size of the buffer + // is also stored in `direct_buffer_capacity_in_bytes_`. + // Called on the same thread as the creating thread. + static void JNICALL CacheDirectBufferAddress(JNIEnv* env, + jobject obj, + jobject byte_buffer, + jlong nativeAudioTrack); + void OnCacheDirectBufferAddress(JNIEnv* env, jobject byte_buffer); + + // Called periodically by the Java based WebRtcAudioTrack object when + // playout has started. Each call indicates that `length` new bytes should + // be written to the memory area `direct_buffer_address_` for playout. + // This method is called on a high-priority thread from Java. 
The name of + // the thread is 'AudioTrackThread'. + static void JNICALL GetPlayoutData(JNIEnv* env, + jobject obj, + jint length, + jlong nativeAudioTrack); + void OnGetPlayoutData(size_t length); + + // Stores thread ID in constructor. + SequenceChecker thread_checker_; + + // Stores thread ID in first call to OnGetPlayoutData() from high-priority + // thread in Java. Detached during construction of this object. + SequenceChecker thread_checker_java_; + + // Calls JavaVM::AttachCurrentThread() if this thread is not attached at + // construction. + // Also ensures that DetachCurrentThread() is called at destruction. + JvmThreadConnector attach_thread_if_needed_; + + // Wraps the JNI interface pointer and methods associated with it. + std::unique_ptr j_environment_; + + // Contains factory method for creating the Java object. + std::unique_ptr j_native_registration_; + + // Wraps the Java specific parts of the AudioTrackJni class. + std::unique_ptr j_audio_track_; + + // Contains audio parameters provided to this class at construction by the + // AudioManager. + const AudioParameters audio_parameters_; + + // Cached copy of address to direct audio buffer owned by `j_audio_track_`. + void* direct_buffer_address_; + + // Number of bytes in the direct audio buffer owned by `j_audio_track_`. + size_t direct_buffer_capacity_in_bytes_; + + // Number of audio frames per audio buffer. Each audio frame corresponds to + // one sample of PCM mono data at 16 bits per sample. Hence, each audio + // frame contains 2 bytes (given that the Java layer only supports mono). + // Example: 480 for 48000 Hz or 441 for 44100 Hz. + size_t frames_per_buffer_; + + bool initialized_; + + bool playing_; + + // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the + // AudioDeviceModuleImpl class and called by AudioDeviceModule::Create(). + // The AudioDeviceBuffer is a member of the AudioDeviceModuleImpl instance + // and therefore outlives this object. + AudioDeviceBuffer* audio_device_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_ANDROID_AUDIO_TRACK_JNI_H_ diff --git a/third_party/libwebrtc/modules/audio_device/android/build_info.cc b/third_party/libwebrtc/modules/audio_device/android/build_info.cc new file mode 100644 index 0000000000..916be8244e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/build_info.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_device/android/build_info.h" + +#include "modules/utility/include/helpers_android.h" + +namespace webrtc { + +BuildInfo::BuildInfo() + : j_environment_(JVM::GetInstance()->environment()), + j_build_info_( + JVM::GetInstance()->GetClass("org/webrtc/voiceengine/BuildInfo")) {} + +std::string BuildInfo::GetStringFromJava(const char* name) { + jmethodID id = j_build_info_.GetStaticMethodId(name, "()Ljava/lang/String;"); + jstring j_string = + static_cast(j_build_info_.CallStaticObjectMethod(id)); + return j_environment_->JavaToStdString(j_string); +} + +std::string BuildInfo::GetDeviceModel() { + return GetStringFromJava("getDeviceModel"); +} + +std::string BuildInfo::GetBrand() { + return GetStringFromJava("getBrand"); +} + +std::string BuildInfo::GetDeviceManufacturer() { + return GetStringFromJava("getDeviceManufacturer"); +} + +std::string BuildInfo::GetAndroidBuildId() { + return GetStringFromJava("getAndroidBuildId"); +} + +std::string BuildInfo::GetBuildType() { + return GetStringFromJava("getBuildType"); +} + +std::string BuildInfo::GetBuildRelease() { + return GetStringFromJava("getBuildRelease"); +} + +SdkCode BuildInfo::GetSdkVersion() { + jmethodID id = j_build_info_.GetStaticMethodId("getSdkVersion", "()I"); + jint j_version = j_build_info_.CallStaticIntMethod(id); + return static_cast(j_version); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/android/build_info.h b/third_party/libwebrtc/modules/audio_device/android/build_info.h new file mode 100644 index 0000000000..3647e56649 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/build_info.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_DEVICE_ANDROID_BUILD_INFO_H_ +#define MODULES_AUDIO_DEVICE_ANDROID_BUILD_INFO_H_ + +#include + +#include +#include + +#include "modules/utility/include/jvm_android.h" + +namespace webrtc { + +// This enumeration maps to the values returned by BuildInfo::GetSdkVersion(), +// indicating the Android release associated with a given SDK version. +// See https://developer.android.com/guide/topics/manifest/uses-sdk-element.html +// for details. +enum SdkCode { + SDK_CODE_JELLY_BEAN = 16, // Android 4.1 + SDK_CODE_JELLY_BEAN_MR1 = 17, // Android 4.2 + SDK_CODE_JELLY_BEAN_MR2 = 18, // Android 4.3 + SDK_CODE_KITKAT = 19, // Android 4.4 + SDK_CODE_WATCH = 20, // Android 4.4W + SDK_CODE_LOLLIPOP = 21, // Android 5.0 + SDK_CODE_LOLLIPOP_MR1 = 22, // Android 5.1 + SDK_CODE_MARSHMALLOW = 23, // Android 6.0 + SDK_CODE_N = 24, +}; + +// Utility class used to query the Java class (org/webrtc/voiceengine/BuildInfo) +// for device and Android build information. +// The calling thread is attached to the JVM at construction if needed and a +// valid Java environment object is also created. +// All Get methods must be called on the creating thread. If not, the code will +// hit RTC_DCHECKs when calling JNIEnvironment::JavaToStdString(). +class BuildInfo { + public: + BuildInfo(); + ~BuildInfo() {} + + // End-user-visible name for the end product (e.g. "Nexus 6"). + std::string GetDeviceModel(); + // Consumer-visible brand (e.g. 
"google"). + std::string GetBrand(); + // Manufacturer of the product/hardware (e.g. "motorola"). + std::string GetDeviceManufacturer(); + // Android build ID (e.g. LMY47D). + std::string GetAndroidBuildId(); + // The type of build (e.g. "user" or "eng"). + std::string GetBuildType(); + // The user-visible version string (e.g. "5.1"). + std::string GetBuildRelease(); + // The user-visible SDK version of the framework (e.g. 21). See SdkCode enum + // for translation. + SdkCode GetSdkVersion(); + + private: + // Helper method which calls a static getter method with `name` and returns + // a string from Java. + std::string GetStringFromJava(const char* name); + + // Ensures that this class can access a valid JNI interface pointer even + // if the creating thread was not attached to the JVM. + JvmThreadConnector attach_thread_if_needed_; + + // Provides access to the JNIEnv interface pointer and the JavaToStdString() + // method which is used to translate Java strings to std strings. + std::unique_ptr j_environment_; + + // Holds the jclass object and provides access to CallStaticObjectMethod(). + // Used by GetStringFromJava() during construction only. + JavaClass j_build_info_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_ANDROID_BUILD_INFO_H_ diff --git a/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.cc b/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.cc new file mode 100644 index 0000000000..59e9c8f7a6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/android/ensure_initialized.h" + +#include +#include +#include + +#include "modules/utility/include/jvm_android.h" +#include "rtc_base/checks.h" +#include "sdk/android/src/jni/jvm.h" + +namespace webrtc { +namespace audiodevicemodule { + +static pthread_once_t g_initialize_once = PTHREAD_ONCE_INIT; + +void EnsureInitializedOnce() { + RTC_CHECK(::webrtc::jni::GetJVM() != nullptr); + + JNIEnv* jni = ::webrtc::jni::AttachCurrentThreadIfNeeded(); + JavaVM* jvm = NULL; + RTC_CHECK_EQ(0, jni->GetJavaVM(&jvm)); + + // Initialize the Java environment (currently only used by the audio manager). + webrtc::JVM::Initialize(jvm); +} + +void EnsureInitialized() { + RTC_CHECK_EQ(0, pthread_once(&g_initialize_once, &EnsureInitializedOnce)); +} + +} // namespace audiodevicemodule +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.h b/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.h new file mode 100644 index 0000000000..c1997b4acd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/ensure_initialized.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +namespace webrtc { +namespace audiodevicemodule { + +void EnsureInitialized(); + +} // namespace audiodevicemodule +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/BuildInfo.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/BuildInfo.java new file mode 100644 index 0000000000..aed8a06454 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/BuildInfo.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +package org.webrtc.voiceengine; + +import android.os.Build; + +public final class BuildInfo { + public static String getDevice() { + return Build.DEVICE; + } + + public static String getDeviceModel() { + return Build.MODEL; + } + + public static String getProduct() { + return Build.PRODUCT; + } + + public static String getBrand() { + return Build.BRAND; + } + + public static String getDeviceManufacturer() { + return Build.MANUFACTURER; + } + + public static String getAndroidBuildId() { + return Build.ID; + } + + public static String getBuildType() { + return Build.TYPE; + } + + public static String getBuildRelease() { + return Build.VERSION.RELEASE; + } + + public static int getSdkVersion() { + return Build.VERSION.SDK_INT; + } +} diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioEffects.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioEffects.java new file mode 100644 index 0000000000..92f1c93524 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioEffects.java @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +package org.webrtc.voiceengine; + +import android.media.audiofx.AcousticEchoCanceler; +import android.media.audiofx.AudioEffect; +import android.media.audiofx.AudioEffect.Descriptor; +import android.media.audiofx.NoiseSuppressor; +import android.os.Build; +import androidx.annotation.Nullable; +import java.util.List; +import java.util.UUID; +import org.webrtc.Logging; + +// This class wraps control of three different platform effects. Supported +// effects are: AcousticEchoCanceler (AEC) and NoiseSuppressor (NS). +// Calling enable() will active all effects that are +// supported by the device if the corresponding `shouldEnableXXX` member is set. +public class WebRtcAudioEffects { + private static final boolean DEBUG = false; + + private static final String TAG = "WebRtcAudioEffects"; + + // UUIDs for Software Audio Effects that we want to avoid using. 
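+  // Matching a queried effect descriptor against these UUIDs lets the code
+  // fall back to WebRTC's software AEC/NS instead of the AOSP software
+  // implementations.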
+  // The implementor field will be set to "The Android Open Source Project".
+  private static final UUID AOSP_ACOUSTIC_ECHO_CANCELER =
+      UUID.fromString("bb392ec0-8d4d-11e0-a896-0002a5d5c51b");
+  private static final UUID AOSP_NOISE_SUPPRESSOR =
+      UUID.fromString("c06c8400-8e06-11e0-9cb6-0002a5d5c51b");
+
+  // Contains the available effect descriptors returned from the
+  // AudioEffect.getEffects() call. This result is cached to avoid doing the
+  // slow OS call multiple times.
+  private static @Nullable Descriptor[] cachedEffects;
+
+  // Contains the audio effect objects. Created in enable() and destroyed
+  // in release().
+  private @Nullable AcousticEchoCanceler aec;
+  private @Nullable NoiseSuppressor ns;
+
+  // Affects the final state given to the setEnabled() method on each effect.
+  // The default state is set to "disabled" but each effect can also be enabled
+  // by calling setAEC() and setNS().
+  // To enable an effect, both the shouldEnableXXX member and the static
+  // canUseXXX() must be true.
+  private boolean shouldEnableAec;
+  private boolean shouldEnableNs;
+
+  // Checks if the device implements Acoustic Echo Cancellation (AEC).
+  // Returns true if the device implements AEC, false otherwise.
+  public static boolean isAcousticEchoCancelerSupported() {
+    // Note: we're using isAcousticEchoCancelerEffectAvailable() instead of
+    // AcousticEchoCanceler.isAvailable() to avoid the expensive getEffects()
+    // OS API call.
+    return isAcousticEchoCancelerEffectAvailable();
+  }
+
+  // Checks if the device implements Noise Suppression (NS).
+  // Returns true if the device implements NS, false otherwise.
+  public static boolean isNoiseSuppressorSupported() {
+    // Note: we're using isNoiseSuppressorEffectAvailable() instead of
+    // NoiseSuppressor.isAvailable() to avoid the expensive getEffects()
+    // OS API call.
+    return isNoiseSuppressorEffectAvailable();
+  }
+
+  // Returns true if the device is blacklisted for HW AEC usage.
+  public static boolean isAcousticEchoCancelerBlacklisted() {
+    List<String> blackListedModels = WebRtcAudioUtils.getBlackListedModelsForAecUsage();
+    boolean isBlacklisted = blackListedModels.contains(Build.MODEL);
+    if (isBlacklisted) {
+      Logging.w(TAG, Build.MODEL + " is blacklisted for HW AEC usage!");
+    }
+    return isBlacklisted;
+  }
+
+  // Returns true if the device is blacklisted for HW NS usage.
+  public static boolean isNoiseSuppressorBlacklisted() {
+    List<String> blackListedModels = WebRtcAudioUtils.getBlackListedModelsForNsUsage();
+    boolean isBlacklisted = blackListedModels.contains(Build.MODEL);
+    if (isBlacklisted) {
+      Logging.w(TAG, Build.MODEL + " is blacklisted for HW NS usage!");
+    }
+    return isBlacklisted;
+  }
+
+  // Returns true if the platform AEC should be excluded based on its UUID.
+  // AudioEffect.queryEffects() can throw IllegalStateException.
+  private static boolean isAcousticEchoCancelerExcludedByUUID() {
+    for (Descriptor d : getAvailableEffects()) {
+      if (d.type.equals(AudioEffect.EFFECT_TYPE_AEC)
+          && d.uuid.equals(AOSP_ACOUSTIC_ECHO_CANCELER)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Returns true if the platform NS should be excluded based on its UUID.
+  // AudioEffect.queryEffects() can throw IllegalStateException.
+  private static boolean isNoiseSuppressorExcludedByUUID() {
+    for (Descriptor d : getAvailableEffects()) {
+      if (d.type.equals(AudioEffect.EFFECT_TYPE_NS) && d.uuid.equals(AOSP_NOISE_SUPPRESSOR)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Returns true if the device supports Acoustic Echo Cancellation (AEC).
+  private static boolean isAcousticEchoCancelerEffectAvailable() {
+    return isEffectTypeAvailable(AudioEffect.EFFECT_TYPE_AEC);
+  }
+
+  // Returns true if the device supports Noise Suppression (NS).
+  private static boolean isNoiseSuppressorEffectAvailable() {
+    return isEffectTypeAvailable(AudioEffect.EFFECT_TYPE_NS);
+  }
+
+  // Returns true if all conditions for supporting the HW AEC are fulfilled.
+  // It will not be possible to enable the HW AEC if this method returns false.
+  public static boolean canUseAcousticEchoCanceler() {
+    boolean canUseAcousticEchoCanceler = isAcousticEchoCancelerSupported()
+        && !WebRtcAudioUtils.useWebRtcBasedAcousticEchoCanceler()
+        && !isAcousticEchoCancelerBlacklisted() && !isAcousticEchoCancelerExcludedByUUID();
+    Logging.d(TAG, "canUseAcousticEchoCanceler: " + canUseAcousticEchoCanceler);
+    return canUseAcousticEchoCanceler;
+  }
+
+  // Returns true if all conditions for supporting the HW NS are fulfilled.
+  // It will not be possible to enable the HW NS if this method returns false.
+  public static boolean canUseNoiseSuppressor() {
+    boolean canUseNoiseSuppressor = isNoiseSuppressorSupported()
+        && !WebRtcAudioUtils.useWebRtcBasedNoiseSuppressor() && !isNoiseSuppressorBlacklisted()
+        && !isNoiseSuppressorExcludedByUUID();
+    Logging.d(TAG, "canUseNoiseSuppressor: " + canUseNoiseSuppressor);
+    return canUseNoiseSuppressor;
+  }
+
+  public static WebRtcAudioEffects create() {
+    return new WebRtcAudioEffects();
+  }
+
+  private WebRtcAudioEffects() {
+    Logging.d(TAG, "ctor" + WebRtcAudioUtils.getThreadInfo());
+  }
+
+  // Call this method to enable or disable the platform AEC. It modifies
+  // `shouldEnableAec` which is used in enable() where the actual state
+  // of the AEC effect is modified. Returns true if HW AEC is supported and
+  // false otherwise.
+  public boolean setAEC(boolean enable) {
+    Logging.d(TAG, "setAEC(" + enable + ")");
+    if (!canUseAcousticEchoCanceler()) {
+      Logging.w(TAG, "Platform AEC is not supported");
+      shouldEnableAec = false;
+      return false;
+    }
+    if (aec != null && (enable != shouldEnableAec)) {
+      Logging.e(TAG, "Platform AEC state can't be modified while recording");
+      return false;
+    }
+    shouldEnableAec = enable;
+    return true;
+  }
+
+  // Call this method to enable or disable the platform NS. It modifies
+  // `shouldEnableNs` which is used in enable() where the actual state
+  // of the NS effect is modified. Returns true if HW NS is supported and
+  // false otherwise.
+  public boolean setNS(boolean enable) {
+    Logging.d(TAG, "setNS(" + enable + ")");
+    if (!canUseNoiseSuppressor()) {
+      Logging.w(TAG, "Platform NS is not supported");
+      shouldEnableNs = false;
+      return false;
+    }
+    if (ns != null && (enable != shouldEnableNs)) {
+      Logging.e(TAG, "Platform NS state can't be modified while recording");
+      return false;
+    }
+    shouldEnableNs = enable;
+    return true;
+  }
+
+  public void enable(int audioSession) {
+    Logging.d(TAG, "enable(audioSession=" + audioSession + ")");
+    assertTrue(aec == null);
+    assertTrue(ns == null);
+
+    if (DEBUG) {
+      // Add logging of supported effects but filter out "VoIP effects", i.e.,
+      // AEC, AGC and NS. Avoid calling AudioEffect.queryEffects() unless the
+      // DEBUG flag is set since we have seen crashes in this API.
+      for (Descriptor d : AudioEffect.queryEffects()) {
+        if (effectTypeIsVoIP(d.type)) {
+          Logging.d(TAG, "name: " + d.name + ", "
+              + "mode: " + d.connectMode + ", "
+              + "implementor: " + d.implementor + ", "
+              + "UUID: " + d.uuid);
+        }
+      }
+    }
+
+    if (isAcousticEchoCancelerSupported()) {
+      // Create an AcousticEchoCanceler and attach it to the AudioRecord on
+      // the specified audio session.
+      aec = AcousticEchoCanceler.create(audioSession);
+      if (aec != null) {
+        boolean enabled = aec.getEnabled();
+        boolean enable = shouldEnableAec && canUseAcousticEchoCanceler();
+        if (aec.setEnabled(enable) != AudioEffect.SUCCESS) {
+          Logging.e(TAG, "Failed to set the AcousticEchoCanceler state");
+        }
+        Logging.d(TAG, "AcousticEchoCanceler: was " + (enabled ? "enabled" : "disabled")
+            + ", enable: " + enable + ", is now: "
+            + (aec.getEnabled() ? "enabled" : "disabled"));
+      } else {
+        Logging.e(TAG, "Failed to create the AcousticEchoCanceler instance");
+      }
+    }
+
+    if (isNoiseSuppressorSupported()) {
+      // Create a NoiseSuppressor and attach it to the AudioRecord on the
+      // specified audio session.
+      ns = NoiseSuppressor.create(audioSession);
+      if (ns != null) {
+        boolean enabled = ns.getEnabled();
+        boolean enable = shouldEnableNs && canUseNoiseSuppressor();
+        if (ns.setEnabled(enable) != AudioEffect.SUCCESS) {
+          Logging.e(TAG, "Failed to set the NoiseSuppressor state");
+        }
+        Logging.d(TAG, "NoiseSuppressor: was " + (enabled ? "enabled" : "disabled") + ", enable: "
+            + enable + ", is now: " + (ns.getEnabled() ? "enabled" : "disabled"));
+      } else {
+        Logging.e(TAG, "Failed to create the NoiseSuppressor instance");
+      }
+    }
+  }
+
+  // Releases all native audio effect resources. It is a good practice to
+  // release the effect engine when not in use as control can be returned
+  // to other applications or the native resources released.
+  public void release() {
+    Logging.d(TAG, "release");
+    if (aec != null) {
+      aec.release();
+      aec = null;
+    }
+    if (ns != null) {
+      ns.release();
+      ns = null;
+    }
+  }
+
+  // Returns true for effect types in `type` that are of "VoIP" types:
+  // Acoustic Echo Canceler (AEC) or Automatic Gain Control (AGC) or
+  // Noise Suppressor (NS). Note that an extra check for support is needed
+  // in each comparison since some devices include effects in the
+  // AudioEffect.Descriptor array that are actually not available on the device.
+  // As an example: Samsung Galaxy S6 includes an AGC in the descriptor but
+  // AutomaticGainControl.isAvailable() returns false.
+  private boolean effectTypeIsVoIP(UUID type) {
+    return (AudioEffect.EFFECT_TYPE_AEC.equals(type) && isAcousticEchoCancelerSupported())
+        || (AudioEffect.EFFECT_TYPE_NS.equals(type) && isNoiseSuppressorSupported());
+  }
+
+  // Helper method which throws an exception when an assertion has failed.
+  private static void assertTrue(boolean condition) {
+    if (!condition) {
+      throw new AssertionError("Expected condition to be true");
+    }
+  }
+
+  // Returns the cached copy of the audio effects array, if available, or
+  // queries the operating system for the list of effects.
+  private static @Nullable Descriptor[] getAvailableEffects() {
+    if (cachedEffects != null) {
+      return cachedEffects;
+    }
+    // The caching is best effort only - if this method is called from several
+    // threads in parallel, they may end up doing the underlying OS call
+    // multiple times. It's normally only called on one thread so there's no
+    // real need to optimize for the multiple threads case.
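+    // (If stricter guarantees were ever wanted, a sketch of a stricter
+    // variant would simply serialize the query, e.g.:
+    //   synchronized (WebRtcAudioEffects.class) {
+    //     if (cachedEffects == null) {
+    //       cachedEffects = AudioEffect.queryEffects();
+    //     }
+    //   }
+    // The lock-free form below is kept since the race is benign.)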
+    cachedEffects = AudioEffect.queryEffects();
+    return cachedEffects;
+  }
+
+  // Returns true if an effect of the specified type is available. Functionally
+  // equivalent to (NoiseSuppressor|AutomaticGainControl|...).isAvailable(), but
+  // faster as it avoids the expensive OS call to enumerate effects.
+  private static boolean isEffectTypeAvailable(UUID effectType) {
+    Descriptor[] effects = getAvailableEffects();
+    if (effects == null) {
+      return false;
+    }
+    for (Descriptor d : effects) {
+      if (d.type.equals(effectType)) {
+        return true;
+      }
+    }
+    return false;
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioManager.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioManager.java
new file mode 100644
index 0000000000..43c416f5b1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioManager.java
@@ -0,0 +1,371 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+package org.webrtc.voiceengine;
+
+import android.content.Context;
+import android.content.pm.PackageManager;
+import android.media.AudioFormat;
+import android.media.AudioManager;
+import android.media.AudioRecord;
+import android.media.AudioTrack;
+import android.os.Build;
+import androidx.annotation.Nullable;
+import java.util.Timer;
+import java.util.TimerTask;
+import org.webrtc.ContextUtils;
+import org.webrtc.Logging;
+
+// WebRtcAudioManager handles tasks that use android.media.AudioManager.
+// At construction, storeAudioParameters() is called and it retrieves
+// fundamental audio parameters like native sample rate and number of channels.
+// The result is then provided to the caller by nativeCacheAudioParameters().
+// It is also possible to call init() to set up the audio environment for best
+// possible "VoIP performance". All settings done in init() are reverted by
+// dispose(). This class can also be used without calling init() if the user
+// prefers to set up the audio environment separately. However, it is
+// recommended to always use AudioManager.MODE_IN_COMMUNICATION.
+public class WebRtcAudioManager {
+  private static final boolean DEBUG = false;
+
+  private static final String TAG = "WebRtcAudioManager";
+
+  // TODO(bugs.webrtc.org/8914): disabled by default until AAudio support has
+  // been completed. Goal is to always return false on Android O MR1 and higher.
+  private static final boolean blacklistDeviceForAAudioUsage = true;
+
+  // Use mono as default for both audio directions.
+  private static boolean useStereoOutput;
+  private static boolean useStereoInput;
+
+  private static boolean blacklistDeviceForOpenSLESUsage;
+  private static boolean blacklistDeviceForOpenSLESUsageIsOverridden;
+
+  // Call this method to override the default list of blacklisted devices
+  // specified in WebRtcAudioUtils.BLACKLISTED_OPEN_SL_ES_MODELS.
+  // Allows an app to take control over which devices to exclude from using
+  // the OpenSL ES audio output path.
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized void setBlacklistDeviceForOpenSLESUsage(boolean enable) {
+    blacklistDeviceForOpenSLESUsageIsOverridden = true;
+    blacklistDeviceForOpenSLESUsage = enable;
+  }
+
+  // Call these methods to override the default mono audio modes for the specified direction(s)
+  // (input and/or output).
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized void setStereoOutput(boolean enable) {
+    Logging.w(TAG, "Overriding default output behavior: setStereoOutput(" + enable + ')');
+    useStereoOutput = enable;
+  }
+
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized void setStereoInput(boolean enable) {
+    Logging.w(TAG, "Overriding default input behavior: setStereoInput(" + enable + ')');
+    useStereoInput = enable;
+  }
+
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized boolean getStereoOutput() {
+    return useStereoOutput;
+  }
+
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized boolean getStereoInput() {
+    return useStereoInput;
+  }
+
+  // Default audio data format is PCM 16 bit per sample.
+  // Guaranteed to be supported by all devices.
+  private static final int BITS_PER_SAMPLE = 16;
+
+  private static final int DEFAULT_FRAME_PER_BUFFER = 256;
+
+  // Private utility class that periodically checks and logs the volume level
+  // of the audio stream that is currently controlled by the volume control.
+  // A timer triggers logs once every 30 seconds and the timer's associated
+  // thread is named "WebRtcVolumeLevelLoggerThread".
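+  // (For reference: java.util.Timer.schedule(task, 0, period) used below runs
+  // the task once immediately and then repeatedly at the given period on a
+  // single background thread, here named THREAD_NAME.)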
+  private static class VolumeLogger {
+    private static final String THREAD_NAME = "WebRtcVolumeLevelLoggerThread";
+    private static final int TIMER_PERIOD_IN_SECONDS = 30;
+
+    private final AudioManager audioManager;
+    private @Nullable Timer timer;
+
+    public VolumeLogger(AudioManager audioManager) {
+      this.audioManager = audioManager;
+    }
+
+    public void start() {
+      timer = new Timer(THREAD_NAME);
+      timer.schedule(new LogVolumeTask(audioManager.getStreamMaxVolume(AudioManager.STREAM_RING),
+                         audioManager.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL)),
+          0, TIMER_PERIOD_IN_SECONDS * 1000);
+    }
+
+    private class LogVolumeTask extends TimerTask {
+      private final int maxRingVolume;
+      private final int maxVoiceCallVolume;
+
+      LogVolumeTask(int maxRingVolume, int maxVoiceCallVolume) {
+        this.maxRingVolume = maxRingVolume;
+        this.maxVoiceCallVolume = maxVoiceCallVolume;
+      }
+
+      @Override
+      public void run() {
+        final int mode = audioManager.getMode();
+        if (mode == AudioManager.MODE_RINGTONE) {
+          Logging.d(TAG, "STREAM_RING stream volume: "
+              + audioManager.getStreamVolume(AudioManager.STREAM_RING) + " (max="
+              + maxRingVolume + ")");
+        } else if (mode == AudioManager.MODE_IN_COMMUNICATION) {
+          Logging.d(TAG, "VOICE_CALL stream volume: "
+              + audioManager.getStreamVolume(AudioManager.STREAM_VOICE_CALL) + " (max="
+              + maxVoiceCallVolume + ")");
+        }
+      }
+    }
+
+    private void stop() {
+      if (timer != null) {
+        timer.cancel();
+        timer = null;
+      }
+    }
+  }
+
+  private final long nativeAudioManager;
+  private final AudioManager audioManager;
+
+  private boolean initialized;
+  private int nativeSampleRate;
+  private int nativeChannels;
+
+  private boolean hardwareAEC;
+  private boolean hardwareAGC;
+  private boolean hardwareNS;
+  private boolean lowLatencyOutput;
+  private boolean lowLatencyInput;
+  private boolean proAudio;
+  private boolean aAudio;
+  private int sampleRate;
+  private int outputChannels;
+  private int inputChannels;
+  private int outputBufferSize;
+  private int inputBufferSize;
+
+  private final VolumeLogger volumeLogger;
+
+  WebRtcAudioManager(long nativeAudioManager) {
+    Logging.d(TAG, "ctor" + WebRtcAudioUtils.getThreadInfo());
+    this.nativeAudioManager = nativeAudioManager;
+    audioManager =
+        (AudioManager) ContextUtils.getApplicationContext().getSystemService(Context.AUDIO_SERVICE);
+    if (DEBUG) {
+      WebRtcAudioUtils.logDeviceInfo(TAG);
+    }
+    volumeLogger = new VolumeLogger(audioManager);
+    storeAudioParameters();
+    nativeCacheAudioParameters(sampleRate, outputChannels, inputChannels, hardwareAEC, hardwareAGC,
+        hardwareNS, lowLatencyOutput, lowLatencyInput, proAudio, aAudio, outputBufferSize,
+        inputBufferSize, nativeAudioManager);
+    WebRtcAudioUtils.logAudioState(TAG);
+  }
+
+  private boolean init() {
+    Logging.d(TAG, "init" + WebRtcAudioUtils.getThreadInfo());
+    if (initialized) {
+      return true;
+    }
+    Logging.d(TAG, "audio mode is: "
+        + WebRtcAudioUtils.modeToString(audioManager.getMode()));
+    initialized = true;
+    volumeLogger.start();
+    return true;
+  }
+
+  private void dispose() {
+    Logging.d(TAG, "dispose" + WebRtcAudioUtils.getThreadInfo());
+    if (!initialized) {
+      return;
+    }
+    volumeLogger.stop();
+  }
+
+  private boolean isCommunicationModeEnabled() {
+    return (audioManager.getMode() == AudioManager.MODE_IN_COMMUNICATION);
+  }
+
+  private boolean isDeviceBlacklistedForOpenSLESUsage() {
+    boolean blacklisted = blacklistDeviceForOpenSLESUsageIsOverridden
+        ? blacklistDeviceForOpenSLESUsage
+        : WebRtcAudioUtils.deviceIsBlacklistedForOpenSLESUsage();
+    if (blacklisted) {
+      Logging.d(TAG, Build.MODEL + " is blacklisted for OpenSL ES usage!");
+    }
+    return blacklisted;
+  }
+
+  private void storeAudioParameters() {
+    outputChannels = getStereoOutput() ? 2 : 1;
+    inputChannels = getStereoInput() ? 2 : 1;
+    sampleRate = getNativeOutputSampleRate();
+    hardwareAEC = isAcousticEchoCancelerSupported();
+    // TODO(henrika): use of hardware AGC is no longer supported. Currently
+    // hardcoded to false. To be removed.
+    hardwareAGC = false;
+    hardwareNS = isNoiseSuppressorSupported();
+    lowLatencyOutput = isLowLatencyOutputSupported();
+    lowLatencyInput = isLowLatencyInputSupported();
+    proAudio = isProAudioSupported();
+    aAudio = isAAudioSupported();
+    outputBufferSize = lowLatencyOutput ? getLowLatencyOutputFramesPerBuffer()
+                                        : getMinOutputFrameSize(sampleRate, outputChannels);
+    inputBufferSize = lowLatencyInput ? getLowLatencyInputFramesPerBuffer()
+                                      : getMinInputFrameSize(sampleRate, inputChannels);
+  }
+
+  // Gets the current earpiece state.
+  private boolean hasEarpiece() {
+    return ContextUtils.getApplicationContext().getPackageManager().hasSystemFeature(
+        PackageManager.FEATURE_TELEPHONY);
+  }
+
+  // Returns true if low-latency audio output is supported.
+  private boolean isLowLatencyOutputSupported() {
+    return ContextUtils.getApplicationContext().getPackageManager().hasSystemFeature(
+        PackageManager.FEATURE_AUDIO_LOW_LATENCY);
+  }
+
+  // Returns true if low-latency audio input is supported.
+  // TODO(henrika): remove the hardcoded false return value when OpenSL ES
+  // input performance has been evaluated and tested more.
+  public boolean isLowLatencyInputSupported() {
+    // TODO(henrika): investigate if some sort of device list is needed here
+    // as well. The NDK doc states that: "As of API level 21, lower latency
+    // audio input is supported on select devices. To take advantage of this
+    // feature, first confirm that lower latency output is available".
+    return isLowLatencyOutputSupported();
+  }
+
+  // Returns true if the device has professional audio level of functionality
+  // and therefore supports the lowest possible round-trip latency.
+  private boolean isProAudioSupported() {
+    return Build.VERSION.SDK_INT >= 23
+        && ContextUtils.getApplicationContext().getPackageManager().hasSystemFeature(
+            PackageManager.FEATURE_AUDIO_PRO);
+  }
+
+  // AAudio is supported on Android Oreo MR1 (API 27) and higher.
+  // TODO(bugs.webrtc.org/8914): currently disabled by default.
+  private boolean isAAudioSupported() {
+    if (blacklistDeviceForAAudioUsage) {
+      Logging.w(TAG, "AAudio support is currently disabled on all devices!");
+    }
+    return !blacklistDeviceForAAudioUsage && Build.VERSION.SDK_INT >= 27;
+  }
+
+  // Returns the native output sample rate for this device's output stream.
+  private int getNativeOutputSampleRate() {
+    // Override this if we're running on an old emulator image which only
+    // supports 8 kHz and doesn't support PROPERTY_OUTPUT_SAMPLE_RATE.
+    if (WebRtcAudioUtils.runningOnEmulator()) {
+      Logging.d(TAG, "Running emulator, overriding sample rate to 8 kHz.");
+      return 8000;
+    }
+    // Default can be overridden by WebRtcAudioUtils.setDefaultSampleRateHz().
+    // If so, use that value and return here.
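+    // (For example, an app targeting older hardware could call
+    // WebRtcAudioUtils.setDefaultSampleRateHz(44100) before creating the
+    // audio device module; 44100 is an illustrative value only.)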
+    if (WebRtcAudioUtils.isDefaultSampleRateOverridden()) {
+      Logging.d(TAG, "Default sample rate is overridden to "
+          + WebRtcAudioUtils.getDefaultSampleRateHz() + " Hz");
+      return WebRtcAudioUtils.getDefaultSampleRateHz();
+    }
+    // No overrides available. Deliver best possible estimate based on default
+    // Android AudioManager APIs.
+    final int sampleRateHz = getSampleRateForApiLevel();
+    Logging.d(TAG, "Sample rate is set to " + sampleRateHz + " Hz");
+    return sampleRateHz;
+  }
+
+  private int getSampleRateForApiLevel() {
+    String sampleRateString = audioManager.getProperty(AudioManager.PROPERTY_OUTPUT_SAMPLE_RATE);
+    return (sampleRateString == null) ? WebRtcAudioUtils.getDefaultSampleRateHz()
+                                      : Integer.parseInt(sampleRateString);
+  }
+
+  // Returns the native output buffer size for low-latency output streams.
+  private int getLowLatencyOutputFramesPerBuffer() {
+    assertTrue(isLowLatencyOutputSupported());
+    String framesPerBuffer =
+        audioManager.getProperty(AudioManager.PROPERTY_OUTPUT_FRAMES_PER_BUFFER);
+    return framesPerBuffer == null ? DEFAULT_FRAME_PER_BUFFER : Integer.parseInt(framesPerBuffer);
+  }
+
+  // Returns true if the device supports an audio effect (AEC or NS).
+  // Four conditions must be fulfilled if these functions are to return true:
+  // 1) the platform must support the built-in (HW) effect,
+  // 2) explicit use (override) of a WebRTC based version must not be set,
+  // 3) the device must not be blacklisted for use of the effect, and
+  // 4) the UUID of the effect must be approved (some UUIDs can be excluded).
+  private static boolean isAcousticEchoCancelerSupported() {
+    return WebRtcAudioEffects.canUseAcousticEchoCanceler();
+  }
+  private static boolean isNoiseSuppressorSupported() {
+    return WebRtcAudioEffects.canUseNoiseSuppressor();
+  }
+
+  // Returns the minimum output buffer size for Java based audio (AudioTrack).
+  // This size can also be used for OpenSL ES implementations on devices that
+  // lack support of low-latency output.
+  private static int getMinOutputFrameSize(int sampleRateInHz, int numChannels) {
+    final int bytesPerFrame = numChannels * (BITS_PER_SAMPLE / 8);
+    final int channelConfig =
+        (numChannels == 1 ? AudioFormat.CHANNEL_OUT_MONO : AudioFormat.CHANNEL_OUT_STEREO);
+    return AudioTrack.getMinBufferSize(
+               sampleRateInHz, channelConfig, AudioFormat.ENCODING_PCM_16BIT)
+        / bytesPerFrame;
+  }
+
+  // Returns the native input buffer size for input streams.
+  private int getLowLatencyInputFramesPerBuffer() {
+    assertTrue(isLowLatencyInputSupported());
+    return getLowLatencyOutputFramesPerBuffer();
+  }
+
+  // Returns the minimum input buffer size for Java based audio (AudioRecord).
+  // This size can also be used for OpenSL ES implementations on devices that
+  // lack support of low-latency input.
+  private static int getMinInputFrameSize(int sampleRateInHz, int numChannels) {
+    final int bytesPerFrame = numChannels * (BITS_PER_SAMPLE / 8);
+    final int channelConfig =
+        (numChannels == 1 ? AudioFormat.CHANNEL_IN_MONO : AudioFormat.CHANNEL_IN_STEREO);
+    return AudioRecord.getMinBufferSize(
+               sampleRateInHz, channelConfig, AudioFormat.ENCODING_PCM_16BIT)
+        / bytesPerFrame;
+  }
+
+  // Helper method which throws an exception when an assertion has failed.
+  private static void assertTrue(boolean condition) {
+    if (!condition) {
+      throw new AssertionError("Expected condition to be true");
+    }
+  }
+
+  private native void nativeCacheAudioParameters(int sampleRate, int outputChannels,
+      int inputChannels, boolean hardwareAEC, boolean hardwareAGC, boolean hardwareNS,
+      boolean lowLatencyOutput, boolean lowLatencyInput, boolean proAudio, boolean aAudio,
+      int outputBufferSize, int inputBufferSize, long nativeAudioManager);
+}
diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioRecord.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioRecord.java
new file mode 100644
index 0000000000..8eab01cd69
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioRecord.java
@@ -0,0 +1,409 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+package org.webrtc.voiceengine;
+
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder.AudioSource;
+import android.os.Build;
+import android.os.Process;
+import androidx.annotation.Nullable;
+import java.lang.System;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import org.webrtc.Logging;
+import org.webrtc.ThreadUtils;
+
+public class WebRtcAudioRecord {
+  private static final boolean DEBUG = false;
+
+  private static final String TAG = "WebRtcAudioRecord";
+
+  // Default audio data format is PCM 16 bit per sample.
+  // Guaranteed to be supported by all devices.
+  private static final int BITS_PER_SAMPLE = 16;
+
+  // Requested size of each recorded buffer provided to the client.
+  private static final int CALLBACK_BUFFER_SIZE_MS = 10;
+
+  // Average number of callbacks per second.
+  private static final int BUFFERS_PER_SECOND = 1000 / CALLBACK_BUFFER_SIZE_MS;
+
+  // We ask for a native buffer size of BUFFER_SIZE_FACTOR * (minimum required
+  // buffer size). The extra space is allocated to guard against glitches under
+  // high load.
+  private static final int BUFFER_SIZE_FACTOR = 2;
+
+  // The AudioRecordJavaThread is allowed to wait for successful call to join()
+  // but the wait times out after this amount of time.
+  private static final long AUDIO_RECORD_THREAD_JOIN_TIMEOUT_MS = 2000;
+
+  private static final int DEFAULT_AUDIO_SOURCE = getDefaultAudioSource();
+  private static int audioSource = DEFAULT_AUDIO_SOURCE;
+
+  private final long nativeAudioRecord;
+
+  private @Nullable WebRtcAudioEffects effects;
+
+  private ByteBuffer byteBuffer;
+
+  private @Nullable AudioRecord audioRecord;
+  private @Nullable AudioRecordThread audioThread;
+
+  private static volatile boolean microphoneMute;
+  private byte[] emptyBytes;
+
+  // Audio recording error handler functions.
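+  // A minimal client-side sketch of registering the error callback declared
+  // below (illustrative only; `Log` stands in for any application logger):
+  //
+  //   WebRtcAudioRecord.setErrorCallback(new WebRtcAudioRecord.WebRtcAudioRecordErrorCallback() {
+  //     @Override
+  //     public void onWebRtcAudioRecordInitError(String errorMessage) {
+  //       Log.e("MyApp", errorMessage);
+  //     }
+  //     @Override
+  //     public void onWebRtcAudioRecordStartError(
+  //         WebRtcAudioRecord.AudioRecordStartErrorCode errorCode, String errorMessage) {
+  //       Log.e("MyApp", errorCode + ": " + errorMessage);
+  //     }
+  //     @Override
+  //     public void onWebRtcAudioRecordError(String errorMessage) {
+  //       Log.e("MyApp", errorMessage);
+  //     }
+  //   });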
+  public enum AudioRecordStartErrorCode {
+    AUDIO_RECORD_START_EXCEPTION,
+    AUDIO_RECORD_START_STATE_MISMATCH,
+  }
+
+  public static interface WebRtcAudioRecordErrorCallback {
+    void onWebRtcAudioRecordInitError(String errorMessage);
+    void onWebRtcAudioRecordStartError(AudioRecordStartErrorCode errorCode, String errorMessage);
+    void onWebRtcAudioRecordError(String errorMessage);
+  }
+
+  private static @Nullable WebRtcAudioRecordErrorCallback errorCallback;
+
+  public static void setErrorCallback(WebRtcAudioRecordErrorCallback errorCallback) {
+    Logging.d(TAG, "Set error callback");
+    WebRtcAudioRecord.errorCallback = errorCallback;
+  }
+
+  /**
+   * Contains audio sample information. Object is passed using {@link
+   * WebRtcAudioRecord.WebRtcAudioRecordSamplesReadyCallback}
+   */
+  public static class AudioSamples {
+    /** See {@link AudioRecord#getAudioFormat()} */
+    private final int audioFormat;
+    /** See {@link AudioRecord#getChannelCount()} */
+    private final int channelCount;
+    /** See {@link AudioRecord#getSampleRate()} */
+    private final int sampleRate;
+
+    private final byte[] data;
+
+    private AudioSamples(AudioRecord audioRecord, byte[] data) {
+      this.audioFormat = audioRecord.getAudioFormat();
+      this.channelCount = audioRecord.getChannelCount();
+      this.sampleRate = audioRecord.getSampleRate();
+      this.data = data;
+    }
+
+    public int getAudioFormat() {
+      return audioFormat;
+    }
+
+    public int getChannelCount() {
+      return channelCount;
+    }
+
+    public int getSampleRate() {
+      return sampleRate;
+    }
+
+    public byte[] getData() {
+      return data;
+    }
+  }
+
+  /** Called when new audio samples are ready. This should only be set for debug purposes */
+  public static interface WebRtcAudioRecordSamplesReadyCallback {
+    void onWebRtcAudioRecordSamplesReady(AudioSamples samples);
+  }
+
+  private static @Nullable WebRtcAudioRecordSamplesReadyCallback audioSamplesReadyCallback;
+
+  public static void setOnAudioSamplesReady(WebRtcAudioRecordSamplesReadyCallback callback) {
+    audioSamplesReadyCallback = callback;
+  }
+
+  /**
+   * Audio thread which keeps calling AudioRecord.read() waiting for audio
+   * to be recorded. Feeds recorded data to the native counterpart as a
+   * periodic sequence of callbacks using DataIsRecorded().
+   * This thread uses a Process.THREAD_PRIORITY_URGENT_AUDIO priority.
+   */
+  private class AudioRecordThread extends Thread {
+    private volatile boolean keepAlive = true;
+
+    public AudioRecordThread(String name) {
+      super(name);
+    }
+
+    // TODO(titovartem) make correct fix during webrtc:9175
+    @SuppressWarnings("ByteBufferBackingArray")
+    @Override
+    public void run() {
+      Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO);
+      Logging.d(TAG, "AudioRecordThread" + WebRtcAudioUtils.getThreadInfo());
+      assertTrue(audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING);
+
+      long lastTime = System.nanoTime();
+      while (keepAlive) {
+        int bytesRead = audioRecord.read(byteBuffer, byteBuffer.capacity());
+        if (bytesRead == byteBuffer.capacity()) {
+          if (microphoneMute) {
+            byteBuffer.clear();
+            byteBuffer.put(emptyBytes);
+          }
+          // It's possible we've been shut down during the read, and stopRecording() tried and
+          // failed to join this thread. To be a bit safer, try to avoid calling any native methods
+          // in case they've been unregistered after stopRecording() returned.
+          if (keepAlive) {
+            nativeDataIsRecorded(bytesRead, nativeAudioRecord);
+          }
+          if (audioSamplesReadyCallback != null) {
+            // Copy the entire byte buffer array. Assume that the start of the byteBuffer is
+            // at index 0.
+            byte[] data = Arrays.copyOf(byteBuffer.array(), byteBuffer.capacity());
+            audioSamplesReadyCallback.onWebRtcAudioRecordSamplesReady(
+                new AudioSamples(audioRecord, data));
+          }
+        } else {
+          String errorMessage = "AudioRecord.read failed: " + bytesRead;
+          Logging.e(TAG, errorMessage);
+          if (bytesRead == AudioRecord.ERROR_INVALID_OPERATION) {
+            keepAlive = false;
+            reportWebRtcAudioRecordError(errorMessage);
+          }
+        }
+        if (DEBUG) {
+          long nowTime = System.nanoTime();
+          long durationInMs = TimeUnit.NANOSECONDS.toMillis((nowTime - lastTime));
+          lastTime = nowTime;
+          Logging.d(TAG, "bytesRead[" + durationInMs + "] " + bytesRead);
+        }
+      }
+
+      try {
+        if (audioRecord != null) {
+          audioRecord.stop();
+        }
+      } catch (IllegalStateException e) {
+        Logging.e(TAG, "AudioRecord.stop failed: " + e.getMessage());
+      }
+    }
+
+    // Stops the inner thread loop and also calls AudioRecord.stop().
+    // Does not block the calling thread.
+    public void stopThread() {
+      Logging.d(TAG, "stopThread");
+      keepAlive = false;
+    }
+  }
+
+  WebRtcAudioRecord(long nativeAudioRecord) {
+    Logging.d(TAG, "ctor" + WebRtcAudioUtils.getThreadInfo());
+    this.nativeAudioRecord = nativeAudioRecord;
+    if (DEBUG) {
+      WebRtcAudioUtils.logDeviceInfo(TAG);
+    }
+    effects = WebRtcAudioEffects.create();
+  }
+
+  private boolean enableBuiltInAEC(boolean enable) {
+    Logging.d(TAG, "enableBuiltInAEC(" + enable + ')');
+    if (effects == null) {
+      Logging.e(TAG, "Built-in AEC is not supported on this platform");
+      return false;
+    }
+    return effects.setAEC(enable);
+  }
+
+  private boolean enableBuiltInNS(boolean enable) {
+    Logging.d(TAG, "enableBuiltInNS(" + enable + ')');
+    if (effects == null) {
+      Logging.e(TAG, "Built-in NS is not supported on this platform");
+      return false;
+    }
+    return effects.setNS(enable);
+  }
+
+  private int initRecording(int sampleRate, int channels) {
+    Logging.d(TAG, "initRecording(sampleRate=" + sampleRate + ", channels=" + channels + ")");
+    if (audioRecord != null) {
+      reportWebRtcAudioRecordInitError("InitRecording called twice without StopRecording.");
+      return -1;
+    }
+    final int bytesPerFrame = channels * (BITS_PER_SAMPLE / 8);
+    final int framesPerBuffer = sampleRate / BUFFERS_PER_SECOND;
+    byteBuffer = ByteBuffer.allocateDirect(bytesPerFrame * framesPerBuffer);
+    Logging.d(TAG, "byteBuffer.capacity: " + byteBuffer.capacity());
+    emptyBytes = new byte[byteBuffer.capacity()];
+    // Rather than passing the ByteBuffer with every callback (requiring
+    // the potentially expensive GetDirectBufferAddress) we simply have the
+    // native class cache the address to the memory once.
+    nativeCacheDirectBufferAddress(byteBuffer, nativeAudioRecord);
+
+    // Get the minimum buffer size required for the successful creation of
+    // an AudioRecord object, in byte units.
+    // Note that this size doesn't guarantee a smooth recording under load.
+    final int channelConfig = channelCountToConfiguration(channels);
+    int minBufferSize =
+        AudioRecord.getMinBufferSize(sampleRate, channelConfig, AudioFormat.ENCODING_PCM_16BIT);
+    if (minBufferSize == AudioRecord.ERROR || minBufferSize == AudioRecord.ERROR_BAD_VALUE) {
+      reportWebRtcAudioRecordInitError("AudioRecord.getMinBufferSize failed: " + minBufferSize);
+      return -1;
+    }
+    Logging.d(TAG, "AudioRecord.getMinBufferSize: " + minBufferSize);
+
+    // Use a larger buffer size than the minimum required when creating the
+    // AudioRecord instance to ensure smooth recording under load. It has been
+    // verified that it does not increase the actual recording latency.
+    int bufferSizeInBytes = Math.max(BUFFER_SIZE_FACTOR * minBufferSize, byteBuffer.capacity());
+    Logging.d(TAG, "bufferSizeInBytes: " + bufferSizeInBytes);
+    try {
+      audioRecord = new AudioRecord(audioSource, sampleRate, channelConfig,
+          AudioFormat.ENCODING_PCM_16BIT, bufferSizeInBytes);
+    } catch (IllegalArgumentException e) {
+      reportWebRtcAudioRecordInitError("AudioRecord ctor error: " + e.getMessage());
+      releaseAudioResources();
+      return -1;
+    }
+    if (audioRecord == null || audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
+      reportWebRtcAudioRecordInitError("Failed to create a new AudioRecord instance");
+      releaseAudioResources();
+      return -1;
+    }
+    if (effects != null) {
+      effects.enable(audioRecord.getAudioSessionId());
+    }
+    logMainParameters();
+    logMainParametersExtended();
+    return framesPerBuffer;
+  }
+
+  private boolean startRecording() {
+    Logging.d(TAG, "startRecording");
+    assertTrue(audioRecord != null);
+    assertTrue(audioThread == null);
+    try {
+      audioRecord.startRecording();
+    } catch (IllegalStateException e) {
+      reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_EXCEPTION,
+          "AudioRecord.startRecording failed: " + e.getMessage());
+      return false;
+    }
+    if (audioRecord.getRecordingState() != AudioRecord.RECORDSTATE_RECORDING) {
+      reportWebRtcAudioRecordStartError(
+          AudioRecordStartErrorCode.AUDIO_RECORD_START_STATE_MISMATCH,
+          "AudioRecord.startRecording failed - incorrect state: "
+              + audioRecord.getRecordingState());
+      return false;
+    }
+    audioThread = new AudioRecordThread("AudioRecordJavaThread");
+    audioThread.start();
+    return true;
+  }
+
+  private boolean stopRecording() {
+    Logging.d(TAG, "stopRecording");
+    assertTrue(audioThread != null);
+    audioThread.stopThread();
+    if (!ThreadUtils.joinUninterruptibly(audioThread, AUDIO_RECORD_THREAD_JOIN_TIMEOUT_MS)) {
+      Logging.e(TAG, "Join of AudioRecordJavaThread timed out");
+      WebRtcAudioUtils.logAudioState(TAG);
+    }
+    audioThread = null;
+    if (effects != null) {
+      effects.release();
+    }
+    releaseAudioResources();
+    return true;
+  }
+
+  private void logMainParameters() {
+    Logging.d(TAG, "AudioRecord: "
+        + "session ID: " + audioRecord.getAudioSessionId() + ", "
+        + "channels: " + audioRecord.getChannelCount() + ", "
+        + "sample rate: " + audioRecord.getSampleRate());
+  }
+
+  private void logMainParametersExtended() {
+    if (Build.VERSION.SDK_INT >= 23) {
+      Logging.d(TAG, "AudioRecord: "
+          // The frame count of the native AudioRecord buffer.
+          + "buffer size in frames: " + audioRecord.getBufferSizeInFrames());
+    }
+  }
+
+  // Helper method which throws an exception when an assertion has failed.
+  private static void assertTrue(boolean condition) {
+    if (!condition) {
+      throw new AssertionError("Expected condition to be true");
+    }
+  }
+
+  private int channelCountToConfiguration(int channels) {
+    return (channels == 1 ? AudioFormat.CHANNEL_IN_MONO : AudioFormat.CHANNEL_IN_STEREO);
+  }
+
+  private native void nativeCacheDirectBufferAddress(ByteBuffer byteBuffer, long nativeAudioRecord);
+
+  private native void nativeDataIsRecorded(int bytes, long nativeAudioRecord);
+
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized void setAudioSource(int source) {
+    Logging.w(TAG, "Audio source is changed from: " + audioSource
+        + " to " + source);
+    audioSource = source;
+  }
+
+  private static int getDefaultAudioSource() {
+    return AudioSource.VOICE_COMMUNICATION;
+  }
+
+  // Sets all recorded samples to zero if `mute` is true, i.e., ensures that
+  // the microphone is muted.
+  public static void setMicrophoneMute(boolean mute) {
+    Logging.w(TAG, "setMicrophoneMute(" + mute + ")");
+    microphoneMute = mute;
+  }
+
+  // Releases the native AudioRecord resources.
+  private void releaseAudioResources() {
+    Logging.d(TAG, "releaseAudioResources");
+    if (audioRecord != null) {
+      audioRecord.release();
+      audioRecord = null;
+    }
+  }
+
+  private void reportWebRtcAudioRecordInitError(String errorMessage) {
+    Logging.e(TAG, "Init recording error: " + errorMessage);
+    WebRtcAudioUtils.logAudioState(TAG);
+    if (errorCallback != null) {
+      errorCallback.onWebRtcAudioRecordInitError(errorMessage);
+    }
+  }
+
+  private void reportWebRtcAudioRecordStartError(
+      AudioRecordStartErrorCode errorCode, String errorMessage) {
+    Logging.e(TAG, "Start recording error: " + errorCode + ". " + errorMessage);
+    WebRtcAudioUtils.logAudioState(TAG);
+    if (errorCallback != null) {
+      errorCallback.onWebRtcAudioRecordStartError(errorCode, errorMessage);
+    }
+  }
+
+  private void reportWebRtcAudioRecordError(String errorMessage) {
+    Logging.e(TAG, "Run-time recording error: " + errorMessage);
+    WebRtcAudioUtils.logAudioState(TAG);
+    if (errorCallback != null) {
+      errorCallback.onWebRtcAudioRecordError(errorMessage);
+    }
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioTrack.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioTrack.java
new file mode 100644
index 0000000000..3e1875c3d6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioTrack.java
@@ -0,0 +1,494 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+package org.webrtc.voiceengine;
+
+import android.content.Context;
+import android.media.AudioAttributes;
+import android.media.AudioFormat;
+import android.media.AudioManager;
+import android.media.AudioTrack;
+import android.os.Build;
+import android.os.Process;
+import androidx.annotation.Nullable;
+import java.lang.Thread;
+import java.nio.ByteBuffer;
+import org.webrtc.ContextUtils;
+import org.webrtc.Logging;
+import org.webrtc.ThreadUtils;
+
+public class WebRtcAudioTrack {
+  private static final boolean DEBUG = false;
+
+  private static final String TAG = "WebRtcAudioTrack";
+
+  // Default audio data format is PCM 16 bit per sample.
+  // Guaranteed to be supported by all devices.
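+  // (Worked example: at a 48000 Hz native rate with mono output, each 10 ms
+  // callback buffer sized in initPlayout() below holds
+  // 48000 / BUFFERS_PER_SECOND = 480 frames, i.e.
+  // 480 * 1 channel * (16 / 8) bytes = 960 bytes.)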
+  private static final int BITS_PER_SAMPLE = 16;
+
+  // Requested size of each recorded buffer provided to the client.
+  private static final int CALLBACK_BUFFER_SIZE_MS = 10;
+
+  // Average number of callbacks per second.
+  private static final int BUFFERS_PER_SECOND = 1000 / CALLBACK_BUFFER_SIZE_MS;
+
+  // The AudioTrackThread is allowed to wait for successful call to join()
+  // but the wait times out after this amount of time.
+  private static final long AUDIO_TRACK_THREAD_JOIN_TIMEOUT_MS = 2000;
+
+  // By default, WebRTC creates audio tracks with a usage attribute
+  // corresponding to voice communications, such as telephony or VoIP.
+  private static final int DEFAULT_USAGE = AudioAttributes.USAGE_VOICE_COMMUNICATION;
+  private static int usageAttribute = DEFAULT_USAGE;
+
+  // This method overrides the default usage attribute and allows the user
+  // to set it to something other than AudioAttributes.USAGE_VOICE_COMMUNICATION.
+  // NOTE: calling this method will most likely break existing VoIP tuning.
+  // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+  @SuppressWarnings("NoSynchronizedMethodCheck")
+  public static synchronized void setAudioTrackUsageAttribute(int usage) {
+    Logging.w(TAG, "Default usage attribute is changed from: "
+        + DEFAULT_USAGE + " to " + usage);
+    usageAttribute = usage;
+  }
+
+  private final long nativeAudioTrack;
+  private final AudioManager audioManager;
+  private final ThreadUtils.ThreadChecker threadChecker = new ThreadUtils.ThreadChecker();
+
+  private ByteBuffer byteBuffer;
+
+  private @Nullable AudioTrack audioTrack;
+  private @Nullable AudioTrackThread audioThread;
+
+  // Samples to be played are replaced by zeros if `speakerMute` is set to true.
+  // Can be used to ensure that the speaker is fully muted.
+  private static volatile boolean speakerMute;
+  private byte[] emptyBytes;
+
+  // Audio playout/track error handler functions.
+  public enum AudioTrackStartErrorCode {
+    AUDIO_TRACK_START_EXCEPTION,
+    AUDIO_TRACK_START_STATE_MISMATCH,
+  }
+
+  @Deprecated
+  public static interface WebRtcAudioTrackErrorCallback {
+    void onWebRtcAudioTrackInitError(String errorMessage);
+    void onWebRtcAudioTrackStartError(String errorMessage);
+    void onWebRtcAudioTrackError(String errorMessage);
+  }
+
+  // TODO(henrika): upgrade all clients to use this new interface instead.
+  public static interface ErrorCallback {
+    void onWebRtcAudioTrackInitError(String errorMessage);
+    void onWebRtcAudioTrackStartError(AudioTrackStartErrorCode errorCode, String errorMessage);
+    void onWebRtcAudioTrackError(String errorMessage);
+  }
+
+  private static @Nullable WebRtcAudioTrackErrorCallback errorCallbackOld;
+  private static @Nullable ErrorCallback errorCallback;
+
+  @Deprecated
+  public static void setErrorCallback(WebRtcAudioTrackErrorCallback errorCallback) {
+    Logging.d(TAG, "Set error callback (deprecated)");
+    WebRtcAudioTrack.errorCallbackOld = errorCallback;
+  }
+
+  public static void setErrorCallback(ErrorCallback errorCallback) {
+    Logging.d(TAG, "Set extended error callback");
+    WebRtcAudioTrack.errorCallback = errorCallback;
+  }
+
+  /**
+   * Audio thread which keeps calling AudioTrack.write() to stream audio.
+   * Data is periodically acquired from the native WebRTC layer using the
+   * nativeGetPlayoutData callback function.
+   * This thread uses a Process.THREAD_PRIORITY_URGENT_AUDIO priority.
+   */
+  private class AudioTrackThread extends Thread {
+    private volatile boolean keepAlive = true;
+
+    public AudioTrackThread(String name) {
+      super(name);
+    }
+
+    @Override
+    public void run() {
+      Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO);
+      Logging.d(TAG, "AudioTrackThread" + WebRtcAudioUtils.getThreadInfo());
+      assertTrue(audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING);
+
+      // Fixed size in bytes of each 10ms block of audio data that we ask for
+      // using callbacks to the native WebRTC client.
+      final int sizeInBytes = byteBuffer.capacity();
+
+      while (keepAlive) {
+        // Get 10ms of PCM data from the native WebRTC client. Audio data is
+        // written into the common ByteBuffer using the address that was
+        // cached at construction.
+        nativeGetPlayoutData(sizeInBytes, nativeAudioTrack);
+        // Write data until all data has been written to the audio sink.
+        // Upon return, the buffer position will have been advanced to reflect
+        // the amount of data that was successfully written to the AudioTrack.
+        assertTrue(sizeInBytes <= byteBuffer.remaining());
+        if (speakerMute) {
+          byteBuffer.clear();
+          byteBuffer.put(emptyBytes);
+          byteBuffer.position(0);
+        }
+        int bytesWritten = audioTrack.write(byteBuffer, sizeInBytes, AudioTrack.WRITE_BLOCKING);
+        if (bytesWritten != sizeInBytes) {
+          Logging.e(TAG, "AudioTrack.write played invalid number of bytes: " + bytesWritten);
+          // If a write() returns a negative value, an error has occurred.
+          // Stop playing and report an error in this case.
+          if (bytesWritten < 0) {
+            keepAlive = false;
+            reportWebRtcAudioTrackError("AudioTrack.write failed: " + bytesWritten);
+          }
+        }
+        // The byte buffer must be rewound since byteBuffer.position() is
+        // increased at each call to AudioTrack.write(). If we don't do this,
+        // the next call to AudioTrack.write() will fail.
+        byteBuffer.rewind();
+
+        // TODO(henrika): it is possible to create a delay estimate here by
+        // counting number of written frames and subtracting the result from
+        // audioTrack.getPlaybackHeadPosition().
+      }
+
+      // Stops playing the audio data. Since the instance was created in
+      // MODE_STREAM mode, audio will stop playing after the last buffer that
+      // was written has been played.
+      if (audioTrack != null) {
+        Logging.d(TAG, "Calling AudioTrack.stop...");
+        try {
+          audioTrack.stop();
+          Logging.d(TAG, "AudioTrack.stop is done.");
+        } catch (IllegalStateException e) {
+          Logging.e(TAG, "AudioTrack.stop failed: " + e.getMessage());
+        }
+      }
+    }
+
+    // Stops the inner thread loop which results in calling AudioTrack.stop().
+    // Does not block the calling thread.
+    public void stopThread() {
+      Logging.d(TAG, "stopThread");
+      keepAlive = false;
+    }
+  }
+
+  WebRtcAudioTrack(long nativeAudioTrack) {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "ctor" + WebRtcAudioUtils.getThreadInfo());
+    this.nativeAudioTrack = nativeAudioTrack;
+    audioManager =
+        (AudioManager) ContextUtils.getApplicationContext().getSystemService(Context.AUDIO_SERVICE);
+    if (DEBUG) {
+      WebRtcAudioUtils.logDeviceInfo(TAG);
+    }
+  }
+
+  private int initPlayout(int sampleRate, int channels, double bufferSizeFactor) {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG,
+        "initPlayout(sampleRate=" + sampleRate + ", channels=" + channels
+            + ", bufferSizeFactor=" + bufferSizeFactor + ")");
+    final int bytesPerFrame = channels * (BITS_PER_SAMPLE / 8);
+    byteBuffer = ByteBuffer.allocateDirect(bytesPerFrame * (sampleRate / BUFFERS_PER_SECOND));
+    Logging.d(TAG, "byteBuffer.capacity: " + byteBuffer.capacity());
+    emptyBytes = new byte[byteBuffer.capacity()];
+    // Rather than passing the ByteBuffer with every callback (requiring
+    // the potentially expensive GetDirectBufferAddress) we simply have the
+    // native class cache the address to the memory once.
+    nativeCacheDirectBufferAddress(byteBuffer, nativeAudioTrack);
+
+    // Get the minimum buffer size required for the successful creation of an
+    // AudioTrack object to be created in the MODE_STREAM mode.
+    // Note that this size doesn't guarantee a smooth playback under load.
+    final int channelConfig = channelCountToConfiguration(channels);
+    final int minBufferSizeInBytes = (int) (AudioTrack.getMinBufferSize(sampleRate, channelConfig,
+                                                AudioFormat.ENCODING_PCM_16BIT)
+        * bufferSizeFactor);
+    Logging.d(TAG, "minBufferSizeInBytes: " + minBufferSizeInBytes);
+    // For the streaming mode, data must be written to the audio sink in
+    // chunks of size (given by byteBuffer.capacity()) less than or equal
+    // to the total buffer size `minBufferSizeInBytes`. But, we have seen
+    // reports of "getMinBufferSize(): error querying hardware". Hence, it
+    // can happen that `minBufferSizeInBytes` contains an invalid value.
+    if (minBufferSizeInBytes < byteBuffer.capacity()) {
+      reportWebRtcAudioTrackInitError("AudioTrack.getMinBufferSize returns an invalid value.");
+      return -1;
+    }
+
+    // Ensure that the previous audio session was stopped correctly before
+    // trying to create a new AudioTrack.
+    if (audioTrack != null) {
+      reportWebRtcAudioTrackInitError("Conflict with existing AudioTrack.");
+      return -1;
+    }
+    try {
+      // Create an AudioTrack object and initialize its associated audio buffer.
+      // The size of this buffer determines how long an AudioTrack can play
+      // before running out of data.
+      // As we are on API level 21 or higher, it is possible to use a special AudioTrack
+      // constructor that uses AudioAttributes and AudioFormat as input. It allows us to
+      // supersede the notion of stream types for defining the behavior of audio playback,
+      // and to allow certain platforms or routing policies to use this information for more
+      // refined volume or routing decisions.
+      audioTrack = createAudioTrack(sampleRate, channelConfig, minBufferSizeInBytes);
+    } catch (IllegalArgumentException e) {
+      reportWebRtcAudioTrackInitError(e.getMessage());
+      releaseAudioResources();
+      return -1;
+    }
+
+    // It can happen that an AudioTrack is created but it was not successfully
+    // initialized upon creation. Seems to be the case e.g. when the maximum
+    // number of globally available audio tracks is exceeded.
+    if (audioTrack == null || audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
+      reportWebRtcAudioTrackInitError("Initialization of audio track failed.");
+      releaseAudioResources();
+      return -1;
+    }
+    logMainParameters();
+    logMainParametersExtended();
+    return minBufferSizeInBytes;
+  }
+
+  private boolean startPlayout() {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "startPlayout");
+    assertTrue(audioTrack != null);
+    assertTrue(audioThread == null);
+
+    // Starts playing an audio track.
+    try {
+      audioTrack.play();
+    } catch (IllegalStateException e) {
+      reportWebRtcAudioTrackStartError(AudioTrackStartErrorCode.AUDIO_TRACK_START_EXCEPTION,
+          "AudioTrack.play failed: " + e.getMessage());
+      releaseAudioResources();
+      return false;
+    }
+    if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
+      reportWebRtcAudioTrackStartError(
+          AudioTrackStartErrorCode.AUDIO_TRACK_START_STATE_MISMATCH,
+          "AudioTrack.play failed - incorrect state: "
+              + audioTrack.getPlayState());
+      releaseAudioResources();
+      return false;
+    }
+
+    // Create and start a new high-priority thread which calls AudioTrack.write()
+    // and where we also call the native nativeGetPlayoutData() callback to
+    // request decoded audio from WebRTC.
+    audioThread = new AudioTrackThread("AudioTrackJavaThread");
+    audioThread.start();
+    return true;
+  }
+
+  private boolean stopPlayout() {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "stopPlayout");
+    assertTrue(audioThread != null);
+    logUnderrunCount();
+    audioThread.stopThread();
+
+    Logging.d(TAG, "Stopping the AudioTrackThread...");
+    audioThread.interrupt();
+    if (!ThreadUtils.joinUninterruptibly(audioThread, AUDIO_TRACK_THREAD_JOIN_TIMEOUT_MS)) {
+      Logging.e(TAG, "Join of AudioTrackThread timed out.");
+      WebRtcAudioUtils.logAudioState(TAG);
+    }
+    Logging.d(TAG, "AudioTrackThread has now been stopped.");
+    audioThread = null;
+    releaseAudioResources();
+    return true;
+  }
+
+  // Get max possible volume index for a phone call audio stream.
+  private int getStreamMaxVolume() {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "getStreamMaxVolume");
+    assertTrue(audioManager != null);
+    return audioManager.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL);
+  }
+
+  // Set current volume level for a phone call audio stream.
+  private boolean setStreamVolume(int volume) {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "setStreamVolume(" + volume + ")");
+    assertTrue(audioManager != null);
+    if (audioManager.isVolumeFixed()) {
+      Logging.e(TAG, "The device implements a fixed volume policy.");
+      return false;
+    }
+    audioManager.setStreamVolume(AudioManager.STREAM_VOICE_CALL, volume, 0);
+    return true;
+  }
+
+  /** Get current volume level for a phone call audio stream. */
+  private int getStreamVolume() {
+    threadChecker.checkIsOnValidThread();
+    Logging.d(TAG, "getStreamVolume");
+    assertTrue(audioManager != null);
+    return audioManager.getStreamVolume(AudioManager.STREAM_VOICE_CALL);
+  }
+
+  private void logMainParameters() {
+    Logging.d(TAG, "AudioTrack: "
+        + "session ID: " + audioTrack.getAudioSessionId() + ", "
+        + "channels: " + audioTrack.getChannelCount() + ", "
+        + "sample rate: " + audioTrack.getSampleRate() + ", "
+        // Gain (>=1.0) expressed as linear multiplier on sample values.
+        + "max gain: " + AudioTrack.getMaxVolume());
+  }
+
+  // Creates an AudioTrack instance using AudioAttributes and AudioFormat as input.
+  // It allows certain platforms or routing policies to use this information for more
+  // refined volume or routing decisions.
+  private static AudioTrack createAudioTrack(
+      int sampleRateInHz, int channelConfig, int bufferSizeInBytes) {
+    Logging.d(TAG, "createAudioTrack");
+    // TODO(henrika): use setPerformanceMode(int) with PERFORMANCE_MODE_LOW_LATENCY to control
+    // performance when Android O is supported. Add some logging in the meantime.
+    final int nativeOutputSampleRate =
+        AudioTrack.getNativeOutputSampleRate(AudioManager.STREAM_VOICE_CALL);
+    Logging.d(TAG, "nativeOutputSampleRate: " + nativeOutputSampleRate);
+    if (sampleRateInHz != nativeOutputSampleRate) {
+      Logging.w(TAG, "Unable to use fast mode since requested sample rate is not native");
+    }
+    if (usageAttribute != DEFAULT_USAGE) {
+      Logging.w(TAG, "A non default usage attribute is used: " + usageAttribute);
+    }
+    // Create an audio track where the audio usage is for VoIP and the content type is speech.
+    return new AudioTrack(
+        new AudioAttributes.Builder()
+            .setUsage(usageAttribute)
+            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+            .build(),
+        new AudioFormat.Builder()
+            .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
+            .setSampleRate(sampleRateInHz)
+            .setChannelMask(channelConfig)
+            .build(),
+        bufferSizeInBytes,
+        AudioTrack.MODE_STREAM,
+        AudioManager.AUDIO_SESSION_ID_GENERATE);
+  }
+
+  private void logBufferSizeInFrames() {
+    if (Build.VERSION.SDK_INT >= 23) {
+      Logging.d(TAG, "AudioTrack: "
+          // The effective size of the AudioTrack buffer that the app writes to.
+          + "buffer size in frames: " + audioTrack.getBufferSizeInFrames());
+    }
+  }
+
+  private int getBufferSizeInFrames() {
+    if (Build.VERSION.SDK_INT >= 23) {
+      return audioTrack.getBufferSizeInFrames();
+    }
+    return -1;
+  }
+
+  private void logBufferCapacityInFrames() {
+    if (Build.VERSION.SDK_INT >= 24) {
+      Logging.d(TAG,
+          "AudioTrack: "
+              // Maximum size of the AudioTrack buffer in frames.
+              + "buffer capacity in frames: " + audioTrack.getBufferCapacityInFrames());
+    }
+  }
+
+  private void logMainParametersExtended() {
+    logBufferSizeInFrames();
+    logBufferCapacityInFrames();
+  }
+
+  // Prints the number of underrun occurrences in the application-level write
+  // buffer since the AudioTrack was created. An underrun occurs if the app does
+  // not write audio data quickly enough, causing the buffer to underflow and a
+  // potential audio glitch.
+  // TODO(henrika): keep track of this value in the field and possibly add new
+  // UMA stat if needed.
+  private void logUnderrunCount() {
+    if (Build.VERSION.SDK_INT >= 24) {
+      Logging.d(TAG, "underrun count: " + audioTrack.getUnderrunCount());
+    }
+  }
+
+  // Helper method which throws an exception when an assertion has failed.
+  private static void assertTrue(boolean condition) {
+    if (!condition) {
+      throw new AssertionError("Expected condition to be true");
+    }
+  }
+
+  private int channelCountToConfiguration(int channels) {
+    return (channels == 1 ? AudioFormat.CHANNEL_OUT_MONO : AudioFormat.CHANNEL_OUT_STEREO);
+  }
+
+  private native void nativeCacheDirectBufferAddress(ByteBuffer byteBuffer, long nativeAudioRecord);
+
+  private native void nativeGetPlayoutData(int bytes, long nativeAudioRecord);
+
+  // Sets all samples to be played out to zero if `mute` is true, i.e.,
+  // ensures that the speaker is muted.
+  public static void setSpeakerMute(boolean mute) {
+    Logging.w(TAG, "setSpeakerMute(" + mute + ")");
+    speakerMute = mute;
+  }
+
+  // Releases the native AudioTrack resources.
+ private void releaseAudioResources() { + Logging.d(TAG, "releaseAudioResources"); + if (audioTrack != null) { + audioTrack.release(); + audioTrack = null; + } + } + + private void reportWebRtcAudioTrackInitError(String errorMessage) { + Logging.e(TAG, "Init playout error: " + errorMessage); + WebRtcAudioUtils.logAudioState(TAG); + if (errorCallbackOld != null) { + errorCallbackOld.onWebRtcAudioTrackInitError(errorMessage); + } + if (errorCallback != null) { + errorCallback.onWebRtcAudioTrackInitError(errorMessage); + } + } + + private void reportWebRtcAudioTrackStartError( + AudioTrackStartErrorCode errorCode, String errorMessage) { + Logging.e(TAG, "Start playout error: " + errorCode + ". " + errorMessage); + WebRtcAudioUtils.logAudioState(TAG); + if (errorCallbackOld != null) { + errorCallbackOld.onWebRtcAudioTrackStartError(errorMessage); + } + if (errorCallback != null) { + errorCallback.onWebRtcAudioTrackStartError(errorCode, errorMessage); + } + } + + private void reportWebRtcAudioTrackError(String errorMessage) { + Logging.e(TAG, "Run-time playback error: " + errorMessage); + WebRtcAudioUtils.logAudioState(TAG); + if (errorCallbackOld != null) { + errorCallbackOld.onWebRtcAudioTrackError(errorMessage); + } + if (errorCallback != null) { + errorCallback.onWebRtcAudioTrackError(errorMessage); + } + } +} diff --git a/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioUtils.java b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioUtils.java new file mode 100644 index 0000000000..afd3d429af --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/java/src/org/webrtc/voiceengine/WebRtcAudioUtils.java @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +package org.webrtc.voiceengine; + +import static android.media.AudioManager.MODE_IN_CALL; +import static android.media.AudioManager.MODE_IN_COMMUNICATION; +import static android.media.AudioManager.MODE_NORMAL; +import static android.media.AudioManager.MODE_RINGTONE; + +import android.annotation.SuppressLint; +import android.content.Context; +import android.content.pm.PackageManager; +import android.media.AudioDeviceInfo; +import android.media.AudioManager; +import android.os.Build; +import java.lang.Thread; +import java.util.Arrays; +import java.util.List; +import org.webrtc.ContextUtils; +import org.webrtc.Logging; + +public final class WebRtcAudioUtils { + private static final String TAG = "WebRtcAudioUtils"; + + // List of devices where we have seen issues (e.g. bad audio quality) using + // the low latency output mode in combination with OpenSL ES. + // The device name is given by Build.MODEL. + private static final String[] BLACKLISTED_OPEN_SL_ES_MODELS = new String[] { + // It is recommended to maintain a list of blacklisted models outside + // this package and instead call + // WebRtcAudioManager.setBlacklistDeviceForOpenSLESUsage(true) + // from the client for devices where OpenSL ES shall be disabled. 
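+ // An entry, if added here, must match Build.MODEL exactly (see
+ // deviceIsBlacklistedForOpenSLESUsage() below); a purely hypothetical
+ // example entry would look like: "Some-Model-X",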
+ };
+
+ // List of devices where it has been verified that the built-in effect is
+ // bad and where it makes sense to avoid using it and instead rely on the
+ // native WebRTC version. The device name is given by Build.MODEL.
+ private static final String[] BLACKLISTED_AEC_MODELS = new String[] {
+ // It is recommended to maintain a list of blacklisted models outside
+ // this package and instead call setWebRtcBasedAcousticEchoCanceler(true)
+ // from the client for devices where the built-in AEC shall be disabled.
+ };
+ private static final String[] BLACKLISTED_NS_MODELS = new String[] {
+ // It is recommended to maintain a list of blacklisted models outside
+ // this package and instead call setWebRtcBasedNoiseSuppressor(true)
+ // from the client for devices where the built-in NS shall be disabled.
+ };
+
+ // Use 16kHz as the default sample rate. A higher sample rate might prevent
+ // us from supporting communication mode on some older (e.g. ICS) devices.
+ private static final int DEFAULT_SAMPLE_RATE_HZ = 16000;
+ private static int defaultSampleRateHz = DEFAULT_SAMPLE_RATE_HZ;
+ // Set to true if setDefaultSampleRateHz() has been called.
+ private static boolean isDefaultSampleRateOverridden;
+
+ // By default, utilize hardware-based audio effects for AEC and NS when
+ // available.
+ private static boolean useWebRtcBasedAcousticEchoCanceler;
+ private static boolean useWebRtcBasedNoiseSuppressor;
+
+ // Call these methods if any hardware-based effect shall be replaced by a
+ // software-based version provided by the WebRTC stack instead.
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized void setWebRtcBasedAcousticEchoCanceler(boolean enable) {
+ useWebRtcBasedAcousticEchoCanceler = enable;
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized void setWebRtcBasedNoiseSuppressor(boolean enable) {
+ useWebRtcBasedNoiseSuppressor = enable;
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized void setWebRtcBasedAutomaticGainControl(boolean enable) {
+ // TODO(henrika): deprecated; remove when no longer used by any client.
+ Logging.w(TAG, "setWebRtcBasedAutomaticGainControl() is deprecated");
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized boolean useWebRtcBasedAcousticEchoCanceler() {
+ if (useWebRtcBasedAcousticEchoCanceler) {
+ Logging.w(TAG, "Overriding default behavior; now using WebRTC AEC!");
+ }
+ return useWebRtcBasedAcousticEchoCanceler;
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized boolean useWebRtcBasedNoiseSuppressor() {
+ if (useWebRtcBasedNoiseSuppressor) {
+ Logging.w(TAG, "Overriding default behavior; now using WebRTC NS!");
+ }
+ return useWebRtcBasedNoiseSuppressor;
+ }
+
+ // TODO(henrika): deprecated; remove when no longer used by any client.
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized boolean useWebRtcBasedAutomaticGainControl() {
+ // Always return true here to avoid trying to use any built-in AGC.
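+ // The hardware AGC is hence never used; see also
+ // isAutomaticGainControlSupported() below, which always returns false for
+ // the same reason.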
+ return true;
+ }
+
+ // Returns true if the device supports an audio effect (AEC or NS).
+ // Four conditions must be fulfilled if these functions are to return true:
+ // 1) the platform must support the built-in (HW) effect,
+ // 2) explicit use (override) of a WebRTC based version must not be set,
+ // 3) the device must not be blacklisted for use of the effect, and
+ // 4) the UUID of the effect must be approved (some UUIDs can be excluded).
+ public static boolean isAcousticEchoCancelerSupported() {
+ return WebRtcAudioEffects.canUseAcousticEchoCanceler();
+ }
+ public static boolean isNoiseSuppressorSupported() {
+ return WebRtcAudioEffects.canUseNoiseSuppressor();
+ }
+ // TODO(henrika): deprecated; remove when no longer used by any client.
+ public static boolean isAutomaticGainControlSupported() {
+ // Always return false here to avoid trying to use any built-in AGC.
+ return false;
+ }
+
+ // Call this method if the default handling of querying the native sample
+ // rate shall be overridden. Can be useful on some devices where the
+ // available Android APIs are known to return invalid results.
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized void setDefaultSampleRateHz(int sampleRateHz) {
+ isDefaultSampleRateOverridden = true;
+ defaultSampleRateHz = sampleRateHz;
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized boolean isDefaultSampleRateOverridden() {
+ return isDefaultSampleRateOverridden;
+ }
+
+ // TODO(bugs.webrtc.org/8491): Remove NoSynchronizedMethodCheck suppression.
+ @SuppressWarnings("NoSynchronizedMethodCheck")
+ public static synchronized int getDefaultSampleRateHz() {
+ return defaultSampleRateHz;
+ }
+
+ public static List<String> getBlackListedModelsForAecUsage() {
+ return Arrays.asList(WebRtcAudioUtils.BLACKLISTED_AEC_MODELS);
+ }
+
+ public static List<String> getBlackListedModelsForNsUsage() {
+ return Arrays.asList(WebRtcAudioUtils.BLACKLISTED_NS_MODELS);
+ }
+
+ // Helper method for building a string of thread information.
+ public static String getThreadInfo() {
+ return "@[name=" + Thread.currentThread().getName() + ", id=" + Thread.currentThread().getId()
+ + "]";
+ }
+
+ // Returns true if we're running on an emulator.
+ public static boolean runningOnEmulator() {
+ return Build.HARDWARE.equals("goldfish") && Build.BRAND.startsWith("generic_");
+ }
+
+ // Returns true if the device is blacklisted for OpenSL ES usage.
+ public static boolean deviceIsBlacklistedForOpenSLESUsage() {
+ List<String> blackListedModels = Arrays.asList(BLACKLISTED_OPEN_SL_ES_MODELS);
+ return blackListedModels.contains(Build.MODEL);
+ }
+
+ // Information about the current build, taken from system properties.
+ static void logDeviceInfo(String tag) {
+ Logging.d(tag, "Android SDK: " + Build.VERSION.SDK_INT + ", "
+ + "Release: " + Build.VERSION.RELEASE + ", "
+ + "Brand: " + Build.BRAND + ", "
+ + "Device: " + Build.DEVICE + ", "
+ + "Id: " + Build.ID + ", "
+ + "Hardware: " + Build.HARDWARE + ", "
+ + "Manufacturer: " + Build.MANUFACTURER + ", "
+ + "Model: " + Build.MODEL + ", "
+ + "Product: " + Build.PRODUCT);
+ }
+
+ // Logs information about the current audio state. The idea is to call this
+ // method when errors are detected to log under what conditions the error
+ // occurred. Hopefully it will provide clues to what might be the root cause.
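+ // It is called from, e.g., the reportWebRtcAudioTrack*Error() helpers in
+ // WebRtcAudioTrack whenever an init, start or run-time playout error is
+ // reported.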
+ static void logAudioState(String tag) { + logDeviceInfo(tag); + final Context context = ContextUtils.getApplicationContext(); + final AudioManager audioManager = + (AudioManager) context.getSystemService(Context.AUDIO_SERVICE); + logAudioStateBasic(tag, audioManager); + logAudioStateVolume(tag, audioManager); + logAudioDeviceInfo(tag, audioManager); + } + + // Reports basic audio statistics. + private static void logAudioStateBasic(String tag, AudioManager audioManager) { + Logging.d(tag, "Audio State: " + + "audio mode: " + modeToString(audioManager.getMode()) + ", " + + "has mic: " + hasMicrophone() + ", " + + "mic muted: " + audioManager.isMicrophoneMute() + ", " + + "music active: " + audioManager.isMusicActive() + ", " + + "speakerphone: " + audioManager.isSpeakerphoneOn() + ", " + + "BT SCO: " + audioManager.isBluetoothScoOn()); + } + + // Adds volume information for all possible stream types. + private static void logAudioStateVolume(String tag, AudioManager audioManager) { + final int[] streams = { + AudioManager.STREAM_VOICE_CALL, + AudioManager.STREAM_MUSIC, + AudioManager.STREAM_RING, + AudioManager.STREAM_ALARM, + AudioManager.STREAM_NOTIFICATION, + AudioManager.STREAM_SYSTEM + }; + Logging.d(tag, "Audio State: "); + // Some devices may not have volume controls and might use a fixed volume. + boolean fixedVolume = audioManager.isVolumeFixed(); + Logging.d(tag, " fixed volume=" + fixedVolume); + if (!fixedVolume) { + for (int stream : streams) { + StringBuilder info = new StringBuilder(); + info.append(" " + streamTypeToString(stream) + ": "); + info.append("volume=").append(audioManager.getStreamVolume(stream)); + info.append(", max=").append(audioManager.getStreamMaxVolume(stream)); + logIsStreamMute(tag, audioManager, stream, info); + Logging.d(tag, info.toString()); + } + } + } + + private static void logIsStreamMute( + String tag, AudioManager audioManager, int stream, StringBuilder info) { + if (Build.VERSION.SDK_INT >= 23) { + info.append(", muted=").append(audioManager.isStreamMute(stream)); + } + } + + // Moz linting complains even though AudioManager.GET_DEVICES_ALL is + // listed in the docs here: + // https://developer.android.com/reference/android/media/AudioManager#GET_DEVICES_ALL + @SuppressLint("WrongConstant") + private static void logAudioDeviceInfo(String tag, AudioManager audioManager) { + if (Build.VERSION.SDK_INT < 23) { + return; + } + final AudioDeviceInfo[] devices = + audioManager.getDevices(AudioManager.GET_DEVICES_ALL); + if (devices.length == 0) { + return; + } + Logging.d(tag, "Audio Devices: "); + for (AudioDeviceInfo device : devices) { + StringBuilder info = new StringBuilder(); + info.append(" ").append(deviceTypeToString(device.getType())); + info.append(device.isSource() ? "(in): " : "(out): "); + // An empty array indicates that the device supports arbitrary channel counts. + if (device.getChannelCounts().length > 0) { + info.append("channels=").append(Arrays.toString(device.getChannelCounts())); + info.append(", "); + } + if (device.getEncodings().length > 0) { + // Examples: ENCODING_PCM_16BIT = 2, ENCODING_PCM_FLOAT = 4. + info.append("encodings=").append(Arrays.toString(device.getEncodings())); + info.append(", "); + } + if (device.getSampleRates().length > 0) { + info.append("sample rates=").append(Arrays.toString(device.getSampleRates())); + info.append(", "); + } + info.append("id=").append(device.getId()); + Logging.d(tag, info.toString()); + } + } + + // Converts media.AudioManager modes into local string representation. 
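+ // Example: modeToString(MODE_IN_COMMUNICATION) returns "MODE_IN_COMMUNICATION"
+ // and any unknown value maps to "MODE_INVALID".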
+ static String modeToString(int mode) { + switch (mode) { + case MODE_IN_CALL: + return "MODE_IN_CALL"; + case MODE_IN_COMMUNICATION: + return "MODE_IN_COMMUNICATION"; + case MODE_NORMAL: + return "MODE_NORMAL"; + case MODE_RINGTONE: + return "MODE_RINGTONE"; + default: + return "MODE_INVALID"; + } + } + + private static String streamTypeToString(int stream) { + switch(stream) { + case AudioManager.STREAM_VOICE_CALL: + return "STREAM_VOICE_CALL"; + case AudioManager.STREAM_MUSIC: + return "STREAM_MUSIC"; + case AudioManager.STREAM_RING: + return "STREAM_RING"; + case AudioManager.STREAM_ALARM: + return "STREAM_ALARM"; + case AudioManager.STREAM_NOTIFICATION: + return "STREAM_NOTIFICATION"; + case AudioManager.STREAM_SYSTEM: + return "STREAM_SYSTEM"; + default: + return "STREAM_INVALID"; + } + } + + // Converts AudioDeviceInfo types to local string representation. + private static String deviceTypeToString(int type) { + switch (type) { + case AudioDeviceInfo.TYPE_UNKNOWN: + return "TYPE_UNKNOWN"; + case AudioDeviceInfo.TYPE_BUILTIN_EARPIECE: + return "TYPE_BUILTIN_EARPIECE"; + case AudioDeviceInfo.TYPE_BUILTIN_SPEAKER: + return "TYPE_BUILTIN_SPEAKER"; + case AudioDeviceInfo.TYPE_WIRED_HEADSET: + return "TYPE_WIRED_HEADSET"; + case AudioDeviceInfo.TYPE_WIRED_HEADPHONES: + return "TYPE_WIRED_HEADPHONES"; + case AudioDeviceInfo.TYPE_LINE_ANALOG: + return "TYPE_LINE_ANALOG"; + case AudioDeviceInfo.TYPE_LINE_DIGITAL: + return "TYPE_LINE_DIGITAL"; + case AudioDeviceInfo.TYPE_BLUETOOTH_SCO: + return "TYPE_BLUETOOTH_SCO"; + case AudioDeviceInfo.TYPE_BLUETOOTH_A2DP: + return "TYPE_BLUETOOTH_A2DP"; + case AudioDeviceInfo.TYPE_HDMI: + return "TYPE_HDMI"; + case AudioDeviceInfo.TYPE_HDMI_ARC: + return "TYPE_HDMI_ARC"; + case AudioDeviceInfo.TYPE_USB_DEVICE: + return "TYPE_USB_DEVICE"; + case AudioDeviceInfo.TYPE_USB_ACCESSORY: + return "TYPE_USB_ACCESSORY"; + case AudioDeviceInfo.TYPE_DOCK: + return "TYPE_DOCK"; + case AudioDeviceInfo.TYPE_FM: + return "TYPE_FM"; + case AudioDeviceInfo.TYPE_BUILTIN_MIC: + return "TYPE_BUILTIN_MIC"; + case AudioDeviceInfo.TYPE_FM_TUNER: + return "TYPE_FM_TUNER"; + case AudioDeviceInfo.TYPE_TV_TUNER: + return "TYPE_TV_TUNER"; + case AudioDeviceInfo.TYPE_TELEPHONY: + return "TYPE_TELEPHONY"; + case AudioDeviceInfo.TYPE_AUX_LINE: + return "TYPE_AUX_LINE"; + case AudioDeviceInfo.TYPE_IP: + return "TYPE_IP"; + case AudioDeviceInfo.TYPE_BUS: + return "TYPE_BUS"; + case AudioDeviceInfo.TYPE_USB_HEADSET: + return "TYPE_USB_HEADSET"; + default: + return "TYPE_UNKNOWN"; + } + } + + // Returns true if the device can record audio via a microphone. + private static boolean hasMicrophone() { + return ContextUtils.getApplicationContext().getPackageManager().hasSystemFeature( + PackageManager.FEATURE_MICROPHONE); + } +} diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_common.cc b/third_party/libwebrtc/modules/audio_device/android/opensles_common.cc new file mode 100644 index 0000000000..019714dae4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/android/opensles_common.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_device/android/opensles_common.h"
+
+#include <SLES/OpenSLES.h>
+
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Returns a string representation given an integer SL_RESULT_XXX code.
+// The mapping can be found in <SLES/OpenSLES.h>.
+const char* GetSLErrorString(size_t code) {
+ static const char* sl_error_strings[] = {
+ "SL_RESULT_SUCCESS", // 0
+ "SL_RESULT_PRECONDITIONS_VIOLATED", // 1
+ "SL_RESULT_PARAMETER_INVALID", // 2
+ "SL_RESULT_MEMORY_FAILURE", // 3
+ "SL_RESULT_RESOURCE_ERROR", // 4
+ "SL_RESULT_RESOURCE_LOST", // 5
+ "SL_RESULT_IO_ERROR", // 6
+ "SL_RESULT_BUFFER_INSUFFICIENT", // 7
+ "SL_RESULT_CONTENT_CORRUPTED", // 8
+ "SL_RESULT_CONTENT_UNSUPPORTED", // 9
+ "SL_RESULT_CONTENT_NOT_FOUND", // 10
+ "SL_RESULT_PERMISSION_DENIED", // 11
+ "SL_RESULT_FEATURE_UNSUPPORTED", // 12
+ "SL_RESULT_INTERNAL_ERROR", // 13
+ "SL_RESULT_UNKNOWN_ERROR", // 14
+ "SL_RESULT_OPERATION_ABORTED", // 15
+ "SL_RESULT_CONTROL_LOST", // 16
+ };
+
+ if (code >= arraysize(sl_error_strings)) {
+ return "SL_RESULT_UNKNOWN_ERROR";
+ }
+ return sl_error_strings[code];
+}
+
+SLDataFormat_PCM CreatePCMConfiguration(size_t channels,
+ int sample_rate,
+ size_t bits_per_sample) {
+ RTC_CHECK_EQ(bits_per_sample, SL_PCMSAMPLEFORMAT_FIXED_16);
+ SLDataFormat_PCM format;
+ format.formatType = SL_DATAFORMAT_PCM;
+ format.numChannels = static_cast<SLuint32>(channels);
+ // Note that the unit of the sample rate is actually in milliHertz and not
+ // Hertz.
+ switch (sample_rate) {
+ case 8000:
+ format.samplesPerSec = SL_SAMPLINGRATE_8;
+ break;
+ case 16000:
+ format.samplesPerSec = SL_SAMPLINGRATE_16;
+ break;
+ case 22050:
+ format.samplesPerSec = SL_SAMPLINGRATE_22_05;
+ break;
+ case 32000:
+ format.samplesPerSec = SL_SAMPLINGRATE_32;
+ break;
+ case 44100:
+ format.samplesPerSec = SL_SAMPLINGRATE_44_1;
+ break;
+ case 48000:
+ format.samplesPerSec = SL_SAMPLINGRATE_48;
+ break;
+ case 64000:
+ format.samplesPerSec = SL_SAMPLINGRATE_64;
+ break;
+ case 88200:
+ format.samplesPerSec = SL_SAMPLINGRATE_88_2;
+ break;
+ case 96000:
+ format.samplesPerSec = SL_SAMPLINGRATE_96;
+ break;
+ default:
+ RTC_CHECK(false) << "Unsupported sample rate: " << sample_rate;
+ break;
+ }
+ format.bitsPerSample = SL_PCMSAMPLEFORMAT_FIXED_16;
+ format.containerSize = SL_PCMSAMPLEFORMAT_FIXED_16;
+ format.endianness = SL_BYTEORDER_LITTLEENDIAN;
+ if (format.numChannels == 1) {
+ format.channelMask = SL_SPEAKER_FRONT_CENTER;
+ } else if (format.numChannels == 2) {
+ format.channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
+ } else {
+ RTC_CHECK(false) << "Unsupported number of channels: "
+ << format.numChannels;
+ }
+ return format;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_common.h b/third_party/libwebrtc/modules/audio_device/android/opensles_common.h
new file mode 100644
index 0000000000..438c522072
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/opensles_common.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_COMMON_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_COMMON_H_
+
+#include <SLES/OpenSLES.h>
+#include <stddef.h>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Returns a string representation given an integer SL_RESULT_XXX code.
+// The mapping can be found in <SLES/OpenSLES.h>.
+const char* GetSLErrorString(size_t code);
+
+// Configures an SL_DATAFORMAT_PCM structure based on native audio parameters.
+SLDataFormat_PCM CreatePCMConfiguration(size_t channels,
+ int sample_rate,
+ size_t bits_per_sample);
+
+// Helper class for using SLObjectItf interfaces.
+template <typename SLType, typename SLDerefType>
+class ScopedSLObject {
+ public:
+ ScopedSLObject() : obj_(nullptr) {}
+
+ ~ScopedSLObject() { Reset(); }
+
+ SLType* Receive() {
+ RTC_DCHECK(!obj_);
+ return &obj_;
+ }
+
+ SLDerefType operator->() { return *obj_; }
+
+ SLType Get() const { return obj_; }
+
+ void Reset() {
+ if (obj_) {
+ (*obj_)->Destroy(obj_);
+ obj_ = nullptr;
+ }
+ }
+
+ private:
+ SLType obj_;
+};
+
+typedef ScopedSLObject<SLObjectItf, const SLObjectItf_*> ScopedSLObjectItf;
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_player.cc b/third_party/libwebrtc/modules/audio_device/android/opensles_player.cc
new file mode 100644
index 0000000000..f2b3a37194
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/opensles_player.cc
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/opensles_player.h"
+
+#include <android/log.h>
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/fine_audio_buffer.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/time_utils.h"
+
+#define TAG "OpenSLESPlayer"
+#define ALOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, TAG, __VA_ARGS__)
+#define ALOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
+#define ALOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
+#define ALOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)
+#define ALOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+
+#define RETURN_ON_ERROR(op, ...) \
+ do { \
+ SLresult err = (op); \
+ if (err != SL_RESULT_SUCCESS) { \
+ ALOGE("%s failed: %s", #op, GetSLErrorString(err)); \
+ return __VA_ARGS__; \
+ } \
+ } while (0)
+
+namespace webrtc {
+
+OpenSLESPlayer::OpenSLESPlayer(AudioManager* audio_manager)
+ : audio_manager_(audio_manager),
+ audio_parameters_(audio_manager->GetPlayoutAudioParameters()),
+ audio_device_buffer_(nullptr),
+ initialized_(false),
+ playing_(false),
+ buffer_index_(0),
+ engine_(nullptr),
+ player_(nullptr),
+ simple_buffer_queue_(nullptr),
+ volume_(nullptr),
+ last_play_time_(0) {
+ ALOGD("ctor[tid=%d]", rtc::CurrentThreadId());
+ // Use native audio output parameters provided by the audio manager and
+ // define the PCM format structure.
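+ // For example, assuming the device reports 48 kHz stereo output, channels=2
+ // and sample_rate=48000 map to SL_SAMPLINGRATE_48 and a
+ // FRONT_LEFT|FRONT_RIGHT channel mask in CreatePCMConfiguration().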
+ pcm_format_ = CreatePCMConfiguration(audio_parameters_.channels(), + audio_parameters_.sample_rate(), + audio_parameters_.bits_per_sample()); + // Detach from this thread since we want to use the checker to verify calls + // from the internal audio thread. + thread_checker_opensles_.Detach(); +} + +OpenSLESPlayer::~OpenSLESPlayer() { + ALOGD("dtor[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + Terminate(); + DestroyAudioPlayer(); + DestroyMix(); + engine_ = nullptr; + RTC_DCHECK(!engine_); + RTC_DCHECK(!output_mix_.Get()); + RTC_DCHECK(!player_); + RTC_DCHECK(!simple_buffer_queue_); + RTC_DCHECK(!volume_); +} + +int OpenSLESPlayer::Init() { + ALOGD("Init[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + if (audio_parameters_.channels() == 2) { + ALOGW("Stereo mode is enabled"); + } + return 0; +} + +int OpenSLESPlayer::Terminate() { + ALOGD("Terminate[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + StopPlayout(); + return 0; +} + +int OpenSLESPlayer::InitPlayout() { + ALOGD("InitPlayout[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!playing_); + if (!ObtainEngineInterface()) { + ALOGE("Failed to obtain SL Engine interface"); + return -1; + } + CreateMix(); + initialized_ = true; + buffer_index_ = 0; + return 0; +} + +int OpenSLESPlayer::StartPlayout() { + ALOGD("StartPlayout[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(initialized_); + RTC_DCHECK(!playing_); + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetPlayout(); + } + // The number of lower latency audio players is limited, hence we create the + // audio player in Start() and destroy it in Stop(). + CreateAudioPlayer(); + // Fill up audio buffers to avoid initial glitch and to ensure that playback + // starts when mode is later changed to SL_PLAYSTATE_PLAYING. + // TODO(henrika): we can save some delay by only making one call to + // EnqueuePlayoutData. Most likely not worth the risk of adding a glitch. + last_play_time_ = rtc::Time(); + for (int i = 0; i < kNumOfOpenSLESBuffers; ++i) { + EnqueuePlayoutData(true); + } + // Start streaming data by setting the play state to SL_PLAYSTATE_PLAYING. + // For a player object, when the object is in the SL_PLAYSTATE_PLAYING + // state, adding buffers will implicitly start playback. + RETURN_ON_ERROR((*player_)->SetPlayState(player_, SL_PLAYSTATE_PLAYING), -1); + playing_ = (GetPlayState() == SL_PLAYSTATE_PLAYING); + RTC_DCHECK(playing_); + return 0; +} + +int OpenSLESPlayer::StopPlayout() { + ALOGD("StopPlayout[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!initialized_ || !playing_) { + return 0; + } + // Stop playing by setting the play state to SL_PLAYSTATE_STOPPED. + RETURN_ON_ERROR((*player_)->SetPlayState(player_, SL_PLAYSTATE_STOPPED), -1); + // Clear the buffer queue to flush out any remaining data. + RETURN_ON_ERROR((*simple_buffer_queue_)->Clear(simple_buffer_queue_), -1); +#if RTC_DCHECK_IS_ON + // Verify that the buffer queue is in fact cleared as it should. + SLAndroidSimpleBufferQueueState buffer_queue_state; + (*simple_buffer_queue_)->GetState(simple_buffer_queue_, &buffer_queue_state); + RTC_DCHECK_EQ(0, buffer_queue_state.count); + RTC_DCHECK_EQ(0, buffer_queue_state.index); +#endif + // The number of lower latency audio players is limited, hence we create the + // audio player in Start() and destroy it in Stop(). 
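+ // This mirrors the CreateAudioPlayer() call in StartPlayout() above.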
+ DestroyAudioPlayer();
+ thread_checker_opensles_.Detach();
+ initialized_ = false;
+ playing_ = false;
+ return 0;
+}
+
+int OpenSLESPlayer::SpeakerVolumeIsAvailable(bool& available) {
+ available = false;
+ return 0;
+}
+
+int OpenSLESPlayer::MaxSpeakerVolume(uint32_t& maxVolume) const {
+ return -1;
+}
+
+int OpenSLESPlayer::MinSpeakerVolume(uint32_t& minVolume) const {
+ return -1;
+}
+
+int OpenSLESPlayer::SetSpeakerVolume(uint32_t volume) {
+ return -1;
+}
+
+int OpenSLESPlayer::SpeakerVolume(uint32_t& volume) const {
+ return -1;
+}
+
+void OpenSLESPlayer::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+ ALOGD("AttachAudioBuffer");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ audio_device_buffer_ = audioBuffer;
+ const int sample_rate_hz = audio_parameters_.sample_rate();
+ ALOGD("SetPlayoutSampleRate(%d)", sample_rate_hz);
+ audio_device_buffer_->SetPlayoutSampleRate(sample_rate_hz);
+ const size_t channels = audio_parameters_.channels();
+ ALOGD("SetPlayoutChannels(%zu)", channels);
+ audio_device_buffer_->SetPlayoutChannels(channels);
+ RTC_CHECK(audio_device_buffer_);
+ AllocateDataBuffers();
+}
+
+void OpenSLESPlayer::AllocateDataBuffers() {
+ ALOGD("AllocateDataBuffers");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ RTC_DCHECK(!simple_buffer_queue_);
+ RTC_CHECK(audio_device_buffer_);
+ // Create a modified audio buffer class which allows us to ask for any number
+ // of samples (and not only multiple of 10ms) to match the native OpenSL ES
+ // buffer size. The native buffer size corresponds to the
+ // PROPERTY_OUTPUT_FRAMES_PER_BUFFER property which is the number of audio
+ // frames that the HAL (Hardware Abstraction Layer) buffer can hold. It is
+ // recommended to construct audio buffers so that they contain an exact
+ // multiple of this number. If so, callbacks will occur at regular intervals,
+ // which reduces jitter.
+ const size_t buffer_size_in_samples =
+ audio_parameters_.frames_per_buffer() * audio_parameters_.channels();
+ ALOGD("native buffer size: %zu", buffer_size_in_samples);
+ ALOGD("native buffer size in ms: %.2f",
+ audio_parameters_.GetBufferSizeInMilliseconds());
+ fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+ // Allocate memory for the audio buffers.
+ for (int i = 0; i < kNumOfOpenSLESBuffers; ++i) {
+ audio_buffers_[i].reset(new SLint16[buffer_size_in_samples]);
+ }
+}
+
+bool OpenSLESPlayer::ObtainEngineInterface() {
+ ALOGD("ObtainEngineInterface");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (engine_)
+ return true;
+ // Get access to (or create if not already existing) the global OpenSL Engine
+ // object.
+ SLObjectItf engine_object = audio_manager_->GetOpenSLEngine();
+ if (engine_object == nullptr) {
+ ALOGE("Failed to access the global OpenSL engine");
+ return false;
+ }
+ // Get the SL Engine Interface which is implicit.
+ RETURN_ON_ERROR(
+ (*engine_object)->GetInterface(engine_object, SL_IID_ENGINE, &engine_),
+ false);
+ return true;
+}
+
+bool OpenSLESPlayer::CreateMix() {
+ ALOGD("CreateMix");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ RTC_DCHECK(engine_);
+ if (output_mix_.Get())
+ return true;
+
+ // Create the output mix on the engine object. No interfaces will be used.
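+ // OpenSL ES objects are created in two phases: CreateOutputMix() returns an
+ // unrealized object and Realize() allocates its resources; interfaces may
+ // only be fetched via GetInterface() after realization.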
+ RETURN_ON_ERROR((*engine_)->CreateOutputMix(engine_, output_mix_.Receive(), 0,
+ nullptr, nullptr),
+ false);
+ RETURN_ON_ERROR(output_mix_->Realize(output_mix_.Get(), SL_BOOLEAN_FALSE),
+ false);
+ return true;
+}
+
+void OpenSLESPlayer::DestroyMix() {
+ ALOGD("DestroyMix");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (!output_mix_.Get())
+ return;
+ output_mix_.Reset();
+}
+
+bool OpenSLESPlayer::CreateAudioPlayer() {
+ ALOGD("CreateAudioPlayer");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ RTC_DCHECK(output_mix_.Get());
+ if (player_object_.Get())
+ return true;
+ RTC_DCHECK(!player_);
+ RTC_DCHECK(!simple_buffer_queue_);
+ RTC_DCHECK(!volume_);
+
+ // source: the Android Simple Buffer Queue data locator is the source.
+ SLDataLocator_AndroidSimpleBufferQueue simple_buffer_queue = {
+ SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE,
+ static_cast<SLuint32>(kNumOfOpenSLESBuffers)};
+ SLDataSource audio_source = {&simple_buffer_queue, &pcm_format_};
+
+ // sink: the OutputMix-based data locator is the sink.
+ SLDataLocator_OutputMix locator_output_mix = {SL_DATALOCATOR_OUTPUTMIX,
+ output_mix_.Get()};
+ SLDataSink audio_sink = {&locator_output_mix, nullptr};
+
+ // Define interfaces that we intend to use and realize.
+ const SLInterfaceID interface_ids[] = {SL_IID_ANDROIDCONFIGURATION,
+ SL_IID_BUFFERQUEUE, SL_IID_VOLUME};
+ const SLboolean interface_required[] = {SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE,
+ SL_BOOLEAN_TRUE};
+
+ // Create the audio player on the engine interface.
+ RETURN_ON_ERROR(
+ (*engine_)->CreateAudioPlayer(
+ engine_, player_object_.Receive(), &audio_source, &audio_sink,
+ arraysize(interface_ids), interface_ids, interface_required),
+ false);
+
+ // Use the Android configuration interface to set platform-specific
+ // parameters. Should be done before player is realized.
+ SLAndroidConfigurationItf player_config;
+ RETURN_ON_ERROR(
+ player_object_->GetInterface(player_object_.Get(),
+ SL_IID_ANDROIDCONFIGURATION, &player_config),
+ false);
+ // Set audio player configuration to SL_ANDROID_STREAM_VOICE which
+ // corresponds to android.media.AudioManager.STREAM_VOICE_CALL.
+ SLint32 stream_type = SL_ANDROID_STREAM_VOICE;
+ RETURN_ON_ERROR(
+ (*player_config)
+ ->SetConfiguration(player_config, SL_ANDROID_KEY_STREAM_TYPE,
+ &stream_type, sizeof(SLint32)),
+ false);
+
+ // Realize the audio player object after configuration has been set.
+ RETURN_ON_ERROR(
+ player_object_->Realize(player_object_.Get(), SL_BOOLEAN_FALSE), false);
+
+ // Get the SLPlayItf interface on the audio player.
+ RETURN_ON_ERROR(
+ player_object_->GetInterface(player_object_.Get(), SL_IID_PLAY, &player_),
+ false);
+
+ // Get the SLAndroidSimpleBufferQueueItf interface on the audio player.
+ RETURN_ON_ERROR(
+ player_object_->GetInterface(player_object_.Get(), SL_IID_BUFFERQUEUE,
+ &simple_buffer_queue_),
+ false);
+
+ // Register callback method for the Android Simple Buffer Queue interface.
+ // This method will be called when the native audio layer needs audio data.
+ RETURN_ON_ERROR((*simple_buffer_queue_)
+ ->RegisterCallback(simple_buffer_queue_,
+ SimpleBufferQueueCallback, this),
+ false);
+
+ // Get the SLVolumeItf interface on the audio player.
+ RETURN_ON_ERROR(player_object_->GetInterface(player_object_.Get(),
+ SL_IID_VOLUME, &volume_),
+ false);
+
+ // TODO(henrika): might not be required to set volume to max here since it
+ // seems to be default on most devices. Might be required for unit tests.
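+ // Per the OpenSL ES spec, SetVolumeLevel() takes the level in millibels,
+ // where 0 mB means no attenuation; the disabled line below would therefore
+ // select full volume.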
+ // RETURN_ON_ERROR((*volume_)->SetVolumeLevel(volume_, 0), false);
+
+ return true;
+}
+
+void OpenSLESPlayer::DestroyAudioPlayer() {
+ ALOGD("DestroyAudioPlayer");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (!player_object_.Get())
+ return;
+ (*simple_buffer_queue_)
+ ->RegisterCallback(simple_buffer_queue_, nullptr, nullptr);
+ player_object_.Reset();
+ player_ = nullptr;
+ simple_buffer_queue_ = nullptr;
+ volume_ = nullptr;
+}
+
+// static
+void OpenSLESPlayer::SimpleBufferQueueCallback(
+ SLAndroidSimpleBufferQueueItf caller,
+ void* context) {
+ OpenSLESPlayer* stream = reinterpret_cast<OpenSLESPlayer*>(context);
+ stream->FillBufferQueue();
+}
+
+void OpenSLESPlayer::FillBufferQueue() {
+ RTC_DCHECK(thread_checker_opensles_.IsCurrent());
+ SLuint32 state = GetPlayState();
+ if (state != SL_PLAYSTATE_PLAYING) {
+ ALOGW("Buffer callback in non-playing state!");
+ return;
+ }
+ EnqueuePlayoutData(false);
+}
+
+void OpenSLESPlayer::EnqueuePlayoutData(bool silence) {
+ // Check delta time between two successive callbacks and provide a warning
+ // if it becomes very large.
+ // TODO(henrika): using 150ms as upper limit but this value is rather random.
+ const uint32_t current_time = rtc::Time();
+ const uint32_t diff = current_time - last_play_time_;
+ if (diff > 150) {
+ ALOGW("Bad OpenSL ES playout timing, dT=%u [ms]", diff);
+ }
+ last_play_time_ = current_time;
+ SLint8* audio_ptr8 =
+ reinterpret_cast<SLint8*>(audio_buffers_[buffer_index_].get());
+ if (silence) {
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ // Avoid acquiring real audio data from WebRTC and fill the buffer with
+ // zeros instead. Used to prime the buffer with silence and to avoid asking
+ // for audio data from two different threads.
+ memset(audio_ptr8, 0, audio_parameters_.GetBytesPerBuffer());
+ } else {
+ RTC_DCHECK(thread_checker_opensles_.IsCurrent());
+ // Read audio data from the WebRTC source using the FineAudioBuffer object
+ // to adjust for differences in buffer size between WebRTC (10ms) and native
+ // OpenSL ES. Use hardcoded delay estimate since OpenSL ES does not support
+ // delay estimation.
+ fine_audio_buffer_->GetPlayoutData(
+ rtc::ArrayView<SLint16>(audio_buffers_[buffer_index_].get(),
+ audio_parameters_.frames_per_buffer() *
+ audio_parameters_.channels()),
+ 25);
+ }
+ // Enqueue the decoded audio buffer for playback.
+ SLresult err = (*simple_buffer_queue_)
+ ->Enqueue(simple_buffer_queue_, audio_ptr8,
+ audio_parameters_.GetBytesPerBuffer());
+ if (SL_RESULT_SUCCESS != err) {
+ ALOGE("Enqueue failed: %d", err);
+ }
+ buffer_index_ = (buffer_index_ + 1) % kNumOfOpenSLESBuffers;
+}
+
+SLuint32 OpenSLESPlayer::GetPlayState() const {
+ RTC_DCHECK(player_);
+ SLuint32 state;
+ SLresult err = (*player_)->GetPlayState(player_, &state);
+ if (SL_RESULT_SUCCESS != err) {
+ ALOGE("GetPlayState failed: %d", err);
+ }
+ return state;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_player.h b/third_party/libwebrtc/modules/audio_device/android/opensles_player.h
new file mode 100644
index 0000000000..41593a448f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/opensles_player.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_PLAYER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_PLAYER_H_
+
+#include <SLES/OpenSLES.h>
+#include <SLES/OpenSLES_Android.h>
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/android/opensles_common.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "modules/utility/include/helpers_android.h"
+
+namespace webrtc {
+
+class FineAudioBuffer;
+
+// Implements 16-bit mono PCM audio output support for Android using the
+// C based OpenSL ES API. No calls from C/C++ to Java using JNI are made.
+//
+// An instance must be created and destroyed on one and the same thread.
+// All public methods must also be called on the same thread. A thread checker
+// will RTC_DCHECK if any method is called on an invalid thread. Decoded audio
+// buffers are requested on a dedicated internal thread managed by the OpenSL
+// ES layer.
+//
+// The existing design forces the user to call InitPlayout() after
+// StopPlayout() to be able to call StartPlayout() again. This is in line with
+// how the Java-based implementation works.
+//
+// OpenSL ES is a native C API which has no Dalvik-related overhead such as
+// garbage collection pauses, and it supports reduced audio output latency.
+// If the device doesn't claim this feature but supports API level 9 (Android
+// platform version 2.3) or later, then we can still use the OpenSL ES APIs but
+// the output latency may be higher.
+class OpenSLESPlayer {
+ public:
+ // Beginning with API level 17 (Android 4.2), a buffer count of 2 or more is
+ // required for lower latency. Beginning with API level 18 (Android 4.3), a
+ // buffer count of 1 is sufficient for lower latency. In addition, the buffer
+ // size and sample rate must be compatible with the device's native output
+ // configuration provided via the audio manager at construction.
+ // TODO(henrika): perhaps set this value dynamically based on OS version.
+ static const int kNumOfOpenSLESBuffers = 2;
+
+ explicit OpenSLESPlayer(AudioManager* audio_manager);
+ ~OpenSLESPlayer();
+
+ int Init();
+ int Terminate();
+
+ int InitPlayout();
+ bool PlayoutIsInitialized() const { return initialized_; }
+
+ int StartPlayout();
+ int StopPlayout();
+ bool Playing() const { return playing_; }
+
+ int SpeakerVolumeIsAvailable(bool& available);
+ int SetSpeakerVolume(uint32_t volume);
+ int SpeakerVolume(uint32_t& volume) const;
+ int MaxSpeakerVolume(uint32_t& maxVolume) const;
+ int MinSpeakerVolume(uint32_t& minVolume) const;
+
+ void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer);
+
+ private:
+ // These callback methods are called when data is required for playout.
+ // They are both called from an internal "OpenSL ES thread" which is not
+ // attached to the Dalvik VM.
+ static void SimpleBufferQueueCallback(SLAndroidSimpleBufferQueueItf caller,
+ void* context);
+ void FillBufferQueue();
+ // Reads audio data in PCM format using the AudioDeviceBuffer.
+ // Can be called both on the main thread (during Start()) and from the
+ // internal audio thread while output streaming is active.
+ // If the `silence` flag is set, the audio is filled with zeros instead of
+ // asking the WebRTC layer for real audio data. This procedure is also known
+ // as audio priming.
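+ // Priming is done in StartPlayout(), which enqueues kNumOfOpenSLESBuffers
+ // silent buffers before the play state is set to SL_PLAYSTATE_PLAYING.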
+ void EnqueuePlayoutData(bool silence);
+
+ // Allocate memory for audio buffers which will be used to render audio
+ // via the SLAndroidSimpleBufferQueueItf interface.
+ void AllocateDataBuffers();
+
+ // Obtains the SL Engine Interface from the existing global Engine object.
+ // The interface exposes creation methods of all the OpenSL ES object types.
+ // This method defines the `engine_` member variable.
+ bool ObtainEngineInterface();
+
+ // Creates/destroys the output mix object.
+ bool CreateMix();
+ void DestroyMix();
+
+ // Creates/destroys the audio player and the simple-buffer object.
+ // Also creates the volume object.
+ bool CreateAudioPlayer();
+ void DestroyAudioPlayer();
+
+ SLuint32 GetPlayState() const;
+
+ // Ensures that methods are called from the same thread as this object is
+ // created on.
+ SequenceChecker thread_checker_;
+
+ // Stores the thread ID in the first call to SimpleBufferQueueCallback() from
+ // an internal non-application thread which is not attached to the Dalvik
+ // JVM. Detached during construction of this object.
+ SequenceChecker thread_checker_opensles_;
+
+ // Raw pointer to the audio manager injected at construction. Used to cache
+ // audio parameters and to access the global SL engine object needed by the
+ // ObtainEngineInterface() method. The audio manager outlives any instance of
+ // this class.
+ AudioManager* audio_manager_;
+
+ // Contains audio parameters provided to this class at construction by the
+ // AudioManager.
+ const AudioParameters audio_parameters_;
+
+ // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the
+ // AudioDeviceModuleImpl class and called by AudioDeviceModule::Create().
+ AudioDeviceBuffer* audio_device_buffer_;
+
+ bool initialized_;
+ bool playing_;
+
+ // PCM-type format definition.
+ // TODO(henrika): add support for SLAndroidDataFormat_PCM_EX (android-21) if
+ // 32-bit float representation is needed.
+ SLDataFormat_PCM pcm_format_;
+
+ // Queue of audio buffers to be used by the player object for rendering
+ // audio.
+ std::unique_ptr<SLint16[]> audio_buffers_[kNumOfOpenSLESBuffers];
+
+ // FineAudioBuffer takes an AudioDeviceBuffer which delivers audio data
+ // in chunks of 10ms. It then allows for this data to be pulled in
+ // a finer or coarser granularity. I.e., by interacting with this class
+ // instead of directly with the AudioDeviceBuffer, one can ask for any number
+ // of audio data samples.
+ // Example: native buffer size can be 192 audio frames at 48kHz sample rate.
+ // WebRTC will provide 480 audio frames per 10ms but OpenSL ES asks for 192
+ // in each callback (one every 4th ms). This class can then ask for 192 and
+ // the FineAudioBuffer will ask WebRTC for new data approximately only every
+ // second callback and also cache non-utilized audio.
+ std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+
+ // Keeps track of active audio buffer 'n' in the audio_buffers_[n] queue.
+ // Example (kNumOfOpenSLESBuffers = 2): counts 0, 1, 0, 1, ...
+ int buffer_index_;
+
+ // This interface exposes creation methods for all the OpenSL ES object
+ // types. It is the OpenSL ES API entry point.
+ SLEngineItf engine_;
+
+ // Output mix object to be used by the player object.
+ webrtc::ScopedSLObjectItf output_mix_;
+
+ // The audio player media object plays out audio to the speakers. It also
+ // supports volume control.
+ webrtc::ScopedSLObjectItf player_object_;
+
+ // This interface is supported on the audio player and it controls the state
+ // of the audio player.
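+ // Possible play states are SL_PLAYSTATE_STOPPED, SL_PLAYSTATE_PAUSED and
+ // SL_PLAYSTATE_PLAYING; see GetPlayState() and StartPlayout()/StopPlayout().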
+ SLPlayItf player_;
+
+ // The Android Simple Buffer Queue interface is supported on the audio player
+ // and it provides methods to send audio data from the source to the audio
+ // player for rendering.
+ SLAndroidSimpleBufferQueueItf simple_buffer_queue_;
+
+ // This interface exposes controls for manipulating the object's audio volume
+ // properties. This interface is supported on the Audio Player object.
+ SLVolumeItf volume_;
+
+ // Last time the OpenSL ES layer asked for audio data to play out.
+ uint32_t last_play_time_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_PLAYER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.cc b/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.cc
new file mode 100644
index 0000000000..4e0c26dbf0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.cc
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/android/opensles_recorder.h"
+
+#include <android/log.h>
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/fine_audio_buffer.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/time_utils.h"
+
+#define TAG "OpenSLESRecorder"
+#define ALOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, TAG, __VA_ARGS__)
+#define ALOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
+#define ALOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
+#define ALOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)
+#define ALOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+
+#define LOG_ON_ERROR(op) \
+ [](SLresult err) { \
+ if (err != SL_RESULT_SUCCESS) { \
+ ALOGE("%s:%d %s failed: %s", __FILE__, __LINE__, #op, \
+ GetSLErrorString(err)); \
+ return true; \
+ } \
+ return false; \
+ }(op)
+
+namespace webrtc {
+
+OpenSLESRecorder::OpenSLESRecorder(AudioManager* audio_manager)
+ : audio_manager_(audio_manager),
+ audio_parameters_(audio_manager->GetRecordAudioParameters()),
+ audio_device_buffer_(nullptr),
+ initialized_(false),
+ recording_(false),
+ engine_(nullptr),
+ recorder_(nullptr),
+ simple_buffer_queue_(nullptr),
+ buffer_index_(0),
+ last_rec_time_(0) {
+ ALOGD("ctor[tid=%d]", rtc::CurrentThreadId());
+ // Detach from this thread since we want to use the checker to verify calls
+ // from the internal audio thread.
+ thread_checker_opensles_.Detach();
+ // Use native audio record parameters provided by the audio manager and
+ // define the PCM format structure.
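+ // The same CreatePCMConfiguration() helper as on the playout side is used
+ // here, driven by the record parameters cached in the initializer list
+ // above.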
+ pcm_format_ = CreatePCMConfiguration(audio_parameters_.channels(), + audio_parameters_.sample_rate(), + audio_parameters_.bits_per_sample()); +} + +OpenSLESRecorder::~OpenSLESRecorder() { + ALOGD("dtor[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + Terminate(); + DestroyAudioRecorder(); + engine_ = nullptr; + RTC_DCHECK(!engine_); + RTC_DCHECK(!recorder_); + RTC_DCHECK(!simple_buffer_queue_); +} + +int OpenSLESRecorder::Init() { + ALOGD("Init[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + if (audio_parameters_.channels() == 2) { + ALOGD("Stereo mode is enabled"); + } + return 0; +} + +int OpenSLESRecorder::Terminate() { + ALOGD("Terminate[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + StopRecording(); + return 0; +} + +int OpenSLESRecorder::InitRecording() { + ALOGD("InitRecording[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(!initialized_); + RTC_DCHECK(!recording_); + if (!ObtainEngineInterface()) { + ALOGE("Failed to obtain SL Engine interface"); + return -1; + } + CreateAudioRecorder(); + initialized_ = true; + buffer_index_ = 0; + return 0; +} + +int OpenSLESRecorder::StartRecording() { + ALOGD("StartRecording[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DCHECK(initialized_); + RTC_DCHECK(!recording_); + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetRecord(); + } + // Add buffers to the queue before changing state to SL_RECORDSTATE_RECORDING + // to ensure that recording starts as soon as the state is modified. On some + // devices, SLAndroidSimpleBufferQueue::Clear() used in Stop() does not flush + // the buffers as intended and we therefore check the number of buffers + // already queued first. Enqueue() can return SL_RESULT_BUFFER_INSUFFICIENT + // otherwise. + int num_buffers_in_queue = GetBufferCount(); + for (int i = 0; i < kNumOfOpenSLESBuffers - num_buffers_in_queue; ++i) { + if (!EnqueueAudioBuffer()) { + recording_ = false; + return -1; + } + } + num_buffers_in_queue = GetBufferCount(); + RTC_DCHECK_EQ(num_buffers_in_queue, kNumOfOpenSLESBuffers); + LogBufferState(); + // Start audio recording by changing the state to SL_RECORDSTATE_RECORDING. + // Given that buffers are already enqueued, recording should start at once. + // The macro returns -1 if recording fails to start. + last_rec_time_ = rtc::Time(); + if (LOG_ON_ERROR( + (*recorder_)->SetRecordState(recorder_, SL_RECORDSTATE_RECORDING))) { + return -1; + } + recording_ = (GetRecordState() == SL_RECORDSTATE_RECORDING); + RTC_DCHECK(recording_); + return 0; +} + +int OpenSLESRecorder::StopRecording() { + ALOGD("StopRecording[tid=%d]", rtc::CurrentThreadId()); + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!initialized_ || !recording_) { + return 0; + } + // Stop recording by setting the record state to SL_RECORDSTATE_STOPPED. + if (LOG_ON_ERROR( + (*recorder_)->SetRecordState(recorder_, SL_RECORDSTATE_STOPPED))) { + return -1; + } + // Clear the buffer queue to get rid of old data when resuming recording. 
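+ // Per the OpenSL ES spec, Clear() is only valid while the recorder is in
+ // the SL_RECORDSTATE_STOPPED state, which was entered right above.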
+ if (LOG_ON_ERROR((*simple_buffer_queue_)->Clear(simple_buffer_queue_))) {
+ return -1;
+ }
+ thread_checker_opensles_.Detach();
+ initialized_ = false;
+ recording_ = false;
+ return 0;
+}
+
+void OpenSLESRecorder::AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) {
+ ALOGD("AttachAudioBuffer");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ RTC_CHECK(audio_buffer);
+ audio_device_buffer_ = audio_buffer;
+ // Ensure that the audio device buffer is informed about the native sample
+ // rate used on the recording side.
+ const int sample_rate_hz = audio_parameters_.sample_rate();
+ ALOGD("SetRecordingSampleRate(%d)", sample_rate_hz);
+ audio_device_buffer_->SetRecordingSampleRate(sample_rate_hz);
+ // Ensure that the audio device buffer is informed about the number of
+ // channels preferred by the OS on the recording side.
+ const size_t channels = audio_parameters_.channels();
+ ALOGD("SetRecordingChannels(%zu)", channels);
+ audio_device_buffer_->SetRecordingChannels(channels);
+ // Allocate memory for internal data buffers given the existing audio
+ // parameters.
+ AllocateDataBuffers();
+}
+
+int OpenSLESRecorder::EnableBuiltInAEC(bool enable) {
+ ALOGD("EnableBuiltInAEC(%d)", enable);
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ ALOGE("Not implemented");
+ return 0;
+}
+
+int OpenSLESRecorder::EnableBuiltInAGC(bool enable) {
+ ALOGD("EnableBuiltInAGC(%d)", enable);
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ ALOGE("Not implemented");
+ return 0;
+}
+
+int OpenSLESRecorder::EnableBuiltInNS(bool enable) {
+ ALOGD("EnableBuiltInNS(%d)", enable);
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ ALOGE("Not implemented");
+ return 0;
+}
+
+bool OpenSLESRecorder::ObtainEngineInterface() {
+ ALOGD("ObtainEngineInterface");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (engine_)
+ return true;
+ // Get access to (or create if not already existing) the global OpenSL Engine
+ // object.
+ SLObjectItf engine_object = audio_manager_->GetOpenSLEngine();
+ if (engine_object == nullptr) {
+ ALOGE("Failed to access the global OpenSL engine");
+ return false;
+ }
+ // Get the SL Engine Interface which is implicit.
+ if (LOG_ON_ERROR(
+ (*engine_object)
+ ->GetInterface(engine_object, SL_IID_ENGINE, &engine_))) {
+ return false;
+ }
+ return true;
+}
+
+bool OpenSLESRecorder::CreateAudioRecorder() {
+ ALOGD("CreateAudioRecorder");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (recorder_object_.Get())
+ return true;
+ RTC_DCHECK(!recorder_);
+ RTC_DCHECK(!simple_buffer_queue_);
+
+ // Audio source configuration.
+ SLDataLocator_IODevice mic_locator = {SL_DATALOCATOR_IODEVICE,
+ SL_IODEVICE_AUDIOINPUT,
+ SL_DEFAULTDEVICEID_AUDIOINPUT, NULL};
+ SLDataSource audio_source = {&mic_locator, NULL};
+
+ // Audio sink configuration.
+ SLDataLocator_AndroidSimpleBufferQueue buffer_queue = {
+ SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE,
+ static_cast<SLuint32>(kNumOfOpenSLESBuffers)};
+ SLDataSink audio_sink = {&buffer_queue, &pcm_format_};
+
+ // Create the audio recorder object (requires the RECORD_AUDIO permission).
+ // Do not realize the recorder yet. Set the configuration first.
+ const SLInterfaceID interface_id[] = {SL_IID_ANDROIDSIMPLEBUFFERQUEUE,
+ SL_IID_ANDROIDCONFIGURATION};
+ const SLboolean interface_required[] = {SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE};
+ if (LOG_ON_ERROR((*engine_)->CreateAudioRecorder(
+ engine_, recorder_object_.Receive(), &audio_source, &audio_sink,
+ arraysize(interface_id), interface_id, interface_required))) {
+ return false;
+ }
+
+ // Configure the audio recorder (before it is realized).
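+ // Android-specific keys such as the recording preset below must be applied
+ // between CreateAudioRecorder() and Realize(); configuration applied after
+ // realization is not guaranteed to take effect (per the Android OpenSL ES
+ // documentation).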
+ SLAndroidConfigurationItf recorder_config;
+ if (LOG_ON_ERROR((recorder_object_->GetInterface(recorder_object_.Get(),
+ SL_IID_ANDROIDCONFIGURATION,
+ &recorder_config)))) {
+ return false;
+ }
+
+ // Uses the default microphone tuned for audio communication.
+ // Note that SL_ANDROID_RECORDING_PRESET_VOICE_RECOGNITION leads to a fast
+ // track but also excludes usage of required effects like AEC, AGC and NS.
+ // SL_ANDROID_RECORDING_PRESET_VOICE_COMMUNICATION
+ SLint32 stream_type = SL_ANDROID_RECORDING_PRESET_VOICE_COMMUNICATION;
+ if (LOG_ON_ERROR(((*recorder_config)
+ ->SetConfiguration(recorder_config,
+ SL_ANDROID_KEY_RECORDING_PRESET,
+ &stream_type, sizeof(SLint32))))) {
+ return false;
+ }
+
+ // The audio recorder can now be realized (in synchronous mode).
+ if (LOG_ON_ERROR((recorder_object_->Realize(recorder_object_.Get(),
+ SL_BOOLEAN_FALSE)))) {
+ return false;
+ }
+
+ // Get the implicit recorder interface (SL_IID_RECORD).
+ if (LOG_ON_ERROR((recorder_object_->GetInterface(
+ recorder_object_.Get(), SL_IID_RECORD, &recorder_)))) {
+ return false;
+ }
+
+ // Get the simple buffer queue interface (SL_IID_ANDROIDSIMPLEBUFFERQUEUE).
+ // It was explicitly requested.
+ if (LOG_ON_ERROR((recorder_object_->GetInterface(
+ recorder_object_.Get(), SL_IID_ANDROIDSIMPLEBUFFERQUEUE,
+ &simple_buffer_queue_)))) {
+ return false;
+ }
+
+ // Register the input callback for the simple buffer queue.
+ // This callback will be called when receiving new data from the device.
+ if (LOG_ON_ERROR(((*simple_buffer_queue_)
+ ->RegisterCallback(simple_buffer_queue_,
+ SimpleBufferQueueCallback, this)))) {
+ return false;
+ }
+ return true;
+}
+
+void OpenSLESRecorder::DestroyAudioRecorder() {
+ ALOGD("DestroyAudioRecorder");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ if (!recorder_object_.Get())
+ return;
+ (*simple_buffer_queue_)
+ ->RegisterCallback(simple_buffer_queue_, nullptr, nullptr);
+ recorder_object_.Reset();
+ recorder_ = nullptr;
+ simple_buffer_queue_ = nullptr;
+}
+
+void OpenSLESRecorder::SimpleBufferQueueCallback(
+ SLAndroidSimpleBufferQueueItf buffer_queue,
+ void* context) {
+ OpenSLESRecorder* stream = static_cast<OpenSLESRecorder*>(context);
+ stream->ReadBufferQueue();
+}
+
+void OpenSLESRecorder::AllocateDataBuffers() {
+ ALOGD("AllocateDataBuffers");
+ RTC_DCHECK(thread_checker_.IsCurrent());
+ RTC_DCHECK(!simple_buffer_queue_);
+ RTC_CHECK(audio_device_buffer_);
+ // Create a modified audio buffer class which allows us to deliver any number
+ // of samples (and not only multiple of 10ms) to match the native audio unit
+ // buffer size.
+ ALOGD("frames per native buffer: %zu", audio_parameters_.frames_per_buffer());
+ ALOGD("frames per 10ms buffer: %zu",
+ audio_parameters_.frames_per_10ms_buffer());
+ ALOGD("bytes per native buffer: %zu", audio_parameters_.GetBytesPerBuffer());
+ ALOGD("native sample rate: %d", audio_parameters_.sample_rate());
+ RTC_DCHECK(audio_device_buffer_);
+ fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+ // Allocate queue of audio buffers that stores recorded audio samples.
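+ // For example, with 192 frames per native buffer and mono capture, each
+ // buffer below holds 192 samples (384 bytes at 16 bits per sample), and
+ // kNumOfOpenSLESBuffers such buffers are allocated.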
+  const int buffer_size_samples =
+      audio_parameters_.frames_per_buffer() * audio_parameters_.channels();
+  audio_buffers_.reset(
+      new std::unique_ptr<SLint16[]>[kNumOfOpenSLESBuffers]);
+  for (int i = 0; i < kNumOfOpenSLESBuffers; ++i) {
+    audio_buffers_[i].reset(new SLint16[buffer_size_samples]);
+  }
+}
+
+void OpenSLESRecorder::ReadBufferQueue() {
+  RTC_DCHECK(thread_checker_opensles_.IsCurrent());
+  SLuint32 state = GetRecordState();
+  if (state != SL_RECORDSTATE_RECORDING) {
+    ALOGW("Buffer callback in non-recording state!");
+    return;
+  }
+  // Check the delta time between two successive callbacks and provide a
+  // warning if it becomes very large.
+  // TODO(henrika): using 150ms as upper limit but this value is rather random.
+  const uint32_t current_time = rtc::Time();
+  const uint32_t diff = current_time - last_rec_time_;
+  if (diff > 150) {
+    ALOGW("Bad OpenSL ES record timing, dT=%u [ms]", diff);
+  }
+  last_rec_time_ = current_time;
+  // Send recorded audio data to the WebRTC sink.
+  // TODO(henrika): fix delay estimates. It is OK to use fixed values for now
+  // since there is no support to turn off built-in EC in combination with
+  // OpenSL ES anyhow. Hence, as is, the WebRTC based AEC (which would use
+  // these estimates) will never be active.
+  fine_audio_buffer_->DeliverRecordedData(
+      rtc::ArrayView<const int16_t>(
+          audio_buffers_[buffer_index_].get(),
+          audio_parameters_.frames_per_buffer() * audio_parameters_.channels()),
+      25);
+  // Enqueue the utilized audio buffer and use it for recording again.
+  EnqueueAudioBuffer();
+}
+
+bool OpenSLESRecorder::EnqueueAudioBuffer() {
+  SLresult err =
+      (*simple_buffer_queue_)
+          ->Enqueue(
+              simple_buffer_queue_,
+              reinterpret_cast<SLint8*>(audio_buffers_[buffer_index_].get()),
+              audio_parameters_.GetBytesPerBuffer());
+  if (SL_RESULT_SUCCESS != err) {
+    ALOGE("Enqueue failed: %s", GetSLErrorString(err));
+    return false;
+  }
+  buffer_index_ = (buffer_index_ + 1) % kNumOfOpenSLESBuffers;
+  return true;
+}
+
+SLuint32 OpenSLESRecorder::GetRecordState() const {
+  RTC_DCHECK(recorder_);
+  SLuint32 state;
+  SLresult err = (*recorder_)->GetRecordState(recorder_, &state);
+  if (SL_RESULT_SUCCESS != err) {
+    ALOGE("GetRecordState failed: %s", GetSLErrorString(err));
+  }
+  return state;
+}
+
+SLAndroidSimpleBufferQueueState OpenSLESRecorder::GetBufferQueueState() const {
+  RTC_DCHECK(simple_buffer_queue_);
+  // state.count: Number of buffers currently in the queue.
+  // state.index: Index of the currently filling buffer. This is a linear index
+  // that keeps a cumulative count of the number of buffers recorded.
+  SLAndroidSimpleBufferQueueState state;
+  SLresult err =
+      (*simple_buffer_queue_)->GetState(simple_buffer_queue_, &state);
+  if (SL_RESULT_SUCCESS != err) {
+    ALOGE("GetState failed: %s", GetSLErrorString(err));
+  }
+  return state;
+}
+
+void OpenSLESRecorder::LogBufferState() const {
+  SLAndroidSimpleBufferQueueState state = GetBufferQueueState();
+  ALOGD("state.count:%d state.index:%d", state.count, state.index);
+}
+
+SLuint32 OpenSLESRecorder::GetBufferCount() {
+  SLAndroidSimpleBufferQueueState state = GetBufferQueueState();
+  return state.count;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.h b/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.h
new file mode 100644
index 0000000000..e659c3c157
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/android/opensles_recorder.h
@@ -0,0 +1,193 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_RECORDER_H_
+#define MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_RECORDER_H_
+
+#include <SLES/OpenSLES.h>
+#include <SLES/OpenSLES_Android.h>
+#include <SLES/OpenSLES_AndroidConfiguration.h>
+
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/android/audio_common.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/android/opensles_common.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "modules/utility/include/helpers_android.h"
+
+namespace webrtc {
+
+class FineAudioBuffer;
+
+// Implements 16-bit mono PCM audio input support for Android using the
+// C based OpenSL ES API. No calls from C/C++ to Java using JNI are made.
+//
+// An instance must be created and destroyed on one and the same thread.
+// All public methods must also be called on the same thread. A thread checker
+// will RTC_DCHECK if any method is called on an invalid thread. Recorded audio
+// buffers are provided on a dedicated internal thread managed by the OpenSL
+// ES layer.
+//
+// The existing design forces the user to call InitRecording() after
+// StopRecording() to be able to call StartRecording() again. This is in line
+// with how the Java-based implementation works.
+//
+// As of API level 21, lower latency audio input is supported on select
+// devices. To take advantage of this feature, first confirm that lower latency
+// output is available. The capability for lower latency output is a
+// prerequisite for the lower latency input feature. Then, create an
+// AudioRecorder with the same sample rate and buffer size as would be used for
+// output. OpenSL ES interfaces for input effects preclude the lower latency
+// path.
+// See https://developer.android.com/ndk/guides/audio/opensl-prog-notes.html
+// for more details.
class OpenSLESRecorder {
+ public:
+  // Beginning with API level 17 (Android 4.2), a buffer count of 2 or more is
+  // required for lower latency. Beginning with API level 18 (Android 4.3), a
+  // buffer count of 1 is sufficient for lower latency. In addition, the buffer
+  // size and sample rate must be compatible with the device's native input
+  // configuration provided via the audio manager at construction.
+  // TODO(henrika): perhaps set this value dynamically based on OS version.
+  static const int kNumOfOpenSLESBuffers = 2;
+
+  explicit OpenSLESRecorder(AudioManager* audio_manager);
+  ~OpenSLESRecorder();
+
+  int Init();
+  int Terminate();
+
+  int InitRecording();
+  bool RecordingIsInitialized() const { return initialized_; }
+
+  int StartRecording();
+  int StopRecording();
+  bool Recording() const { return recording_; }
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audio_buffer);
+
+  // TODO(henrika): add support using OpenSL ES APIs when available.
+  int EnableBuiltInAEC(bool enable);
+  int EnableBuiltInAGC(bool enable);
+  int EnableBuiltInNS(bool enable);
+
+ private:
+  // Obtains the SL Engine Interface from the existing global Engine object.
+  // The interface exposes creation methods of all the OpenSL ES object types.
+  // This method defines the `engine_` member variable.
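+  // Note: the engine object itself is owned and shared via the AudioManager;
+  // only the SLEngineItf handle obtained from it is cached in this class.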
+  bool ObtainEngineInterface();
+
+  // Creates/destroys the audio recorder and the simple-buffer queue object.
+  bool CreateAudioRecorder();
+  void DestroyAudioRecorder();
+
+  // Allocates memory for audio buffers which will be used to capture audio
+  // via the SLAndroidSimpleBufferQueueItf interface.
+  void AllocateDataBuffers();
+
+  // These callback methods are called when data has been written to the input
+  // buffer queue. They are both called from an internal "OpenSL ES thread"
+  // which is not attached to the Dalvik VM.
+  static void SimpleBufferQueueCallback(SLAndroidSimpleBufferQueueItf caller,
+                                        void* context);
+  void ReadBufferQueue();
+
+  // Wraps calls to SLAndroidSimpleBufferQueueItf::Enqueue(). It can be called
+  // both on the main thread (but before recording has started) and from the
+  // internal audio thread while input streaming is active. It uses
+  // `simple_buffer_queue_` but no lock is needed since the initial calls from
+  // the main thread and the native callback thread are mutually exclusive.
+  bool EnqueueAudioBuffer();
+
+  // Returns the current recorder state.
+  SLuint32 GetRecordState() const;
+
+  // Returns the current buffer queue state.
+  SLAndroidSimpleBufferQueueState GetBufferQueueState() const;
+
+  // Number of buffers currently in the queue.
+  SLuint32 GetBufferCount();
+
+  // Prints a log message of the current queue state. Can be used for debugging
+  // purposes.
+  void LogBufferState() const;
+
+  // Ensures that methods are called from the same thread as this object is
+  // created on.
+  SequenceChecker thread_checker_;
+
+  // Stores the thread ID in the first call to SimpleBufferQueueCallback() from
+  // the internal non-application thread which is not attached to the Dalvik
+  // JVM. Detached during construction of this object.
+  SequenceChecker thread_checker_opensles_;
+
+  // Raw pointer to the audio manager injected at construction. Used to cache
+  // audio parameters and to access the global SL engine object needed by the
+  // ObtainEngineInterface() method. The audio manager outlives any instance of
+  // this class.
+  AudioManager* const audio_manager_;
+
+  // Contains audio parameters provided to this class at construction by the
+  // AudioManager.
+  const AudioParameters audio_parameters_;
+
+  // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the
+  // AudioDeviceModuleImpl class and called by AudioDeviceModule::Create().
+  AudioDeviceBuffer* audio_device_buffer_;
+
+  // PCM-type format definition.
+  // TODO(henrika): add support for SLAndroidDataFormat_PCM_EX (android-21) if
+  // 32-bit float representation is needed.
+  SLDataFormat_PCM pcm_format_;
+
+  bool initialized_;
+  bool recording_;
+
+  // This interface exposes creation methods for all the OpenSL ES object
+  // types. It is the OpenSL ES API entry point.
+  SLEngineItf engine_;
+
+  // The audio recorder media object records audio to the destination specified
+  // by the data sink, capturing it from the input specified by the data
+  // source.
+  webrtc::ScopedSLObjectItf recorder_object_;
+
+  // This interface is supported on the audio recorder object and it controls
+  // the state of the audio recorder.
+  SLRecordItf recorder_;
+
+  // The Android Simple Buffer Queue interface is supported on the audio
+  // recorder. For recording, an app should enqueue empty buffers. When a
+  // registered callback sends notification that the system has finished
+  // writing data to the buffer, the app can read the buffer.
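+  // In steady state this is a ping-pong over kNumOfOpenSLESBuffers buffers:
+  // while the device fills buffer n, buffer n-1 is delivered to WebRTC in the
+  // callback and then immediately re-enqueued by EnqueueAudioBuffer().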
+  SLAndroidSimpleBufferQueueItf simple_buffer_queue_;
+
+  // Consumes audio of native buffer size and feeds the WebRTC layer with 10ms
+  // chunks of audio.
+  std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+
+  // Queue of audio buffers to be used by the recorder object for capturing
+  // audio. They will be used in a round-robin way and the size of each buffer
+  // is given by AudioParameters::frames_per_buffer(), i.e., it corresponds to
+  // the native OpenSL ES buffer size.
+  std::unique_ptr<std::unique_ptr<SLint16[]>[]> audio_buffers_;
+
+  // Keeps track of the active audio buffer 'n' in the audio_buffers_[n] queue.
+  // Example (kNumOfOpenSLESBuffers = 2): counts 0, 1, 0, 1, ...
+  int buffer_index_;
+
+  // Last time the OpenSL ES layer delivered recorded audio data.
+  uint32_t last_rec_time_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_ANDROID_OPENSLES_RECORDER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_buffer.cc b/third_party/libwebrtc/modules/audio_device/audio_device_buffer.cc
new file mode 100644
index 0000000000..b1be445e0d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_buffer.cc
@@ -0,0 +1,518 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/audio_device_buffer.h"
+
+#include <string.h>
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/time_utils.h"
+#include "rtc_base/trace_event.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+static const char kTimerQueueName[] = "AudioDeviceBufferTimer";
+
+// Time between two successive calls to LogStats().
+static const size_t kTimerIntervalInSeconds = 10;
+static const size_t kTimerIntervalInMilliseconds =
+    kTimerIntervalInSeconds * rtc::kNumMillisecsPerSec;
+// Min time required to qualify an audio session as a "call". If playout or
+// recording has been active for less than this time we will not store any
+// logs or UMA stats but instead consider the call as too short.
+static const size_t kMinValidCallTimeTimeInSeconds = 10; +static const size_t kMinValidCallTimeTimeInMilliseconds = + kMinValidCallTimeTimeInSeconds * rtc::kNumMillisecsPerSec; +#ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE +static const double k2Pi = 6.28318530717959; +#endif + +AudioDeviceBuffer::AudioDeviceBuffer(TaskQueueFactory* task_queue_factory) + : task_queue_(task_queue_factory->CreateTaskQueue( + kTimerQueueName, + TaskQueueFactory::Priority::NORMAL)), + audio_transport_cb_(nullptr), + rec_sample_rate_(0), + play_sample_rate_(0), + rec_channels_(0), + play_channels_(0), + playing_(false), + recording_(false), + typing_status_(false), + play_delay_ms_(0), + rec_delay_ms_(0), + num_stat_reports_(0), + last_timer_task_time_(0), + rec_stat_count_(0), + play_stat_count_(0), + play_start_time_(0), + only_silence_recorded_(true), + log_stats_(false) { + RTC_LOG(LS_INFO) << "AudioDeviceBuffer::ctor"; +#ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE + phase_ = 0.0; + RTC_LOG(LS_WARNING) << "AUDIO_DEVICE_PLAYS_SINUS_TONE is defined!"; +#endif +} + +AudioDeviceBuffer::~AudioDeviceBuffer() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + RTC_DCHECK(!playing_); + RTC_DCHECK(!recording_); + RTC_LOG(LS_INFO) << "AudioDeviceBuffer::~dtor"; +} + +int32_t AudioDeviceBuffer::RegisterAudioCallback( + AudioTransport* audio_callback) { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + RTC_DLOG(LS_INFO) << __FUNCTION__; + if (playing_ || recording_) { + RTC_LOG(LS_ERROR) << "Failed to set audio transport since media was active"; + return -1; + } + audio_transport_cb_ = audio_callback; + return 0; +} + +void AudioDeviceBuffer::StartPlayout() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + // TODO(henrika): allow for usage of DCHECK(!playing_) here instead. Today the + // ADM allows calling Start(), Start() by ignoring the second call but it + // makes more sense to only allow one call. + if (playing_) { + return; + } + RTC_DLOG(LS_INFO) << __FUNCTION__; + // Clear members tracking playout stats and do it on the task queue. + task_queue_.PostTask([this] { ResetPlayStats(); }); + // Start a periodic timer based on task queue if not already done by the + // recording side. + if (!recording_) { + StartPeriodicLogging(); + } + const int64_t now_time = rtc::TimeMillis(); + // Clear members that are only touched on the main (creating) thread. + play_start_time_ = now_time; + playing_ = true; +} + +void AudioDeviceBuffer::StartRecording() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + if (recording_) { + return; + } + RTC_DLOG(LS_INFO) << __FUNCTION__; + // Clear members tracking recording stats and do it on the task queue. + task_queue_.PostTask([this] { ResetRecStats(); }); + // Start a periodic timer based on task queue if not already done by the + // playout side. + if (!playing_) { + StartPeriodicLogging(); + } + // Clear members that will be touched on the main (creating) thread. + rec_start_time_ = rtc::TimeMillis(); + recording_ = true; + // And finally a member which can be modified on the native audio thread. + // It is safe to do so since we know by design that the owning ADM has not + // yet started the native audio recording. + only_silence_recorded_ = true; +} + +void AudioDeviceBuffer::StopPlayout() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + if (!playing_) { + return; + } + RTC_DLOG(LS_INFO) << __FUNCTION__; + playing_ = false; + // Stop periodic logging if no more media is active. 
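+  // The playout and recording sides share one periodic logging timer; it
+  // keeps running for as long as at least one direction is active.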
+  if (!recording_) {
+    StopPeriodicLogging();
+  }
+  RTC_LOG(LS_INFO) << "total playout time: "
+                   << rtc::TimeSince(play_start_time_);
+}
+
+void AudioDeviceBuffer::StopRecording() {
+  RTC_DCHECK_RUN_ON(&main_thread_checker_);
+  if (!recording_) {
+    return;
+  }
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  recording_ = false;
+  // Stop periodic logging if no more media is active.
+  if (!playing_) {
+    StopPeriodicLogging();
+  }
+  // Add UMA histogram to keep track of the case when only zeros have been
+  // recorded. Measurements (max of absolute level) are taken twice per second,
+  // which means that if e.g. 10 seconds of audio has been recorded, a total of
+  // 20 level estimates must all be identical to zero to trigger the histogram.
+  // `only_silence_recorded_` can only be cleared on the native audio thread
+  // that drives audio capture, but we know by design that the audio has
+  // stopped when this method is called, hence there should not be any
+  // conflicts. Also, the fact that `only_silence_recorded_` can be affected
+  // during the complete call makes chances of conflicts with potentially one
+  // last callback very small.
+  const size_t time_since_start = rtc::TimeSince(rec_start_time_);
+  if (time_since_start > kMinValidCallTimeTimeInMilliseconds) {
+    const int only_zeros = static_cast<int>(only_silence_recorded_);
+    RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.RecordedOnlyZeros", only_zeros);
+    RTC_LOG(LS_INFO) << "HISTOGRAM(WebRTC.Audio.RecordedOnlyZeros): "
+                     << only_zeros;
+  }
+  RTC_LOG(LS_INFO) << "total recording time: " << time_since_start;
+}
+
+int32_t AudioDeviceBuffer::SetRecordingSampleRate(uint32_t fsHz) {
+  RTC_LOG(LS_INFO) << "SetRecordingSampleRate(" << fsHz << ")";
+  rec_sample_rate_ = fsHz;
+  return 0;
+}
+
+int32_t AudioDeviceBuffer::SetPlayoutSampleRate(uint32_t fsHz) {
+  RTC_LOG(LS_INFO) << "SetPlayoutSampleRate(" << fsHz << ")";
+  play_sample_rate_ = fsHz;
+  return 0;
+}
+
+uint32_t AudioDeviceBuffer::RecordingSampleRate() const {
+  return rec_sample_rate_;
+}
+
+uint32_t AudioDeviceBuffer::PlayoutSampleRate() const {
+  return play_sample_rate_;
+}
+
+int32_t AudioDeviceBuffer::SetRecordingChannels(size_t channels) {
+  RTC_LOG(LS_INFO) << "SetRecordingChannels(" << channels << ")";
+  rec_channels_ = channels;
+  return 0;
+}
+
+int32_t AudioDeviceBuffer::SetPlayoutChannels(size_t channels) {
+  RTC_LOG(LS_INFO) << "SetPlayoutChannels(" << channels << ")";
+  play_channels_ = channels;
+  return 0;
+}
+
+size_t AudioDeviceBuffer::RecordingChannels() const {
+  return rec_channels_;
+}
+
+size_t AudioDeviceBuffer::PlayoutChannels() const {
+  return play_channels_;
+}
+
+int32_t AudioDeviceBuffer::SetTypingStatus(bool typing_status) {
+  typing_status_ = typing_status;
+  return 0;
+}
+
+void AudioDeviceBuffer::SetVQEData(int play_delay_ms, int rec_delay_ms) {
+  play_delay_ms_ = play_delay_ms;
+  rec_delay_ms_ = rec_delay_ms;
+}
+
+int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
+                                             size_t samples_per_channel) {
+  return SetRecordedBuffer(audio_buffer, samples_per_channel, absl::nullopt);
+}
+
+int32_t AudioDeviceBuffer::SetRecordedBuffer(
+    const void* audio_buffer,
+    size_t samples_per_channel,
+    absl::optional<int64_t> capture_timestamp_ns) {
+  // Copy the complete input buffer to the local buffer.
+  const size_t old_size = rec_buffer_.size();
+  rec_buffer_.SetData(static_cast<const int16_t*>(audio_buffer),
+                      rec_channels_ * samples_per_channel);
+  // Keep track of the size of the recording buffer. Only updated when the
+  // size changes, which is a rare event.
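+  // rtc::BufferT::SetData() copies the samples and resizes on demand, so a
+  // size change here simply reflects a new native buffer size.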
+ if (old_size != rec_buffer_.size()) { + RTC_LOG(LS_INFO) << "Size of recording buffer: " << rec_buffer_.size(); + } + + if (capture_timestamp_ns) { + capture_timestamp_ns_ = + rtc::kNumNanosecsPerMicrosec * + timestamp_aligner_.TranslateTimestamp( + *capture_timestamp_ns / rtc::kNumNanosecsPerMicrosec, + rtc::TimeMicros()); + } + // Derive a new level value twice per second and check if it is non-zero. + int16_t max_abs = 0; + RTC_DCHECK_LT(rec_stat_count_, 50); + if (++rec_stat_count_ >= 50) { + // Returns the largest absolute value in a signed 16-bit vector. + max_abs = WebRtcSpl_MaxAbsValueW16(rec_buffer_.data(), rec_buffer_.size()); + rec_stat_count_ = 0; + // Set `only_silence_recorded_` to false as soon as at least one detection + // of a non-zero audio packet is found. It can only be restored to true + // again by restarting the call. + if (max_abs > 0) { + only_silence_recorded_ = false; + } + } + // Update recording stats which is used as base for periodic logging of the + // audio input state. + UpdateRecStats(max_abs, samples_per_channel); + return 0; +} + +int32_t AudioDeviceBuffer::DeliverRecordedData() { + if (!audio_transport_cb_) { + RTC_LOG(LS_WARNING) << "Invalid audio transport"; + return 0; + } + const size_t frames = rec_buffer_.size() / rec_channels_; + const size_t bytes_per_frame = rec_channels_ * sizeof(int16_t); + uint32_t new_mic_level_dummy = 0; + uint32_t total_delay_ms = play_delay_ms_ + rec_delay_ms_; + int32_t res = audio_transport_cb_->RecordedDataIsAvailable( + rec_buffer_.data(), frames, bytes_per_frame, rec_channels_, + rec_sample_rate_, total_delay_ms, 0, 0, typing_status_, + new_mic_level_dummy, capture_timestamp_ns_); + if (res == -1) { + RTC_LOG(LS_ERROR) << "RecordedDataIsAvailable() failed"; + } + return 0; +} + +int32_t AudioDeviceBuffer::RequestPlayoutData(size_t samples_per_channel) { + TRACE_EVENT1("webrtc", "AudioDeviceBuffer::RequestPlayoutData", + "samples_per_channel", samples_per_channel); + + // The consumer can change the requested size on the fly and we therefore + // resize the buffer accordingly. Also takes place at the first call to this + // method. + const size_t total_samples = play_channels_ * samples_per_channel; + if (play_buffer_.size() != total_samples) { + play_buffer_.SetSize(total_samples); + RTC_LOG(LS_INFO) << "Size of playout buffer: " << play_buffer_.size(); + } + + size_t num_samples_out(0); + // It is currently supported to start playout without a valid audio + // transport object. Leads to warning and silence. + if (!audio_transport_cb_) { + RTC_LOG(LS_WARNING) << "Invalid audio transport"; + return 0; + } + + // Retrieve new 16-bit PCM audio data using the audio transport instance. + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; + const size_t bytes_per_frame = play_channels_ * sizeof(int16_t); + uint32_t res = audio_transport_cb_->NeedMorePlayData( + samples_per_channel, bytes_per_frame, play_channels_, play_sample_rate_, + play_buffer_.data(), num_samples_out, &elapsed_time_ms, &ntp_time_ms); + if (res != 0) { + RTC_LOG(LS_ERROR) << "NeedMorePlayData() failed"; + } + + // Derive a new level value twice per second. + int16_t max_abs = 0; + RTC_DCHECK_LT(play_stat_count_, 50); + if (++play_stat_count_ >= 50) { + // Returns the largest absolute value in a signed 16-bit vector. + max_abs = + WebRtcSpl_MaxAbsValueW16(play_buffer_.data(), play_buffer_.size()); + play_stat_count_ = 0; + } + // Update playout stats which is used as base for periodic logging of the + // audio output state. 
+  UpdatePlayStats(max_abs, num_samples_out / play_channels_);
+  return static_cast<int32_t>(num_samples_out / play_channels_);
+}
+
+int32_t AudioDeviceBuffer::GetPlayoutData(void* audio_buffer) {
+  RTC_DCHECK_GT(play_buffer_.size(), 0);
+#ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE
+  const double phase_increment =
+      k2Pi * 440.0 / static_cast<double>(play_sample_rate_);
+  int16_t* destination_r = reinterpret_cast<int16_t*>(audio_buffer);
+  if (play_channels_ == 1) {
+    for (size_t i = 0; i < play_buffer_.size(); ++i) {
+      destination_r[i] = static_cast<int16_t>((sin(phase_) * (1 << 14)));
+      phase_ += phase_increment;
+    }
+  } else if (play_channels_ == 2) {
+    for (size_t i = 0; i < play_buffer_.size() / 2; ++i) {
+      destination_r[2 * i] = destination_r[2 * i + 1] =
+          static_cast<int16_t>((sin(phase_) * (1 << 14)));
+      phase_ += phase_increment;
+    }
+  }
+#else
+  memcpy(audio_buffer, play_buffer_.data(),
+         play_buffer_.size() * sizeof(int16_t));
+#endif
+  // Return samples per channel or number of frames.
+  return static_cast<int32_t>(play_buffer_.size() / play_channels_);
+}
+
+void AudioDeviceBuffer::StartPeriodicLogging() {
+  task_queue_.PostTask([this] { LogStats(AudioDeviceBuffer::LOG_START); });
+}
+
+void AudioDeviceBuffer::StopPeriodicLogging() {
+  task_queue_.PostTask([this] { LogStats(AudioDeviceBuffer::LOG_STOP); });
+}
+
+void AudioDeviceBuffer::LogStats(LogState state) {
+  RTC_DCHECK_RUN_ON(&task_queue_);
+  int64_t now_time = rtc::TimeMillis();
+
+  if (state == AudioDeviceBuffer::LOG_START) {
+    // Reset counters at start. We will not add any logging in this state but
+    // the timer will be started by posting a new (delayed) task.
+    num_stat_reports_ = 0;
+    last_timer_task_time_ = now_time;
+    log_stats_ = true;
+  } else if (state == AudioDeviceBuffer::LOG_STOP) {
+    // Stop logging and posting new tasks.
+    log_stats_ = false;
+  } else if (state == AudioDeviceBuffer::LOG_ACTIVE) {
+    // Keep logging unless logging was disabled while the task was posted.
+  }
+
+  // Avoid adding more logs since we are in STOP mode.
+  if (!log_stats_) {
+    return;
+  }
+
+  int64_t next_callback_time = now_time + kTimerIntervalInMilliseconds;
+  int64_t time_since_last = rtc::TimeDiff(now_time, last_timer_task_time_);
+  last_timer_task_time_ = now_time;
+
+  Stats stats;
+  {
+    MutexLock lock(&lock_);
+    stats = stats_;
+    stats_.max_rec_level = 0;
+    stats_.max_play_level = 0;
+  }
+
+  // Cache current sample rates from atomic members.
+  const uint32_t rec_sample_rate = rec_sample_rate_;
+  const uint32_t play_sample_rate = play_sample_rate_;
+
+  // Log the latest statistics but skip the first two rounds just after state
+  // was set to LOG_START to ensure that we have at least one full stable
+  // 10-second interval for sample-rate estimation. Hence, the first printed
+  // log will appear after ~20 seconds.
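+  // Example: with ~10 s between tasks and 480000 new recorded samples, the
+  // estimated rate is 48000 Hz; if the configured rate is also 48000 Hz, the
+  // logged rate diff is 0%.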
+  if (++num_stat_reports_ > 2 &&
+      static_cast<size_t>(time_since_last) > kTimerIntervalInMilliseconds / 2) {
+    uint32_t diff_samples = stats.rec_samples - last_stats_.rec_samples;
+    float rate = diff_samples / (static_cast<float>(time_since_last) / 1000.0);
+    uint32_t abs_diff_rate_in_percent = 0;
+    if (rec_sample_rate > 0 && rate > 0) {
+      abs_diff_rate_in_percent = static_cast<uint32_t>(
+          0.5f +
+          ((100.0f * std::abs(rate - rec_sample_rate)) / rec_sample_rate));
+      RTC_HISTOGRAM_PERCENTAGE("WebRTC.Audio.RecordSampleRateOffsetInPercent",
+                               abs_diff_rate_in_percent);
+      RTC_LOG(LS_INFO) << "[REC : " << time_since_last << "msec, "
+                       << rec_sample_rate / 1000 << "kHz] callbacks: "
+                       << stats.rec_callbacks - last_stats_.rec_callbacks
+                       << ", "
+                          "samples: "
+                       << diff_samples
+                       << ", "
+                          "rate: "
+                       << static_cast<int>(rate + 0.5)
+                       << ", "
+                          "rate diff: "
+                       << abs_diff_rate_in_percent
+                       << "%, "
+                          "level: "
+                       << stats.max_rec_level;
+    }
+
+    diff_samples = stats.play_samples - last_stats_.play_samples;
+    rate = diff_samples / (static_cast<float>(time_since_last) / 1000.0);
+    abs_diff_rate_in_percent = 0;
+    if (play_sample_rate > 0 && rate > 0) {
+      abs_diff_rate_in_percent = static_cast<uint32_t>(
+          0.5f +
+          ((100.0f * std::abs(rate - play_sample_rate)) / play_sample_rate));
+      RTC_HISTOGRAM_PERCENTAGE("WebRTC.Audio.PlayoutSampleRateOffsetInPercent",
+                               abs_diff_rate_in_percent);
+      RTC_LOG(LS_INFO) << "[PLAY: " << time_since_last << "msec, "
+                       << play_sample_rate / 1000 << "kHz] callbacks: "
+                       << stats.play_callbacks - last_stats_.play_callbacks
+                       << ", "
+                          "samples: "
+                       << diff_samples
+                       << ", "
+                          "rate: "
+                       << static_cast<int>(rate + 0.5)
+                       << ", "
+                          "rate diff: "
+                       << abs_diff_rate_in_percent
+                       << "%, "
+                          "level: "
+                       << stats.max_play_level;
+    }
+  }
+  last_stats_ = stats;
+
+  int64_t time_to_wait_ms = next_callback_time - rtc::TimeMillis();
+  RTC_DCHECK_GT(time_to_wait_ms, 0) << "Invalid timer interval";
+
+  // Keep posting new (delayed) tasks until the state is changed to LOG_STOP.
+  task_queue_.PostDelayedTask(
+      [this] { AudioDeviceBuffer::LogStats(AudioDeviceBuffer::LOG_ACTIVE); },
+      TimeDelta::Millis(time_to_wait_ms));
+}
+
+void AudioDeviceBuffer::ResetRecStats() {
+  RTC_DCHECK_RUN_ON(&task_queue_);
+  last_stats_.ResetRecStats();
+  MutexLock lock(&lock_);
+  stats_.ResetRecStats();
+}
+
+void AudioDeviceBuffer::ResetPlayStats() {
+  RTC_DCHECK_RUN_ON(&task_queue_);
+  last_stats_.ResetPlayStats();
+  MutexLock lock(&lock_);
+  stats_.ResetPlayStats();
+}
+
+void AudioDeviceBuffer::UpdateRecStats(int16_t max_abs,
+                                       size_t samples_per_channel) {
+  MutexLock lock(&lock_);
+  ++stats_.rec_callbacks;
+  stats_.rec_samples += samples_per_channel;
+  if (max_abs > stats_.max_rec_level) {
+    stats_.max_rec_level = max_abs;
+  }
+}
+
+void AudioDeviceBuffer::UpdatePlayStats(int16_t max_abs,
+                                        size_t samples_per_channel) {
+  MutexLock lock(&lock_);
+  ++stats_.play_callbacks;
+  stats_.play_samples += samples_per_channel;
+  if (max_abs > stats_.max_play_level) {
+    stats_.max_play_level = max_abs;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_buffer.h b/third_party/libwebrtc/modules/audio_device/audio_device_buffer.h
new file mode 100644
index 0000000000..eb681a7a68
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_buffer.h
@@ -0,0 +1,245 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_AUDIO_DEVICE_BUFFER_H_
+#define MODULES_AUDIO_DEVICE_AUDIO_DEVICE_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <atomic>
+
+#include "api/sequence_checker.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/task_queue.h"
+#include "rtc_base/thread_annotations.h"
+#include "rtc_base/timestamp_aligner.h"
+
+namespace webrtc {
+
+// Delta times between two successive playout callbacks are limited to this
+// value before being added to an internal array.
+const size_t kMaxDeltaTimeInMs = 500;
+// TODO(henrika): remove when no longer used by external client.
+const size_t kMaxBufferSizeBytes = 3840;  // 10ms in stereo @ 96kHz
+
+class AudioDeviceBuffer {
+ public:
+  enum LogState {
+    LOG_START = 0,
+    LOG_STOP,
+    LOG_ACTIVE,
+  };
+
+  struct Stats {
+    void ResetRecStats() {
+      rec_callbacks = 0;
+      rec_samples = 0;
+      max_rec_level = 0;
+    }
+
+    void ResetPlayStats() {
+      play_callbacks = 0;
+      play_samples = 0;
+      max_play_level = 0;
+    }
+
+    // Total number of recording callbacks where the source provides 10ms audio
+    // data each time.
+    uint64_t rec_callbacks = 0;
+
+    // Total number of playback callbacks where the sink asks for 10ms audio
+    // data each time.
+    uint64_t play_callbacks = 0;
+
+    // Total number of recorded audio samples.
+    uint64_t rec_samples = 0;
+
+    // Total number of played audio samples.
+    uint64_t play_samples = 0;
+
+    // Contains max level (max(abs(x))) of recorded audio packets over the last
+    // 10 seconds where a new measurement is done twice per second. The level
+    // is reset to zero at each call to LogStats().
+    int16_t max_rec_level = 0;
+
+    // Contains max level of played-out audio packets over the last 10 seconds
+    // where a new measurement is done twice per second.
+    int16_t max_play_level = 0;
+  };
+
+  explicit AudioDeviceBuffer(TaskQueueFactory* task_queue_factory);
+  virtual ~AudioDeviceBuffer();
+
+  int32_t RegisterAudioCallback(AudioTransport* audio_callback);
+
+  void StartPlayout();
+  void StartRecording();
+  void StopPlayout();
+  void StopRecording();
+
+  int32_t SetRecordingSampleRate(uint32_t fsHz);
+  int32_t SetPlayoutSampleRate(uint32_t fsHz);
+  uint32_t RecordingSampleRate() const;
+  uint32_t PlayoutSampleRate() const;
+
+  int32_t SetRecordingChannels(size_t channels);
+  int32_t SetPlayoutChannels(size_t channels);
+  size_t RecordingChannels() const;
+  size_t PlayoutChannels() const;
+
+  // TODO(bugs.webrtc.org/13621) Deprecate this function
+  virtual int32_t SetRecordedBuffer(const void* audio_buffer,
+                                    size_t samples_per_channel);
+
+  virtual int32_t SetRecordedBuffer(
+      const void* audio_buffer,
+      size_t samples_per_channel,
+      absl::optional<int64_t> capture_timestamp_ns);
+  virtual void SetVQEData(int play_delay_ms, int rec_delay_ms);
+  virtual int32_t DeliverRecordedData();
+  uint32_t NewMicLevel() const;
+
+  virtual int32_t RequestPlayoutData(size_t samples_per_channel);
+  virtual int32_t GetPlayoutData(void* audio_buffer);
+
+  int32_t SetTypingStatus(bool typing_status);
+
+ private:
+  // Starts/stops periodic logging of audio stats.
+  void StartPeriodicLogging();
+  void StopPeriodicLogging();
+
+  // Called periodically on the internal thread created by the TaskQueue.
+  // Updates some stats but does it on the task queue to ensure that access of
+  // members is serialized, hence avoiding usage of locks.
+  // state = LOG_START => members are initialized and the timer starts.
+  // state = LOG_STOP => no logs are printed and the timer stops.
+  // state = LOG_ACTIVE => logs are printed and the timer is kept alive.
+  void LogStats(LogState state);
+
+  // Updates counters in each play/record callback. These counters are later
+  // (periodically) read by LogStats() using a lock.
+  void UpdateRecStats(int16_t max_abs, size_t samples_per_channel);
+  void UpdatePlayStats(int16_t max_abs, size_t samples_per_channel);
+
+  // Clears all members tracking stats for recording and playout.
+  // These methods both run on the task queue.
+  void ResetRecStats();
+  void ResetPlayStats();
+
+  // This object lives on the main (creating) thread and most methods are
+  // called on that same thread. When audio has started, some methods will be
+  // called on either a native audio thread for playout or a native thread for
+  // recording. Some members are not annotated since they are "protected by
+  // design" and adding e.g. a race checker can cause failures for very few
+  // edge cases and it is IMHO not worth the risk to use them in this class.
+  // TODO(henrika): see if it is possible to refactor and annotate all members.
+
+  // Main thread on which this object is created.
+  SequenceChecker main_thread_checker_;
+
+  Mutex lock_;
+
+  // Task queue used to invoke LogStats() periodically. Tasks are executed on a
+  // worker thread but it does not necessarily have to be the same thread for
+  // each task.
+  rtc::TaskQueue task_queue_;
+
+  // Raw pointer to the AudioTransport instance. Supplied to
+  // RegisterAudioCallback() and it must outlive this object. It is not
+  // possible to change this member while any media is active. It is possible
+  // to start media without calling RegisterAudioCallback() but that will lead
+  // to ignored audio callbacks in both directions where native audio will be
+  // active but no audio samples will be transported.
+  AudioTransport* audio_transport_cb_;
+
+  // Sample rate in Hertz. Accessed atomically.
+  std::atomic<uint32_t> rec_sample_rate_;
+  std::atomic<uint32_t> play_sample_rate_;
+
+  // Number of audio channels. Accessed atomically.
+  std::atomic<size_t> rec_channels_;
+  std::atomic<size_t> play_channels_;
+
+  // Keeps track of if playout/recording are active or not. A combination
+  // of these states is used to determine when to start and stop the timer.
+  // Only used on the creating thread and not used to control any media flow.
+  bool playing_ RTC_GUARDED_BY(main_thread_checker_);
+  bool recording_ RTC_GUARDED_BY(main_thread_checker_);
+
+  // Buffer used for audio samples to be played out. Size can be changed
+  // dynamically. The 16-bit samples are interleaved, hence the size is
+  // proportional to the number of channels.
+  rtc::BufferT<int16_t> play_buffer_;
+
+  // Buffer used for recorded audio samples. Size can be changed dynamically.
+  rtc::BufferT<int16_t> rec_buffer_;
+
+  // Contains true if a key-press has been detected.
+  bool typing_status_;
+
+  // Delay values used by the AEC.
+  int play_delay_ms_;
+  int rec_delay_ms_;
+
+  // Capture timestamp.
+  absl::optional<int64_t> capture_timestamp_ns_;
+
+  // Counts number of times LogStats() has been called.
+  size_t num_stat_reports_ RTC_GUARDED_BY(task_queue_);
+
+  // Time stamp of the last timer task (drives logging).
+  int64_t last_timer_task_time_ RTC_GUARDED_BY(task_queue_);
+
+  // Counts number of audio callbacks modulo 50 to create a signal when
+  // a new storage of audio stats shall be done.
+  int16_t rec_stat_count_;
+  int16_t play_stat_count_;
+
+  // Time stamps of when playout and recording starts.
+  int64_t play_start_time_ RTC_GUARDED_BY(main_thread_checker_);
+  int64_t rec_start_time_ RTC_GUARDED_BY(main_thread_checker_);
+
+  // Contains counters for playout and recording statistics.
+  Stats stats_ RTC_GUARDED_BY(lock_);
+
+  // Stores current stats at each timer task. Used to calculate differences
+  // between two successive timer events.
+  Stats last_stats_ RTC_GUARDED_BY(task_queue_);
+
+  // Set to true at construction and modified to false as soon as one audio-
+  // level estimate larger than zero is detected.
+  bool only_silence_recorded_;
+
+  // Set to true when logging of audio stats is enabled for the first time in
+  // StartPeriodicLogging() and set to false by StopPeriodicLogging().
+  // Setting this member to false prevents (possibly invalid) log messages from
+  // being printed in the LogStats() task.
+  bool log_stats_ RTC_GUARDED_BY(task_queue_);
+
+  // Used for converting capture timestamps (received from AudioRecordThread
+  // via AudioRecordJni::DataIsRecorded) to the RTC clock.
+  rtc::TimestampAligner timestamp_aligner_;
+
+// Should *never* be defined in production builds. Only used for testing.
+// When defined, the output signal will be replaced by a sine tone at 440 Hz.
+#ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE
+  double phase_;
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_AUDIO_DEVICE_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_config.h b/third_party/libwebrtc/modules/audio_device/audio_device_config.h
new file mode 100644
index 0000000000..fa51747b67
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_config.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_CONFIG_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_CONFIG_H_
+
+// Enumerators
+//
+enum { GET_MIC_VOLUME_INTERVAL_MS = 1000 };
+
+// Platform specifics
+//
+#if defined(_WIN32)
+#if (_MSC_VER >= 1400)
+#if !defined(WEBRTC_DUMMY_FILE_DEVICES)
+// Windows Core Audio is the default audio layer in Windows.
+// Only supported for VS 2005 and higher.
+#define WEBRTC_WINDOWS_CORE_AUDIO_BUILD
+#endif
+#endif
+#endif
+
+#endif  // AUDIO_DEVICE_AUDIO_DEVICE_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_data_observer.cc b/third_party/libwebrtc/modules/audio_device/audio_device_data_observer.cc
new file mode 100644
index 0000000000..0524830327
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_data_observer.cc
@@ -0,0 +1,373 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/include/audio_device_data_observer.h"
+
+#include "api/make_ref_counted.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// A wrapper over AudioDeviceModule that registers itself as AudioTransport
+// callback and redirects the PCM data to the AudioDeviceDataObserver callback.
+class ADMWrapper : public AudioDeviceModule, public AudioTransport {
+ public:
+  ADMWrapper(rtc::scoped_refptr<AudioDeviceModule> impl,
+             AudioDeviceDataObserver* legacy_observer,
+             std::unique_ptr<AudioDeviceDataObserver> observer)
+      : impl_(impl),
+        legacy_observer_(legacy_observer),
+        observer_(std::move(observer)) {
+    is_valid_ = impl_.get() != nullptr;
+  }
+  ADMWrapper(AudioLayer audio_layer,
+             TaskQueueFactory* task_queue_factory,
+             AudioDeviceDataObserver* legacy_observer,
+             std::unique_ptr<AudioDeviceDataObserver> observer)
+      : ADMWrapper(AudioDeviceModule::Create(audio_layer, task_queue_factory),
+                   legacy_observer,
+                   std::move(observer)) {}
+  ~ADMWrapper() override {
+    audio_transport_ = nullptr;
+    observer_ = nullptr;
+  }
+
+  // Make sure we have a valid ADM before returning it to the user.
+  bool IsValid() { return is_valid_; }
+
+  int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samples_per_sec,
+                                  uint32_t total_delay_ms,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel) override {
+    return RecordedDataIsAvailable(
+        audioSamples, nSamples, nBytesPerSample, nChannels, samples_per_sec,
+        total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel,
+        /*capture_timestamp_ns=*/absl::nullopt);
+  }
+
+  // AudioTransport methods overrides.
+  int32_t RecordedDataIsAvailable(
+      const void* audioSamples,
+      size_t nSamples,
+      size_t nBytesPerSample,
+      size_t nChannels,
+      uint32_t samples_per_sec,
+      uint32_t total_delay_ms,
+      int32_t clockDrift,
+      uint32_t currentMicLevel,
+      bool keyPressed,
+      uint32_t& newMicLevel,
+      absl::optional<int64_t> capture_timestamp_ns) override {
+    int32_t res = 0;
+    // Capture PCM data of locally captured audio.
+    if (observer_) {
+      observer_->OnCaptureData(audioSamples, nSamples, nBytesPerSample,
+                               nChannels, samples_per_sec);
+    }
+
+    // Send to the actual audio transport.
+    if (audio_transport_) {
+      res = audio_transport_->RecordedDataIsAvailable(
+          audioSamples, nSamples, nBytesPerSample, nChannels, samples_per_sec,
+          total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel,
+          capture_timestamp_ns);
+    }
+
+    return res;
+  }
+
+  int32_t NeedMorePlayData(const size_t nSamples,
+                           const size_t nBytesPerSample,
+                           const size_t nChannels,
+                           const uint32_t samples_per_sec,
+                           void* audioSamples,
+                           size_t& nSamplesOut,
+                           int64_t* elapsed_time_ms,
+                           int64_t* ntp_time_ms) override {
+    int32_t res = 0;
+    // Set out parameters to safe values to be sure not to return corrupted
+    // data.
+    nSamplesOut = 0;
+    *elapsed_time_ms = -1;
+    *ntp_time_ms = -1;
+    // Request data from the audio transport.
+    if (audio_transport_) {
+      res = audio_transport_->NeedMorePlayData(
+          nSamples, nBytesPerSample, nChannels, samples_per_sec, audioSamples,
+          nSamplesOut, elapsed_time_ms, ntp_time_ms);
+    }
+
+    // Capture rendered data.
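+    // The tap below happens after NeedMorePlayData() has filled
+    // `audioSamples`, so the observer sees exactly the PCM that will be
+    // handed to the audio device.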
+ if (observer_) { + observer_->OnRenderData(audioSamples, nSamples, nBytesPerSample, + nChannels, samples_per_sec); + } + + return res; + } + + void PullRenderData(int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t number_of_frames, + void* audio_data, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) override { + RTC_DCHECK_NOTREACHED(); + } + + // Override AudioDeviceModule's RegisterAudioCallback method to remember the + // actual audio transport (e.g.: voice engine). + int32_t RegisterAudioCallback(AudioTransport* audio_callback) override { + // Remember the audio callback to forward PCM data + audio_transport_ = audio_callback; + return 0; + } + + // AudioDeviceModule pass through method overrides. + int32_t ActiveAudioLayer(AudioLayer* audio_layer) const override { + return impl_->ActiveAudioLayer(audio_layer); + } + int32_t Init() override { + int res = impl_->Init(); + if (res != 0) { + return res; + } + // Register self as the audio transport callback for underlying ADM impl. + impl_->RegisterAudioCallback(this); + return res; + } + int32_t Terminate() override { return impl_->Terminate(); } + bool Initialized() const override { return impl_->Initialized(); } + int16_t PlayoutDevices() override { return impl_->PlayoutDevices(); } + int16_t RecordingDevices() override { return impl_->RecordingDevices(); } + int32_t PlayoutDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) override { + return impl_->PlayoutDeviceName(index, name, guid); + } + int32_t RecordingDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) override { + return impl_->RecordingDeviceName(index, name, guid); + } + int32_t SetPlayoutDevice(uint16_t index) override { + return impl_->SetPlayoutDevice(index); + } + int32_t SetPlayoutDevice(WindowsDeviceType device) override { + return impl_->SetPlayoutDevice(device); + } + int32_t SetRecordingDevice(uint16_t index) override { + return impl_->SetRecordingDevice(index); + } + int32_t SetRecordingDevice(WindowsDeviceType device) override { + return impl_->SetRecordingDevice(device); + } + int32_t PlayoutIsAvailable(bool* available) override { + return impl_->PlayoutIsAvailable(available); + } + int32_t InitPlayout() override { return impl_->InitPlayout(); } + bool PlayoutIsInitialized() const override { + return impl_->PlayoutIsInitialized(); + } + int32_t RecordingIsAvailable(bool* available) override { + return impl_->RecordingIsAvailable(available); + } + int32_t InitRecording() override { return impl_->InitRecording(); } + bool RecordingIsInitialized() const override { + return impl_->RecordingIsInitialized(); + } + int32_t StartPlayout() override { return impl_->StartPlayout(); } + int32_t StopPlayout() override { return impl_->StopPlayout(); } + bool Playing() const override { return impl_->Playing(); } + int32_t StartRecording() override { return impl_->StartRecording(); } + int32_t StopRecording() override { return impl_->StopRecording(); } + bool Recording() const override { return impl_->Recording(); } + int32_t InitSpeaker() override { return impl_->InitSpeaker(); } + bool SpeakerIsInitialized() const override { + return impl_->SpeakerIsInitialized(); + } + int32_t InitMicrophone() override { return impl_->InitMicrophone(); } + bool MicrophoneIsInitialized() const override { + return impl_->MicrophoneIsInitialized(); + } + int32_t SpeakerVolumeIsAvailable(bool* available) override { + return impl_->SpeakerVolumeIsAvailable(available); + } + int32_t 
SetSpeakerVolume(uint32_t volume) override { + return impl_->SetSpeakerVolume(volume); + } + int32_t SpeakerVolume(uint32_t* volume) const override { + return impl_->SpeakerVolume(volume); + } + int32_t MaxSpeakerVolume(uint32_t* max_volume) const override { + return impl_->MaxSpeakerVolume(max_volume); + } + int32_t MinSpeakerVolume(uint32_t* min_volume) const override { + return impl_->MinSpeakerVolume(min_volume); + } + int32_t MicrophoneVolumeIsAvailable(bool* available) override { + return impl_->MicrophoneVolumeIsAvailable(available); + } + int32_t SetMicrophoneVolume(uint32_t volume) override { + return impl_->SetMicrophoneVolume(volume); + } + int32_t MicrophoneVolume(uint32_t* volume) const override { + return impl_->MicrophoneVolume(volume); + } + int32_t MaxMicrophoneVolume(uint32_t* max_volume) const override { + return impl_->MaxMicrophoneVolume(max_volume); + } + int32_t MinMicrophoneVolume(uint32_t* min_volume) const override { + return impl_->MinMicrophoneVolume(min_volume); + } + int32_t SpeakerMuteIsAvailable(bool* available) override { + return impl_->SpeakerMuteIsAvailable(available); + } + int32_t SetSpeakerMute(bool enable) override { + return impl_->SetSpeakerMute(enable); + } + int32_t SpeakerMute(bool* enabled) const override { + return impl_->SpeakerMute(enabled); + } + int32_t MicrophoneMuteIsAvailable(bool* available) override { + return impl_->MicrophoneMuteIsAvailable(available); + } + int32_t SetMicrophoneMute(bool enable) override { + return impl_->SetMicrophoneMute(enable); + } + int32_t MicrophoneMute(bool* enabled) const override { + return impl_->MicrophoneMute(enabled); + } + int32_t StereoPlayoutIsAvailable(bool* available) const override { + return impl_->StereoPlayoutIsAvailable(available); + } + int32_t SetStereoPlayout(bool enable) override { + return impl_->SetStereoPlayout(enable); + } + int32_t StereoPlayout(bool* enabled) const override { + return impl_->StereoPlayout(enabled); + } + int32_t StereoRecordingIsAvailable(bool* available) const override { + return impl_->StereoRecordingIsAvailable(available); + } + int32_t SetStereoRecording(bool enable) override { + return impl_->SetStereoRecording(enable); + } + int32_t StereoRecording(bool* enabled) const override { + return impl_->StereoRecording(enabled); + } + int32_t PlayoutDelay(uint16_t* delay_ms) const override { + return impl_->PlayoutDelay(delay_ms); + } + bool BuiltInAECIsAvailable() const override { + return impl_->BuiltInAECIsAvailable(); + } + bool BuiltInAGCIsAvailable() const override { + return impl_->BuiltInAGCIsAvailable(); + } + bool BuiltInNSIsAvailable() const override { + return impl_->BuiltInNSIsAvailable(); + } + int32_t EnableBuiltInAEC(bool enable) override { + return impl_->EnableBuiltInAEC(enable); + } + int32_t EnableBuiltInAGC(bool enable) override { + return impl_->EnableBuiltInAGC(enable); + } + int32_t EnableBuiltInNS(bool enable) override { + return impl_->EnableBuiltInNS(enable); + } + int32_t GetPlayoutUnderrunCount() const override { + return impl_->GetPlayoutUnderrunCount(); + } +// Only supported on iOS. 
+#if defined(WEBRTC_IOS)
+  int GetPlayoutAudioParameters(AudioParameters* params) const override {
+    return impl_->GetPlayoutAudioParameters(params);
+  }
+  int GetRecordAudioParameters(AudioParameters* params) const override {
+    return impl_->GetRecordAudioParameters(params);
+  }
+#endif  // WEBRTC_IOS
+
+ protected:
+  rtc::scoped_refptr<AudioDeviceModule> impl_;
+  AudioDeviceDataObserver* legacy_observer_ = nullptr;
+  std::unique_ptr<AudioDeviceDataObserver> observer_;
+  AudioTransport* audio_transport_ = nullptr;
+  bool is_valid_ = false;
+};
+
+}  // namespace
+
+rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceWithDataObserver(
+    rtc::scoped_refptr<AudioDeviceModule> impl,
+    std::unique_ptr<AudioDeviceDataObserver> observer) {
+  auto audio_device = rtc::make_ref_counted<ADMWrapper>(impl, observer.get(),
+                                                        std::move(observer));
+
+  if (!audio_device->IsValid()) {
+    return nullptr;
+  }
+
+  return audio_device;
+}
+
+rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceWithDataObserver(
+    rtc::scoped_refptr<AudioDeviceModule> impl,
+    AudioDeviceDataObserver* legacy_observer) {
+  auto audio_device =
+      rtc::make_ref_counted<ADMWrapper>(impl, legacy_observer, nullptr);
+
+  if (!audio_device->IsValid()) {
+    return nullptr;
+  }
+
+  return audio_device;
+}
+
+rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceWithDataObserver(
+    AudioDeviceModule::AudioLayer audio_layer,
+    TaskQueueFactory* task_queue_factory,
+    std::unique_ptr<AudioDeviceDataObserver> observer) {
+  auto audio_device = rtc::make_ref_counted<ADMWrapper>(
+      audio_layer, task_queue_factory, observer.get(), std::move(observer));
+
+  if (!audio_device->IsValid()) {
+    return nullptr;
+  }
+
+  return audio_device;
+}
+
+rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceWithDataObserver(
+    AudioDeviceModule::AudioLayer audio_layer,
+    TaskQueueFactory* task_queue_factory,
+    AudioDeviceDataObserver* legacy_observer) {
+  auto audio_device = rtc::make_ref_counted<ADMWrapper>(
+      audio_layer, task_queue_factory, legacy_observer, nullptr);
+
+  if (!audio_device->IsValid()) {
+    return nullptr;
+  }
+
+  return audio_device;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_generic.cc b/third_party/libwebrtc/modules/audio_device/audio_device_generic.cc
new file mode 100644
index 0000000000..7b8cfd1734
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_generic.cc
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/audio_device_generic.h"
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+bool AudioDeviceGeneric::BuiltInAECIsAvailable() const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return false;
+}
+
+int32_t AudioDeviceGeneric::EnableBuiltInAEC(bool enable) {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+
+bool AudioDeviceGeneric::BuiltInAGCIsAvailable() const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return false;
+}
+
+int32_t AudioDeviceGeneric::EnableBuiltInAGC(bool enable) {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+
+bool AudioDeviceGeneric::BuiltInNSIsAvailable() const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return false;
+}
+
+int32_t AudioDeviceGeneric::EnableBuiltInNS(bool enable) {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+
+int32_t AudioDeviceGeneric::GetPlayoutUnderrunCount() const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+
+#if defined(WEBRTC_IOS)
+int AudioDeviceGeneric::GetPlayoutAudioParameters(
+    AudioParameters* params) const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+
+int AudioDeviceGeneric::GetRecordAudioParameters(
+    AudioParameters* params) const {
+  RTC_LOG_F(LS_ERROR) << "Not supported on this platform";
+  return -1;
+}
+#endif  // WEBRTC_IOS
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_generic.h b/third_party/libwebrtc/modules/audio_device/audio_device_generic.h
new file mode 100644
index 0000000000..41e24eb3b0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_generic.h
@@ -0,0 +1,145 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_GENERIC_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_GENERIC_H_
+
+#include <stdint.h>
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+
+namespace webrtc {
+
+class AudioDeviceGeneric {
+ public:
+  // For use with UMA logging. Must be kept in sync with histograms.xml in
+  // Chrome, located at
+  // https://cs.chromium.org/chromium/src/tools/metrics/histograms/histograms.xml
+  enum class InitStatus {
+    OK = 0,
+    PLAYOUT_ERROR = 1,
+    RECORDING_ERROR = 2,
+    OTHER_ERROR = 3,
+    NUM_STATUSES = 4
+  };
+  // Retrieve the currently utilized audio layer
+  virtual int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const = 0;
+
+  // Main initialization and termination
+  virtual InitStatus Init() = 0;
+  virtual int32_t Terminate() = 0;
+  virtual bool Initialized() const = 0;
+
+  // Device enumeration
+  virtual int16_t PlayoutDevices() = 0;
+  virtual int16_t RecordingDevices() = 0;
+  virtual int32_t PlayoutDeviceName(uint16_t index,
+                                    char name[kAdmMaxDeviceNameSize],
+                                    char guid[kAdmMaxGuidSize]) = 0;
+  virtual int32_t RecordingDeviceName(uint16_t index,
+                                      char name[kAdmMaxDeviceNameSize],
+                                      char guid[kAdmMaxGuidSize]) = 0;
+
+  // Device selection
+  virtual int32_t SetPlayoutDevice(uint16_t index) = 0;
+  virtual int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) = 0;
+  virtual int32_t SetRecordingDevice(uint16_t index) = 0;
+  virtual int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) = 0;
+
+  // Audio transport initialization
+  virtual int32_t PlayoutIsAvailable(bool& available) = 0;
+  virtual int32_t InitPlayout() = 0;
+  virtual bool PlayoutIsInitialized() const = 0;
+  virtual int32_t RecordingIsAvailable(bool& available) = 0;
+  virtual int32_t InitRecording() = 0;
+  virtual bool RecordingIsInitialized() const = 0;
+
+  // Audio transport control
+  virtual int32_t StartPlayout() = 0;
+  virtual int32_t StopPlayout() = 0;
+  virtual bool Playing() const = 0;
+  virtual int32_t StartRecording() = 0;
+  virtual int32_t StopRecording() = 0;
+  virtual bool Recording() const = 0;
+
+  // Audio mixer initialization
+  virtual int32_t InitSpeaker() = 0;
+  virtual bool SpeakerIsInitialized() const = 0;
+  virtual int32_t InitMicrophone() = 0;
+  virtual bool MicrophoneIsInitialized() const = 0;
+
+  // Speaker volume controls
+  virtual int32_t SpeakerVolumeIsAvailable(bool& available) = 0;
+  virtual int32_t SetSpeakerVolume(uint32_t volume) = 0;
+  virtual int32_t SpeakerVolume(uint32_t& volume) const = 0;
+  virtual int32_t MaxSpeakerVolume(uint32_t& maxVolume) const = 0;
+  virtual int32_t MinSpeakerVolume(uint32_t& minVolume) const = 0;
+
+  // Microphone volume controls
+  virtual int32_t MicrophoneVolumeIsAvailable(bool& available) = 0;
+  virtual int32_t SetMicrophoneVolume(uint32_t volume) = 0;
+  virtual int32_t MicrophoneVolume(uint32_t& volume) const = 0;
+  virtual int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const = 0;
+  virtual int32_t MinMicrophoneVolume(uint32_t& minVolume) const = 0;
+
+  // Speaker mute control
+  virtual int32_t SpeakerMuteIsAvailable(bool& available) = 0;
+  virtual int32_t SetSpeakerMute(bool enable) = 0;
+  virtual int32_t SpeakerMute(bool& enabled) const = 0;
+
+  // Microphone mute control
+  virtual int32_t MicrophoneMuteIsAvailable(bool& available) = 0;
+  virtual int32_t SetMicrophoneMute(bool enable) = 0;
+  virtual int32_t MicrophoneMute(bool& enabled) const = 0;
+
+  // Stereo support
+  virtual int32_t StereoPlayoutIsAvailable(bool& available) = 0;
+  virtual int32_t SetStereoPlayout(bool enable) = 0;
+  virtual int32_t StereoPlayout(bool& enabled) const = 0;
+  virtual int32_t StereoRecordingIsAvailable(bool& available) = 0;
+  virtual int32_t SetStereoRecording(bool enable) = 0;
+  virtual int32_t StereoRecording(bool& enabled) const = 0;
+
+  // 
Delay information and control + virtual int32_t PlayoutDelay(uint16_t& delayMS) const = 0; + + // Android only + virtual bool BuiltInAECIsAvailable() const; + virtual bool BuiltInAGCIsAvailable() const; + virtual bool BuiltInNSIsAvailable() const; + + // Windows Core Audio and Android only. + virtual int32_t EnableBuiltInAEC(bool enable); + virtual int32_t EnableBuiltInAGC(bool enable); + virtual int32_t EnableBuiltInNS(bool enable); + + // Play underrun count. + virtual int32_t GetPlayoutUnderrunCount() const; + +// iOS only. +// TODO(henrika): add Android support. +#if defined(WEBRTC_IOS) + virtual int GetPlayoutAudioParameters(AudioParameters* params) const; + virtual int GetRecordAudioParameters(AudioParameters* params) const; +#endif // WEBRTC_IOS + + virtual void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) = 0; + + virtual ~AudioDeviceGeneric() {} +}; + +} // namespace webrtc + +#endif // AUDIO_DEVICE_AUDIO_DEVICE_GENERIC_H_ diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_gn/moz.build b/third_party/libwebrtc/modules/audio_device/audio_device_gn/moz.build new file mode 100644 index 0000000000..40ed29f258 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/audio_device_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = 
True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_device_gn") diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_impl.cc b/third_party/libwebrtc/modules/audio_device/audio_device_impl.cc new file mode 100644 index 0000000000..092b98f2bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/audio_device_impl.cc @@ 
-0,0 +1,951 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/audio_device_impl.h"
+
+#include
+
+#include "api/make_ref_counted.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_device/audio_device_config.h"  // IWYU pragma: keep
+#include "modules/audio_device/audio_device_generic.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/metrics.h"
+
+#if defined(_WIN32)
+#if defined(WEBRTC_WINDOWS_CORE_AUDIO_BUILD)
+#include "modules/audio_device/win/audio_device_core_win.h"
+#endif
+#elif defined(WEBRTC_ANDROID)
+#include
+#if defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+#include "modules/audio_device/android/aaudio_player.h"
+#include "modules/audio_device/android/aaudio_recorder.h"
+#endif
+#include "modules/audio_device/android/audio_device_template.h"
+#include "modules/audio_device/android/audio_manager.h"
+#include "modules/audio_device/android/audio_record_jni.h"
+#include "modules/audio_device/android/audio_track_jni.h"
+#include "modules/audio_device/android/opensles_player.h"
+#include "modules/audio_device/android/opensles_recorder.h"
+#elif defined(WEBRTC_LINUX)
+#if defined(WEBRTC_ENABLE_LINUX_ALSA)
+#include "modules/audio_device/linux/audio_device_alsa_linux.h"
+#endif
+#if defined(WEBRTC_ENABLE_LINUX_PULSE)
+#include "modules/audio_device/linux/audio_device_pulse_linux.h"
+#endif
+#elif defined(WEBRTC_IOS)
+#include "sdk/objc/native/src/audio/audio_device_ios.h"
+#elif defined(WEBRTC_MAC)
+#include "modules/audio_device/mac/audio_device_mac.h"
+#endif
+#if defined(WEBRTC_DUMMY_FILE_DEVICES)
+#include "modules/audio_device/dummy/file_audio_device.h"
+#include "modules/audio_device/dummy/file_audio_device_factory.h"
+#endif
+#include "modules/audio_device/dummy/audio_device_dummy.h"
+
+#define CHECKinitialized_() \
+  {                         \
+    if (!initialized_) {    \
+      return -1;            \
+    }                       \
+  }
+
+#define CHECKinitialized__BOOL() \
+  {                              \
+    if (!initialized_) {         \
+      return false;              \
+    }                            \
+  }
+
+namespace webrtc {
+
+rtc::scoped_refptr<AudioDeviceModule> AudioDeviceModule::Create(
+    AudioLayer audio_layer,
+    TaskQueueFactory* task_queue_factory) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  return AudioDeviceModule::CreateForTest(audio_layer, task_queue_factory);
+}
+
+// static
+rtc::scoped_refptr<AudioDeviceModule> AudioDeviceModule::CreateForTest(
+    AudioLayer audio_layer,
+    TaskQueueFactory* task_queue_factory) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+
+  // The "AudioDeviceModule::kWindowsCoreAudio2" audio layer has its own
+  // dedicated factory method which should be used instead.
+  if (audio_layer == AudioDeviceModule::kWindowsCoreAudio2) {
+    RTC_LOG(LS_ERROR) << "Use the CreateWindowsCoreAudioAudioDeviceModule() "
+                         "factory method instead for this option.";
+    return nullptr;
+  }
+
+  // Create the generic reference counted (platform independent) implementation.
+  auto audio_device = rtc::make_ref_counted<AudioDeviceModuleImpl>(
+      audio_layer, task_queue_factory);
+
+  // Ensure that the current platform is supported.
+  if (audio_device->CheckPlatform() == -1) {
+    return nullptr;
+  }
+
+  // Create the platform-dependent implementation.
+  if (audio_device->CreatePlatformSpecificObjects() == -1) {
+    return nullptr;
+  }
+
+  // Ensure that the generic audio buffer can communicate with the platform
+  // specific parts.
+  if (audio_device->AttachAudioBuffer() == -1) {
+    return nullptr;
+  }
+
+  return audio_device;
+}
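For orientation, a typical embedder-side call into this factory might look as follows. This is a sketch rather than code from the patch, but it uses only calls shown here plus the CreateDefaultTaskQueueFactory() helper that the unit tests further down also include:

    #include <memory>

    #include "api/task_queue/default_task_queue_factory.h"
    #include "modules/audio_device/include/audio_device.h"

    rtc::scoped_refptr<webrtc::AudioDeviceModule> CreateDefaultAdm() {
      // The factory must outlive the ADM, hence the function-local static.
      static std::unique_ptr<webrtc::TaskQueueFactory> factory =
          webrtc::CreateDefaultTaskQueueFactory();
      // Returns nullptr if the platform check or backend creation fails.
      return webrtc::AudioDeviceModule::Create(
          webrtc::AudioDeviceModule::kPlatformDefaultAudio, factory.get());
    }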
+
+AudioDeviceModuleImpl::AudioDeviceModuleImpl(
+    AudioLayer audio_layer,
+    TaskQueueFactory* task_queue_factory)
+    : audio_layer_(audio_layer), audio_device_buffer_(task_queue_factory) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+}
+
+int32_t AudioDeviceModuleImpl::CheckPlatform() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  // Ensure that the current platform is supported
+  PlatformType platform(kPlatformNotSupported);
+#if defined(_WIN32)
+  platform = kPlatformWin32;
+  RTC_LOG(LS_INFO) << "current platform is Win32";
+#elif defined(WEBRTC_ANDROID)
+  platform = kPlatformAndroid;
+  RTC_LOG(LS_INFO) << "current platform is Android";
+#elif defined(WEBRTC_LINUX)
+  platform = kPlatformLinux;
+  RTC_LOG(LS_INFO) << "current platform is Linux";
+#elif defined(WEBRTC_IOS)
+  platform = kPlatformIOS;
+  RTC_LOG(LS_INFO) << "current platform is IOS";
+#elif defined(WEBRTC_MAC)
+  platform = kPlatformMac;
+  RTC_LOG(LS_INFO) << "current platform is Mac";
+#endif
+  if (platform == kPlatformNotSupported) {
+    RTC_LOG(LS_ERROR)
+        << "current platform is not supported => this module will self "
+           "destruct!";
+    return -1;
+  }
+  platform_type_ = platform;
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+// Dummy ADM implementations if build flags are set.
+#if defined(WEBRTC_DUMMY_AUDIO_BUILD)
+  audio_device_.reset(new AudioDeviceDummy());
+  RTC_LOG(LS_INFO) << "Dummy Audio APIs will be utilized";
+#elif defined(WEBRTC_DUMMY_FILE_DEVICES)
+  audio_device_.reset(FileAudioDeviceFactory::CreateFileAudioDevice());
+  if (audio_device_) {
+    RTC_LOG(LS_INFO) << "Will use file-playing dummy device.";
+  } else {
+    // Create a dummy device instead.
+    audio_device_.reset(new AudioDeviceDummy());
+    RTC_LOG(LS_INFO) << "Dummy Audio APIs will be utilized";
+  }
+
+// Real (non-dummy) ADM implementations.
+#else
+  AudioLayer audio_layer(PlatformAudioLayer());
+// Windows ADM implementation.
+#if defined(WEBRTC_WINDOWS_CORE_AUDIO_BUILD)
+  if ((audio_layer == kWindowsCoreAudio) ||
+      (audio_layer == kPlatformDefaultAudio)) {
+    RTC_LOG(LS_INFO) << "Attempting to use the Windows Core Audio APIs...";
+    if (AudioDeviceWindowsCore::CoreAudioIsSupported()) {
+      audio_device_.reset(new AudioDeviceWindowsCore());
+      RTC_LOG(LS_INFO) << "Windows Core Audio APIs will be utilized";
+    }
+  }
+#endif  // defined(WEBRTC_WINDOWS_CORE_AUDIO_BUILD)
+
+#if defined(WEBRTC_ANDROID)
+  // Create an Android audio manager.
+  audio_manager_android_.reset(new AudioManager());
+  // Select best possible combination of audio layers.
+  if (audio_layer == kPlatformDefaultAudio) {
+    if (audio_manager_android_->IsAAudioSupported()) {
+      // Use of AAudio for both playout and recording has highest priority.
+      audio_layer = kAndroidAAudioAudio;
+    } else if (audio_manager_android_->IsLowLatencyPlayoutSupported() &&
+               audio_manager_android_->IsLowLatencyRecordSupported()) {
+      // Use OpenSL ES for both playout and recording.
+      audio_layer = kAndroidOpenSLESAudio;
+    } else if (audio_manager_android_->IsLowLatencyPlayoutSupported() &&
+               !audio_manager_android_->IsLowLatencyRecordSupported()) {
+      // Use OpenSL ES for output on devices that only support the
+      // low-latency output audio path.
+      audio_layer = kAndroidJavaInputAndOpenSLESOutputAudio;
+    } else {
+      // Use Java-based audio in both directions when low-latency output is
+      // not supported.
+      audio_layer = kAndroidJavaAudio;
+    }
+  }
+  AudioManager* audio_manager = audio_manager_android_.get();
+  if (audio_layer == kAndroidJavaAudio) {
+    // Java audio for both input and output audio.
+    audio_device_.reset(new AudioDeviceTemplate<AudioRecordJni, AudioTrackJni>(
+        audio_layer, audio_manager));
+  } else if (audio_layer == kAndroidOpenSLESAudio) {
+    // OpenSL ES based audio for both input and output audio.
+    audio_device_.reset(
+        new AudioDeviceTemplate<OpenSLESRecorder, OpenSLESPlayer>(
+            audio_layer, audio_manager));
+  } else if (audio_layer == kAndroidJavaInputAndOpenSLESOutputAudio) {
+    // Java audio for input and OpenSL ES for output audio (i.e. mixed APIs).
+    // This combination provides low-latency output audio and at the same
+    // time support for HW AEC using the AudioRecord Java API.
+    audio_device_.reset(new AudioDeviceTemplate<AudioRecordJni, OpenSLESPlayer>(
+        audio_layer, audio_manager));
+  } else if (audio_layer == kAndroidAAudioAudio) {
+#if defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+    // AAudio based audio for both input and output.
+    audio_device_.reset(new AudioDeviceTemplate<AAudioRecorder, AAudioPlayer>(
+        audio_layer, audio_manager));
+#endif
+  } else if (audio_layer == kAndroidJavaInputAndAAudioOutputAudio) {
+#if defined(WEBRTC_AUDIO_DEVICE_INCLUDE_ANDROID_AAUDIO)
+    // Java audio for input and AAudio for output audio (i.e. mixed APIs).
+    audio_device_.reset(new AudioDeviceTemplate<AudioRecordJni, AAudioPlayer>(
+        audio_layer, audio_manager));
+#endif
+  } else {
+    RTC_LOG(LS_ERROR) << "The requested audio layer is not supported";
+    audio_device_.reset(nullptr);
+  }
+// END #if defined(WEBRTC_ANDROID)
+
+// Linux ADM implementation.
+// Note that, WEBRTC_ENABLE_LINUX_ALSA is always defined by default when
+// WEBRTC_LINUX is defined. WEBRTC_ENABLE_LINUX_PULSE depends on the
+// 'rtc_include_pulse_audio' build flag.
+// TODO(bugs.webrtc.org/9127): improve support and make it more clear that
+// PulseAudio is the default selection.
+#elif defined(WEBRTC_LINUX)
+#if !defined(WEBRTC_ENABLE_LINUX_PULSE)
+  // Build flag 'rtc_include_pulse_audio' is set to false. In this mode:
+  // - kPlatformDefaultAudio => ALSA, and
+  // - kLinuxAlsaAudio => ALSA, and
+  // - kLinuxPulseAudio => Invalid selection.
+  RTC_LOG(LS_WARNING) << "PulseAudio is disabled using build flag.";
+  if ((audio_layer == kLinuxAlsaAudio) ||
+      (audio_layer == kPlatformDefaultAudio)) {
+    audio_device_.reset(new AudioDeviceLinuxALSA());
+    RTC_LOG(LS_INFO) << "Linux ALSA APIs will be utilized.";
+  }
+#else
+  // Build flag 'rtc_include_pulse_audio' is set to true (default). In this
+  // mode:
+  // - kPlatformDefaultAudio => PulseAudio, and
+  // - kLinuxPulseAudio => PulseAudio, and
+  // - kLinuxAlsaAudio => ALSA (supported but not default).
+  RTC_LOG(LS_INFO) << "PulseAudio support is enabled.";
+  if ((audio_layer == kLinuxPulseAudio) ||
+      (audio_layer == kPlatformDefaultAudio)) {
+    // Linux PulseAudio implementation is default.
+    audio_device_.reset(new AudioDeviceLinuxPulse());
+    RTC_LOG(LS_INFO) << "Linux PulseAudio APIs will be utilized";
+  } else if (audio_layer == kLinuxAlsaAudio) {
+    audio_device_.reset(new AudioDeviceLinuxALSA());
+    RTC_LOG(LS_WARNING) << "Linux ALSA APIs will be utilized.";
+  }
+#endif  // #if !defined(WEBRTC_ENABLE_LINUX_PULSE)
+#endif  // #if defined(WEBRTC_LINUX)
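The Android selection above is a strict priority cascade. Restated as a standalone function (hypothetical helper, for illustration only; the booleans correspond to the three AudioManager queries used above):

    enum class AndroidLayer { kAAudio, kOpenSLES, kJavaInOpenSLESOut, kJava };

    // AAudio first; full OpenSL ES when both directions are low latency;
    // OpenSL ES for output only; Java-based audio as the final fallback.
    AndroidLayer SelectAndroidLayer(bool aaudio_supported,
                                    bool low_latency_playout,
                                    bool low_latency_record) {
      if (aaudio_supported) return AndroidLayer::kAAudio;
      if (low_latency_playout && low_latency_record)
        return AndroidLayer::kOpenSLES;
      if (low_latency_playout) return AndroidLayer::kJavaInOpenSLESOut;
      return AndroidLayer::kJava;
    }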
+
+// iOS ADM implementation.
+#if defined(WEBRTC_IOS)
+  if (audio_layer == kPlatformDefaultAudio) {
+    audio_device_.reset(
+        new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
+    RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
+  }
+// END #if defined(WEBRTC_IOS)
+
+// Mac OS X ADM implementation.
+#elif defined(WEBRTC_MAC)
+  if (audio_layer == kPlatformDefaultAudio) {
+    audio_device_.reset(new AudioDeviceMac());
+    RTC_LOG(LS_INFO) << "Mac OS X Audio APIs will be utilized.";
+  }
+#endif  // WEBRTC_MAC
+
+  // Dummy ADM implementation.
+  if (audio_layer == kDummyAudio) {
+    audio_device_.reset(new AudioDeviceDummy());
+    RTC_LOG(LS_INFO) << "Dummy Audio APIs will be utilized.";
+  }
+#endif  // if defined(WEBRTC_DUMMY_AUDIO_BUILD)
+
+  if (!audio_device_) {
+    RTC_LOG(LS_ERROR)
+        << "Failed to create the platform specific ADM implementation.";
+    return -1;
+  }
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::AttachAudioBuffer() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  audio_device_->AttachAudioBuffer(&audio_device_buffer_);
+  return 0;
+}
+
+AudioDeviceModuleImpl::~AudioDeviceModuleImpl() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+}
+
+int32_t AudioDeviceModuleImpl::ActiveAudioLayer(AudioLayer* audioLayer) const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  AudioLayer activeAudio;
+  if (audio_device_->ActiveAudioLayer(activeAudio) == -1) {
+    return -1;
+  }
+  *audioLayer = activeAudio;
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::Init() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  if (initialized_)
+    return 0;
+  RTC_CHECK(audio_device_);
+  AudioDeviceGeneric::InitStatus status = audio_device_->Init();
+  RTC_HISTOGRAM_ENUMERATION(
+      "WebRTC.Audio.InitializationResult", static_cast<int>(status),
+      static_cast<int>(AudioDeviceGeneric::InitStatus::NUM_STATUSES));
+  if (status != AudioDeviceGeneric::InitStatus::OK) {
+    RTC_LOG(LS_ERROR) << "Audio device initialization failed.";
+    return -1;
+  }
+  initialized_ = true;
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::Terminate() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  if (!initialized_)
+    return 0;
+  if (audio_device_->Terminate() == -1) {
+    return -1;
+  }
+  initialized_ = false;
+  return 0;
+}
+
+bool AudioDeviceModuleImpl::Initialized() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << ": " << initialized_;
+  return initialized_;
+}
+
+int32_t AudioDeviceModuleImpl::InitSpeaker() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  return audio_device_->InitSpeaker();
+}
+
+int32_t AudioDeviceModuleImpl::InitMicrophone() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  return audio_device_->InitMicrophone();
+}
+
+int32_t AudioDeviceModuleImpl::SpeakerVolumeIsAvailable(bool* available) {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  bool isAvailable = false;
+  if (audio_device_->SpeakerVolumeIsAvailable(isAvailable) == -1) {
+    return -1;
+  }
+  *available = isAvailable;
+  RTC_LOG(LS_INFO) << "output: " << isAvailable;
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::SetSpeakerVolume(uint32_t volume) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << volume << ")";
+  CHECKinitialized_();
+  return audio_device_->SetSpeakerVolume(volume);
+}
+
+int32_t AudioDeviceModuleImpl::SpeakerVolume(uint32_t* volume) const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  uint32_t level = 0;
+  if (audio_device_->SpeakerVolume(level) == -1) {
+    return -1;
+  }
+  *volume = level;
+  RTC_LOG(LS_INFO) << "output: " << *volume;
+  return 0;
+}
+
+bool AudioDeviceModuleImpl::SpeakerIsInitialized() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+ 
CHECKinitialized__BOOL(); + bool isInitialized = audio_device_->SpeakerIsInitialized(); + RTC_LOG(LS_INFO) << "output: " << isInitialized; + return isInitialized; +} + +bool AudioDeviceModuleImpl::MicrophoneIsInitialized() const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized__BOOL(); + bool isInitialized = audio_device_->MicrophoneIsInitialized(); + RTC_LOG(LS_INFO) << "output: " << isInitialized; + return isInitialized; +} + +int32_t AudioDeviceModuleImpl::MaxSpeakerVolume(uint32_t* maxVolume) const { + CHECKinitialized_(); + uint32_t maxVol = 0; + if (audio_device_->MaxSpeakerVolume(maxVol) == -1) { + return -1; + } + *maxVolume = maxVol; + return 0; +} + +int32_t AudioDeviceModuleImpl::MinSpeakerVolume(uint32_t* minVolume) const { + CHECKinitialized_(); + uint32_t minVol = 0; + if (audio_device_->MinSpeakerVolume(minVol) == -1) { + return -1; + } + *minVolume = minVol; + return 0; +} + +int32_t AudioDeviceModuleImpl::SpeakerMuteIsAvailable(bool* available) { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->SpeakerMuteIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::SetSpeakerMute(bool enable) { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")"; + CHECKinitialized_(); + return audio_device_->SetSpeakerMute(enable); +} + +int32_t AudioDeviceModuleImpl::SpeakerMute(bool* enabled) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool muted = false; + if (audio_device_->SpeakerMute(muted) == -1) { + return -1; + } + *enabled = muted; + RTC_LOG(LS_INFO) << "output: " << muted; + return 0; +} + +int32_t AudioDeviceModuleImpl::MicrophoneMuteIsAvailable(bool* available) { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->MicrophoneMuteIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::SetMicrophoneMute(bool enable) { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")"; + CHECKinitialized_(); + return (audio_device_->SetMicrophoneMute(enable)); +} + +int32_t AudioDeviceModuleImpl::MicrophoneMute(bool* enabled) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool muted = false; + if (audio_device_->MicrophoneMute(muted) == -1) { + return -1; + } + *enabled = muted; + RTC_LOG(LS_INFO) << "output: " << muted; + return 0; +} + +int32_t AudioDeviceModuleImpl::MicrophoneVolumeIsAvailable(bool* available) { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->MicrophoneVolumeIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::SetMicrophoneVolume(uint32_t volume) { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << volume << ")"; + CHECKinitialized_(); + return (audio_device_->SetMicrophoneVolume(volume)); +} + +int32_t AudioDeviceModuleImpl::MicrophoneVolume(uint32_t* volume) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + uint32_t level = 0; + if (audio_device_->MicrophoneVolume(level) == -1) { + return -1; + } + *volume = level; + RTC_LOG(LS_INFO) << "output: " << *volume; + return 0; +} + +int32_t AudioDeviceModuleImpl::StereoRecordingIsAvailable( + bool* available) const { + 
RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->StereoRecordingIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::SetStereoRecording(bool enable) { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")"; + CHECKinitialized_(); + if (audio_device_->RecordingIsInitialized()) { + RTC_LOG(LS_ERROR) + << "unable to set stereo mode after recording is initialized"; + return -1; + } + if (audio_device_->SetStereoRecording(enable) == -1) { + if (enable) { + RTC_LOG(LS_WARNING) << "failed to enable stereo recording"; + } + return -1; + } + int8_t nChannels(1); + if (enable) { + nChannels = 2; + } + audio_device_buffer_.SetRecordingChannels(nChannels); + return 0; +} + +int32_t AudioDeviceModuleImpl::StereoRecording(bool* enabled) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool stereo = false; + if (audio_device_->StereoRecording(stereo) == -1) { + return -1; + } + *enabled = stereo; + RTC_LOG(LS_INFO) << "output: " << stereo; + return 0; +} + +int32_t AudioDeviceModuleImpl::StereoPlayoutIsAvailable(bool* available) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->StereoPlayoutIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::SetStereoPlayout(bool enable) { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")"; + CHECKinitialized_(); + if (audio_device_->PlayoutIsInitialized()) { + RTC_LOG(LS_ERROR) + << "unable to set stereo mode while playing side is initialized"; + return -1; + } + if (audio_device_->SetStereoPlayout(enable)) { + RTC_LOG(LS_WARNING) << "stereo playout is not supported"; + return -1; + } + int8_t nChannels(1); + if (enable) { + nChannels = 2; + } + audio_device_buffer_.SetPlayoutChannels(nChannels); + return 0; +} + +int32_t AudioDeviceModuleImpl::StereoPlayout(bool* enabled) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool stereo = false; + if (audio_device_->StereoPlayout(stereo) == -1) { + return -1; + } + *enabled = stereo; + RTC_LOG(LS_INFO) << "output: " << stereo; + return 0; +} + +int32_t AudioDeviceModuleImpl::PlayoutIsAvailable(bool* available) { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->PlayoutIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::RecordingIsAvailable(bool* available) { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + bool isAvailable = false; + if (audio_device_->RecordingIsAvailable(isAvailable) == -1) { + return -1; + } + *available = isAvailable; + RTC_LOG(LS_INFO) << "output: " << isAvailable; + return 0; +} + +int32_t AudioDeviceModuleImpl::MaxMicrophoneVolume(uint32_t* maxVolume) const { + CHECKinitialized_(); + uint32_t maxVol(0); + if (audio_device_->MaxMicrophoneVolume(maxVol) == -1) { + return -1; + } + *maxVolume = maxVol; + return 0; +} + +int32_t AudioDeviceModuleImpl::MinMicrophoneVolume(uint32_t* minVolume) const { + CHECKinitialized_(); + uint32_t minVol(0); + if (audio_device_->MinMicrophoneVolume(minVol) == -1) { + return -1; + } + *minVolume = minVol; + return 0; +} + +int16_t 
AudioDeviceModuleImpl::PlayoutDevices() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  uint16_t nPlayoutDevices = audio_device_->PlayoutDevices();
+  RTC_LOG(LS_INFO) << "output: " << nPlayoutDevices;
+  return (int16_t)(nPlayoutDevices);
+}
+
+int32_t AudioDeviceModuleImpl::SetPlayoutDevice(uint16_t index) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << index << ")";
+  CHECKinitialized_();
+  return audio_device_->SetPlayoutDevice(index);
+}
+
+int32_t AudioDeviceModuleImpl::SetPlayoutDevice(WindowsDeviceType device) {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  return audio_device_->SetPlayoutDevice(device);
+}
+
+int32_t AudioDeviceModuleImpl::PlayoutDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << index << ", ...)";
+  CHECKinitialized_();
+  if (name == NULL) {
+    return -1;
+  }
+  if (audio_device_->PlayoutDeviceName(index, name, guid) == -1) {
+    return -1;
+  }
+  if (name != NULL) {
+    RTC_LOG(LS_INFO) << "output: name = " << name;
+  }
+  if (guid != NULL) {
+    RTC_LOG(LS_INFO) << "output: guid = " << guid;
+  }
+  return 0;
+}
+
+int32_t AudioDeviceModuleImpl::RecordingDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << index << ", ...)";
+  CHECKinitialized_();
+  if (name == NULL) {
+    return -1;
+  }
+  if (audio_device_->RecordingDeviceName(index, name, guid) == -1) {
+    return -1;
+  }
+  if (name != NULL) {
+    RTC_LOG(LS_INFO) << "output: name = " << name;
+  }
+  if (guid != NULL) {
+    RTC_LOG(LS_INFO) << "output: guid = " << guid;
+  }
+  return 0;
+}
+
+int16_t AudioDeviceModuleImpl::RecordingDevices() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  uint16_t nRecordingDevices = audio_device_->RecordingDevices();
+  RTC_LOG(LS_INFO) << "output: " << nRecordingDevices;
+  return (int16_t)nRecordingDevices;
+}
+
+int32_t AudioDeviceModuleImpl::SetRecordingDevice(uint16_t index) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << index << ")";
+  CHECKinitialized_();
+  return audio_device_->SetRecordingDevice(index);
+}
+
+int32_t AudioDeviceModuleImpl::SetRecordingDevice(WindowsDeviceType device) {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  return audio_device_->SetRecordingDevice(device);
+}
+
+int32_t AudioDeviceModuleImpl::InitPlayout() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  if (PlayoutIsInitialized()) {
+    return 0;
+  }
+  int32_t result = audio_device_->InitPlayout();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.InitPlayoutSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+int32_t AudioDeviceModuleImpl::InitRecording() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  if (RecordingIsInitialized()) {
+    return 0;
+  }
+  int32_t result = audio_device_->InitRecording();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.InitRecordingSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+bool AudioDeviceModuleImpl::PlayoutIsInitialized() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  return audio_device_->PlayoutIsInitialized();
+}
+
+bool AudioDeviceModuleImpl::RecordingIsInitialized() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  return audio_device_->RecordingIsInitialized();
+}
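The CHECKinitialized_() guards encode a strict call ordering: Init() must succeed before devices are selected, and each Init*() must succeed before the matching Start*() call defined just below. A sketch of the expected sequence (the index-0 device and the uint16_t overload are arbitrary choices for illustration):

    int StartPlayoutSketch(webrtc::AudioDeviceModule& adm) {
      if (adm.Init() != 0) return -1;  // everything below requires this
      const uint16_t kDeviceIndex = 0;
      if (adm.SetPlayoutDevice(kDeviceIndex) != 0) return -1;
      if (adm.InitPlayout() != 0) return -1;  // logs WebRTC.Audio.InitPlayoutSuccess
      return adm.StartPlayout();              // logs WebRTC.Audio.StartPlayoutSuccess
    }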
+
+int32_t AudioDeviceModuleImpl::StartPlayout() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  if (Playing()) {
+    return 0;
+  }
+  audio_device_buffer_.StartPlayout();
+  int32_t result = audio_device_->StartPlayout();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.StartPlayoutSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+int32_t AudioDeviceModuleImpl::StopPlayout() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  int32_t result = audio_device_->StopPlayout();
+  audio_device_buffer_.StopPlayout();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.StopPlayoutSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+bool AudioDeviceModuleImpl::Playing() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  return audio_device_->Playing();
+}
+
+int32_t AudioDeviceModuleImpl::StartRecording() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  if (Recording()) {
+    return 0;
+  }
+  audio_device_buffer_.StartRecording();
+  int32_t result = audio_device_->StartRecording();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.StartRecordingSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+int32_t AudioDeviceModuleImpl::StopRecording() {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized_();
+  int32_t result = audio_device_->StopRecording();
+  audio_device_buffer_.StopRecording();
+  RTC_LOG(LS_INFO) << "output: " << result;
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.StopRecordingSuccess",
+                        static_cast<bool>(result == 0));
+  return result;
+}
+
+bool AudioDeviceModuleImpl::Recording() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  return audio_device_->Recording();
+}
+
+int32_t AudioDeviceModuleImpl::RegisterAudioCallback(
+    AudioTransport* audioCallback) {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  return audio_device_buffer_.RegisterAudioCallback(audioCallback);
+}
+
+int32_t AudioDeviceModuleImpl::PlayoutDelay(uint16_t* delayMS) const {
+  CHECKinitialized_();
+  uint16_t delay = 0;
+  if (audio_device_->PlayoutDelay(delay) == -1) {
+    RTC_LOG(LS_ERROR) << "failed to retrieve the playout delay";
+    return -1;
+  }
+  *delayMS = delay;
+  return 0;
+}
+
+bool AudioDeviceModuleImpl::BuiltInAECIsAvailable() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  bool isAvailable = audio_device_->BuiltInAECIsAvailable();
+  RTC_LOG(LS_INFO) << "output: " << isAvailable;
+  return isAvailable;
+}
+
+int32_t AudioDeviceModuleImpl::EnableBuiltInAEC(bool enable) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")";
+  CHECKinitialized_();
+  int32_t ok = audio_device_->EnableBuiltInAEC(enable);
+  RTC_LOG(LS_INFO) << "output: " << ok;
+  return ok;
+}
+
+bool AudioDeviceModuleImpl::BuiltInAGCIsAvailable() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  bool isAvailable = audio_device_->BuiltInAGCIsAvailable();
+  RTC_LOG(LS_INFO) << "output: " << isAvailable;
+  return isAvailable;
+}
+
+int32_t AudioDeviceModuleImpl::EnableBuiltInAGC(bool enable) {
+  RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << enable << ")";
+  CHECKinitialized_();
+  int32_t ok = audio_device_->EnableBuiltInAGC(enable);
+  RTC_LOG(LS_INFO) << "output: " << ok;
+  return ok;
+}
+
+bool AudioDeviceModuleImpl::BuiltInNSIsAvailable() const {
+  RTC_LOG(LS_INFO) << __FUNCTION__;
+  CHECKinitialized__BOOL();
+  bool isAvailable = audio_device_->BuiltInNSIsAvailable();
+  RTC_LOG(LS_INFO) << "output: " << isAvailable;
+  return isAvailable;
+}
+
+int32_t AudioDeviceModuleImpl::EnableBuiltInNS(bool enable) {
+  RTC_LOG(LS_INFO) 
<< __FUNCTION__ << "(" << enable << ")"; + CHECKinitialized_(); + int32_t ok = audio_device_->EnableBuiltInNS(enable); + RTC_LOG(LS_INFO) << "output: " << ok; + return ok; +} + +int32_t AudioDeviceModuleImpl::GetPlayoutUnderrunCount() const { + RTC_LOG(LS_INFO) << __FUNCTION__; + CHECKinitialized_(); + int32_t underrunCount = audio_device_->GetPlayoutUnderrunCount(); + RTC_LOG(LS_INFO) << "output: " << underrunCount; + return underrunCount; +} + +#if defined(WEBRTC_IOS) +int AudioDeviceModuleImpl::GetPlayoutAudioParameters( + AudioParameters* params) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + int r = audio_device_->GetPlayoutAudioParameters(params); + RTC_LOG(LS_INFO) << "output: " << r; + return r; +} + +int AudioDeviceModuleImpl::GetRecordAudioParameters( + AudioParameters* params) const { + RTC_LOG(LS_INFO) << __FUNCTION__; + int r = audio_device_->GetRecordAudioParameters(params); + RTC_LOG(LS_INFO) << "output: " << r; + return r; +} +#endif // WEBRTC_IOS + +AudioDeviceModuleImpl::PlatformType AudioDeviceModuleImpl::Platform() const { + RTC_LOG(LS_INFO) << __FUNCTION__; + return platform_type_; +} + +AudioDeviceModule::AudioLayer AudioDeviceModuleImpl::PlatformAudioLayer() + const { + RTC_LOG(LS_INFO) << __FUNCTION__; + return audio_layer_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_impl.h b/third_party/libwebrtc/modules/audio_device/audio_device_impl.h new file mode 100644 index 0000000000..45f73dcd65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/audio_device_impl.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_AUDIO_DEVICE_IMPL_H_
+#define MODULES_AUDIO_DEVICE_AUDIO_DEVICE_IMPL_H_
+
+#if defined(WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE)
+
+#include <stdint.h>
+
+#include <memory>
+
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/include/audio_device.h"
+
+namespace webrtc {
+
+class AudioDeviceGeneric;
+class AudioManager;
+
+class AudioDeviceModuleImpl : public AudioDeviceModuleForTest {
+ public:
+  enum PlatformType {
+    kPlatformNotSupported = 0,
+    kPlatformWin32 = 1,
+    kPlatformWinCe = 2,
+    kPlatformLinux = 3,
+    kPlatformMac = 4,
+    kPlatformAndroid = 5,
+    kPlatformIOS = 6
+  };
+
+  int32_t CheckPlatform();
+  int32_t CreatePlatformSpecificObjects();
+  int32_t AttachAudioBuffer();
+
+  AudioDeviceModuleImpl(AudioLayer audio_layer,
+                        TaskQueueFactory* task_queue_factory);
+  ~AudioDeviceModuleImpl() override;
+
+  // Retrieve the currently utilized audio layer
+  int32_t ActiveAudioLayer(AudioLayer* audioLayer) const override;
+
+  // Full-duplex transportation of PCM audio
+  int32_t RegisterAudioCallback(AudioTransport* audioCallback) override;
+
+  // Main initialization and termination
+  int32_t Init() override;
+  int32_t Terminate() override;
+  bool Initialized() const override;
+
+  // Device enumeration
+  int16_t PlayoutDevices() override;
+  int16_t RecordingDevices() override;
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override;
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override;
+
+  // Device selection
+  int32_t SetPlayoutDevice(uint16_t index) override;
+  int32_t SetPlayoutDevice(WindowsDeviceType device) override;
+  int32_t SetRecordingDevice(uint16_t index) override;
+  int32_t SetRecordingDevice(WindowsDeviceType device) override;
+
+  // Audio transport initialization
+  int32_t PlayoutIsAvailable(bool* available) override;
+  int32_t InitPlayout() override;
+  bool PlayoutIsInitialized() const override;
+  int32_t RecordingIsAvailable(bool* available) override;
+  int32_t InitRecording() override;
+  bool RecordingIsInitialized() const override;
+
+  // Audio transport control
+  int32_t StartPlayout() override;
+  int32_t StopPlayout() override;
+  bool Playing() const override;
+  int32_t StartRecording() override;
+  int32_t StopRecording() override;
+  bool Recording() const override;
+
+  // Audio mixer initialization
+  int32_t InitSpeaker() override;
+  bool SpeakerIsInitialized() const override;
+  int32_t InitMicrophone() override;
+  bool MicrophoneIsInitialized() const override;
+
+  // Speaker volume controls
+  int32_t SpeakerVolumeIsAvailable(bool* available) override;
+  int32_t SetSpeakerVolume(uint32_t volume) override;
+  int32_t SpeakerVolume(uint32_t* volume) const override;
+  int32_t MaxSpeakerVolume(uint32_t* maxVolume) const override;
+  int32_t MinSpeakerVolume(uint32_t* minVolume) const override;
+
+  // Microphone volume controls
+  int32_t MicrophoneVolumeIsAvailable(bool* available) override;
+  int32_t SetMicrophoneVolume(uint32_t volume) override;
+  int32_t MicrophoneVolume(uint32_t* volume) const override;
+  int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const override;
+  int32_t MinMicrophoneVolume(uint32_t* minVolume) const override;
+
+  // Speaker mute control
+  int32_t SpeakerMuteIsAvailable(bool* available) override;
+  int32_t SetSpeakerMute(bool enable) override;
+  int32_t SpeakerMute(bool* enabled) const override;
+
+  // Microphone mute control
+  int32_t MicrophoneMuteIsAvailable(bool* available) override;
+  int32_t SetMicrophoneMute(bool enable) override;
+  int32_t MicrophoneMute(bool* enabled) const override;
+
+  // Stereo support
+  int32_t StereoPlayoutIsAvailable(bool* available) const override;
+  int32_t SetStereoPlayout(bool enable) override;
+  int32_t StereoPlayout(bool* enabled) const override;
+  int32_t StereoRecordingIsAvailable(bool* available) const override;
+  int32_t SetStereoRecording(bool enable) override;
+  int32_t StereoRecording(bool* enabled) const override;
+
+  // Delay information and control
+  int32_t PlayoutDelay(uint16_t* delayMS) const override;
+
+  bool BuiltInAECIsAvailable() const override;
+  int32_t EnableBuiltInAEC(bool enable) override;
+  bool BuiltInAGCIsAvailable() const override;
+  int32_t EnableBuiltInAGC(bool enable) override;
+  bool BuiltInNSIsAvailable() const override;
+  int32_t EnableBuiltInNS(bool enable) override;
+
+  // Play underrun count.
+  int32_t GetPlayoutUnderrunCount() const override;
+
+#if defined(WEBRTC_IOS)
+  int GetPlayoutAudioParameters(AudioParameters* params) const override;
+  int GetRecordAudioParameters(AudioParameters* params) const override;
+#endif  // WEBRTC_IOS
+
+#if defined(WEBRTC_ANDROID)
+  // Only use this accessor for test purposes on Android.
+  AudioManager* GetAndroidAudioManagerForTest() {
+    return audio_manager_android_.get();
+  }
+#endif
+  AudioDeviceBuffer* GetAudioDeviceBuffer() { return &audio_device_buffer_; }
+
+  int RestartPlayoutInternally() override { return -1; }
+  int RestartRecordingInternally() override { return -1; }
+  int SetPlayoutSampleRate(uint32_t sample_rate) override { return -1; }
+  int SetRecordingSampleRate(uint32_t sample_rate) override { return -1; }
+
+ private:
+  PlatformType Platform() const;
+  AudioLayer PlatformAudioLayer() const;
+
+  AudioLayer audio_layer_;
+  PlatformType platform_type_ = kPlatformNotSupported;
+  bool initialized_ = false;
+#if defined(WEBRTC_ANDROID)
+  // Should be declared first to ensure that it outlives other resources.
+  std::unique_ptr<AudioManager> audio_manager_android_;
+#endif
+  AudioDeviceBuffer audio_device_buffer_;
+  std::unique_ptr<AudioDeviceGeneric> audio_device_;
+};
+
+}  // namespace webrtc
+
+#endif  // defined(WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE)
+
+#endif  // MODULES_AUDIO_DEVICE_AUDIO_DEVICE_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_name.cc b/third_party/libwebrtc/modules/audio_device/audio_device_name.cc
new file mode 100644
index 0000000000..5318496768
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_name.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/audio_device_name.h"
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+
+const char AudioDeviceName::kDefaultDeviceId[] = "default";
+
+AudioDeviceName::AudioDeviceName(absl::string_view device_name,
+                                 absl::string_view unique_id)
+    : device_name(device_name), unique_id(unique_id) {}
+
+bool AudioDeviceName::IsValid() {
+  return !device_name.empty() && !unique_id.empty();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_name.h b/third_party/libwebrtc/modules/audio_device/audio_device_name.h
new file mode 100644
index 0000000000..db37852e9a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_name.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_AUDIO_DEVICE_NAME_H_
+#define MODULES_AUDIO_DEVICE_AUDIO_DEVICE_NAME_H_
+
+#include <deque>
+#include <string>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+
+struct AudioDeviceName {
+  // Represents a default device. Note that, on Windows there are two different
+  // types of default devices (Default and Default Communication). They can
+  // either be two different physical devices or be two different roles for one
+  // single device. Hence, this id must be combined with a "role parameter" on
+  // Windows to uniquely identify a default device.
+  static const char kDefaultDeviceId[];
+
+  AudioDeviceName() = default;
+  AudioDeviceName(absl::string_view device_name, absl::string_view unique_id);
+
+  ~AudioDeviceName() = default;
+
+  // Support copy and move.
+  AudioDeviceName(const AudioDeviceName& other) = default;
+  AudioDeviceName(AudioDeviceName&&) = default;
+  AudioDeviceName& operator=(const AudioDeviceName&) = default;
+  AudioDeviceName& operator=(AudioDeviceName&&) = default;
+
+  bool IsValid();
+
+  std::string device_name;  // Friendly name of the device.
+  std::string unique_id;    // Unique identifier for the device.
+};
+
+typedef std::deque<AudioDeviceName> AudioDeviceNames;
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_AUDIO_DEVICE_NAME_H_
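A brief usage sketch of the struct above (the device strings are hypothetical):

    #include <utility>

    #include "modules/audio_device/audio_device_name.h"

    webrtc::AudioDeviceNames EnumerateSketch() {
      webrtc::AudioDeviceNames names;
      names.push_front(webrtc::AudioDeviceName(
          "Default", webrtc::AudioDeviceName::kDefaultDeviceId));
      webrtc::AudioDeviceName mic("Built-in Microphone", "imic-0001");
      if (mic.IsValid()) {  // both fields must be non-empty
        names.push_back(std::move(mic));
      }
      return names;
    }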
diff --git a/third_party/libwebrtc/modules/audio_device/audio_device_unittest.cc b/third_party/libwebrtc/modules/audio_device/audio_device_unittest.cc
new file mode 100644
index 0000000000..0a3a88c2e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/audio_device_unittest.cc
@@ -0,0 +1,1241 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/include/audio_device.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/scoped_refptr.h"
+#include "api/sequence_checker.h"
+#include "api/task_queue/default_task_queue_factory.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/audio_device_impl.h"
+#include "modules/audio_device/include/mock_audio_transport.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/event.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+#include "rtc_base/time_utils.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#ifdef WEBRTC_WIN
+#include "modules/audio_device/include/audio_device_factory.h"
+#include "modules/audio_device/win/core_audio_utility_win.h"
+#include "rtc_base/win/scoped_com_initializer.h"
+#endif  // WEBRTC_WIN
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Ge;
+using ::testing::Invoke;
+using ::testing::Mock;
+using ::testing::NiceMock;
+using ::testing::NotNull;
+
+namespace webrtc {
+namespace {
+
+// Using a #define for AUDIO_DEVICE since we will call *different* versions of
+// the ADM functions, depending on the ID type.
+#if defined(WEBRTC_WIN)
+#define AUDIO_DEVICE_ID (AudioDeviceModule::WindowsDeviceType::kDefaultDevice)
+#else
+#define AUDIO_DEVICE_ID (0u)
+#endif  // defined(WEBRTC_WIN)
+
+// #define ENABLE_DEBUG_PRINTF
+#ifdef ENABLE_DEBUG_PRINTF
+#define PRINTD(...) fprintf(stderr, __VA_ARGS__);
+#else
+#define PRINTD(...) ((void)0)
+#endif
+#define PRINT(...) fprintf(stderr, __VA_ARGS__);
+
+// Don't run these tests if audio-related requirements are not met.
+#define SKIP_TEST_IF_NOT(requirements_satisfied)         \
+  do {                                                   \
+    if (!requirements_satisfied) {                       \
+      GTEST_SKIP() << "Skipped. No audio device found."; \
+    }                                                    \
+  } while (false)
+
+// Number of callbacks (input or output) the tests wait for before we set
+// an event indicating that the test was OK.
+static constexpr size_t kNumCallbacks = 10;
+// Max amount of time we wait for an event to be set while counting callbacks.
+static constexpr TimeDelta kTestTimeOut = TimeDelta::Seconds(10);
+// Average number of audio callbacks per second assuming 10ms packet size.
+static constexpr size_t kNumCallbacksPerSecond = 100;
+// Run the full-duplex test during this time (unit is in seconds).
+static constexpr TimeDelta kFullDuplexTime = TimeDelta::Seconds(5);
+// Length of round-trip latency measurements. Number of detected impulses
+// shall be kImpulseFrequencyInHz * kMeasureLatencyTime - 1 since the
+// last transmitted pulse is not used.
+static constexpr TimeDelta kMeasureLatencyTime = TimeDelta::Seconds(10);
+// Sets the number of impulses per second in the latency test.
+static constexpr size_t kImpulseFrequencyInHz = 1;
+// Utilized in round-trip latency measurements to avoid capturing noise samples.
+static constexpr int kImpulseThreshold = 1000;
+
+enum class TransportType {
+  kInvalid,
+  kPlay,
+  kRecord,
+  kPlayAndRecord,
+};
+
+// Interface for processing the audio stream. Real implementations can e.g.
+// run audio in loopback, read audio from a file or perform latency
+// measurements.
+class AudioStream {
+ public:
+  virtual void Write(rtc::ArrayView<const int16_t> source) = 0;
+  virtual void Read(rtc::ArrayView<int16_t> destination) = 0;
+
+  virtual ~AudioStream() = default;
+};
+
+// Converts index corresponding to position within a 10ms buffer into a
+// delay value in milliseconds.
+// Example: index=240, frames_per_10ms_buffer=480 => 5ms as output.
+int IndexToMilliseconds(size_t index, size_t frames_per_10ms_buffer) {
+  return rtc::checked_cast<int>(
+      10.0 * (static_cast<double>(index) / frames_per_10ms_buffer) + 0.5);
+}
+
+}  // namespace
+
+// Simple first in first out (FIFO) class that wraps a list of 16-bit audio
+// buffers of fixed size and allows Write and Read operations. The idea is to
+// store recorded audio buffers (using Write) and then read (using Read) these
+// stored buffers with as short delay as possible when the audio layer needs
+// data to play out. The number of buffers in the FIFO will stabilize under
+// normal conditions since there will be a balance between Write and Read calls.
+// The container is a std::list container and access is protected with a lock
+// since both sides (playout and recording) are driven by its own thread.
+// Note that, we know by design that the size of the audio buffer will not
+// change over time and that both sides will in most cases use the same size.
+class FifoAudioStream : public AudioStream {
+ public:
+  void Write(rtc::ArrayView<const int16_t> source) override {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    const size_t size = [&] {
+      MutexLock lock(&lock_);
+      fifo_.push_back(Buffer16(source.data(), source.size()));
+      return fifo_.size();
+    }();
+    if (size > max_size_) {
+      max_size_ = size;
+    }
+    // Add marker once per second to signal that audio is active.
+    if (write_count_++ % 100 == 0) {
+      PRINTD(".");
+    }
+    written_elements_ += size;
+  }
+
+  void Read(rtc::ArrayView<int16_t> destination) override {
+    MutexLock lock(&lock_);
+    if (fifo_.empty()) {
+      std::fill(destination.begin(), destination.end(), 0);
+    } else {
+      const Buffer16& buffer = fifo_.front();
+      if (buffer.size() == destination.size()) {
+        // Default case where input and output uses same sample rate and
+        // channel configuration. No conversion is needed.
+        std::copy(buffer.begin(), buffer.end(), destination.begin());
+      } else if (destination.size() == 2 * buffer.size()) {
+        // Recorded input signal in `buffer` is in mono. Do channel upmix to
+        // match stereo output (1 -> 2).
+        for (size_t i = 0; i < buffer.size(); ++i) {
+          destination[2 * i] = buffer[i];
+          destination[2 * i + 1] = buffer[i];
+        }
+      } else if (buffer.size() == 2 * destination.size()) {
+        // Recorded input signal in `buffer` is in stereo. Do channel downmix
+        // to match mono output (2 -> 1).
+        for (size_t i = 0; i < destination.size(); ++i) {
+          destination[i] =
+              (static_cast<int32_t>(buffer[2 * i]) + buffer[2 * i + 1]) / 2;
+        }
+      } else {
+        RTC_DCHECK_NOTREACHED() << "Required conversion is not supported";
+      }
+      fifo_.pop_front();
+    }
+  }
+
+  size_t size() const {
+    MutexLock lock(&lock_);
+    return fifo_.size();
+  }
+
+  size_t max_size() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    return max_size_;
+  }
+
+  size_t average_size() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    return 0.5 + static_cast<float>(written_elements_ / write_count_);
+  }
+
+  using Buffer16 = rtc::BufferT<int16_t>;
+
+  mutable Mutex lock_;
+  rtc::RaceChecker race_checker_;
+
+  std::list<Buffer16> fifo_ RTC_GUARDED_BY(lock_);
+  size_t write_count_ RTC_GUARDED_BY(race_checker_) = 0;
+  size_t max_size_ RTC_GUARDED_BY(race_checker_) = 0;
+  size_t written_elements_ RTC_GUARDED_BY(race_checker_) = 0;
+};
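One detail worth noting in Read() above: the stereo-to-mono branch widens to int32_t before averaging, since the sum of two int16_t samples can exceed the 16-bit range. A standalone restatement of that arithmetic:

    #include <cstdint>

    // E.g. left = right = 30000: an int16_t sum would wrap, while the
    // int32_t sum (60000) divides down to the exact average, 30000.
    int16_t DownmixPairSketch(int16_t left, int16_t right) {
      return static_cast<int16_t>(
          (static_cast<int32_t>(left) + static_cast<int32_t>(right)) / 2);
    }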
+
+// Inserts periodic impulses and measures the latency between the time of
+// transmission and time of receiving the same impulse.
+class LatencyAudioStream : public AudioStream {
+ public:
+  LatencyAudioStream() {
+    // Delay thread checkers from being initialized until first callback from
+    // respective thread.
+    read_thread_checker_.Detach();
+    write_thread_checker_.Detach();
+  }
+
+  // Insert periodic impulses in first two samples of `destination`.
+  void Read(rtc::ArrayView<int16_t> destination) override {
+    RTC_DCHECK_RUN_ON(&read_thread_checker_);
+    if (read_count_ == 0) {
+      PRINT("[");
+    }
+    read_count_++;
+    std::fill(destination.begin(), destination.end(), 0);
+    if (read_count_ % (kNumCallbacksPerSecond / kImpulseFrequencyInHz) == 0) {
+      PRINT(".");
+      {
+        MutexLock lock(&lock_);
+        if (!pulse_time_) {
+          pulse_time_ = rtc::TimeMillis();
+        }
+      }
+      constexpr int16_t impulse = std::numeric_limits<int16_t>::max();
+      std::fill_n(destination.begin(), 2, impulse);
+    }
+  }
+
+  // Detect received impulses in `source`, derive time between transmission and
+  // detection and add the calculated delay to list of latencies.
+  void Write(rtc::ArrayView<const int16_t> source) override {
+    RTC_DCHECK_RUN_ON(&write_thread_checker_);
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    MutexLock lock(&lock_);
+    write_count_++;
+    if (!pulse_time_) {
+      // Avoid detection of new impulse response until a new impulse has
+      // been transmitted (sets `pulse_time_` to value larger than zero).
+      return;
+    }
+    // Find index (element position in vector) of the max element.
+    const size_t index_of_max =
+        std::max_element(source.begin(), source.end()) - source.begin();
+    // Derive time between transmitted pulse and received pulse if the level
+    // is high enough (removes noise).
+    const size_t max = source[index_of_max];
+    if (max > kImpulseThreshold) {
+      PRINTD("(%zu, %zu)", max, index_of_max);
+      int64_t now_time = rtc::TimeMillis();
+      int extra_delay = IndexToMilliseconds(index_of_max, source.size());
+      PRINTD("[%d]", rtc::checked_cast<int>(now_time - pulse_time_));
+      PRINTD("[%d]", extra_delay);
+      // Total latency is the difference between transmit time and detection
+      // time plus the extra delay within the buffer in which we detected the
+      // received impulse. It is transmitted at sample 0 but can be received
+      // at sample N where N > 0. The term `extra_delay` accounts for N and it
+      // is a value between 0 and 10ms.
+      latencies_.push_back(now_time - *pulse_time_ + extra_delay);
+      pulse_time_.reset();
+    } else {
+      PRINTD("-");
+    }
+  }
+
+  size_t num_latency_values() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    return latencies_.size();
+  }
+
+  int min_latency() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    if (latencies_.empty())
+      return 0;
+    return *std::min_element(latencies_.begin(), latencies_.end());
+  }
+
+  int max_latency() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    if (latencies_.empty())
+      return 0;
+    return *std::max_element(latencies_.begin(), latencies_.end());
+  }
+
+  int average_latency() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    if (latencies_.empty())
+      return 0;
+    return 0.5 + static_cast<double>(std::accumulate(latencies_.begin(),
+                                                     latencies_.end(), 0)) /
+                     latencies_.size();
+  }
+
+  void PrintResults() const {
+    RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+    PRINT("] ");
+    for (auto it = latencies_.begin(); it != latencies_.end(); ++it) {
+      PRINTD("%d ", *it);
+    }
+    PRINT("\n");
+    PRINT("[..........] [min, max, avg]=[%d, %d, %d] ms\n", min_latency(),
+          max_latency(), average_latency());
+  }
+
+  Mutex lock_;
+  rtc::RaceChecker race_checker_;
+  SequenceChecker read_thread_checker_;
+  SequenceChecker write_thread_checker_;
+
+  absl::optional<int64_t> pulse_time_ RTC_GUARDED_BY(lock_);
+  std::vector<int> latencies_ RTC_GUARDED_BY(race_checker_);
+  size_t read_count_ RTC_GUARDED_BY(read_thread_checker_) = 0;
+  size_t write_count_ RTC_GUARDED_BY(write_thread_checker_) = 0;
+};
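Worked example of the latency bookkeeping above, with hypothetical numbers: an impulse transmitted at t = 1000 ms is detected at t = 1150 ms at sample index 240 of a 480-frame (10 ms) buffer. IndexToMilliseconds(240, 480) computes 10.0 * 240/480 + 0.5 = 5.5 and truncates to 5 ms of in-buffer delay, so the value pushed into latencies_ is (1150 - 1000) + 5 = 155 ms; min_latency(), max_latency() and average_latency() then summarize the collected values.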
+    // callbacks, verify that the provided audio parameters are maintained and
+    // that each callback corresponds to 10ms for any given sample rate.
+    if (!record_parameters_.is_complete()) {
+      record_parameters_.reset(sample_rate, channels, samples_per_channel);
+    } else {
+      EXPECT_EQ(samples_per_channel, record_parameters_.frames_per_buffer());
+      EXPECT_EQ(bytes_per_frame, record_parameters_.GetBytesPerFrame());
+      EXPECT_EQ(channels, record_parameters_.channels());
+      EXPECT_EQ(static_cast<int>(sample_rate),
+                record_parameters_.sample_rate());
+      EXPECT_EQ(samples_per_channel,
+                record_parameters_.frames_per_10ms_buffer());
+    }
+    {
+      MutexLock lock(&lock_);
+      rec_count_++;
+    }
+    // Write audio data to audio stream object if one has been injected.
+    if (audio_stream_) {
+      audio_stream_->Write(
+          rtc::MakeArrayView(static_cast<const int16_t*>(audio_buffer),
+                             samples_per_channel * channels));
+    }
+    // Signal the event after a given number of callbacks.
+    if (event_ && ReceivedEnoughCallbacks()) {
+      event_->Set();
+    }
+    return 0;
+  }
+
+  int32_t RealNeedMorePlayData(const size_t samples_per_channel,
+                               const size_t bytes_per_frame,
+                               const size_t channels,
+                               const uint32_t sample_rate,
+                               void* audio_buffer,
+                               size_t& samples_out,
+                               int64_t* elapsed_time_ms,
+                               int64_t* ntp_time_ms) {
+    EXPECT_TRUE(play_mode()) << "No test is expecting these callbacks.";
+    // Store audio parameters once in the first callback. For all other
+    // callbacks, verify that the provided audio parameters are maintained and
+    // that each callback corresponds to 10ms for any given sample rate.
+    if (!playout_parameters_.is_complete()) {
+      playout_parameters_.reset(sample_rate, channels, samples_per_channel);
+    } else {
+      EXPECT_EQ(samples_per_channel, playout_parameters_.frames_per_buffer());
+      EXPECT_EQ(bytes_per_frame, playout_parameters_.GetBytesPerFrame());
+      EXPECT_EQ(channels, playout_parameters_.channels());
+      EXPECT_EQ(static_cast<int>(sample_rate),
+                playout_parameters_.sample_rate());
+      EXPECT_EQ(samples_per_channel,
+                playout_parameters_.frames_per_10ms_buffer());
+    }
+    {
+      MutexLock lock(&lock_);
+      play_count_++;
+    }
+    samples_out = samples_per_channel * channels;
+    // Read audio data from audio stream object if one has been injected.
+    if (audio_stream_) {
+      audio_stream_->Read(rtc::MakeArrayView(
+          static_cast<int16_t*>(audio_buffer), samples_per_channel * channels));
+    } else {
+      // Fill the audio buffer with zeros to avoid disturbing audio.
+      const size_t num_bytes = samples_per_channel * bytes_per_frame;
+      std::memset(audio_buffer, 0, num_bytes);
+    }
+    // Signal the event after a given number of callbacks.
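+    // Note on the completion condition: in full-duplex mode the event fires
+    // only once both play_count_ and rec_count_ have reached num_callbacks_
+    // (see ReceivedEnoughCallbacks() below). Since each callback corresponds
+    // to 10 ms of audio, num_callbacks_ = 100 would mean roughly one second
+    // of audio in each direction; the exact value is whatever the test
+    // passed to HandleCallbacks().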
+    if (event_ && ReceivedEnoughCallbacks()) {
+      event_->Set();
+    }
+    return 0;
+  }
+
+  bool ReceivedEnoughCallbacks() {
+    bool recording_done = false;
+    if (rec_mode()) {
+      MutexLock lock(&lock_);
+      recording_done = rec_count_ >= num_callbacks_;
+    } else {
+      recording_done = true;
+    }
+    bool playout_done = false;
+    if (play_mode()) {
+      MutexLock lock(&lock_);
+      playout_done = play_count_ >= num_callbacks_;
+    } else {
+      playout_done = true;
+    }
+    return recording_done && playout_done;
+  }
+
+  bool play_mode() const {
+    return type_ == TransportType::kPlay ||
+           type_ == TransportType::kPlayAndRecord;
+  }
+
+  bool rec_mode() const {
+    return type_ == TransportType::kRecord ||
+           type_ == TransportType::kPlayAndRecord;
+  }
+
+  void ResetCallbackCounters() {
+    MutexLock lock(&lock_);
+    if (play_mode()) {
+      play_count_ = 0;
+    }
+    if (rec_mode()) {
+      rec_count_ = 0;
+    }
+  }
+
+ private:
+  Mutex lock_;
+  TransportType type_ = TransportType::kInvalid;
+  rtc::Event* event_ = nullptr;
+  AudioStream* audio_stream_ = nullptr;
+  size_t num_callbacks_ = 0;
+  size_t play_count_ RTC_GUARDED_BY(lock_) = 0;
+  size_t rec_count_ RTC_GUARDED_BY(lock_) = 0;
+  AudioParameters playout_parameters_;
+  AudioParameters record_parameters_;
+};
+
+// AudioDeviceTest test fixture.
+
+// bugs.webrtc.org/9808
+// Both the tests and the code under test are very old, unstaffed and not
+// a part of the WebRTC stack.
+// Here sanitizers make the tests hang, without providing a useful report.
+// So we are just disabling them, without intention to re-enable them.
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER) || defined(UNDEFINED_SANITIZER)
+#define MAYBE_AudioDeviceTest DISABLED_AudioDeviceTest
+#else
+#define MAYBE_AudioDeviceTest AudioDeviceTest
+#endif
+
+class MAYBE_AudioDeviceTest
+    : public ::testing::TestWithParam<webrtc::AudioDeviceModule::AudioLayer> {
+ protected:
+  MAYBE_AudioDeviceTest()
+      : audio_layer_(GetParam()),
+        task_queue_factory_(CreateDefaultTaskQueueFactory()) {
+    rtc::LogMessage::LogToDebug(rtc::LS_INFO);
+    // Add extra logging fields here if needed for debugging.
+    rtc::LogMessage::LogTimestamps();
+    rtc::LogMessage::LogThreads();
+    audio_device_ = CreateAudioDevice();
+    EXPECT_NE(audio_device_.get(), nullptr);
+    AudioDeviceModule::AudioLayer audio_layer;
+    int got_platform_audio_layer =
+        audio_device_->ActiveAudioLayer(&audio_layer);
+    // First, ensure that a valid audio layer can be activated.
+    if (got_platform_audio_layer != 0) {
+      requirements_satisfied_ = false;
+    }
+    // Next, verify that the ADM can be initialized.
+    if (requirements_satisfied_) {
+      requirements_satisfied_ = (audio_device_->Init() == 0);
+    }
+    // Finally, ensure that at least one valid device exists in each direction.
+    if (requirements_satisfied_) {
+      const int16_t num_playout_devices = audio_device_->PlayoutDevices();
+      const int16_t num_record_devices = audio_device_->RecordingDevices();
+      requirements_satisfied_ =
+          num_playout_devices > 0 && num_record_devices > 0;
+    }
+    if (requirements_satisfied_) {
+      EXPECT_EQ(0, audio_device_->SetPlayoutDevice(AUDIO_DEVICE_ID));
+      EXPECT_EQ(0, audio_device_->InitSpeaker());
+      EXPECT_EQ(0, audio_device_->StereoPlayoutIsAvailable(&stereo_playout_));
+      EXPECT_EQ(0, audio_device_->SetStereoPlayout(stereo_playout_));
+      EXPECT_EQ(0, audio_device_->SetRecordingDevice(AUDIO_DEVICE_ID));
+      EXPECT_EQ(0, audio_device_->InitMicrophone());
+      // Avoid asking for input stereo support and always record in mono
+      // since asking can cause issues in combination with remote desktop.
+      // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7397 for
+      // details.
+      EXPECT_EQ(0, audio_device_->SetStereoRecording(false));
+    }
+  }
+
+  // This is needed by all tests using MockAudioTransport,
+  // since there is no way to unregister it.
+  // Without Terminate(), audio_device would still access
+  // the destructed mock via "webrtc_audio_module_rec_thread".
+  // An alternative would be for the mock to outlive audio_device.
+  void PreTearDown() { EXPECT_EQ(0, audio_device_->Terminate()); }
+
+  virtual ~MAYBE_AudioDeviceTest() {
+    if (audio_device_) {
+      EXPECT_EQ(0, audio_device_->Terminate());
+    }
+  }
+
+  bool requirements_satisfied() const { return requirements_satisfied_; }
+  rtc::Event* event() { return &event_; }
+  AudioDeviceModule::AudioLayer audio_layer() const { return audio_layer_; }
+
+  // AudioDeviceModuleForTest extends the default ADM interface with some extra
+  // test methods. Intended for usage in tests only and requires a unique
+  // factory method. See CreateAudioDevice() for details.
+  const rtc::scoped_refptr<AudioDeviceModuleForTest>& audio_device() const {
+    return audio_device_;
+  }
+
+  rtc::scoped_refptr<AudioDeviceModuleForTest> CreateAudioDevice() {
+    // Use the default factory for kPlatformDefaultAudio and a special factory
+    // CreateWindowsCoreAudioAudioDeviceModuleForTest() for kWindowsCoreAudio2.
+    // The value of `audio_layer_` is set at construction by GetParam() and two
+    // different layers are tested on Windows only.
+    if (audio_layer_ == AudioDeviceModule::kPlatformDefaultAudio) {
+      return AudioDeviceModule::CreateForTest(audio_layer_,
+                                              task_queue_factory_.get());
+    } else if (audio_layer_ == AudioDeviceModule::kWindowsCoreAudio2) {
+#ifdef WEBRTC_WIN
+      // We must initialize the COM library on a thread before calling any of
+      // the library functions. All COM functions in the ADM will return
+      // CO_E_NOTINITIALIZED otherwise.
+      com_initializer_ =
+          std::make_unique<ScopedCOMInitializer>(ScopedCOMInitializer::kMTA);
+      EXPECT_TRUE(com_initializer_->Succeeded());
+      EXPECT_TRUE(webrtc_win::core_audio_utility::IsSupported());
+      EXPECT_TRUE(webrtc_win::core_audio_utility::IsMMCSSSupported());
+      return CreateWindowsCoreAudioAudioDeviceModuleForTest(
+          task_queue_factory_.get(), true);
+#else
+      return nullptr;
+#endif
+    } else {
+      return nullptr;
+    }
+  }
+
+  void StartPlayout() {
+    EXPECT_FALSE(audio_device()->Playing());
+    EXPECT_EQ(0, audio_device()->InitPlayout());
+    EXPECT_TRUE(audio_device()->PlayoutIsInitialized());
+    EXPECT_EQ(0, audio_device()->StartPlayout());
+    EXPECT_TRUE(audio_device()->Playing());
+  }
+
+  void StopPlayout() {
+    EXPECT_EQ(0, audio_device()->StopPlayout());
+    EXPECT_FALSE(audio_device()->Playing());
+    EXPECT_FALSE(audio_device()->PlayoutIsInitialized());
+  }
+
+  void StartRecording() {
+    EXPECT_FALSE(audio_device()->Recording());
+    EXPECT_EQ(0, audio_device()->InitRecording());
+    EXPECT_TRUE(audio_device()->RecordingIsInitialized());
+    EXPECT_EQ(0, audio_device()->StartRecording());
+    EXPECT_TRUE(audio_device()->Recording());
+  }
+
+  void StopRecording() {
+    EXPECT_EQ(0, audio_device()->StopRecording());
+    EXPECT_FALSE(audio_device()->Recording());
+    EXPECT_FALSE(audio_device()->RecordingIsInitialized());
+  }
+
+  bool NewWindowsAudioDeviceModuleIsUsed() {
+#ifdef WEBRTC_WIN
+    AudioDeviceModule::AudioLayer audio_layer;
+    EXPECT_EQ(0, audio_device()->ActiveAudioLayer(&audio_layer));
+    if (audio_layer == AudioDeviceModule::kWindowsCoreAudio2) {
+      // Default device is always added as first element in the list and the
+      // default communication device as the second element. Hence, the list
+      // contains two extra elements in this case.
+      return true;
+    }
+#endif
+    return false;
+  }
+
+ private:
+#ifdef WEBRTC_WIN
+  // Windows Core Audio based ADM needs to run on a COM initialized thread.
+  std::unique_ptr<ScopedCOMInitializer> com_initializer_;
+#endif
+  AudioDeviceModule::AudioLayer audio_layer_;
+  std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+  bool requirements_satisfied_ = true;
+  rtc::Event event_;
+  rtc::scoped_refptr<AudioDeviceModuleForTest> audio_device_;
+  bool stereo_playout_ = false;
+};
+
+// Instead of using the test fixture, verify that the different factory methods
+// work as intended.
+TEST(MAYBE_AudioDeviceTestWin, ConstructDestructWithFactory) {
+  std::unique_ptr<TaskQueueFactory> task_queue_factory =
+      CreateDefaultTaskQueueFactory();
+  rtc::scoped_refptr<AudioDeviceModule> audio_device;
+  // The default factory should work for all platforms when a default ADM is
+  // requested.
+  audio_device = AudioDeviceModule::Create(
+      AudioDeviceModule::kPlatformDefaultAudio, task_queue_factory.get());
+  EXPECT_TRUE(audio_device);
+  audio_device = nullptr;
+#ifdef WEBRTC_WIN
+  // For Windows, the old factory method creates an ADM where the platform-
+  // specific parts are implemented by an AudioDeviceGeneric object. Verify
+  // that the old factory can't be used in combination with the latest audio
+  // layer AudioDeviceModule::kWindowsCoreAudio2.
+  audio_device = AudioDeviceModule::Create(
+      AudioDeviceModule::kWindowsCoreAudio2, task_queue_factory.get());
+  EXPECT_FALSE(audio_device);
+  audio_device = nullptr;
+  // Instead, ensure that the new dedicated factory method called
+  // CreateWindowsCoreAudioAudioDeviceModule() can be used on Windows and that
+  // it sets the audio layer to kWindowsCoreAudio2 implicitly. Note that the
+  // new ADM for Windows must be created on a COM thread.
+  ScopedCOMInitializer com_initializer(ScopedCOMInitializer::kMTA);
+  EXPECT_TRUE(com_initializer.Succeeded());
+  audio_device =
+      CreateWindowsCoreAudioAudioDeviceModule(task_queue_factory.get());
+  EXPECT_TRUE(audio_device);
+  AudioDeviceModule::AudioLayer audio_layer;
+  EXPECT_EQ(0, audio_device->ActiveAudioLayer(&audio_layer));
+  EXPECT_EQ(audio_layer, AudioDeviceModule::kWindowsCoreAudio2);
+#endif
+}
+
+// Uses the test fixture to create, initialize and destruct the ADM.
+TEST_P(MAYBE_AudioDeviceTest, ConstructDestructDefault) {}
+
+TEST_P(MAYBE_AudioDeviceTest, InitTerminate) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  // Initialization is part of the test fixture.
+  EXPECT_TRUE(audio_device()->Initialized());
+  EXPECT_EQ(0, audio_device()->Terminate());
+  EXPECT_FALSE(audio_device()->Initialized());
+}
+
+// Enumerate all available and active output devices.
+TEST_P(MAYBE_AudioDeviceTest, PlayoutDeviceNames) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  char device_name[kAdmMaxDeviceNameSize];
+  char unique_id[kAdmMaxGuidSize];
+  int num_devices = audio_device()->PlayoutDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0, audio_device()->PlayoutDeviceName(i, device_name, unique_id));
+  }
+  EXPECT_EQ(-1, audio_device()->PlayoutDeviceName(num_devices, device_name,
+                                                  unique_id));
+}
+
+// Enumerate all available and active input devices.
+TEST_P(MAYBE_AudioDeviceTest, RecordingDeviceNames) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  char device_name[kAdmMaxDeviceNameSize];
+  char unique_id[kAdmMaxGuidSize];
+  int num_devices = audio_device()->RecordingDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0,
+              audio_device()->RecordingDeviceName(i, device_name, unique_id));
+  }
+  EXPECT_EQ(-1, audio_device()->RecordingDeviceName(num_devices, device_name,
+                                                    unique_id));
+}
+
+// Counts the number of active output devices and ensures that all of them can
+// be selected.
+TEST_P(MAYBE_AudioDeviceTest, SetPlayoutDevice) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  int num_devices = audio_device()->PlayoutDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  // Verify that all available playout devices can be set (not enabled yet).
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0, audio_device()->SetPlayoutDevice(i));
+  }
+  EXPECT_EQ(-1, audio_device()->SetPlayoutDevice(num_devices));
+#ifdef WEBRTC_WIN
+  // On Windows, verify the alternative method where the user can select device
+  // by role.
+  EXPECT_EQ(
+      0, audio_device()->SetPlayoutDevice(AudioDeviceModule::kDefaultDevice));
+  EXPECT_EQ(0, audio_device()->SetPlayoutDevice(
+                   AudioDeviceModule::kDefaultCommunicationDevice));
+#endif
+}
+
+// Counts the number of active input devices and ensures that all of them can
+// be selected.
+TEST_P(MAYBE_AudioDeviceTest, SetRecordingDevice) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  int num_devices = audio_device()->RecordingDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  // Verify that all available recording devices can be set (not enabled yet).
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0, audio_device()->SetRecordingDevice(i));
+  }
+  EXPECT_EQ(-1, audio_device()->SetRecordingDevice(num_devices));
+#ifdef WEBRTC_WIN
+  // On Windows, verify the alternative method where the user can select device
+  // by role.
+  EXPECT_EQ(
+      0, audio_device()->SetRecordingDevice(AudioDeviceModule::kDefaultDevice));
+  EXPECT_EQ(0, audio_device()->SetRecordingDevice(
+                   AudioDeviceModule::kDefaultCommunicationDevice));
+#endif
+}
+
+// Tests Start/Stop playout without any registered audio callback.
+TEST_P(MAYBE_AudioDeviceTest, StartStopPlayout) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartPlayout();
+  StopPlayout();
+}
+
+// Tests Start/Stop recording without any registered audio callback.
+TEST_P(MAYBE_AudioDeviceTest, StartStopRecording) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartRecording();
+  StopRecording();
+}
+
+// Tests Start/Stop playout for all available output devices to ensure that
+// the selected device can be created and used as intended.
+TEST_P(MAYBE_AudioDeviceTest, StartStopPlayoutWithRealDevice) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  int num_devices = audio_device()->PlayoutDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  // Verify that all available playout devices can be set and used.
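+  // Reminder on indexing (illustration only): device indices are zero-based,
+  // so with e.g. three physical devices plus the two virtual entries added by
+  // the new Windows ADM (default and default communication device), valid
+  // indices are 0..4. The two virtual entries can therefore refer to the same
+  // endpoints as two of the physical indices.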
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0, audio_device()->SetPlayoutDevice(i));
+    StartPlayout();
+    StopPlayout();
+  }
+#ifdef WEBRTC_WIN
+  AudioDeviceModule::WindowsDeviceType device_role[] = {
+      AudioDeviceModule::kDefaultDevice,
+      AudioDeviceModule::kDefaultCommunicationDevice};
+  for (size_t i = 0; i < arraysize(device_role); ++i) {
+    EXPECT_EQ(0, audio_device()->SetPlayoutDevice(device_role[i]));
+    StartPlayout();
+    StopPlayout();
+  }
+#endif
+}
+
+// Tests Start/Stop recording for all available input devices to ensure that
+// the selected device can be created and used as intended.
+TEST_P(MAYBE_AudioDeviceTest, StartStopRecordingWithRealDevice) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  int num_devices = audio_device()->RecordingDevices();
+  if (NewWindowsAudioDeviceModuleIsUsed()) {
+    num_devices += 2;
+  }
+  EXPECT_GT(num_devices, 0);
+  // Verify that all available recording devices can be set and used.
+  for (int i = 0; i < num_devices; ++i) {
+    EXPECT_EQ(0, audio_device()->SetRecordingDevice(i));
+    StartRecording();
+    StopRecording();
+  }
+#ifdef WEBRTC_WIN
+  AudioDeviceModule::WindowsDeviceType device_role[] = {
+      AudioDeviceModule::kDefaultDevice,
+      AudioDeviceModule::kDefaultCommunicationDevice};
+  for (size_t i = 0; i < arraysize(device_role); ++i) {
+    EXPECT_EQ(0, audio_device()->SetRecordingDevice(device_role[i]));
+    StartRecording();
+    StopRecording();
+  }
+#endif
+}
+
+// Tests Init/Stop/Init recording without any registered audio callback.
+// See https://bugs.chromium.org/p/webrtc/issues/detail?id=8041 for details
+// on why this test is useful.
+TEST_P(MAYBE_AudioDeviceTest, InitStopInitRecording) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  EXPECT_TRUE(audio_device()->RecordingIsInitialized());
+  StopRecording();
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  StopRecording();
+}
+
+// Verify that additional attempts to initialize or start recording while
+// already active work. Additional calls should just be ignored.
+TEST_P(MAYBE_AudioDeviceTest, StartInitRecording) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartRecording();
+  // An additional attempt to initialize at this stage should be ignored.
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  // Same for an additional request to start recording while already active.
+  EXPECT_EQ(0, audio_device()->StartRecording());
+  StopRecording();
+}
+
+// Verify that additional attempts to initialize or start playout while
+// already active work. Additional calls should just be ignored.
+TEST_P(MAYBE_AudioDeviceTest, StartInitPlayout) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartPlayout();
+  // An additional attempt to initialize at this stage should be ignored.
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  // Same for an additional request to start playout while already active.
+  EXPECT_EQ(0, audio_device()->StartPlayout());
+  StopPlayout();
+}
+
+// Tests Init/Stop/Init recording while playout is active.
+TEST_P(MAYBE_AudioDeviceTest, InitStopInitRecordingWhilePlaying) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartPlayout();
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  EXPECT_TRUE(audio_device()->RecordingIsInitialized());
+  StopRecording();
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  StopRecording();
+  StopPlayout();
+}
+
+// Tests Init/Stop/Init playout without any registered audio callback.
+TEST_P(MAYBE_AudioDeviceTest, InitStopInitPlayout) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  EXPECT_TRUE(audio_device()->PlayoutIsInitialized());
+  StopPlayout();
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  StopPlayout();
+}
+
+// Tests Init/Stop/Init playout while recording is active.
+TEST_P(MAYBE_AudioDeviceTest, InitStopInitPlayoutWhileRecording) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartRecording();
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  EXPECT_TRUE(audio_device()->PlayoutIsInitialized());
+  StopPlayout();
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  StopPlayout();
+  StopRecording();
+}
+
+// TODO(henrika): restart without intermediate destruction is currently only
+// supported on Windows.
+#ifdef WEBRTC_WIN
+// Tests Start/Stop playout followed by a second session (emulates a restart
+// triggered by a user using public APIs).
+TEST_P(MAYBE_AudioDeviceTest, StartStopPlayoutWithExternalRestart) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartPlayout();
+  StopPlayout();
+  // Restart playout without destroying the ADM in between. Ensures that we
+  // support: Init(), Start(), Stop(), Init(), Start(), Stop().
+  StartPlayout();
+  StopPlayout();
+}
+
+// Tests Start/Stop recording followed by a second session (emulates a restart
+// triggered by a user using public APIs).
+TEST_P(MAYBE_AudioDeviceTest, StartStopRecordingWithExternalRestart) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  StartRecording();
+  StopRecording();
+  // Restart recording without destroying the ADM in between. Ensures that we
+  // support: Init(), Start(), Stop(), Init(), Start(), Stop().
+  StartRecording();
+  StopRecording();
+}
+
+// Tests Start/Stop playout followed by a second session (emulates a restart
+// triggered by an internal callback, e.g. one corresponding to a device
+// switch). Note that internal restart is only supported in combination with
+// the latest Windows ADM.
+TEST_P(MAYBE_AudioDeviceTest, StartStopPlayoutWithInternalRestart) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  if (audio_layer() != AudioDeviceModule::kWindowsCoreAudio2) {
+    return;
+  }
+  MockAudioTransport mock(TransportType::kPlay);
+  mock.HandleCallbacks(event(), nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, NeedMorePlayData(_, _, _, _, NotNull(), _, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartPlayout();
+  event()->Wait(kTestTimeOut);
+  EXPECT_TRUE(audio_device()->Playing());
+  // Restart playout but without stopping the internal audio thread.
+  // This procedure uses a non-public test API and it emulates what happens
+  // inside the ADM when e.g. a device is removed.
+  EXPECT_EQ(0, audio_device()->RestartPlayoutInternally());
+
+  // Run basic tests of public APIs while a restart attempt is active.
+  // These calls should now be very thin and not trigger any new actions.
+  EXPECT_EQ(-1, audio_device()->StopPlayout());
+  EXPECT_TRUE(audio_device()->Playing());
+  EXPECT_TRUE(audio_device()->PlayoutIsInitialized());
+  EXPECT_EQ(0, audio_device()->InitPlayout());
+  EXPECT_EQ(0, audio_device()->StartPlayout());
+
+  // Wait until audio has restarted and a new sequence of audio callbacks
+  // becomes active.
+  // TODO(henrika): is it possible to verify that the internal state transition
+  // is Stop->Init->Start?
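+  // Mock::VerifyAndClearExpectations() both checks the first round of
+  // EXPECT_CALL expectations and removes them, which is what allows a fresh
+  // expectation to be armed below for the callbacks that arrive after the
+  // restart; this is standard gmock behavior rather than anything specific
+  // to the ADM.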
+  ASSERT_TRUE(Mock::VerifyAndClearExpectations(&mock));
+  mock.ResetCallbackCounters();
+  EXPECT_CALL(mock, NeedMorePlayData(_, _, _, _, NotNull(), _, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  event()->Wait(kTestTimeOut);
+  EXPECT_TRUE(audio_device()->Playing());
+  // Stop playout and the audio thread after successful internal restart.
+  StopPlayout();
+  PreTearDown();
+}
+
+// Tests Start/Stop recording followed by a second session (emulates a restart
+// triggered by an internal callback, e.g. one corresponding to a device
+// switch). Note that internal restart is only supported in combination with
+// the latest Windows ADM.
+TEST_P(MAYBE_AudioDeviceTest, StartStopRecordingWithInternalRestart) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  if (audio_layer() != AudioDeviceModule::kWindowsCoreAudio2) {
+    return;
+  }
+  MockAudioTransport mock(TransportType::kRecord);
+  mock.HandleCallbacks(event(), nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, RecordedDataIsAvailable(NotNull(), _, _, _, _, Ge(0u), 0, _,
+                                            false, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartRecording();
+  event()->Wait(kTestTimeOut);
+  EXPECT_TRUE(audio_device()->Recording());
+  // Restart recording but without stopping the internal audio thread.
+  // This procedure uses a non-public test API and it emulates what happens
+  // inside the ADM when e.g. a device is removed.
+  EXPECT_EQ(0, audio_device()->RestartRecordingInternally());
+
+  // Run basic tests of public APIs while a restart attempt is active.
+  // These calls should now be very thin and not trigger any new actions.
+  EXPECT_EQ(-1, audio_device()->StopRecording());
+  EXPECT_TRUE(audio_device()->Recording());
+  EXPECT_TRUE(audio_device()->RecordingIsInitialized());
+  EXPECT_EQ(0, audio_device()->InitRecording());
+  EXPECT_EQ(0, audio_device()->StartRecording());
+
+  // Wait until audio has restarted and a new sequence of audio callbacks
+  // becomes active.
+  // TODO(henrika): is it possible to verify that the internal state transition
+  // is Stop->Init->Start?
+  ASSERT_TRUE(Mock::VerifyAndClearExpectations(&mock));
+  mock.ResetCallbackCounters();
+  EXPECT_CALL(mock, RecordedDataIsAvailable(NotNull(), _, _, _, _, Ge(0u), 0, _,
+                                            false, _))
+      .Times(AtLeast(kNumCallbacks));
+  event()->Wait(kTestTimeOut);
+  EXPECT_TRUE(audio_device()->Recording());
+  // Stop recording and the audio thread after successful internal restart.
+  StopRecording();
+  PreTearDown();
+}
+#endif  // #ifdef WEBRTC_WIN
+
+// Start playout and verify that the native audio layer starts asking for real
+// audio samples to play out using the NeedMorePlayData() callback.
+// Note that we can't add expectations on audio parameters in EXPECT_CALL
+// since parameters are not provided in each callback. We therefore test and
+// verify the parameters in the fake audio transport implementation instead.
+TEST_P(MAYBE_AudioDeviceTest, StartPlayoutVerifyCallbacks) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  MockAudioTransport mock(TransportType::kPlay);
+  mock.HandleCallbacks(event(), nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, NeedMorePlayData(_, _, _, _, NotNull(), _, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartPlayout();
+  event()->Wait(kTestTimeOut);
+  StopPlayout();
+  PreTearDown();
+}
+
+// Don't run these tests in combination with sanitizers.
+// They are already flaky *without* sanitizers.
+// Sanitizers seem to increase flakiness (which brings noise),
+// without reporting anything.
+// TODO(webrtc:10867): Re-enable when flakiness fixed.
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+#define MAYBE_StartRecordingVerifyCallbacks \
+  DISABLED_StartRecordingVerifyCallbacks
+#define MAYBE_StartPlayoutAndRecordingVerifyCallbacks \
+  DISABLED_StartPlayoutAndRecordingVerifyCallbacks
+#else
+#define MAYBE_StartRecordingVerifyCallbacks StartRecordingVerifyCallbacks
+#define MAYBE_StartPlayoutAndRecordingVerifyCallbacks \
+  StartPlayoutAndRecordingVerifyCallbacks
+#endif
+
+// Start recording and verify that the native audio layer starts providing real
+// audio samples using the RecordedDataIsAvailable() callback.
+TEST_P(MAYBE_AudioDeviceTest, MAYBE_StartRecordingVerifyCallbacks) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  MockAudioTransport mock(TransportType::kRecord);
+  mock.HandleCallbacks(event(), nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, RecordedDataIsAvailable(NotNull(), _, _, _, _, Ge(0u), 0, _,
+                                            false, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartRecording();
+  event()->Wait(kTestTimeOut);
+  StopRecording();
+  PreTearDown();
+}
+
+// Start playout and recording (full-duplex audio) and verify that audio is
+// active in both directions.
+TEST_P(MAYBE_AudioDeviceTest, MAYBE_StartPlayoutAndRecordingVerifyCallbacks) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  MockAudioTransport mock(TransportType::kPlayAndRecord);
+  mock.HandleCallbacks(event(), nullptr, kNumCallbacks);
+  EXPECT_CALL(mock, NeedMorePlayData(_, _, _, _, NotNull(), _, _, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_CALL(mock, RecordedDataIsAvailable(NotNull(), _, _, _, _, Ge(0u), 0, _,
+                                            false, _))
+      .Times(AtLeast(kNumCallbacks));
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  StartPlayout();
+  StartRecording();
+  event()->Wait(kTestTimeOut);
+  StopRecording();
+  StopPlayout();
+  PreTearDown();
+}
+
+// Start playout and recording and store recorded data in an intermediate FIFO
+// buffer from which the playout side then reads its samples in the same order
+// as they were stored. Under ideal circumstances, a callback sequence would
+// look like: ...+-+-+-+-+-+-+-..., where '+' means 'packet recorded' and '-'
+// means 'packet played'. Under such conditions, the FIFO would contain at most
+// one packet, with an average somewhere in (0,1) depending on how long the
+// packets are buffered. However, under more realistic conditions, the size
+// of the FIFO will vary more due to an imbalance between the two sides.
+// This test tries to verify that the device maintains a balanced callback-
+// sequence by running in loopback for a few seconds while measuring the size
+// (max and average) of the FIFO. The size of the FIFO is increased by the
+// recording side and decreased by the playout side.
+TEST_P(MAYBE_AudioDeviceTest, RunPlayoutAndRecordingInFullDuplex) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  NiceMock<MockAudioTransport> mock(TransportType::kPlayAndRecord);
+  FifoAudioStream audio_stream;
+  mock.HandleCallbacks(event(), &audio_stream,
+                       kFullDuplexTime.seconds() * kNumCallbacksPerSecond);
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  // Run both sides using the same channel configuration to avoid conversions
+  // between mono/stereo while running in full duplex mode. Also, some devices
+  // (mainly on Windows) do not support mono.
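+  // For a rough sense of scale (illustrative numbers, not assertions made by
+  // this test): with 10 ms buffers there are about 100 callbacks per second
+  // in each direction, so a 5 second run would push roughly 500 buffers
+  // through the FIFO; a healthy run keeps the FIFO size small and stable
+  // instead of growing monotonically.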
+  EXPECT_EQ(0, audio_device()->SetStereoPlayout(true));
+  EXPECT_EQ(0, audio_device()->SetStereoRecording(true));
+  // Mute speakers to prevent howling.
+  EXPECT_EQ(0, audio_device()->SetSpeakerVolume(0));
+  StartPlayout();
+  StartRecording();
+  event()->Wait(std::max(kTestTimeOut, kFullDuplexTime));
+  StopRecording();
+  StopPlayout();
+  PreTearDown();
+}
+
+// Runs audio in full duplex until the user hits Enter. Intended as a manual
+// test to ensure that the audio quality is good and that real device switches
+// work as intended.
+TEST_P(MAYBE_AudioDeviceTest,
+       DISABLED_RunPlayoutAndRecordingInFullDuplexAndWaitForEnterKey) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  if (audio_layer() != AudioDeviceModule::kWindowsCoreAudio2) {
+    return;
+  }
+  NiceMock<MockAudioTransport> mock(TransportType::kPlayAndRecord);
+  FifoAudioStream audio_stream;
+  mock.HandleCallbacks(&audio_stream);
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  EXPECT_EQ(0, audio_device()->SetStereoPlayout(true));
+  EXPECT_EQ(0, audio_device()->SetStereoRecording(true));
+  // Ensure that the sample rate for both directions is identical so that we
+  // can always listen to our own voice. Will lead to rate conversion (and
+  // higher latency) if the native sample rate is not 48kHz.
+  EXPECT_EQ(0, audio_device()->SetPlayoutSampleRate(48000));
+  EXPECT_EQ(0, audio_device()->SetRecordingSampleRate(48000));
+  StartPlayout();
+  StartRecording();
+  do {
+    PRINT("Loopback audio is active at 48kHz. Press Enter to stop.\n");
+  } while (getchar() != '\n');
+  StopRecording();
+  StopPlayout();
+  PreTearDown();
+}
+
+// Measures loopback latency and reports the min, max and average values for
+// a full duplex audio session.
+// The latency is measured like so:
+// - Insert impulses periodically on the output side.
+// - Detect the impulses on the input side.
+// - Measure the time difference between the transmit time and receive time.
+// - Store time differences in a vector and calculate min, max and average.
+// This test needs the '--gtest_also_run_disabled_tests' flag to run and also
+// some sort of audio feedback loop. E.g. a headset where the mic is placed
+// close to the speaker to ensure highest possible echo. It is also recommended
+// to run the test at highest possible output volume.
+TEST_P(MAYBE_AudioDeviceTest, DISABLED_MeasureLoopbackLatency) {
+  SKIP_TEST_IF_NOT(requirements_satisfied());
+  NiceMock<MockAudioTransport> mock(TransportType::kPlayAndRecord);
+  LatencyAudioStream audio_stream;
+  mock.HandleCallbacks(event(), &audio_stream,
+                       kMeasureLatencyTime.seconds() * kNumCallbacksPerSecond);
+  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
+  EXPECT_EQ(0, audio_device()->SetStereoPlayout(true));
+  EXPECT_EQ(0, audio_device()->SetStereoRecording(true));
+  StartPlayout();
+  StartRecording();
+  event()->Wait(std::max(kTestTimeOut, kMeasureLatencyTime));
+  StopRecording();
+  StopPlayout();
+  // Avoid concurrent access to audio_stream.
+  PreTearDown();
+  // Verify that a sufficient number of transmitted impulses are detected.
+  EXPECT_GE(audio_stream.num_latency_values(),
+            static_cast<size_t>(
+                kImpulseFrequencyInHz * kMeasureLatencyTime.seconds() - 2));
+  // Print out min, max and average delay values for debugging purposes.
+  audio_stream.PrintResults();
+}
+
+#ifdef WEBRTC_WIN
+// Test two different audio layers (or rather two different Core Audio
+// implementations) for Windows.
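+// Each value listed below becomes a separate instantiation of every TEST_P
+// above; gtest names them with a /0, /1, ... suffix per parameter value
+// (e.g. AudioLayerWin/MAYBE_AudioDeviceTest.InitTerminate/0).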
+INSTANTIATE_TEST_SUITE_P( + AudioLayerWin, + MAYBE_AudioDeviceTest, + ::testing::Values(AudioDeviceModule::kPlatformDefaultAudio, + AudioDeviceModule::kWindowsCoreAudio2)); +#else +// For all platforms but Windows, only test the default audio layer. +INSTANTIATE_TEST_SUITE_P( + AudioLayer, + MAYBE_AudioDeviceTest, + ::testing::Values(AudioDeviceModule::kPlatformDefaultAudio)); +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.cc b/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.cc new file mode 100644 index 0000000000..b8fd837038 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/dummy/audio_device_dummy.h" + +namespace webrtc { + +int32_t AudioDeviceDummy::ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const { + return -1; +} + +AudioDeviceGeneric::InitStatus AudioDeviceDummy::Init() { + return InitStatus::OK; +} + +int32_t AudioDeviceDummy::Terminate() { + return 0; +} + +bool AudioDeviceDummy::Initialized() const { + return true; +} + +int16_t AudioDeviceDummy::PlayoutDevices() { + return -1; +} + +int16_t AudioDeviceDummy::RecordingDevices() { + return -1; +} + +int32_t AudioDeviceDummy::PlayoutDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) { + return -1; +} + +int32_t AudioDeviceDummy::RecordingDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) { + return -1; +} + +int32_t AudioDeviceDummy::SetPlayoutDevice(uint16_t index) { + return -1; +} + +int32_t AudioDeviceDummy::SetPlayoutDevice( + AudioDeviceModule::WindowsDeviceType device) { + return -1; +} + +int32_t AudioDeviceDummy::SetRecordingDevice(uint16_t index) { + return -1; +} + +int32_t AudioDeviceDummy::SetRecordingDevice( + AudioDeviceModule::WindowsDeviceType device) { + return -1; +} + +int32_t AudioDeviceDummy::PlayoutIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::InitPlayout() { + return -1; +} + +bool AudioDeviceDummy::PlayoutIsInitialized() const { + return false; +} + +int32_t AudioDeviceDummy::RecordingIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::InitRecording() { + return -1; +} + +bool AudioDeviceDummy::RecordingIsInitialized() const { + return false; +} + +int32_t AudioDeviceDummy::StartPlayout() { + return -1; +} + +int32_t AudioDeviceDummy::StopPlayout() { + return 0; +} + +bool AudioDeviceDummy::Playing() const { + return false; +} + +int32_t AudioDeviceDummy::StartRecording() { + return -1; +} + +int32_t AudioDeviceDummy::StopRecording() { + return 0; +} + +bool AudioDeviceDummy::Recording() const { + return false; +} + +int32_t AudioDeviceDummy::InitSpeaker() { + return -1; +} + +bool AudioDeviceDummy::SpeakerIsInitialized() const { + return false; +} + +int32_t AudioDeviceDummy::InitMicrophone() { + return -1; +} + +bool AudioDeviceDummy::MicrophoneIsInitialized() const { + return false; +} + +int32_t AudioDeviceDummy::SpeakerVolumeIsAvailable(bool& available) { + 
return -1; +} + +int32_t AudioDeviceDummy::SetSpeakerVolume(uint32_t volume) { + return -1; +} + +int32_t AudioDeviceDummy::SpeakerVolume(uint32_t& volume) const { + return -1; +} + +int32_t AudioDeviceDummy::MaxSpeakerVolume(uint32_t& maxVolume) const { + return -1; +} + +int32_t AudioDeviceDummy::MinSpeakerVolume(uint32_t& minVolume) const { + return -1; +} + +int32_t AudioDeviceDummy::MicrophoneVolumeIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::SetMicrophoneVolume(uint32_t volume) { + return -1; +} + +int32_t AudioDeviceDummy::MicrophoneVolume(uint32_t& volume) const { + return -1; +} + +int32_t AudioDeviceDummy::MaxMicrophoneVolume(uint32_t& maxVolume) const { + return -1; +} + +int32_t AudioDeviceDummy::MinMicrophoneVolume(uint32_t& minVolume) const { + return -1; +} + +int32_t AudioDeviceDummy::SpeakerMuteIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::SetSpeakerMute(bool enable) { + return -1; +} + +int32_t AudioDeviceDummy::SpeakerMute(bool& enabled) const { + return -1; +} + +int32_t AudioDeviceDummy::MicrophoneMuteIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::SetMicrophoneMute(bool enable) { + return -1; +} + +int32_t AudioDeviceDummy::MicrophoneMute(bool& enabled) const { + return -1; +} + +int32_t AudioDeviceDummy::StereoPlayoutIsAvailable(bool& available) { + return -1; +} +int32_t AudioDeviceDummy::SetStereoPlayout(bool enable) { + return -1; +} + +int32_t AudioDeviceDummy::StereoPlayout(bool& enabled) const { + return -1; +} + +int32_t AudioDeviceDummy::StereoRecordingIsAvailable(bool& available) { + return -1; +} + +int32_t AudioDeviceDummy::SetStereoRecording(bool enable) { + return -1; +} + +int32_t AudioDeviceDummy::StereoRecording(bool& enabled) const { + return -1; +} + +int32_t AudioDeviceDummy::PlayoutDelay(uint16_t& delayMS) const { + return -1; +} + +void AudioDeviceDummy::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.h b/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.h new file mode 100644 index 0000000000..2a2541098e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/dummy/audio_device_dummy.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_DUMMY_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_DUMMY_H_
+
+#include <stdint.h>
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+
+namespace webrtc {
+
+class AudioDeviceDummy : public AudioDeviceGeneric {
+ public:
+  AudioDeviceDummy() {}
+  virtual ~AudioDeviceDummy() {}
+
+  // Retrieve the currently utilized audio layer
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const override;
+
+  // Main initialization and termination
+  InitStatus Init() override;
+  int32_t Terminate() override;
+  bool Initialized() const override;
+
+  // Device enumeration
+  int16_t PlayoutDevices() override;
+  int16_t RecordingDevices() override;
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override;
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override;
+
+  // Device selection
+  int32_t SetPlayoutDevice(uint16_t index) override;
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+  int32_t SetRecordingDevice(uint16_t index) override;
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+
+  // Audio transport initialization
+  int32_t PlayoutIsAvailable(bool& available) override;
+  int32_t InitPlayout() override;
+  bool PlayoutIsInitialized() const override;
+  int32_t RecordingIsAvailable(bool& available) override;
+  int32_t InitRecording() override;
+  bool RecordingIsInitialized() const override;
+
+  // Audio transport control
+  int32_t StartPlayout() override;
+  int32_t StopPlayout() override;
+  bool Playing() const override;
+  int32_t StartRecording() override;
+  int32_t StopRecording() override;
+  bool Recording() const override;
+
+  // Audio mixer initialization
+  int32_t InitSpeaker() override;
+  bool SpeakerIsInitialized() const override;
+  int32_t InitMicrophone() override;
+  bool MicrophoneIsInitialized() const override;
+
+  // Speaker volume controls
+  int32_t SpeakerVolumeIsAvailable(bool& available) override;
+  int32_t SetSpeakerVolume(uint32_t volume) override;
+  int32_t SpeakerVolume(uint32_t& volume) const override;
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override;
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const override;
+
+  // Microphone volume controls
+  int32_t MicrophoneVolumeIsAvailable(bool& available) override;
+  int32_t SetMicrophoneVolume(uint32_t volume) override;
+  int32_t MicrophoneVolume(uint32_t& volume) const override;
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override;
+  int32_t MinMicrophoneVolume(uint32_t& minVolume) const override;
+
+  // Speaker mute control
+  int32_t SpeakerMuteIsAvailable(bool& available) override;
+  int32_t SetSpeakerMute(bool enable) override;
+  int32_t SpeakerMute(bool& enabled) const override;
+
+  // Microphone mute control
+  int32_t MicrophoneMuteIsAvailable(bool& available) override;
+  int32_t SetMicrophoneMute(bool enable) override;
+  int32_t MicrophoneMute(bool& enabled) const override;
+
+  // Stereo support
+  int32_t StereoPlayoutIsAvailable(bool& available) override;
+  int32_t SetStereoPlayout(bool enable) override;
+  int32_t StereoPlayout(bool& enabled) const override;
+  int32_t StereoRecordingIsAvailable(bool& available) override;
+  int32_t SetStereoRecording(bool enable) override;
+  int32_t StereoRecording(bool& enabled) const override;
+
+  // Delay information and control
+  int32_t PlayoutDelay(uint16_t& delayMS) const override;
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override;
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_AUDIO_DEVICE_DUMMY_H_
diff --git a/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.cc b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.cc
new file mode 100644
index 0000000000..8c10ae4186
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.cc
@@ -0,0 +1,508 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/dummy/file_audio_device.h"
+
+#include <string.h>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/time_utils.h"
+#include "system_wrappers/include/sleep.h"
+
+namespace webrtc {
+
+const int kRecordingFixedSampleRate = 48000;
+const size_t kRecordingNumChannels = 2;
+const int kPlayoutFixedSampleRate = 48000;
+const size_t kPlayoutNumChannels = 2;
+const size_t kPlayoutBufferSize =
+    kPlayoutFixedSampleRate / 100 * kPlayoutNumChannels * 2;
+const size_t kRecordingBufferSize =
+    kRecordingFixedSampleRate / 100 * kRecordingNumChannels * 2;
+
+FileAudioDevice::FileAudioDevice(absl::string_view inputFilename,
+                                 absl::string_view outputFilename)
+    : _ptrAudioBuffer(NULL),
+      _recordingBuffer(NULL),
+      _playoutBuffer(NULL),
+      _recordingFramesLeft(0),
+      _playoutFramesLeft(0),
+      _recordingBufferSizeIn10MS(0),
+      _recordingFramesIn10MS(0),
+      _playoutFramesIn10MS(0),
+      _playing(false),
+      _recording(false),
+      _lastCallPlayoutMillis(0),
+      _lastCallRecordMillis(0),
+      _outputFilename(outputFilename),
+      _inputFilename(inputFilename) {}
+
+FileAudioDevice::~FileAudioDevice() {}
+
+int32_t FileAudioDevice::ActiveAudioLayer(
+    AudioDeviceModule::AudioLayer& audioLayer) const {
+  return -1;
+}
+
+AudioDeviceGeneric::InitStatus FileAudioDevice::Init() {
+  return InitStatus::OK;
+}
+
+int32_t FileAudioDevice::Terminate() {
+  return 0;
+}
+
+bool FileAudioDevice::Initialized() const {
+  return true;
+}
+
+int16_t FileAudioDevice::PlayoutDevices() {
+  return 1;
+}
+
+int16_t FileAudioDevice::RecordingDevices() {
+  return 1;
+}
+
+int32_t FileAudioDevice::PlayoutDeviceName(uint16_t index,
+                                           char name[kAdmMaxDeviceNameSize],
+                                           char guid[kAdmMaxGuidSize]) {
+  const char* kName = "dummy_device";
+  const char* kGuid = "dummy_device_unique_id";
+  if (index < 1) {
+    memset(name, 0, kAdmMaxDeviceNameSize);
+    memset(guid, 0, kAdmMaxGuidSize);
+    memcpy(name, kName, strlen(kName));
+    memcpy(guid, kGuid, strlen(kGuid));
+    return 0;
+  }
+  return -1;
+}
+
+int32_t FileAudioDevice::RecordingDeviceName(uint16_t index,
+                                             char name[kAdmMaxDeviceNameSize],
+                                             char guid[kAdmMaxGuidSize]) {
+  const char* kName = "dummy_device";
+  const char* kGuid = "dummy_device_unique_id";
+  if (index < 1) {
+    memset(name, 0, kAdmMaxDeviceNameSize);
+    memset(guid, 0, kAdmMaxGuidSize);
+    memcpy(name, kName, strlen(kName));
+    memcpy(guid, kGuid, strlen(kGuid));
+    return 0;
+  }
+  return -1;
+}
+
+int32_t FileAudioDevice::SetPlayoutDevice(uint16_t index) {
+  if (index == 0) {
+    _playout_index = index;
+    return 0;
+  }
+  return -1;
+}
+
+int32_t FileAudioDevice::SetPlayoutDevice(
+    AudioDeviceModule::WindowsDeviceType device) {
+  return -1;
+}
+
+int32_t FileAudioDevice::SetRecordingDevice(uint16_t index) {
+  if (index == 0) {
+    _record_index = index;
+    return _record_index;
+  }
+  return -1;
+}
+
+int32_t FileAudioDevice::SetRecordingDevice(
+    AudioDeviceModule::WindowsDeviceType device) {
+  return -1;
+}
+
+int32_t FileAudioDevice::PlayoutIsAvailable(bool& available) {
+  if (_playout_index == 0) {
+    available = true;
+    return _playout_index;
+  }
+  available = false;
+  return -1;
+}
+
+int32_t FileAudioDevice::InitPlayout() {
+  MutexLock lock(&mutex_);
+
+  if (_playing) {
+    return -1;
+  }
+
+  _playoutFramesIn10MS = static_cast<size_t>(kPlayoutFixedSampleRate / 100);
+
+  if (_ptrAudioBuffer) {
+    // Update webrtc audio buffer with the selected parameters
+    _ptrAudioBuffer->SetPlayoutSampleRate(kPlayoutFixedSampleRate);
+    _ptrAudioBuffer->SetPlayoutChannels(kPlayoutNumChannels);
+  }
+  return 0;
+}
+
+bool FileAudioDevice::PlayoutIsInitialized() const {
+  return _playoutFramesIn10MS != 0;
+}
+
+int32_t FileAudioDevice::RecordingIsAvailable(bool& available) {
+  if (_record_index == 0) {
+    available = true;
+    return _record_index;
+  }
+  available = false;
+  return -1;
+}
+
+int32_t FileAudioDevice::InitRecording() {
+  MutexLock lock(&mutex_);
+
+  if (_recording) {
+    return -1;
+  }
+
+  _recordingFramesIn10MS = static_cast<size_t>(kRecordingFixedSampleRate / 100);
+
+  if (_ptrAudioBuffer) {
+    _ptrAudioBuffer->SetRecordingSampleRate(kRecordingFixedSampleRate);
+    _ptrAudioBuffer->SetRecordingChannels(kRecordingNumChannels);
+  }
+  return 0;
+}
+
+bool FileAudioDevice::RecordingIsInitialized() const {
+  return _recordingFramesIn10MS != 0;
+}
+
+int32_t FileAudioDevice::StartPlayout() {
+  if (_playing) {
+    return 0;
+  }
+
+  _playing = true;
+  _playoutFramesLeft = 0;
+
+  if (!_playoutBuffer) {
+    _playoutBuffer = new int8_t[kPlayoutBufferSize];
+  }
+  if (!_playoutBuffer) {
+    _playing = false;
+    return -1;
+  }
+
+  // PLAYOUT
+  if (!_outputFilename.empty()) {
+    _outputFile = FileWrapper::OpenWriteOnly(_outputFilename);
+    if (!_outputFile.is_open()) {
+      RTC_LOG(LS_ERROR) << "Failed to open playout file: " << _outputFilename;
+      _playing = false;
+      delete[] _playoutBuffer;
+      _playoutBuffer = NULL;
+      return -1;
+    }
+  }
+
+  _ptrThreadPlay = rtc::PlatformThread::SpawnJoinable(
+      [this] {
+        while (PlayThreadProcess()) {
+        }
+      },
+      "webrtc_audio_module_play_thread",
+      rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime));
+
+  RTC_LOG(LS_INFO) << "Started playout capture to output file: "
+                   << _outputFilename;
+  return 0;
+}
+
+int32_t FileAudioDevice::StopPlayout() {
+  {
+    MutexLock lock(&mutex_);
+    _playing = false;
+  }
+
+  // stop playout thread first
+  if (!_ptrThreadPlay.empty())
+    _ptrThreadPlay.Finalize();
+
+  MutexLock lock(&mutex_);
+
+  _playoutFramesLeft = 0;
+  delete[] _playoutBuffer;
+  _playoutBuffer = NULL;
+  _outputFile.Close();
+
+  RTC_LOG(LS_INFO) << "Stopped playout capture to output file: "
+                   << _outputFilename;
+  return 0;
+}
+
+bool FileAudioDevice::Playing() const {
+  return _playing;
+}
+
+int32_t FileAudioDevice::StartRecording() {
+  _recording = true;
+
+  // Make sure we only create the buffer once.
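+  // With the fixed format defined above this works out to 480 frames *
+  // 2 channels * 2 bytes = 1920 bytes per 10 ms chunk (48000 / 100 = 480
+  // frames per channel).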
+ _recordingBufferSizeIn10MS = + _recordingFramesIn10MS * kRecordingNumChannels * 2; + if (!_recordingBuffer) { + _recordingBuffer = new int8_t[_recordingBufferSizeIn10MS]; + } + + if (!_inputFilename.empty()) { + _inputFile = FileWrapper::OpenReadOnly(_inputFilename); + if (!_inputFile.is_open()) { + RTC_LOG(LS_ERROR) << "Failed to open audio input file: " + << _inputFilename; + _recording = false; + delete[] _recordingBuffer; + _recordingBuffer = NULL; + return -1; + } + } + + _ptrThreadRec = rtc::PlatformThread::SpawnJoinable( + [this] { + while (RecThreadProcess()) { + } + }, + "webrtc_audio_module_capture_thread", + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime)); + + RTC_LOG(LS_INFO) << "Started recording from input file: " << _inputFilename; + + return 0; +} + +int32_t FileAudioDevice::StopRecording() { + { + MutexLock lock(&mutex_); + _recording = false; + } + + if (!_ptrThreadRec.empty()) + _ptrThreadRec.Finalize(); + + MutexLock lock(&mutex_); + _recordingFramesLeft = 0; + if (_recordingBuffer) { + delete[] _recordingBuffer; + _recordingBuffer = NULL; + } + _inputFile.Close(); + + RTC_LOG(LS_INFO) << "Stopped recording from input file: " << _inputFilename; + return 0; +} + +bool FileAudioDevice::Recording() const { + return _recording; +} + +int32_t FileAudioDevice::InitSpeaker() { + return -1; +} + +bool FileAudioDevice::SpeakerIsInitialized() const { + return false; +} + +int32_t FileAudioDevice::InitMicrophone() { + return 0; +} + +bool FileAudioDevice::MicrophoneIsInitialized() const { + return true; +} + +int32_t FileAudioDevice::SpeakerVolumeIsAvailable(bool& available) { + return -1; +} + +int32_t FileAudioDevice::SetSpeakerVolume(uint32_t volume) { + return -1; +} + +int32_t FileAudioDevice::SpeakerVolume(uint32_t& volume) const { + return -1; +} + +int32_t FileAudioDevice::MaxSpeakerVolume(uint32_t& maxVolume) const { + return -1; +} + +int32_t FileAudioDevice::MinSpeakerVolume(uint32_t& minVolume) const { + return -1; +} + +int32_t FileAudioDevice::MicrophoneVolumeIsAvailable(bool& available) { + return -1; +} + +int32_t FileAudioDevice::SetMicrophoneVolume(uint32_t volume) { + return -1; +} + +int32_t FileAudioDevice::MicrophoneVolume(uint32_t& volume) const { + return -1; +} + +int32_t FileAudioDevice::MaxMicrophoneVolume(uint32_t& maxVolume) const { + return -1; +} + +int32_t FileAudioDevice::MinMicrophoneVolume(uint32_t& minVolume) const { + return -1; +} + +int32_t FileAudioDevice::SpeakerMuteIsAvailable(bool& available) { + return -1; +} + +int32_t FileAudioDevice::SetSpeakerMute(bool enable) { + return -1; +} + +int32_t FileAudioDevice::SpeakerMute(bool& enabled) const { + return -1; +} + +int32_t FileAudioDevice::MicrophoneMuteIsAvailable(bool& available) { + return -1; +} + +int32_t FileAudioDevice::SetMicrophoneMute(bool enable) { + return -1; +} + +int32_t FileAudioDevice::MicrophoneMute(bool& enabled) const { + return -1; +} + +int32_t FileAudioDevice::StereoPlayoutIsAvailable(bool& available) { + available = true; + return 0; +} +int32_t FileAudioDevice::SetStereoPlayout(bool enable) { + return 0; +} + +int32_t FileAudioDevice::StereoPlayout(bool& enabled) const { + enabled = true; + return 0; +} + +int32_t FileAudioDevice::StereoRecordingIsAvailable(bool& available) { + available = true; + return 0; +} + +int32_t FileAudioDevice::SetStereoRecording(bool enable) { + return 0; +} + +int32_t FileAudioDevice::StereoRecording(bool& enabled) const { + enabled = true; + return 0; +} + +int32_t FileAudioDevice::PlayoutDelay(uint16_t& delayMS) 
const {
+  return 0;
+}
+
+void FileAudioDevice::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+  MutexLock lock(&mutex_);
+
+  _ptrAudioBuffer = audioBuffer;
+
+  // Inform the AudioBuffer about default settings for this implementation.
+  // Set all values to zero here since the actual settings will be done by
+  // InitPlayout and InitRecording later.
+  _ptrAudioBuffer->SetRecordingSampleRate(0);
+  _ptrAudioBuffer->SetPlayoutSampleRate(0);
+  _ptrAudioBuffer->SetRecordingChannels(0);
+  _ptrAudioBuffer->SetPlayoutChannels(0);
+}
+
+bool FileAudioDevice::PlayThreadProcess() {
+  if (!_playing) {
+    return false;
+  }
+  int64_t currentTime = rtc::TimeMillis();
+  mutex_.Lock();
+
+  if (_lastCallPlayoutMillis == 0 ||
+      currentTime - _lastCallPlayoutMillis >= 10) {
+    mutex_.Unlock();
+    _ptrAudioBuffer->RequestPlayoutData(_playoutFramesIn10MS);
+    mutex_.Lock();
+
+    _playoutFramesLeft = _ptrAudioBuffer->GetPlayoutData(_playoutBuffer);
+    RTC_DCHECK_EQ(_playoutFramesIn10MS, _playoutFramesLeft);
+    if (_outputFile.is_open()) {
+      _outputFile.Write(_playoutBuffer, kPlayoutBufferSize);
+    }
+    _lastCallPlayoutMillis = currentTime;
+  }
+  _playoutFramesLeft = 0;
+  mutex_.Unlock();
+
+  int64_t deltaTimeMillis = rtc::TimeMillis() - currentTime;
+  if (deltaTimeMillis < 10) {
+    SleepMs(10 - deltaTimeMillis);
+  }
+
+  return true;
+}
+
+bool FileAudioDevice::RecThreadProcess() {
+  if (!_recording) {
+    return false;
+  }
+
+  int64_t currentTime = rtc::TimeMillis();
+  mutex_.Lock();
+
+  if (_lastCallRecordMillis == 0 || currentTime - _lastCallRecordMillis >= 10) {
+    if (_inputFile.is_open()) {
+      if (_inputFile.Read(_recordingBuffer, kRecordingBufferSize) > 0) {
+        _ptrAudioBuffer->SetRecordedBuffer(_recordingBuffer,
+                                           _recordingFramesIn10MS);
+      } else {
+        _inputFile.Rewind();
+      }
+      _lastCallRecordMillis = currentTime;
+      mutex_.Unlock();
+      _ptrAudioBuffer->DeliverRecordedData();
+      mutex_.Lock();
+    }
+  }
+
+  mutex_.Unlock();
+
+  int64_t deltaTimeMillis = rtc::TimeMillis() - currentTime;
+  if (deltaTimeMillis < 10) {
+    SleepMs(10 - deltaTimeMillis);
+  }
+
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.h b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.h
new file mode 100644
index 0000000000..27979933f2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_FILE_AUDIO_DEVICE_H_
+#define AUDIO_DEVICE_FILE_AUDIO_DEVICE_H_
+
+#include <stdio.h>
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+
+// This is a fake audio device which plays audio from a file as its microphone
+// and plays out into a file.
+class FileAudioDevice : public AudioDeviceGeneric {
+ public:
+  // Constructs a file audio device. It will read audio from
+  // `inputFilename` and record output audio to `outputFilename`.
+  //
+  // The input file should be a readable 48k stereo raw file, and the output
+  // file should point to a writable location. The output format will also be
+  // 48k stereo raw audio.
+  FileAudioDevice(absl::string_view inputFilename,
+                  absl::string_view outputFilename);
+  virtual ~FileAudioDevice();
+
+  // Retrieve the currently utilized audio layer
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const override;
+
+  // Main initialization and termination
+  InitStatus Init() override;
+  int32_t Terminate() override;
+  bool Initialized() const override;
+
+  // Device enumeration
+  int16_t PlayoutDevices() override;
+  int16_t RecordingDevices() override;
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override;
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override;
+
+  // Device selection
+  int32_t SetPlayoutDevice(uint16_t index) override;
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+  int32_t SetRecordingDevice(uint16_t index) override;
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+
+  // Audio transport initialization
+  int32_t PlayoutIsAvailable(bool& available) override;
+  int32_t InitPlayout() override;
+  bool PlayoutIsInitialized() const override;
+  int32_t RecordingIsAvailable(bool& available) override;
+  int32_t InitRecording() override;
+  bool RecordingIsInitialized() const override;
+
+  // Audio transport control
+  int32_t StartPlayout() override;
+  int32_t StopPlayout() override;
+  bool Playing() const override;
+  int32_t StartRecording() override;
+  int32_t StopRecording() override;
+  bool Recording() const override;
+
+  // Audio mixer initialization
+  int32_t InitSpeaker() override;
+  bool SpeakerIsInitialized() const override;
+  int32_t InitMicrophone() override;
+  bool MicrophoneIsInitialized() const override;
+
+  // Speaker volume controls
+  int32_t SpeakerVolumeIsAvailable(bool& available) override;
+  int32_t SetSpeakerVolume(uint32_t volume) override;
+  int32_t SpeakerVolume(uint32_t& volume) const override;
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override;
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const override;
+
+  // Microphone volume controls
+  int32_t MicrophoneVolumeIsAvailable(bool& available) override;
+  int32_t SetMicrophoneVolume(uint32_t volume) override;
+  int32_t MicrophoneVolume(uint32_t& volume) const override;
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override;
+  int32_t MinMicrophoneVolume(uint32_t& minVolume) const override;
+
+  // Speaker mute control
+  int32_t SpeakerMuteIsAvailable(bool& available) override;
+  int32_t SetSpeakerMute(bool enable) override;
+  int32_t SpeakerMute(bool& enabled) const override;
+
+  // Microphone mute control
+  int32_t MicrophoneMuteIsAvailable(bool& available) override;
+  int32_t SetMicrophoneMute(bool enable) override;
+  int32_t MicrophoneMute(bool& enabled) const override;
+
+  // Stereo support
+  int32_t StereoPlayoutIsAvailable(bool& available) override;
+  int32_t SetStereoPlayout(bool enable) override;
+  int32_t StereoPlayout(bool& enabled) const override;
+  int32_t StereoRecordingIsAvailable(bool& available) override;
+  int32_t SetStereoRecording(bool enable) override;
+  int32_t StereoRecording(bool& enabled) const override;
+
+  // Delay information and control
+  int32_t PlayoutDelay(uint16_t& delayMS) const override;
+
+  void
AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override; + + private: + static void RecThreadFunc(void*); + static void PlayThreadFunc(void*); + bool RecThreadProcess(); + bool PlayThreadProcess(); + + int32_t _playout_index; + int32_t _record_index; + AudioDeviceBuffer* _ptrAudioBuffer; + int8_t* _recordingBuffer; // In bytes. + int8_t* _playoutBuffer; // In bytes. + uint32_t _recordingFramesLeft; + uint32_t _playoutFramesLeft; + Mutex mutex_; + + size_t _recordingBufferSizeIn10MS; + size_t _recordingFramesIn10MS; + size_t _playoutFramesIn10MS; + + rtc::PlatformThread _ptrThreadRec; + rtc::PlatformThread _ptrThreadPlay; + + bool _playing; + bool _recording; + int64_t _lastCallPlayoutMillis; + int64_t _lastCallRecordMillis; + + FileWrapper _outputFile; + FileWrapper _inputFile; + std::string _outputFilename; + std::string _inputFilename; +}; + +} // namespace webrtc + +#endif // AUDIO_DEVICE_FILE_AUDIO_DEVICE_H_ diff --git a/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.cc b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.cc new file mode 100644 index 0000000000..8c41111478 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/dummy/file_audio_device_factory.h" + +#include + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_device/dummy/file_audio_device.h" +#include "rtc_base/logging.h" +#include "rtc_base/string_utils.h" + +namespace webrtc { + +bool FileAudioDeviceFactory::_isConfigured = false; +char FileAudioDeviceFactory::_inputAudioFilename[MAX_FILENAME_LEN] = ""; +char FileAudioDeviceFactory::_outputAudioFilename[MAX_FILENAME_LEN] = ""; + +FileAudioDevice* FileAudioDeviceFactory::CreateFileAudioDevice() { + // Bail out here if the files haven't been set explicitly. + // audio_device_impl.cc should then fall back to dummy audio. + if (!_isConfigured) { + RTC_LOG(LS_WARNING) + << "WebRTC configured with WEBRTC_DUMMY_FILE_DEVICES but " + "no device files supplied. Will fall back to dummy " + "audio."; + + return nullptr; + } + return new FileAudioDevice(_inputAudioFilename, _outputAudioFilename); +} + +void FileAudioDeviceFactory::SetFilenamesToUse( + absl::string_view inputAudioFilename, + absl::string_view outputAudioFilename) { +#ifdef WEBRTC_DUMMY_FILE_DEVICES + RTC_DCHECK_LT(inputAudioFilename.size(), MAX_FILENAME_LEN); + RTC_DCHECK_LT(outputAudioFilename.size(), MAX_FILENAME_LEN); + + // Copy the strings since we don't know the lifetime of the input pointers. + rtc::strcpyn(_inputAudioFilename, MAX_FILENAME_LEN, inputAudioFilename); + rtc::strcpyn(_outputAudioFilename, MAX_FILENAME_LEN, outputAudioFilename); + _isConfigured = true; +#else + // Sanity: must be compiled with the right define to run this. + printf( + "Trying to use dummy file devices, but is not compiled " + "with WEBRTC_DUMMY_FILE_DEVICES. 
Bailing out.\n"); + std::exit(1); +#endif +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.h b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.h new file mode 100644 index 0000000000..18f9388f21 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/dummy/file_audio_device_factory.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_DEVICE_FILE_AUDIO_DEVICE_FACTORY_H_ +#define AUDIO_DEVICE_FILE_AUDIO_DEVICE_FACTORY_H_ + +#include + +#include "absl/strings/string_view.h" + +namespace webrtc { + +class FileAudioDevice; + +// This class is used by audio_device_impl.cc when WebRTC is compiled with +// WEBRTC_DUMMY_FILE_DEVICES. The application must include this file and set the +// filenames to use before the audio device module is initialized. This is +// intended for test tools which use the audio device module. +class FileAudioDeviceFactory { + public: + static FileAudioDevice* CreateFileAudioDevice(); + + // The input file must be a readable 48k stereo raw file. The output + // file must be writable. The strings will be copied. + static void SetFilenamesToUse(absl::string_view inputAudioFilename, + absl::string_view outputAudioFilename); + + private: + enum : uint32_t { MAX_FILENAME_LEN = 512 }; + static bool _isConfigured; + static char _inputAudioFilename[MAX_FILENAME_LEN]; + static char _outputAudioFilename[MAX_FILENAME_LEN]; +}; + +} // namespace webrtc + +#endif // AUDIO_DEVICE_FILE_AUDIO_DEVICE_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.cc b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.cc new file mode 100644 index 0000000000..86240da196 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_device/fine_audio_buffer.h"
+
+#include <cstdint>
+#include <cstring>
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+FineAudioBuffer::FineAudioBuffer(AudioDeviceBuffer* audio_device_buffer)
+    : audio_device_buffer_(audio_device_buffer),
+      playout_samples_per_channel_10ms_(rtc::dchecked_cast<size_t>(
+          audio_device_buffer->PlayoutSampleRate() * 10 / 1000)),
+      record_samples_per_channel_10ms_(rtc::dchecked_cast<size_t>(
+          audio_device_buffer->RecordingSampleRate() * 10 / 1000)),
+      playout_channels_(audio_device_buffer->PlayoutChannels()),
+      record_channels_(audio_device_buffer->RecordingChannels()) {
+  RTC_DCHECK(audio_device_buffer_);
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  if (IsReadyForPlayout()) {
+    RTC_DLOG(LS_INFO) << "playout_samples_per_channel_10ms: "
+                      << playout_samples_per_channel_10ms_;
+    RTC_DLOG(LS_INFO) << "playout_channels: " << playout_channels_;
+  }
+  if (IsReadyForRecord()) {
+    RTC_DLOG(LS_INFO) << "record_samples_per_channel_10ms: "
+                      << record_samples_per_channel_10ms_;
+    RTC_DLOG(LS_INFO) << "record_channels: " << record_channels_;
+  }
+}
+
+FineAudioBuffer::~FineAudioBuffer() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+}
+
+void FineAudioBuffer::ResetPlayout() {
+  playout_buffer_.Clear();
+}
+
+void FineAudioBuffer::ResetRecord() {
+  record_buffer_.Clear();
+}
+
+bool FineAudioBuffer::IsReadyForPlayout() const {
+  return playout_samples_per_channel_10ms_ > 0 && playout_channels_ > 0;
+}
+
+bool FineAudioBuffer::IsReadyForRecord() const {
+  return record_samples_per_channel_10ms_ > 0 && record_channels_ > 0;
+}
+
+void FineAudioBuffer::GetPlayoutData(rtc::ArrayView<int16_t> audio_buffer,
+                                     int playout_delay_ms) {
+  RTC_DCHECK(IsReadyForPlayout());
+  // Ask WebRTC for new data in chunks of 10ms until we have enough to
+  // fulfill the request. It is possible that the buffer already contains
+  // enough samples from the last round.
+  while (playout_buffer_.size() < audio_buffer.size()) {
+    // Get 10ms decoded audio from WebRTC. The ADB knows about number of
+    // channels; hence we can ask for number of samples per channel here.
+    if (audio_device_buffer_->RequestPlayoutData(
+            playout_samples_per_channel_10ms_) ==
+        static_cast<int32_t>(playout_samples_per_channel_10ms_)) {
+      // Append 10ms to the end of the local buffer taking number of channels
+      // into account.
+      const size_t num_elements_10ms =
+          playout_channels_ * playout_samples_per_channel_10ms_;
+      const size_t written_elements = playout_buffer_.AppendData(
+          num_elements_10ms, [&](rtc::ArrayView<int16_t> buf) {
+            const size_t samples_per_channel_10ms =
+                audio_device_buffer_->GetPlayoutData(buf.data());
+            return playout_channels_ * samples_per_channel_10ms;
+          });
+      RTC_DCHECK_EQ(num_elements_10ms, written_elements);
+    } else {
+      // Provide silence if AudioDeviceBuffer::RequestPlayoutData() fails.
+      // Can e.g. happen when an AudioTransport has not been registered.
+      const size_t num_bytes = audio_buffer.size() * sizeof(int16_t);
+      std::memset(audio_buffer.data(), 0, num_bytes);
+      return;
+    }
+  }
+
+  // Provide the requested number of bytes to the consumer.
+  const size_t num_bytes = audio_buffer.size() * sizeof(int16_t);
+  memcpy(audio_buffer.data(), playout_buffer_.data(), num_bytes);
+  // Move remaining samples to start of buffer to prepare for next round.
+  memmove(playout_buffer_.data(), playout_buffer_.data() + audio_buffer.size(),
+          (playout_buffer_.size() - audio_buffer.size()) * sizeof(int16_t));
+  playout_buffer_.SetSize(playout_buffer_.size() - audio_buffer.size());
+  // Cache playout latency for usage in DeliverRecordedData().
+  playout_delay_ms_ = playout_delay_ms;
+}
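+
+// Worked example of the chunking in GetPlayoutData(): at 44.1 kHz stereo, one
+// 10ms chunk holds 441 samples per channel, i.e. 882 int16_t elements. A
+// request for 800 elements pulls one chunk (882 buffered), returns 800 and
+// leaves 82 in `playout_buffer_`; the next request pulls one more chunk
+// (82 + 882 = 964), returns 800 and leaves 164.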
+
+void FineAudioBuffer::DeliverRecordedData(
+    rtc::ArrayView<const int16_t> audio_buffer,
+    int record_delay_ms) {
+  RTC_DCHECK(IsReadyForRecord());
+  // Always append new data and grow the buffer when needed.
+  record_buffer_.AppendData(audio_buffer.data(), audio_buffer.size());
+  // Consume samples from buffer in chunks of 10ms until there is not
+  // enough data left. The number of remaining samples in the cache is given by
+  // the new size of the internal `record_buffer_`.
+  const size_t num_elements_10ms =
+      record_channels_ * record_samples_per_channel_10ms_;
+  while (record_buffer_.size() >= num_elements_10ms) {
+    audio_device_buffer_->SetRecordedBuffer(record_buffer_.data(),
+                                            record_samples_per_channel_10ms_);
+    audio_device_buffer_->SetVQEData(playout_delay_ms_, record_delay_ms);
+    audio_device_buffer_->DeliverRecordedData();
+    memmove(record_buffer_.data(), record_buffer_.data() + num_elements_10ms,
+            (record_buffer_.size() - num_elements_10ms) * sizeof(int16_t));
+    record_buffer_.SetSize(record_buffer_.size() - num_elements_10ms);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.h b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.h
new file mode 100644
index 0000000000..a6c3042bb2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer.h
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
+#define MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
+
+#include "api/array_view.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+
+// FineAudioBuffer takes an AudioDeviceBuffer (ADB) which deals with 16-bit PCM
+// audio samples corresponding to 10ms of data. It then allows for this data
+// to be pulled in a finer or coarser granularity. I.e. interacting with this
+// class instead of directly with the AudioDeviceBuffer one can ask for any
+// number of audio data samples. This class also ensures that audio data can be
+// delivered to the ADB in 10ms chunks when the size of the provided audio
+// buffers differs from 10ms.
+// As an example: calling DeliverRecordedData() with 5ms buffers will deliver
+// accumulated 10ms worth of data to the ADB every second call.
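+//
+// A usage sketch (illustrative only; `kFrameSize` stands for whatever buffer
+// size the platform audio callback happens to use):
+//
+//   FineAudioBuffer fine_buffer(audio_device_buffer);
+//   int16_t frame[kFrameSize];
+//   // Playout callback: any frame size is served from buffered 10ms chunks.
+//   fine_buffer.GetPlayoutData(rtc::ArrayView<int16_t>(frame, kFrameSize), 0);
+//   // Capture callback: frames are accumulated and forwarded in 10ms chunks.
+//   fine_buffer.DeliverRecordedData(
+//       rtc::ArrayView<const int16_t>(frame, kFrameSize), 0);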
+class FineAudioBuffer {
+ public:
+  // `device_buffer` is a buffer that provides 10ms of audio data.
+  FineAudioBuffer(AudioDeviceBuffer* audio_device_buffer);
+  ~FineAudioBuffer();
+
+  // Clears buffers and counters dealing with playout and/or recording.
+  void ResetPlayout();
+  void ResetRecord();
+
+  // Utility methods which return true if valid parameters were acquired at
+  // construction.
+  bool IsReadyForPlayout() const;
+  bool IsReadyForRecord() const;
+
+  // Copies audio samples into `audio_buffer` where number of requested
+  // elements is specified by `audio_buffer.size()`. The producer will always
+  // fill up the audio buffer and if no audio exists, the buffer will contain
+  // silence instead. The provided delay estimate in `playout_delay_ms` should
+  // contain an estimate of the latency between when an audio frame is read
+  // from WebRTC and when it is played out on the speaker.
+  void GetPlayoutData(rtc::ArrayView<int16_t> audio_buffer,
+                      int playout_delay_ms);
+
+  // Consumes the audio data in `audio_buffer` and sends it to the WebRTC layer
+  // in chunks of 10ms. The sum of the provided delay estimate in
+  // `record_delay_ms` and the latest `playout_delay_ms` in GetPlayoutData()
+  // are given to the AEC in the audio processing module.
+  // They can be fixed values on most platforms and they are ignored if an
+  // external (hardware/built-in) AEC is used.
+  // Example: buffer size is 5ms => call #1 stores 5ms of data, call #2 stores
+  // 5ms of data and sends a total of 10ms to WebRTC and clears the internal
+  // cache. Call #3 restarts the scheme above.
+  void DeliverRecordedData(rtc::ArrayView<const int16_t> audio_buffer,
+                           int record_delay_ms);
+
+ private:
+  // Device buffer that works with 10ms chunks of data both for playout and
+  // for recording. I.e., the WebRTC side will always be asked for audio to be
+  // played out in 10ms chunks and recorded audio will be sent to WebRTC in
+  // 10ms chunks as well. This raw pointer is owned by the constructor of this
+  // class and the owner must ensure that the pointer is valid during the life-
+  // time of this object.
+  AudioDeviceBuffer* const audio_device_buffer_;
+  // Number of audio samples per channel per 10ms. Set once at construction
+  // based on parameters in `audio_device_buffer`.
+  const size_t playout_samples_per_channel_10ms_;
+  const size_t record_samples_per_channel_10ms_;
+  // Number of audio channels. Set once at construction based on parameters in
+  // `audio_device_buffer`.
+  const size_t playout_channels_;
+  const size_t record_channels_;
+  // Storage for output samples from which a consumer can read audio buffers
+  // in any size using GetPlayoutData().
+  rtc::BufferT<int16_t> playout_buffer_;
+  // Storage for input samples that are about to be delivered to the WebRTC
+  // ADB or remain from the last successful delivery of a 10ms audio buffer.
+  rtc::BufferT<int16_t> record_buffer_;
+  // Contains latest delay estimate given to GetPlayoutData().
+  int playout_delay_ms_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/fine_audio_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer_unittest.cc
new file mode 100644
index 0000000000..36ea85f7dd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/fine_audio_buffer_unittest.cc
@@ -0,0 +1,158 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/fine_audio_buffer.h"
+
+#include <limits.h>
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "api/task_queue/default_task_queue_factory.h"
+#include "modules/audio_device/mock_audio_device_buffer.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::InSequence;
+using ::testing::Return;
+
+namespace webrtc {
+
+const int kSampleRate = 44100;
+const int kChannels = 2;
+const int kSamplesPer10Ms = kSampleRate * 10 / 1000;
+
+// The fake audio data is 0,1,..SCHAR_MAX-1,0,1,... This is to make it easy
+// to detect errors. This function verifies that the buffers contain such data.
+// E.g. if there are two buffers of size 3, buffer 1 would contain 0,1,2 and
+// buffer 2 would contain 3,4,5. Note that SCHAR_MAX is 127 so wrap-around
+// will happen.
+// `buffer` is the audio buffer to verify.
+bool VerifyBuffer(const int16_t* buffer, int buffer_number, int size) {
+  int start_value = (buffer_number * size) % SCHAR_MAX;
+  for (int i = 0; i < size; ++i) {
+    if (buffer[i] != (i + start_value) % SCHAR_MAX) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// This function replaces the real AudioDeviceBuffer::GetPlayoutData when it's
+// called (which is done implicitly when calling GetBufferData). It writes the
+// sequence 0,1,..SCHAR_MAX-1,0,1,... to the buffer. Note that this is likely a
+// buffer of different size than the one VerifyBuffer verifies.
+// `iteration` is the number of calls made to UpdateBuffer prior to this call.
+// `samples_per_10_ms` is the number of samples that should be written to the
+// buffer (`arg0`).
+ACTION_P2(UpdateBuffer, iteration, samples_per_10_ms) {
+  int16_t* buffer = static_cast<int16_t*>(arg0);
+  int start_value = (iteration * samples_per_10_ms) % SCHAR_MAX;
+  for (int i = 0; i < samples_per_10_ms; ++i) {
+    buffer[i] = (i + start_value) % SCHAR_MAX;
+  }
+  // Should return samples per channel.
+  return samples_per_10_ms / kChannels;
+}
+
+// Writes a periodic ramp pattern to the supplied `buffer`. See UpdateBuffer()
+// for details.
+void UpdateInputBuffer(int16_t* buffer, int iteration, int size) {
+  int start_value = (iteration * size) % SCHAR_MAX;
+  for (int i = 0; i < size; ++i) {
+    buffer[i] = (i + start_value) % SCHAR_MAX;
+  }
+}
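+
+// Example of the ramp's wrap-around: with buffers of size 3, buffer number 42
+// starts at (42 * 3) % SCHAR_MAX = 126, so it contains the values 126, 0, 1.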
+
+// Action macro which verifies that the recorded 10ms chunk of audio data
+// (in `arg0`) contains the correct reference values even if they have been
+// supplied using a buffer size that is smaller or larger than 10ms.
+// See VerifyBuffer() for details.
+ACTION_P2(VerifyInputBuffer, iteration, samples_per_10_ms) {
+  const int16_t* buffer = static_cast<const int16_t*>(arg0);
+  int start_value = (iteration * samples_per_10_ms) % SCHAR_MAX;
+  for (int i = 0; i < samples_per_10_ms; ++i) {
+    EXPECT_EQ(buffer[i], (i + start_value) % SCHAR_MAX);
+  }
+  return 0;
+}
+
+void RunFineBufferTest(int frame_size_in_samples) {
+  const int kFrameSizeSamples = frame_size_in_samples;
+  const int kNumberOfFrames = 5;
+  // Ceiling of integer division: 1 + ((x - 1) / y)
+  const int kNumberOfUpdateBufferCalls =
+      1 + ((kNumberOfFrames * frame_size_in_samples - 1) / kSamplesPer10Ms);
+
+  auto task_queue_factory = CreateDefaultTaskQueueFactory();
+  MockAudioDeviceBuffer audio_device_buffer(task_queue_factory.get());
+  audio_device_buffer.SetPlayoutSampleRate(kSampleRate);
+  audio_device_buffer.SetPlayoutChannels(kChannels);
+  audio_device_buffer.SetRecordingSampleRate(kSampleRate);
+  audio_device_buffer.SetRecordingChannels(kChannels);
+
+  EXPECT_CALL(audio_device_buffer, RequestPlayoutData(_))
+      .WillRepeatedly(Return(kSamplesPer10Ms));
+  {
+    InSequence s;
+    for (int i = 0; i < kNumberOfUpdateBufferCalls; ++i) {
+      EXPECT_CALL(audio_device_buffer, GetPlayoutData(_))
+          .WillOnce(UpdateBuffer(i, kChannels * kSamplesPer10Ms))
+          .RetiresOnSaturation();
+    }
+  }
+  {
+    InSequence s;
+    for (int j = 0; j < kNumberOfUpdateBufferCalls - 1; ++j) {
+      EXPECT_CALL(audio_device_buffer, SetRecordedBuffer(_, kSamplesPer10Ms))
+          .WillOnce(VerifyInputBuffer(j, kChannels * kSamplesPer10Ms))
+          .RetiresOnSaturation();
+    }
+  }
+  EXPECT_CALL(audio_device_buffer, SetVQEData(_, _))
+      .Times(kNumberOfUpdateBufferCalls - 1);
+  EXPECT_CALL(audio_device_buffer, DeliverRecordedData())
+      .Times(kNumberOfUpdateBufferCalls - 1)
+      .WillRepeatedly(Return(0));
+
+  FineAudioBuffer fine_buffer(&audio_device_buffer);
+  std::unique_ptr<int16_t[]> out_buffer(
+      new int16_t[kChannels * kFrameSizeSamples]);
+  std::unique_ptr<int16_t[]> in_buffer(
+      new int16_t[kChannels * kFrameSizeSamples]);
+
+  for (int i = 0; i < kNumberOfFrames; ++i) {
+    fine_buffer.GetPlayoutData(
+        rtc::ArrayView<int16_t>(out_buffer.get(),
+                                kChannels * kFrameSizeSamples),
+        0);
+    EXPECT_TRUE(
+        VerifyBuffer(out_buffer.get(), i, kChannels * kFrameSizeSamples));
+    UpdateInputBuffer(in_buffer.get(), i, kChannels * kFrameSizeSamples);
+    fine_buffer.DeliverRecordedData(
+        rtc::ArrayView<const int16_t>(in_buffer.get(),
+                                      kChannels * kFrameSizeSamples),
+        0);
+  }
+}
+
+TEST(FineBufferTest, BufferLessThan10ms) {
+  const int kFrameSizeSamples = kSamplesPer10Ms - 50;
+  RunFineBufferTest(kFrameSizeSamples);
+}
+
+TEST(FineBufferTest, GreaterThan10ms) {
+  const int kFrameSizeSamples = kSamplesPer10Ms + 50;
+  RunFineBufferTest(kFrameSizeSamples);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/g3doc/audio_device_module.md b/third_party/libwebrtc/modules/audio_device/g3doc/audio_device_module.md
new file mode 100644
index 0000000000..e325faacad
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/g3doc/audio_device_module.md
@@ -0,0 +1,171 @@
+
+
+
+# Audio Device Module (ADM)
+
+## Overview
+
+The ADM is responsible for driving input (microphone) and output (speaker)
+audio in WebRTC and the API is defined in [audio_device.h][19].
+
+Main functions of the ADM are:
+
+* Initialization and termination of native audio libraries.
+* Registration of an [AudioTransport object][16] which handles audio callbacks
+  for audio in both directions.
+* Device enumeration and selection (only for Linux, Windows and Mac OSX).
+* Start/Stop physical audio streams:
+    * Recording audio from the selected microphone, and
+    * playing out audio on the selected speaker.
+* Level control of the active audio streams.
+* Control of built-in audio effects (Acoustic Echo Cancellation (AEC),
+  Automatic Gain Control (AGC) and Noise Suppression (NS)) for Android and iOS.
+
+ADM implementations reside at two different locations in the WebRTC repository:
+`/modules/audio_device/` and `/sdk/`. The latest implementations for [iOS][20]
+and [Android][21] can be found under `/sdk/`. `/modules/audio_device/` contains
+older versions for mobile platforms and also implementations for desktop
+platforms such as [Linux][22], [Windows][23] and [Mac OSX][24]. This document
+focuses on the parts in `/modules/audio_device/`, but implementation-specific
+details such as threading models are omitted to keep the descriptions as simple
+as possible.
+
+By default, the ADM in WebRTC is created in [`WebRtcVoiceEngine::Init`][1] but
+an external implementation can also be injected using
+[`rtc::CreatePeerConnectionFactory`][25]. An example of where an external ADM
+is injected can be found in [PeerConnectionInterfaceTest][26] where a so-called
+[fake ADM][29] is utilized to avoid hardware dependency in a gtest. Clients can
+also inject their own ADMs in situations where functionality is needed that is
+not provided by the default implementations.
+
+## Background
+
+This section contains a historical background of the ADM API.
+
+The ADM interface is old and has undergone many changes over the years. It used
+to be much more granular but it still contains more than 50 methods and is
+implemented on several different hardware platforms.
+
+Some APIs are not implemented on all platforms, and functionality can be spread
+out differently between the methods.
+
+The most up-to-date implementations of the ADM interface are for [iOS][27] and
+for [Android][28].
+
+Desktop versions have not been updated to comply with the latest
+[C++ style guide](https://chromium.googlesource.com/chromium/src/+/main/styleguide/c++/c++.md)
+and more work is also needed to improve the performance and stability of these
+versions.
+
+## WebRtcVoiceEngine
+
+[`WebRtcVoiceEngine`][2] does not utilize all methods of the ADM but it still
+serves as the best example of its architecture and how to use it. For a more
+detailed view of all methods in the ADM interface, see [ADM unit tests][3].
+
+Assuming that an external ADM implementation is not injected, a default - or
+internal - ADM is created in [`WebRtcVoiceEngine::Init`][1] using
+[`AudioDeviceModule::Create`][4].
+
+Basic initialization is done using a utility method called
+[`adm_helpers::Init`][5] which calls fundamental ADM APIs like:
+
+* [`AudioDeviceModule::Init`][6] - initializes the native audio parts required
+  for each platform.
+* [`AudioDeviceModule::SetPlayoutDevice`][7] - specifies which speaker to use
+  for playing out audio using an `index` retrieved by the corresponding
+  enumeration method [`AudioDeviceModule::PlayoutDeviceName`][8].
+* [`AudioDeviceModule::SetRecordingDevice`][9] - specifies which microphone to
+  use for recording audio using an `index` retrieved by the corresponding
+  enumeration method which is [`AudioDeviceModule::RecordingDeviceName`][10].
+* [`AudioDeviceModule::InitSpeaker`][11] - sets up the parts of the ADM needed
+  to use the selected output device.
+* [`AudioDeviceModule::InitMicrophone`][12] - sets up the parts of the ADM
+  needed to use the selected input device.
+* [`AudioDeviceModule::SetStereoPlayout`][13] - enables playout in stereo if
+  the selected audio device supports it.
+* [`AudioDeviceModule::SetStereoRecording`][14] - enables recording in stereo
+  if the selected audio device supports it.
+
+[`WebRtcVoiceEngine::Init`][1] also calls
+[`AudioDeviceModule::RegisterAudioCallback`][15] to register an existing
+[AudioTransport][16] implementation which handles audio callbacks in both
+directions and therefore serves as the bridge between the native ADM and the
+upper WebRTC layers.
+
+Recorded audio samples are delivered from the ADM to the `WebRtcVoiceEngine`
+(which owns the `AudioTransport` object) via
+[`AudioTransport::RecordedDataIsAvailable`][17]:
+
+```
+int32_t RecordedDataIsAvailable(const void* audioSamples, size_t nSamples,
+                                size_t nBytesPerSample, size_t nChannels,
+                                uint32_t samplesPerSec, uint32_t totalDelayMS,
+                                int32_t clockDrift, uint32_t currentMicLevel,
+                                bool keyPressed, uint32_t& newMicLevel)
+```
+
+Decoded audio samples ready to be played out are delivered by the
+`WebRtcVoiceEngine` to the ADM, via [`AudioTransport::NeedMorePlayData`][18]:
+
+```
+int32_t NeedMorePlayData(size_t nSamples, size_t nBytesPerSample,
+                         size_t nChannels, uint32_t samplesPerSec,
+                         void* audioSamples, size_t& nSamplesOut,
+                         int64_t* elapsed_time_ms, int64_t* ntp_time_ms)
```

+
+Audio samples are 16-bit [linear PCM](https://wiki.multimedia.cx/index.php/PCM)
+using regular interleaving of channels within each sample.
+
+`WebRtcVoiceEngine` also owns an [`AudioState`][30] member and this class is
+used as a helper to start and stop audio to and from the ADM. To initialize and
+start recording, it calls:
+
+* [`AudioDeviceModule::InitRecording`][31]
+* [`AudioDeviceModule::StartRecording`][32]
+
+and to initialize and start playout:
+
+* [`AudioDeviceModule::InitPlayout`][33]
+* [`AudioDeviceModule::StartPlayout`][34]
+
+Finally, the corresponding stop methods [`AudioDeviceModule::StopRecording`][35]
+and [`AudioDeviceModule::StopPlayout`][36] are called, followed by
+[`AudioDeviceModule::Terminate`][37].
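+
+A minimal `AudioTransport` sketch is shown below to make the direction of the
+two callbacks concrete. The class name and the silence/no-op bodies are
+illustrative only and not part of WebRTC:
+
+```
+#include <cstring>
+
+#include "modules/audio_device/include/audio_device_defines.h"
+
+class FakeAudioTransport : public webrtc::AudioTransport {
+ public:
+  int32_t RecordedDataIsAvailable(const void* audioSamples, size_t nSamples,
+                                  size_t nBytesPerSample, size_t nChannels,
+                                  uint32_t samplesPerSec, uint32_t totalDelayMS,
+                                  int32_t clockDrift, uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel) override {
+    // Consume one 10ms capture buffer here (e.g. feed it to an encoder).
+    newMicLevel = currentMicLevel;  // No AGC adjustment in this sketch.
+    return 0;
+  }
+
+  int32_t NeedMorePlayData(size_t nSamples, size_t nBytesPerSample,
+                           size_t nChannels, uint32_t samplesPerSec,
+                           void* audioSamples, size_t& nSamplesOut,
+                           int64_t* elapsed_time_ms,
+                           int64_t* ntp_time_ms) override {
+    // Fill one 10ms playout buffer here; silence keeps the device running.
+    memset(audioSamples, 0, nSamples * nBytesPerSample);
+    nSamplesOut = nSamples;
+    *elapsed_time_ms = 0;
+    *ntp_time_ms = 0;
+    return 0;
+  }
+
+  void PullRenderData(int bits_per_sample, int sample_rate,
+                      size_t number_of_channels, size_t number_of_frames,
+                      void* audio_data, int64_t* elapsed_time_ms,
+                      int64_t* ntp_time_ms) override {}
+};
+```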
+ +[1]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/media/engine/webrtc_voice_engine.cc;l=314;drc=f7b1b95f11c74cb5369fdd528b73c70a50f2e206 +[2]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/media/engine/webrtc_voice_engine.h;l=48;drc=d15a575ec3528c252419149d35977e55269d8a41 +[3]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/audio_device_unittest.cc;l=1;drc=d15a575ec3528c252419149d35977e55269d8a41 +[4]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=46;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[5]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/media/engine/adm_helpers.h;drc=2222a80e79ae1ef5cb9510ec51d3868be75f47a2 +[6]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=62;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[7]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=77;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[8]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=69;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[9]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=79;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[10]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=72;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[11]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=99;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[12]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=101;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[13]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=130;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[14]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=133;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[15]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=59;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[16]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device_defines.h;l=34;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[17]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device_defines.h;l=36;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[18]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device_defines.h;l=48;drc=9438fb3fff97c803d1ead34c0e4f223db168526f +[19]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738es +[20]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/objc/native/api/audio_device_module.h;drc=76443eafa9375374d9f1d23da2b913f2acac6ac2 +[21]: 
https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/android/src/jni/audio_device/audio_device_module.h;drc=bbeb10925eb106eeed6143ccf571bc438ec22ce1 +[22]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/linux/;drc=d15a575ec3528c252419149d35977e55269d8a41 +[23]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/win/;drc=d15a575ec3528c252419149d35977e55269d8a41 +[24]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/mac/;drc=3b68aa346a5d3483c3448852d19d91723846825c +[25]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/create_peerconnection_factory.h;l=45;drc=09ceed2165137c4bea4e02e8d3db31970d0bf273 +[26]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/pc/peer_connection_interface_unittest.cc;l=692;drc=2efb8a5ec61b1b87475d046c03d20244f53b14b6 +[27]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/objc/native/api/audio_device_module.h;drc=76443eafa9375374d9f1d23da2b913f2acac6ac2 +[28]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/android/src/jni/audio_device/audio_device_module.h;drc=bbeb10925eb106eeed6143ccf571bc438ec22ce1 +[29]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/pc/test/fake_audio_capture_module.h;l=42;drc=d15a575ec3528c252419149d35977e55269d8a41 +[30]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/audio/audio_state.h;drc=d15a575ec3528c252419149d35977e55269d8a41 +[31]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=87;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[32]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=94;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[33]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=84;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[34]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=91;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[35]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=95;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[36]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=92;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e +[37]: https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_device/include/audio_device.h;l=63;drc=eb8c4ca608486add9800f6bfb7a8ba3cf23e738e diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device.h b/third_party/libwebrtc/modules/audio_device/include/audio_device.h new file mode 100644 index 0000000000..936ee6cb04 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/include/audio_device.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_H_
+
+#include "absl/types/optional.h"
+#include "api/scoped_refptr.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/ref_count.h"
+
+namespace webrtc {
+
+class AudioDeviceModuleForTest;
+
+class AudioDeviceModule : public rtc::RefCountInterface {
+ public:
+  enum AudioLayer {
+    kPlatformDefaultAudio = 0,
+    kWindowsCoreAudio,
+    kWindowsCoreAudio2,
+    kLinuxAlsaAudio,
+    kLinuxPulseAudio,
+    kAndroidJavaAudio,
+    kAndroidOpenSLESAudio,
+    kAndroidJavaInputAndOpenSLESOutputAudio,
+    kAndroidAAudioAudio,
+    kAndroidJavaInputAndAAudioOutputAudio,
+    kDummyAudio,
+  };
+
+  enum WindowsDeviceType {
+    kDefaultCommunicationDevice = -1,
+    kDefaultDevice = -2
+  };
+
+  struct Stats {
+    // The fields below correspond to similarly-named fields in the WebRTC
+    // stats spec. https://w3c.github.io/webrtc-stats/#playoutstats-dict*
+    double synthesized_samples_duration_s = 0;
+    uint64_t synthesized_samples_events = 0;
+    double total_samples_duration_s = 0;
+    double total_playout_delay_s = 0;
+    uint64_t total_samples_count = 0;
+  };
+
+ public:
+  // Creates a default ADM for usage in production code.
+  static rtc::scoped_refptr<AudioDeviceModule> Create(
+      AudioLayer audio_layer,
+      TaskQueueFactory* task_queue_factory);
+  // Creates an ADM with support for extra test methods. Don't use this factory
+  // in production code.
+  static rtc::scoped_refptr<AudioDeviceModuleForTest> CreateForTest(
+      AudioLayer audio_layer,
+      TaskQueueFactory* task_queue_factory);
+
+  // Retrieve the currently utilized audio layer
+  virtual int32_t ActiveAudioLayer(AudioLayer* audioLayer) const = 0;
+
+  // Full-duplex transportation of PCM audio
+  virtual int32_t RegisterAudioCallback(AudioTransport* audioCallback) = 0;
+
+  // Main initialization and termination
+  virtual int32_t Init() = 0;
+  virtual int32_t Terminate() = 0;
+  virtual bool Initialized() const = 0;
+
+  // Device enumeration
+  virtual int16_t PlayoutDevices() = 0;
+  virtual int16_t RecordingDevices() = 0;
+  virtual int32_t PlayoutDeviceName(uint16_t index,
+                                    char name[kAdmMaxDeviceNameSize],
+                                    char guid[kAdmMaxGuidSize]) = 0;
+  virtual int32_t RecordingDeviceName(uint16_t index,
+                                      char name[kAdmMaxDeviceNameSize],
+                                      char guid[kAdmMaxGuidSize]) = 0;
+
+  // Device selection
+  virtual int32_t SetPlayoutDevice(uint16_t index) = 0;
+  virtual int32_t SetPlayoutDevice(WindowsDeviceType device) = 0;
+  virtual int32_t SetRecordingDevice(uint16_t index) = 0;
+  virtual int32_t SetRecordingDevice(WindowsDeviceType device) = 0;
+
+  // Audio transport initialization
+  virtual int32_t PlayoutIsAvailable(bool* available) = 0;
+  virtual int32_t InitPlayout() = 0;
+  virtual bool PlayoutIsInitialized() const = 0;
+  virtual int32_t RecordingIsAvailable(bool* available) = 0;
+  virtual int32_t InitRecording() = 0;
+  virtual bool RecordingIsInitialized() const = 0;
+
+  // Audio transport control
+  virtual int32_t StartPlayout() = 0;
+  virtual int32_t StopPlayout() = 0;
+  virtual bool Playing() const = 0;
+  virtual int32_t StartRecording() = 0;
+  virtual int32_t StopRecording() = 0;
+  virtual bool Recording() const = 0;
+
+  // Audio mixer initialization
+  virtual int32_t InitSpeaker() = 0;
+  virtual bool SpeakerIsInitialized() const = 0;
+  virtual int32_t InitMicrophone() = 0;
+  virtual bool MicrophoneIsInitialized() const = 0;
+
+  // Speaker
volume controls + virtual int32_t SpeakerVolumeIsAvailable(bool* available) = 0; + virtual int32_t SetSpeakerVolume(uint32_t volume) = 0; + virtual int32_t SpeakerVolume(uint32_t* volume) const = 0; + virtual int32_t MaxSpeakerVolume(uint32_t* maxVolume) const = 0; + virtual int32_t MinSpeakerVolume(uint32_t* minVolume) const = 0; + + // Microphone volume controls + virtual int32_t MicrophoneVolumeIsAvailable(bool* available) = 0; + virtual int32_t SetMicrophoneVolume(uint32_t volume) = 0; + virtual int32_t MicrophoneVolume(uint32_t* volume) const = 0; + virtual int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const = 0; + virtual int32_t MinMicrophoneVolume(uint32_t* minVolume) const = 0; + + // Speaker mute control + virtual int32_t SpeakerMuteIsAvailable(bool* available) = 0; + virtual int32_t SetSpeakerMute(bool enable) = 0; + virtual int32_t SpeakerMute(bool* enabled) const = 0; + + // Microphone mute control + virtual int32_t MicrophoneMuteIsAvailable(bool* available) = 0; + virtual int32_t SetMicrophoneMute(bool enable) = 0; + virtual int32_t MicrophoneMute(bool* enabled) const = 0; + + // Stereo support + virtual int32_t StereoPlayoutIsAvailable(bool* available) const = 0; + virtual int32_t SetStereoPlayout(bool enable) = 0; + virtual int32_t StereoPlayout(bool* enabled) const = 0; + virtual int32_t StereoRecordingIsAvailable(bool* available) const = 0; + virtual int32_t SetStereoRecording(bool enable) = 0; + virtual int32_t StereoRecording(bool* enabled) const = 0; + + // Playout delay + virtual int32_t PlayoutDelay(uint16_t* delayMS) const = 0; + + // Only supported on Android. + virtual bool BuiltInAECIsAvailable() const = 0; + virtual bool BuiltInAGCIsAvailable() const = 0; + virtual bool BuiltInNSIsAvailable() const = 0; + + // Enables the built-in audio effects. Only supported on Android. + virtual int32_t EnableBuiltInAEC(bool enable) = 0; + virtual int32_t EnableBuiltInAGC(bool enable) = 0; + virtual int32_t EnableBuiltInNS(bool enable) = 0; + + // Play underrun count. Only supported on Android. + // TODO(alexnarest): Make it abstract after upstream projects support it. + virtual int32_t GetPlayoutUnderrunCount() const { return -1; } + + // Used to generate RTC stats. If not implemented, RTCAudioPlayoutStats will + // not be present in the stats. + virtual absl::optional GetStats() const { return absl::nullopt; } + +// Only supported on iOS. +#if defined(WEBRTC_IOS) + virtual int GetPlayoutAudioParameters(AudioParameters* params) const = 0; + virtual int GetRecordAudioParameters(AudioParameters* params) const = 0; +#endif // WEBRTC_IOS + + protected: + ~AudioDeviceModule() override {} +}; + +// Extends the default ADM interface with some extra test methods. +// Intended for usage in tests only and requires a unique factory method. +class AudioDeviceModuleForTest : public AudioDeviceModule { + public: + // Triggers internal restart sequences of audio streaming. Can be used by + // tests to emulate events corresponding to e.g. removal of an active audio + // device or other actions which causes the stream to be disconnected. 
+ virtual int RestartPlayoutInternally() = 0; + virtual int RestartRecordingInternally() = 0; + + virtual int SetPlayoutSampleRate(uint32_t sample_rate) = 0; + virtual int SetRecordingSampleRate(uint32_t sample_rate) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_H_ diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device_data_observer.h b/third_party/libwebrtc/modules/audio_device/include/audio_device_data_observer.h new file mode 100644 index 0000000000..36dc45f19e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/include/audio_device_data_observer.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DATA_OBSERVER_H_ +#define MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DATA_OBSERVER_H_ + +#include +#include + +#include "absl/base/attributes.h" +#include "api/scoped_refptr.h" +#include "api/task_queue/task_queue_factory.h" +#include "modules/audio_device/include/audio_device.h" + +namespace webrtc { + +// This interface will capture the raw PCM data of both the local captured as +// well as the mixed/rendered remote audio. +class AudioDeviceDataObserver { + public: + virtual void OnCaptureData(const void* audio_samples, + size_t num_samples, + size_t bytes_per_sample, + size_t num_channels, + uint32_t samples_per_sec) = 0; + + virtual void OnRenderData(const void* audio_samples, + size_t num_samples, + size_t bytes_per_sample, + size_t num_channels, + uint32_t samples_per_sec) = 0; + + AudioDeviceDataObserver() = default; + virtual ~AudioDeviceDataObserver() = default; +}; + +// Creates an ADMWrapper around an ADM instance that registers +// the provided AudioDeviceDataObserver. +rtc::scoped_refptr CreateAudioDeviceWithDataObserver( + rtc::scoped_refptr impl, + std::unique_ptr observer); + +// Creates an ADMWrapper around an ADM instance that registers +// the provided AudioDeviceDataObserver. +ABSL_DEPRECATED("") +rtc::scoped_refptr CreateAudioDeviceWithDataObserver( + rtc::scoped_refptr impl, + AudioDeviceDataObserver* observer); + +// Creates an ADM instance with AudioDeviceDataObserver registered. +rtc::scoped_refptr CreateAudioDeviceWithDataObserver( + AudioDeviceModule::AudioLayer audio_layer, + TaskQueueFactory* task_queue_factory, + std::unique_ptr observer); + +// Creates an ADM instance with AudioDeviceDataObserver registered. +ABSL_DEPRECATED("") +rtc::scoped_refptr CreateAudioDeviceWithDataObserver( + AudioDeviceModule::AudioLayer audio_layer, + TaskQueueFactory* task_queue_factory, + AudioDeviceDataObserver* observer); + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DATA_OBSERVER_H_ diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device_default.h b/third_party/libwebrtc/modules/audio_device/include/audio_device_default.h new file mode 100644 index 0000000000..3779d6fb3b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/include/audio_device_default.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFAULT_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFAULT_H_
+
+#include "modules/audio_device/include/audio_device.h"
+
+namespace webrtc {
+namespace webrtc_impl {
+
+// AudioDeviceModuleDefault template adds default implementation for all
+// AudioDeviceModule methods to the class, which inherits from
+// AudioDeviceModuleDefault.
+template <typename T>
+class AudioDeviceModuleDefault : public T {
+ public:
+  AudioDeviceModuleDefault() {}
+  virtual ~AudioDeviceModuleDefault() {}
+
+  int32_t RegisterAudioCallback(AudioTransport* audioCallback) override {
+    return 0;
+  }
+  int32_t Init() override { return 0; }
+  int32_t InitSpeaker() override { return 0; }
+  int32_t SetPlayoutDevice(uint16_t index) override { return 0; }
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    return 0;
+  }
+  int32_t SetStereoPlayout(bool enable) override { return 0; }
+  int32_t StopPlayout() override { return 0; }
+  int32_t InitMicrophone() override { return 0; }
+  int32_t SetRecordingDevice(uint16_t index) override { return 0; }
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    return 0;
+  }
+  int32_t SetStereoRecording(bool enable) override { return 0; }
+  int32_t StopRecording() override { return 0; }
+
+  int32_t Terminate() override { return 0; }
+
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer* audioLayer) const override {
+    return 0;
+  }
+  bool Initialized() const override { return true; }
+  int16_t PlayoutDevices() override { return 0; }
+  int16_t RecordingDevices() override { return 0; }
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override {
+    return 0;
+  }
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override {
+    return 0;
+  }
+  int32_t PlayoutIsAvailable(bool* available) override { return 0; }
+  int32_t InitPlayout() override { return 0; }
+  bool PlayoutIsInitialized() const override { return true; }
+  int32_t RecordingIsAvailable(bool* available) override { return 0; }
+  int32_t InitRecording() override { return 0; }
+  bool RecordingIsInitialized() const override { return true; }
+  int32_t StartPlayout() override { return 0; }
+  bool Playing() const override { return false; }
+  int32_t StartRecording() override { return 0; }
+  bool Recording() const override { return false; }
+  bool SpeakerIsInitialized() const override { return true; }
+  bool MicrophoneIsInitialized() const override { return true; }
+  int32_t SpeakerVolumeIsAvailable(bool* available) override { return 0; }
+  int32_t SetSpeakerVolume(uint32_t volume) override { return 0; }
+  int32_t SpeakerVolume(uint32_t* volume) const override { return 0; }
+  int32_t MaxSpeakerVolume(uint32_t* maxVolume) const override { return 0; }
+  int32_t MinSpeakerVolume(uint32_t* minVolume) const override { return 0; }
+  int32_t MicrophoneVolumeIsAvailable(bool* available) override { return 0; }
+  int32_t SetMicrophoneVolume(uint32_t volume) override { return 0; }
+  int32_t MicrophoneVolume(uint32_t* volume) const override { return 0; }
+  int32_t
MaxMicrophoneVolume(uint32_t* maxVolume) const override { return 0; } + int32_t MinMicrophoneVolume(uint32_t* minVolume) const override { return 0; } + int32_t SpeakerMuteIsAvailable(bool* available) override { return 0; } + int32_t SetSpeakerMute(bool enable) override { return 0; } + int32_t SpeakerMute(bool* enabled) const override { return 0; } + int32_t MicrophoneMuteIsAvailable(bool* available) override { return 0; } + int32_t SetMicrophoneMute(bool enable) override { return 0; } + int32_t MicrophoneMute(bool* enabled) const override { return 0; } + int32_t StereoPlayoutIsAvailable(bool* available) const override { + *available = false; + return 0; + } + int32_t StereoPlayout(bool* enabled) const override { return 0; } + int32_t StereoRecordingIsAvailable(bool* available) const override { + *available = false; + return 0; + } + int32_t StereoRecording(bool* enabled) const override { return 0; } + int32_t PlayoutDelay(uint16_t* delayMS) const override { + *delayMS = 0; + return 0; + } + bool BuiltInAECIsAvailable() const override { return false; } + int32_t EnableBuiltInAEC(bool enable) override { return -1; } + bool BuiltInAGCIsAvailable() const override { return false; } + int32_t EnableBuiltInAGC(bool enable) override { return -1; } + bool BuiltInNSIsAvailable() const override { return false; } + int32_t EnableBuiltInNS(bool enable) override { return -1; } + + int32_t GetPlayoutUnderrunCount() const override { return -1; } + +#if defined(WEBRTC_IOS) + int GetPlayoutAudioParameters(AudioParameters* params) const override { + return -1; + } + int GetRecordAudioParameters(AudioParameters* params) const override { + return -1; + } +#endif // WEBRTC_IOS +}; + +} // namespace webrtc_impl +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFAULT_H_ diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device_defines.h b/third_party/libwebrtc/modules/audio_device/include/audio_device_defines.h new file mode 100644 index 0000000000..d677d41f69 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/include/audio_device_defines.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+
+static const int kAdmMaxDeviceNameSize = 128;
+static const int kAdmMaxFileNameSize = 512;
+static const int kAdmMaxGuidSize = 128;
+
+static const int kAdmMinPlayoutBufferSizeMs = 10;
+static const int kAdmMaxPlayoutBufferSizeMs = 250;
+
+// ----------------------------------------------------------------------------
+//  AudioTransport
+// ----------------------------------------------------------------------------
+
+class AudioTransport {
+ public:
+  // TODO(bugs.webrtc.org/13620) Deprecate this function
+  virtual int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                          size_t nSamples,
+                                          size_t nBytesPerSample,
+                                          size_t nChannels,
+                                          uint32_t samplesPerSec,
+                                          uint32_t totalDelayMS,
+                                          int32_t clockDrift,
+                                          uint32_t currentMicLevel,
+                                          bool keyPressed,
+                                          uint32_t& newMicLevel) = 0;  // NOLINT
+
+  virtual int32_t RecordedDataIsAvailable(
+      const void* audioSamples,
+      size_t nSamples,
+      size_t nBytesPerSample,
+      size_t nChannels,
+      uint32_t samplesPerSec,
+      uint32_t totalDelayMS,
+      int32_t clockDrift,
+      uint32_t currentMicLevel,
+      bool keyPressed,
+      uint32_t& newMicLevel,
+      absl::optional<int64_t> estimatedCaptureTimeNS) {  // NOLINT
+    // TODO(webrtc:13620) Make the default behavior of the new API match the
+    // old API. This can be made pure virtual once all uses of the old API are
+    // removed.
+    return RecordedDataIsAvailable(
+        audioSamples, nSamples, nBytesPerSample, nChannels, samplesPerSec,
+        totalDelayMS, clockDrift, currentMicLevel, keyPressed, newMicLevel);
+  }
+
+  // Implementation has to setup safe values for all specified out parameters.
+  virtual int32_t NeedMorePlayData(size_t nSamples,
+                                   size_t nBytesPerSample,
+                                   size_t nChannels,
+                                   uint32_t samplesPerSec,
+                                   void* audioSamples,
+                                   size_t& nSamplesOut,  // NOLINT
+                                   int64_t* elapsed_time_ms,
+                                   int64_t* ntp_time_ms) = 0;
+
+  // Method to pull mixed render audio data from all active VoE channels.
+  // The data will not be passed as reference for audio processing internally.
+  virtual void PullRenderData(int bits_per_sample,
+                              int sample_rate,
+                              size_t number_of_channels,
+                              size_t number_of_frames,
+                              void* audio_data,
+                              int64_t* elapsed_time_ms,
+                              int64_t* ntp_time_ms) = 0;
+
+ protected:
+  virtual ~AudioTransport() {}
+};
+
+// Helper class for storage of fundamental audio parameters such as sample
+// rate, number of channels, native buffer size etc.
+// Note that one audio frame can contain more than one channel sample and each
+// sample is assumed to be a 16-bit PCM sample. Hence, one audio frame in
+// stereo contains 2 * (16/8) = 4 bytes of data.
+class AudioParameters {
+ public:
+  // This implementation only supports 16-bit PCM samples.
+  static const size_t kBitsPerSample = 16;
+  AudioParameters()
+      : sample_rate_(0),
+        channels_(0),
+        frames_per_buffer_(0),
+        frames_per_10ms_buffer_(0) {}
+  AudioParameters(int sample_rate, size_t channels, size_t frames_per_buffer)
+      : sample_rate_(sample_rate),
+        channels_(channels),
+        frames_per_buffer_(frames_per_buffer),
+        frames_per_10ms_buffer_(static_cast<size_t>(sample_rate / 100)) {}
+  void reset(int sample_rate, size_t channels, size_t frames_per_buffer) {
+    sample_rate_ = sample_rate;
+    channels_ = channels;
+    frames_per_buffer_ = frames_per_buffer;
+    frames_per_10ms_buffer_ = static_cast<size_t>(sample_rate / 100);
+  }
+  size_t bits_per_sample() const { return kBitsPerSample; }
+  void reset(int sample_rate, size_t channels, double buffer_duration) {
+    reset(sample_rate, channels,
+          static_cast<size_t>(sample_rate * buffer_duration + 0.5));
+  }
+  void reset(int sample_rate, size_t channels) {
+    reset(sample_rate, channels, static_cast<size_t>(0));
+  }
+  int sample_rate() const { return sample_rate_; }
+  size_t channels() const { return channels_; }
+  size_t frames_per_buffer() const { return frames_per_buffer_; }
+  size_t frames_per_10ms_buffer() const { return frames_per_10ms_buffer_; }
+  size_t GetBytesPerFrame() const { return channels_ * kBitsPerSample / 8; }
+  size_t GetBytesPerBuffer() const {
+    return frames_per_buffer_ * GetBytesPerFrame();
+  }
+  // The WebRTC audio device buffer (ADB) only requires that the sample rate
+  // and number of channels are configured. Hence, to be "valid", only these
+  // two attributes must be set.
+  bool is_valid() const { return ((sample_rate_ > 0) && (channels_ > 0)); }
+  // Most platforms also require that a native buffer size is defined.
+  // An audio parameter instance is considered to be "complete" if it is both
+  // "valid" (can be used by the ADB) and also has a native frame size.
+  bool is_complete() const { return (is_valid() && (frames_per_buffer_ > 0)); }
+  size_t GetBytesPer10msBuffer() const {
+    return frames_per_10ms_buffer_ * GetBytesPerFrame();
+  }
+  double GetBufferSizeInMilliseconds() const {
+    if (sample_rate_ == 0)
+      return 0.0;
+    return frames_per_buffer_ / (sample_rate_ / 1000.0);
+  }
+  double GetBufferSizeInSeconds() const {
+    if (sample_rate_ == 0)
+      return 0.0;
+    return static_cast<double>(frames_per_buffer_) / (sample_rate_);
+  }
+  std::string ToString() const {
+    char ss_buf[1024];
+    rtc::SimpleStringBuilder ss(ss_buf);
+    ss << "AudioParameters: ";
+    ss << "sample_rate=" << sample_rate() << ", channels=" << channels();
+    ss << ", frames_per_buffer=" << frames_per_buffer();
+    ss << ", frames_per_10ms_buffer=" << frames_per_10ms_buffer();
+    ss << ", bytes_per_frame=" << GetBytesPerFrame();
+    ss << ", bytes_per_buffer=" << GetBytesPerBuffer();
+    ss << ", bytes_per_10ms_buffer=" << GetBytesPer10msBuffer();
+    ss << ", size_in_ms=" << GetBufferSizeInMilliseconds();
+    return ss.str();
+  }
+
+ private:
+  int sample_rate_;
+  size_t channels_;
+  size_t frames_per_buffer_;
+  size_t frames_per_10ms_buffer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.cc b/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.cc
new file mode 100644
index 0000000000..130e096e6d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/include/audio_device_factory.h"
+
+#include <memory>
+
+#if defined(WEBRTC_WIN)
+#include "modules/audio_device/win/audio_device_module_win.h"
+#include "modules/audio_device/win/core_audio_input_win.h"
+#include "modules/audio_device/win/core_audio_output_win.h"
+#include "modules/audio_device/win/core_audio_utility_win.h"
+#endif
+
+#include "api/task_queue/task_queue_factory.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+rtc::scoped_refptr<AudioDeviceModule> CreateWindowsCoreAudioAudioDeviceModule(
+    TaskQueueFactory* task_queue_factory,
+    bool automatic_restart) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  return CreateWindowsCoreAudioAudioDeviceModuleForTest(task_queue_factory,
+                                                        automatic_restart);
+}
+
+rtc::scoped_refptr<AudioDeviceModuleForTest>
+CreateWindowsCoreAudioAudioDeviceModuleForTest(
+    TaskQueueFactory* task_queue_factory,
+    bool automatic_restart) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  // Returns NULL if Core Audio is not supported or if COM has not been
+  // initialized correctly using ScopedCOMInitializer.
+  if (!webrtc_win::core_audio_utility::IsSupported()) {
+    RTC_LOG(LS_ERROR)
+        << "Unable to create ADM since Core Audio is not supported";
+    return nullptr;
+  }
+  return CreateWindowsCoreAudioAudioDeviceModuleFromInputAndOutput(
+      std::make_unique<webrtc_win::CoreAudioInput>(automatic_restart),
+      std::make_unique<webrtc_win::CoreAudioOutput>(automatic_restart),
+      task_queue_factory);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.h b/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.h
new file mode 100644
index 0000000000..edd7686b8e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/audio_device_factory.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_FACTORY_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_FACTORY_H_
+
+#include <memory>
+
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/include/audio_device.h"
+
+namespace webrtc {
+
+// Creates an AudioDeviceModule (ADM) for Windows based on the Core Audio API.
+// The creating thread must be a COM thread; otherwise nullptr will be
+// returned. By default `automatic_restart` is set to true, which enables
+// automatic restart of audio if e.g. the existing device is removed. If set
+// to false, no attempt to restart audio is performed under these conditions.
+//
+// Example (assuming webrtc namespace):
+//
+//  public:
+//   rtc::scoped_refptr<AudioDeviceModule> CreateAudioDevice() {
+//     task_queue_factory_ = CreateDefaultTaskQueueFactory();
+//     // Tell COM that this thread shall live in the MTA.
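+//     // (kMTA joins the thread to COM's multithreaded apartment, so COM
+//     // objects created here may also be used from other MTA threads.)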
+//     com_initializer_ = std::make_unique<ScopedCOMInitializer>(
+//         ScopedCOMInitializer::kMTA);
+//     if (!com_initializer_->Succeeded()) {
+//       return nullptr;
+//     }
+//     // Create the ADM with support for automatic restart if devices are
+//     // unplugged.
+//     return CreateWindowsCoreAudioAudioDeviceModule(
+//         task_queue_factory_.get());
+//   }
+//
+//  private:
+//   std::unique_ptr<ScopedCOMInitializer> com_initializer_;
+//   std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+//
+rtc::scoped_refptr<AudioDeviceModule> CreateWindowsCoreAudioAudioDeviceModule(
+    TaskQueueFactory* task_queue_factory,
+    bool automatic_restart = true);
+
+rtc::scoped_refptr<AudioDeviceModuleForTest>
+CreateWindowsCoreAudioAudioDeviceModuleForTest(
+    TaskQueueFactory* task_queue_factory,
+    bool automatic_restart = true);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_device/include/fake_audio_device.h b/third_party/libwebrtc/modules/audio_device/include/fake_audio_device.h
new file mode 100644
index 0000000000..2322ce0263
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/fake_audio_device.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_FAKE_AUDIO_DEVICE_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_FAKE_AUDIO_DEVICE_H_
+
+#include "modules/audio_device/include/audio_device.h"
+#include "modules/audio_device/include/audio_device_default.h"
+
+namespace webrtc {
+
+class FakeAudioDeviceModule
+    : public webrtc_impl::AudioDeviceModuleDefault<AudioDeviceModule> {
+ public:
+  // TODO(bugs.webrtc.org/12701): Fix all users of this class to manage
+  // references using scoped_refptr. Current code doesn't always use
+  // refcounting for this class.
+  void AddRef() const override {}
+  rtc::RefCountReleaseStatus Release() const override {
+    return rtc::RefCountReleaseStatus::kDroppedLastRef;
+  }
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_INCLUDE_FAKE_AUDIO_DEVICE_H_
diff --git a/third_party/libwebrtc/modules/audio_device/include/mock_audio_device.h b/third_party/libwebrtc/modules/audio_device/include/mock_audio_device.h
new file mode 100644
index 0000000000..73fbdd547d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/mock_audio_device.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_DEVICE_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_DEVICE_H_
+
+#include <string>
+
+#include "api/make_ref_counted.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+
+class MockAudioDeviceModule : public AudioDeviceModule {
+ public:
+  static rtc::scoped_refptr<MockAudioDeviceModule> CreateNice() {
+    return rtc::make_ref_counted<
+        ::testing::NiceMock<MockAudioDeviceModule>>();
+  }
+  static rtc::scoped_refptr<MockAudioDeviceModule> CreateStrict() {
+    return rtc::make_ref_counted<
+        ::testing::StrictMock<MockAudioDeviceModule>>();
+  }
+
+  // AudioDeviceModule.
+  MOCK_METHOD(int32_t,
+              ActiveAudioLayer,
+              (AudioLayer * audioLayer),
+              (const, override));
+  MOCK_METHOD(int32_t,
+              RegisterAudioCallback,
+              (AudioTransport * audioCallback),
+              (override));
+  MOCK_METHOD(int32_t, Init, (), (override));
+  MOCK_METHOD(int32_t, Terminate, (), (override));
+  MOCK_METHOD(bool, Initialized, (), (const, override));
+  MOCK_METHOD(int16_t, PlayoutDevices, (), (override));
+  MOCK_METHOD(int16_t, RecordingDevices, (), (override));
+  MOCK_METHOD(int32_t,
+              PlayoutDeviceName,
+              (uint16_t index,
+               char name[kAdmMaxDeviceNameSize],
+               char guid[kAdmMaxGuidSize]),
+              (override));
+  MOCK_METHOD(int32_t,
+              RecordingDeviceName,
+              (uint16_t index,
+               char name[kAdmMaxDeviceNameSize],
+               char guid[kAdmMaxGuidSize]),
+              (override));
+  MOCK_METHOD(int32_t, SetPlayoutDevice, (uint16_t index), (override));
+  MOCK_METHOD(int32_t,
+              SetPlayoutDevice,
+              (WindowsDeviceType device),
+              (override));
+  MOCK_METHOD(int32_t, SetRecordingDevice, (uint16_t index), (override));
+  MOCK_METHOD(int32_t,
+              SetRecordingDevice,
+              (WindowsDeviceType device),
+              (override));
+  MOCK_METHOD(int32_t, PlayoutIsAvailable, (bool* available), (override));
+  MOCK_METHOD(int32_t, InitPlayout, (), (override));
+  MOCK_METHOD(bool, PlayoutIsInitialized, (), (const, override));
+  MOCK_METHOD(int32_t, RecordingIsAvailable, (bool* available), (override));
+  MOCK_METHOD(int32_t, InitRecording, (), (override));
+  MOCK_METHOD(bool, RecordingIsInitialized, (), (const, override));
+  MOCK_METHOD(int32_t, StartPlayout, (), (override));
+  MOCK_METHOD(int32_t, StopPlayout, (), (override));
+  MOCK_METHOD(bool, Playing, (), (const, override));
+  MOCK_METHOD(int32_t, StartRecording, (), (override));
+  MOCK_METHOD(int32_t, StopRecording, (), (override));
+  MOCK_METHOD(bool, Recording, (), (const, override));
+  MOCK_METHOD(int32_t, InitSpeaker, (), (override));
+  MOCK_METHOD(bool, SpeakerIsInitialized, (), (const, override));
+  MOCK_METHOD(int32_t, InitMicrophone, (), (override));
+  MOCK_METHOD(bool, MicrophoneIsInitialized, (), (const, override));
+  MOCK_METHOD(int32_t, SpeakerVolumeIsAvailable, (bool* available), (override));
+  MOCK_METHOD(int32_t, SetSpeakerVolume, (uint32_t volume), (override));
+  MOCK_METHOD(int32_t, SpeakerVolume, (uint32_t * volume), (const, override));
+  MOCK_METHOD(int32_t,
+              MaxSpeakerVolume,
+              (uint32_t * maxVolume),
+              (const, override));
+  MOCK_METHOD(int32_t,
+              MinSpeakerVolume,
+              (uint32_t * minVolume),
+              (const, override));
+  MOCK_METHOD(int32_t,
+              MicrophoneVolumeIsAvailable,
+              (bool* available),
+              (override));
+  MOCK_METHOD(int32_t, SetMicrophoneVolume, (uint32_t volume), (override));
+  MOCK_METHOD(int32_t,
+              MicrophoneVolume,
+              (uint32_t * volume),
+              (const, override));
+  MOCK_METHOD(int32_t,
+              MaxMicrophoneVolume,
+              (uint32_t * maxVolume),
+              (const, override));
+  MOCK_METHOD(int32_t,
+              MinMicrophoneVolume,
+              (uint32_t * minVolume),
+              (const, override));
+  MOCK_METHOD(int32_t, SpeakerMuteIsAvailable, (bool*
available), (override)); + MOCK_METHOD(int32_t, SetSpeakerMute, (bool enable), (override)); + MOCK_METHOD(int32_t, SpeakerMute, (bool* enabled), (const, override)); + MOCK_METHOD(int32_t, + MicrophoneMuteIsAvailable, + (bool* available), + (override)); + MOCK_METHOD(int32_t, SetMicrophoneMute, (bool enable), (override)); + MOCK_METHOD(int32_t, MicrophoneMute, (bool* enabled), (const, override)); + MOCK_METHOD(int32_t, + StereoPlayoutIsAvailable, + (bool* available), + (const, override)); + MOCK_METHOD(int32_t, SetStereoPlayout, (bool enable), (override)); + MOCK_METHOD(int32_t, StereoPlayout, (bool* enabled), (const, override)); + MOCK_METHOD(int32_t, + StereoRecordingIsAvailable, + (bool* available), + (const, override)); + MOCK_METHOD(int32_t, SetStereoRecording, (bool enable), (override)); + MOCK_METHOD(int32_t, StereoRecording, (bool* enabled), (const, override)); + MOCK_METHOD(int32_t, PlayoutDelay, (uint16_t * delayMS), (const, override)); + MOCK_METHOD(bool, BuiltInAECIsAvailable, (), (const, override)); + MOCK_METHOD(bool, BuiltInAGCIsAvailable, (), (const, override)); + MOCK_METHOD(bool, BuiltInNSIsAvailable, (), (const, override)); + MOCK_METHOD(int32_t, EnableBuiltInAEC, (bool enable), (override)); + MOCK_METHOD(int32_t, EnableBuiltInAGC, (bool enable), (override)); + MOCK_METHOD(int32_t, EnableBuiltInNS, (bool enable), (override)); + MOCK_METHOD(int32_t, GetPlayoutUnderrunCount, (), (const, override)); +#if defined(WEBRTC_IOS) + MOCK_METHOD(int, + GetPlayoutAudioParameters, + (AudioParameters * params), + (const, override)); + MOCK_METHOD(int, + GetRecordAudioParameters, + (AudioParameters * params), + (const, override)); +#endif // WEBRTC_IOS +}; +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_DEVICE_H_ diff --git a/third_party/libwebrtc/modules/audio_device/include/mock_audio_transport.h b/third_party/libwebrtc/modules/audio_device/include/mock_audio_transport.h new file mode 100644 index 0000000000..b886967319 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/include/mock_audio_transport.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_TRANSPORT_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_TRANSPORT_H_
+
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+
+class MockAudioTransport : public AudioTransport {
+ public:
+  MockAudioTransport() {}
+  ~MockAudioTransport() {}
+
+  MOCK_METHOD(int32_t,
+              RecordedDataIsAvailable,
+              (const void* audioSamples,
+               size_t nSamples,
+               size_t nBytesPerSample,
+               size_t nChannels,
+               uint32_t samplesPerSec,
+               uint32_t totalDelayMS,
+               int32_t clockDrift,
+               uint32_t currentMicLevel,
+               bool keyPressed,
+               uint32_t& newMicLevel),
+              (override));
+
+  MOCK_METHOD(int32_t,
+              RecordedDataIsAvailable,
+              (const void* audioSamples,
+               size_t nSamples,
+               size_t nBytesPerSample,
+               size_t nChannels,
+               uint32_t samplesPerSec,
+               uint32_t totalDelayMS,
+               int32_t clockDrift,
+               uint32_t currentMicLevel,
+               bool keyPressed,
+               uint32_t& newMicLevel,
+               absl::optional<int64_t> estimated_capture_time_ns),
+              (override));
+
+  MOCK_METHOD(int32_t,
+              NeedMorePlayData,
+              (size_t nSamples,
+               size_t nBytesPerSample,
+               size_t nChannels,
+               uint32_t samplesPerSec,
+               void* audioSamples,
+               size_t& nSamplesOut,
+               int64_t* elapsed_time_ms,
+               int64_t* ntp_time_ms),
+              (override));
+
+  MOCK_METHOD(void,
+              PullRenderData,
+              (int bits_per_sample,
+               int sample_rate,
+               size_t number_of_channels,
+               size_t number_of_frames,
+               void* audio_data,
+               int64_t* elapsed_time_ms,
+               int64_t* ntp_time_ms),
+              (override));
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_INCLUDE_MOCK_AUDIO_TRANSPORT_H_
diff --git a/third_party/libwebrtc/modules/audio_device/include/test_audio_device.cc b/third_party/libwebrtc/modules/audio_device/include/test_audio_device.cc
new file mode 100644
index 0000000000..2189646eff
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/test_audio_device.cc
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_device/include/test_audio_device.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "api/make_ref_counted.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_device/include/audio_device_default.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/event.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/random.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/task_queue.h"
+#include "rtc_base/task_utils/repeating_task.h"
+#include "rtc_base/thread_annotations.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kFrameLengthUs = 10000;
+constexpr int kFramesPerSecond = rtc::kNumMicrosecsPerSec / kFrameLengthUs;
+
+// TestAudioDeviceModule implements an AudioDevice module that can act both
+// as a capturer and a renderer. It will use 10ms audio frames.
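+// With kFramesPerSecond == 100, one frame at sample rate R holds R / 100
+// samples per channel (e.g. 480 samples at 48 kHz).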
+class TestAudioDeviceModuleImpl
+    : public webrtc_impl::AudioDeviceModuleDefault<TestAudioDeviceModule> {
+ public:
+  // Creates a new TestAudioDeviceModule. When capturing or playing, 10 ms
+  // audio frames will be processed every 10ms / `speed`.
+  // `capturer` is an object that produces audio data. Can be nullptr if this
+  // device is never used for recording.
+  // `renderer` is an object that receives audio data that would have been
+  // played out. Can be nullptr if this device is never used for playing.
+  // Use one of the Create... functions to get these instances.
+  TestAudioDeviceModuleImpl(TaskQueueFactory* task_queue_factory,
+                            std::unique_ptr<Capturer> capturer,
+                            std::unique_ptr<Renderer> renderer,
+                            float speed = 1)
+      : task_queue_factory_(task_queue_factory),
+        capturer_(std::move(capturer)),
+        renderer_(std::move(renderer)),
+        process_interval_us_(kFrameLengthUs / speed),
+        audio_callback_(nullptr),
+        rendering_(false),
+        capturing_(false) {
+    auto good_sample_rate = [](int sr) {
+      return sr == 8000 || sr == 16000 || sr == 32000 || sr == 44100 ||
+             sr == 48000;
+    };
+
+    if (renderer_) {
+      const int sample_rate = renderer_->SamplingFrequency();
+      playout_buffer_.resize(
+          SamplesPerFrame(sample_rate) * renderer_->NumChannels(), 0);
+      RTC_CHECK(good_sample_rate(sample_rate));
+    }
+    if (capturer_) {
+      RTC_CHECK(good_sample_rate(capturer_->SamplingFrequency()));
+    }
+  }
+
+  ~TestAudioDeviceModuleImpl() override {
+    StopPlayout();
+    StopRecording();
+  }
+
+  int32_t Init() override {
+    task_queue_ =
+        std::make_unique<rtc::TaskQueue>(task_queue_factory_->CreateTaskQueue(
+            "TestAudioDeviceModuleImpl", TaskQueueFactory::Priority::NORMAL));
+
+    RepeatingTaskHandle::Start(task_queue_->Get(), [this]() {
+      ProcessAudio();
+      return TimeDelta::Micros(process_interval_us_);
+    });
+    return 0;
+  }
+
+  int32_t RegisterAudioCallback(AudioTransport* callback) override {
+    MutexLock lock(&lock_);
+    RTC_DCHECK(callback || audio_callback_);
+    audio_callback_ = callback;
+    return 0;
+  }
+
+  int32_t StartPlayout() override {
+    MutexLock lock(&lock_);
+    RTC_CHECK(renderer_);
+    rendering_ = true;
+    return 0;
+  }
+
+  int32_t StopPlayout() override {
+    MutexLock lock(&lock_);
+    rendering_ = false;
+    return 0;
+  }
+
+  int32_t StartRecording() override {
+    MutexLock lock(&lock_);
+    RTC_CHECK(capturer_);
+    capturing_ = true;
+    return 0;
+  }
+
+  int32_t StopRecording() override {
+    MutexLock lock(&lock_);
+    capturing_ = false;
+    return 0;
+  }
+
+  bool Playing() const override {
+    MutexLock lock(&lock_);
+    return rendering_;
+  }
+
+  bool Recording() const override {
+    MutexLock lock(&lock_);
+    return capturing_;
+  }
+
+  // Blocks forever until the Recorder stops producing data.
+  void WaitForRecordingEnd() override {
+    done_capturing_.Wait(rtc::Event::kForever);
+  }
+
+ private:
+  void ProcessAudio() {
+    MutexLock lock(&lock_);
+    if (capturing_) {
+      // Capture 10ms of audio. 2 bytes per sample.
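+      // At 48 kHz stereo, for example, this is 480 frames * 2 channels *
+      // 2 bytes = 1920 bytes per callback.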
+      const bool keep_capturing = capturer_->Capture(&recording_buffer_);
+      uint32_t new_mic_level = 0;
+      if (recording_buffer_.size() > 0) {
+        audio_callback_->RecordedDataIsAvailable(
+            recording_buffer_.data(),
+            recording_buffer_.size() / capturer_->NumChannels(),
+            2 * capturer_->NumChannels(), capturer_->NumChannels(),
+            capturer_->SamplingFrequency(), 0, 0, 0, false, new_mic_level);
+      }
+      if (!keep_capturing) {
+        capturing_ = false;
+        done_capturing_.Set();
+      }
+    }
+    if (rendering_) {
+      size_t samples_out = 0;
+      int64_t elapsed_time_ms = -1;
+      int64_t ntp_time_ms = -1;
+      const int sampling_frequency = renderer_->SamplingFrequency();
+      audio_callback_->NeedMorePlayData(
+          SamplesPerFrame(sampling_frequency), 2 * renderer_->NumChannels(),
+          renderer_->NumChannels(), sampling_frequency, playout_buffer_.data(),
+          samples_out, &elapsed_time_ms, &ntp_time_ms);
+      const bool keep_rendering = renderer_->Render(
+          rtc::ArrayView<const int16_t>(playout_buffer_.data(), samples_out));
+      if (!keep_rendering) {
+        rendering_ = false;
+        done_rendering_.Set();
+      }
+    }
+  }
+
+  TaskQueueFactory* const task_queue_factory_;
+  const std::unique_ptr<Capturer> capturer_ RTC_GUARDED_BY(lock_);
+  const std::unique_ptr<Renderer> renderer_ RTC_GUARDED_BY(lock_);
+  const int64_t process_interval_us_;
+
+  mutable Mutex lock_;
+  AudioTransport* audio_callback_ RTC_GUARDED_BY(lock_);
+  bool rendering_ RTC_GUARDED_BY(lock_);
+  bool capturing_ RTC_GUARDED_BY(lock_);
+  rtc::Event done_rendering_;
+  rtc::Event done_capturing_;
+
+  std::vector<int16_t> playout_buffer_ RTC_GUARDED_BY(lock_);
+  rtc::BufferT<int16_t> recording_buffer_ RTC_GUARDED_BY(lock_);
+  std::unique_ptr<rtc::TaskQueue> task_queue_;
+};
+
+// A fake capturer that generates pulses with random samples between
+// -max_amplitude and +max_amplitude.
+class PulsedNoiseCapturerImpl final
+    : public TestAudioDeviceModule::PulsedNoiseCapturer {
+ public:
+  // Assuming 10ms audio packets.
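+  // (For example, at 48 kHz mono each Capture() call fills
+  // TestAudioDeviceModule::SamplesPerFrame(48000) == 480 samples.)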
+  PulsedNoiseCapturerImpl(int16_t max_amplitude,
+                          int sampling_frequency_in_hz,
+                          int num_channels)
+      : sampling_frequency_in_hz_(sampling_frequency_in_hz),
+        fill_with_zero_(false),
+        random_generator_(1),
+        max_amplitude_(max_amplitude),
+        num_channels_(num_channels) {
+    RTC_DCHECK_GT(max_amplitude, 0);
+  }
+
+  int SamplingFrequency() const override { return sampling_frequency_in_hz_; }
+
+  int NumChannels() const override { return num_channels_; }
+
+  bool Capture(rtc::BufferT<int16_t>* buffer) override {
+    fill_with_zero_ = !fill_with_zero_;
+    int16_t max_amplitude;
+    {
+      MutexLock lock(&lock_);
+      max_amplitude = max_amplitude_;
+    }
+    buffer->SetData(
+        TestAudioDeviceModule::SamplesPerFrame(sampling_frequency_in_hz_) *
+            num_channels_,
+        [&](rtc::ArrayView<int16_t> data) {
+          if (fill_with_zero_) {
+            std::fill(data.begin(), data.end(), 0);
+          } else {
+            std::generate(data.begin(), data.end(), [&]() {
+              return random_generator_.Rand(-max_amplitude, max_amplitude);
+            });
+          }
+          return data.size();
+        });
+    return true;
+  }
+
+  void SetMaxAmplitude(int16_t amplitude) override {
+    MutexLock lock(&lock_);
+    max_amplitude_ = amplitude;
+  }
+
+ private:
+  int sampling_frequency_in_hz_;
+  bool fill_with_zero_;
+  Random random_generator_;
+  Mutex lock_;
+  int16_t max_amplitude_ RTC_GUARDED_BY(lock_);
+  const int num_channels_;
+};
+
+class WavFileReader final : public TestAudioDeviceModule::Capturer {
+ public:
+  WavFileReader(absl::string_view filename,
+                int sampling_frequency_in_hz,
+                int num_channels,
+                bool repeat)
+      : WavFileReader(std::make_unique<WavReader>(filename),
+                      sampling_frequency_in_hz,
+                      num_channels,
+                      repeat) {}
+
+  int SamplingFrequency() const override { return sampling_frequency_in_hz_; }
+
+  int NumChannels() const override { return num_channels_; }
+
+  bool Capture(rtc::BufferT<int16_t>* buffer) override {
+    buffer->SetData(
+        TestAudioDeviceModule::SamplesPerFrame(sampling_frequency_in_hz_) *
+            num_channels_,
+        [&](rtc::ArrayView<int16_t> data) {
+          size_t read = wav_reader_->ReadSamples(data.size(), data.data());
+          if (read < data.size() && repeat_) {
+            do {
+              wav_reader_->Reset();
+              size_t delta = wav_reader_->ReadSamples(
+                  data.size() - read, data.subview(read).data());
+              RTC_CHECK_GT(delta, 0) << "No new data read from file";
+              read += delta;
+            } while (read < data.size());
+          }
+          return read;
+        });
+    return buffer->size() > 0;
+  }
+
+ private:
+  WavFileReader(std::unique_ptr<WavReader> wav_reader,
+                int sampling_frequency_in_hz,
+                int num_channels,
+                bool repeat)
+      : sampling_frequency_in_hz_(sampling_frequency_in_hz),
+        num_channels_(num_channels),
+        wav_reader_(std::move(wav_reader)),
+        repeat_(repeat) {
+    RTC_CHECK_EQ(wav_reader_->sample_rate(), sampling_frequency_in_hz);
+    RTC_CHECK_EQ(wav_reader_->num_channels(), num_channels);
+  }
+
+  const int sampling_frequency_in_hz_;
+  const int num_channels_;
+  std::unique_ptr<WavReader> wav_reader_;
+  const bool repeat_;
+};
+
+class WavFileWriter final : public TestAudioDeviceModule::Renderer {
+ public:
+  WavFileWriter(absl::string_view filename,
+                int sampling_frequency_in_hz,
+                int num_channels)
+      : WavFileWriter(std::make_unique<WavWriter>(filename,
+                                                  sampling_frequency_in_hz,
+                                                  num_channels),
+                      sampling_frequency_in_hz,
+                      num_channels) {}
+
+  int SamplingFrequency() const override { return sampling_frequency_in_hz_; }
+
+  int NumChannels() const override { return num_channels_; }
+
+  bool Render(rtc::ArrayView<const int16_t> data) override {
+    wav_writer_->WriteSamples(data.data(), data.size());
+    return true;
+  }
+
+ private:
+  WavFileWriter(std::unique_ptr<WavWriter> wav_writer,
+                int sampling_frequency_in_hz,
+                int num_channels)
+      : sampling_frequency_in_hz_(sampling_frequency_in_hz),
+        wav_writer_(std::move(wav_writer)),
+        num_channels_(num_channels) {}
+
+  int sampling_frequency_in_hz_;
+  std::unique_ptr<WavWriter> wav_writer_;
+  const int num_channels_;
+};
+
+class BoundedWavFileWriter : public TestAudioDeviceModule::Renderer {
+ public:
+  BoundedWavFileWriter(absl::string_view filename,
+                       int sampling_frequency_in_hz,
+                       int num_channels)
+      : sampling_frequency_in_hz_(sampling_frequency_in_hz),
+        wav_writer_(filename, sampling_frequency_in_hz, num_channels),
+        num_channels_(num_channels),
+        silent_audio_(
+            TestAudioDeviceModule::SamplesPerFrame(sampling_frequency_in_hz) *
+                num_channels,
+            0),
+        started_writing_(false),
+        trailing_zeros_(0) {}
+
+  int SamplingFrequency() const override { return sampling_frequency_in_hz_; }
+
+  int NumChannels() const override { return num_channels_; }
+
+  bool Render(rtc::ArrayView<const int16_t> data) override {
+    const int16_t kAmplitudeThreshold = 5;
+
+    const int16_t* begin = data.begin();
+    const int16_t* end = data.end();
+    if (!started_writing_) {
+      // Cut off silence at the beginning.
+      while (begin < end) {
+        if (std::abs(*begin) > kAmplitudeThreshold) {
+          started_writing_ = true;
+          break;
+        }
+        ++begin;
+      }
+    }
+    if (started_writing_) {
+      // Cut off silence at the end.
+      while (begin < end) {
+        if (*(end - 1) != 0) {
+          break;
+        }
+        --end;
+      }
+      if (begin < end) {
+        // If it turns out that the silence was not final, we need to write
+        // all the skipped zeros and then continue writing audio.
+        while (trailing_zeros_ > 0) {
+          const size_t zeros_to_write =
+              std::min(trailing_zeros_, silent_audio_.size());
+          wav_writer_.WriteSamples(silent_audio_.data(), zeros_to_write);
+          trailing_zeros_ -= zeros_to_write;
+        }
+        wav_writer_.WriteSamples(begin, end - begin);
+      }
+      // Save the number of zeros we skipped in case this needs to be
+      // restored later.
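+      // (trailing_zeros_ counts samples rather than frames, which is why it
+      // can be flushed in silent_audio_-sized chunks above.)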
+      trailing_zeros_ += data.end() - end;
+    }
+    return true;
+  }
+
+ private:
+  int sampling_frequency_in_hz_;
+  WavWriter wav_writer_;
+  const int num_channels_;
+  std::vector<int16_t> silent_audio_;
+  bool started_writing_;
+  size_t trailing_zeros_;
+};
+
+class DiscardRenderer final : public TestAudioDeviceModule::Renderer {
+ public:
+  explicit DiscardRenderer(int sampling_frequency_in_hz, int num_channels)
+      : sampling_frequency_in_hz_(sampling_frequency_in_hz),
+        num_channels_(num_channels) {}
+
+  int SamplingFrequency() const override { return sampling_frequency_in_hz_; }
+
+  int NumChannels() const override { return num_channels_; }
+
+  bool Render(rtc::ArrayView<const int16_t> data) override { return true; }
+
+ private:
+  int sampling_frequency_in_hz_;
+  const int num_channels_;
+};
+
+}  // namespace
+
+size_t TestAudioDeviceModule::SamplesPerFrame(int sampling_frequency_in_hz) {
+  return rtc::CheckedDivExact(sampling_frequency_in_hz, kFramesPerSecond);
+}
+
+rtc::scoped_refptr<TestAudioDeviceModule> TestAudioDeviceModule::Create(
+    TaskQueueFactory* task_queue_factory,
+    std::unique_ptr<TestAudioDeviceModule::Capturer> capturer,
+    std::unique_ptr<TestAudioDeviceModule::Renderer> renderer,
+    float speed) {
+  return rtc::make_ref_counted<TestAudioDeviceModuleImpl>(
+      task_queue_factory, std::move(capturer), std::move(renderer), speed);
+}
+
+std::unique_ptr<TestAudioDeviceModule::PulsedNoiseCapturer>
+TestAudioDeviceModule::CreatePulsedNoiseCapturer(int16_t max_amplitude,
+                                                 int sampling_frequency_in_hz,
+                                                 int num_channels) {
+  return std::make_unique<PulsedNoiseCapturerImpl>(
+      max_amplitude, sampling_frequency_in_hz, num_channels);
+}
+
+std::unique_ptr<TestAudioDeviceModule::Renderer>
+TestAudioDeviceModule::CreateDiscardRenderer(int sampling_frequency_in_hz,
+                                             int num_channels) {
+  return std::make_unique<DiscardRenderer>(sampling_frequency_in_hz,
+                                           num_channels);
+}
+
+std::unique_ptr<TestAudioDeviceModule::Capturer>
+TestAudioDeviceModule::CreateWavFileReader(absl::string_view filename,
+                                           int sampling_frequency_in_hz,
+                                           int num_channels) {
+  return std::make_unique<WavFileReader>(filename, sampling_frequency_in_hz,
+                                         num_channels, false);
+}
+
+std::unique_ptr<TestAudioDeviceModule::Capturer>
+TestAudioDeviceModule::CreateWavFileReader(absl::string_view filename,
+                                           bool repeat) {
+  WavReader reader(filename);
+  int sampling_frequency_in_hz = reader.sample_rate();
+  int num_channels = rtc::checked_cast<int>(reader.num_channels());
+  return std::make_unique<WavFileReader>(filename, sampling_frequency_in_hz,
+                                         num_channels, repeat);
+}
+
+std::unique_ptr<TestAudioDeviceModule::Renderer>
+TestAudioDeviceModule::CreateWavFileWriter(absl::string_view filename,
+                                           int sampling_frequency_in_hz,
+                                           int num_channels) {
+  return std::make_unique<WavFileWriter>(filename, sampling_frequency_in_hz,
+                                         num_channels);
+}
+
+std::unique_ptr<TestAudioDeviceModule::Renderer>
+TestAudioDeviceModule::CreateBoundedWavFileWriter(absl::string_view filename,
+                                                  int sampling_frequency_in_hz,
+                                                  int num_channels) {
+  return std::make_unique<BoundedWavFileWriter>(
+      filename, sampling_frequency_in_hz, num_channels);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/include/test_audio_device.h b/third_party/libwebrtc/modules/audio_device/include/test_audio_device.h
new file mode 100644
index 0000000000..8413479291
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/test_audio_device.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_AUDIO_DEVICE_INCLUDE_TEST_AUDIO_DEVICE_H_
+#define MODULES_AUDIO_DEVICE_INCLUDE_TEST_AUDIO_DEVICE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "api/scoped_refptr.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+// TestAudioDeviceModule implements an AudioDevice module that can act both
+// as a capturer and a renderer. It will use 10ms audio frames.
+class TestAudioDeviceModule : public AudioDeviceModule {
+ public:
+  // Returns the number of samples that Capturers and Renderers with this
+  // sampling frequency will work with every time Capture or Render is called.
+  static size_t SamplesPerFrame(int sampling_frequency_in_hz);
+
+  class Capturer {
+   public:
+    virtual ~Capturer() {}
+    // Returns the sampling frequency in Hz of the audio data that this
+    // capturer produces.
+    virtual int SamplingFrequency() const = 0;
+    // Returns the number of channels of captured audio data.
+    virtual int NumChannels() const = 0;
+    // Replaces the contents of `buffer` with 10ms of captured audio data
+    // (see TestAudioDeviceModule::SamplesPerFrame). Returns true if the
+    // capturer can keep producing data, or false when the capture finishes.
+    virtual bool Capture(rtc::BufferT<int16_t>* buffer) = 0;
+  };
+
+  class Renderer {
+   public:
+    virtual ~Renderer() {}
+    // Returns the sampling frequency in Hz of the audio data that this
+    // renderer receives.
+    virtual int SamplingFrequency() const = 0;
+    // Returns the number of channels of audio data that this renderer
+    // requires.
+    virtual int NumChannels() const = 0;
+    // Renders the passed audio data and returns true if the renderer wants
+    // to keep receiving data, or false otherwise.
+    virtual bool Render(rtc::ArrayView<const int16_t> data) = 0;
+  };
+
+  // A fake capturer that generates pulses with random samples between
+  // -max_amplitude and +max_amplitude.
+  class PulsedNoiseCapturer : public Capturer {
+   public:
+    ~PulsedNoiseCapturer() override {}
+
+    virtual void SetMaxAmplitude(int16_t amplitude) = 0;
+  };
+
+  ~TestAudioDeviceModule() override {}
+
+  // Creates a new TestAudioDeviceModule. When capturing or playing, 10 ms
+  // audio frames will be processed every 10ms / `speed`.
+  // `capturer` is an object that produces audio data. Can be nullptr if this
+  // device is never used for recording.
+  // `renderer` is an object that receives audio data that would have been
+  // played out. Can be nullptr if this device is never used for playing.
+  // Use one of the Create... functions to get these instances.
+  static rtc::scoped_refptr<TestAudioDeviceModule> Create(
+      TaskQueueFactory* task_queue_factory,
+      std::unique_ptr<Capturer> capturer,
+      std::unique_ptr<Renderer> renderer,
+      float speed = 1);
+
+  // Returns a Capturer instance that generates a signal of `num_channels`
+  // channels where every second frame is zero and every second frame is
+  // evenly distributed random noise with max amplitude `max_amplitude`.
+  static std::unique_ptr<PulsedNoiseCapturer> CreatePulsedNoiseCapturer(
+      int16_t max_amplitude,
+      int sampling_frequency_in_hz,
+      int num_channels = 1);
+
+  // Returns a Renderer instance that does nothing with the audio data.
+  static std::unique_ptr<Renderer> CreateDiscardRenderer(
+      int sampling_frequency_in_hz,
+      int num_channels = 1);
+
+  // WavReader and WavWriter creation based on file name.
+
+  // Returns a Capturer instance that gets its data from a file.
+  // The sample rate and channels will be checked against the Wav file.
+  static std::unique_ptr<Capturer> CreateWavFileReader(
+      absl::string_view filename,
+      int sampling_frequency_in_hz,
+      int num_channels = 1);
+
+  // Returns a Capturer instance that gets its data from a file.
+  // Automatically detects the sample rate and number of channels.
+  // `repeat` - if true, the file will be replayed from the start when we
+  // reach the end of file.
+  static std::unique_ptr<Capturer> CreateWavFileReader(
+      absl::string_view filename,
+      bool repeat = false);
+
+  // Returns a Renderer instance that writes its data to a file.
+  static std::unique_ptr<Renderer> CreateWavFileWriter(
+      absl::string_view filename,
+      int sampling_frequency_in_hz,
+      int num_channels = 1);
+
+  // Returns a Renderer instance that writes its data to a WAV file, cutting
+  // off silence at the beginning (not necessarily perfect silence, see
+  // kAmplitudeThreshold) and at the end (only actual 0 samples in this case).
+  static std::unique_ptr<Renderer> CreateBoundedWavFileWriter(
+      absl::string_view filename,
+      int sampling_frequency_in_hz,
+      int num_channels = 1);
+
+  int32_t Init() override = 0;
+  int32_t RegisterAudioCallback(AudioTransport* callback) override = 0;
+
+  int32_t StartPlayout() override = 0;
+  int32_t StopPlayout() override = 0;
+  int32_t StartRecording() override = 0;
+  int32_t StopRecording() override = 0;
+
+  bool Playing() const override = 0;
+  bool Recording() const override = 0;
+
+  // Blocks forever until the Recorder stops producing data.
+  virtual void WaitForRecordingEnd() = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_INCLUDE_TEST_AUDIO_DEVICE_H_
diff --git a/third_party/libwebrtc/modules/audio_device/include/test_audio_device_unittest.cc b/third_party/libwebrtc/modules/audio_device/include/test_audio_device_unittest.cc
new file mode 100644
index 0000000000..2975b11325
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/include/test_audio_device_unittest.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/include/test_audio_device.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/wav_file.h"
+#include "common_audio/wav_header.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+namespace {
+
+void RunTest(const std::vector<int16_t>& input_samples,
+             const std::vector<int16_t>& expected_samples,
+             size_t samples_per_frame) {
+  const ::testing::TestInfo* const test_info =
+      ::testing::UnitTest::GetInstance()->current_test_info();
+
+  const std::string output_filename =
+      test::OutputPath() + "BoundedWavFileWriterTest_" + test_info->name() +
+      "_" + std::to_string(std::rand()) + ".wav";
+
+  static const size_t kSamplesPerFrame = 8;
+  static const int kSampleRate = kSamplesPerFrame * 100;
+  EXPECT_EQ(TestAudioDeviceModule::SamplesPerFrame(kSampleRate),
+            kSamplesPerFrame);
+
+  // Test through file name API.
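+  // (kSampleRate == 800 Hz makes SamplesPerFrame(kSampleRate) == 800 / 100
+  // == 8 == kSamplesPerFrame, so each Render() call below is one full frame.)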
+  {
+    std::unique_ptr<TestAudioDeviceModule::Renderer> writer =
+        TestAudioDeviceModule::CreateBoundedWavFileWriter(output_filename,
+                                                          800);
+
+    for (size_t i = 0; i < input_samples.size(); i += kSamplesPerFrame) {
+      EXPECT_TRUE(writer->Render(rtc::ArrayView<const int16_t>(
+          &input_samples[i],
+          std::min(kSamplesPerFrame, input_samples.size() - i))));
+    }
+  }
+
+  {
+    WavReader reader(output_filename);
+    std::vector<int16_t> read_samples(expected_samples.size());
+    EXPECT_EQ(expected_samples.size(),
+              reader.ReadSamples(read_samples.size(), read_samples.data()));
+    EXPECT_EQ(expected_samples, read_samples);
+
+    EXPECT_EQ(0u,
+              reader.ReadSamples(read_samples.size(), read_samples.data()));
+  }
+
+  remove(output_filename.c_str());
+}
+}  // namespace
+
+TEST(BoundedWavFileWriterTest, NoSilence) {
+  static const std::vector<int16_t> kInputSamples = {
+      75,   1234,  243,    -1231, -22222, 0,    3,      88,
+      1222, -1213, -13222, -7,    -3525,  5787, -25247, 8};
+  static const std::vector<int16_t> kExpectedSamples = kInputSamples;
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, SomeStartSilence) {
+  static const std::vector<int16_t> kInputSamples = {
+      0, 0, 0, 0, 3, 0, 0, 0, 0, 3, -13222, -7, -3525, 5787, -25247, 8};
+  static const std::vector<int16_t> kExpectedSamples(
+      kInputSamples.begin() + 10, kInputSamples.end());
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, NegativeStartSilence) {
+  static const std::vector<int16_t> kInputSamples = {
+      0, -4, -6, 0, 3, 0, 0, 0, 0, 3, -13222, -7, -3525, 5787, -25247, 8};
+  static const std::vector<int16_t> kExpectedSamples(kInputSamples.begin() + 2,
+                                                     kInputSamples.end());
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, SomeEndSilence) {
+  static const std::vector<int16_t> kInputSamples = {
+      75, 1234, 243, -1231, -22222, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  static const std::vector<int16_t> kExpectedSamples(kInputSamples.begin(),
+                                                     kInputSamples.end() - 9);
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, DoubleEndSilence) {
+  static const std::vector<int16_t> kInputSamples = {
+      75, 1234,  243,    -1231, -22222, 0,    0, 0,
+      0,  -1213, -13222, -7,    -3525,  5787, 0, 0};
+  static const std::vector<int16_t> kExpectedSamples(kInputSamples.begin(),
+                                                     kInputSamples.end() - 2);
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, DoubleSilence) {
+  static const std::vector<int16_t> kInputSamples = {0,     -1213, -13222, -7,
+                                                     -3525, 5787,  0,      0};
+  static const std::vector<int16_t> kExpectedSamples(kInputSamples.begin() + 1,
+                                                     kInputSamples.end() - 2);
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(BoundedWavFileWriterTest, EndSilenceCutoff) {
+  static const std::vector<int16_t> kInputSamples = {
+      75, 1234, 243, -1231, -22222, 0, 1, 0, 0, 0, 0};
+  static const std::vector<int16_t> kExpectedSamples(kInputSamples.begin(),
+                                                     kInputSamples.end() - 4);
+  RunTest(kInputSamples, kExpectedSamples, 8);
+}
+
+TEST(WavFileReaderTest, RepeatedTrueWithSingleFrameFileReadTwice) {
+  static const std::vector<int16_t> kInputSamples = {75,     1234, 243, -1231,
+                                                     -22222, 0,    3,   88};
+  static const rtc::BufferT<int16_t> kExpectedSamples(kInputSamples.data(),
+                                                      kInputSamples.size());
+
+  const std::string output_filename = test::OutputPath() +
+                                      "WavFileReaderTest_RepeatedTrue_" +
+                                      std::to_string(std::rand()) + ".wav";
+
+  static const size_t kSamplesPerFrame = 8;
+  static const int kSampleRate = kSamplesPerFrame * 100;
+  EXPECT_EQ(TestAudioDeviceModule::SamplesPerFrame(kSampleRate),
+            kSamplesPerFrame);
+
+  // Create wav file to read.
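+  // (The writer below is scoped so its destructor closes the file and
+  // finalizes the WAV header before the reader opens it.)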
+  {
+    std::unique_ptr<TestAudioDeviceModule::Renderer> writer =
+        TestAudioDeviceModule::CreateWavFileWriter(output_filename, 800);
+
+    for (size_t i = 0; i < kInputSamples.size(); i += kSamplesPerFrame) {
+      EXPECT_TRUE(writer->Render(rtc::ArrayView<const int16_t>(
+          &kInputSamples[i],
+          std::min(kSamplesPerFrame, kInputSamples.size() - i))));
+    }
+  }
+
+  {
+    std::unique_ptr<TestAudioDeviceModule::Capturer> reader =
+        TestAudioDeviceModule::CreateWavFileReader(output_filename, true);
+    rtc::BufferT<int16_t> buffer(kExpectedSamples.size());
+    EXPECT_TRUE(reader->Capture(&buffer));
+    EXPECT_EQ(kExpectedSamples, buffer);
+    EXPECT_TRUE(reader->Capture(&buffer));
+    EXPECT_EQ(kExpectedSamples, buffer);
+  }
+
+  remove(output_filename.c_str());
+}
+
+TEST(PulsedNoiseCapturerTest, SetMaxAmplitude) {
+  const int16_t kAmplitude = 50;
+  std::unique_ptr<TestAudioDeviceModule::PulsedNoiseCapturer> capturer =
+      TestAudioDeviceModule::CreatePulsedNoiseCapturer(
+          kAmplitude, /*sampling_frequency_in_hz=*/8000);
+  rtc::BufferT<int16_t> recording_buffer;
+
+  // Verify that the capturer doesn't create entries louder than kAmplitude.
+  // Since the pulse generator alternates between writing zeroes and actual
+  // entries, we need to do the capturing twice.
+  capturer->Capture(&recording_buffer);
+  capturer->Capture(&recording_buffer);
+  int16_t max_sample =
+      *std::max_element(recording_buffer.begin(), recording_buffer.end());
+  EXPECT_LE(max_sample, kAmplitude);
+
+  // Increase the amplitude and verify that the samples can now be louder
+  // than the previous max.
+  capturer->SetMaxAmplitude(kAmplitude * 2);
+  capturer->Capture(&recording_buffer);
+  capturer->Capture(&recording_buffer);
+  max_sample =
+      *std::max_element(recording_buffer.begin(), recording_buffer.end());
+  EXPECT_GT(max_sample, kAmplitude);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.cc
new file mode 100644
index 0000000000..5dfb91d6f4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.cc
@@ -0,0 +1,40 @@
+/*
+ * libjingle
+ * Copyright 2004--2010, Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "modules/audio_device/linux/alsasymboltable_linux.h" + +namespace webrtc { +namespace adm_linux_alsa { + +LATE_BINDING_SYMBOL_TABLE_DEFINE_BEGIN(AlsaSymbolTable, "libasound.so.2") +#define X(sym) LATE_BINDING_SYMBOL_TABLE_DEFINE_ENTRY(AlsaSymbolTable, sym) +ALSA_SYMBOLS_LIST +#undef X +LATE_BINDING_SYMBOL_TABLE_DEFINE_END(AlsaSymbolTable) + +} // namespace adm_linux_alsa +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.h b/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.h new file mode 100644 index 0000000000..c9970b02bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/alsasymboltable_linux.h @@ -0,0 +1,148 @@ +/* + * libjingle + * Copyright 2004--2010, Google Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AUDIO_DEVICE_ALSASYMBOLTABLE_LINUX_H_ +#define AUDIO_DEVICE_ALSASYMBOLTABLE_LINUX_H_ + +#include "modules/audio_device/linux/latebindingsymboltable_linux.h" + +namespace webrtc { +namespace adm_linux_alsa { + +// The ALSA symbols we need, as an X-Macro list. +// This list must contain precisely every libasound function that is used in +// alsasoundsystem.cc. 
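+// For illustration: each use site defines X before expanding the list. The
+// declaration block at the bottom of this header, for example, defines
+//   #define X(sym) LATE_BINDING_SYMBOL_TABLE_DECLARE_ENTRY(AlsaSymbolTable, sym)
+// so every entry below becomes one late-bound symbol declaration.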
+#define ALSA_SYMBOLS_LIST \ + X(snd_device_name_free_hint) \ + X(snd_device_name_get_hint) \ + X(snd_device_name_hint) \ + X(snd_pcm_avail_update) \ + X(snd_pcm_close) \ + X(snd_pcm_delay) \ + X(snd_pcm_drop) \ + X(snd_pcm_open) \ + X(snd_pcm_prepare) \ + X(snd_pcm_readi) \ + X(snd_pcm_recover) \ + X(snd_pcm_resume) \ + X(snd_pcm_reset) \ + X(snd_pcm_state) \ + X(snd_pcm_set_params) \ + X(snd_pcm_get_params) \ + X(snd_pcm_start) \ + X(snd_pcm_stream) \ + X(snd_pcm_frames_to_bytes) \ + X(snd_pcm_bytes_to_frames) \ + X(snd_pcm_wait) \ + X(snd_pcm_writei) \ + X(snd_pcm_info_get_class) \ + X(snd_pcm_info_get_subdevices_avail) \ + X(snd_pcm_info_get_subdevice_name) \ + X(snd_pcm_info_set_subdevice) \ + X(snd_pcm_info_get_id) \ + X(snd_pcm_info_set_device) \ + X(snd_pcm_info_set_stream) \ + X(snd_pcm_info_get_name) \ + X(snd_pcm_info_get_subdevices_count) \ + X(snd_pcm_info_sizeof) \ + X(snd_pcm_hw_params) \ + X(snd_pcm_hw_params_malloc) \ + X(snd_pcm_hw_params_free) \ + X(snd_pcm_hw_params_any) \ + X(snd_pcm_hw_params_set_access) \ + X(snd_pcm_hw_params_set_format) \ + X(snd_pcm_hw_params_set_channels) \ + X(snd_pcm_hw_params_set_rate_near) \ + X(snd_pcm_hw_params_set_buffer_size_near) \ + X(snd_card_next) \ + X(snd_card_get_name) \ + X(snd_config_update) \ + X(snd_config_copy) \ + X(snd_config_get_id) \ + X(snd_ctl_open) \ + X(snd_ctl_close) \ + X(snd_ctl_card_info) \ + X(snd_ctl_card_info_sizeof) \ + X(snd_ctl_card_info_get_id) \ + X(snd_ctl_card_info_get_name) \ + X(snd_ctl_pcm_next_device) \ + X(snd_ctl_pcm_info) \ + X(snd_mixer_load) \ + X(snd_mixer_free) \ + X(snd_mixer_detach) \ + X(snd_mixer_close) \ + X(snd_mixer_open) \ + X(snd_mixer_attach) \ + X(snd_mixer_first_elem) \ + X(snd_mixer_elem_next) \ + X(snd_mixer_selem_get_name) \ + X(snd_mixer_selem_is_active) \ + X(snd_mixer_selem_register) \ + X(snd_mixer_selem_set_playback_volume_all) \ + X(snd_mixer_selem_get_playback_volume) \ + X(snd_mixer_selem_has_playback_volume) \ + X(snd_mixer_selem_get_playback_volume_range) \ + X(snd_mixer_selem_has_playback_switch) \ + X(snd_mixer_selem_get_playback_switch) \ + X(snd_mixer_selem_set_playback_switch_all) \ + X(snd_mixer_selem_has_capture_switch) \ + X(snd_mixer_selem_get_capture_switch) \ + X(snd_mixer_selem_set_capture_switch_all) \ + X(snd_mixer_selem_has_capture_volume) \ + X(snd_mixer_selem_set_capture_volume_all) \ + X(snd_mixer_selem_get_capture_volume) \ + X(snd_mixer_selem_get_capture_volume_range) \ + X(snd_dlopen) \ + X(snd_dlclose) \ + X(snd_config) \ + X(snd_config_search) \ + X(snd_config_get_string) \ + X(snd_config_search_definition) \ + X(snd_config_get_type) \ + X(snd_config_delete) \ + X(snd_config_iterator_entry) \ + X(snd_config_iterator_first) \ + X(snd_config_iterator_next) \ + X(snd_config_iterator_end) \ + X(snd_config_delete_compound_members) \ + X(snd_config_get_integer) \ + X(snd_config_get_bool) \ + X(snd_dlsym) \ + X(snd_strerror) \ + X(snd_lib_error) \ + X(snd_lib_error_set_handler) + +LATE_BINDING_SYMBOL_TABLE_DECLARE_BEGIN(AlsaSymbolTable) +#define X(sym) LATE_BINDING_SYMBOL_TABLE_DECLARE_ENTRY(AlsaSymbolTable, sym) +ALSA_SYMBOLS_LIST +#undef X +LATE_BINDING_SYMBOL_TABLE_DECLARE_END(AlsaSymbolTable) + +} // namespace adm_linux_alsa +} // namespace webrtc + +#endif // AUDIO_DEVICE_ALSASYMBOLTABLE_LINUX_H_ diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.cc new file mode 100644 index 0000000000..50cf3beb6c --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.cc @@ -0,0 +1,1637 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/linux/audio_device_alsa_linux.h" + + +#include "modules/audio_device/audio_device_config.h" +#include "rtc_base/logging.h" +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/sleep.h" + +WebRTCAlsaSymbolTable* GetAlsaSymbolTable() { + static WebRTCAlsaSymbolTable* alsa_symbol_table = new WebRTCAlsaSymbolTable(); + return alsa_symbol_table; +} + +// Accesses ALSA functions through our late-binding symbol table instead of +// directly. This way we don't have to link to libasound, which means our binary +// will work on systems that don't have it. +#define LATE(sym) \ + LATESYM_GET(webrtc::adm_linux_alsa::AlsaSymbolTable, GetAlsaSymbolTable(), \ + sym) + +// Redefine these here to be able to do late-binding +#undef snd_ctl_card_info_alloca +#define snd_ctl_card_info_alloca(ptr) \ + do { \ + *ptr = (snd_ctl_card_info_t*)__builtin_alloca( \ + LATE(snd_ctl_card_info_sizeof)()); \ + memset(*ptr, 0, LATE(snd_ctl_card_info_sizeof)()); \ + } while (0) + +#undef snd_pcm_info_alloca +#define snd_pcm_info_alloca(pInfo) \ + do { \ + *pInfo = (snd_pcm_info_t*)__builtin_alloca(LATE(snd_pcm_info_sizeof)()); \ + memset(*pInfo, 0, LATE(snd_pcm_info_sizeof)()); \ + } while (0) + +// snd_lib_error_handler_t +void WebrtcAlsaErrorHandler(const char* file, + int line, + const char* function, + int err, + const char* fmt, + ...) 
{} + +namespace webrtc { +static const unsigned int ALSA_PLAYOUT_FREQ = 48000; +static const unsigned int ALSA_PLAYOUT_CH = 2; +static const unsigned int ALSA_PLAYOUT_LATENCY = 40 * 1000; // in us +static const unsigned int ALSA_CAPTURE_FREQ = 48000; +static const unsigned int ALSA_CAPTURE_CH = 2; +static const unsigned int ALSA_CAPTURE_LATENCY = 40 * 1000; // in us +static const unsigned int ALSA_CAPTURE_WAIT_TIMEOUT = 5; // in ms + +#define FUNC_GET_NUM_OF_DEVICE 0 +#define FUNC_GET_DEVICE_NAME 1 +#define FUNC_GET_DEVICE_NAME_FOR_AN_ENUM 2 + +AudioDeviceLinuxALSA::AudioDeviceLinuxALSA() + : _ptrAudioBuffer(NULL), + _inputDeviceIndex(0), + _outputDeviceIndex(0), + _inputDeviceIsSpecified(false), + _outputDeviceIsSpecified(false), + _handleRecord(NULL), + _handlePlayout(NULL), + _recordingBuffersizeInFrame(0), + _recordingPeriodSizeInFrame(0), + _playoutBufferSizeInFrame(0), + _playoutPeriodSizeInFrame(0), + _recordingBufferSizeIn10MS(0), + _playoutBufferSizeIn10MS(0), + _recordingFramesIn10MS(0), + _playoutFramesIn10MS(0), + _recordingFreq(ALSA_CAPTURE_FREQ), + _playoutFreq(ALSA_PLAYOUT_FREQ), + _recChannels(ALSA_CAPTURE_CH), + _playChannels(ALSA_PLAYOUT_CH), + _recordingBuffer(NULL), + _playoutBuffer(NULL), + _recordingFramesLeft(0), + _playoutFramesLeft(0), + _initialized(false), + _recording(false), + _playing(false), + _recIsInitialized(false), + _playIsInitialized(false), + _recordingDelay(0), + _playoutDelay(0) { + memset(_oldKeyState, 0, sizeof(_oldKeyState)); + RTC_DLOG(LS_INFO) << __FUNCTION__ << " created"; +} + +// ---------------------------------------------------------------------------- +// AudioDeviceLinuxALSA - dtor +// ---------------------------------------------------------------------------- + +AudioDeviceLinuxALSA::~AudioDeviceLinuxALSA() { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed"; + + Terminate(); + + // Clean up the recording buffer and playout buffer. + if (_recordingBuffer) { + delete[] _recordingBuffer; + _recordingBuffer = NULL; + } + if (_playoutBuffer) { + delete[] _playoutBuffer; + _playoutBuffer = NULL; + } +} + +void AudioDeviceLinuxALSA::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + MutexLock lock(&mutex_); + + _ptrAudioBuffer = audioBuffer; + + // Inform the AudioBuffer about default settings for this implementation. + // Set all values to zero here since the actual settings will be done by + // InitPlayout and InitRecording later. 
+ _ptrAudioBuffer->SetRecordingSampleRate(0); + _ptrAudioBuffer->SetPlayoutSampleRate(0); + _ptrAudioBuffer->SetRecordingChannels(0); + _ptrAudioBuffer->SetPlayoutChannels(0); +} + +int32_t AudioDeviceLinuxALSA::ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const { + audioLayer = AudioDeviceModule::kLinuxAlsaAudio; + return 0; +} + +AudioDeviceGeneric::InitStatus AudioDeviceLinuxALSA::Init() { + MutexLock lock(&mutex_); + + // Load libasound + if (!GetAlsaSymbolTable()->Load()) { + // Alsa is not installed on this system + RTC_LOG(LS_ERROR) << "failed to load symbol table"; + return InitStatus::OTHER_ERROR; + } + + if (_initialized) { + return InitStatus::OK; + } +#if defined(WEBRTC_USE_X11) + // Get X display handle for typing detection + _XDisplay = XOpenDisplay(NULL); + if (!_XDisplay) { + RTC_LOG(LS_WARNING) + << "failed to open X display, typing detection will not work"; + } +#endif + + _initialized = true; + + return InitStatus::OK; +} + +int32_t AudioDeviceLinuxALSA::Terminate() { + if (!_initialized) { + return 0; + } + + MutexLock lock(&mutex_); + + _mixerManager.Close(); + + // RECORDING + mutex_.Unlock(); + _ptrThreadRec.Finalize(); + + // PLAYOUT + _ptrThreadPlay.Finalize(); + mutex_.Lock(); + +#if defined(WEBRTC_USE_X11) + if (_XDisplay) { + XCloseDisplay(_XDisplay); + _XDisplay = NULL; + } +#endif + _initialized = false; + _outputDeviceIsSpecified = false; + _inputDeviceIsSpecified = false; + + return 0; +} + +bool AudioDeviceLinuxALSA::Initialized() const { + return (_initialized); +} + +int32_t AudioDeviceLinuxALSA::InitSpeaker() { + MutexLock lock(&mutex_); + return InitSpeakerLocked(); +} + +int32_t AudioDeviceLinuxALSA::InitSpeakerLocked() { + if (_playing) { + return -1; + } + + char devName[kAdmMaxDeviceNameSize] = {0}; + GetDevicesInfo(2, true, _outputDeviceIndex, devName, kAdmMaxDeviceNameSize); + return _mixerManager.OpenSpeaker(devName); +} + +int32_t AudioDeviceLinuxALSA::InitMicrophone() { + MutexLock lock(&mutex_); + return InitMicrophoneLocked(); +} + +int32_t AudioDeviceLinuxALSA::InitMicrophoneLocked() { + if (_recording) { + return -1; + } + + char devName[kAdmMaxDeviceNameSize] = {0}; + GetDevicesInfo(2, false, _inputDeviceIndex, devName, kAdmMaxDeviceNameSize); + return _mixerManager.OpenMicrophone(devName); +} + +bool AudioDeviceLinuxALSA::SpeakerIsInitialized() const { + return (_mixerManager.SpeakerIsInitialized()); +} + +bool AudioDeviceLinuxALSA::MicrophoneIsInitialized() const { + return (_mixerManager.MicrophoneIsInitialized()); +} + +int32_t AudioDeviceLinuxALSA::SpeakerVolumeIsAvailable(bool& available) { + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control. 
+ available = false; + return 0; + } + + // Given that InitSpeaker was successful, we know that a volume control + // exists + available = true; + + // Close the initialized output mixer + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceLinuxALSA::SetSpeakerVolume(uint32_t volume) { + return (_mixerManager.SetSpeakerVolume(volume)); +} + +int32_t AudioDeviceLinuxALSA::SpeakerVolume(uint32_t& volume) const { + uint32_t level(0); + + if (_mixerManager.SpeakerVolume(level) == -1) { + return -1; + } + + volume = level; + + return 0; +} + +int32_t AudioDeviceLinuxALSA::MaxSpeakerVolume(uint32_t& maxVolume) const { + uint32_t maxVol(0); + + if (_mixerManager.MaxSpeakerVolume(maxVol) == -1) { + return -1; + } + + maxVolume = maxVol; + + return 0; +} + +int32_t AudioDeviceLinuxALSA::MinSpeakerVolume(uint32_t& minVolume) const { + uint32_t minVol(0); + + if (_mixerManager.MinSpeakerVolume(minVol) == -1) { + return -1; + } + + minVolume = minVol; + + return 0; +} + +int32_t AudioDeviceLinuxALSA::SpeakerMuteIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + // + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control, hence it is safe to state that there is no mute control + // already at this stage. + available = false; + return 0; + } + + // Check if the selected speaker has a mute control + _mixerManager.SpeakerMuteIsAvailable(isAvailable); + + available = isAvailable; + + // Close the initialized output mixer + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceLinuxALSA::SetSpeakerMute(bool enable) { + return (_mixerManager.SetSpeakerMute(enable)); +} + +int32_t AudioDeviceLinuxALSA::SpeakerMute(bool& enabled) const { + bool muted(0); + + if (_mixerManager.SpeakerMute(muted) == -1) { + return -1; + } + + enabled = muted; + + return 0; +} + +int32_t AudioDeviceLinuxALSA::MicrophoneMuteIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.MicrophoneIsInitialized(); + + // Make an attempt to open up the + // input mixer corresponding to the currently selected input device. + // + if (!wasInitialized && InitMicrophone() == -1) { + // If we end up here it means that the selected microphone has no volume + // control, hence it is safe to state that there is no mute control + // already at this stage. 
+    available = false;
+    return 0;
+  }
+
+  // Check if the selected microphone has a mute control
+  //
+  _mixerManager.MicrophoneMuteIsAvailable(isAvailable);
+  available = isAvailable;
+
+  // Close the initialized input mixer
+  //
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::SetMicrophoneMute(bool enable) {
+  return (_mixerManager.SetMicrophoneMute(enable));
+}
+
+// ----------------------------------------------------------------------------
+//  MicrophoneMute
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceLinuxALSA::MicrophoneMute(bool& enabled) const {
+  bool muted(false);
+
+  if (_mixerManager.MicrophoneMute(muted) == -1) {
+    return -1;
+  }
+
+  enabled = muted;
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::StereoRecordingIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  // If we have already initialized in stereo, it's obviously available.
+  if (_recIsInitialized && (2 == _recChannels)) {
+    available = true;
+    return 0;
+  }
+
+  // Save the recording states and the number of recording channels.
+  bool recIsInitialized = _recIsInitialized;
+  bool recording = _recording;
+  int recChannels = _recChannels;
+
+  available = false;
+
+  // Stop/uninitialize recording if initialized (and possibly started)
+  if (_recIsInitialized) {
+    StopRecordingLocked();
+  }
+
+  // Try to initialize in stereo.
+  _recChannels = 2;
+  if (InitRecordingLocked() == 0) {
+    available = true;
+  }
+
+  // Stop/uninitialize recording
+  StopRecordingLocked();
+
+  // Recover previous states
+  _recChannels = recChannels;
+  if (recIsInitialized) {
+    InitRecordingLocked();
+  }
+  if (recording) {
+    StartRecording();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::SetStereoRecording(bool enable) {
+  if (enable)
+    _recChannels = 2;
+  else
+    _recChannels = 1;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::StereoRecording(bool& enabled) const {
+  if (_recChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::StereoPlayoutIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  // If we have already initialized in stereo, it's obviously available.
+  if (_playIsInitialized && (2 == _playChannels)) {
+    available = true;
+    return 0;
+  }
+
+  // Save the playout states and the number of playout channels.
+  bool playIsInitialized = _playIsInitialized;
+  bool playing = _playing;
+  int playChannels = _playChannels;
+
+  available = false;
+
+  // Stop/uninitialize playout if initialized (and possibly started)
+  if (_playIsInitialized) {
+    StopPlayoutLocked();
+  }
+
+  // Try to initialize in stereo.
+  _playChannels = 2;
+  if (InitPlayoutLocked() == 0) {
+    available = true;
+  }
+
+  // Stop/uninitialize playout
+  StopPlayoutLocked();
+
+  // Recover previous states
+  _playChannels = playChannels;
+  if (playIsInitialized) {
+    InitPlayoutLocked();
+  }
+  if (playing) {
+    StartPlayout();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::SetStereoPlayout(bool enable) {
+  if (enable)
+    _playChannels = 2;
+  else
+    _playChannels = 1;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::StereoPlayout(bool& enabled) const {
+  if (_playChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::MicrophoneVolumeIsAvailable(bool& available) {
+  bool wasInitialized = _mixerManager.MicrophoneIsInitialized();
+
+  // Make an attempt to open up the
+  // input mixer corresponding to the currently selected input device.
+  if (!wasInitialized && InitMicrophone() == -1) {
+    // If we end up here it means that the selected microphone has no volume
+    // control.
+    available = false;
+    return 0;
+  }
+
+  // Given that InitMicrophone was successful, we know that a volume control
+  // exists.
+  available = true;
+
+  // Close the initialized input mixer
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::SetMicrophoneVolume(uint32_t volume) {
+  return (_mixerManager.SetMicrophoneVolume(volume));
+}
+
+int32_t AudioDeviceLinuxALSA::MicrophoneVolume(uint32_t& volume) const {
+  uint32_t level(0);
+
+  if (_mixerManager.MicrophoneVolume(level) == -1) {
+    RTC_LOG(LS_WARNING) << "failed to retrieve current microphone level";
+    return -1;
+  }
+
+  volume = level;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::MaxMicrophoneVolume(uint32_t& maxVolume) const {
+  uint32_t maxVol(0);
+
+  if (_mixerManager.MaxMicrophoneVolume(maxVol) == -1) {
+    return -1;
+  }
+
+  maxVolume = maxVol;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::MinMicrophoneVolume(uint32_t& minVolume) const {
+  uint32_t minVol(0);
+
+  if (_mixerManager.MinMicrophoneVolume(minVol) == -1) {
+    return -1;
+  }
+
+  minVolume = minVol;
+
+  return 0;
+}
+
+int16_t AudioDeviceLinuxALSA::PlayoutDevices() {
+  return (int16_t)GetDevicesInfo(0, true);
+}
+
+int32_t AudioDeviceLinuxALSA::SetPlayoutDevice(uint16_t index) {
+  if (_playIsInitialized) {
+    return -1;
+  }
+
+  uint32_t nDevices = GetDevicesInfo(0, true);
+  RTC_LOG(LS_VERBOSE) << "number of available audio output devices is "
+                      << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _outputDeviceIndex = index;
+  _outputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::SetPlayoutDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceLinuxALSA::PlayoutDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  const uint16_t nDevices(PlayoutDevices());
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  return GetDevicesInfo(1, true, index, name, kAdmMaxDeviceNameSize);
+}
+
+int32_t AudioDeviceLinuxALSA::RecordingDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  const uint16_t nDevices(RecordingDevices());
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  return GetDevicesInfo(1, false, index, name, kAdmMaxDeviceNameSize);
+}
+
+int16_t AudioDeviceLinuxALSA::RecordingDevices() {
+  return (int16_t)GetDevicesInfo(0, false);
+}
+
+int32_t AudioDeviceLinuxALSA::SetRecordingDevice(uint16_t index) {
+  if (_recIsInitialized) {
+    return -1;
+  }
+
+  uint32_t nDevices = GetDevicesInfo(0, false);
+  RTC_LOG(LS_VERBOSE) << "number of available audio input devices is "
+                      << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _inputDeviceIndex = index;
+  _inputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  SetRecordingDevice II
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceLinuxALSA::SetRecordingDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceLinuxALSA::PlayoutIsAvailable(bool& available) {
+  available = false;
+
+  // Try to initialize the playout side with mono.
+  // Assumes that the user sets the number of channels after calling this
+  // function.
+  _playChannels = 1;
+  int32_t res = InitPlayout();
+
+  // Cancel effect of initialization
+  StopPlayout();
+
+  if (res != -1) {
+    available = true;
+  } else {
+    // It may be possible to play out in stereo
+    res = StereoPlayoutIsAvailable(available);
+    if (available) {
+      // Then set channels to 2 so InitPlayout doesn't fail
+      _playChannels = 2;
+    }
+  }
+
+  return res;
+}
+
+int32_t AudioDeviceLinuxALSA::RecordingIsAvailable(bool& available) {
+  available = false;
+
+  // Try to initialize the recording side with mono.
+  // Assumes that the user sets the number of channels after calling this
+  // function.
+  _recChannels = 1;
+  int32_t res = InitRecording();
+
+  // Cancel effect of initialization
+  StopRecording();
+
+  if (res != -1) {
+    available = true;
+  } else {
+    // It may be possible to record in stereo
+    res = StereoRecordingIsAvailable(available);
+    if (available) {
+      // Then set channels to 2 so InitRecording doesn't fail
+      _recChannels = 2;
+    }
+  }
+
+  return res;
+}
+
+int32_t AudioDeviceLinuxALSA::InitPlayout() {
+  MutexLock lock(&mutex_);
+  return InitPlayoutLocked();
+}
+
+int32_t AudioDeviceLinuxALSA::InitPlayoutLocked() {
+  int errVal = 0;
+
+  if (_playing) {
+    return -1;
+  }
+
+  if (!_outputDeviceIsSpecified) {
+    return -1;
+  }
+
+  if (_playIsInitialized) {
+    return 0;
+  }
+  // Initialize the speaker (devices might have been added or removed)
+  if (InitSpeakerLocked() == -1) {
+    RTC_LOG(LS_WARNING) << "InitSpeaker() failed";
+  }
+
+  // Start by closing any existing wave-output devices
+  //
+  if (_handlePlayout != NULL) {
+    errVal = LATE(snd_pcm_close)(_handlePlayout);
+    _handlePlayout = NULL;
+    _playIsInitialized = false;
+    if (errVal < 0) {
+      RTC_LOG(LS_ERROR)
+          << "Error closing current playout sound device, error: "
+          << LATE(snd_strerror)(errVal);
+    }
+  }
+
+  // Open PCM device for playout
+  char deviceName[kAdmMaxDeviceNameSize] = {0};
+  GetDevicesInfo(2, true, _outputDeviceIndex, deviceName,
+                 kAdmMaxDeviceNameSize);
+
+  RTC_LOG(LS_VERBOSE) << "InitPlayout open (" << deviceName << ")";
+
+  errVal = LATE(snd_pcm_open)(&_handlePlayout, deviceName,
+                              SND_PCM_STREAM_PLAYBACK, SND_PCM_NONBLOCK);
+
+  if (errVal == -EBUSY)  // Device busy - try some more!
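+  // (Retry policy, per the loop below: up to five more attempts, one second
+  // apart, before giving up on a device that stays busy.)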
+  {
+    for (int i = 0; i < 5; i++) {
+      SleepMs(1000);
+      errVal = LATE(snd_pcm_open)(&_handlePlayout, deviceName,
+                                  SND_PCM_STREAM_PLAYBACK, SND_PCM_NONBLOCK);
+      if (errVal == 0) {
+        break;
+      }
+    }
+  }
+  if (errVal < 0) {
+    RTC_LOG(LS_ERROR) << "unable to open playback device: "
+                      << LATE(snd_strerror)(errVal) << " (" << errVal << ")";
+    _handlePlayout = NULL;
+    return -1;
+  }
+
+  // 10 ms worth of frames, e.g. 480 frames at 48 kHz.
+  _playoutFramesIn10MS = _playoutFreq / 100;
+  if ((errVal = LATE(snd_pcm_set_params)(
+           _handlePlayout,
+#if defined(WEBRTC_ARCH_BIG_ENDIAN)
+           SND_PCM_FORMAT_S16_BE,          // format
+#else
+           SND_PCM_FORMAT_S16_LE,          // format
+#endif
+           SND_PCM_ACCESS_RW_INTERLEAVED,  // access
+           _playChannels,                  // channels
+           _playoutFreq,                   // rate
+           1,                              // soft_resample
+           ALSA_PLAYOUT_LATENCY            // required overall latency in us
+           )) < 0) {
+    _playoutFramesIn10MS = 0;
+    RTC_LOG(LS_ERROR) << "unable to set playback device: "
+                      << LATE(snd_strerror)(errVal) << " (" << errVal << ")";
+    ErrorRecovery(errVal, _handlePlayout);
+    errVal = LATE(snd_pcm_close)(_handlePlayout);
+    _handlePlayout = NULL;
+    return -1;
+  }
+
+  errVal = LATE(snd_pcm_get_params)(_handlePlayout, &_playoutBufferSizeInFrame,
+                                    &_playoutPeriodSizeInFrame);
+  if (errVal < 0) {
+    RTC_LOG(LS_ERROR) << "snd_pcm_get_params: " << LATE(snd_strerror)(errVal)
+                      << " (" << errVal << ")";
+    _playoutBufferSizeInFrame = 0;
+    _playoutPeriodSizeInFrame = 0;
+  } else {
+    RTC_LOG(LS_VERBOSE) << "playout snd_pcm_get_params buffer_size:"
+                        << _playoutBufferSizeInFrame
+                        << " period_size:" << _playoutPeriodSizeInFrame;
+  }
+
+  if (_ptrAudioBuffer) {
+    // Update webrtc audio buffer with the selected parameters
+    _ptrAudioBuffer->SetPlayoutSampleRate(_playoutFreq);
+    _ptrAudioBuffer->SetPlayoutChannels(_playChannels);
+  }
+
+  // Set play buffer size
+  _playoutBufferSizeIn10MS =
+      LATE(snd_pcm_frames_to_bytes)(_handlePlayout, _playoutFramesIn10MS);
+
+  // Init variables used for play
+
+  if (_handlePlayout != NULL) {
+    _playIsInitialized = true;
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+int32_t AudioDeviceLinuxALSA::InitRecording() {
+  MutexLock lock(&mutex_);
+  return InitRecordingLocked();
+}
+
+int32_t AudioDeviceLinuxALSA::InitRecordingLocked() {
+  int errVal = 0;
+
+  if (_recording) {
+    return -1;
+  }
+
+  if (!_inputDeviceIsSpecified) {
+    return -1;
+  }
+
+  if (_recIsInitialized) {
+    return 0;
+  }
+
+  // Initialize the microphone (devices might have been added or removed)
+  if (InitMicrophoneLocked() == -1) {
+    RTC_LOG(LS_WARNING) << "InitMicrophone() failed";
+  }
+
+  // Start by closing any existing pcm-input devices
+  //
+  if (_handleRecord != NULL) {
+    int errVal = LATE(snd_pcm_close)(_handleRecord);
+    _handleRecord = NULL;
+    _recIsInitialized = false;
+    if (errVal < 0) {
+      RTC_LOG(LS_ERROR)
+          << "Error closing current recording sound device, error: "
+          << LATE(snd_strerror)(errVal);
+    }
+  }
+
+  // Open PCM device for recording
+  // The corresponding settings for playout are made after the record settings
+  char deviceName[kAdmMaxDeviceNameSize] = {0};
+  GetDevicesInfo(2, false, _inputDeviceIndex, deviceName,
+                 kAdmMaxDeviceNameSize);
+
+  RTC_LOG(LS_VERBOSE) << "InitRecording open (" << deviceName << ")";
+  errVal = LATE(snd_pcm_open)(&_handleRecord, deviceName,
+                              SND_PCM_STREAM_CAPTURE, SND_PCM_NONBLOCK);
+
+  // Available modes: 0 = blocking, SND_PCM_NONBLOCK, SND_PCM_ASYNC
+  if (errVal == -EBUSY)  // Device busy - try some more!
+ { + for (int i = 0; i < 5; i++) { + SleepMs(1000); + errVal = LATE(snd_pcm_open)(&_handleRecord, deviceName, + SND_PCM_STREAM_CAPTURE, SND_PCM_NONBLOCK); + if (errVal == 0) { + break; + } + } + } + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "unable to open record device: " + << LATE(snd_strerror)(errVal); + _handleRecord = NULL; + return -1; + } + + _recordingFramesIn10MS = _recordingFreq / 100; + if ((errVal = + LATE(snd_pcm_set_params)(_handleRecord, +#if defined(WEBRTC_ARCH_BIG_ENDIAN) + SND_PCM_FORMAT_S16_BE, // format +#else + SND_PCM_FORMAT_S16_LE, // format +#endif + SND_PCM_ACCESS_RW_INTERLEAVED, // access + _recChannels, // channels + _recordingFreq, // rate + 1, // soft_resample + ALSA_CAPTURE_LATENCY // latency in us + )) < 0) { + // Fall back to another mode then. + if (_recChannels == 1) + _recChannels = 2; + else + _recChannels = 1; + + if ((errVal = + LATE(snd_pcm_set_params)(_handleRecord, +#if defined(WEBRTC_ARCH_BIG_ENDIAN) + SND_PCM_FORMAT_S16_BE, // format +#else + SND_PCM_FORMAT_S16_LE, // format +#endif + SND_PCM_ACCESS_RW_INTERLEAVED, // access + _recChannels, // channels + _recordingFreq, // rate + 1, // soft_resample + ALSA_CAPTURE_LATENCY // latency in us + )) < 0) { + _recordingFramesIn10MS = 0; + RTC_LOG(LS_ERROR) << "unable to set record settings: " + << LATE(snd_strerror)(errVal) << " (" << errVal << ")"; + ErrorRecovery(errVal, _handleRecord); + errVal = LATE(snd_pcm_close)(_handleRecord); + _handleRecord = NULL; + return -1; + } + } + + errVal = LATE(snd_pcm_get_params)(_handleRecord, &_recordingBuffersizeInFrame, + &_recordingPeriodSizeInFrame); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_pcm_get_params " << LATE(snd_strerror)(errVal) + << " (" << errVal << ")"; + _recordingBuffersizeInFrame = 0; + _recordingPeriodSizeInFrame = 0; + } else { + RTC_LOG(LS_VERBOSE) << "capture snd_pcm_get_params, buffer_size:" + << _recordingBuffersizeInFrame + << ", period_size:" << _recordingPeriodSizeInFrame; + } + + if (_ptrAudioBuffer) { + // Update webrtc audio buffer with the selected parameters + _ptrAudioBuffer->SetRecordingSampleRate(_recordingFreq); + _ptrAudioBuffer->SetRecordingChannels(_recChannels); + } + + // Set rec buffer size and create buffer + _recordingBufferSizeIn10MS = + LATE(snd_pcm_frames_to_bytes)(_handleRecord, _recordingFramesIn10MS); + + if (_handleRecord != NULL) { + // Mark recording side as initialized + _recIsInitialized = true; + return 0; + } else { + return -1; + } +} + +int32_t AudioDeviceLinuxALSA::StartRecording() { + if (!_recIsInitialized) { + return -1; + } + + if (_recording) { + return 0; + } + + _recording = true; + + int errVal = 0; + _recordingFramesLeft = _recordingFramesIn10MS; + + // Make sure we only create the buffer once. 
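+  // (The buffer is released again in StopRecording() and in the destructor.)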
+ if (!_recordingBuffer) + _recordingBuffer = new int8_t[_recordingBufferSizeIn10MS]; + if (!_recordingBuffer) { + RTC_LOG(LS_ERROR) << "failed to alloc recording buffer"; + _recording = false; + return -1; + } + // RECORDING + _ptrThreadRec = rtc::PlatformThread::SpawnJoinable( + [this] { + while (RecThreadProcess()) { + } + }, + "webrtc_audio_module_capture_thread", + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime)); + + errVal = LATE(snd_pcm_prepare)(_handleRecord); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "capture snd_pcm_prepare failed (" + << LATE(snd_strerror)(errVal) << ")\n"; + // just log error + // if snd_pcm_open fails will return -1 + } + + errVal = LATE(snd_pcm_start)(_handleRecord); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "capture snd_pcm_start err: " + << LATE(snd_strerror)(errVal); + errVal = LATE(snd_pcm_start)(_handleRecord); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "capture snd_pcm_start 2nd try err: " + << LATE(snd_strerror)(errVal); + StopRecording(); + return -1; + } + } + + return 0; +} + +int32_t AudioDeviceLinuxALSA::StopRecording() { + MutexLock lock(&mutex_); + return StopRecordingLocked(); +} + +int32_t AudioDeviceLinuxALSA::StopRecordingLocked() { + if (!_recIsInitialized) { + return 0; + } + + if (_handleRecord == NULL) { + return -1; + } + + // Make sure we don't start recording (it's asynchronous). + _recIsInitialized = false; + _recording = false; + + _ptrThreadRec.Finalize(); + + _recordingFramesLeft = 0; + if (_recordingBuffer) { + delete[] _recordingBuffer; + _recordingBuffer = NULL; + } + + // Stop and close pcm recording device. + int errVal = LATE(snd_pcm_drop)(_handleRecord); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error stop recording: " << LATE(snd_strerror)(errVal); + return -1; + } + + errVal = LATE(snd_pcm_close)(_handleRecord); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error closing record sound device, error: " + << LATE(snd_strerror)(errVal); + return -1; + } + + // Check if we have muted and unmute if so. 
+ bool muteEnabled = false; + MicrophoneMute(muteEnabled); + if (muteEnabled) { + SetMicrophoneMute(false); + } + + // set the pcm input handle to NULL + _handleRecord = NULL; + return 0; +} + +bool AudioDeviceLinuxALSA::RecordingIsInitialized() const { + return (_recIsInitialized); +} + +bool AudioDeviceLinuxALSA::Recording() const { + return (_recording); +} + +bool AudioDeviceLinuxALSA::PlayoutIsInitialized() const { + return (_playIsInitialized); +} + +int32_t AudioDeviceLinuxALSA::StartPlayout() { + if (!_playIsInitialized) { + return -1; + } + + if (_playing) { + return 0; + } + + _playing = true; + + _playoutFramesLeft = 0; + if (!_playoutBuffer) + _playoutBuffer = new int8_t[_playoutBufferSizeIn10MS]; + if (!_playoutBuffer) { + RTC_LOG(LS_ERROR) << "failed to alloc playout buf"; + _playing = false; + return -1; + } + + // PLAYOUT + _ptrThreadPlay = rtc::PlatformThread::SpawnJoinable( + [this] { + while (PlayThreadProcess()) { + } + }, + "webrtc_audio_module_play_thread", + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime)); + + int errVal = LATE(snd_pcm_prepare)(_handlePlayout); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "playout snd_pcm_prepare failed (" + << LATE(snd_strerror)(errVal) << ")\n"; + // just log error + // if snd_pcm_open fails will return -1 + } + + return 0; +} + +int32_t AudioDeviceLinuxALSA::StopPlayout() { + MutexLock lock(&mutex_); + return StopPlayoutLocked(); +} + +int32_t AudioDeviceLinuxALSA::StopPlayoutLocked() { + if (!_playIsInitialized) { + return 0; + } + + if (_handlePlayout == NULL) { + return -1; + } + + _playing = false; + + // stop playout thread first + _ptrThreadPlay.Finalize(); + + _playoutFramesLeft = 0; + delete[] _playoutBuffer; + _playoutBuffer = NULL; + + // stop and close pcm playout device + int errVal = LATE(snd_pcm_drop)(_handlePlayout); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error stop playing: " << LATE(snd_strerror)(errVal); + } + + errVal = LATE(snd_pcm_close)(_handlePlayout); + if (errVal < 0) + RTC_LOG(LS_ERROR) << "Error closing playout sound device, error: " + << LATE(snd_strerror)(errVal); + + // set the pcm input handle to NULL + _playIsInitialized = false; + _handlePlayout = NULL; + RTC_LOG(LS_VERBOSE) << "handle_playout is now set to NULL"; + + return 0; +} + +int32_t AudioDeviceLinuxALSA::PlayoutDelay(uint16_t& delayMS) const { + delayMS = (uint16_t)_playoutDelay * 1000 / _playoutFreq; + return 0; +} + +bool AudioDeviceLinuxALSA::Playing() const { + return (_playing); +} + +// ============================================================================ +// Private Methods +// ============================================================================ + +int32_t AudioDeviceLinuxALSA::GetDevicesInfo(const int32_t function, + const bool playback, + const int32_t enumDeviceNo, + char* enumDeviceName, + const int32_t ednLen) const { + // Device enumeration based on libjingle implementation + // by Tristan Schmelcher at Google Inc. + + const char* type = playback ? "Output" : "Input"; + // dmix and dsnoop are only for playback and capture, respectively, but ALSA + // stupidly includes them in both lists. + const char* ignorePrefix = playback ? "dsnoop:" : "dmix:"; + // (ALSA lists many more "devices" of questionable interest, but we show them + // just in case the weird devices may actually be desirable for some + // users/systems.) + + int err; + int enumCount(0); + bool keepSearching(true); + + // From Chromium issue 95797 + // Loop through the sound cards to get Alsa device hints. 
+ // Don't use snd_device_name_hint(-1,..) since there is a access violation + // inside this ALSA API with libasound.so.2.0.0. + int card = -1; + while (!(LATE(snd_card_next)(&card)) && (card >= 0) && keepSearching) { + void** hints; + err = LATE(snd_device_name_hint)(card, "pcm", &hints); + if (err != 0) { + RTC_LOG(LS_ERROR) << "GetDevicesInfo - device name hint error: " + << LATE(snd_strerror)(err); + return -1; + } + + enumCount++; // default is 0 + if ((function == FUNC_GET_DEVICE_NAME || + function == FUNC_GET_DEVICE_NAME_FOR_AN_ENUM) && + enumDeviceNo == 0) { + strcpy(enumDeviceName, "default"); + + err = LATE(snd_device_name_free_hint)(hints); + if (err != 0) { + RTC_LOG(LS_ERROR) << "GetDevicesInfo - device name free hint error: " + << LATE(snd_strerror)(err); + } + + return 0; + } + + for (void** list = hints; *list != NULL; ++list) { + char* actualType = LATE(snd_device_name_get_hint)(*list, "IOID"); + if (actualType) { // NULL means it's both. + bool wrongType = (strcmp(actualType, type) != 0); + free(actualType); + if (wrongType) { + // Wrong type of device (i.e., input vs. output). + continue; + } + } + + char* name = LATE(snd_device_name_get_hint)(*list, "NAME"); + if (!name) { + RTC_LOG(LS_ERROR) << "Device has no name"; + // Skip it. + continue; + } + + // Now check if we actually want to show this device. + if (strcmp(name, "default") != 0 && strcmp(name, "null") != 0 && + strcmp(name, "pulse") != 0 && + strncmp(name, ignorePrefix, strlen(ignorePrefix)) != 0) { + // Yes, we do. + char* desc = LATE(snd_device_name_get_hint)(*list, "DESC"); + if (!desc) { + // Virtual devices don't necessarily have descriptions. + // Use their names instead. + desc = name; + } + + if (FUNC_GET_NUM_OF_DEVICE == function) { + RTC_LOG(LS_VERBOSE) << "Enum device " << enumCount << " - " << name; + } + if ((FUNC_GET_DEVICE_NAME == function) && (enumDeviceNo == enumCount)) { + // We have found the enum device, copy the name to buffer. + strncpy(enumDeviceName, desc, ednLen); + enumDeviceName[ednLen - 1] = '\0'; + keepSearching = false; + // Replace '\n' with '-'. + char* pret = strchr(enumDeviceName, '\n' /*0xa*/); // LF + if (pret) + *pret = '-'; + } + if ((FUNC_GET_DEVICE_NAME_FOR_AN_ENUM == function) && + (enumDeviceNo == enumCount)) { + // We have found the enum device, copy the name to buffer. + strncpy(enumDeviceName, name, ednLen); + enumDeviceName[ednLen - 1] = '\0'; + keepSearching = false; + } + + if (keepSearching) + ++enumCount; + + if (desc != name) + free(desc); + } + + free(name); + + if (!keepSearching) + break; + } + + err = LATE(snd_device_name_free_hint)(hints); + if (err != 0) { + RTC_LOG(LS_ERROR) << "GetDevicesInfo - device name free hint error: " + << LATE(snd_strerror)(err); + // Continue and return true anyway, since we did get the whole list. + } + } + + if (FUNC_GET_NUM_OF_DEVICE == function) { + if (enumCount == 1) // only default? + enumCount = 0; + return enumCount; // Normal return point for function 0 + } + + if (keepSearching) { + // If we get here for function 1 and 2, we didn't find the specified + // enum device. 
+    RTC_LOG(LS_ERROR)
+        << "GetDevicesInfo - Could not find the requested device name or "
+           "number";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::InputSanityCheckAfterUnlockedPeriod() const {
+  if (_handleRecord == NULL) {
+    RTC_LOG(LS_ERROR) << "input state has been modified during unlocked period";
+    return -1;
+  }
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::OutputSanityCheckAfterUnlockedPeriod() const {
+  if (_handlePlayout == NULL) {
+    RTC_LOG(LS_ERROR)
+        << "output state has been modified during unlocked period";
+    return -1;
+  }
+  return 0;
+}
+
+int32_t AudioDeviceLinuxALSA::ErrorRecovery(int32_t error,
+                                            snd_pcm_t* deviceHandle) {
+  int st = LATE(snd_pcm_state)(deviceHandle);
+  RTC_LOG(LS_VERBOSE) << "Trying to recover from "
+                      << ((LATE(snd_pcm_stream)(deviceHandle) ==
+                           SND_PCM_STREAM_CAPTURE)
+                              ? "capture"
+                              : "playout")
+                      << " error: " << LATE(snd_strerror)(error) << " ("
+                      << error << ") (state " << st << ")";
+
+  // It is recommended to use snd_pcm_recover for all errors. If that function
+  // cannot handle the error, the input error code will be returned, otherwise
+  // 0 is returned. From snd_pcm_recover API doc: "This functions handles
+  // -EINTR (4) (interrupted system call), -EPIPE (32) (playout overrun or
+  // capture underrun) and -ESTRPIPE (86) (stream is suspended) error codes
+  // trying to prepare given stream for next I/O."
+
+  /** Open */
+  // SND_PCM_STATE_OPEN = 0,
+  /** Setup installed */
+  // SND_PCM_STATE_SETUP,
+  /** Ready to start */
+  // SND_PCM_STATE_PREPARED,
+  /** Running */
+  // SND_PCM_STATE_RUNNING,
+  /** Stopped: underrun (playback) or overrun (capture) detected */
+  // SND_PCM_STATE_XRUN,= 4
+  /** Draining: running (playback) or stopped (capture) */
+  // SND_PCM_STATE_DRAINING,
+  /** Paused */
+  // SND_PCM_STATE_PAUSED,
+  /** Hardware is suspended */
+  // SND_PCM_STATE_SUSPENDED,
+  /** Hardware is disconnected */
+  // SND_PCM_STATE_DISCONNECTED,
+  // SND_PCM_STATE_LAST = SND_PCM_STATE_DISCONNECTED
+
+  // snd_pcm_recover isn't available in older alsa, e.g. on the FC4 machine
+  // in Sthlm lab.
+
+  int res = LATE(snd_pcm_recover)(deviceHandle, error, 1);
+  if (0 == res) {
+    RTC_LOG(LS_VERBOSE) << "Recovery - snd_pcm_recover OK";
+
+    if ((error == -EPIPE || error == -ESTRPIPE) &&  // Buf underrun/overrun.
+        _recording &&
+        LATE(snd_pcm_stream)(deviceHandle) == SND_PCM_STREAM_CAPTURE) {
+      // For capture streams we also have to repeat the explicit start()
+      // to get data flowing again.
+      int err = LATE(snd_pcm_start)(deviceHandle);
+      if (err != 0) {
+        RTC_LOG(LS_ERROR) << "Recovery - snd_pcm_start error: " << err;
+        return -1;
+      }
+    }
+
+    if ((error == -EPIPE || error == -ESTRPIPE) &&  // Buf underrun/overrun.
+        _playing &&
+        LATE(snd_pcm_stream)(deviceHandle) == SND_PCM_STREAM_PLAYBACK) {
+      // For playback streams we also have to repeat the explicit start() to
+      // get data flowing again.
+      int err = LATE(snd_pcm_start)(deviceHandle);
+      if (err != 0) {
+        RTC_LOG(LS_ERROR) << "Recovery - snd_pcm_start error: "
+                          << LATE(snd_strerror)(err);
+        return -1;
+      }
+    }
+
+    return -EPIPE == error ? 1 : 0;
+  } else {
+    RTC_LOG(LS_ERROR) << "Unrecoverable alsa stream error: " << res;
+  }
+
+  return res;
+}
+
+// ============================================================================
+//                                 Thread Methods
+// ============================================================================
+
+bool AudioDeviceLinuxALSA::PlayThreadProcess() {
+  if (!_playing)
+    return false;
+
+  int err;
+  snd_pcm_sframes_t frames;
+  snd_pcm_sframes_t avail_frames;
+
+  Lock();
+  // Returns a positive number of frames ready, otherwise a negative error
+  // code.
+  avail_frames = LATE(snd_pcm_avail_update)(_handlePlayout);
+  if (avail_frames < 0) {
+    RTC_LOG(LS_ERROR) << "playout snd_pcm_avail_update error: "
+                      << LATE(snd_strerror)(avail_frames);
+    ErrorRecovery(avail_frames, _handlePlayout);
+    UnLock();
+    return true;
+  } else if (avail_frames == 0) {
+    UnLock();
+
+    // Maximum time in milliseconds to wait; a negative value means infinity.
+    err = LATE(snd_pcm_wait)(_handlePlayout, 2);
+    if (err == 0) {  // timeout occurred
+      RTC_LOG(LS_VERBOSE) << "playout snd_pcm_wait timeout";
+    }
+
+    return true;
+  }
+
+  if (_playoutFramesLeft <= 0) {
+    UnLock();
+    _ptrAudioBuffer->RequestPlayoutData(_playoutFramesIn10MS);
+    Lock();
+
+    _playoutFramesLeft = _ptrAudioBuffer->GetPlayoutData(_playoutBuffer);
+    RTC_DCHECK_EQ(_playoutFramesLeft, _playoutFramesIn10MS);
+  }
+
+  if (static_cast<uint32_t>(avail_frames) > _playoutFramesLeft)
+    avail_frames = _playoutFramesLeft;
+
+  int size = LATE(snd_pcm_frames_to_bytes)(_handlePlayout, _playoutFramesLeft);
+  frames = LATE(snd_pcm_writei)(
+      _handlePlayout, &_playoutBuffer[_playoutBufferSizeIn10MS - size],
+      avail_frames);
+
+  if (frames < 0) {
+    RTC_LOG(LS_VERBOSE) << "playout snd_pcm_writei error: "
+                        << LATE(snd_strerror)(frames);
+    _playoutFramesLeft = 0;
+    ErrorRecovery(frames, _handlePlayout);
+    UnLock();
+    return true;
+  } else {
+    RTC_DCHECK_EQ(frames, avail_frames);
+    _playoutFramesLeft -= frames;
+  }
+
+  UnLock();
+  return true;
+}
+
+bool AudioDeviceLinuxALSA::RecThreadProcess() {
+  if (!_recording)
+    return false;
+
+  int err;
+  snd_pcm_sframes_t frames;
+  snd_pcm_sframes_t avail_frames;
+  int8_t buffer[_recordingBufferSizeIn10MS];
+
+  Lock();
+
+  // Returns a positive number of frames ready, otherwise a negative error
+  // code.
+  avail_frames = LATE(snd_pcm_avail_update)(_handleRecord);
+  if (avail_frames < 0) {
+    RTC_LOG(LS_ERROR) << "capture snd_pcm_avail_update error: "
+                      << LATE(snd_strerror)(avail_frames);
+    ErrorRecovery(avail_frames, _handleRecord);
+    UnLock();
+    return true;
+  } else if (avail_frames == 0) {  // no frame is available now
+    UnLock();
+
+    // Maximum time in milliseconds to wait; a negative value means infinity.
+    err = LATE(snd_pcm_wait)(_handleRecord, ALSA_CAPTURE_WAIT_TIMEOUT);
+    if (err == 0)  // timeout occurred
+      RTC_LOG(LS_VERBOSE) << "capture snd_pcm_wait timeout";
+
+    return true;
+  }
+
+  if (static_cast<uint32_t>(avail_frames) > _recordingFramesLeft)
+    avail_frames = _recordingFramesLeft;
+
+  frames = LATE(snd_pcm_readi)(_handleRecord, buffer,
+                               avail_frames);  // frames actually read
+  if (frames < 0) {
+    RTC_LOG(LS_ERROR) << "capture snd_pcm_readi error: "
+                      << LATE(snd_strerror)(frames);
+    ErrorRecovery(frames, _handleRecord);
+    UnLock();
+    return true;
+  } else if (frames > 0) {
+    RTC_DCHECK_EQ(frames, avail_frames);
+
+    int left_size =
+        LATE(snd_pcm_frames_to_bytes)(_handleRecord, _recordingFramesLeft);
+    int size = LATE(snd_pcm_frames_to_bytes)(_handleRecord, frames);
+
+    memcpy(&_recordingBuffer[_recordingBufferSizeIn10MS - left_size], buffer,
+           size);
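+    // Captured frames are appended at the tail of the 10 ms buffer; the
+    // counter below tracks how many frames are still missing before a full
+    // 10 ms block can be delivered.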
_recordingFramesLeft -= frames; + + if (!_recordingFramesLeft) { // buf is full + _recordingFramesLeft = _recordingFramesIn10MS; + + // store the recorded buffer (no action will be taken if the + // #recorded samples is not a full buffer) + _ptrAudioBuffer->SetRecordedBuffer(_recordingBuffer, + _recordingFramesIn10MS); + + // calculate delay + _playoutDelay = 0; + _recordingDelay = 0; + if (_handlePlayout) { + err = LATE(snd_pcm_delay)(_handlePlayout, + &_playoutDelay); // returned delay in frames + if (err < 0) { + // TODO(xians): Shall we call ErrorRecovery() here? + _playoutDelay = 0; + RTC_LOG(LS_ERROR) + << "playout snd_pcm_delay: " << LATE(snd_strerror)(err); + } + } + + err = LATE(snd_pcm_delay)(_handleRecord, + &_recordingDelay); // returned delay in frames + if (err < 0) { + // TODO(xians): Shall we call ErrorRecovery() here? + _recordingDelay = 0; + RTC_LOG(LS_ERROR) << "capture snd_pcm_delay: " + << LATE(snd_strerror)(err); + } + + // TODO(xians): Shall we add 10ms buffer delay to the record delay? + _ptrAudioBuffer->SetVQEData(_playoutDelay * 1000 / _playoutFreq, + _recordingDelay * 1000 / _recordingFreq); + + _ptrAudioBuffer->SetTypingStatus(KeyPressed()); + + // Deliver recorded samples at specified sample rate, mic level etc. + // to the observer using callback. + UnLock(); + _ptrAudioBuffer->DeliverRecordedData(); + Lock(); + } + } + + UnLock(); + return true; +} + +bool AudioDeviceLinuxALSA::KeyPressed() const { +#if defined(WEBRTC_USE_X11) + char szKey[32]; + unsigned int i = 0; + char state = 0; + + if (!_XDisplay) + return false; + + // Check key map status + XQueryKeymap(_XDisplay, szKey); + + // A bit change in keymap means a key is pressed + for (i = 0; i < sizeof(szKey); i++) + state |= (szKey[i] ^ _oldKeyState[i]) & szKey[i]; + + // Save old state + memcpy((char*)_oldKeyState, (char*)szKey, sizeof(_oldKeyState)); + return (state != 0); +#else + return false; +#endif +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.h b/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.h new file mode 100644 index 0000000000..23e21d3ce9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/audio_device_alsa_linux.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_ALSA_LINUX_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_ALSA_LINUX_H_
+
+#include <memory>
+
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/linux/audio_mixer_manager_alsa_linux.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+
+#if defined(WEBRTC_USE_X11)
+#include <X11/Xlib.h>
+#endif
+#include <alsa/asoundlib.h>
+#include <sys/ioctl.h>
+#include <sys/soundcard.h>
+
+typedef webrtc::adm_linux_alsa::AlsaSymbolTable WebRTCAlsaSymbolTable;
+WebRTCAlsaSymbolTable* GetAlsaSymbolTable();
+
+namespace webrtc {
+
+class AudioDeviceLinuxALSA : public AudioDeviceGeneric {
+ public:
+  AudioDeviceLinuxALSA();
+  virtual ~AudioDeviceLinuxALSA();
+
+  // Retrieve the currently utilized audio layer
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const override;
+
+  // Main initialization and termination
+  InitStatus Init() RTC_LOCKS_EXCLUDED(mutex_) override;
+  int32_t Terminate() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Initialized() const override;
+
+  // Device enumeration
+  int16_t PlayoutDevices() override;
+  int16_t RecordingDevices() override;
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override;
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override;
+
+  // Device selection
+  int32_t SetPlayoutDevice(uint16_t index) override;
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+  int32_t SetRecordingDevice(uint16_t index) override;
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+
+  // Audio transport initialization
+  int32_t PlayoutIsAvailable(bool& available) override;
+  int32_t InitPlayout() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool PlayoutIsInitialized() const override;
+  int32_t RecordingIsAvailable(bool& available) override;
+  int32_t InitRecording() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool RecordingIsInitialized() const override;
+
+  // Audio transport control
+  int32_t StartPlayout() override;
+  int32_t StopPlayout() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Playing() const override;
+  int32_t StartRecording() override;
+  int32_t StopRecording() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Recording() const override;
+
+  // Audio mixer initialization
+  int32_t InitSpeaker() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool SpeakerIsInitialized() const override;
+  int32_t InitMicrophone() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool MicrophoneIsInitialized() const override;
+
+  // Speaker volume controls
+  int32_t SpeakerVolumeIsAvailable(bool& available) override;
+  int32_t SetSpeakerVolume(uint32_t volume) override;
+  int32_t SpeakerVolume(uint32_t& volume) const override;
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override;
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const override;
+
+  // Microphone volume controls
+  int32_t MicrophoneVolumeIsAvailable(bool& available) override;
+  int32_t SetMicrophoneVolume(uint32_t volume) override;
+  int32_t MicrophoneVolume(uint32_t& volume) const override;
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override;
+  int32_t MinMicrophoneVolume(uint32_t& minVolume) const override;
+
+  // Speaker mute control
+  int32_t SpeakerMuteIsAvailable(bool& available) override;
+  int32_t SetSpeakerMute(bool enable) override;
+  int32_t SpeakerMute(bool& enabled) const override;
+
+  // Microphone mute control
+  int32_t MicrophoneMuteIsAvailable(bool& available) override;
+  int32_t SetMicrophoneMute(bool enable) override;
+  int32_t MicrophoneMute(bool& enabled) const override;
+
+  // Stereo support
+  int32_t StereoPlayoutIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_) override;
+  int32_t SetStereoPlayout(bool enable) override;
+  int32_t StereoPlayout(bool& enabled) const override;
+  int32_t StereoRecordingIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_) override;
+  int32_t SetStereoRecording(bool enable) override;
+  int32_t StereoRecording(bool& enabled) const override;
+
+  // Delay information and control
+  int32_t PlayoutDelay(uint16_t& delayMS) const override;
+
+  void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer)
+      RTC_LOCKS_EXCLUDED(mutex_) override;
+
+ private:
+  int32_t InitRecordingLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t StopRecordingLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t StopPlayoutLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t InitPlayoutLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t InitSpeakerLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t InitMicrophoneLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t GetDevicesInfo(int32_t function,
+                         bool playback,
+                         int32_t enumDeviceNo = 0,
+                         char* enumDeviceName = NULL,
+                         int32_t ednLen = 0) const;
+  int32_t ErrorRecovery(int32_t error, snd_pcm_t* deviceHandle);
+
+  bool KeyPressed() const;
+
+  void Lock() RTC_EXCLUSIVE_LOCK_FUNCTION(mutex_) { mutex_.Lock(); }
+  void UnLock() RTC_UNLOCK_FUNCTION(mutex_) { mutex_.Unlock(); }
+
+  inline int32_t InputSanityCheckAfterUnlockedPeriod() const;
+  inline int32_t OutputSanityCheckAfterUnlockedPeriod() const;
+
+  static void RecThreadFunc(void*);
+  static void PlayThreadFunc(void*);
+  bool RecThreadProcess();
+  bool PlayThreadProcess();
+
+  AudioDeviceBuffer* _ptrAudioBuffer;
+
+  Mutex mutex_;
+
+  rtc::PlatformThread _ptrThreadRec;
+  rtc::PlatformThread _ptrThreadPlay;
+
+  AudioMixerManagerLinuxALSA _mixerManager;
+
+  uint16_t _inputDeviceIndex;
+  uint16_t _outputDeviceIndex;
+  bool _inputDeviceIsSpecified;
+  bool _outputDeviceIsSpecified;
+
+  snd_pcm_t* _handleRecord;
+  snd_pcm_t* _handlePlayout;
+
+  snd_pcm_uframes_t _recordingBuffersizeInFrame;
+  snd_pcm_uframes_t _recordingPeriodSizeInFrame;
+  snd_pcm_uframes_t _playoutBufferSizeInFrame;
+  snd_pcm_uframes_t _playoutPeriodSizeInFrame;
+
+  ssize_t _recordingBufferSizeIn10MS;
+  ssize_t _playoutBufferSizeIn10MS;
+  uint32_t _recordingFramesIn10MS;
+  uint32_t _playoutFramesIn10MS;
+
+  uint32_t _recordingFreq;
+  uint32_t _playoutFreq;
+  uint8_t _recChannels;
+  uint8_t _playChannels;
+
+  int8_t* _recordingBuffer;  // in bytes
+  int8_t* _playoutBuffer;    // in bytes
+  uint32_t _recordingFramesLeft;
+  uint32_t _playoutFramesLeft;
+
+  bool _initialized;
+  bool _recording;
+  bool _playing;
+  bool _recIsInitialized;
+  bool _playIsInitialized;
+
+  snd_pcm_sframes_t _recordingDelay;
+  snd_pcm_sframes_t _playoutDelay;
+
+  char _oldKeyState[32];
+#if defined(WEBRTC_USE_X11)
+  Display* _XDisplay;
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_AUDIO_DEVICE_ALSA_LINUX_H_
diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.cc
new file mode 100644
index 0000000000..90cd58c497
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.cc
@@ -0,0 +1,2286 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/linux/audio_device_pulse_linux.h"
+
+#include <string.h>
+
+#include "modules/audio_device/linux/latebindingsymboltable_linux.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+
+WebRTCPulseSymbolTable* GetPulseSymbolTable() {
+  static WebRTCPulseSymbolTable* pulse_symbol_table =
+      new WebRTCPulseSymbolTable();
+  return pulse_symbol_table;
+}
+
+// Accesses Pulse functions through our late-binding symbol table instead of
+// directly. This way we don't have to link to libpulse, which means our
+// binary will work on systems that don't have it.
+#define LATE(sym)                                             \
+  LATESYM_GET(webrtc::adm_linux_pulse::PulseAudioSymbolTable, \
+              GetPulseSymbolTable(), sym)
+
+namespace webrtc {
+
+AudioDeviceLinuxPulse::AudioDeviceLinuxPulse()
+    : _ptrAudioBuffer(NULL),
+      _inputDeviceIndex(0),
+      _outputDeviceIndex(0),
+      _inputDeviceIsSpecified(false),
+      _outputDeviceIsSpecified(false),
+      sample_rate_hz_(0),
+      _recChannels(1),
+      _playChannels(1),
+      _initialized(false),
+      _recording(false),
+      _playing(false),
+      _recIsInitialized(false),
+      _playIsInitialized(false),
+      _startRec(false),
+      _startPlay(false),
+      update_speaker_volume_at_startup_(false),
+      quit_(false),
+      _sndCardPlayDelay(0),
+      _writeErrors(0),
+      _deviceIndex(-1),
+      _numPlayDevices(0),
+      _numRecDevices(0),
+      _playDeviceName(NULL),
+      _recDeviceName(NULL),
+      _playDisplayDeviceName(NULL),
+      _recDisplayDeviceName(NULL),
+      _playBuffer(NULL),
+      _playbackBufferSize(0),
+      _playbackBufferUnused(0),
+      _tempBufferSpace(0),
+      _recBuffer(NULL),
+      _recordBufferSize(0),
+      _recordBufferUsed(0),
+      _tempSampleData(NULL),
+      _tempSampleDataSize(0),
+      _configuredLatencyPlay(0),
+      _configuredLatencyRec(0),
+      _paDeviceIndex(-1),
+      _paStateChanged(false),
+      _paMainloop(NULL),
+      _paMainloopApi(NULL),
+      _paContext(NULL),
+      _recStream(NULL),
+      _playStream(NULL),
+      _recStreamFlags(0),
+      _playStreamFlags(0) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " created";
+
+  memset(_paServerVersion, 0, sizeof(_paServerVersion));
+  memset(&_playBufferAttr, 0, sizeof(_playBufferAttr));
+  memset(&_recBufferAttr, 0, sizeof(_recBufferAttr));
+  memset(_oldKeyState, 0, sizeof(_oldKeyState));
+}
+
+AudioDeviceLinuxPulse::~AudioDeviceLinuxPulse() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed";
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  Terminate();
+
+  if (_recBuffer) {
+    delete[] _recBuffer;
+    _recBuffer = NULL;
+  }
+  if (_playBuffer) {
+    delete[] _playBuffer;
+    _playBuffer = NULL;
+  }
+  if (_playDeviceName) {
+    delete[] _playDeviceName;
+    _playDeviceName = NULL;
+  }
+  if (_recDeviceName) {
+    delete[] _recDeviceName;
+    _recDeviceName = NULL;
+  }
+}
+
+void AudioDeviceLinuxPulse::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+
+  _ptrAudioBuffer = audioBuffer;
+
+  // Inform the AudioBuffer about default settings for this implementation.
+  // Set all values to zero here since the actual settings will be done by
+  // InitPlayout and InitRecording later.
+ _ptrAudioBuffer->SetRecordingSampleRate(0); + _ptrAudioBuffer->SetPlayoutSampleRate(0); + _ptrAudioBuffer->SetRecordingChannels(0); + _ptrAudioBuffer->SetPlayoutChannels(0); +} + +// ---------------------------------------------------------------------------- +// ActiveAudioLayer +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceLinuxPulse::ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const { + audioLayer = AudioDeviceModule::kLinuxPulseAudio; + return 0; +} + +AudioDeviceGeneric::InitStatus AudioDeviceLinuxPulse::Init() { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_initialized) { + return InitStatus::OK; + } + + // Initialize PulseAudio + if (InitPulseAudio() < 0) { + RTC_LOG(LS_ERROR) << "failed to initialize PulseAudio"; + if (TerminatePulseAudio() < 0) { + RTC_LOG(LS_ERROR) << "failed to terminate PulseAudio"; + } + return InitStatus::OTHER_ERROR; + } + +#if defined(WEBRTC_USE_X11) + // Get X display handle for typing detection + _XDisplay = XOpenDisplay(NULL); + if (!_XDisplay) { + RTC_LOG(LS_WARNING) + << "failed to open X display, typing detection will not work"; + } +#endif + + // RECORDING + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + _ptrThreadRec = rtc::PlatformThread::SpawnJoinable( + [this] { + while (RecThreadProcess()) { + } + }, + "webrtc_audio_module_rec_thread", attributes); + + // PLAYOUT + _ptrThreadPlay = rtc::PlatformThread::SpawnJoinable( + [this] { + while (PlayThreadProcess()) { + } + }, + "webrtc_audio_module_play_thread", attributes); + _initialized = true; + + return InitStatus::OK; +} + +int32_t AudioDeviceLinuxPulse::Terminate() { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!_initialized) { + return 0; + } + { + MutexLock lock(&mutex_); + quit_ = true; + } + _mixerManager.Close(); + + // RECORDING + _timeEventRec.Set(); + _ptrThreadRec.Finalize(); + + // PLAYOUT + _timeEventPlay.Set(); + _ptrThreadPlay.Finalize(); + + // Terminate PulseAudio + if (TerminatePulseAudio() < 0) { + RTC_LOG(LS_ERROR) << "failed to terminate PulseAudio"; + return -1; + } + +#if defined(WEBRTC_USE_X11) + if (_XDisplay) { + XCloseDisplay(_XDisplay); + _XDisplay = NULL; + } +#endif + + _initialized = false; + _outputDeviceIsSpecified = false; + _inputDeviceIsSpecified = false; + + return 0; +} + +bool AudioDeviceLinuxPulse::Initialized() const { + RTC_DCHECK(thread_checker_.IsCurrent()); + return (_initialized); +} + +int32_t AudioDeviceLinuxPulse::InitSpeaker() { + RTC_DCHECK(thread_checker_.IsCurrent()); + + if (_playing) { + return -1; + } + + if (!_outputDeviceIsSpecified) { + return -1; + } + + // check if default device + if (_outputDeviceIndex == 0) { + uint16_t deviceIndex = 0; + GetDefaultDeviceInfo(false, NULL, deviceIndex); + _paDeviceIndex = deviceIndex; + } else { + // get the PA device index from + // the callback + _deviceIndex = _outputDeviceIndex; + + // get playout devices + PlayoutDevices(); + } + + // the callback has now set the _paDeviceIndex to + // the PulseAudio index of the device + if (_mixerManager.OpenSpeaker(_paDeviceIndex) == -1) { + return -1; + } + + // clear _deviceIndex + _deviceIndex = -1; + _paDeviceIndex = -1; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::InitMicrophone() { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_recording) { + return -1; + } + + if (!_inputDeviceIsSpecified) { + return -1; + } + + // Check if default device + if (_inputDeviceIndex == 0) { + uint16_t deviceIndex = 0; + 
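+    // (The first argument 'true' selects the recording direction, matching
+    // the call site in RecordingDeviceName(); 'false' is used on the playout
+    // path. This note is inferred from the surrounding call sites.)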
GetDefaultDeviceInfo(true, NULL, deviceIndex); + _paDeviceIndex = deviceIndex; + } else { + // Get the PA device index from + // the callback + _deviceIndex = _inputDeviceIndex; + + // get recording devices + RecordingDevices(); + } + + // The callback has now set the _paDeviceIndex to + // the PulseAudio index of the device + if (_mixerManager.OpenMicrophone(_paDeviceIndex) == -1) { + return -1; + } + + // Clear _deviceIndex + _deviceIndex = -1; + _paDeviceIndex = -1; + + return 0; +} + +bool AudioDeviceLinuxPulse::SpeakerIsInitialized() const { + RTC_DCHECK(thread_checker_.IsCurrent()); + return (_mixerManager.SpeakerIsInitialized()); +} + +bool AudioDeviceLinuxPulse::MicrophoneIsInitialized() const { + RTC_DCHECK(thread_checker_.IsCurrent()); + return (_mixerManager.MicrophoneIsInitialized()); +} + +int32_t AudioDeviceLinuxPulse::SpeakerVolumeIsAvailable(bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control. + available = false; + return 0; + } + + // Given that InitSpeaker was successful, we know volume control exists. + available = true; + + // Close the initialized output mixer + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceLinuxPulse::SetSpeakerVolume(uint32_t volume) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!_playing) { + // Only update the volume if it's been set while we weren't playing. + update_speaker_volume_at_startup_ = true; + } + return (_mixerManager.SetSpeakerVolume(volume)); +} + +int32_t AudioDeviceLinuxPulse::SpeakerVolume(uint32_t& volume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + uint32_t level(0); + + if (_mixerManager.SpeakerVolume(level) == -1) { + return -1; + } + + volume = level; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::MaxSpeakerVolume(uint32_t& maxVolume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + uint32_t maxVol(0); + + if (_mixerManager.MaxSpeakerVolume(maxVol) == -1) { + return -1; + } + + maxVolume = maxVol; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::MinSpeakerVolume(uint32_t& minVolume) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + uint32_t minVol(0); + + if (_mixerManager.MinSpeakerVolume(minVol) == -1) { + return -1; + } + + minVolume = minVol; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::SpeakerMuteIsAvailable(bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + bool isAvailable(false); + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + // + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control, hence it is safe to state that there is no mute control + // already at this stage. 
+    available = false;
+    return 0;
+  }
+
+  // Check if the selected speaker has a mute control
+  _mixerManager.SpeakerMuteIsAvailable(isAvailable);
+
+  available = isAvailable;
+
+  // Close the initialized output mixer
+  if (!wasInitialized) {
+    _mixerManager.CloseSpeaker();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::SetSpeakerMute(bool enable) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_mixerManager.SetSpeakerMute(enable));
+}
+
+int32_t AudioDeviceLinuxPulse::SpeakerMute(bool& enabled) const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  bool muted(false);
+  if (_mixerManager.SpeakerMute(muted) == -1) {
+    return -1;
+  }
+
+  enabled = muted;
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::MicrophoneMuteIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  bool isAvailable(false);
+  bool wasInitialized = _mixerManager.MicrophoneIsInitialized();
+
+  // Make an attempt to open up the
+  // input mixer corresponding to the currently selected input device.
+  //
+  if (!wasInitialized && InitMicrophone() == -1) {
+    // If we end up here it means that the selected microphone has no
+    // volume control, hence it is safe to state that there is no
+    // mute control already at this stage.
+    available = false;
+    return 0;
+  }
+
+  // Check if the selected microphone has a mute control
+  //
+  _mixerManager.MicrophoneMuteIsAvailable(isAvailable);
+  available = isAvailable;
+
+  // Close the initialized input mixer
+  //
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::SetMicrophoneMute(bool enable) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_mixerManager.SetMicrophoneMute(enable));
+}
+
+int32_t AudioDeviceLinuxPulse::MicrophoneMute(bool& enabled) const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  bool muted(false);
+  if (_mixerManager.MicrophoneMute(muted) == -1) {
+    return -1;
+  }
+
+  enabled = muted;
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StereoRecordingIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_recChannels == 2 && _recording) {
+    available = true;
+    return 0;
+  }
+
+  available = false;
+  bool wasInitialized = _mixerManager.MicrophoneIsInitialized();
+  int error = 0;
+
+  if (!wasInitialized && InitMicrophone() == -1) {
+    // Cannot open the specified device
+    available = false;
+    return 0;
+  }
+
+  // Check if the selected microphone can record stereo.
+  bool isAvailable(false);
+  error = _mixerManager.StereoRecordingIsAvailable(isAvailable);
+  if (!error)
+    available = isAvailable;
+
+  // Close the initialized input mixer
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return error;
+}
+
+int32_t AudioDeviceLinuxPulse::SetStereoRecording(bool enable) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (enable)
+    _recChannels = 2;
+  else
+    _recChannels = 1;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StereoRecording(bool& enabled) const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_recChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StereoPlayoutIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_playChannels == 2 && _playing) {
+    available = true;
+    return 0;
+  }
+
+  available = false;
+  bool wasInitialized = _mixerManager.SpeakerIsInitialized();
+  int error = 0;
+
+  if (!wasInitialized && InitSpeaker() == -1) {
+    // Cannot open the specified device.
+    return -1;
+  }
+
+  // Check if the selected speaker can play stereo.
+  bool isAvailable(false);
+  error = _mixerManager.StereoPlayoutIsAvailable(isAvailable);
+  if (!error)
+    available = isAvailable;
+
+  // Close the initialized output mixer
+  if (!wasInitialized) {
+    _mixerManager.CloseSpeaker();
+  }
+
+  return error;
+}
+
+int32_t AudioDeviceLinuxPulse::SetStereoPlayout(bool enable) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (enable)
+    _playChannels = 2;
+  else
+    _playChannels = 1;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StereoPlayout(bool& enabled) const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_playChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::MicrophoneVolumeIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  bool wasInitialized = _mixerManager.MicrophoneIsInitialized();
+
+  // Make an attempt to open up the
+  // input mixer corresponding to the currently selected input device.
+  if (!wasInitialized && InitMicrophone() == -1) {
+    // If we end up here it means that the selected microphone has no
+    // volume control.
+    available = false;
+    return 0;
+  }
+
+  // Given that InitMicrophone was successful, we know that a volume control
+  // exists.
+  available = true;
+
+  // Close the initialized input mixer
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::SetMicrophoneVolume(uint32_t volume) {
+  return (_mixerManager.SetMicrophoneVolume(volume));
+}
+
+int32_t AudioDeviceLinuxPulse::MicrophoneVolume(uint32_t& volume) const {
+  uint32_t level(0);
+
+  if (_mixerManager.MicrophoneVolume(level) == -1) {
+    RTC_LOG(LS_WARNING) << "failed to retrieve current microphone level";
+    return -1;
+  }
+
+  volume = level;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::MaxMicrophoneVolume(uint32_t& maxVolume) const {
+  uint32_t maxVol(0);
+
+  if (_mixerManager.MaxMicrophoneVolume(maxVol) == -1) {
+    return -1;
+  }
+
+  maxVolume = maxVol;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::MinMicrophoneVolume(uint32_t& minVolume) const {
+  uint32_t minVol(0);
+
+  if (_mixerManager.MinMicrophoneVolume(minVol) == -1) {
+    return -1;
+  }
+
+  minVolume = minVol;
+
+  return 0;
+}
+
+int16_t AudioDeviceLinuxPulse::PlayoutDevices() {
+  PaLock();
+
+  pa_operation* paOperation = NULL;
+  _numPlayDevices = 1;  // init to 1 to account for "default"
+
+  // get the whole list of devices and update _numPlayDevices
+  paOperation =
+      LATE(pa_context_get_sink_info_list)(_paContext, PaSinkInfoCallback, this);
+
+  WaitForOperationCompletion(paOperation);
+
+  PaUnLock();
+
+  return _numPlayDevices;
+}
+
+int32_t AudioDeviceLinuxPulse::SetPlayoutDevice(uint16_t index) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_playIsInitialized) {
+    return -1;
+  }
+
+  const uint16_t nDevices = PlayoutDevices();
+
+  RTC_LOG(LS_VERBOSE) << "number of available output devices is " << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _outputDeviceIndex = index;
+  _outputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::SetPlayoutDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceLinuxPulse::PlayoutDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  const uint16_t nDevices = PlayoutDevices();
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  // Check if default device
+  if (index == 0) {
+    uint16_t deviceIndex = 0;
+    return GetDefaultDeviceInfo(false, name, deviceIndex);
+  }
+
+  // Tell the callback that we want
+  // the name for this device
+  _playDisplayDeviceName = name;
+  _deviceIndex = index;
+
+  // get playout devices
+  PlayoutDevices();
+
+  // clear device name and index
+  _playDisplayDeviceName = NULL;
+  _deviceIndex = -1;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::RecordingDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  const uint16_t nDevices(RecordingDevices());
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  // Check if default device
+  if (index == 0) {
+    uint16_t deviceIndex = 0;
+    return GetDefaultDeviceInfo(true, name, deviceIndex);
+  }
+
+  // Tell the callback that we want
+  // the name for this device
+  _recDisplayDeviceName = name;
+  _deviceIndex = index;
+
+  // Get recording devices
+  RecordingDevices();
+
+  // Clear device name and index
+  _recDisplayDeviceName = NULL;
+  _deviceIndex = -1;
+
+  return 0;
+}
+
+int16_t AudioDeviceLinuxPulse::RecordingDevices() {
+  PaLock();
+
+  pa_operation* paOperation = NULL;
+  _numRecDevices = 1;  // Init to 1 to account for "default"
+
+  // Get the whole list of devices and update _numRecDevices
+  paOperation = LATE(pa_context_get_source_info_list)(
+      _paContext, PaSourceInfoCallback, this);
+
+  WaitForOperationCompletion(paOperation);
+
+  PaUnLock();
+
+  return _numRecDevices;
+}
+
+int32_t AudioDeviceLinuxPulse::SetRecordingDevice(uint16_t index) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  if (_recIsInitialized) {
+    return -1;
+  }
+
+  const uint16_t nDevices(RecordingDevices());
+
+  RTC_LOG(LS_VERBOSE) << "number of available input devices is " << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _inputDeviceIndex = index;
+  _inputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::SetRecordingDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceLinuxPulse::PlayoutIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  available = false;
+
+  // Try to initialize the playout side
+  int32_t res = InitPlayout();
+
+  // Cancel effect of initialization
+  StopPlayout();
+
+  if (res != -1) {
+    available = true;
+  }
+
+  return res;
+}
+
+int32_t AudioDeviceLinuxPulse::RecordingIsAvailable(bool& available) {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  available = false;
+
+  // Try to initialize the recording side
+  int32_t res = InitRecording();
+
+  // Cancel effect of initialization
+  StopRecording();
+
+  if (res != -1) {
+    available = true;
+  }
+
+  return res;
+}
+
+int32_t AudioDeviceLinuxPulse::InitPlayout() {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+
+  if (_playing) {
+    return -1;
+  }
+
+  if (!_outputDeviceIsSpecified) {
+    return -1;
+  }
+
+  if (_playIsInitialized) {
+    return 0;
+  }
+
+  // Initialize the speaker (devices might have been added or removed)
+  if (InitSpeaker() == -1) {
+    RTC_LOG(LS_WARNING) << "InitSpeaker() failed";
+  }
+
+  // Set the play sample specification
+  pa_sample_spec playSampleSpec;
+  playSampleSpec.channels = _playChannels;
+  playSampleSpec.format = PA_SAMPLE_S16LE;
+  playSampleSpec.rate = sample_rate_hz_;
+
+  // Create a new play stream
+  {
+    MutexLock lock(&mutex_);
+    _playStream =
+        LATE(pa_stream_new)(_paContext, "playStream", &playSampleSpec, NULL);
+  }
+
+  if (!_playStream) {
+    RTC_LOG(LS_ERROR) << "failed to create play stream, err="
+                      << LATE(pa_context_errno)(_paContext);
+    return -1;
+  }
+
+  // Provide the playStream to the mixer
+  _mixerManager.SetPlayStream(_playStream);
+
+  if (_ptrAudioBuffer) {
+    // Update audio buffer with the selected parameters
+    _ptrAudioBuffer->SetPlayoutSampleRate(sample_rate_hz_);
+    _ptrAudioBuffer->SetPlayoutChannels((uint8_t)_playChannels);
+  }
+
+  RTC_LOG(LS_VERBOSE) << "stream state "
+                      << LATE(pa_stream_get_state)(_playStream);
+
+  // Set stream flags
+  _playStreamFlags = (pa_stream_flags_t)(PA_STREAM_AUTO_TIMING_UPDATE |
+                                         PA_STREAM_INTERPOLATE_TIMING);
+
+  if (_configuredLatencyPlay != WEBRTC_PA_NO_LATENCY_REQUIREMENTS) {
+    // If configuring a specific latency then we want to specify
+    // PA_STREAM_ADJUST_LATENCY to make the server adjust parameters
+    // automatically to reach that target latency. However, that flag
+    // doesn't exist in Ubuntu 8.04 and many people still use that,
+    // so we have to check the protocol version of libpulse.
+    if (LATE(pa_context_get_protocol_version)(_paContext) >=
+        WEBRTC_PA_ADJUST_LATENCY_PROTOCOL_VERSION) {
+      _playStreamFlags |= PA_STREAM_ADJUST_LATENCY;
+    }
+
+    const pa_sample_spec* spec = LATE(pa_stream_get_sample_spec)(_playStream);
+    if (!spec) {
+      RTC_LOG(LS_ERROR) << "pa_stream_get_sample_spec()";
+      return -1;
+    }
+
+    size_t bytesPerSec = LATE(pa_bytes_per_second)(spec);
+    uint32_t latency = bytesPerSec * WEBRTC_PA_PLAYBACK_LATENCY_MINIMUM_MSECS /
+                       WEBRTC_PA_MSECS_PER_SEC;
+
+    // Set the play buffer attributes
+    _playBufferAttr.maxlength = latency;  // num bytes stored in the buffer
+    _playBufferAttr.tlength = latency;    // target fill level of play buffer
+    // minimum free num bytes before server request more data
+    _playBufferAttr.minreq = latency / WEBRTC_PA_PLAYBACK_REQUEST_FACTOR;
+    // prebuffer tlength before starting playout
+    _playBufferAttr.prebuf = _playBufferAttr.tlength - _playBufferAttr.minreq;
+
+    _configuredLatencyPlay = latency;
+  }
+
+  // Buffer size for 10 ms of audio: samples per 10 ms * 2 bytes per sample
+  // * number of channels.
+  _playbackBufferSize = sample_rate_hz_ / 100 * 2 * _playChannels;
+  _playbackBufferUnused = _playbackBufferSize;
+  _playBuffer = new int8_t[_playbackBufferSize];
+
+  // Enable underflow callback
+  LATE(pa_stream_set_underflow_callback)
+  (_playStream, PaStreamUnderflowCallback, this);
+
+  // Set the state callback function for the stream
+  LATE(pa_stream_set_state_callback)(_playStream, PaStreamStateCallback, this);
+
+  // Mark playout side as initialized
+  {
+    MutexLock lock(&mutex_);
+    _playIsInitialized = true;
+    _sndCardPlayDelay = 0;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::InitRecording() {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+
+  if (_recording) {
+    return -1;
+  }
+
+  if (!_inputDeviceIsSpecified) {
+    return -1;
+  }
+
+  if (_recIsInitialized) {
+    return 0;
+  }
+
+  // Initialize the microphone (devices might have been added or removed)
+  if (InitMicrophone() == -1) {
+    RTC_LOG(LS_WARNING) << "InitMicrophone() failed";
+  }
+
+  // Set the rec sample specification
+  pa_sample_spec recSampleSpec;
+  recSampleSpec.channels = _recChannels;
+  recSampleSpec.format = PA_SAMPLE_S16LE;
+  
recSampleSpec.rate = sample_rate_hz_; + + // Create a new rec stream + _recStream = + LATE(pa_stream_new)(_paContext, "recStream", &recSampleSpec, NULL); + if (!_recStream) { + RTC_LOG(LS_ERROR) << "failed to create rec stream, err=" + << LATE(pa_context_errno)(_paContext); + return -1; + } + + // Provide the recStream to the mixer + _mixerManager.SetRecStream(_recStream); + + if (_ptrAudioBuffer) { + // Update audio buffer with the selected parameters + _ptrAudioBuffer->SetRecordingSampleRate(sample_rate_hz_); + _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels); + } + + if (_configuredLatencyRec != WEBRTC_PA_NO_LATENCY_REQUIREMENTS) { + _recStreamFlags = (pa_stream_flags_t)(PA_STREAM_AUTO_TIMING_UPDATE | + PA_STREAM_INTERPOLATE_TIMING); + + // If configuring a specific latency then we want to specify + // PA_STREAM_ADJUST_LATENCY to make the server adjust parameters + // automatically to reach that target latency. However, that flag + // doesn't exist in Ubuntu 8.04 and many people still use that, + // so we have to check the protocol version of libpulse. + if (LATE(pa_context_get_protocol_version)(_paContext) >= + WEBRTC_PA_ADJUST_LATENCY_PROTOCOL_VERSION) { + _recStreamFlags |= PA_STREAM_ADJUST_LATENCY; + } + + const pa_sample_spec* spec = LATE(pa_stream_get_sample_spec)(_recStream); + if (!spec) { + RTC_LOG(LS_ERROR) << "pa_stream_get_sample_spec(rec)"; + return -1; + } + + size_t bytesPerSec = LATE(pa_bytes_per_second)(spec); + uint32_t latency = bytesPerSec * WEBRTC_PA_LOW_CAPTURE_LATENCY_MSECS / + WEBRTC_PA_MSECS_PER_SEC; + + // Set the rec buffer attributes + // Note: fragsize specifies a maximum transfer size, not a minimum, so + // it is not possible to force a high latency setting, only a low one. + _recBufferAttr.fragsize = latency; // size of fragment + _recBufferAttr.maxlength = + latency + bytesPerSec * WEBRTC_PA_CAPTURE_BUFFER_EXTRA_MSECS / + WEBRTC_PA_MSECS_PER_SEC; + + _configuredLatencyRec = latency; + } + + _recordBufferSize = sample_rate_hz_ / 100 * 2 * _recChannels; + _recordBufferUsed = 0; + _recBuffer = new int8_t[_recordBufferSize]; + + // Enable overflow callback + LATE(pa_stream_set_overflow_callback) + (_recStream, PaStreamOverflowCallback, this); + + // Set the state callback function for the stream + LATE(pa_stream_set_state_callback)(_recStream, PaStreamStateCallback, this); + + // Mark recording side as initialized + _recIsInitialized = true; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::StartRecording() { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (!_recIsInitialized) { + return -1; + } + + if (_recording) { + return 0; + } + + // Set state to ensure that the recording starts from the audio thread. + _startRec = true; + + // The audio thread will signal when recording has started. + _timeEventRec.Set(); + if (!_recStartEvent.Wait(TimeDelta::Seconds(10))) { + { + MutexLock lock(&mutex_); + _startRec = false; + } + StopRecording(); + RTC_LOG(LS_ERROR) << "failed to activate recording"; + return -1; + } + + { + MutexLock lock(&mutex_); + if (_recording) { + // The recording state is set by the audio thread after recording + // has started. 
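+      // Nothing more to do here: the handshake is complete. StartRecording()
+      // set _startRec and woke the rec thread via _timeEventRec; that thread
+      // connected the stream, set _recording and signaled _recStartEvent,
+      // which is what released the Wait() above.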
+    } else {
+      RTC_LOG(LS_ERROR) << "failed to activate recording";
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StopRecording() {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  MutexLock lock(&mutex_);
+
+  if (!_recIsInitialized) {
+    return 0;
+  }
+
+  if (_recStream == NULL) {
+    return -1;
+  }
+
+  _recIsInitialized = false;
+  _recording = false;
+
+  RTC_LOG(LS_VERBOSE) << "stopping recording";
+
+  // Stop Recording
+  PaLock();
+
+  DisableReadCallback();
+  LATE(pa_stream_set_overflow_callback)(_recStream, NULL, NULL);
+
+  // Unset this here so that we don't get a TERMINATED callback
+  LATE(pa_stream_set_state_callback)(_recStream, NULL, NULL);
+
+  if (LATE(pa_stream_get_state)(_recStream) != PA_STREAM_UNCONNECTED) {
+    // Disconnect the stream
+    if (LATE(pa_stream_disconnect)(_recStream) != PA_OK) {
+      RTC_LOG(LS_ERROR) << "failed to disconnect rec stream, err="
+                        << LATE(pa_context_errno)(_paContext);
+      PaUnLock();
+      return -1;
+    }
+
+    RTC_LOG(LS_VERBOSE) << "disconnected recording";
+  }
+
+  LATE(pa_stream_unref)(_recStream);
+  _recStream = NULL;
+
+  PaUnLock();
+
+  // Provide the recStream to the mixer
+  _mixerManager.SetRecStream(_recStream);
+
+  if (_recBuffer) {
+    delete[] _recBuffer;
+    _recBuffer = NULL;
+  }
+
+  return 0;
+}
+
+bool AudioDeviceLinuxPulse::RecordingIsInitialized() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_recIsInitialized);
+}
+
+bool AudioDeviceLinuxPulse::Recording() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_recording);
+}
+
+bool AudioDeviceLinuxPulse::PlayoutIsInitialized() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_playIsInitialized);
+}
+
+int32_t AudioDeviceLinuxPulse::StartPlayout() {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+
+  if (!_playIsInitialized) {
+    return -1;
+  }
+
+  if (_playing) {
+    return 0;
+  }
+
+  // Set state to ensure that playout starts from the audio thread.
+  {
+    MutexLock lock(&mutex_);
+    _startPlay = true;
+  }
+
+  // Both `_startPlay` and `_playing` need protection since they are also
+  // accessed on the playout thread.
+
+  // The audio thread will signal when playout has started.
+  _timeEventPlay.Set();
+  if (!_playStartEvent.Wait(TimeDelta::Seconds(10))) {
+    {
+      MutexLock lock(&mutex_);
+      _startPlay = false;
+    }
+    StopPlayout();
+    RTC_LOG(LS_ERROR) << "failed to activate playout";
+    return -1;
+  }
+
+  {
+    MutexLock lock(&mutex_);
+    if (_playing) {
+      // The playing state is set by the audio thread after playout
+      // has started.
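+      // Nothing more to do here: the handshake is complete. StartPlayout()
+      // set _startPlay and woke the play thread via _timeEventPlay; that
+      // thread connected the stream, set _playing and signaled
+      // _playStartEvent, which is what released the Wait() above.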
+    } else {
+      RTC_LOG(LS_ERROR) << "failed to activate playing";
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::StopPlayout() {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  MutexLock lock(&mutex_);
+
+  if (!_playIsInitialized) {
+    return 0;
+  }
+
+  if (_playStream == NULL) {
+    return -1;
+  }
+
+  _playIsInitialized = false;
+  _playing = false;
+  _sndCardPlayDelay = 0;
+
+  RTC_LOG(LS_VERBOSE) << "stopping playback";
+
+  // Stop Playout
+  PaLock();
+
+  DisableWriteCallback();
+  LATE(pa_stream_set_underflow_callback)(_playStream, NULL, NULL);
+
+  // Unset this here so that we don't get a TERMINATED callback
+  LATE(pa_stream_set_state_callback)(_playStream, NULL, NULL);
+
+  if (LATE(pa_stream_get_state)(_playStream) != PA_STREAM_UNCONNECTED) {
+    // Disconnect the stream
+    if (LATE(pa_stream_disconnect)(_playStream) != PA_OK) {
+      RTC_LOG(LS_ERROR) << "failed to disconnect play stream, err="
+                        << LATE(pa_context_errno)(_paContext);
+      PaUnLock();
+      return -1;
+    }
+
+    RTC_LOG(LS_VERBOSE) << "disconnected playback";
+  }
+
+  LATE(pa_stream_unref)(_playStream);
+  _playStream = NULL;
+
+  PaUnLock();
+
+  // Provide the playStream to the mixer
+  _mixerManager.SetPlayStream(_playStream);
+
+  if (_playBuffer) {
+    delete[] _playBuffer;
+    _playBuffer = NULL;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::PlayoutDelay(uint16_t& delayMS) const {
+  MutexLock lock(&mutex_);
+  delayMS = (uint16_t)_sndCardPlayDelay;
+  return 0;
+}
+
+bool AudioDeviceLinuxPulse::Playing() const {
+  RTC_DCHECK(thread_checker_.IsCurrent());
+  return (_playing);
+}
+
+// ============================================================================
+// Private Methods
+// ============================================================================
+
+void AudioDeviceLinuxPulse::PaContextStateCallback(pa_context* c, void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaContextStateCallbackHandler(c);
+}
+
+// ----------------------------------------------------------------------------
+// PaSinkInfoCallback
+// ----------------------------------------------------------------------------
+
+void AudioDeviceLinuxPulse::PaSinkInfoCallback(pa_context* /*c*/,
+                                               const pa_sink_info* i,
+                                               int eol,
+                                               void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaSinkInfoCallbackHandler(i, eol);
+}
+
+void AudioDeviceLinuxPulse::PaSourceInfoCallback(pa_context* /*c*/,
+                                                 const pa_source_info* i,
+                                                 int eol,
+                                                 void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaSourceInfoCallbackHandler(i,
+                                                                          eol);
+}
+
+void AudioDeviceLinuxPulse::PaServerInfoCallback(pa_context* /*c*/,
+                                                 const pa_server_info* i,
+                                                 void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaServerInfoCallbackHandler(i);
+}
+
+void AudioDeviceLinuxPulse::PaStreamStateCallback(pa_stream* p, void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaStreamStateCallbackHandler(p);
+}
+
+void AudioDeviceLinuxPulse::PaContextStateCallbackHandler(pa_context* c) {
+  RTC_LOG(LS_VERBOSE) << "context state cb";
+
+  pa_context_state_t state = LATE(pa_context_get_state)(c);
+  switch (state) {
+    case PA_CONTEXT_UNCONNECTED:
+      RTC_LOG(LS_VERBOSE) << "unconnected";
+      break;
+    case PA_CONTEXT_CONNECTING:
+    case PA_CONTEXT_AUTHORIZING:
+    case PA_CONTEXT_SETTING_NAME:
+      RTC_LOG(LS_VERBOSE) << "no state";
+      break;
+    case PA_CONTEXT_FAILED:
+    case PA_CONTEXT_TERMINATED:
+      RTC_LOG(LS_VERBOSE) << "failed";
+      _paStateChanged = true;
+      LATE(pa_threaded_mainloop_signal)(_paMainloop, 0);
+      break;
+    case PA_CONTEXT_READY:
+      RTC_LOG(LS_VERBOSE) << "ready";
+      _paStateChanged = true;
+      LATE(pa_threaded_mainloop_signal)(_paMainloop, 0);
+      break;
+  }
+}
+
+void
AudioDeviceLinuxPulse::PaSinkInfoCallbackHandler(const pa_sink_info* i, + int eol) { + if (eol) { + // Signal that we are done + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); + return; + } + + if (_numPlayDevices == _deviceIndex) { + // Convert the device index to the one of the sink + _paDeviceIndex = i->index; + + if (_playDeviceName) { + // Copy the sink name + strncpy(_playDeviceName, i->name, kAdmMaxDeviceNameSize); + _playDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + if (_playDisplayDeviceName) { + // Copy the sink display name + strncpy(_playDisplayDeviceName, i->description, kAdmMaxDeviceNameSize); + _playDisplayDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + } + + _numPlayDevices++; +} + +void AudioDeviceLinuxPulse::PaSourceInfoCallbackHandler(const pa_source_info* i, + int eol) { + if (eol) { + // Signal that we are done + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); + return; + } + + // We don't want to list output devices + if (i->monitor_of_sink == PA_INVALID_INDEX) { + if (_numRecDevices == _deviceIndex) { + // Convert the device index to the one of the source + _paDeviceIndex = i->index; + + if (_recDeviceName) { + // copy the source name + strncpy(_recDeviceName, i->name, kAdmMaxDeviceNameSize); + _recDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + if (_recDisplayDeviceName) { + // Copy the source display name + strncpy(_recDisplayDeviceName, i->description, kAdmMaxDeviceNameSize); + _recDisplayDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + } + + _numRecDevices++; + } +} + +void AudioDeviceLinuxPulse::PaServerInfoCallbackHandler( + const pa_server_info* i) { + // Use PA native sampling rate + sample_rate_hz_ = i->sample_spec.rate; + + // Copy the PA server version + strncpy(_paServerVersion, i->server_version, 31); + _paServerVersion[31] = '\0'; + + if (_recDisplayDeviceName) { + // Copy the source name + strncpy(_recDisplayDeviceName, i->default_source_name, + kAdmMaxDeviceNameSize); + _recDisplayDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + + if (_playDisplayDeviceName) { + // Copy the sink name + strncpy(_playDisplayDeviceName, i->default_sink_name, + kAdmMaxDeviceNameSize); + _playDisplayDeviceName[kAdmMaxDeviceNameSize - 1] = '\0'; + } + + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); +} + +void AudioDeviceLinuxPulse::PaStreamStateCallbackHandler(pa_stream* p) { + RTC_LOG(LS_VERBOSE) << "stream state cb"; + + pa_stream_state_t state = LATE(pa_stream_get_state)(p); + switch (state) { + case PA_STREAM_UNCONNECTED: + RTC_LOG(LS_VERBOSE) << "unconnected"; + break; + case PA_STREAM_CREATING: + RTC_LOG(LS_VERBOSE) << "creating"; + break; + case PA_STREAM_FAILED: + case PA_STREAM_TERMINATED: + RTC_LOG(LS_VERBOSE) << "failed"; + break; + case PA_STREAM_READY: + RTC_LOG(LS_VERBOSE) << "ready"; + break; + } + + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); +} + +int32_t AudioDeviceLinuxPulse::CheckPulseAudioVersion() { + PaLock(); + + pa_operation* paOperation = NULL; + + // get the server info and update deviceName + paOperation = + LATE(pa_context_get_server_info)(_paContext, PaServerInfoCallback, this); + + WaitForOperationCompletion(paOperation); + + PaUnLock(); + + RTC_LOG(LS_VERBOSE) << "checking PulseAudio version: " << _paServerVersion; + + return 0; +} + +int32_t AudioDeviceLinuxPulse::InitSamplingFrequency() { + PaLock(); + + pa_operation* paOperation = NULL; + + // Get the server info and update sample_rate_hz_ + paOperation = + LATE(pa_context_get_server_info)(_paContext, PaServerInfoCallback, this); + + 
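+  // A note on the pattern used here (and throughout this file): the
+  // get_server_info call only queues an asynchronous request on the threaded
+  // mainloop. PaServerInfoCallbackHandler runs on the mainloop thread, records
+  // sample_rate_hz_ and the server version, and calls
+  // pa_threaded_mainloop_signal(), which is what wakes the wait inside
+  // WaitForOperationCompletion() below.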
WaitForOperationCompletion(paOperation);
+
+  PaUnLock();
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::GetDefaultDeviceInfo(bool recDevice,
+                                                    char* name,
+                                                    uint16_t& index) {
+  char tmpName[kAdmMaxDeviceNameSize] = {0};
+  // subtract length of "default: "
+  uint16_t nameLen = kAdmMaxDeviceNameSize - 9;
+  char* pName = NULL;
+
+  if (name) {
+    // Add "default: "
+    strcpy(name, "default: ");
+    pName = &name[9];
+  }
+
+  // Tell the callback that we want
+  // the name for this device
+  if (recDevice) {
+    _recDisplayDeviceName = tmpName;
+  } else {
+    _playDisplayDeviceName = tmpName;
+  }
+
+  // Set members
+  _paDeviceIndex = -1;
+  _deviceIndex = 0;
+  _numPlayDevices = 0;
+  _numRecDevices = 0;
+
+  PaLock();
+
+  pa_operation* paOperation = NULL;
+
+  // Get the server info and update deviceName
+  paOperation =
+      LATE(pa_context_get_server_info)(_paContext, PaServerInfoCallback, this);
+
+  WaitForOperationCompletion(paOperation);
+
+  // Get the device index
+  if (recDevice) {
+    paOperation = LATE(pa_context_get_source_info_by_name)(
+        _paContext, (char*)tmpName, PaSourceInfoCallback, this);
+  } else {
+    paOperation = LATE(pa_context_get_sink_info_by_name)(
+        _paContext, (char*)tmpName, PaSinkInfoCallback, this);
+  }
+
+  WaitForOperationCompletion(paOperation);
+
+  PaUnLock();
+
+  // Set the index
+  index = _paDeviceIndex;
+
+  if (name) {
+    // Copy to name string
+    strncpy(pName, tmpName, nameLen);
+  }
+
+  // Clear members
+  _playDisplayDeviceName = NULL;
+  _recDisplayDeviceName = NULL;
+  _paDeviceIndex = -1;
+  _deviceIndex = -1;
+  _numPlayDevices = 0;
+  _numRecDevices = 0;
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::InitPulseAudio() {
+  int retVal = 0;
+
+  // Load libpulse
+  if (!GetPulseSymbolTable()->Load()) {
+    // Most likely the Pulse library and sound server are not installed on
+    // this system
+    RTC_LOG(LS_ERROR) << "failed to load symbol table";
+    return -1;
+  }
+
+  // Create a mainloop API and connection to the default server; the mainloop
+  // is the internal asynchronous API event loop.
+  if (_paMainloop) {
+    RTC_LOG(LS_ERROR) << "PA mainloop already exists";
+    return -1;
+  }
+  _paMainloop = LATE(pa_threaded_mainloop_new)();
+  if (!_paMainloop) {
+    RTC_LOG(LS_ERROR) << "could not create mainloop";
+    return -1;
+  }
+
+  // Start the threaded main loop
+  retVal = LATE(pa_threaded_mainloop_start)(_paMainloop);
+  if (retVal != PA_OK) {
+    RTC_LOG(LS_ERROR) << "failed to start main loop, error=" << retVal;
+    return -1;
+  }
+
+  RTC_LOG(LS_VERBOSE) << "mainloop running!";
+
+  PaLock();
+
+  _paMainloopApi = LATE(pa_threaded_mainloop_get_api)(_paMainloop);
+  if (!_paMainloopApi) {
+    RTC_LOG(LS_ERROR) << "could not create mainloop API";
+    PaUnLock();
+    return -1;
+  }
+
+  // Create a new PulseAudio context
+  if (_paContext) {
+    RTC_LOG(LS_ERROR) << "PA context already exists";
+    PaUnLock();
+    return -1;
+  }
+  _paContext = LATE(pa_context_new)(_paMainloopApi, "WEBRTC VoiceEngine");
+
+  if (!_paContext) {
+    RTC_LOG(LS_ERROR) << "could not create context";
+    PaUnLock();
+    return -1;
+  }
+
+  // Set state callback function
+  LATE(pa_context_set_state_callback)(_paContext, PaContextStateCallback, this);
+
+  // Connect the context to a server (default)
+  _paStateChanged = false;
+  retVal =
+      LATE(pa_context_connect)(_paContext, NULL, PA_CONTEXT_NOAUTOSPAWN, NULL);
+
+  if (retVal != PA_OK) {
+    RTC_LOG(LS_ERROR) << "failed to connect context, error=" << retVal;
+    PaUnLock();
+    return -1;
+  }
+
+  // Wait for state change
+  while (!_paStateChanged) {
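+    // pa_threaded_mainloop_wait atomically releases the mainloop lock while
+    // blocking; it is woken by the pa_threaded_mainloop_signal call made from
+    // PaContextStateCallbackHandler once the context reaches READY, FAILED or
+    // TERMINATED.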
+    LATE(pa_threaded_mainloop_wait)(_paMainloop);
+  }
+
+  // Now check to see what final state we reached.
+  pa_context_state_t state = LATE(pa_context_get_state)(_paContext);
+
+  if (state != PA_CONTEXT_READY) {
+    if (state == PA_CONTEXT_FAILED) {
+      RTC_LOG(LS_ERROR) << "failed to connect to PulseAudio sound server";
+    } else if (state == PA_CONTEXT_TERMINATED) {
+      RTC_LOG(LS_ERROR) << "PulseAudio connection terminated early";
+    } else {
+      // Shouldn't happen, because we only signal on one of those three
+      // states
+      RTC_LOG(LS_ERROR) << "unknown problem connecting to PulseAudio";
+    }
+    PaUnLock();
+    return -1;
+  }
+
+  PaUnLock();
+
+  // Give the objects to the mixer manager
+  _mixerManager.SetPulseAudioObjects(_paMainloop, _paContext);
+
+  // Check the version
+  if (CheckPulseAudioVersion() < 0) {
+    RTC_LOG(LS_ERROR) << "PulseAudio version " << _paServerVersion
+                      << " not supported";
+    return -1;
+  }
+
+  // Initialize sampling frequency
+  if (InitSamplingFrequency() < 0 || sample_rate_hz_ == 0) {
+    RTC_LOG(LS_ERROR) << "failed to initialize sampling frequency, set to "
+                      << sample_rate_hz_ << " Hz";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::TerminatePulseAudio() {
+  // Do nothing if the instance doesn't exist; this most likely means that
+  // GetPulseSymbolTable()->Load() failed.
+  if (!_paMainloop) {
+    return 0;
+  }
+
+  PaLock();
+
+  // Disconnect the context
+  if (_paContext) {
+    LATE(pa_context_disconnect)(_paContext);
+  }
+
+  // Unreference the context
+  if (_paContext) {
+    LATE(pa_context_unref)(_paContext);
+  }
+
+  PaUnLock();
+  _paContext = NULL;
+
+  // Stop the threaded main loop
+  if (_paMainloop) {
+    LATE(pa_threaded_mainloop_stop)(_paMainloop);
+  }
+
+  // Free the mainloop
+  if (_paMainloop) {
+    LATE(pa_threaded_mainloop_free)(_paMainloop);
+  }
+
+  _paMainloop = NULL;
+
+  RTC_LOG(LS_VERBOSE) << "PulseAudio terminated";
+
+  return 0;
+}
+
+void AudioDeviceLinuxPulse::PaLock() {
+  LATE(pa_threaded_mainloop_lock)(_paMainloop);
+}
+
+void AudioDeviceLinuxPulse::PaUnLock() {
+  LATE(pa_threaded_mainloop_unlock)(_paMainloop);
+}
+
+void AudioDeviceLinuxPulse::WaitForOperationCompletion(
+    pa_operation* paOperation) const {
+  if (!paOperation) {
+    RTC_LOG(LS_ERROR) << "paOperation NULL in WaitForOperationCompletion";
+    return;
+  }
+
+  while (LATE(pa_operation_get_state)(paOperation) == PA_OPERATION_RUNNING) {
+    LATE(pa_threaded_mainloop_wait)(_paMainloop);
+  }
+
+  LATE(pa_operation_unref)(paOperation);
+}
+
+// ============================================================================
+// Thread Methods
+// ============================================================================
+
+void AudioDeviceLinuxPulse::EnableWriteCallback() {
+  if (LATE(pa_stream_get_state)(_playStream) == PA_STREAM_READY) {
+    // May already have available space. Must check.
+    _tempBufferSpace = LATE(pa_stream_writable_size)(_playStream);
+    if (_tempBufferSpace > 0) {
+      // Yup, there is already space available, so if we register a
+      // write callback then it will not receive any event. So dispatch
+      // one ourselves instead.
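+      // (Setting _timeEventPlay wakes PlayThreadProcess(), which writes the
+      // pending data and then re-arms the write callback via
+      // EnableWriteCallback().)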
+      _timeEventPlay.Set();
+      return;
+    }
+  }
+
+  LATE(pa_stream_set_write_callback)(_playStream, &PaStreamWriteCallback, this);
+}
+
+void AudioDeviceLinuxPulse::DisableWriteCallback() {
+  LATE(pa_stream_set_write_callback)(_playStream, NULL, NULL);
+}
+
+void AudioDeviceLinuxPulse::PaStreamWriteCallback(pa_stream* /*unused*/,
+                                                  size_t buffer_space,
+                                                  void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaStreamWriteCallbackHandler(
+      buffer_space);
+}
+
+void AudioDeviceLinuxPulse::PaStreamWriteCallbackHandler(size_t bufferSpace) {
+  _tempBufferSpace = bufferSpace;
+
+  // Since we write the data asynchronously on a different thread, we have
+  // to temporarily disable the write callback or else Pulse will call it
+  // continuously until we write the data. We re-enable it below.
+  DisableWriteCallback();
+  _timeEventPlay.Set();
+}
+
+void AudioDeviceLinuxPulse::PaStreamUnderflowCallback(pa_stream* /*unused*/,
+                                                      void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)
+      ->PaStreamUnderflowCallbackHandler();
+}
+
+void AudioDeviceLinuxPulse::PaStreamUnderflowCallbackHandler() {
+  RTC_LOG(LS_WARNING) << "Playout underflow";
+
+  if (_configuredLatencyPlay == WEBRTC_PA_NO_LATENCY_REQUIREMENTS) {
+    // We didn't configure a pa_buffer_attr before, so switching to
+    // one now would be questionable.
+    return;
+  }
+
+  // Otherwise reconfigure the stream with a higher target latency.
+
+  const pa_sample_spec* spec = LATE(pa_stream_get_sample_spec)(_playStream);
+  if (!spec) {
+    RTC_LOG(LS_ERROR) << "pa_stream_get_sample_spec()";
+    return;
+  }
+
+  size_t bytesPerSec = LATE(pa_bytes_per_second)(spec);
+  uint32_t newLatency =
+      _configuredLatencyPlay + bytesPerSec *
+                                   WEBRTC_PA_PLAYBACK_LATENCY_INCREMENT_MSECS /
+                                   WEBRTC_PA_MSECS_PER_SEC;
+
+  // Set the play buffer attributes
+  _playBufferAttr.maxlength = newLatency;
+  _playBufferAttr.tlength = newLatency;
+  _playBufferAttr.minreq = newLatency / WEBRTC_PA_PLAYBACK_REQUEST_FACTOR;
+  _playBufferAttr.prebuf = _playBufferAttr.tlength - _playBufferAttr.minreq;
+
+  pa_operation* op = LATE(pa_stream_set_buffer_attr)(
+      _playStream, &_playBufferAttr, NULL, NULL);
+  if (!op) {
+    RTC_LOG(LS_ERROR) << "pa_stream_set_buffer_attr()";
+    return;
+  }
+
+  // Don't need to wait for this to complete.
+  LATE(pa_operation_unref)(op);
+
+  // Save the new latency in case we underflow again.
+  _configuredLatencyPlay = newLatency;
+}
+
+void AudioDeviceLinuxPulse::EnableReadCallback() {
+  LATE(pa_stream_set_read_callback)(_recStream, &PaStreamReadCallback, this);
+}
+
+void AudioDeviceLinuxPulse::DisableReadCallback() {
+  LATE(pa_stream_set_read_callback)(_recStream, NULL, NULL);
+}
+
+void AudioDeviceLinuxPulse::PaStreamReadCallback(pa_stream* /*unused1*/,
+                                                 size_t /*unused2*/,
+                                                 void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaStreamReadCallbackHandler();
+}
+
+void AudioDeviceLinuxPulse::PaStreamReadCallbackHandler() {
+  // We get the data pointer and size now in order to save one Lock/Unlock
+  // in the worker thread.
+  if (LATE(pa_stream_peek)(_recStream, &_tempSampleData,
+                           &_tempSampleDataSize) != 0) {
+    RTC_LOG(LS_ERROR) << "Can't read data!";
+    return;
+  }
+
+  // Since we consume the data asynchronously on a different thread, we have
+  // to temporarily disable the read callback or else Pulse will call it
+  // continuously until we consume the data. We re-enable it below.
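+  // (Setting _timeEventRec wakes RecThreadProcess(), which consumes the
+  // peeked data through ReadRecordedData(), acks it with pa_stream_drop and
+  // then calls EnableReadCallback() again.)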
+  DisableReadCallback();
+  _timeEventRec.Set();
+}
+
+void AudioDeviceLinuxPulse::PaStreamOverflowCallback(pa_stream* /*unused*/,
+                                                     void* pThis) {
+  static_cast<AudioDeviceLinuxPulse*>(pThis)->PaStreamOverflowCallbackHandler();
+}
+
+void AudioDeviceLinuxPulse::PaStreamOverflowCallbackHandler() {
+  RTC_LOG(LS_WARNING) << "Recording overflow";
+}
+
+int32_t AudioDeviceLinuxPulse::LatencyUsecs(pa_stream* stream) {
+  if (!WEBRTC_PA_REPORT_LATENCY) {
+    return 0;
+  }
+
+  if (!stream) {
+    return 0;
+  }
+
+  pa_usec_t latency;
+  int negative;
+  if (LATE(pa_stream_get_latency)(stream, &latency, &negative) != 0) {
+    RTC_LOG(LS_ERROR) << "Can't query latency";
+    // We'd rather continue playout/capture with an incorrect delay than
+    // stop it altogether, so return a valid value.
+    return 0;
+  }
+
+  if (negative) {
+    RTC_LOG(LS_VERBOSE)
+        << "warning: pa_stream_get_latency reported negative delay";
+
+    // The delay can be negative for monitoring streams if the captured
+    // samples haven't been played yet. In such a case, "latency"
+    // contains the magnitude, so we must negate it to get the real value.
+    int32_t tmpLatency = (int32_t)-latency;
+    if (tmpLatency < 0) {
+      // Make sure that we don't use a negative delay.
+      tmpLatency = 0;
+    }
+
+    return tmpLatency;
+  } else {
+    return (int32_t)latency;
+  }
+}
+
+int32_t AudioDeviceLinuxPulse::ReadRecordedData(const void* bufferData,
+                                                size_t bufferSize)
+    RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
+  size_t size = bufferSize;
+  uint32_t numRecSamples = _recordBufferSize / (2 * _recChannels);
+
+  // Account for the peeked data and the used data.
+  uint32_t recDelay =
+      (uint32_t)((LatencyUsecs(_recStream) / 1000) +
+                 10 * ((size + _recordBufferUsed) / _recordBufferSize));
+
+  if (_playStream) {
+    // Get the playout delay.
+    _sndCardPlayDelay = (uint32_t)(LatencyUsecs(_playStream) / 1000);
+  }
+
+  if (_recordBufferUsed > 0) {
+    // Have to copy to the buffer until it is full.
+    size_t copy = _recordBufferSize - _recordBufferUsed;
+    if (size < copy) {
+      copy = size;
+    }
+
+    memcpy(&_recBuffer[_recordBufferUsed], bufferData, copy);
+    _recordBufferUsed += copy;
+    bufferData = static_cast<const uint8_t*>(bufferData) + copy;
+    size -= copy;
+
+    if (_recordBufferUsed != _recordBufferSize) {
+      // Not enough data yet to pass to VoE.
+      return 0;
+    }
+
+    // Provide data to VoiceEngine.
+    if (ProcessRecordedData(_recBuffer, numRecSamples, recDelay) == -1) {
+      // We have stopped recording.
+      return -1;
+    }
+
+    _recordBufferUsed = 0;
+  }
+
+  // Now process full 10ms sample sets directly from the input.
+  while (size >= _recordBufferSize) {
+    // Provide data to VoiceEngine.
+    if (ProcessRecordedData(
+            static_cast<int8_t*>(const_cast<void*>(bufferData)),
+            numRecSamples, recDelay) == -1) {
+      // We have stopped recording.
+      return -1;
+    }
+
+    bufferData = static_cast<const uint8_t*>(bufferData) + _recordBufferSize;
+    size -= _recordBufferSize;
+
+    // We have consumed 10ms of data.
+    recDelay -= 10;
+  }
+
+  // Now save any leftovers for later.
+  if (size > 0) {
+    memcpy(_recBuffer, bufferData, size);
+    _recordBufferUsed = size;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceLinuxPulse::ProcessRecordedData(int8_t* bufferData,
+                                                   uint32_t bufferSizeInSamples,
+                                                   uint32_t recDelay)
+    RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
+  _ptrAudioBuffer->SetRecordedBuffer(bufferData, bufferSizeInSamples);
+
+  // TODO(andrew): this is a temporary hack, to avoid non-causal far- and
+  // near-end signals at the AEC for PulseAudio. I think the system delay is
+  // being correctly calculated here, but for legacy reasons we add +10 ms
+  // to the value in the AEC.
The real fix will be part of a larger + // investigation into managing system delay in the AEC. + if (recDelay > 10) + recDelay -= 10; + else + recDelay = 0; + _ptrAudioBuffer->SetVQEData(_sndCardPlayDelay, recDelay); + _ptrAudioBuffer->SetTypingStatus(KeyPressed()); + // Deliver recorded samples at specified sample rate, + // mic level etc. to the observer using callback. + UnLock(); + _ptrAudioBuffer->DeliverRecordedData(); + Lock(); + + // We have been unlocked - check the flag again. + if (!_recording) { + return -1; + } + + return 0; +} + +bool AudioDeviceLinuxPulse::PlayThreadProcess() { + if (!_timeEventPlay.Wait(TimeDelta::Seconds(1))) { + return true; + } + + MutexLock lock(&mutex_); + + if (quit_) { + return false; + } + + if (_startPlay) { + RTC_LOG(LS_VERBOSE) << "_startPlay true, performing initial actions"; + + _startPlay = false; + _playDeviceName = NULL; + + // Set if not default device + if (_outputDeviceIndex > 0) { + // Get the playout device name + _playDeviceName = new char[kAdmMaxDeviceNameSize]; + _deviceIndex = _outputDeviceIndex; + PlayoutDevices(); + } + + // Start muted only supported on 0.9.11 and up + if (LATE(pa_context_get_protocol_version)(_paContext) >= + WEBRTC_PA_ADJUST_LATENCY_PROTOCOL_VERSION) { + // Get the currently saved speaker mute status + // and set the initial mute status accordingly + bool enabled(false); + _mixerManager.SpeakerMute(enabled); + if (enabled) { + _playStreamFlags |= PA_STREAM_START_MUTED; + } + } + + // Get the currently saved speaker volume + uint32_t volume = 0; + if (update_speaker_volume_at_startup_) + _mixerManager.SpeakerVolume(volume); + + PaLock(); + + // NULL gives PA the choice of startup volume. + pa_cvolume* ptr_cvolume = NULL; + if (update_speaker_volume_at_startup_) { + pa_cvolume cVolumes; + ptr_cvolume = &cVolumes; + + // Set the same volume for all channels + const pa_sample_spec* spec = LATE(pa_stream_get_sample_spec)(_playStream); + LATE(pa_cvolume_set)(&cVolumes, spec->channels, volume); + update_speaker_volume_at_startup_ = false; + } + + // Connect the stream to a sink + if (LATE(pa_stream_connect_playback)( + _playStream, _playDeviceName, &_playBufferAttr, + (pa_stream_flags_t)_playStreamFlags, ptr_cvolume, NULL) != PA_OK) { + RTC_LOG(LS_ERROR) << "failed to connect play stream, err=" + << LATE(pa_context_errno)(_paContext); + } + + RTC_LOG(LS_VERBOSE) << "play stream connected"; + + // Wait for state change + while (LATE(pa_stream_get_state)(_playStream) != PA_STREAM_READY) { + LATE(pa_threaded_mainloop_wait)(_paMainloop); + } + + RTC_LOG(LS_VERBOSE) << "play stream ready"; + + // We can now handle write callbacks + EnableWriteCallback(); + + PaUnLock(); + + // Clear device name + if (_playDeviceName) { + delete[] _playDeviceName; + _playDeviceName = NULL; + } + + _playing = true; + _playStartEvent.Set(); + + return true; + } + + if (_playing) { + if (!_recording) { + // Update the playout delay + _sndCardPlayDelay = (uint32_t)(LatencyUsecs(_playStream) / 1000); + } + + if (_playbackBufferUnused < _playbackBufferSize) { + size_t write = _playbackBufferSize - _playbackBufferUnused; + if (_tempBufferSpace < write) { + write = _tempBufferSpace; + } + + PaLock(); + if (LATE(pa_stream_write)( + _playStream, (void*)&_playBuffer[_playbackBufferUnused], write, + NULL, (int64_t)0, PA_SEEK_RELATIVE) != PA_OK) { + _writeErrors++; + if (_writeErrors > 10) { + RTC_LOG(LS_ERROR) << "Playout error: _writeErrors=" << _writeErrors + << ", error=" << LATE(pa_context_errno)(_paContext); + _writeErrors = 0; + } + } + 
PaUnLock();
+
+      _playbackBufferUnused += write;
+      _tempBufferSpace -= write;
+    }
+
+    uint32_t numPlaySamples = _playbackBufferSize / (2 * _playChannels);
+    // Might have been reduced to zero by the above.
+    if (_tempBufferSpace > 0) {
+      // Ask for new PCM data to be played out using the AudioDeviceBuffer;
+      // ensure that this callback is executed without taking the
+      // audio-thread lock.
+      UnLock();
+      RTC_LOG(LS_VERBOSE) << "requesting data";
+      uint32_t nSamples = _ptrAudioBuffer->RequestPlayoutData(numPlaySamples);
+      Lock();
+
+      // We have been unlocked - check the flag again.
+      if (!_playing) {
+        return true;
+      }
+
+      nSamples = _ptrAudioBuffer->GetPlayoutData(_playBuffer);
+      if (nSamples != numPlaySamples) {
+        RTC_LOG(LS_ERROR) << "invalid number of output samples (" << nSamples
+                          << ")";
+      }
+
+      size_t write = _playbackBufferSize;
+      if (_tempBufferSpace < write) {
+        write = _tempBufferSpace;
+      }
+
+      RTC_LOG(LS_VERBOSE) << "will write";
+      PaLock();
+      if (LATE(pa_stream_write)(_playStream, (void*)&_playBuffer[0], write,
+                                NULL, (int64_t)0, PA_SEEK_RELATIVE) != PA_OK) {
+        _writeErrors++;
+        if (_writeErrors > 10) {
+          RTC_LOG(LS_ERROR) << "Playout error: _writeErrors=" << _writeErrors
+                            << ", error=" << LATE(pa_context_errno)(_paContext);
+          _writeErrors = 0;
+        }
+      }
+      PaUnLock();
+
+      _playbackBufferUnused = write;
+    }
+
+    _tempBufferSpace = 0;
+    PaLock();
+    EnableWriteCallback();
+    PaUnLock();
+
+  }  // _playing
+
+  return true;
+}
+
+bool AudioDeviceLinuxPulse::RecThreadProcess() {
+  if (!_timeEventRec.Wait(TimeDelta::Seconds(1))) {
+    return true;
+  }
+
+  MutexLock lock(&mutex_);
+  if (quit_) {
+    return false;
+  }
+  if (_startRec) {
+    RTC_LOG(LS_VERBOSE) << "_startRec true, performing initial actions";
+
+    _recDeviceName = NULL;
+
+    // Set if not default device
+    if (_inputDeviceIndex > 0) {
+      // Get the recording device name
+      _recDeviceName = new char[kAdmMaxDeviceNameSize];
+      _deviceIndex = _inputDeviceIndex;
+      RecordingDevices();
+    }
+
+    PaLock();
+
+    RTC_LOG(LS_VERBOSE) << "connecting stream";
+
+    // Connect the stream to a source
+    if (LATE(pa_stream_connect_record)(
+            _recStream, _recDeviceName, &_recBufferAttr,
+            (pa_stream_flags_t)_recStreamFlags) != PA_OK) {
+      RTC_LOG(LS_ERROR) << "failed to connect rec stream, err="
+                        << LATE(pa_context_errno)(_paContext);
+    }
+
+    RTC_LOG(LS_VERBOSE) << "connected";
+
+    // Wait for state change
+    while (LATE(pa_stream_get_state)(_recStream) != PA_STREAM_READY) {
+      LATE(pa_threaded_mainloop_wait)(_paMainloop);
+    }
+
+    RTC_LOG(LS_VERBOSE) << "done";
+
+    // We can now handle read callbacks
+    EnableReadCallback();
+
+    PaUnLock();
+
+    // Clear device name
+    if (_recDeviceName) {
+      delete[] _recDeviceName;
+      _recDeviceName = NULL;
+    }
+
+    _startRec = false;
+    _recording = true;
+    _recStartEvent.Set();
+
+    return true;
+  }
+
+  if (_recording) {
+    // Read data and provide it to VoiceEngine
+    if (ReadRecordedData(_tempSampleData, _tempSampleDataSize) == -1) {
+      return true;
+    }
+
+    _tempSampleData = NULL;
+    _tempSampleDataSize = 0;
+
+    PaLock();
+    while (true) {
+      // Ack the last thing we read
+      if (LATE(pa_stream_drop)(_recStream) != 0) {
+        RTC_LOG(LS_WARNING)
+            << "failed to drop, err=" << LATE(pa_context_errno)(_paContext);
+      }
+
+      if (LATE(pa_stream_readable_size)(_recStream) <= 0) {
+        // Then that was all the data
+        break;
+      }
+
+      // Else more data.
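+      // Peek the next fragment here; it is handed to ReadRecordedData()
+      // below and acked with pa_stream_drop at the top of the next loop
+      // iteration.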
+      const void* sampleData;
+      size_t sampleDataSize;
+
+      if (LATE(pa_stream_peek)(_recStream, &sampleData, &sampleDataSize) != 0) {
+        RTC_LOG(LS_ERROR) << "RECORD_ERROR, error = "
+                          << LATE(pa_context_errno)(_paContext);
+        break;
+      }
+
+      // Drop lock for sigslot dispatch, which could take a while.
+      PaUnLock();
+      // Read data and provide it to VoiceEngine
+      if (ReadRecordedData(sampleData, sampleDataSize) == -1) {
+        return true;
+      }
+      PaLock();
+
+      // Return to top of loop for the ack and the check for more data.
+    }
+
+    EnableReadCallback();
+    PaUnLock();
+
+  }  // _recording
+
+  return true;
+}
+
+bool AudioDeviceLinuxPulse::KeyPressed() const {
+#if defined(WEBRTC_USE_X11)
+  char szKey[32];
+  unsigned int i = 0;
+  char state = 0;
+
+  if (!_XDisplay)
+    return false;
+
+  // Check key map status
+  XQueryKeymap(_XDisplay, szKey);
+
+  // A bit change in keymap means a key is pressed
+  for (i = 0; i < sizeof(szKey); i++)
+    state |= (szKey[i] ^ _oldKeyState[i]) & szKey[i];
+
+  // Save old state
+  memcpy((char*)_oldKeyState, (char*)szKey, sizeof(_oldKeyState));
+  return (state != 0);
+#else
+  return false;
+#endif
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.h b/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.h
new file mode 100644
index 0000000000..0cf89ef011
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/linux/audio_device_pulse_linux.h
@@ -0,0 +1,349 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_PULSE_LINUX_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_PULSE_LINUX_H_
+
+#include <memory>
+
+#include "api/sequence_checker.h"
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "modules/audio_device/linux/audio_mixer_manager_pulse_linux.h"
+#include "modules/audio_device/linux/pulseaudiosymboltable_linux.h"
+#include "rtc_base/event.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+#if defined(WEBRTC_USE_X11)
+#include <X11/Xlib.h>
+#endif
+
+#include <pulse/pulseaudio.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// We define this flag if it's missing from our headers, because we want to be
+// able to compile against old headers but still use PA_STREAM_ADJUST_LATENCY
+// if run against a recent version of the library.
+#ifndef PA_STREAM_ADJUST_LATENCY
+#define PA_STREAM_ADJUST_LATENCY 0x2000U
+#endif
+#ifndef PA_STREAM_START_MUTED
+#define PA_STREAM_START_MUTED 0x1000U
+#endif
+
+// Set this constant to 0 to disable latency reading
+const uint32_t WEBRTC_PA_REPORT_LATENCY = 1;
+
+// Constants from implementation by Tristan Schmelcher [tschmelcher@google.com]
+
+// First PulseAudio protocol version that supports PA_STREAM_ADJUST_LATENCY.
+const uint32_t WEBRTC_PA_ADJUST_LATENCY_PROTOCOL_VERSION = 13;
+
+// Some timing constants for optimal operation. See
+// https://tango.0pointer.de/pipermail/pulseaudio-discuss/2008-January/001170.html
+// for a good explanation of some of the factors that go into this.
+
+// Playback.
+
+// For playback, there is a round-trip delay to fill the server-side playback
+// buffer, so setting too low a latency is a buffer underflow risk. We will
+// automatically increase the latency if a buffer underflow does occur, but we
+// also enforce a sane minimum at start-up time. Anything lower would be
+// virtually guaranteed to underflow at least once, so there's no point in
+// allowing lower latencies.
+const uint32_t WEBRTC_PA_PLAYBACK_LATENCY_MINIMUM_MSECS = 20;
+
+// Every time a playback stream underflows, we will reconfigure it with target
+// latency that is greater by this amount.
+const uint32_t WEBRTC_PA_PLAYBACK_LATENCY_INCREMENT_MSECS = 20;
+
+// We also need to configure a suitable request size. Too small and we'd burn
+// CPU from the overhead of transferring small amounts of data at once. Too
+// large and the amount of data remaining in the buffer right before refilling
+// it would be a buffer underflow risk. We set it to half of the buffer size.
+const uint32_t WEBRTC_PA_PLAYBACK_REQUEST_FACTOR = 2;
+
+// Capture.
+
+// For capture, low latency is not a buffer overflow risk, but it makes us burn
+// CPU from the overhead of transferring small amounts of data at once, so we
+// set a recommended value that we use for the kLowLatency constant (but if the
+// user explicitly requests something lower then we will honour it).
+// 1ms takes about 6-7% CPU. 5ms takes about 5%. 10ms takes about 4.x%.
+const uint32_t WEBRTC_PA_LOW_CAPTURE_LATENCY_MSECS = 10;
+
+// There is a round-trip delay to ack the data to the server, so the
+// server-side buffer needs extra space to prevent buffer overflow. 20ms is
+// sufficient, but there is no penalty to making it bigger, so we make it huge.
+// (750ms is libpulse's default value for the _total_ buffer size in the
+// kNoLatencyRequirements case.)
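+// As a rough worked example (illustration only, not from the original
+// comment): for mono S16 at 48 kHz, pa_bytes_per_second is 48000 * 2 = 96000
+// bytes/s, so InitRecording() computes fragsize = 96000 * 10 / 1000 = 960
+// bytes (10 ms) and maxlength = 960 + 96000 * 750 / 1000 = 72960 bytes.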
+const uint32_t WEBRTC_PA_CAPTURE_BUFFER_EXTRA_MSECS = 750;
+
+const uint32_t WEBRTC_PA_MSECS_PER_SEC = 1000;
+
+// Init _configuredLatencyRec/Play to this value to disable latency requirements
+const int32_t WEBRTC_PA_NO_LATENCY_REQUIREMENTS = -1;
+
+// Set this const to 1 to account for peeked and used data in latency
+// calculation
+const uint32_t WEBRTC_PA_CAPTURE_BUFFER_LATENCY_ADJUSTMENT = 0;
+
+typedef webrtc::adm_linux_pulse::PulseAudioSymbolTable WebRTCPulseSymbolTable;
+WebRTCPulseSymbolTable* GetPulseSymbolTable();
+
+namespace webrtc {
+
+class AudioDeviceLinuxPulse : public AudioDeviceGeneric {
+ public:
+  AudioDeviceLinuxPulse();
+  virtual ~AudioDeviceLinuxPulse();
+
+  // Retrieve the currently utilized audio layer
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const override;
+
+  // Main initialization and termination
+  InitStatus Init() override;
+  int32_t Terminate() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Initialized() const override;
+
+  // Device enumeration
+  int16_t PlayoutDevices() override;
+  int16_t RecordingDevices() override;
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override;
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override;
+
+  // Device selection
+  int32_t SetPlayoutDevice(uint16_t index) override;
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+  int32_t SetRecordingDevice(uint16_t index) override;
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override;
+
+  // Audio transport initialization
+  int32_t PlayoutIsAvailable(bool& available) override;
+  int32_t InitPlayout() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool PlayoutIsInitialized() const override;
+  int32_t RecordingIsAvailable(bool& available) override;
+  int32_t InitRecording() override;
+  bool RecordingIsInitialized() const override;
+
+  // Audio transport control
+  int32_t StartPlayout() RTC_LOCKS_EXCLUDED(mutex_) override;
+  int32_t StopPlayout() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Playing() const override;
+  int32_t StartRecording() RTC_LOCKS_EXCLUDED(mutex_) override;
+  int32_t StopRecording() RTC_LOCKS_EXCLUDED(mutex_) override;
+  bool Recording() const override;
+
+  // Audio mixer initialization
+  int32_t InitSpeaker() override;
+  bool SpeakerIsInitialized() const override;
+  int32_t InitMicrophone() override;
+  bool MicrophoneIsInitialized() const override;
+
+  // Speaker volume controls
+  int32_t SpeakerVolumeIsAvailable(bool& available) override;
+  int32_t SetSpeakerVolume(uint32_t volume) override;
+  int32_t SpeakerVolume(uint32_t& volume) const override;
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override;
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const override;
+
+  // Microphone volume controls
+  int32_t MicrophoneVolumeIsAvailable(bool& available) override;
+  int32_t SetMicrophoneVolume(uint32_t volume) override;
+  int32_t MicrophoneVolume(uint32_t& volume) const override;
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override;
+  int32_t MinMicrophoneVolume(uint32_t& minVolume) const override;
+
+  // Speaker mute control
+  int32_t SpeakerMuteIsAvailable(bool& available) override;
+  int32_t SetSpeakerMute(bool enable) override;
+  int32_t SpeakerMute(bool& enabled) const override;
+
+  // Microphone mute control
+  int32_t MicrophoneMuteIsAvailable(bool& available) override;
+  int32_t SetMicrophoneMute(bool enable) override;
+ int32_t MicrophoneMute(bool& enabled) const override; + + // Stereo support + int32_t StereoPlayoutIsAvailable(bool& available) override; + int32_t SetStereoPlayout(bool enable) override; + int32_t StereoPlayout(bool& enabled) const override; + int32_t StereoRecordingIsAvailable(bool& available) override; + int32_t SetStereoRecording(bool enable) override; + int32_t StereoRecording(bool& enabled) const override; + + // Delay information and control + int32_t PlayoutDelay(uint16_t& delayMS) const + RTC_LOCKS_EXCLUDED(mutex_) override; + + void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override; + + private: + void Lock() RTC_EXCLUSIVE_LOCK_FUNCTION(mutex_) { mutex_.Lock(); } + void UnLock() RTC_UNLOCK_FUNCTION(mutex_) { mutex_.Unlock(); } + void WaitForOperationCompletion(pa_operation* paOperation) const; + void WaitForSuccess(pa_operation* paOperation) const; + + bool KeyPressed() const; + + static void PaContextStateCallback(pa_context* c, void* pThis); + static void PaSinkInfoCallback(pa_context* c, + const pa_sink_info* i, + int eol, + void* pThis); + static void PaSourceInfoCallback(pa_context* c, + const pa_source_info* i, + int eol, + void* pThis); + static void PaServerInfoCallback(pa_context* c, + const pa_server_info* i, + void* pThis); + static void PaStreamStateCallback(pa_stream* p, void* pThis); + void PaContextStateCallbackHandler(pa_context* c); + void PaSinkInfoCallbackHandler(const pa_sink_info* i, int eol); + void PaSourceInfoCallbackHandler(const pa_source_info* i, int eol); + void PaServerInfoCallbackHandler(const pa_server_info* i); + void PaStreamStateCallbackHandler(pa_stream* p); + + void EnableWriteCallback(); + void DisableWriteCallback(); + static void PaStreamWriteCallback(pa_stream* unused, + size_t buffer_space, + void* pThis); + void PaStreamWriteCallbackHandler(size_t buffer_space); + static void PaStreamUnderflowCallback(pa_stream* unused, void* pThis); + void PaStreamUnderflowCallbackHandler(); + void EnableReadCallback(); + void DisableReadCallback(); + static void PaStreamReadCallback(pa_stream* unused1, + size_t unused2, + void* pThis); + void PaStreamReadCallbackHandler(); + static void PaStreamOverflowCallback(pa_stream* unused, void* pThis); + void PaStreamOverflowCallbackHandler(); + int32_t LatencyUsecs(pa_stream* stream); + int32_t ReadRecordedData(const void* bufferData, size_t bufferSize); + int32_t ProcessRecordedData(int8_t* bufferData, + uint32_t bufferSizeInSamples, + uint32_t recDelay); + + int32_t CheckPulseAudioVersion(); + int32_t InitSamplingFrequency(); + int32_t GetDefaultDeviceInfo(bool recDevice, char* name, uint16_t& index); + int32_t InitPulseAudio(); + int32_t TerminatePulseAudio(); + + void PaLock(); + void PaUnLock(); + + static void RecThreadFunc(void*); + static void PlayThreadFunc(void*); + bool RecThreadProcess() RTC_LOCKS_EXCLUDED(mutex_); + bool PlayThreadProcess() RTC_LOCKS_EXCLUDED(mutex_); + + AudioDeviceBuffer* _ptrAudioBuffer; + + mutable Mutex mutex_; + rtc::Event _timeEventRec; + rtc::Event _timeEventPlay; + rtc::Event _recStartEvent; + rtc::Event _playStartEvent; + + rtc::PlatformThread _ptrThreadPlay; + rtc::PlatformThread _ptrThreadRec; + + AudioMixerManagerLinuxPulse _mixerManager; + + uint16_t _inputDeviceIndex; + uint16_t _outputDeviceIndex; + bool _inputDeviceIsSpecified; + bool _outputDeviceIsSpecified; + + int sample_rate_hz_; + uint8_t _recChannels; + uint8_t _playChannels; + + // Stores thread ID in constructor. 
+  // We can then use RTC_DCHECK_RUN_ON(&thread_checker_) to ensure that
+  // other methods are called from the same thread.
+  // Currently only does RTC_DCHECK(thread_checker_.IsCurrent()).
+  SequenceChecker thread_checker_;
+
+  bool _initialized;
+  bool _recording;
+  bool _playing;
+  bool _recIsInitialized;
+  bool _playIsInitialized;
+  bool _startRec;
+  bool _startPlay;
+  bool update_speaker_volume_at_startup_;
+  bool quit_ RTC_GUARDED_BY(&mutex_);
+
+  uint32_t _sndCardPlayDelay RTC_GUARDED_BY(&mutex_);
+
+  int32_t _writeErrors;
+
+  uint16_t _deviceIndex;
+  int16_t _numPlayDevices;
+  int16_t _numRecDevices;
+  char* _playDeviceName;
+  char* _recDeviceName;
+  char* _playDisplayDeviceName;
+  char* _recDisplayDeviceName;
+  char _paServerVersion[32];
+
+  int8_t* _playBuffer;
+  size_t _playbackBufferSize;
+  size_t _playbackBufferUnused;
+  size_t _tempBufferSpace;
+  int8_t* _recBuffer;
+  size_t _recordBufferSize;
+  size_t _recordBufferUsed;
+  const void* _tempSampleData;
+  size_t _tempSampleDataSize;
+  int32_t _configuredLatencyPlay;
+  int32_t _configuredLatencyRec;
+
+  // PulseAudio
+  uint16_t _paDeviceIndex;
+  bool _paStateChanged;
+
+  pa_threaded_mainloop* _paMainloop;
+  pa_mainloop_api* _paMainloopApi;
+  pa_context* _paContext;
+
+  pa_stream* _recStream;
+  pa_stream* _playStream;
+  uint32_t _recStreamFlags;
+  uint32_t _playStreamFlags;
+  pa_buffer_attr _playBufferAttr;
+  pa_buffer_attr _recBufferAttr;
+
+  char _oldKeyState[32];
+#if defined(WEBRTC_USE_X11)
+  Display* _XDisplay;
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_AUDIO_DEVICE_PULSE_LINUX_H_
diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.cc
new file mode 100644
index 0000000000..e7e7033173
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.cc
@@ -0,0 +1,979 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/linux/audio_mixer_manager_alsa_linux.h"
+
+#include "modules/audio_device/linux/audio_device_alsa_linux.h"
+#include "rtc_base/logging.h"
+
+// Accesses ALSA functions through our late-binding symbol table instead of
+// directly. This way we don't have to link to libasound, which means our
+// binary will work on systems that don't have it.
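+// For illustration (this note is an editorial addition): a call written as
+// LATE(snd_mixer_close)(handle) expands through LATESYM_GET so that
+// snd_mixer_close is looked up in the dynamically loaded symbol table at call
+// time rather than being resolved by the linker.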
+#define LATE(sym) \ + LATESYM_GET(webrtc::adm_linux_alsa::AlsaSymbolTable, GetAlsaSymbolTable(), \ + sym) + +namespace webrtc { + +AudioMixerManagerLinuxALSA::AudioMixerManagerLinuxALSA() + : _outputMixerHandle(NULL), + _inputMixerHandle(NULL), + _outputMixerElement(NULL), + _inputMixerElement(NULL) { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " created"; + + memset(_outputMixerStr, 0, kAdmMaxDeviceNameSize); + memset(_inputMixerStr, 0, kAdmMaxDeviceNameSize); +} + +AudioMixerManagerLinuxALSA::~AudioMixerManagerLinuxALSA() { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed"; + Close(); +} + +// ============================================================================ +// PUBLIC METHODS +// ============================================================================ + +int32_t AudioMixerManagerLinuxALSA::Close() { + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + MutexLock lock(&mutex_); + + CloseSpeakerLocked(); + CloseMicrophoneLocked(); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::CloseSpeaker() { + MutexLock lock(&mutex_); + return CloseSpeakerLocked(); +} + +int32_t AudioMixerManagerLinuxALSA::CloseSpeakerLocked() { + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + int errVal = 0; + + if (_outputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "Closing playout mixer"; + LATE(snd_mixer_free)(_outputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error freeing playout mixer: " + << LATE(snd_strerror)(errVal); + } + errVal = LATE(snd_mixer_detach)(_outputMixerHandle, _outputMixerStr); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error detaching playout mixer: " + << LATE(snd_strerror)(errVal); + } + errVal = LATE(snd_mixer_close)(_outputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error snd_mixer_close(handleMixer) errVal=" + << errVal; + } + _outputMixerHandle = NULL; + _outputMixerElement = NULL; + } + memset(_outputMixerStr, 0, kAdmMaxDeviceNameSize); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::CloseMicrophone() { + MutexLock lock(&mutex_); + return CloseMicrophoneLocked(); +} + +int32_t AudioMixerManagerLinuxALSA::CloseMicrophoneLocked() { + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + int errVal = 0; + + if (_inputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "Closing record mixer"; + + LATE(snd_mixer_free)(_inputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error freeing record mixer: " + << LATE(snd_strerror)(errVal); + } + RTC_LOG(LS_VERBOSE) << "Closing record mixer 2"; + + errVal = LATE(snd_mixer_detach)(_inputMixerHandle, _inputMixerStr); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error detaching record mixer: " + << LATE(snd_strerror)(errVal); + } + RTC_LOG(LS_VERBOSE) << "Closing record mixer 3"; + + errVal = LATE(snd_mixer_close)(_inputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error snd_mixer_close(handleMixer) errVal=" + << errVal; + } + + RTC_LOG(LS_VERBOSE) << "Closing record mixer 4"; + _inputMixerHandle = NULL; + _inputMixerElement = NULL; + } + memset(_inputMixerStr, 0, kAdmMaxDeviceNameSize); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::OpenSpeaker(char* deviceName) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::OpenSpeaker(name=" + << deviceName << ")"; + + MutexLock lock(&mutex_); + + int errVal = 0; + + // Close any existing output mixer handle + // + if (_outputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "Closing playout mixer"; + + LATE(snd_mixer_free)(_outputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error freeing playout mixer: " + << LATE(snd_strerror)(errVal); 
+ } + errVal = LATE(snd_mixer_detach)(_outputMixerHandle, _outputMixerStr); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error detaching playout mixer: " + << LATE(snd_strerror)(errVal); + } + errVal = LATE(snd_mixer_close)(_outputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error snd_mixer_close(handleMixer) errVal=" + << errVal; + } + } + _outputMixerHandle = NULL; + _outputMixerElement = NULL; + + errVal = LATE(snd_mixer_open)(&_outputMixerHandle, 0); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_open(&_outputMixerHandle, 0) - error"; + return -1; + } + + char controlName[kAdmMaxDeviceNameSize] = {0}; + GetControlName(controlName, deviceName); + + RTC_LOG(LS_VERBOSE) << "snd_mixer_attach(_outputMixerHandle, " << controlName + << ")"; + + errVal = LATE(snd_mixer_attach)(_outputMixerHandle, controlName); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_attach(_outputMixerHandle, " << controlName + << ") error: " << LATE(snd_strerror)(errVal); + _outputMixerHandle = NULL; + return -1; + } + strcpy(_outputMixerStr, controlName); + + errVal = LATE(snd_mixer_selem_register)(_outputMixerHandle, NULL, NULL); + if (errVal < 0) { + RTC_LOG(LS_ERROR) + << "snd_mixer_selem_register(_outputMixerHandle, NULL, NULL), " + "error: " + << LATE(snd_strerror)(errVal); + _outputMixerHandle = NULL; + return -1; + } + + // Load and find the proper mixer element + if (LoadSpeakerMixerElement() < 0) { + return -1; + } + + if (_outputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "the output mixer device is now open (" + << _outputMixerHandle << ")"; + } + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::OpenMicrophone(char* deviceName) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::OpenMicrophone(name=" + << deviceName << ")"; + + MutexLock lock(&mutex_); + + int errVal = 0; + + // Close any existing input mixer handle + // + if (_inputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "Closing record mixer"; + + LATE(snd_mixer_free)(_inputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error freeing record mixer: " + << LATE(snd_strerror)(errVal); + } + RTC_LOG(LS_VERBOSE) << "Closing record mixer"; + + errVal = LATE(snd_mixer_detach)(_inputMixerHandle, _inputMixerStr); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error detaching record mixer: " + << LATE(snd_strerror)(errVal); + } + RTC_LOG(LS_VERBOSE) << "Closing record mixer"; + + errVal = LATE(snd_mixer_close)(_inputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error snd_mixer_close(handleMixer) errVal=" + << errVal; + } + RTC_LOG(LS_VERBOSE) << "Closing record mixer"; + } + _inputMixerHandle = NULL; + _inputMixerElement = NULL; + + errVal = LATE(snd_mixer_open)(&_inputMixerHandle, 0); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_open(&_inputMixerHandle, 0) - error"; + return -1; + } + + char controlName[kAdmMaxDeviceNameSize] = {0}; + GetControlName(controlName, deviceName); + + RTC_LOG(LS_VERBOSE) << "snd_mixer_attach(_inputMixerHandle, " << controlName + << ")"; + + errVal = LATE(snd_mixer_attach)(_inputMixerHandle, controlName); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_attach(_inputMixerHandle, " << controlName + << ") error: " << LATE(snd_strerror)(errVal); + + _inputMixerHandle = NULL; + return -1; + } + strcpy(_inputMixerStr, controlName); + + errVal = LATE(snd_mixer_selem_register)(_inputMixerHandle, NULL, NULL); + if (errVal < 0) { + RTC_LOG(LS_ERROR) + << "snd_mixer_selem_register(_inputMixerHandle, NULL, NULL), " + "error: " + << LATE(snd_strerror)(errVal); + + 
_inputMixerHandle = NULL; + return -1; + } + // Load and find the proper mixer element + if (LoadMicMixerElement() < 0) { + return -1; + } + + if (_inputMixerHandle != NULL) { + RTC_LOG(LS_VERBOSE) << "the input mixer device is now open (" + << _inputMixerHandle << ")"; + } + + return 0; +} + +bool AudioMixerManagerLinuxALSA::SpeakerIsInitialized() const { + RTC_DLOG(LS_INFO) << __FUNCTION__; + + return (_outputMixerHandle != NULL); +} + +bool AudioMixerManagerLinuxALSA::MicrophoneIsInitialized() const { + RTC_DLOG(LS_INFO) << __FUNCTION__; + + return (_inputMixerHandle != NULL); +} + +int32_t AudioMixerManagerLinuxALSA::SetSpeakerVolume(uint32_t volume) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::SetSpeakerVolume(volume=" + << volume << ")"; + + MutexLock lock(&mutex_); + + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + int errVal = LATE(snd_mixer_selem_set_playback_volume_all)( + _outputMixerElement, volume); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error changing master volume: " + << LATE(snd_strerror)(errVal); + return -1; + } + + return (0); +} + +int32_t AudioMixerManagerLinuxALSA::SpeakerVolume(uint32_t& volume) const { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + long int vol(0); + + int errVal = LATE(snd_mixer_selem_get_playback_volume)( + _outputMixerElement, (snd_mixer_selem_channel_id_t)0, &vol); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error getting outputvolume: " + << LATE(snd_strerror)(errVal); + return -1; + } + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::SpeakerVolume() => vol=" + << vol; + + volume = static_cast<uint32_t>(vol); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MaxSpeakerVolume( + uint32_t& maxVolume) const { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avilable output mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = LATE(snd_mixer_selem_get_playback_volume_range)( + _outputMixerElement, &minVol, &maxVol); + + RTC_LOG(LS_VERBOSE) << "Playout hardware volume range, min: " << minVol + << ", max: " << maxVol; + + if (maxVol <= minVol) { + RTC_LOG(LS_ERROR) << "Error getting get_playback_volume_range: " + << LATE(snd_strerror)(errVal); + } + + maxVolume = static_cast<uint32_t>(maxVol); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MinSpeakerVolume( + uint32_t& minVolume) const { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = LATE(snd_mixer_selem_get_playback_volume_range)( + _outputMixerElement, &minVol, &maxVol); + + RTC_LOG(LS_VERBOSE) << "Playout hardware volume range, min: " << minVol + << ", max: " << maxVol; + + if (maxVol <= minVol) { + RTC_LOG(LS_ERROR) << "Error getting get_playback_volume_range: " + << LATE(snd_strerror)(errVal); + } + + minVolume = static_cast<uint32_t>(minVol); + + return 0; +} + +// TL: Have done testnig with these but they don't seem reliable and +// they were therefore not added +/* + // ---------------------------------------------------------------------------- + // SetMaxSpeakerVolume + // ---------------------------------------------------------------------------- + + int32_t AudioMixerManagerLinuxALSA::SetMaxSpeakerVolume( + uint32_t maxVolume) + { + + if (_outputMixerElement == NULL) + { + RTC_LOG(LS_WARNING) << "no avaliable output mixer
element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = snd_mixer_selem_get_playback_volume_range( + _outputMixerElement, &minVol, &maxVol); + if ((maxVol <= minVol) || (errVal != 0)) + { + RTC_LOG(LS_WARNING) << "Error getting playback volume range: " + << snd_strerror(errVal); + } + + maxVol = maxVolume; + errVal = snd_mixer_selem_set_playback_volume_range( + _outputMixerElement, minVol, maxVol); + RTC_LOG(LS_VERBOSE) << "Playout hardware volume range, min: " << minVol + << ", max: " << maxVol; + if (errVal != 0) + { + RTC_LOG(LS_ERROR) << "Error setting playback volume range: " + << snd_strerror(errVal); + return -1; + } + + return 0; + } + + // ---------------------------------------------------------------------------- + // SetMinSpeakerVolume + // ---------------------------------------------------------------------------- + + int32_t AudioMixerManagerLinuxALSA::SetMinSpeakerVolume( + uint32_t minVolume) + { + + if (_outputMixerElement == NULL) + { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = snd_mixer_selem_get_playback_volume_range( + _outputMixerElement, &minVol, &maxVol); + if ((maxVol <= minVol) || (errVal != 0)) + { + RTC_LOG(LS_WARNING) << "Error getting playback volume range: " + << snd_strerror(errVal); + } + + minVol = minVolume; + errVal = snd_mixer_selem_set_playback_volume_range( + _outputMixerElement, minVol, maxVol); + RTC_LOG(LS_VERBOSE) << "Playout hardware volume range, min: " << minVol + << ", max: " << maxVol; + if (errVal != 0) + { + RTC_LOG(LS_ERROR) << "Error setting playback volume range: " + << snd_strerror(errVal); + return -1; + } + + return 0; + } + */ + +int32_t AudioMixerManagerLinuxALSA::SpeakerVolumeIsAvailable(bool& available) { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + available = LATE(snd_mixer_selem_has_playback_volume)(_outputMixerElement); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::SpeakerMuteIsAvailable(bool& available) { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + available = LATE(snd_mixer_selem_has_playback_switch)(_outputMixerElement); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::SetSpeakerMute(bool enable) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::SetSpeakerMute(enable=" + << enable << ")"; + + MutexLock lock(&mutex_); + + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + // Ensure that the selected speaker destination has a valid mute control. + bool available(false); + SpeakerMuteIsAvailable(available); + if (!available) { + RTC_LOG(LS_WARNING) << "it is not possible to mute the speaker"; + return -1; + } + + // Note value = 0 (off) means muted + int errVal = LATE(snd_mixer_selem_set_playback_switch_all)( + _outputMixerElement, !enable); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error setting playback switch: " + << LATE(snd_strerror)(errVal); + return -1; + } + + return (0); +} + +int32_t AudioMixerManagerLinuxALSA::SpeakerMute(bool& enabled) const { + if (_outputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable output mixer exists"; + return -1; + } + + // Ensure that the selected speaker destination has a valid mute control. 
+ bool available = + LATE(snd_mixer_selem_has_playback_switch)(_outputMixerElement); + if (!available) { + RTC_LOG(LS_WARNING) << "it is not possible to mute the speaker"; + return -1; + } + + int value(false); + + // Retrieve one boolean control value for a specified mute-control + // + int errVal = LATE(snd_mixer_selem_get_playback_switch)( + _outputMixerElement, (snd_mixer_selem_channel_id_t)0, &value); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error getting playback switch: " + << LATE(snd_strerror)(errVal); + return -1; + } + + // Note value = 0 (off) means muted + enabled = (bool)!value; + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MicrophoneMuteIsAvailable(bool& available) { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + available = LATE(snd_mixer_selem_has_capture_switch)(_inputMixerElement); + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::SetMicrophoneMute(bool enable) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxALSA::SetMicrophoneMute(enable=" + << enable << ")"; + + MutexLock lock(&mutex_); + + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + // Ensure that the selected microphone destination has a valid mute control. + bool available(false); + MicrophoneMuteIsAvailable(available); + if (!available) { + RTC_LOG(LS_WARNING) << "it is not possible to mute the microphone"; + return -1; + } + + // Note value = 0 (off) means muted + int errVal = + LATE(snd_mixer_selem_set_capture_switch_all)(_inputMixerElement, !enable); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error setting capture switch: " + << LATE(snd_strerror)(errVal); + return -1; + } + + return (0); +} + +int32_t AudioMixerManagerLinuxALSA::MicrophoneMute(bool& enabled) const { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer exists"; + return -1; + } + + // Ensure that the selected microphone destination has a valid mute control. 
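+ // The capture path mirrors the playout path: the capture switch is the + // hardware mute control, and channel 0 is read below as representative of + // the whole element.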
+ bool available = LATE(snd_mixer_selem_has_capture_switch)(_inputMixerElement); + if (!available) { + RTC_LOG(LS_WARNING) << "it is not possible to mute the microphone"; + return -1; + } + + int value(false); + + // Retrieve one boolean control value for a specified mute-control + // + int errVal = LATE(snd_mixer_selem_get_capture_switch)( + _inputMixerElement, (snd_mixer_selem_channel_id_t)0, &value); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error getting capture switch: " + << LATE(snd_strerror)(errVal); + return -1; + } + + // Note value = 0 (off) means muted + enabled = (bool)!value; + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MicrophoneVolumeIsAvailable( + bool& available) { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + available = LATE(snd_mixer_selem_has_capture_volume)(_inputMixerElement); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::SetMicrophoneVolume(uint32_t volume) { + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxALSA::SetMicrophoneVolume(volume=" << volume + << ")"; + + MutexLock lock(&mutex_); + + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + int errVal = + LATE(snd_mixer_selem_set_capture_volume_all)(_inputMixerElement, volume); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error changing microphone volume: " + << LATE(snd_strerror)(errVal); + return -1; + } + + return (0); +} + +// TL: Have done testnig with these but they don't seem reliable and +// they were therefore not added +/* + // ---------------------------------------------------------------------------- + // SetMaxMicrophoneVolume + // ---------------------------------------------------------------------------- + + int32_t AudioMixerManagerLinuxALSA::SetMaxMicrophoneVolume( + uint32_t maxVolume) + { + + if (_inputMixerElement == NULL) + { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = snd_mixer_selem_get_capture_volume_range(_inputMixerElement, + &minVol, &maxVol); + if ((maxVol <= minVol) || (errVal != 0)) + { + RTC_LOG(LS_WARNING) << "Error getting capture volume range: " + << snd_strerror(errVal); + } + + maxVol = (long int)maxVolume; + printf("min %d max %d", minVol, maxVol); + errVal = snd_mixer_selem_set_capture_volume_range(_inputMixerElement, minVol, + maxVol); RTC_LOG(LS_VERBOSE) << "Capture hardware volume range, min: " << + minVol + << ", max: " << maxVol; + if (errVal != 0) + { + RTC_LOG(LS_ERROR) << "Error setting capture volume range: " + << snd_strerror(errVal); + return -1; + } + + return 0; + } + + // ---------------------------------------------------------------------------- + // SetMinMicrophoneVolume + // ---------------------------------------------------------------------------- + + int32_t AudioMixerManagerLinuxALSA::SetMinMicrophoneVolume( + uint32_t minVolume) + { + + if (_inputMixerElement == NULL) + { + RTC_LOG(LS_WARNING) << "no avaliable output mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = snd_mixer_selem_get_capture_volume_range( + _inputMixerElement, &minVol, &maxVol); + if (maxVol <= minVol) + { + //maxVol = 255; + RTC_LOG(LS_WARNING) << "Error getting capture volume range: " + << snd_strerror(errVal); + } + + printf("min %d max %d", minVol, maxVol); + minVol = (long int)minVolume; + errVal = snd_mixer_selem_set_capture_volume_range( + 
_inputMixerElement, minVol, maxVol); + RTC_LOG(LS_VERBOSE) << "Capture hardware volume range, min: " << minVol + << ", max: " << maxVol; + if (errVal != 0) + { + RTC_LOG(LS_ERROR) << "Error setting capture volume range: " + << snd_strerror(errVal); + return -1; + } + + return 0; + } + */ + +int32_t AudioMixerManagerLinuxALSA::MicrophoneVolume(uint32_t& volume) const { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + long int vol(0); + + int errVal = LATE(snd_mixer_selem_get_capture_volume)( + _inputMixerElement, (snd_mixer_selem_channel_id_t)0, &vol); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "Error getting inputvolume: " + << LATE(snd_strerror)(errVal); + return -1; + } + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxALSA::MicrophoneVolume() => vol=" << vol; + + volume = static_cast<uint32_t>(vol); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MaxMicrophoneVolume( + uint32_t& maxVolume) const { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + // check if we have mic volume at all + if (!LATE(snd_mixer_selem_has_capture_volume)(_inputMixerElement)) { + RTC_LOG(LS_ERROR) << "No microphone volume available"; + return -1; + } + + int errVal = LATE(snd_mixer_selem_get_capture_volume_range)( + _inputMixerElement, &minVol, &maxVol); + + RTC_LOG(LS_VERBOSE) << "Microphone hardware volume range, min: " << minVol + << ", max: " << maxVol; + if (maxVol <= minVol) { + RTC_LOG(LS_ERROR) << "Error getting microphone volume range: " + << LATE(snd_strerror)(errVal); + } + + maxVolume = static_cast<uint32_t>(maxVol); + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::MinMicrophoneVolume( + uint32_t& minVolume) const { + if (_inputMixerElement == NULL) { + RTC_LOG(LS_WARNING) << "no avaliable input mixer element exists"; + return -1; + } + + long int minVol(0); + long int maxVol(0); + + int errVal = LATE(snd_mixer_selem_get_capture_volume_range)( + _inputMixerElement, &minVol, &maxVol); + + RTC_LOG(LS_VERBOSE) << "Microphone hardware volume range, min: " << minVol + << ", max: " << maxVol; + if (maxVol <= minVol) { + RTC_LOG(LS_ERROR) << "Error getting microphone volume range: " + << LATE(snd_strerror)(errVal); + } + + minVolume = static_cast<uint32_t>(minVol); + + return 0; +} + +// ============================================================================ +// Private Methods +// ============================================================================ + +int32_t AudioMixerManagerLinuxALSA::LoadMicMixerElement() const { + int errVal = LATE(snd_mixer_load)(_inputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_load(_inputMixerHandle), error: " + << LATE(snd_strerror)(errVal); + _inputMixerHandle = NULL; + return -1; + } + + snd_mixer_elem_t* elem = NULL; + snd_mixer_elem_t* micElem = NULL; + unsigned mixerIdx = 0; + const char* selemName = NULL; + + // Find and store handles to the right mixer elements + for (elem = LATE(snd_mixer_first_elem)(_inputMixerHandle); elem; + elem = LATE(snd_mixer_elem_next)(elem), mixerIdx++) { + if (LATE(snd_mixer_selem_is_active)(elem)) { + selemName = LATE(snd_mixer_selem_get_name)(elem); + if (strcmp(selemName, "Capture") == 0) // "Capture", "Mic" + { + _inputMixerElement = elem; + RTC_LOG(LS_VERBOSE) << "Capture element set"; + } else if (strcmp(selemName, "Mic") == 0) { + micElem = elem; + RTC_LOG(LS_VERBOSE) << "Mic element found"; + } + } + + if
(_inputMixerElement) { + // Use the first Capture element that is found + // The second one may not work + break; + } + } + + if (_inputMixerElement == NULL) { + // We didn't find a Capture handle, use Mic. + if (micElem != NULL) { + _inputMixerElement = micElem; + RTC_LOG(LS_VERBOSE) << "Using Mic as capture volume."; + } else { + _inputMixerElement = NULL; + RTC_LOG(LS_ERROR) << "Could not find capture volume on the mixer."; + + return -1; + } + } + + return 0; +} + +int32_t AudioMixerManagerLinuxALSA::LoadSpeakerMixerElement() const { + int errVal = LATE(snd_mixer_load)(_outputMixerHandle); + if (errVal < 0) { + RTC_LOG(LS_ERROR) << "snd_mixer_load(_outputMixerHandle), error: " + << LATE(snd_strerror)(errVal); + _outputMixerHandle = NULL; + return -1; + } + + snd_mixer_elem_t* elem = NULL; + snd_mixer_elem_t* masterElem = NULL; + snd_mixer_elem_t* speakerElem = NULL; + unsigned mixerIdx = 0; + const char* selemName = NULL; + + // Find and store handles to the right mixer elements + for (elem = LATE(snd_mixer_first_elem)(_outputMixerHandle); elem; + elem = LATE(snd_mixer_elem_next)(elem), mixerIdx++) { + if (LATE(snd_mixer_selem_is_active)(elem)) { + selemName = LATE(snd_mixer_selem_get_name)(elem); + RTC_LOG(LS_VERBOSE) << "snd_mixer_selem_get_name " << mixerIdx << ": " + << selemName << " =" << elem; + + // "Master", "PCM", "Wave", "Master Mono", "PC Speaker", "PCM", "Wave" + if (strcmp(selemName, "PCM") == 0) { + _outputMixerElement = elem; + RTC_LOG(LS_VERBOSE) << "PCM element set"; + } else if (strcmp(selemName, "Master") == 0) { + masterElem = elem; + RTC_LOG(LS_VERBOSE) << "Master element found"; + } else if (strcmp(selemName, "Speaker") == 0) { + speakerElem = elem; + RTC_LOG(LS_VERBOSE) << "Speaker element found"; + } + } + + if (_outputMixerElement) { + // We have found the element we want + break; + } + } + + // If we didn't find a PCM Handle, use Master or Speaker + if (_outputMixerElement == NULL) { + if (masterElem != NULL) { + _outputMixerElement = masterElem; + RTC_LOG(LS_VERBOSE) << "Using Master as output volume."; + } else if (speakerElem != NULL) { + _outputMixerElement = speakerElem; + RTC_LOG(LS_VERBOSE) << "Using Speaker as output volume."; + } else { + _outputMixerElement = NULL; + RTC_LOG(LS_ERROR) << "Could not find output volume in the mixer."; + return -1; + } + } + + return 0; +} + +void AudioMixerManagerLinuxALSA::GetControlName(char* controlName, + char* deviceName) const { + // Example + // deviceName: "front:CARD=Intel,DEV=0" + // controlName: "hw:CARD=Intel" + char* pos1 = strchr(deviceName, ':'); + char* pos2 = strchr(deviceName, ','); + if (!pos2) { + // Can also be default:CARD=Intel + pos2 = &deviceName[strlen(deviceName)]; + } + if (pos1 && pos2) { + strcpy(controlName, "hw"); + int nChar = (int)(pos2 - pos1); + strncpy(&controlName[2], pos1, nChar); + controlName[2 + nChar] = '\0'; + } else { + strcpy(controlName, deviceName); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.h b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.h new file mode 100644 index 0000000000..d98287822d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_alsa_linux.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_DEVICE_AUDIO_MIXER_MANAGER_ALSA_LINUX_H_ +#define AUDIO_DEVICE_AUDIO_MIXER_MANAGER_ALSA_LINUX_H_ + +#include <alsa/asoundlib.h> + +#include "modules/audio_device/include/audio_device.h" +#include "modules/audio_device/linux/alsasymboltable_linux.h" +#include "rtc_base/synchronization/mutex.h" + +namespace webrtc { + +class AudioMixerManagerLinuxALSA { + public: + int32_t OpenSpeaker(char* deviceName) RTC_LOCKS_EXCLUDED(mutex_); + int32_t OpenMicrophone(char* deviceName) RTC_LOCKS_EXCLUDED(mutex_); + int32_t SetSpeakerVolume(uint32_t volume) RTC_LOCKS_EXCLUDED(mutex_); + int32_t SpeakerVolume(uint32_t& volume) const; + int32_t MaxSpeakerVolume(uint32_t& maxVolume) const; + int32_t MinSpeakerVolume(uint32_t& minVolume) const; + int32_t SpeakerVolumeIsAvailable(bool& available); + int32_t SpeakerMuteIsAvailable(bool& available); + int32_t SetSpeakerMute(bool enable) RTC_LOCKS_EXCLUDED(mutex_); + int32_t SpeakerMute(bool& enabled) const; + int32_t MicrophoneMuteIsAvailable(bool& available); + int32_t SetMicrophoneMute(bool enable) RTC_LOCKS_EXCLUDED(mutex_); + int32_t MicrophoneMute(bool& enabled) const; + int32_t MicrophoneVolumeIsAvailable(bool& available); + int32_t SetMicrophoneVolume(uint32_t volume) RTC_LOCKS_EXCLUDED(mutex_); + int32_t MicrophoneVolume(uint32_t& volume) const; + int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const; + int32_t MinMicrophoneVolume(uint32_t& minVolume) const; + int32_t Close() RTC_LOCKS_EXCLUDED(mutex_); + int32_t CloseSpeaker() RTC_LOCKS_EXCLUDED(mutex_); + int32_t CloseMicrophone() RTC_LOCKS_EXCLUDED(mutex_); + bool SpeakerIsInitialized() const; + bool MicrophoneIsInitialized() const; + + public: + AudioMixerManagerLinuxALSA(); + ~AudioMixerManagerLinuxALSA(); + + private: + int32_t CloseSpeakerLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + int32_t CloseMicrophoneLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + int32_t LoadMicMixerElement() const; + int32_t LoadSpeakerMixerElement() const; + void GetControlName(char* controlName, char* deviceName) const; + + private: + Mutex mutex_; + mutable snd_mixer_t* _outputMixerHandle; + char _outputMixerStr[kAdmMaxDeviceNameSize]; + mutable snd_mixer_t* _inputMixerHandle; + char _inputMixerStr[kAdmMaxDeviceNameSize]; + mutable snd_mixer_elem_t* _outputMixerElement; + mutable snd_mixer_elem_t* _inputMixerElement; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_MAIN_SOURCE_LINUX_AUDIO_MIXER_MANAGER_ALSA_LINUX_H_ diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.cc new file mode 100644 index 0000000000..91beee3c87 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.cc @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_device/linux/audio_mixer_manager_pulse_linux.h" + +#include <stddef.h> + +#include "modules/audio_device/linux/audio_device_pulse_linux.h" +#include "modules/audio_device/linux/latebindingsymboltable_linux.h" +#include "modules/audio_device/linux/pulseaudiosymboltable_linux.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +// Accesses Pulse functions through our late-binding symbol table instead of +// directly. This way we don't have to link to libpulse, which means our binary +// will work on systems that don't have it. +#define LATE(sym) \ + LATESYM_GET(webrtc::adm_linux_pulse::PulseAudioSymbolTable, \ + GetPulseSymbolTable(), sym) + +namespace webrtc { + +class AutoPulseLock { + public: + explicit AutoPulseLock(pa_threaded_mainloop* pa_mainloop) + : pa_mainloop_(pa_mainloop) { + LATE(pa_threaded_mainloop_lock)(pa_mainloop_); + } + + ~AutoPulseLock() { LATE(pa_threaded_mainloop_unlock)(pa_mainloop_); } + + private: + pa_threaded_mainloop* const pa_mainloop_; +}; + +AudioMixerManagerLinuxPulse::AudioMixerManagerLinuxPulse() + : _paOutputDeviceIndex(-1), + _paInputDeviceIndex(-1), + _paPlayStream(NULL), + _paRecStream(NULL), + _paMainloop(NULL), + _paContext(NULL), + _paVolume(0), + _paMute(0), + _paVolSteps(0), + _paSpeakerMute(false), + _paSpeakerVolume(PA_VOLUME_NORM), + _paChannels(0), + _paObjectsSet(false) { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " created"; +} + +AudioMixerManagerLinuxPulse::~AudioMixerManagerLinuxPulse() { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed"; + + Close(); +} + +// =========================================================================== +// PUBLIC METHODS +// =========================================================================== + +int32_t AudioMixerManagerLinuxPulse::SetPulseAudioObjects( + pa_threaded_mainloop* mainloop, + pa_context* context) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + if (!mainloop || !context) { + RTC_LOG(LS_ERROR) << "could not set PulseAudio objects for mixer"; + return -1; + } + + _paMainloop = mainloop; + _paContext = context; + _paObjectsSet = true; + + RTC_LOG(LS_VERBOSE) << "the PulseAudio objects for the mixer has been set"; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::Close() { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + CloseSpeaker(); + CloseMicrophone(); + + _paMainloop = NULL; + _paContext = NULL; + _paObjectsSet = false; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::CloseSpeaker() { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + // Reset the index to -1 + _paOutputDeviceIndex = -1; + _paPlayStream = NULL; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::CloseMicrophone() { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_VERBOSE) << __FUNCTION__; + + // Reset the index to -1 + _paInputDeviceIndex = -1; + _paRecStream = NULL; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SetPlayStream(pa_stream* playStream) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::SetPlayStream(playStream)"; + + _paPlayStream = playStream; + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SetRecStream(pa_stream* recStream) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxPulse::SetRecStream(recStream)"; + + _paRecStream = recStream; + return 0; +} + +int32_t
AudioMixerManagerLinuxPulse::OpenSpeaker(uint16_t deviceIndex) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxPulse::OpenSpeaker(deviceIndex=" + << deviceIndex << ")"; + + // No point in opening the speaker + // if PA objects have not been set + if (!_paObjectsSet) { + RTC_LOG(LS_ERROR) << "PulseAudio objects has not been set"; + return -1; + } + + // Set the index for the PulseAudio + // output device to control + _paOutputDeviceIndex = deviceIndex; + + RTC_LOG(LS_VERBOSE) << "the output mixer device is now open"; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::OpenMicrophone(uint16_t deviceIndex) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::OpenMicrophone(deviceIndex=" + << deviceIndex << ")"; + + // No point in opening the microphone + // if PA objects have not been set + if (!_paObjectsSet) { + RTC_LOG(LS_ERROR) << "PulseAudio objects have not been set"; + return -1; + } + + // Set the index for the PulseAudio + // input device to control + _paInputDeviceIndex = deviceIndex; + + RTC_LOG(LS_VERBOSE) << "the input mixer device is now open"; + + return 0; +} + +bool AudioMixerManagerLinuxPulse::SpeakerIsInitialized() const { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_INFO) << __FUNCTION__; + + return (_paOutputDeviceIndex != -1); +} + +bool AudioMixerManagerLinuxPulse::MicrophoneIsInitialized() const { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_DLOG(LS_INFO) << __FUNCTION__; + + return (_paInputDeviceIndex != -1); +} + +int32_t AudioMixerManagerLinuxPulse::SetSpeakerVolume(uint32_t volume) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxPulse::SetSpeakerVolume(volume=" + << volume << ")"; + + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + bool setFailed(false); + + if (_paPlayStream && + (LATE(pa_stream_get_state)(_paPlayStream) != PA_STREAM_UNCONNECTED)) { + // We can only really set the volume if we have a connected stream + AutoPulseLock auto_lock(_paMainloop); + + // Get the number of channels from the sample specification + const pa_sample_spec* spec = LATE(pa_stream_get_sample_spec)(_paPlayStream); + if (!spec) { + RTC_LOG(LS_ERROR) << "could not get sample specification"; + return -1; + } + + // Set the same volume for all channels + pa_cvolume cVolumes; + LATE(pa_cvolume_set)(&cVolumes, spec->channels, volume); + + pa_operation* paOperation = NULL; + paOperation = LATE(pa_context_set_sink_input_volume)( + _paContext, LATE(pa_stream_get_index)(_paPlayStream), &cVolumes, + PaSetVolumeCallback, NULL); + if (!paOperation) { + setFailed = true; + } + + // Don't need to wait for the completion + LATE(pa_operation_unref)(paOperation); + } else { + // We have not created a stream or it's not connected to the sink + // Save the volume to be set at connection + _paSpeakerVolume = volume; + } + + if (setFailed) { + RTC_LOG(LS_WARNING) << "could not set speaker volume, error=" + << LATE(pa_context_errno)(_paContext); + + return -1; + } + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SpeakerVolume(uint32_t& volume) const { + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + if (_paPlayStream && + (LATE(pa_stream_get_state)(_paPlayStream) != PA_STREAM_UNCONNECTED)) { + // We can only get the volume if we have a connected stream + if (!GetSinkInputInfo()) + return -1; + + 
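+ // GetSinkInputInfo() waited on the mainloop until PaSinkInputInfoCallback + // cached the stream volume; read the cached value back under the mainloop + // lock.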
AutoPulseLock auto_lock(_paMainloop); + volume = static_cast<uint32_t>(_paVolume); + } else { + AutoPulseLock auto_lock(_paMainloop); + volume = _paSpeakerVolume; + } + + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxPulse::SpeakerVolume() => vol=" + << volume; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MaxSpeakerVolume( + uint32_t& maxVolume) const { + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + // PA_VOLUME_NORM corresponds to 100% (0db) + // but PA allows up to 150 db amplification + maxVolume = static_cast<uint32_t>(PA_VOLUME_NORM); + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MinSpeakerVolume( + uint32_t& minVolume) const { + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + minVolume = static_cast<uint32_t>(PA_VOLUME_MUTED); + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SpeakerVolumeIsAvailable(bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + // Always available in Pulse Audio + available = true; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SpeakerMuteIsAvailable(bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + // Always available in Pulse Audio + available = true; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SetSpeakerMute(bool enable) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerLinuxPulse::SetSpeakerMute(enable=" + << enable << ")"; + + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + bool setFailed(false); + + if (_paPlayStream && + (LATE(pa_stream_get_state)(_paPlayStream) != PA_STREAM_UNCONNECTED)) { + // We can only really mute if we have a connected stream + AutoPulseLock auto_lock(_paMainloop); + + pa_operation* paOperation = NULL; + paOperation = LATE(pa_context_set_sink_input_mute)( + _paContext, LATE(pa_stream_get_index)(_paPlayStream), (int)enable, + PaSetVolumeCallback, NULL); + if (!paOperation) { + setFailed = true; + } + + // Don't need to wait for the completion + LATE(pa_operation_unref)(paOperation); + } else { + // We have not created a stream or it's not connected to the sink + // Save the mute status to be set at connection + _paSpeakerMute = enable; + } + + if (setFailed) { + RTC_LOG(LS_WARNING) << "could not mute speaker, error=" + << LATE(pa_context_errno)(_paContext); + return -1; + } + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SpeakerMute(bool& enabled) const { + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; + return -1; + } + + if (_paPlayStream && + (LATE(pa_stream_get_state)(_paPlayStream) != PA_STREAM_UNCONNECTED)) { + // We can only get the mute status if we have a connected stream + if (!GetSinkInputInfo()) + return -1; + + enabled = static_cast<bool>(_paMute); + } else { + enabled = _paSpeakerMute; + } + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::SpeakerMute() => enabled=" << enabled; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::StereoPlayoutIsAvailable(bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paOutputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "output device index has not been set"; +
return -1; + } + + uint32_t deviceIndex = (uint32_t)_paOutputDeviceIndex; + + { + AutoPulseLock auto_lock(_paMainloop); + + // Get the actual stream device index if we have a connected stream + // The device used by the stream can be changed + // during the call + if (_paPlayStream && + (LATE(pa_stream_get_state)(_paPlayStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paPlayStream); + } + } + + if (!GetSinkInfoByIndex(deviceIndex)) + return -1; + + available = static_cast<bool>(_paChannels == 2); + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::StereoRecordingIsAvailable( + bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + uint32_t deviceIndex = (uint32_t)_paInputDeviceIndex; + + AutoPulseLock auto_lock(_paMainloop); + + // Get the actual stream device index if we have a connected stream + // The device used by the stream can be changed + // during the call + if (_paRecStream && + (LATE(pa_stream_get_state)(_paRecStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paRecStream); + } + + pa_operation* paOperation = NULL; + + // Get info for this source + // We want to know if the actual device can record in stereo + paOperation = LATE(pa_context_get_source_info_by_index)( + _paContext, deviceIndex, PaSourceInfoCallback, (void*)this); + + WaitForOperationCompletion(paOperation); + + available = static_cast<bool>(_paChannels == 2); + + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::StereoRecordingIsAvailable()" + " => available=" + << available; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MicrophoneMuteIsAvailable( + bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + // Always available in Pulse Audio + available = true; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SetMicrophoneMute(bool enable) { + RTC_DCHECK(thread_checker_.IsCurrent()); + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::SetMicrophoneMute(enable=" << enable + << ")"; + + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + bool setFailed(false); + pa_operation* paOperation = NULL; + + uint32_t deviceIndex = (uint32_t)_paInputDeviceIndex; + + AutoPulseLock auto_lock(_paMainloop); + + // Get the actual stream device index if we have a connected stream + // The device used by the stream can be changed + // during the call + if (_paRecStream && + (LATE(pa_stream_get_state)(_paRecStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paRecStream); + } + + // Set mute switch for the source + paOperation = LATE(pa_context_set_source_mute_by_index)( + _paContext, deviceIndex, enable, PaSetVolumeCallback, NULL); + + if (!paOperation) { + setFailed = true; + } + + // Don't need to wait for this to complete.
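+ // pa_operation_unref() releases only our reference to the operation; the + // request itself still runs on the server, and PaSetVolumeCallback will log + // any failure that is reported.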
+ LATE(pa_operation_unref)(paOperation); + + if (setFailed) { + RTC_LOG(LS_WARNING) << "could not mute microphone, error=" + << LATE(pa_context_errno)(_paContext); + return -1; + } + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MicrophoneMute(bool& enabled) const { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + uint32_t deviceIndex = (uint32_t)_paInputDeviceIndex; + + { + AutoPulseLock auto_lock(_paMainloop); + // Get the actual stream device index if we have a connected stream + // The device used by the stream can be changed + // during the call + if (_paRecStream && + (LATE(pa_stream_get_state)(_paRecStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paRecStream); + } + } + + if (!GetSourceInfoByIndex(deviceIndex)) + return -1; + + enabled = static_cast<bool>(_paMute); + + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::MicrophoneMute() => enabled=" << enabled; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MicrophoneVolumeIsAvailable( + bool& available) { + RTC_DCHECK(thread_checker_.IsCurrent()); + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + // Always available in Pulse Audio + available = true; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::SetMicrophoneVolume(uint32_t volume) { + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::SetMicrophoneVolume(volume=" << volume + << ")"; + + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + // Unlike output streams, input streams have no concept of a stream + // volume, only a device volume. So we have to change the volume of the + // device itself. + + // The device may have a different number of channels than the stream and + // their mapping may be different, so we don't want to use the channel + // count from our sample spec. We could use PA_CHANNELS_MAX to cover our + // bases, and the server allows that even if the device's channel count + // is lower, but some buggy PA clients don't like that (the pavucontrol + // on Hardy dies in an assert if the channel count is different). So + // instead we look up the actual number of channels that the device has. + AutoPulseLock auto_lock(_paMainloop); + uint32_t deviceIndex = (uint32_t)_paInputDeviceIndex; + + // Get the actual stream device index if we have a connected stream + // The device used by the stream can be changed + // during the call + if (_paRecStream && + (LATE(pa_stream_get_state)(_paRecStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paRecStream); + } + + bool setFailed(false); + pa_operation* paOperation = NULL; + + // Get the number of channels for this source + paOperation = LATE(pa_context_get_source_info_by_index)( + _paContext, deviceIndex, PaSourceInfoCallback, (void*)this); + + WaitForOperationCompletion(paOperation); + + uint8_t channels = _paChannels; + pa_cvolume cVolumes; + LATE(pa_cvolume_set)(&cVolumes, channels, volume); + + // Set the volume for the source + paOperation = LATE(pa_context_set_source_volume_by_index)( + _paContext, deviceIndex, &cVolumes, PaSetVolumeCallback, NULL); + + if (!paOperation) { + setFailed = true; + } + + // Don't need to wait for this to complete.
+ LATE(pa_operation_unref)(paOperation); + + if (setFailed) { + RTC_LOG(LS_WARNING) << "could not set microphone volume, error=" + << LATE(pa_context_errno)(_paContext); + return -1; + } + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MicrophoneVolume(uint32_t& volume) const { + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + uint32_t deviceIndex = (uint32_t)_paInputDeviceIndex; + + { + AutoPulseLock auto_lock(_paMainloop); + // Get the actual stream device index if we have a connected stream. + // The device used by the stream can be changed during the call. + if (_paRecStream && + (LATE(pa_stream_get_state)(_paRecStream) != PA_STREAM_UNCONNECTED)) { + deviceIndex = LATE(pa_stream_get_device_index)(_paRecStream); + } + } + + if (!GetSourceInfoByIndex(deviceIndex)) + return -1; + + { + AutoPulseLock auto_lock(_paMainloop); + volume = static_cast<uint32_t>(_paVolume); + } + + RTC_LOG(LS_VERBOSE) + << "AudioMixerManagerLinuxPulse::MicrophoneVolume() => vol=" << volume; + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MaxMicrophoneVolume( + uint32_t& maxVolume) const { + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + // PA_VOLUME_NORM corresponds to 100% (0db) + // PA allows up to 150 db amplification (PA_VOLUME_MAX) + // but that doesn't work well for all sound cards + maxVolume = static_cast<uint32_t>(PA_VOLUME_NORM); + + return 0; +} + +int32_t AudioMixerManagerLinuxPulse::MinMicrophoneVolume( + uint32_t& minVolume) const { + if (_paInputDeviceIndex == -1) { + RTC_LOG(LS_WARNING) << "input device index has not been set"; + return -1; + } + + minVolume = static_cast<uint32_t>(PA_VOLUME_MUTED); + + return 0; +} + +// =========================================================================== +// Private Methods +// =========================================================================== + +void AudioMixerManagerLinuxPulse::PaSinkInfoCallback(pa_context* /*c*/, + const pa_sink_info* i, + int eol, + void* pThis) { + static_cast<AudioMixerManagerLinuxPulse*>(pThis)->PaSinkInfoCallbackHandler( + i, eol); +} + +void AudioMixerManagerLinuxPulse::PaSinkInputInfoCallback( + pa_context* /*c*/, + const pa_sink_input_info* i, + int eol, + void* pThis) { + static_cast<AudioMixerManagerLinuxPulse*>(pThis) + ->PaSinkInputInfoCallbackHandler(i, eol); +} + +void AudioMixerManagerLinuxPulse::PaSourceInfoCallback(pa_context* /*c*/, + const pa_source_info* i, + int eol, + void* pThis) { + static_cast<AudioMixerManagerLinuxPulse*>(pThis)->PaSourceInfoCallbackHandler( + i, eol); +} + +void AudioMixerManagerLinuxPulse::PaSetVolumeCallback(pa_context* c, + int success, + void* /*pThis*/) { + if (!success) { + RTC_LOG(LS_ERROR) << "failed to set volume"; + } +} + +void AudioMixerManagerLinuxPulse::PaSinkInfoCallbackHandler( + const pa_sink_info* i, + int eol) { + if (eol) { + // Signal that we are done + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); + return; + } + + _paChannels = i->channel_map.channels; // Get number of channels + pa_volume_t paVolume = PA_VOLUME_MUTED; // Minimum possible value.
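+ // The per-channel volumes are scanned below and the loudest channel is + // reported, since the mixer exposes a single scalar volume.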
+ for (int j = 0; j < _paChannels; ++j) { + if (paVolume < i->volume.values[j]) { + paVolume = i->volume.values[j]; + } + } + _paVolume = paVolume; // get the max volume for any channel + _paMute = i->mute; // get mute status + + // supported since PA 0.9.15 + //_paVolSteps = i->n_volume_steps; // get the number of volume steps + // default value is PA_VOLUME_NORM+1 + _paVolSteps = PA_VOLUME_NORM + 1; +} + +void AudioMixerManagerLinuxPulse::PaSinkInputInfoCallbackHandler( + const pa_sink_input_info* i, + int eol) { + if (eol) { + // Signal that we are done + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); + return; + } + + _paChannels = i->channel_map.channels; // Get number of channels + pa_volume_t paVolume = PA_VOLUME_MUTED; // Minimum possible value. + for (int j = 0; j < _paChannels; ++j) { + if (paVolume < i->volume.values[j]) { + paVolume = i->volume.values[j]; + } + } + _paVolume = paVolume; // Get the max volume for any channel + _paMute = i->mute; // Get mute status +} + +void AudioMixerManagerLinuxPulse::PaSourceInfoCallbackHandler( + const pa_source_info* i, + int eol) { + if (eol) { + // Signal that we are done + LATE(pa_threaded_mainloop_signal)(_paMainloop, 0); + return; + } + + _paChannels = i->channel_map.channels; // Get number of channels + pa_volume_t paVolume = PA_VOLUME_MUTED; // Minimum possible value. + for (int j = 0; j < _paChannels; ++j) { + if (paVolume < i->volume.values[j]) { + paVolume = i->volume.values[j]; + } + } + _paVolume = paVolume; // Get the max volume for any channel + _paMute = i->mute; // Get mute status + + // supported since PA 0.9.15 + //_paVolSteps = i->n_volume_steps; // Get the number of volume steps + // default value is PA_VOLUME_NORM+1 + _paVolSteps = PA_VOLUME_NORM + 1; +} + +void AudioMixerManagerLinuxPulse::WaitForOperationCompletion( + pa_operation* paOperation) const { + while (LATE(pa_operation_get_state)(paOperation) == PA_OPERATION_RUNNING) { + LATE(pa_threaded_mainloop_wait)(_paMainloop); + } + + LATE(pa_operation_unref)(paOperation); +} + +bool AudioMixerManagerLinuxPulse::GetSinkInputInfo() const { + pa_operation* paOperation = NULL; + + AutoPulseLock auto_lock(_paMainloop); + // Get info for this stream (sink input). 
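+ // Typical asynchronous PulseAudio query: issue the request, then + // WaitForOperationCompletion() blocks on the threaded mainloop until the + // info callback signals that the cached fields (_paVolume, _paMute, + // _paChannels) have been filled in.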
+ paOperation = LATE(pa_context_get_sink_input_info)( + _paContext, LATE(pa_stream_get_index)(_paPlayStream), + PaSinkInputInfoCallback, (void*)this); + + WaitForOperationCompletion(paOperation); + return true; +} + +bool AudioMixerManagerLinuxPulse::GetSinkInfoByIndex(int device_index) const { + pa_operation* paOperation = NULL; + + AutoPulseLock auto_lock(_paMainloop); + paOperation = LATE(pa_context_get_sink_info_by_index)( + _paContext, device_index, PaSinkInfoCallback, (void*)this); + + WaitForOperationCompletion(paOperation); + return true; +} + +bool AudioMixerManagerLinuxPulse::GetSourceInfoByIndex(int device_index) const { + pa_operation* paOperation = NULL; + + AutoPulseLock auto_lock(_paMainloop); + paOperation = LATE(pa_context_get_source_info_by_index)( + _paContext, device_index, PaSourceInfoCallback, (void*)this); + + WaitForOperationCompletion(paOperation); + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.h b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.h new file mode 100644 index 0000000000..546440c4a6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/audio_mixer_manager_pulse_linux.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_DEVICE_AUDIO_MIXER_MANAGER_PULSE_LINUX_H_ +#define AUDIO_DEVICE_AUDIO_MIXER_MANAGER_PULSE_LINUX_H_ + +#include <pulse/pulseaudio.h> +#include <stdint.h> + +#include "api/sequence_checker.h" + +#ifndef UINT32_MAX +#define UINT32_MAX ((uint32_t)-1) +#endif + +namespace webrtc { + +class AudioMixerManagerLinuxPulse { + public: + int32_t SetPlayStream(pa_stream* playStream); + int32_t SetRecStream(pa_stream* recStream); + int32_t OpenSpeaker(uint16_t deviceIndex); + int32_t OpenMicrophone(uint16_t deviceIndex); + int32_t SetSpeakerVolume(uint32_t volume); + int32_t SpeakerVolume(uint32_t& volume) const; + int32_t MaxSpeakerVolume(uint32_t& maxVolume) const; + int32_t MinSpeakerVolume(uint32_t& minVolume) const; + int32_t SpeakerVolumeIsAvailable(bool& available); + int32_t SpeakerMuteIsAvailable(bool& available); + int32_t SetSpeakerMute(bool enable); + int32_t StereoPlayoutIsAvailable(bool& available); + int32_t StereoRecordingIsAvailable(bool& available); + int32_t SpeakerMute(bool& enabled) const; + int32_t MicrophoneMuteIsAvailable(bool& available); + int32_t SetMicrophoneMute(bool enable); + int32_t MicrophoneMute(bool& enabled) const; + int32_t MicrophoneVolumeIsAvailable(bool& available); + int32_t SetMicrophoneVolume(uint32_t volume); + int32_t MicrophoneVolume(uint32_t& volume) const; + int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const; + int32_t MinMicrophoneVolume(uint32_t& minVolume) const; + int32_t SetPulseAudioObjects(pa_threaded_mainloop* mainloop, + pa_context* context); + int32_t Close(); + int32_t CloseSpeaker(); + int32_t CloseMicrophone(); + bool SpeakerIsInitialized() const; + bool MicrophoneIsInitialized() const; + + public: + AudioMixerManagerLinuxPulse(); + ~AudioMixerManagerLinuxPulse(); + + private: + static void PaSinkInfoCallback(pa_context* c, + const pa_sink_info* i, + int eol, + void* pThis); + static void
PaSinkInputInfoCallback(pa_context* c, + const pa_sink_input_info* i, + int eol, + void* pThis); + static void PaSourceInfoCallback(pa_context* c, + const pa_source_info* i, + int eol, + void* pThis); + static void PaSetVolumeCallback(pa_context* /*c*/, + int success, + void* /*pThis*/); + void PaSinkInfoCallbackHandler(const pa_sink_info* i, int eol); + void PaSinkInputInfoCallbackHandler(const pa_sink_input_info* i, int eol); + void PaSourceInfoCallbackHandler(const pa_source_info* i, int eol); + + void WaitForOperationCompletion(pa_operation* paOperation) const; + + bool GetSinkInputInfo() const; + bool GetSinkInfoByIndex(int device_index) const; + bool GetSourceInfoByIndex(int device_index) const; + + private: + int16_t _paOutputDeviceIndex; + int16_t _paInputDeviceIndex; + + pa_stream* _paPlayStream; + pa_stream* _paRecStream; + + pa_threaded_mainloop* _paMainloop; + pa_context* _paContext; + + mutable uint32_t _paVolume; + mutable uint32_t _paMute; + mutable uint32_t _paVolSteps; + bool _paSpeakerMute; + mutable uint32_t _paSpeakerVolume; + mutable uint8_t _paChannels; + bool _paObjectsSet; + + // Stores thread ID in constructor. + // We can then use RTC_DCHECK_RUN_ON(&worker_thread_checker_) to ensure that + // other methods are called from the same thread. + // Currently only does RTC_DCHECK(thread_checker_.IsCurrent()). + SequenceChecker thread_checker_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_DEVICE_MAIN_SOURCE_LINUX_AUDIO_MIXER_MANAGER_PULSE_LINUX_H_ diff --git a/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.cc new file mode 100644 index 0000000000..751edafd8b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2010 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_device/linux/latebindingsymboltable_linux.h" + +#include "absl/strings/string_view.h" +#include "rtc_base/logging.h" + +#ifdef WEBRTC_LINUX +#include <dlfcn.h> +#endif + +namespace webrtc { +namespace adm_linux { + +inline static const char* GetDllError() { +#ifdef WEBRTC_LINUX + char* err = dlerror(); + if (err) { + return err; + } else { + return "No error"; + } +#else +#error Not implemented +#endif +} + +DllHandle InternalLoadDll(absl::string_view dll_name) { +#ifdef WEBRTC_LINUX + DllHandle handle = dlopen(std::string(dll_name).c_str(), RTLD_NOW); +#else +#error Not implemented +#endif + if (handle == kInvalidDllHandle) { + RTC_LOG(LS_WARNING) << "Can't load " << dll_name << " : " << GetDllError(); + } + return handle; +} + +void InternalUnloadDll(DllHandle handle) { +#ifdef WEBRTC_LINUX +// TODO(pbos): Remove this dlclose() exclusion when leaks and suppressions from +// here are gone (or AddressSanitizer can display them properly). +// +// Skip dlclose() on AddressSanitizer as leaks including this module in the +// stack trace gets displayed as <unknown module> instead of the actual library +// -> it can not be suppressed.
+// https://code.google.com/p/address-sanitizer/issues/detail?id=89 +#if !defined(ADDRESS_SANITIZER) + if (dlclose(handle) != 0) { + RTC_LOG(LS_ERROR) << GetDllError(); + } +#endif // !defined(ADDRESS_SANITIZER) +#else +#error Not implemented +#endif +} + +static bool LoadSymbol(DllHandle handle, + absl::string_view symbol_name, + void** symbol) { +#ifdef WEBRTC_LINUX + *symbol = dlsym(handle, std::string(symbol_name).c_str()); + char* err = dlerror(); + if (err) { + RTC_LOG(LS_ERROR) << "Error loading symbol " << symbol_name << " : " << err; + return false; + } else if (!*symbol) { + RTC_LOG(LS_ERROR) << "Symbol " << symbol_name << " is NULL"; + return false; + } + return true; +#else +#error Not implemented +#endif +} + +// This routine MUST assign SOME value for every symbol, even if that value is +// NULL, or else some symbols may be left with uninitialized data that the +// caller may later interpret as a valid address. +bool InternalLoadSymbols(DllHandle handle, + int num_symbols, + const char* const symbol_names[], + void* symbols[]) { +#ifdef WEBRTC_LINUX + // Clear any old errors. + dlerror(); +#endif + for (int i = 0; i < num_symbols; ++i) { + if (!LoadSymbol(handle, symbol_names[i], &symbols[i])) { + return false; + } + } + return true; +} + +} // namespace adm_linux +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.h b/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.h new file mode 100644 index 0000000000..00f3c5a449 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_device/linux/latebindingsymboltable_linux.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2010 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_DEVICE_LATEBINDINGSYMBOLTABLE_LINUX_H_ +#define AUDIO_DEVICE_LATEBINDINGSYMBOLTABLE_LINUX_H_ + +#include // for NULL +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" + +// This file provides macros for creating "symbol table" classes to simplify the +// dynamic loading of symbols from DLLs. Currently the implementation only +// supports Linux and pure C symbols. +// See talk/sound/pulseaudiosymboltable.(h|cc) for an example. + +namespace webrtc { +namespace adm_linux { + +#ifdef WEBRTC_LINUX +typedef void* DllHandle; + +const DllHandle kInvalidDllHandle = NULL; +#else +#error Not implemented +#endif + +// These are helpers for use only by the class below. +DllHandle InternalLoadDll(absl::string_view); + +void InternalUnloadDll(DllHandle handle); + +bool InternalLoadSymbols(DllHandle handle, + int num_symbols, + const char* const symbol_names[], + void* symbols[]); + +template +class LateBindingSymbolTable { + public: + LateBindingSymbolTable() + : handle_(kInvalidDllHandle), undefined_symbols_(false) { + memset(symbols_, 0, sizeof(symbols_)); + } + + ~LateBindingSymbolTable() { Unload(); } + + LateBindingSymbolTable(const LateBindingSymbolTable&) = delete; + LateBindingSymbolTable& operator=(LateBindingSymbolTable&) = delete; + + static int NumSymbols() { return SYMBOL_TABLE_SIZE; } + + // We do not use this, but we offer it for theoretical convenience. 
+ static const char* GetSymbolName(int index) { + RTC_DCHECK_LT(index, NumSymbols()); + return kSymbolNames[index]; + } + + bool IsLoaded() const { return handle_ != kInvalidDllHandle; } + + // Loads the DLL and the symbol table. Returns true iff the DLL and symbol + // table loaded successfully. + bool Load() { + if (IsLoaded()) { + return true; + } + if (undefined_symbols_) { + // We do not attempt to load again because repeated attempts are not + // likely to succeed and DLL loading is costly. + return false; + } + handle_ = InternalLoadDll(kDllName); + if (!IsLoaded()) { + return false; + } + if (!InternalLoadSymbols(handle_, NumSymbols(), kSymbolNames, symbols_)) { + undefined_symbols_ = true; + Unload(); + return false; + } + return true; + } + + void Unload() { + if (!IsLoaded()) { + return; + } + InternalUnloadDll(handle_); + handle_ = kInvalidDllHandle; + memset(symbols_, 0, sizeof(symbols_)); + } + + // Retrieves the given symbol. NOTE: Recommended to use LATESYM_GET below + // instead of this. + void* GetSymbol(int index) const { + RTC_DCHECK(IsLoaded()); + RTC_DCHECK_LT(index, NumSymbols()); + return symbols_[index]; + } + + private: + DllHandle handle_; + bool undefined_symbols_; + void* symbols_[SYMBOL_TABLE_SIZE]; +}; + +// This macro must be invoked in a header to declare a symbol table class. +#define LATE_BINDING_SYMBOL_TABLE_DECLARE_BEGIN(ClassName) enum { +// This macro must be invoked in the header declaration once for each symbol +// (recommended to use an X-Macro to avoid duplication). +// This macro defines an enum with names built from the symbols, which +// essentially creates a hash table in the compiler from symbol names to their +// indices in the symbol table class. +#define LATE_BINDING_SYMBOL_TABLE_DECLARE_ENTRY(ClassName, sym) \ + ClassName##_SYMBOL_TABLE_INDEX_##sym, + +// This macro completes the header declaration. +#define LATE_BINDING_SYMBOL_TABLE_DECLARE_END(ClassName) \ + ClassName##_SYMBOL_TABLE_SIZE \ + } \ + ; \ + \ + extern const char ClassName##_kDllName[]; \ + extern const char* const \ + ClassName##_kSymbolNames[ClassName##_SYMBOL_TABLE_SIZE]; \ + \ + typedef ::webrtc::adm_linux::LateBindingSymbolTable< \ + ClassName##_SYMBOL_TABLE_SIZE, ClassName##_kDllName, \ + ClassName##_kSymbolNames> \ + ClassName; + +// This macro must be invoked in a .cc file to define a previously-declared +// symbol table class. +#define LATE_BINDING_SYMBOL_TABLE_DEFINE_BEGIN(ClassName, dllName) \ + const char ClassName##_kDllName[] = dllName; \ + const char* const ClassName##_kSymbolNames[ClassName##_SYMBOL_TABLE_SIZE] = { +// This macro must be invoked in the .cc definition once for each symbol +// (recommended to use an X-Macro to avoid duplication). +// This would have to use the mangled name if we were to ever support C++ +// symbols. +#define LATE_BINDING_SYMBOL_TABLE_DEFINE_ENTRY(ClassName, sym) #sym, + +#define LATE_BINDING_SYMBOL_TABLE_DEFINE_END(ClassName) \ + } \ + ; + +// Index of a given symbol in the given symbol table class. +#define LATESYM_INDEXOF(ClassName, sym) (ClassName##_SYMBOL_TABLE_INDEX_##sym) + +// Returns a reference to the given late-binded symbol, with the correct type. 
+#define LATESYM_GET(ClassName, inst, sym) \
+  (*reinterpret_cast<__typeof__(&sym)>(   \
+      (inst)->GetSymbol(LATESYM_INDEXOF(ClassName, sym))))
+
+}  // namespace adm_linux
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_LATEBINDINGSYMBOLTABLE_LINUX_H_
diff --git a/third_party/libwebrtc/modules/audio_device/linux/pulseaudiosymboltable_linux.cc b/third_party/libwebrtc/modules/audio_device/linux/pulseaudiosymboltable_linux.cc
new file mode 100644
index 0000000000..e0759e6ca3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/linux/pulseaudiosymboltable_linux.cc
@@ -0,0 +1,41 @@
+/*
+ * libjingle
+ * Copyright 2004--2010, Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "modules/audio_device/linux/pulseaudiosymboltable_linux.h"
+
+namespace webrtc {
+namespace adm_linux_pulse {
+
+LATE_BINDING_SYMBOL_TABLE_DEFINE_BEGIN(PulseAudioSymbolTable, "libpulse.so.0")
+#define X(sym) \
+  LATE_BINDING_SYMBOL_TABLE_DEFINE_ENTRY(PulseAudioSymbolTable, sym)
+PULSE_AUDIO_SYMBOLS_LIST
+#undef X
+LATE_BINDING_SYMBOL_TABLE_DEFINE_END(PulseAudioSymbolTable)
+
+}  // namespace adm_linux_pulse
+}  // namespace webrtc
diff --git 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AUDIO_DEVICE_PULSEAUDIOSYMBOLTABLE_LINUX_H_ +#define AUDIO_DEVICE_PULSEAUDIOSYMBOLTABLE_LINUX_H_ + +#include "modules/audio_device/linux/latebindingsymboltable_linux.h" + +namespace webrtc { +namespace adm_linux_pulse { + +// The PulseAudio symbols we need, as an X-Macro list. +// This list must contain precisely every libpulse function that is used in +// the ADM LINUX PULSE Device and Mixer classes +#define PULSE_AUDIO_SYMBOLS_LIST \ + X(pa_bytes_per_second) \ + X(pa_context_connect) \ + X(pa_context_disconnect) \ + X(pa_context_errno) \ + X(pa_context_get_protocol_version) \ + X(pa_context_get_server_info) \ + X(pa_context_get_sink_info_list) \ + X(pa_context_get_sink_info_by_index) \ + X(pa_context_get_sink_info_by_name) \ + X(pa_context_get_sink_input_info) \ + X(pa_context_get_source_info_by_index) \ + X(pa_context_get_source_info_by_name) \ + X(pa_context_get_source_info_list) \ + X(pa_context_get_state) \ + X(pa_context_new) \ + X(pa_context_set_sink_input_volume) \ + X(pa_context_set_sink_input_mute) \ + X(pa_context_set_source_volume_by_index) \ + X(pa_context_set_source_mute_by_index) \ + X(pa_context_set_state_callback) \ + X(pa_context_unref) \ + X(pa_cvolume_set) \ + X(pa_operation_get_state) \ + X(pa_operation_unref) \ + X(pa_stream_connect_playback) \ + X(pa_stream_connect_record) \ + X(pa_stream_disconnect) \ + X(pa_stream_drop) \ + X(pa_stream_get_device_index) \ + X(pa_stream_get_index) \ + X(pa_stream_get_latency) \ + X(pa_stream_get_sample_spec) \ + X(pa_stream_get_state) \ + X(pa_stream_new) \ + X(pa_stream_peek) \ + X(pa_stream_readable_size) \ + X(pa_stream_set_buffer_attr) \ + X(pa_stream_set_overflow_callback) \ + X(pa_stream_set_read_callback) \ + X(pa_stream_set_state_callback) \ + X(pa_stream_set_underflow_callback) \ + X(pa_stream_set_write_callback) \ + X(pa_stream_unref) \ + X(pa_stream_writable_size) \ + X(pa_stream_write) \ + X(pa_strerror) \ + X(pa_threaded_mainloop_free) \ + X(pa_threaded_mainloop_get_api) \ + X(pa_threaded_mainloop_lock) \ + X(pa_threaded_mainloop_new) \ + X(pa_threaded_mainloop_signal) \ + X(pa_threaded_mainloop_start) \ + X(pa_threaded_mainloop_stop) \ + X(pa_threaded_mainloop_unlock) \ + X(pa_threaded_mainloop_wait) + +LATE_BINDING_SYMBOL_TABLE_DECLARE_BEGIN(PulseAudioSymbolTable) +#define X(sym) \ + LATE_BINDING_SYMBOL_TABLE_DECLARE_ENTRY(PulseAudioSymbolTable, sym) +PULSE_AUDIO_SYMBOLS_LIST +#undef X +LATE_BINDING_SYMBOL_TABLE_DECLARE_END(PulseAudioSymbolTable) + +} // namespace adm_linux_pulse +} // namespace webrtc + +#endif // AUDIO_DEVICE_PULSEAUDIOSYMBOLTABLE_LINUX_H_ diff --git 
a/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.cc b/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.cc
new file mode 100644
index 0000000000..527f76a371
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.cc
@@ -0,0 +1,2500 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/mac/audio_device_mac.h"
+
+#include <ApplicationServices/ApplicationServices.h>
+#include <mach/mach.h>    // mach_task_self()
+#include <sys/sysctl.h>   // sysctlbyname()
+
+#include <memory>
+
+#include "modules/audio_device/audio_device_config.h"
+#include "modules/third_party/portaudio/pa_ringbuffer.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+
+#define WEBRTC_CA_RETURN_ON_ERR(expr)                                \
+  do {                                                               \
+    err = expr;                                                      \
+    if (err != noErr) {                                              \
+      logCAMsg(rtc::LS_ERROR, "Error in " #expr, (const char*)&err); \
+      return -1;                                                     \
+    }                                                                \
+  } while (0)
+
+#define WEBRTC_CA_LOG_ERR(expr)                                      \
+  do {                                                               \
+    err = expr;                                                      \
+    if (err != noErr) {                                              \
+      logCAMsg(rtc::LS_ERROR, "Error in " #expr, (const char*)&err); \
+    }                                                                \
+  } while (0)
+
+#define WEBRTC_CA_LOG_WARN(expr)                                       \
+  do {                                                                 \
+    err = expr;                                                        \
+    if (err != noErr) {                                                \
+      logCAMsg(rtc::LS_WARNING, "Error in " #expr, (const char*)&err); \
+    }                                                                  \
+  } while (0)
+
+enum { MaxNumberDevices = 64 };
+
+// CoreAudio errors are best interpreted as four character strings.
+void AudioDeviceMac::logCAMsg(const rtc::LoggingSeverity sev,
+                              const char* msg,
+                              const char* err) {
+  RTC_DCHECK(msg != NULL);
+  RTC_DCHECK(err != NULL);
+
+#ifdef WEBRTC_ARCH_BIG_ENDIAN
+  switch (sev) {
+    case rtc::LS_ERROR:
+      RTC_LOG(LS_ERROR) << msg << ": " << err[0] << err[1] << err[2] << err[3];
+      break;
+    case rtc::LS_WARNING:
+      RTC_LOG(LS_WARNING) << msg << ": " << err[0] << err[1] << err[2]
+                          << err[3];
+      break;
+    case rtc::LS_VERBOSE:
+      RTC_LOG(LS_VERBOSE) << msg << ": " << err[0] << err[1] << err[2]
+                          << err[3];
+      break;
+    default:
+      break;
+  }
+#else
+  // We need to flip the characters in this case.
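+  // For example, kAudioFormatLinearPCM is the FourCC 'lpcm'; on a
+  // little-endian host its bytes sit in memory as "mcpl", so printing
+  // err[3]..err[0] recovers the readable "lpcm".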
+ switch (sev) { + case rtc::LS_ERROR: + RTC_LOG(LS_ERROR) << msg << ": " << err[3] << err[2] << err[1] << err[0]; + break; + case rtc::LS_WARNING: + RTC_LOG(LS_WARNING) << msg << ": " << err[3] << err[2] << err[1] + << err[0]; + break; + case rtc::LS_VERBOSE: + RTC_LOG(LS_VERBOSE) << msg << ": " << err[3] << err[2] << err[1] + << err[0]; + break; + default: + break; + } +#endif +} + +AudioDeviceMac::AudioDeviceMac() + : _ptrAudioBuffer(NULL), + _mixerManager(), + _inputDeviceIndex(0), + _outputDeviceIndex(0), + _inputDeviceID(kAudioObjectUnknown), + _outputDeviceID(kAudioObjectUnknown), + _inputDeviceIsSpecified(false), + _outputDeviceIsSpecified(false), + _recChannels(N_REC_CHANNELS), + _playChannels(N_PLAY_CHANNELS), + _captureBufData(NULL), + _renderBufData(NULL), + _initialized(false), + _isShutDown(false), + _recording(false), + _playing(false), + _recIsInitialized(false), + _playIsInitialized(false), + _renderDeviceIsAlive(1), + _captureDeviceIsAlive(1), + _twoDevices(true), + _doStop(false), + _doStopRec(false), + _macBookPro(false), + _macBookProPanRight(false), + _captureLatencyUs(0), + _renderLatencyUs(0), + _captureDelayUs(0), + _renderDelayUs(0), + _renderDelayOffsetSamples(0), + _paCaptureBuffer(NULL), + _paRenderBuffer(NULL), + _captureBufSizeSamples(0), + _renderBufSizeSamples(0), + prev_key_state_() { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " created"; + + memset(_renderConvertData, 0, sizeof(_renderConvertData)); + memset(&_outStreamFormat, 0, sizeof(AudioStreamBasicDescription)); + memset(&_outDesiredFormat, 0, sizeof(AudioStreamBasicDescription)); + memset(&_inStreamFormat, 0, sizeof(AudioStreamBasicDescription)); + memset(&_inDesiredFormat, 0, sizeof(AudioStreamBasicDescription)); +} + +AudioDeviceMac::~AudioDeviceMac() { + RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed"; + + if (!_isShutDown) { + Terminate(); + } + + RTC_DCHECK(capture_worker_thread_.empty()); + RTC_DCHECK(render_worker_thread_.empty()); + + if (_paRenderBuffer) { + delete _paRenderBuffer; + _paRenderBuffer = NULL; + } + + if (_paCaptureBuffer) { + delete _paCaptureBuffer; + _paCaptureBuffer = NULL; + } + + if (_renderBufData) { + delete[] _renderBufData; + _renderBufData = NULL; + } + + if (_captureBufData) { + delete[] _captureBufData; + _captureBufData = NULL; + } + + kern_return_t kernErr = KERN_SUCCESS; + kernErr = semaphore_destroy(mach_task_self(), _renderSemaphore); + if (kernErr != KERN_SUCCESS) { + RTC_LOG(LS_ERROR) << "semaphore_destroy() error: " << kernErr; + } + + kernErr = semaphore_destroy(mach_task_self(), _captureSemaphore); + if (kernErr != KERN_SUCCESS) { + RTC_LOG(LS_ERROR) << "semaphore_destroy() error: " << kernErr; + } +} + +// ============================================================================ +// API +// ============================================================================ + +void AudioDeviceMac::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + MutexLock lock(&mutex_); + + _ptrAudioBuffer = audioBuffer; + + // inform the AudioBuffer about default settings for this implementation + _ptrAudioBuffer->SetRecordingSampleRate(N_REC_SAMPLES_PER_SEC); + _ptrAudioBuffer->SetPlayoutSampleRate(N_PLAY_SAMPLES_PER_SEC); + _ptrAudioBuffer->SetRecordingChannels(N_REC_CHANNELS); + _ptrAudioBuffer->SetPlayoutChannels(N_PLAY_CHANNELS); +} + +int32_t AudioDeviceMac::ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const { + audioLayer = AudioDeviceModule::kPlatformDefaultAudio; + return 0; +} + +AudioDeviceGeneric::InitStatus AudioDeviceMac::Init() { + 
MutexLock lock(&mutex_); + + if (_initialized) { + return InitStatus::OK; + } + + OSStatus err = noErr; + + _isShutDown = false; + + // PortAudio ring buffers require an elementCount which is a power of two. + if (_renderBufData == NULL) { + UInt32 powerOfTwo = 1; + while (powerOfTwo < PLAY_BUF_SIZE_IN_SAMPLES) { + powerOfTwo <<= 1; + } + _renderBufSizeSamples = powerOfTwo; + _renderBufData = new SInt16[_renderBufSizeSamples]; + } + + if (_paRenderBuffer == NULL) { + _paRenderBuffer = new PaUtilRingBuffer; + ring_buffer_size_t bufSize = -1; + bufSize = PaUtil_InitializeRingBuffer( + _paRenderBuffer, sizeof(SInt16), _renderBufSizeSamples, _renderBufData); + if (bufSize == -1) { + RTC_LOG(LS_ERROR) << "PaUtil_InitializeRingBuffer() error"; + return InitStatus::PLAYOUT_ERROR; + } + } + + if (_captureBufData == NULL) { + UInt32 powerOfTwo = 1; + while (powerOfTwo < REC_BUF_SIZE_IN_SAMPLES) { + powerOfTwo <<= 1; + } + _captureBufSizeSamples = powerOfTwo; + _captureBufData = new Float32[_captureBufSizeSamples]; + } + + if (_paCaptureBuffer == NULL) { + _paCaptureBuffer = new PaUtilRingBuffer; + ring_buffer_size_t bufSize = -1; + bufSize = + PaUtil_InitializeRingBuffer(_paCaptureBuffer, sizeof(Float32), + _captureBufSizeSamples, _captureBufData); + if (bufSize == -1) { + RTC_LOG(LS_ERROR) << "PaUtil_InitializeRingBuffer() error"; + return InitStatus::RECORDING_ERROR; + } + } + + kern_return_t kernErr = KERN_SUCCESS; + kernErr = semaphore_create(mach_task_self(), &_renderSemaphore, + SYNC_POLICY_FIFO, 0); + if (kernErr != KERN_SUCCESS) { + RTC_LOG(LS_ERROR) << "semaphore_create() error: " << kernErr; + return InitStatus::OTHER_ERROR; + } + + kernErr = semaphore_create(mach_task_self(), &_captureSemaphore, + SYNC_POLICY_FIFO, 0); + if (kernErr != KERN_SUCCESS) { + RTC_LOG(LS_ERROR) << "semaphore_create() error: " << kernErr; + return InitStatus::OTHER_ERROR; + } + + // Setting RunLoop to NULL here instructs HAL to manage its own thread for + // notifications. This was the default behaviour on OS X 10.5 and earlier, + // but now must be explicitly specified. HAL would otherwise try to use the + // main thread to issue notifications. + AudioObjectPropertyAddress propertyAddress = { + kAudioHardwarePropertyRunLoop, kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster}; + CFRunLoopRef runLoop = NULL; + UInt32 size = sizeof(CFRunLoopRef); + int aoerr = AudioObjectSetPropertyData( + kAudioObjectSystemObject, &propertyAddress, 0, NULL, size, &runLoop); + if (aoerr != noErr) { + RTC_LOG(LS_ERROR) << "Error in AudioObjectSetPropertyData: " + << (const char*)&aoerr; + return InitStatus::OTHER_ERROR; + } + + // Listen for any device changes. 
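+  // (kAudioHardwarePropertyDevices notifications fire whenever the device set
+  // changes, e.g. when a USB or Bluetooth audio device is plugged in or
+  // removed.)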
+  propertyAddress.mSelector = kAudioHardwarePropertyDevices;
+  WEBRTC_CA_LOG_ERR(AudioObjectAddPropertyListener(
+      kAudioObjectSystemObject, &propertyAddress, &objectListenerProc, this));
+
+  // Determine if this is a MacBook Pro
+  _macBookPro = false;
+  _macBookProPanRight = false;
+  char buf[128];
+  size_t length = sizeof(buf);
+  memset(buf, 0, length);
+
+  int intErr = sysctlbyname("hw.model", buf, &length, NULL, 0);
+  if (intErr != 0) {
+    RTC_LOG(LS_ERROR) << "Error in sysctlbyname(): " << intErr;
+  } else {
+    RTC_LOG(LS_VERBOSE) << "Hardware model: " << buf;
+    if (strncmp(buf, "MacBookPro", 10) == 0) {
+      _macBookPro = true;
+    }
+  }
+
+  _initialized = true;
+
+  return InitStatus::OK;
+}
+
+int32_t AudioDeviceMac::Terminate() {
+  if (!_initialized) {
+    return 0;
+  }
+
+  if (_recording) {
+    RTC_LOG(LS_ERROR) << "Recording must be stopped";
+    return -1;
+  }
+
+  if (_playing) {
+    RTC_LOG(LS_ERROR) << "Playback must be stopped";
+    return -1;
+  }
+
+  MutexLock lock(&mutex_);
+  _mixerManager.Close();
+
+  OSStatus err = noErr;
+  int retVal = 0;
+
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal,
+      kAudioObjectPropertyElementMaster};
+  WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener(
+      kAudioObjectSystemObject, &propertyAddress, &objectListenerProc, this));
+
+  err = AudioHardwareUnload();
+  if (err != noErr) {
+    logCAMsg(rtc::LS_ERROR, "Error in AudioHardwareUnload()",
+             (const char*)&err);
+    retVal = -1;
+  }
+
+  _isShutDown = true;
+  _initialized = false;
+  _outputDeviceIsSpecified = false;
+  _inputDeviceIsSpecified = false;
+
+  return retVal;
+}
+
+bool AudioDeviceMac::Initialized() const {
+  return (_initialized);
+}
+
+int32_t AudioDeviceMac::SpeakerIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+  return SpeakerIsAvailableLocked(available);
+}
+
+int32_t AudioDeviceMac::SpeakerIsAvailableLocked(bool& available) {
+  bool wasInitialized = _mixerManager.SpeakerIsInitialized();
+
+  // Make an attempt to open up the
+  // output mixer corresponding to the currently selected output device.
+  //
+  if (!wasInitialized && InitSpeakerLocked() == -1) {
+    available = false;
+    return 0;
+  }
+
+  // Given that InitSpeaker was successful, we know that a valid speaker
+  // exists.
+  available = true;
+
+  // Close the initialized output mixer
+  //
+  if (!wasInitialized) {
+    _mixerManager.CloseSpeaker();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::InitSpeaker() {
+  MutexLock lock(&mutex_);
+  return InitSpeakerLocked();
+}
+
+int32_t AudioDeviceMac::InitSpeakerLocked() {
+  if (_playing) {
+    return -1;
+  }
+
+  if (InitDevice(_outputDeviceIndex, _outputDeviceID, false) == -1) {
+    return -1;
+  }
+
+  if (_inputDeviceID == _outputDeviceID) {
+    _twoDevices = false;
+  } else {
+    _twoDevices = true;
+  }
+
+  if (_mixerManager.OpenSpeaker(_outputDeviceID) == -1) {
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::MicrophoneIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+  return MicrophoneIsAvailableLocked(available);
+}
+
+int32_t AudioDeviceMac::MicrophoneIsAvailableLocked(bool& available) {
+  bool wasInitialized = _mixerManager.MicrophoneIsInitialized();
+
+  // Make an attempt to open up the
+  // input mixer corresponding to the currently selected input device.
+  //
+  if (!wasInitialized && InitMicrophoneLocked() == -1) {
+    available = false;
+    return 0;
+  }
+
+  // Given that InitMicrophone was successful, we know that a valid microphone
+  // exists.
+ available = true; + + // Close the initialized input mixer + // + if (!wasInitialized) { + _mixerManager.CloseMicrophone(); + } + + return 0; +} + +int32_t AudioDeviceMac::InitMicrophone() { + MutexLock lock(&mutex_); + return InitMicrophoneLocked(); +} + +int32_t AudioDeviceMac::InitMicrophoneLocked() { + if (_recording) { + return -1; + } + + if (InitDevice(_inputDeviceIndex, _inputDeviceID, true) == -1) { + return -1; + } + + if (_inputDeviceID == _outputDeviceID) { + _twoDevices = false; + } else { + _twoDevices = true; + } + + if (_mixerManager.OpenMicrophone(_inputDeviceID) == -1) { + return -1; + } + + return 0; +} + +bool AudioDeviceMac::SpeakerIsInitialized() const { + return (_mixerManager.SpeakerIsInitialized()); +} + +bool AudioDeviceMac::MicrophoneIsInitialized() const { + return (_mixerManager.MicrophoneIsInitialized()); +} + +int32_t AudioDeviceMac::SpeakerVolumeIsAvailable(bool& available) { + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + // + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control. + available = false; + return 0; + } + + // Given that InitSpeaker was successful, we know that a volume control exists + // + available = true; + + // Close the initialized output mixer + // + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceMac::SetSpeakerVolume(uint32_t volume) { + return (_mixerManager.SetSpeakerVolume(volume)); +} + +int32_t AudioDeviceMac::SpeakerVolume(uint32_t& volume) const { + uint32_t level(0); + + if (_mixerManager.SpeakerVolume(level) == -1) { + return -1; + } + + volume = level; + return 0; +} + +int32_t AudioDeviceMac::MaxSpeakerVolume(uint32_t& maxVolume) const { + uint32_t maxVol(0); + + if (_mixerManager.MaxSpeakerVolume(maxVol) == -1) { + return -1; + } + + maxVolume = maxVol; + return 0; +} + +int32_t AudioDeviceMac::MinSpeakerVolume(uint32_t& minVolume) const { + uint32_t minVol(0); + + if (_mixerManager.MinSpeakerVolume(minVol) == -1) { + return -1; + } + + minVolume = minVol; + return 0; +} + +int32_t AudioDeviceMac::SpeakerMuteIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + // Make an attempt to open up the + // output mixer corresponding to the currently selected output device. + // + if (!wasInitialized && InitSpeaker() == -1) { + // If we end up here it means that the selected speaker has no volume + // control, hence it is safe to state that there is no mute control + // already at this stage. 
+ available = false; + return 0; + } + + // Check if the selected speaker has a mute control + // + _mixerManager.SpeakerMuteIsAvailable(isAvailable); + + available = isAvailable; + + // Close the initialized output mixer + // + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceMac::SetSpeakerMute(bool enable) { + return (_mixerManager.SetSpeakerMute(enable)); +} + +int32_t AudioDeviceMac::SpeakerMute(bool& enabled) const { + bool muted(0); + + if (_mixerManager.SpeakerMute(muted) == -1) { + return -1; + } + + enabled = muted; + return 0; +} + +int32_t AudioDeviceMac::MicrophoneMuteIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.MicrophoneIsInitialized(); + + // Make an attempt to open up the + // input mixer corresponding to the currently selected input device. + // + if (!wasInitialized && InitMicrophone() == -1) { + // If we end up here it means that the selected microphone has no volume + // control, hence it is safe to state that there is no boost control + // already at this stage. + available = false; + return 0; + } + + // Check if the selected microphone has a mute control + // + _mixerManager.MicrophoneMuteIsAvailable(isAvailable); + available = isAvailable; + + // Close the initialized input mixer + // + if (!wasInitialized) { + _mixerManager.CloseMicrophone(); + } + + return 0; +} + +int32_t AudioDeviceMac::SetMicrophoneMute(bool enable) { + return (_mixerManager.SetMicrophoneMute(enable)); +} + +int32_t AudioDeviceMac::MicrophoneMute(bool& enabled) const { + bool muted(0); + + if (_mixerManager.MicrophoneMute(muted) == -1) { + return -1; + } + + enabled = muted; + return 0; +} + +int32_t AudioDeviceMac::StereoRecordingIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.MicrophoneIsInitialized(); + + if (!wasInitialized && InitMicrophone() == -1) { + // Cannot open the specified device + available = false; + return 0; + } + + // Check if the selected microphone can record stereo + // + _mixerManager.StereoRecordingIsAvailable(isAvailable); + available = isAvailable; + + // Close the initialized input mixer + // + if (!wasInitialized) { + _mixerManager.CloseMicrophone(); + } + + return 0; +} + +int32_t AudioDeviceMac::SetStereoRecording(bool enable) { + if (enable) + _recChannels = 2; + else + _recChannels = 1; + + return 0; +} + +int32_t AudioDeviceMac::StereoRecording(bool& enabled) const { + if (_recChannels == 2) + enabled = true; + else + enabled = false; + + return 0; +} + +int32_t AudioDeviceMac::StereoPlayoutIsAvailable(bool& available) { + bool isAvailable(false); + bool wasInitialized = _mixerManager.SpeakerIsInitialized(); + + if (!wasInitialized && InitSpeaker() == -1) { + // Cannot open the specified device + available = false; + return 0; + } + + // Check if the selected microphone can record stereo + // + _mixerManager.StereoPlayoutIsAvailable(isAvailable); + available = isAvailable; + + // Close the initialized input mixer + // + if (!wasInitialized) { + _mixerManager.CloseSpeaker(); + } + + return 0; +} + +int32_t AudioDeviceMac::SetStereoPlayout(bool enable) { + if (enable) + _playChannels = 2; + else + _playChannels = 1; + + return 0; +} + +int32_t AudioDeviceMac::StereoPlayout(bool& enabled) const { + if (_playChannels == 2) + enabled = true; + else + enabled = false; + + return 0; +} + +int32_t AudioDeviceMac::MicrophoneVolumeIsAvailable(bool& available) { + bool wasInitialized = _mixerManager.MicrophoneIsInitialized(); 
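+  // The query below uses the same init-probe-restore pattern as the other
+  // *IsAvailable() methods: initialize the mixer only if it was not already
+  // initialized, probe, then close it again so the call has no side effects.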
+
+  // Make an attempt to open up the
+  // input mixer corresponding to the currently selected input device.
+  //
+  if (!wasInitialized && InitMicrophone() == -1) {
+    // If we end up here it means that the selected microphone has no volume
+    // control.
+    available = false;
+    return 0;
+  }
+
+  // Given that InitMicrophone was successful, we know that a volume control
+  // exists
+  //
+  available = true;
+
+  // Close the initialized input mixer
+  //
+  if (!wasInitialized) {
+    _mixerManager.CloseMicrophone();
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::SetMicrophoneVolume(uint32_t volume) {
+  return (_mixerManager.SetMicrophoneVolume(volume));
+}
+
+int32_t AudioDeviceMac::MicrophoneVolume(uint32_t& volume) const {
+  uint32_t level(0);
+
+  if (_mixerManager.MicrophoneVolume(level) == -1) {
+    RTC_LOG(LS_WARNING) << "failed to retrieve current microphone level";
+    return -1;
+  }
+
+  volume = level;
+  return 0;
+}
+
+int32_t AudioDeviceMac::MaxMicrophoneVolume(uint32_t& maxVolume) const {
+  uint32_t maxVol(0);
+
+  if (_mixerManager.MaxMicrophoneVolume(maxVol) == -1) {
+    return -1;
+  }
+
+  maxVolume = maxVol;
+  return 0;
+}
+
+int32_t AudioDeviceMac::MinMicrophoneVolume(uint32_t& minVolume) const {
+  uint32_t minVol(0);
+
+  if (_mixerManager.MinMicrophoneVolume(minVol) == -1) {
+    return -1;
+  }
+
+  minVolume = minVol;
+  return 0;
+}
+
+int16_t AudioDeviceMac::PlayoutDevices() {
+  AudioDeviceID playDevices[MaxNumberDevices];
+  return GetNumberDevices(kAudioDevicePropertyScopeOutput, playDevices,
+                          MaxNumberDevices);
+}
+
+int32_t AudioDeviceMac::SetPlayoutDevice(uint16_t index) {
+  MutexLock lock(&mutex_);
+
+  if (_playIsInitialized) {
+    return -1;
+  }
+
+  AudioDeviceID playDevices[MaxNumberDevices];
+  uint32_t nDevices = GetNumberDevices(kAudioDevicePropertyScopeOutput,
+                                       playDevices, MaxNumberDevices);
+  RTC_LOG(LS_VERBOSE) << "number of available waveform-audio output devices is "
+                      << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _outputDeviceIndex = index;
+  _outputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::SetPlayoutDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceMac::PlayoutDeviceName(uint16_t index,
+                                          char name[kAdmMaxDeviceNameSize],
+                                          char guid[kAdmMaxGuidSize]) {
+  const uint16_t nDevices(PlayoutDevices());
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  return GetDeviceName(kAudioDevicePropertyScopeOutput, index,
+                       rtc::ArrayView<char>(name, kAdmMaxDeviceNameSize));
+}
+
+int32_t AudioDeviceMac::RecordingDeviceName(uint16_t index,
+                                            char name[kAdmMaxDeviceNameSize],
+                                            char guid[kAdmMaxGuidSize]) {
+  const uint16_t nDevices(RecordingDevices());
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  return GetDeviceName(kAudioDevicePropertyScopeInput, index,
+                       rtc::ArrayView<char>(name, kAdmMaxDeviceNameSize));
+}
+
+int16_t AudioDeviceMac::RecordingDevices() {
+  AudioDeviceID recDevices[MaxNumberDevices];
+  return GetNumberDevices(kAudioDevicePropertyScopeInput, recDevices,
+                          MaxNumberDevices);
+}
+
+int32_t AudioDeviceMac::SetRecordingDevice(uint16_t index) {
+  if (_recIsInitialized) {
+    return -1;
+  }
+
+  AudioDeviceID recDevices[MaxNumberDevices];
+  uint32_t nDevices = GetNumberDevices(kAudioDevicePropertyScopeInput,
+                                       recDevices, MaxNumberDevices);
+  RTC_LOG(LS_VERBOSE) << "number of available waveform-audio input devices is "
+                      << nDevices;
+
+  if (index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  _inputDeviceIndex = index;
+  _inputDeviceIsSpecified = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::SetRecordingDevice(
+    AudioDeviceModule::WindowsDeviceType /*device*/) {
+  RTC_LOG(LS_ERROR) << "WindowsDeviceType not supported";
+  return -1;
+}
+
+int32_t AudioDeviceMac::PlayoutIsAvailable(bool& available) {
+  available = true;
+
+  // Try to initialize the playout side
+  if (InitPlayout() == -1) {
+    available = false;
+  }
+
+  // We destroy the IOProc created by InitPlayout() in implDeviceIOProc().
+  // We must actually start playout here in order to have the IOProc
+  // deleted by calling StopPlayout().
+  if (StartPlayout() == -1) {
+    available = false;
+  }
+
+  // Cancel effect of initialization
+  if (StopPlayout() == -1) {
+    available = false;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::RecordingIsAvailable(bool& available) {
+  available = true;
+
+  // Try to initialize the recording side
+  if (InitRecording() == -1) {
+    available = false;
+  }
+
+  // We destroy the IOProc created by InitRecording() in implInDeviceIOProc().
+  // We must actually start recording here in order to have the IOProc
+  // deleted by calling StopRecording().
+  if (StartRecording() == -1) {
+    available = false;
+  }
+
+  // Cancel effect of initialization
+  if (StopRecording() == -1) {
+    available = false;
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::InitPlayout() {
+  RTC_LOG(LS_INFO) << "InitPlayout";
+  MutexLock lock(&mutex_);
+
+  if (_playing) {
+    return -1;
+  }
+
+  if (!_outputDeviceIsSpecified) {
+    return -1;
+  }
+
+  if (_playIsInitialized) {
+    return 0;
+  }
+
+  // Initialize the speaker (devices might have been added or removed)
+  if (InitSpeakerLocked() == -1) {
+    RTC_LOG(LS_WARNING) << "InitSpeaker() failed";
+  }
+
+  if (!MicrophoneIsInitialized()) {
+    // Make this call to check if we are using
+    // one or two devices (_twoDevices)
+    bool available = false;
+    if (MicrophoneIsAvailableLocked(available) == -1) {
+      RTC_LOG(LS_WARNING) << "MicrophoneIsAvailable() failed";
+    }
+  }
+
+  PaUtil_FlushRingBuffer(_paRenderBuffer);
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  _renderDelayOffsetSamples = 0;
+  _renderDelayUs = 0;
+  _renderLatencyUs = 0;
+  _renderDeviceIsAlive = 1;
+  _doStop = false;
+
+  // The internal microphone of a MacBook Pro is located under the left speaker
+  // grille. When the internal speakers are in use, we want to fully stereo
+  // pan to the right.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyDataSource, kAudioDevicePropertyScopeOutput, 0};
+  if (_macBookPro) {
+    _macBookProPanRight = false;
+    Boolean hasProperty =
+        AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+    if (hasProperty) {
+      UInt32 dataSource = 0;
+      size = sizeof(dataSource);
+      WEBRTC_CA_LOG_WARN(AudioObjectGetPropertyData(
+          _outputDeviceID, &propertyAddress, 0, NULL, &size, &dataSource));
+
+      if (dataSource == 'ispk') {
+        _macBookProPanRight = true;
+        RTC_LOG(LS_VERBOSE)
+            << "MacBook Pro using internal speakers; stereo panning right";
+      } else {
+        RTC_LOG(LS_VERBOSE) << "MacBook Pro not using internal speakers";
+      }
+
+      // Add a listener to determine if the status changes.
+      WEBRTC_CA_LOG_WARN(AudioObjectAddPropertyListener(
+          _outputDeviceID, &propertyAddress, &objectListenerProc, this));
+    }
+  }
+
+  // Get current stream description
+  propertyAddress.mSelector = kAudioDevicePropertyStreamFormat;
+  memset(&_outStreamFormat, 0, sizeof(_outStreamFormat));
+  size = sizeof(_outStreamFormat);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, &size, &_outStreamFormat));
+
+  if (_outStreamFormat.mFormatID != kAudioFormatLinearPCM) {
+    logCAMsg(rtc::LS_ERROR, "Unacceptable output stream format -> mFormatID",
+             (const char*)&_outStreamFormat.mFormatID);
+    return -1;
+  }
+
+  if (_outStreamFormat.mChannelsPerFrame > N_DEVICE_CHANNELS) {
+    RTC_LOG(LS_ERROR)
+        << "Too many channels on output device (mChannelsPerFrame = "
+        << _outStreamFormat.mChannelsPerFrame << ")";
+    return -1;
+  }
+
+  if (_outStreamFormat.mFormatFlags & kAudioFormatFlagIsNonInterleaved) {
+    RTC_LOG(LS_ERROR) << "Non-interleaved audio data is not supported. "
+                         "AudioHardware streams should not have this format.";
+    return -1;
+  }
+
+  RTC_LOG(LS_VERBOSE) << "Output stream format:";
+  RTC_LOG(LS_VERBOSE) << "mSampleRate = " << _outStreamFormat.mSampleRate
+                      << ", mChannelsPerFrame = "
+                      << _outStreamFormat.mChannelsPerFrame;
+  RTC_LOG(LS_VERBOSE) << "mBytesPerPacket = "
+                      << _outStreamFormat.mBytesPerPacket
+                      << ", mFramesPerPacket = "
+                      << _outStreamFormat.mFramesPerPacket;
+  RTC_LOG(LS_VERBOSE) << "mBytesPerFrame = " << _outStreamFormat.mBytesPerFrame
+                      << ", mBitsPerChannel = "
+                      << _outStreamFormat.mBitsPerChannel;
+  RTC_LOG(LS_VERBOSE) << "mFormatFlags = " << _outStreamFormat.mFormatFlags;
+  logCAMsg(rtc::LS_VERBOSE, "mFormatID",
+           (const char*)&_outStreamFormat.mFormatID);
+
+  // Our preferred format to work with.
+  if (_outStreamFormat.mChannelsPerFrame < 2) {
+    // Disable stereo playout when we only have one channel on the device.
+    _playChannels = 1;
+    RTC_LOG(LS_VERBOSE) << "Stereo playout unavailable on this device";
+  }
+  WEBRTC_CA_RETURN_ON_ERR(SetDesiredPlayoutFormat());
+
+  // Listen for format changes.
+  propertyAddress.mSelector = kAudioDevicePropertyStreamFormat;
+  WEBRTC_CA_LOG_WARN(AudioObjectAddPropertyListener(
+      _outputDeviceID, &propertyAddress, &objectListenerProc, this));
+
+  // Listen for processor overloads.
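+  // (A kAudioDeviceProcessorOverload notification means the device's IOProc
+  // missed its deadline, which typically manifests as an audible glitch.)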
+ propertyAddress.mSelector = kAudioDeviceProcessorOverload; + WEBRTC_CA_LOG_WARN(AudioObjectAddPropertyListener( + _outputDeviceID, &propertyAddress, &objectListenerProc, this)); + + if (_twoDevices || !_recIsInitialized) { + WEBRTC_CA_RETURN_ON_ERR(AudioDeviceCreateIOProcID( + _outputDeviceID, deviceIOProc, this, &_deviceIOProcID)); + } + + _playIsInitialized = true; + + return 0; +} + +int32_t AudioDeviceMac::InitRecording() { + RTC_LOG(LS_INFO) << "InitRecording"; + MutexLock lock(&mutex_); + + if (_recording) { + return -1; + } + + if (!_inputDeviceIsSpecified) { + return -1; + } + + if (_recIsInitialized) { + return 0; + } + + // Initialize the microphone (devices might have been added or removed) + if (InitMicrophoneLocked() == -1) { + RTC_LOG(LS_WARNING) << "InitMicrophone() failed"; + } + + if (!SpeakerIsInitialized()) { + // Make this call to check if we are using + // one or two devices (_twoDevices) + bool available = false; + if (SpeakerIsAvailableLocked(available) == -1) { + RTC_LOG(LS_WARNING) << "SpeakerIsAvailable() failed"; + } + } + + OSStatus err = noErr; + UInt32 size = 0; + + PaUtil_FlushRingBuffer(_paCaptureBuffer); + + _captureDelayUs = 0; + _captureLatencyUs = 0; + _captureDeviceIsAlive = 1; + _doStopRec = false; + + // Get current stream description + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyStreamFormat, kAudioDevicePropertyScopeInput, 0}; + memset(&_inStreamFormat, 0, sizeof(_inStreamFormat)); + size = sizeof(_inStreamFormat); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, &size, &_inStreamFormat)); + + if (_inStreamFormat.mFormatID != kAudioFormatLinearPCM) { + logCAMsg(rtc::LS_ERROR, "Unacceptable input stream format -> mFormatID", + (const char*)&_inStreamFormat.mFormatID); + return -1; + } + + if (_inStreamFormat.mChannelsPerFrame > N_DEVICE_CHANNELS) { + RTC_LOG(LS_ERROR) + << "Too many channels on input device (mChannelsPerFrame = " + << _inStreamFormat.mChannelsPerFrame << ")"; + return -1; + } + + const int io_block_size_samples = _inStreamFormat.mChannelsPerFrame * + _inStreamFormat.mSampleRate / 100 * + N_BLOCKS_IO; + if (io_block_size_samples > _captureBufSizeSamples) { + RTC_LOG(LS_ERROR) << "Input IO block size (" << io_block_size_samples + << ") is larger than ring buffer (" + << _captureBufSizeSamples << ")"; + return -1; + } + + RTC_LOG(LS_VERBOSE) << "Input stream format:"; + RTC_LOG(LS_VERBOSE) << "mSampleRate = " << _inStreamFormat.mSampleRate + << ", mChannelsPerFrame = " + << _inStreamFormat.mChannelsPerFrame; + RTC_LOG(LS_VERBOSE) << "mBytesPerPacket = " << _inStreamFormat.mBytesPerPacket + << ", mFramesPerPacket = " + << _inStreamFormat.mFramesPerPacket; + RTC_LOG(LS_VERBOSE) << "mBytesPerFrame = " << _inStreamFormat.mBytesPerFrame + << ", mBitsPerChannel = " + << _inStreamFormat.mBitsPerChannel; + RTC_LOG(LS_VERBOSE) << "mFormatFlags = " << _inStreamFormat.mFormatFlags; + logCAMsg(rtc::LS_VERBOSE, "mFormatID", + (const char*)&_inStreamFormat.mFormatID); + + // Our preferred format to work with + if (_inStreamFormat.mChannelsPerFrame >= 2 && (_recChannels == 2)) { + _inDesiredFormat.mChannelsPerFrame = 2; + } else { + // Disable stereo recording when we only have one channel on the device. 
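+    // (With SInt16 samples, mono makes mBytesPerFrame below 1 * 2 = 2 bytes
+    // instead of the 2 * 2 = 4 bytes used for stereo.)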
+ _inDesiredFormat.mChannelsPerFrame = 1; + _recChannels = 1; + RTC_LOG(LS_VERBOSE) << "Stereo recording unavailable on this device"; + } + + if (_ptrAudioBuffer) { + // Update audio buffer with the selected parameters + _ptrAudioBuffer->SetRecordingSampleRate(N_REC_SAMPLES_PER_SEC); + _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels); + } + + _inDesiredFormat.mSampleRate = N_REC_SAMPLES_PER_SEC; + _inDesiredFormat.mBytesPerPacket = + _inDesiredFormat.mChannelsPerFrame * sizeof(SInt16); + _inDesiredFormat.mFramesPerPacket = 1; + _inDesiredFormat.mBytesPerFrame = + _inDesiredFormat.mChannelsPerFrame * sizeof(SInt16); + _inDesiredFormat.mBitsPerChannel = sizeof(SInt16) * 8; + + _inDesiredFormat.mFormatFlags = + kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; +#ifdef WEBRTC_ARCH_BIG_ENDIAN + _inDesiredFormat.mFormatFlags |= kLinearPCMFormatFlagIsBigEndian; +#endif + _inDesiredFormat.mFormatID = kAudioFormatLinearPCM; + + WEBRTC_CA_RETURN_ON_ERR(AudioConverterNew(&_inStreamFormat, &_inDesiredFormat, + &_captureConverter)); + + // First try to set buffer size to desired value (10 ms * N_BLOCKS_IO) + // TODO(xians): investigate this block. + UInt32 bufByteCount = + (UInt32)((_inStreamFormat.mSampleRate / 1000.0) * 10.0 * N_BLOCKS_IO * + _inStreamFormat.mChannelsPerFrame * sizeof(Float32)); + if (_inStreamFormat.mFramesPerPacket != 0) { + if (bufByteCount % _inStreamFormat.mFramesPerPacket != 0) { + bufByteCount = + ((UInt32)(bufByteCount / _inStreamFormat.mFramesPerPacket) + 1) * + _inStreamFormat.mFramesPerPacket; + } + } + + // Ensure the buffer size is within the acceptable range provided by the + // device. + propertyAddress.mSelector = kAudioDevicePropertyBufferSizeRange; + AudioValueRange range; + size = sizeof(range); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, &size, &range)); + if (range.mMinimum > bufByteCount) { + bufByteCount = range.mMinimum; + } else if (range.mMaximum < bufByteCount) { + bufByteCount = range.mMaximum; + } + + propertyAddress.mSelector = kAudioDevicePropertyBufferSize; + size = sizeof(bufByteCount); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, size, &bufByteCount)); + + // Get capture device latency + propertyAddress.mSelector = kAudioDevicePropertyLatency; + UInt32 latency = 0; + size = sizeof(UInt32); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, &size, &latency)); + _captureLatencyUs = (UInt32)((1.0e6 * latency) / _inStreamFormat.mSampleRate); + + // Get capture stream latency + propertyAddress.mSelector = kAudioDevicePropertyStreams; + AudioStreamID stream = 0; + size = sizeof(AudioStreamID); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, &size, &stream)); + propertyAddress.mSelector = kAudioStreamPropertyLatency; + size = sizeof(UInt32); + latency = 0; + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + _inputDeviceID, &propertyAddress, 0, NULL, &size, &latency)); + _captureLatencyUs += + (UInt32)((1.0e6 * latency) / _inStreamFormat.mSampleRate); + + // Listen for format changes + // TODO(xians): should we be using kAudioDevicePropertyDeviceHasChanged? 
+  propertyAddress.mSelector = kAudioDevicePropertyStreamFormat;
+  WEBRTC_CA_LOG_WARN(AudioObjectAddPropertyListener(
+      _inputDeviceID, &propertyAddress, &objectListenerProc, this));
+
+  // Listen for processor overloads
+  propertyAddress.mSelector = kAudioDeviceProcessorOverload;
+  WEBRTC_CA_LOG_WARN(AudioObjectAddPropertyListener(
+      _inputDeviceID, &propertyAddress, &objectListenerProc, this));
+
+  if (_twoDevices) {
+    WEBRTC_CA_RETURN_ON_ERR(AudioDeviceCreateIOProcID(
+        _inputDeviceID, inDeviceIOProc, this, &_inDeviceIOProcID));
+  } else if (!_playIsInitialized) {
+    WEBRTC_CA_RETURN_ON_ERR(AudioDeviceCreateIOProcID(
+        _inputDeviceID, deviceIOProc, this, &_deviceIOProcID));
+  }
+
+  // Mark recording side as initialized
+  _recIsInitialized = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::StartRecording() {
+  RTC_LOG(LS_INFO) << "StartRecording";
+  MutexLock lock(&mutex_);
+
+  if (!_recIsInitialized) {
+    return -1;
+  }
+
+  if (_recording) {
+    return 0;
+  }
+
+  if (!_initialized) {
+    RTC_LOG(LS_ERROR) << "Recording worker thread has not been started";
+    return -1;
+  }
+
+  RTC_DCHECK(capture_worker_thread_.empty());
+  capture_worker_thread_ = rtc::PlatformThread::SpawnJoinable(
+      [this] {
+        while (CaptureWorkerThread()) {
+        }
+      },
+      "CaptureWorkerThread",
+      rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime));
+
+  OSStatus err = noErr;
+  if (_twoDevices) {
+    WEBRTC_CA_RETURN_ON_ERR(
+        AudioDeviceStart(_inputDeviceID, _inDeviceIOProcID));
+  } else if (!_playing) {
+    WEBRTC_CA_RETURN_ON_ERR(AudioDeviceStart(_inputDeviceID, _deviceIOProcID));
+  }
+
+  _recording = true;
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::StopRecording() {
+  RTC_LOG(LS_INFO) << "StopRecording";
+  MutexLock lock(&mutex_);
+
+  if (!_recIsInitialized) {
+    return 0;
+  }
+
+  OSStatus err = noErr;
+  int32_t captureDeviceIsAlive = _captureDeviceIsAlive;
+  if (_twoDevices && captureDeviceIsAlive == 1) {
+    // Recording side uses its own dedicated device and IOProc.
+    if (_recording) {
+      _recording = false;
+      _doStopRec = true;  // Signal to io proc to stop audio device
+      mutex_.Unlock();    // Cannot be under lock, risk of deadlock
+      if (!_stopEventRec.Wait(TimeDelta::Seconds(2))) {
+        MutexLock lockScoped(&mutex_);
+        RTC_LOG(LS_WARNING) << "Timed out stopping the capture IOProc. "
+                               "We may have failed to detect a device removal.";
+        WEBRTC_CA_LOG_WARN(AudioDeviceStop(_inputDeviceID, _inDeviceIOProcID));
+        WEBRTC_CA_LOG_WARN(
+            AudioDeviceDestroyIOProcID(_inputDeviceID, _inDeviceIOProcID));
+      }
+      mutex_.Lock();
+      _doStopRec = false;
+      RTC_LOG(LS_INFO) << "Recording stopped (input device)";
+    } else if (_recIsInitialized) {
+      WEBRTC_CA_LOG_WARN(
+          AudioDeviceDestroyIOProcID(_inputDeviceID, _inDeviceIOProcID));
+      RTC_LOG(LS_INFO) << "Recording uninitialized (input device)";
+    }
+  } else {
+    // We signal a stop for a shared device even when rendering has
+    // not yet ended. This is to ensure the IOProc will return early as
+    // intended (by checking `_recording`) before accessing
+    // resources we free below (e.g. the capture converter).
+    //
+    // In the case of a shared device, the IOProc will verify
+    // rendering has ended before stopping itself.
+    if (_recording && captureDeviceIsAlive == 1) {
+      _recording = false;
+      _doStop = true;   // Signal to io proc to stop audio device
+      mutex_.Unlock();  // Cannot be under lock, risk of deadlock
+      if (!_stopEvent.Wait(TimeDelta::Seconds(2))) {
+        MutexLock lockScoped(&mutex_);
+        RTC_LOG(LS_WARNING) << "Timed out stopping the shared IOProc. "
+ "We may have failed to detect a device removal."; + // We assume rendering on a shared device has stopped as well if + // the IOProc times out. + WEBRTC_CA_LOG_WARN(AudioDeviceStop(_outputDeviceID, _deviceIOProcID)); + WEBRTC_CA_LOG_WARN( + AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID)); + } + mutex_.Lock(); + _doStop = false; + RTC_LOG(LS_INFO) << "Recording stopped (shared device)"; + } else if (_recIsInitialized && !_playing && !_playIsInitialized) { + WEBRTC_CA_LOG_WARN( + AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID)); + RTC_LOG(LS_INFO) << "Recording uninitialized (shared device)"; + } + } + + // Setting this signal will allow the worker thread to be stopped. + _captureDeviceIsAlive = 0; + + if (!capture_worker_thread_.empty()) { + mutex_.Unlock(); + capture_worker_thread_.Finalize(); + mutex_.Lock(); + } + + WEBRTC_CA_LOG_WARN(AudioConverterDispose(_captureConverter)); + + // Remove listeners. + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyStreamFormat, kAudioDevicePropertyScopeInput, 0}; + WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener( + _inputDeviceID, &propertyAddress, &objectListenerProc, this)); + + propertyAddress.mSelector = kAudioDeviceProcessorOverload; + WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener( + _inputDeviceID, &propertyAddress, &objectListenerProc, this)); + + _recIsInitialized = false; + _recording = false; + + return 0; +} + +bool AudioDeviceMac::RecordingIsInitialized() const { + return (_recIsInitialized); +} + +bool AudioDeviceMac::Recording() const { + return (_recording); +} + +bool AudioDeviceMac::PlayoutIsInitialized() const { + return (_playIsInitialized); +} + +int32_t AudioDeviceMac::StartPlayout() { + RTC_LOG(LS_INFO) << "StartPlayout"; + MutexLock lock(&mutex_); + + if (!_playIsInitialized) { + return -1; + } + + if (_playing) { + return 0; + } + + RTC_DCHECK(render_worker_thread_.empty()); + render_worker_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (RenderWorkerThread()) { + } + }, + "RenderWorkerThread", + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime)); + + if (_twoDevices || !_recording) { + OSStatus err = noErr; + WEBRTC_CA_RETURN_ON_ERR(AudioDeviceStart(_outputDeviceID, _deviceIOProcID)); + } + _playing = true; + + return 0; +} + +int32_t AudioDeviceMac::StopPlayout() { + RTC_LOG(LS_INFO) << "StopPlayout"; + MutexLock lock(&mutex_); + + if (!_playIsInitialized) { + return 0; + } + + OSStatus err = noErr; + int32_t renderDeviceIsAlive = _renderDeviceIsAlive; + if (_playing && renderDeviceIsAlive == 1) { + // We signal a stop for a shared device even when capturing has not + // yet ended. This is to ensure the IOProc will return early as + // intended (by checking `_playing`) before accessing resources we + // free below (e.g. the render converter). + // + // In the case of a shared device, the IOProc will verify capturing + // has ended before stopping itself. + _playing = false; + _doStop = true; // Signal to io proc to stop audio device + mutex_.Unlock(); // Cannot be under lock, risk of deadlock + if (!_stopEvent.Wait(TimeDelta::Seconds(2))) { + MutexLock lockScoped(&mutex_); + RTC_LOG(LS_WARNING) << "Timed out stopping the render IOProc." + "We may have failed to detect a device removal."; + + // We assume capturing on a shared device has stopped as well if the + // IOProc times out. 
+        WEBRTC_CA_LOG_WARN(AudioDeviceStop(_outputDeviceID, _deviceIOProcID));
+        WEBRTC_CA_LOG_WARN(
+            AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID));
+      }
+      mutex_.Lock();
+      _doStop = false;
+      RTC_LOG(LS_INFO) << "Playout stopped";
+    } else if (_twoDevices && _playIsInitialized) {
+      WEBRTC_CA_LOG_WARN(
+          AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID));
+      RTC_LOG(LS_INFO) << "Playout uninitialized (output device)";
+    } else if (!_twoDevices && _playIsInitialized && !_recIsInitialized) {
+      WEBRTC_CA_LOG_WARN(
+          AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID));
+      RTC_LOG(LS_INFO) << "Playout uninitialized (shared device)";
+    }
+
+  // Setting this signal will allow the worker thread to be stopped.
+  _renderDeviceIsAlive = 0;
+  if (!render_worker_thread_.empty()) {
+    mutex_.Unlock();
+    render_worker_thread_.Finalize();
+    mutex_.Lock();
+  }
+
+  WEBRTC_CA_LOG_WARN(AudioConverterDispose(_renderConverter));
+
+  // Remove listeners.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyStreamFormat, kAudioDevicePropertyScopeOutput, 0};
+  WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener(
+      _outputDeviceID, &propertyAddress, &objectListenerProc, this));
+
+  propertyAddress.mSelector = kAudioDeviceProcessorOverload;
+  WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener(
+      _outputDeviceID, &propertyAddress, &objectListenerProc, this));
+
+  if (_macBookPro) {
+    Boolean hasProperty =
+        AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+    if (hasProperty) {
+      propertyAddress.mSelector = kAudioDevicePropertyDataSource;
+      WEBRTC_CA_LOG_WARN(AudioObjectRemovePropertyListener(
+          _outputDeviceID, &propertyAddress, &objectListenerProc, this));
+    }
+  }
+
+  _playIsInitialized = false;
+  _playing = false;
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::PlayoutDelay(uint16_t& delayMS) const {
+  int32_t renderDelayUs = _renderDelayUs;
+  delayMS =
+      static_cast<uint16_t>(1e-3 * (renderDelayUs + _renderLatencyUs) + 0.5);
+  return 0;
+}
+
+bool AudioDeviceMac::Playing() const {
+  return (_playing);
+}
+
+// ============================================================================
+//                                 Private Methods
+// ============================================================================
+
+int32_t AudioDeviceMac::GetNumberDevices(const AudioObjectPropertyScope scope,
+                                         AudioDeviceID scopedDeviceIds[],
+                                         const uint32_t deviceListLength) {
+  OSStatus err = noErr;
+
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal,
+      kAudioObjectPropertyElementMaster};
+  UInt32 size = 0;
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyDataSize(
+      kAudioObjectSystemObject, &propertyAddress, 0, NULL, &size));
+  if (size == 0) {
+    RTC_LOG(LS_WARNING) << "No devices";
+    return 0;
+  }
+
+  UInt32 numberDevices = size / sizeof(AudioDeviceID);
+  const auto deviceIds = std::make_unique<AudioDeviceID[]>(numberDevices);
+  AudioBufferList* bufferList = NULL;
+  UInt32 numberScopedDevices = 0;
+
+  // First check if there is a default device and list it
+  UInt32 hardwareProperty = 0;
+  if (scope == kAudioDevicePropertyScopeOutput) {
+    hardwareProperty = kAudioHardwarePropertyDefaultOutputDevice;
+  } else {
+    hardwareProperty = kAudioHardwarePropertyDefaultInputDevice;
+  }
+
+  AudioObjectPropertyAddress propertyAddressDefault = {
+      hardwareProperty, kAudioObjectPropertyScopeGlobal,
+      kAudioObjectPropertyElementMaster};
+
+  AudioDeviceID usedID;
+  UInt32 uintSize = sizeof(UInt32);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(kAudioObjectSystemObject,
&propertyAddressDefault, 0, + NULL, &uintSize, &usedID)); + if (usedID != kAudioDeviceUnknown) { + scopedDeviceIds[numberScopedDevices] = usedID; + numberScopedDevices++; + } else { + RTC_LOG(LS_WARNING) << "GetNumberDevices(): Default device unknown"; + } + + // Then list the rest of the devices + bool listOK = true; + + WEBRTC_CA_LOG_ERR(AudioObjectGetPropertyData(kAudioObjectSystemObject, + &propertyAddress, 0, NULL, &size, + deviceIds.get())); + if (err != noErr) { + listOK = false; + } else { + propertyAddress.mSelector = kAudioDevicePropertyStreamConfiguration; + propertyAddress.mScope = scope; + propertyAddress.mElement = 0; + for (UInt32 i = 0; i < numberDevices; i++) { + // Check for input channels + WEBRTC_CA_LOG_ERR(AudioObjectGetPropertyDataSize( + deviceIds[i], &propertyAddress, 0, NULL, &size)); + if (err == kAudioHardwareBadDeviceError) { + // This device doesn't actually exist; continue iterating. + continue; + } else if (err != noErr) { + listOK = false; + break; + } + + bufferList = (AudioBufferList*)malloc(size); + WEBRTC_CA_LOG_ERR(AudioObjectGetPropertyData( + deviceIds[i], &propertyAddress, 0, NULL, &size, bufferList)); + if (err != noErr) { + listOK = false; + break; + } + + if (bufferList->mNumberBuffers > 0) { + if (numberScopedDevices >= deviceListLength) { + RTC_LOG(LS_ERROR) << "Device list is not long enough"; + listOK = false; + break; + } + + scopedDeviceIds[numberScopedDevices] = deviceIds[i]; + numberScopedDevices++; + } + + free(bufferList); + bufferList = NULL; + } // for + } + + if (!listOK) { + if (bufferList) { + free(bufferList); + bufferList = NULL; + } + return -1; + } + + return numberScopedDevices; +} + +int32_t AudioDeviceMac::GetDeviceName(const AudioObjectPropertyScope scope, + const uint16_t index, + rtc::ArrayView name) { + OSStatus err = noErr; + AudioDeviceID deviceIds[MaxNumberDevices]; + + int numberDevices = GetNumberDevices(scope, deviceIds, MaxNumberDevices); + if (numberDevices < 0) { + return -1; + } else if (numberDevices == 0) { + RTC_LOG(LS_ERROR) << "No devices"; + return -1; + } + + // If the number is below the number of devices, assume it's "WEBRTC ID" + // otherwise assume it's a CoreAudio ID + AudioDeviceID usedID; + + // Check if there is a default device + bool isDefaultDevice = false; + if (index == 0) { + UInt32 hardwareProperty = 0; + if (scope == kAudioDevicePropertyScopeOutput) { + hardwareProperty = kAudioHardwarePropertyDefaultOutputDevice; + } else { + hardwareProperty = kAudioHardwarePropertyDefaultInputDevice; + } + AudioObjectPropertyAddress propertyAddress = { + hardwareProperty, kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster}; + UInt32 size = sizeof(UInt32); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + kAudioObjectSystemObject, &propertyAddress, 0, NULL, &size, &usedID)); + if (usedID == kAudioDeviceUnknown) { + RTC_LOG(LS_WARNING) << "GetDeviceName(): Default device unknown"; + } else { + isDefaultDevice = true; + } + } + + AudioObjectPropertyAddress propertyAddress = {kAudioDevicePropertyDeviceName, + scope, 0}; + + if (isDefaultDevice) { + std::array devName; + UInt32 len = devName.size(); + + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + usedID, &propertyAddress, 0, NULL, &len, devName.data())); + + rtc::SimpleStringBuilder ss(name); + ss.AppendFormat("default (%s)", devName.data()); + } else { + if (index < numberDevices) { + usedID = deviceIds[index]; + } else { + usedID = index; + } + UInt32 len = name.size(); + + 
WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + usedID, &propertyAddress, 0, NULL, &len, name.data())); + } + + return 0; +} + +int32_t AudioDeviceMac::InitDevice(const uint16_t userDeviceIndex, + AudioDeviceID& deviceId, + const bool isInput) { + OSStatus err = noErr; + UInt32 size = 0; + AudioObjectPropertyScope deviceScope; + AudioObjectPropertySelector defaultDeviceSelector; + AudioDeviceID deviceIds[MaxNumberDevices]; + + if (isInput) { + deviceScope = kAudioDevicePropertyScopeInput; + defaultDeviceSelector = kAudioHardwarePropertyDefaultInputDevice; + } else { + deviceScope = kAudioDevicePropertyScopeOutput; + defaultDeviceSelector = kAudioHardwarePropertyDefaultOutputDevice; + } + + AudioObjectPropertyAddress propertyAddress = { + defaultDeviceSelector, kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster}; + + // Get the actual device IDs + int numberDevices = + GetNumberDevices(deviceScope, deviceIds, MaxNumberDevices); + if (numberDevices < 0) { + return -1; + } else if (numberDevices == 0) { + RTC_LOG(LS_ERROR) << "InitDevice(): No devices"; + return -1; + } + + bool isDefaultDevice = false; + deviceId = kAudioDeviceUnknown; + if (userDeviceIndex == 0) { + // Try to use default system device + size = sizeof(AudioDeviceID); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + kAudioObjectSystemObject, &propertyAddress, 0, NULL, &size, &deviceId)); + if (deviceId == kAudioDeviceUnknown) { + RTC_LOG(LS_WARNING) << "No default device exists"; + } else { + isDefaultDevice = true; + } + } + + if (!isDefaultDevice) { + deviceId = deviceIds[userDeviceIndex]; + } + + // Obtain device name and manufacturer for logging. + // Also use this as a test to ensure a user-set device ID is valid. + char devName[128]; + char devManf[128]; + memset(devName, 0, sizeof(devName)); + memset(devManf, 0, sizeof(devManf)); + + propertyAddress.mSelector = kAudioDevicePropertyDeviceName; + propertyAddress.mScope = deviceScope; + propertyAddress.mElement = 0; + size = sizeof(devName); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(deviceId, &propertyAddress, + 0, NULL, &size, devName)); + + propertyAddress.mSelector = kAudioDevicePropertyDeviceManufacturer; + size = sizeof(devManf); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(deviceId, &propertyAddress, + 0, NULL, &size, devManf)); + + if (isInput) { + RTC_LOG(LS_INFO) << "Input device: " << devManf << " " << devName; + } else { + RTC_LOG(LS_INFO) << "Output device: " << devManf << " " << devName; + } + + return 0; +} + +OSStatus AudioDeviceMac::SetDesiredPlayoutFormat() { + // Our preferred format to work with. + _outDesiredFormat.mSampleRate = N_PLAY_SAMPLES_PER_SEC; + _outDesiredFormat.mChannelsPerFrame = _playChannels; + + if (_ptrAudioBuffer) { + // Update audio buffer with the selected parameters. + _ptrAudioBuffer->SetPlayoutSampleRate(N_PLAY_SAMPLES_PER_SEC); + _ptrAudioBuffer->SetPlayoutChannels((uint8_t)_playChannels); + } + + _renderDelayOffsetSamples = + _renderBufSizeSamples - N_BUFFERS_OUT * ENGINE_PLAY_BUF_SIZE_IN_SAMPLES * + _outDesiredFormat.mChannelsPerFrame; + + _outDesiredFormat.mBytesPerPacket = + _outDesiredFormat.mChannelsPerFrame * sizeof(SInt16); + // In uncompressed audio, a packet is one frame. 
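+  // (Illustrative arithmetic, assuming the default 16-bit stereo playout
+  // format: mBytesPerPacket = 2 channels * sizeof(SInt16) = 4 bytes, and
+  // mBytesPerFrame below takes the same value since one packet is one frame.)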
+  _outDesiredFormat.mFramesPerPacket = 1;
+  _outDesiredFormat.mBytesPerFrame =
+      _outDesiredFormat.mChannelsPerFrame * sizeof(SInt16);
+  _outDesiredFormat.mBitsPerChannel = sizeof(SInt16) * 8;
+
+  _outDesiredFormat.mFormatFlags =
+      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
+#ifdef WEBRTC_ARCH_BIG_ENDIAN
+  _outDesiredFormat.mFormatFlags |= kLinearPCMFormatFlagIsBigEndian;
+#endif
+  _outDesiredFormat.mFormatID = kAudioFormatLinearPCM;
+
+  OSStatus err = noErr;
+  WEBRTC_CA_RETURN_ON_ERR(AudioConverterNew(
+      &_outDesiredFormat, &_outStreamFormat, &_renderConverter));
+
+  // Try to set the I/O buffer size to the desired value (20 ms).
+  const uint16_t kPlayBufDelayFixed = 20;
+  UInt32 bufByteCount = static_cast<UInt32>(
+      (_outStreamFormat.mSampleRate / 1000.0) * kPlayBufDelayFixed *
+      _outStreamFormat.mChannelsPerFrame * sizeof(Float32));
+  if (_outStreamFormat.mFramesPerPacket != 0) {
+    if (bufByteCount % _outStreamFormat.mFramesPerPacket != 0) {
+      bufByteCount = (static_cast<UInt32>(bufByteCount /
+                                          _outStreamFormat.mFramesPerPacket) +
+                      1) *
+                     _outStreamFormat.mFramesPerPacket;
+    }
+  }
+
+  // Ensure the buffer size is within the range provided by the device.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyDataSource, kAudioDevicePropertyScopeOutput, 0};
+  propertyAddress.mSelector = kAudioDevicePropertyBufferSizeRange;
+  AudioValueRange range;
+  UInt32 size = sizeof(range);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, &size, &range));
+  if (range.mMinimum > bufByteCount) {
+    bufByteCount = range.mMinimum;
+  } else if (range.mMaximum < bufByteCount) {
+    bufByteCount = range.mMaximum;
+  }
+
+  propertyAddress.mSelector = kAudioDevicePropertyBufferSize;
+  size = sizeof(bufByteCount);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, size, &bufByteCount));
+
+  // Get render device latency.
+  propertyAddress.mSelector = kAudioDevicePropertyLatency;
+  UInt32 latency = 0;
+  size = sizeof(UInt32);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, &size, &latency));
+  _renderLatencyUs =
+      static_cast<uint32_t>((1.0e6 * latency) / _outStreamFormat.mSampleRate);
+
+  // Get render stream latency.
+  propertyAddress.mSelector = kAudioDevicePropertyStreams;
+  AudioStreamID stream = 0;
+  size = sizeof(AudioStreamID);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, &size, &stream));
+  propertyAddress.mSelector = kAudioStreamPropertyLatency;
+  size = sizeof(UInt32);
+  latency = 0;
+  // kAudioStreamPropertyLatency is a property of the stream object fetched
+  // above, so it is queried on `stream` rather than on the device ID.
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      stream, &propertyAddress, 0, NULL, &size, &latency));
+  _renderLatencyUs +=
+      static_cast<uint32_t>((1.0e6 * latency) / _outStreamFormat.mSampleRate);
+
+  RTC_LOG(LS_VERBOSE) << "initial playout status: _renderDelayOffsetSamples="
+                      << _renderDelayOffsetSamples
+                      << ", _renderDelayUs=" << _renderDelayUs
+                      << ", _renderLatencyUs=" << _renderLatencyUs;
+  return 0;
+}
+
+OSStatus AudioDeviceMac::objectListenerProc(
+    AudioObjectID objectId,
+    UInt32 numberAddresses,
+    const AudioObjectPropertyAddress addresses[],
+    void* clientData) {
+  AudioDeviceMac* ptrThis = (AudioDeviceMac*)clientData;
+  RTC_DCHECK(ptrThis != NULL);
+
+  ptrThis->implObjectListenerProc(objectId, numberAddresses, addresses);
+
+  // AudioObjectPropertyListenerProc functions are supposed to return 0
+  return 0;
+}
+
+OSStatus AudioDeviceMac::implObjectListenerProc(
+    const AudioObjectID objectId,
+    const UInt32 numberAddresses,
+    const AudioObjectPropertyAddress addresses[]) {
+  RTC_LOG(LS_VERBOSE) << "AudioDeviceMac::implObjectListenerProc()";
+
+  for (UInt32 i = 0; i < numberAddresses; i++) {
+    if (addresses[i].mSelector == kAudioHardwarePropertyDevices) {
+      HandleDeviceChange();
+    } else if (addresses[i].mSelector == kAudioDevicePropertyStreamFormat) {
+      HandleStreamFormatChange(objectId, addresses[i]);
+    } else if (addresses[i].mSelector == kAudioDevicePropertyDataSource) {
+      HandleDataSourceChange(objectId, addresses[i]);
+    } else if (addresses[i].mSelector == kAudioDeviceProcessorOverload) {
+      HandleProcessorOverload(addresses[i]);
+    }
+  }
+
+  return 0;
+}
+
+int32_t AudioDeviceMac::HandleDeviceChange() {
+  OSStatus err = noErr;
+
+  RTC_LOG(LS_VERBOSE) << "kAudioHardwarePropertyDevices";
+
+  // A device has changed. Check if our registered devices have been removed.
+  // Ensure the devices have been initialized, meaning the IDs are valid.
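+  // The queries below read kAudioDevicePropertyDeviceIsAlive; a removed
+  // device is expected to either report 0 or fail with
+  // kAudioHardwareBadDeviceError, both of which are treated as removal.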
+ if (MicrophoneIsInitialized()) { + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyDeviceIsAlive, kAudioDevicePropertyScopeInput, 0}; + UInt32 deviceIsAlive = 1; + UInt32 size = sizeof(UInt32); + err = AudioObjectGetPropertyData(_inputDeviceID, &propertyAddress, 0, NULL, + &size, &deviceIsAlive); + + if (err == kAudioHardwareBadDeviceError || deviceIsAlive == 0) { + RTC_LOG(LS_WARNING) << "Capture device is not alive (probably removed)"; + _captureDeviceIsAlive = 0; + _mixerManager.CloseMicrophone(); + } else if (err != noErr) { + logCAMsg(rtc::LS_ERROR, "Error in AudioDeviceGetProperty()", + (const char*)&err); + return -1; + } + } + + if (SpeakerIsInitialized()) { + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyDeviceIsAlive, kAudioDevicePropertyScopeOutput, 0}; + UInt32 deviceIsAlive = 1; + UInt32 size = sizeof(UInt32); + err = AudioObjectGetPropertyData(_outputDeviceID, &propertyAddress, 0, NULL, + &size, &deviceIsAlive); + + if (err == kAudioHardwareBadDeviceError || deviceIsAlive == 0) { + RTC_LOG(LS_WARNING) << "Render device is not alive (probably removed)"; + _renderDeviceIsAlive = 0; + _mixerManager.CloseSpeaker(); + } else if (err != noErr) { + logCAMsg(rtc::LS_ERROR, "Error in AudioDeviceGetProperty()", + (const char*)&err); + return -1; + } + } + + return 0; +} + +int32_t AudioDeviceMac::HandleStreamFormatChange( + const AudioObjectID objectId, + const AudioObjectPropertyAddress propertyAddress) { + OSStatus err = noErr; + + RTC_LOG(LS_VERBOSE) << "Stream format changed"; + + if (objectId != _inputDeviceID && objectId != _outputDeviceID) { + return 0; + } + + // Get the new device format + AudioStreamBasicDescription streamFormat; + UInt32 size = sizeof(streamFormat); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + objectId, &propertyAddress, 0, NULL, &size, &streamFormat)); + + if (streamFormat.mFormatID != kAudioFormatLinearPCM) { + logCAMsg(rtc::LS_ERROR, "Unacceptable input stream format -> mFormatID", + (const char*)&streamFormat.mFormatID); + return -1; + } + + if (streamFormat.mChannelsPerFrame > N_DEVICE_CHANNELS) { + RTC_LOG(LS_ERROR) << "Too many channels on device (mChannelsPerFrame = " + << streamFormat.mChannelsPerFrame << ")"; + return -1; + } + + if (_ptrAudioBuffer && streamFormat.mChannelsPerFrame != _recChannels) { + RTC_LOG(LS_ERROR) << "Changing channels not supported (mChannelsPerFrame = " + << streamFormat.mChannelsPerFrame << ")"; + return -1; + } + + RTC_LOG(LS_VERBOSE) << "Stream format:"; + RTC_LOG(LS_VERBOSE) << "mSampleRate = " << streamFormat.mSampleRate + << ", mChannelsPerFrame = " + << streamFormat.mChannelsPerFrame; + RTC_LOG(LS_VERBOSE) << "mBytesPerPacket = " << streamFormat.mBytesPerPacket + << ", mFramesPerPacket = " + << streamFormat.mFramesPerPacket; + RTC_LOG(LS_VERBOSE) << "mBytesPerFrame = " << streamFormat.mBytesPerFrame + << ", mBitsPerChannel = " << streamFormat.mBitsPerChannel; + RTC_LOG(LS_VERBOSE) << "mFormatFlags = " << streamFormat.mFormatFlags; + logCAMsg(rtc::LS_VERBOSE, "mFormatID", (const char*)&streamFormat.mFormatID); + + if (propertyAddress.mScope == kAudioDevicePropertyScopeInput) { + const int io_block_size_samples = streamFormat.mChannelsPerFrame * + streamFormat.mSampleRate / 100 * + N_BLOCKS_IO; + if (io_block_size_samples > _captureBufSizeSamples) { + RTC_LOG(LS_ERROR) << "Input IO block size (" << io_block_size_samples + << ") is larger than ring buffer (" + << _captureBufSizeSamples << ")"; + return -1; + } + + memcpy(&_inStreamFormat, &streamFormat, 
sizeof(streamFormat)); + + if (_inStreamFormat.mChannelsPerFrame >= 2 && (_recChannels == 2)) { + _inDesiredFormat.mChannelsPerFrame = 2; + } else { + // Disable stereo recording when we only have one channel on the device. + _inDesiredFormat.mChannelsPerFrame = 1; + _recChannels = 1; + RTC_LOG(LS_VERBOSE) << "Stereo recording unavailable on this device"; + } + + // Recreate the converter with the new format + // TODO(xians): make this thread safe + WEBRTC_CA_RETURN_ON_ERR(AudioConverterDispose(_captureConverter)); + + WEBRTC_CA_RETURN_ON_ERR(AudioConverterNew(&streamFormat, &_inDesiredFormat, + &_captureConverter)); + } else { + memcpy(&_outStreamFormat, &streamFormat, sizeof(streamFormat)); + + // Our preferred format to work with + if (_outStreamFormat.mChannelsPerFrame < 2) { + _playChannels = 1; + RTC_LOG(LS_VERBOSE) << "Stereo playout unavailable on this device"; + } + WEBRTC_CA_RETURN_ON_ERR(SetDesiredPlayoutFormat()); + } + return 0; +} + +int32_t AudioDeviceMac::HandleDataSourceChange( + const AudioObjectID objectId, + const AudioObjectPropertyAddress propertyAddress) { + OSStatus err = noErr; + + if (_macBookPro && + propertyAddress.mScope == kAudioDevicePropertyScopeOutput) { + RTC_LOG(LS_VERBOSE) << "Data source changed"; + + _macBookProPanRight = false; + UInt32 dataSource = 0; + UInt32 size = sizeof(UInt32); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData( + objectId, &propertyAddress, 0, NULL, &size, &dataSource)); + if (dataSource == 'ispk') { + _macBookProPanRight = true; + RTC_LOG(LS_VERBOSE) + << "MacBook Pro using internal speakers; stereo panning right"; + } else { + RTC_LOG(LS_VERBOSE) << "MacBook Pro not using internal speakers"; + } + } + + return 0; +} +int32_t AudioDeviceMac::HandleProcessorOverload( + const AudioObjectPropertyAddress propertyAddress) { + // TODO(xians): we probably want to notify the user in some way of the + // overload. However, the Windows interpretations of these errors seem to + // be more severe than what ProcessorOverload is thrown for. + // + // We don't log the notification, as it's sent from the HAL's IO thread. We + // don't want to slow it down even further. 
+  if (propertyAddress.mScope == kAudioDevicePropertyScopeInput) {
+    // RTC_LOG(LS_WARNING) << "Capture processor overload";
+    //_callback->ProblemIsReported(
+    // SndCardStreamObserver::ERecordingProblem);
+  } else {
+    // RTC_LOG(LS_WARNING) << "Render processor overload";
+    //_callback->ProblemIsReported(
+    // SndCardStreamObserver::EPlaybackProblem);
+  }
+
+  return 0;
+}
+
+// ============================================================================
+// Thread Methods
+// ============================================================================
+
+OSStatus AudioDeviceMac::deviceIOProc(AudioDeviceID,
+                                      const AudioTimeStamp*,
+                                      const AudioBufferList* inputData,
+                                      const AudioTimeStamp* inputTime,
+                                      AudioBufferList* outputData,
+                                      const AudioTimeStamp* outputTime,
+                                      void* clientData) {
+  AudioDeviceMac* ptrThis = (AudioDeviceMac*)clientData;
+  RTC_DCHECK(ptrThis != NULL);
+
+  ptrThis->implDeviceIOProc(inputData, inputTime, outputData, outputTime);
+
+  // AudioDeviceIOProc functions are supposed to return 0
+  return 0;
+}
+
+OSStatus AudioDeviceMac::outConverterProc(AudioConverterRef,
+                                          UInt32* numberDataPackets,
+                                          AudioBufferList* data,
+                                          AudioStreamPacketDescription**,
+                                          void* userData) {
+  AudioDeviceMac* ptrThis = (AudioDeviceMac*)userData;
+  RTC_DCHECK(ptrThis != NULL);
+
+  return ptrThis->implOutConverterProc(numberDataPackets, data);
+}
+
+OSStatus AudioDeviceMac::inDeviceIOProc(AudioDeviceID,
+                                        const AudioTimeStamp*,
+                                        const AudioBufferList* inputData,
+                                        const AudioTimeStamp* inputTime,
+                                        AudioBufferList*,
+                                        const AudioTimeStamp*,
+                                        void* clientData) {
+  AudioDeviceMac* ptrThis = (AudioDeviceMac*)clientData;
+  RTC_DCHECK(ptrThis != NULL);
+
+  ptrThis->implInDeviceIOProc(inputData, inputTime);
+
+  // AudioDeviceIOProc functions are supposed to return 0
+  return 0;
+}
+
+OSStatus AudioDeviceMac::inConverterProc(
+    AudioConverterRef,
+    UInt32* numberDataPackets,
+    AudioBufferList* data,
+    AudioStreamPacketDescription** /*dataPacketDescription*/,
+    void* userData) {
+  AudioDeviceMac* ptrThis = static_cast<AudioDeviceMac*>(userData);
+  RTC_DCHECK(ptrThis != NULL);
+
+  return ptrThis->implInConverterProc(numberDataPackets, data);
+}
+
+OSStatus AudioDeviceMac::implDeviceIOProc(const AudioBufferList* inputData,
+                                          const AudioTimeStamp* inputTime,
+                                          AudioBufferList* outputData,
+                                          const AudioTimeStamp* outputTime) {
+  OSStatus err = noErr;
+  UInt64 outputTimeNs = AudioConvertHostTimeToNanos(outputTime->mHostTime);
+  UInt64 nowNs = AudioConvertHostTimeToNanos(AudioGetCurrentHostTime());
+
+  if (!_twoDevices && _recording) {
+    implInDeviceIOProc(inputData, inputTime);
+  }
+
+  // Check if we should close down audio device
+  // Double-checked locking optimization to remove locking overhead
+  if (_doStop) {
+    MutexLock lock(&mutex_);
+    if (_doStop) {
+      if (_twoDevices || (!_recording && !_playing)) {
+        // In the case of a shared device, the single driving ioProc
+        // is stopped here
+        WEBRTC_CA_LOG_ERR(AudioDeviceStop(_outputDeviceID, _deviceIOProcID));
+        WEBRTC_CA_LOG_WARN(
+            AudioDeviceDestroyIOProcID(_outputDeviceID, _deviceIOProcID));
+        if (err == noErr) {
+          RTC_LOG(LS_VERBOSE) << "Playout or shared device stopped";
+        }
+      }
+
+      _doStop = false;
+      _stopEvent.Set();
+      return 0;
+    }
+  }
+
+  if (!_playing) {
+    // This can be the case when a shared device is capturing but not
+    // rendering. We allow the checks above before returning to avoid a
+    // timeout when capturing is stopped.
+    return 0;
+  }
+
+  RTC_DCHECK(_outStreamFormat.mBytesPerFrame != 0);
+  UInt32 size =
+      outputData->mBuffers->mDataByteSize / _outStreamFormat.mBytesPerFrame;
+
+  // TODO(xians): signal an error somehow?
+  err = AudioConverterFillComplexBuffer(_renderConverter, outConverterProc,
+                                        this, &size, outputData, NULL);
+  if (err != noErr) {
+    if (err == 1) {
+      // This is our own error.
+      RTC_LOG(LS_ERROR) << "Error in AudioConverterFillComplexBuffer()";
+      return 1;
+    } else {
+      logCAMsg(rtc::LS_ERROR, "Error in AudioConverterFillComplexBuffer()",
+               (const char*)&err);
+      return 1;
+    }
+  }
+
+  ring_buffer_size_t bufSizeSamples =
+      PaUtil_GetRingBufferReadAvailable(_paRenderBuffer);
+
+  int32_t renderDelayUs =
+      static_cast<int32_t>(1e-3 * (outputTimeNs - nowNs) + 0.5);
+  renderDelayUs += static_cast<int32_t>(
+      (1.0e6 * bufSizeSamples) / _outDesiredFormat.mChannelsPerFrame /
+          _outDesiredFormat.mSampleRate +
+      0.5);
+
+  _renderDelayUs = renderDelayUs;
+
+  return 0;
+}
+
+OSStatus AudioDeviceMac::implOutConverterProc(UInt32* numberDataPackets,
+                                              AudioBufferList* data) {
+  RTC_DCHECK(data->mNumberBuffers == 1);
+  ring_buffer_size_t numSamples =
+      *numberDataPackets * _outDesiredFormat.mChannelsPerFrame;
+
+  data->mBuffers->mNumberChannels = _outDesiredFormat.mChannelsPerFrame;
+  // Always give the converter as much as it wants, zero padding as required.
+  data->mBuffers->mDataByteSize =
+      *numberDataPackets * _outDesiredFormat.mBytesPerPacket;
+  data->mBuffers->mData = _renderConvertData;
+  memset(_renderConvertData, 0, sizeof(_renderConvertData));
+
+  PaUtil_ReadRingBuffer(_paRenderBuffer, _renderConvertData, numSamples);
+
+  kern_return_t kernErr = semaphore_signal_all(_renderSemaphore);
+  if (kernErr != KERN_SUCCESS) {
+    RTC_LOG(LS_ERROR) << "semaphore_signal_all() error: " << kernErr;
+    return 1;
+  }
+
+  return 0;
+}
+
+OSStatus AudioDeviceMac::implInDeviceIOProc(const AudioBufferList* inputData,
+                                            const AudioTimeStamp* inputTime) {
+  OSStatus err = noErr;
+  UInt64 inputTimeNs = AudioConvertHostTimeToNanos(inputTime->mHostTime);
+  UInt64 nowNs = AudioConvertHostTimeToNanos(AudioGetCurrentHostTime());
+
+  // Check if we should close down audio device
+  // Double-checked locking optimization to remove locking overhead
+  if (_doStopRec) {
+    MutexLock lock(&mutex_);
+    if (_doStopRec) {
+      // This will be signalled only when a shared device is not in use.
+      WEBRTC_CA_LOG_ERR(AudioDeviceStop(_inputDeviceID, _inDeviceIOProcID));
+      WEBRTC_CA_LOG_WARN(
+          AudioDeviceDestroyIOProcID(_inputDeviceID, _inDeviceIOProcID));
+      if (err == noErr) {
+        RTC_LOG(LS_VERBOSE) << "Recording device stopped";
+      }
+
+      _doStopRec = false;
+      _stopEventRec.Set();
+      return 0;
+    }
+  }
+
+  if (!_recording) {
+    // Allow above checks to avoid a timeout on stopping capture.
+    return 0;
+  }
+
+  ring_buffer_size_t bufSizeSamples =
+      PaUtil_GetRingBufferReadAvailable(_paCaptureBuffer);
+
+  int32_t captureDelayUs =
+      static_cast<int32_t>(1e-3 * (nowNs - inputTimeNs) + 0.5);
+  captureDelayUs += static_cast<int32_t>(
+      (1.0e6 * bufSizeSamples) / _inStreamFormat.mChannelsPerFrame /
+          _inStreamFormat.mSampleRate +
+      0.5);
+
+  _captureDelayUs = captureDelayUs;
+
+  RTC_DCHECK(inputData->mNumberBuffers == 1);
+  ring_buffer_size_t numSamples = inputData->mBuffers->mDataByteSize *
+                                  _inStreamFormat.mChannelsPerFrame /
+                                  _inStreamFormat.mBytesPerPacket;
+  PaUtil_WriteRingBuffer(_paCaptureBuffer, inputData->mBuffers->mData,
+                         numSamples);
+
+  kern_return_t kernErr = semaphore_signal_all(_captureSemaphore);
+  if (kernErr != KERN_SUCCESS) {
+    RTC_LOG(LS_ERROR) << "semaphore_signal_all() error: " << kernErr;
+  }
+
+  return err;
+}
+
+OSStatus AudioDeviceMac::implInConverterProc(UInt32* numberDataPackets,
+                                             AudioBufferList* data) {
+  RTC_DCHECK(data->mNumberBuffers == 1);
+  ring_buffer_size_t numSamples =
+      *numberDataPackets * _inStreamFormat.mChannelsPerFrame;
+
+  while (PaUtil_GetRingBufferReadAvailable(_paCaptureBuffer) < numSamples) {
+    mach_timespec_t timeout;
+    timeout.tv_sec = 0;
+    timeout.tv_nsec = TIMER_PERIOD_MS;
+
+    kern_return_t kernErr = semaphore_timedwait(_captureSemaphore, timeout);
+    if (kernErr == KERN_OPERATION_TIMED_OUT) {
+      int32_t signal = _captureDeviceIsAlive;
+      if (signal == 0) {
+        // The capture device is no longer alive; stop the worker thread.
+        *numberDataPackets = 0;
+        return 1;
+      }
+    } else if (kernErr != KERN_SUCCESS) {
+      RTC_LOG(LS_ERROR) << "semaphore_timedwait() error: " << kernErr;
+    }
+  }
+
+  // Pass the read pointer directly to the converter to avoid a memcpy.
+  void* dummyPtr;
+  ring_buffer_size_t dummySize;
+  PaUtil_GetRingBufferReadRegions(_paCaptureBuffer, numSamples,
+                                  &data->mBuffers->mData, &numSamples,
+                                  &dummyPtr, &dummySize);
+  PaUtil_AdvanceRingBufferReadIndex(_paCaptureBuffer, numSamples);
+
+  data->mBuffers->mNumberChannels = _inStreamFormat.mChannelsPerFrame;
+  *numberDataPackets = numSamples / _inStreamFormat.mChannelsPerFrame;
+  data->mBuffers->mDataByteSize =
+      *numberDataPackets * _inStreamFormat.mBytesPerPacket;
+
+  return 0;
+}
+
+bool AudioDeviceMac::RenderWorkerThread() {
+  ring_buffer_size_t numSamples =
+      ENGINE_PLAY_BUF_SIZE_IN_SAMPLES * _outDesiredFormat.mChannelsPerFrame;
+  while (PaUtil_GetRingBufferWriteAvailable(_paRenderBuffer) -
+             _renderDelayOffsetSamples <
+         numSamples) {
+    mach_timespec_t timeout;
+    timeout.tv_sec = 0;
+    timeout.tv_nsec = TIMER_PERIOD_MS;
+
+    kern_return_t kernErr = semaphore_timedwait(_renderSemaphore, timeout);
+    if (kernErr == KERN_OPERATION_TIMED_OUT) {
+      int32_t signal = _renderDeviceIsAlive;
+      if (signal == 0) {
+        // The render device is no longer alive; stop the worker thread.
+        return false;
+      }
+    } else if (kernErr != KERN_SUCCESS) {
+      RTC_LOG(LS_ERROR) << "semaphore_timedwait() error: " << kernErr;
+    }
+  }
+
+  int8_t playBuffer[4 * ENGINE_PLAY_BUF_SIZE_IN_SAMPLES];
+
+  if (!_ptrAudioBuffer) {
+    RTC_LOG(LS_ERROR) << "render AudioBuffer is invalid";
+    return false;
+  }
+
+  // Ask for new PCM data to be played out using the AudioDeviceBuffer.
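+  // (With the 48 kHz, 10 ms defaults from the header this requests
+  // ENGINE_PLAY_BUF_SIZE_IN_SAMPLES = 480 frames per iteration.)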
+  uint32_t nSamples =
+      _ptrAudioBuffer->RequestPlayoutData(ENGINE_PLAY_BUF_SIZE_IN_SAMPLES);
+
+  nSamples = _ptrAudioBuffer->GetPlayoutData(playBuffer);
+  if (nSamples != ENGINE_PLAY_BUF_SIZE_IN_SAMPLES) {
+    RTC_LOG(LS_ERROR) << "invalid number of output samples (" << nSamples
+                      << ")";
+  }
+
+  uint32_t nOutSamples = nSamples * _outDesiredFormat.mChannelsPerFrame;
+
+  SInt16* pPlayBuffer = (SInt16*)&playBuffer;
+  if (_macBookProPanRight && (_playChannels == 2)) {
+    // Mix entirely into the right channel and zero the left channel.
+    SInt32 sampleInt32 = 0;
+    for (uint32_t sampleIdx = 0; sampleIdx < nOutSamples; sampleIdx += 2) {
+      sampleInt32 = pPlayBuffer[sampleIdx];
+      sampleInt32 += pPlayBuffer[sampleIdx + 1];
+      sampleInt32 /= 2;
+
+      if (sampleInt32 > 32767) {
+        sampleInt32 = 32767;
+      } else if (sampleInt32 < -32768) {
+        sampleInt32 = -32768;
+      }
+
+      pPlayBuffer[sampleIdx] = 0;
+      pPlayBuffer[sampleIdx + 1] = static_cast<SInt16>(sampleInt32);
+    }
+  }
+
+  PaUtil_WriteRingBuffer(_paRenderBuffer, pPlayBuffer, nOutSamples);
+
+  return true;
+}
+
+bool AudioDeviceMac::CaptureWorkerThread() {
+  OSStatus err = noErr;
+  UInt32 noRecSamples =
+      ENGINE_REC_BUF_SIZE_IN_SAMPLES * _inDesiredFormat.mChannelsPerFrame;
+  SInt16 recordBuffer[noRecSamples];
+  UInt32 size = ENGINE_REC_BUF_SIZE_IN_SAMPLES;
+
+  AudioBufferList engineBuffer;
+  engineBuffer.mNumberBuffers = 1;  // Interleaved channels.
+  engineBuffer.mBuffers->mNumberChannels = _inDesiredFormat.mChannelsPerFrame;
+  engineBuffer.mBuffers->mDataByteSize =
+      _inDesiredFormat.mBytesPerPacket * noRecSamples;
+  engineBuffer.mBuffers->mData = recordBuffer;
+
+  err = AudioConverterFillComplexBuffer(_captureConverter, inConverterProc,
+                                        this, &size, &engineBuffer, NULL);
+  if (err != noErr) {
+    if (err == 1) {
+      // This is our own error.
+      return false;
+    } else {
+      logCAMsg(rtc::LS_ERROR, "Error in AudioConverterFillComplexBuffer()",
+               (const char*)&err);
+      return false;
+    }
+  }
+
+  // TODO(xians): what if the returned size is incorrect?
+  if (size == ENGINE_REC_BUF_SIZE_IN_SAMPLES) {
+    int32_t msecOnPlaySide;
+    int32_t msecOnRecordSide;
+
+    int32_t captureDelayUs = _captureDelayUs;
+    int32_t renderDelayUs = _renderDelayUs;
+
+    msecOnPlaySide =
+        static_cast<int32_t>(1e-3 * (renderDelayUs + _renderLatencyUs) + 0.5);
+    msecOnRecordSide =
+        static_cast<int32_t>(1e-3 * (captureDelayUs + _captureLatencyUs) + 0.5);
+
+    if (!_ptrAudioBuffer) {
+      RTC_LOG(LS_ERROR) << "capture AudioBuffer is invalid";
+      return false;
+    }
+
+    // Store the recorded buffer (no action will be taken if the number of
+    // recorded samples is not a full buffer).
+    _ptrAudioBuffer->SetRecordedBuffer((int8_t*)&recordBuffer, (uint32_t)size);
+    _ptrAudioBuffer->SetVQEData(msecOnPlaySide, msecOnRecordSide);
+    _ptrAudioBuffer->SetTypingStatus(KeyPressed());
+
+    // Deliver recorded samples at specified sample rate, mic level etc.
+    // to the observer using callback.
+    _ptrAudioBuffer->DeliverRecordedData();
+  }
+
+  return true;
+}
+
+bool AudioDeviceMac::KeyPressed() {
+  bool key_down = false;
+  // Loop through all Mac virtual key constant values.
+  for (unsigned int key_index = 0; key_index < arraysize(prev_key_state_);
+       ++key_index) {
+    bool keyState =
+        CGEventSourceKeyState(kCGEventSourceStateHIDSystemState, key_index);
+    // A false -> true change in keymap means a key is pressed.
+    key_down |= (keyState && !prev_key_state_[key_index]);
+    // Save current state.
+    prev_key_state_[key_index] = keyState;
+  }
+  return key_down;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.h b/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.h
new file mode 100644
index 0000000000..bb06395d03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/mac/audio_device_mac.h
@@ -0,0 +1,350 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_DEVICE_MAC_H_
+#define AUDIO_DEVICE_AUDIO_DEVICE_MAC_H_
+
+#include <AudioToolbox/AudioConverter.h>
+#include <CoreAudio/CoreAudio.h>
+#include <mach/semaphore.h>
+
+#include <atomic>
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_device/audio_device_generic.h"
+#include "modules/audio_device/mac/audio_mixer_manager_mac.h"
+#include "rtc_base/event.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+struct PaUtilRingBuffer;
+
+namespace webrtc {
+
+const uint32_t N_REC_SAMPLES_PER_SEC = 48000;
+const uint32_t N_PLAY_SAMPLES_PER_SEC = 48000;
+
+const uint32_t N_REC_CHANNELS = 1;   // default is mono recording
+const uint32_t N_PLAY_CHANNELS = 2;  // default is stereo playout
+const uint32_t N_DEVICE_CHANNELS = 64;
+
+const int kBufferSizeMs = 10;
+
+const uint32_t ENGINE_REC_BUF_SIZE_IN_SAMPLES =
+    N_REC_SAMPLES_PER_SEC * kBufferSizeMs / 1000;
+const uint32_t ENGINE_PLAY_BUF_SIZE_IN_SAMPLES =
+    N_PLAY_SAMPLES_PER_SEC * kBufferSizeMs / 1000;
+
+const int N_BLOCKS_IO = 2;
+const int N_BUFFERS_IN = 2;   // Must be at least N_BLOCKS_IO.
+const int N_BUFFERS_OUT = 3;  // Must be at least N_BLOCKS_IO.
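+// Illustrative arithmetic for the constants above and below, assuming the
+// 48 kHz defaults: each 10 ms engine block holds 480 samples per channel, so
+// REC_BUF_SIZE_IN_SAMPLES = 480 * 64 * 2 = 61440 and
+// PLAY_BUF_SIZE_IN_SAMPLES = 480 * 2 * 3 = 2880. Note that TIMER_PERIOD_MS,
+// despite its name, is used as a mach_timespec_t::tv_nsec value in the worker
+// threads: 2 * 10 * N_BLOCKS_IO * 1000000 ns = 40 ms.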
+ +const uint32_t TIMER_PERIOD_MS = 2 * 10 * N_BLOCKS_IO * 1000000; + +const uint32_t REC_BUF_SIZE_IN_SAMPLES = + ENGINE_REC_BUF_SIZE_IN_SAMPLES * N_DEVICE_CHANNELS * N_BUFFERS_IN; +const uint32_t PLAY_BUF_SIZE_IN_SAMPLES = + ENGINE_PLAY_BUF_SIZE_IN_SAMPLES * N_PLAY_CHANNELS * N_BUFFERS_OUT; + +const int kGetMicVolumeIntervalMs = 1000; + +class AudioDeviceMac : public AudioDeviceGeneric { + public: + AudioDeviceMac(); + ~AudioDeviceMac(); + + // Retrieve the currently utilized audio layer + virtual int32_t ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const; + + // Main initializaton and termination + virtual InitStatus Init() RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t Terminate() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool Initialized() const; + + // Device enumeration + virtual int16_t PlayoutDevices(); + virtual int16_t RecordingDevices(); + virtual int32_t PlayoutDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]); + virtual int32_t RecordingDeviceName(uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]); + + // Device selection + virtual int32_t SetPlayoutDevice(uint16_t index) RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t SetPlayoutDevice(AudioDeviceModule::WindowsDeviceType device); + virtual int32_t SetRecordingDevice(uint16_t index); + virtual int32_t SetRecordingDevice( + AudioDeviceModule::WindowsDeviceType device); + + // Audio transport initialization + virtual int32_t PlayoutIsAvailable(bool& available); + virtual int32_t InitPlayout() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool PlayoutIsInitialized() const; + virtual int32_t RecordingIsAvailable(bool& available); + virtual int32_t InitRecording() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool RecordingIsInitialized() const; + + // Audio transport control + virtual int32_t StartPlayout() RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t StopPlayout() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool Playing() const; + virtual int32_t StartRecording() RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t StopRecording() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool Recording() const; + + // Audio mixer initialization + virtual int32_t InitSpeaker() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool SpeakerIsInitialized() const; + virtual int32_t InitMicrophone() RTC_LOCKS_EXCLUDED(mutex_); + virtual bool MicrophoneIsInitialized() const; + + // Speaker volume controls + virtual int32_t SpeakerVolumeIsAvailable(bool& available) + RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t SetSpeakerVolume(uint32_t volume); + virtual int32_t SpeakerVolume(uint32_t& volume) const; + virtual int32_t MaxSpeakerVolume(uint32_t& maxVolume) const; + virtual int32_t MinSpeakerVolume(uint32_t& minVolume) const; + + // Microphone volume controls + virtual int32_t MicrophoneVolumeIsAvailable(bool& available) + RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t SetMicrophoneVolume(uint32_t volume); + virtual int32_t MicrophoneVolume(uint32_t& volume) const; + virtual int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const; + virtual int32_t MinMicrophoneVolume(uint32_t& minVolume) const; + + // Microphone mute control + virtual int32_t MicrophoneMuteIsAvailable(bool& available) + RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t SetMicrophoneMute(bool enable); + virtual int32_t MicrophoneMute(bool& enabled) const; + + // Speaker mute control + virtual int32_t SpeakerMuteIsAvailable(bool& available) + RTC_LOCKS_EXCLUDED(mutex_); + virtual int32_t SetSpeakerMute(bool enable); + virtual int32_t SpeakerMute(bool& 
enabled) const;
+
+  // Stereo support
+  virtual int32_t StereoPlayoutIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetStereoPlayout(bool enable);
+  virtual int32_t StereoPlayout(bool& enabled) const;
+  virtual int32_t StereoRecordingIsAvailable(bool& available);
+  virtual int32_t SetStereoRecording(bool enable);
+  virtual int32_t StereoRecording(bool& enabled) const;
+
+  // Delay information and control
+  virtual int32_t PlayoutDelay(uint16_t& delayMS) const;
+
+  virtual void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer)
+      RTC_LOCKS_EXCLUDED(mutex_);
+
+ private:
+  int32_t InitSpeakerLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t InitMicrophoneLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  virtual int32_t MicrophoneIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t MicrophoneIsAvailableLocked(bool& available)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  virtual int32_t SpeakerIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SpeakerIsAvailableLocked(bool& available)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  static void AtomicSet32(int32_t* theValue, int32_t newValue);
+  static int32_t AtomicGet32(int32_t* theValue);
+
+  static void logCAMsg(rtc::LoggingSeverity sev,
+                       const char* msg,
+                       const char* err);
+
+  int32_t GetNumberDevices(AudioObjectPropertyScope scope,
+                           AudioDeviceID scopedDeviceIds[],
+                           uint32_t deviceListLength);
+
+  int32_t GetDeviceName(AudioObjectPropertyScope scope,
+                        uint16_t index,
+                        rtc::ArrayView<char> name);
+
+  int32_t InitDevice(uint16_t userDeviceIndex,
+                     AudioDeviceID& deviceId,
+                     bool isInput);
+
+  // Always work with our preferred playout format inside VoE.
+  // Then convert the output to the OS setting using an AudioConverter.
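+  // The conversion itself happens in outConverterProc(), which
+  // AudioConverterFillComplexBuffer() calls back into from the device IOProc.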
+ OSStatus SetDesiredPlayoutFormat(); + + static OSStatus objectListenerProc( + AudioObjectID objectId, + UInt32 numberAddresses, + const AudioObjectPropertyAddress addresses[], + void* clientData); + + OSStatus implObjectListenerProc(AudioObjectID objectId, + UInt32 numberAddresses, + const AudioObjectPropertyAddress addresses[]); + + int32_t HandleDeviceChange(); + + int32_t HandleStreamFormatChange(AudioObjectID objectId, + AudioObjectPropertyAddress propertyAddress); + + int32_t HandleDataSourceChange(AudioObjectID objectId, + AudioObjectPropertyAddress propertyAddress); + + int32_t HandleProcessorOverload(AudioObjectPropertyAddress propertyAddress); + + static OSStatus deviceIOProc(AudioDeviceID device, + const AudioTimeStamp* now, + const AudioBufferList* inputData, + const AudioTimeStamp* inputTime, + AudioBufferList* outputData, + const AudioTimeStamp* outputTime, + void* clientData); + + static OSStatus outConverterProc( + AudioConverterRef audioConverter, + UInt32* numberDataPackets, + AudioBufferList* data, + AudioStreamPacketDescription** dataPacketDescription, + void* userData); + + static OSStatus inDeviceIOProc(AudioDeviceID device, + const AudioTimeStamp* now, + const AudioBufferList* inputData, + const AudioTimeStamp* inputTime, + AudioBufferList* outputData, + const AudioTimeStamp* outputTime, + void* clientData); + + static OSStatus inConverterProc( + AudioConverterRef audioConverter, + UInt32* numberDataPackets, + AudioBufferList* data, + AudioStreamPacketDescription** dataPacketDescription, + void* inUserData); + + OSStatus implDeviceIOProc(const AudioBufferList* inputData, + const AudioTimeStamp* inputTime, + AudioBufferList* outputData, + const AudioTimeStamp* outputTime) + RTC_LOCKS_EXCLUDED(mutex_); + + OSStatus implOutConverterProc(UInt32* numberDataPackets, + AudioBufferList* data); + + OSStatus implInDeviceIOProc(const AudioBufferList* inputData, + const AudioTimeStamp* inputTime) + RTC_LOCKS_EXCLUDED(mutex_); + + OSStatus implInConverterProc(UInt32* numberDataPackets, + AudioBufferList* data); + + static void RunCapture(void*); + static void RunRender(void*); + bool CaptureWorkerThread(); + bool RenderWorkerThread(); + + bool KeyPressed(); + + AudioDeviceBuffer* _ptrAudioBuffer; + + Mutex mutex_; + + rtc::Event _stopEventRec; + rtc::Event _stopEvent; + + // Only valid/running between calls to StartRecording and StopRecording. + rtc::PlatformThread capture_worker_thread_; + + // Only valid/running between calls to StartPlayout and StopPlayout. 
+  rtc::PlatformThread render_worker_thread_;
+
+  AudioMixerManagerMac _mixerManager;
+
+  uint16_t _inputDeviceIndex;
+  uint16_t _outputDeviceIndex;
+  AudioDeviceID _inputDeviceID;
+  AudioDeviceID _outputDeviceID;
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1050
+  AudioDeviceIOProcID _inDeviceIOProcID;
+  AudioDeviceIOProcID _deviceIOProcID;
+#endif
+  bool _inputDeviceIsSpecified;
+  bool _outputDeviceIsSpecified;
+
+  uint8_t _recChannels;
+  uint8_t _playChannels;
+
+  Float32* _captureBufData;
+  SInt16* _renderBufData;
+
+  SInt16 _renderConvertData[PLAY_BUF_SIZE_IN_SAMPLES];
+
+  bool _initialized;
+  bool _isShutDown;
+  bool _recording;
+  bool _playing;
+  bool _recIsInitialized;
+  bool _playIsInitialized;
+
+  // Atomically set variables
+  std::atomic<int32_t> _renderDeviceIsAlive;
+  std::atomic<int32_t> _captureDeviceIsAlive;
+
+  bool _twoDevices;
+  bool _doStop;     // For play if not shared device or play+rec if shared device
+  bool _doStopRec;  // For rec if not shared device
+  bool _macBookPro;
+  bool _macBookProPanRight;
+
+  AudioConverterRef _captureConverter;
+  AudioConverterRef _renderConverter;
+
+  AudioStreamBasicDescription _outStreamFormat;
+  AudioStreamBasicDescription _outDesiredFormat;
+  AudioStreamBasicDescription _inStreamFormat;
+  AudioStreamBasicDescription _inDesiredFormat;
+
+  uint32_t _captureLatencyUs;
+  uint32_t _renderLatencyUs;
+
+  // Atomically set variables
+  mutable std::atomic<int32_t> _captureDelayUs;
+  mutable std::atomic<int32_t> _renderDelayUs;
+
+  int32_t _renderDelayOffsetSamples;
+
+  PaUtilRingBuffer* _paCaptureBuffer;
+  PaUtilRingBuffer* _paRenderBuffer;
+
+  semaphore_t _renderSemaphore;
+  semaphore_t _captureSemaphore;
+
+  int _captureBufSizeSamples;
+  int _renderBufSizeSamples;
+
+  // Typing detection
+  // 0x5c is key "9", after that comes function keys.
+  bool prev_key_state_[0x5d];
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_AUDIO_DEVICE_MAC_H_
diff --git a/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.cc b/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.cc
new file mode 100644
index 0000000000..942e7db3b3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.cc
@@ -0,0 +1,924 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/mac/audio_mixer_manager_mac.h"
+
+#include <unistd.h>  // getpid()
+
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+
+#define WEBRTC_CA_RETURN_ON_ERR(expr)                                \
+  do {                                                               \
+    err = expr;                                                      \
+    if (err != noErr) {                                              \
+      logCAMsg(rtc::LS_ERROR, "Error in " #expr, (const char*)&err); \
+      return -1;                                                     \
+    }                                                                \
+  } while (0)
+
+#define WEBRTC_CA_LOG_ERR(expr)                                      \
+  do {                                                               \
+    err = expr;                                                      \
+    if (err != noErr) {                                              \
+      logCAMsg(rtc::LS_ERROR, "Error in " #expr, (const char*)&err); \
+    }                                                                \
+  } while (0)
+
+#define WEBRTC_CA_LOG_WARN(expr)                                       \
+  do {                                                                 \
+    err = expr;                                                        \
+    if (err != noErr) {                                                \
+      logCAMsg(rtc::LS_WARNING, "Error in " #expr, (const char*)&err); \
+    }                                                                  \
+  } while (0)
+
+AudioMixerManagerMac::AudioMixerManagerMac()
+    : _inputDeviceID(kAudioObjectUnknown),
+      _outputDeviceID(kAudioObjectUnknown),
+      _noInputChannels(0),
+      _noOutputChannels(0) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " created";
+}
+
+AudioMixerManagerMac::~AudioMixerManagerMac() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed";
+  Close();
+}
+
+// ============================================================================
+// PUBLIC METHODS
+// ============================================================================
+
+int32_t AudioMixerManagerMac::Close() {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  MutexLock lock(&mutex_);
+
+  CloseSpeakerLocked();
+  CloseMicrophoneLocked();
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::CloseSpeaker() {
+  MutexLock lock(&mutex_);
+  return CloseSpeakerLocked();
+}
+
+int32_t AudioMixerManagerMac::CloseSpeakerLocked() {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  _outputDeviceID = kAudioObjectUnknown;
+  _noOutputChannels = 0;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::CloseMicrophone() {
+  MutexLock lock(&mutex_);
+  return CloseMicrophoneLocked();
+}
+
+int32_t AudioMixerManagerMac::CloseMicrophoneLocked() {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  _inputDeviceID = kAudioObjectUnknown;
+  _noInputChannels = 0;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::OpenSpeaker(AudioDeviceID deviceID) {
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::OpenSpeaker(id=" << deviceID
+                      << ")";
+
+  MutexLock lock(&mutex_);
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  pid_t hogPid = -1;
+
+  _outputDeviceID = deviceID;
+
+  // Check which process, if any, has hogged the device.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyHogMode, kAudioDevicePropertyScopeOutput, 0};
+
+  // First, does it have the property? Aggregate devices don't.
+  if (AudioObjectHasProperty(_outputDeviceID, &propertyAddress)) {
+    size = sizeof(hogPid);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+        _outputDeviceID, &propertyAddress, 0, NULL, &size, &hogPid));
+
+    if (hogPid == -1) {
+      RTC_LOG(LS_VERBOSE) << "No process has hogged the output device";
+    }
+    // getpid() is apparently "always successful"
+    else if (hogPid == getpid()) {
+      RTC_LOG(LS_VERBOSE) << "Our process has hogged the output device";
+    } else {
+      RTC_LOG(LS_WARNING) << "Another process (pid = "
+                          << static_cast<int>(hogPid)
+                          << ") has hogged the output device";
+
+      return -1;
+    }
+  }
+
+  // get number of channels from stream format
+  propertyAddress.mSelector = kAudioDevicePropertyStreamFormat;
+
+  // Get the stream format, to be able to read the number of channels.
+  AudioStreamBasicDescription streamFormat;
+  size = sizeof(AudioStreamBasicDescription);
+  memset(&streamFormat, 0, size);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _outputDeviceID, &propertyAddress, 0, NULL, &size, &streamFormat));
+
+  _noOutputChannels = streamFormat.mChannelsPerFrame;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::OpenMicrophone(AudioDeviceID deviceID) {
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::OpenMicrophone(id=" << deviceID
+                      << ")";
+
+  MutexLock lock(&mutex_);
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  pid_t hogPid = -1;
+
+  _inputDeviceID = deviceID;
+
+  // Check which process, if any, has hogged the device.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyHogMode, kAudioDevicePropertyScopeInput, 0};
+  size = sizeof(hogPid);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _inputDeviceID, &propertyAddress, 0, NULL, &size, &hogPid));
+  if (hogPid == -1) {
+    RTC_LOG(LS_VERBOSE) << "No process has hogged the input device";
+  }
+  // getpid() is apparently "always successful"
+  else if (hogPid == getpid()) {
+    RTC_LOG(LS_VERBOSE) << "Our process has hogged the input device";
+  } else {
+    RTC_LOG(LS_WARNING) << "Another process (pid = " << static_cast<int>(hogPid)
+                        << ") has hogged the input device";
+
+    return -1;
+  }
+
+  // get number of channels from stream format
+  propertyAddress.mSelector = kAudioDevicePropertyStreamFormat;
+
+  // Get the stream format, to be able to read the number of channels.
+  AudioStreamBasicDescription streamFormat;
+  size = sizeof(AudioStreamBasicDescription);
+  memset(&streamFormat, 0, size);
+  WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+      _inputDeviceID, &propertyAddress, 0, NULL, &size, &streamFormat));
+
+  _noInputChannels = streamFormat.mChannelsPerFrame;
+
+  return 0;
+}
+
+bool AudioMixerManagerMac::SpeakerIsInitialized() const {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+
+  return (_outputDeviceID != kAudioObjectUnknown);
+}
+
+bool AudioMixerManagerMac::MicrophoneIsInitialized() const {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+
+  return (_inputDeviceID != kAudioObjectUnknown);
+}
+
+int32_t AudioMixerManagerMac::SetSpeakerVolume(uint32_t volume) {
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SetSpeakerVolume(volume="
+                      << volume << ")";
+
+  MutexLock lock(&mutex_);
+
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  bool success = false;
+
+  // volume range is 0.0 - 1.0, convert from 0 - 255
+  const Float32 vol = (Float32)(volume / 255.0);
+
+  RTC_DCHECK(vol <= 1.0 && vol >= 0.0);
+
+  // Does the render device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeOutput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    size = sizeof(vol);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+        _outputDeviceID, &propertyAddress, 0, NULL, size, &vol));
+
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
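+  // CoreAudio addresses per-channel controls through mElement = 1..N; element
+  // 0 (kAudioObjectPropertyElementMaster) is the master control probed above.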
+  for (UInt32 i = 1; i <= _noOutputChannels; i++) {
+    propertyAddress.mElement = i;
+    isSettable = false;
+    err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress,
+                                        &isSettable);
+    if (err == noErr && isSettable) {
+      size = sizeof(vol);
+      WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+          _outputDeviceID, &propertyAddress, 0, NULL, size, &vol));
+    }
+    success = true;
+  }
+
+  if (!success) {
+    RTC_LOG(LS_WARNING) << "Unable to set a volume on any output channel";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::SpeakerVolume(uint32_t& volume) const {
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  unsigned int channels = 0;
+  Float32 channelVol = 0;
+  Float32 vol = 0;
+
+  // Does the device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeOutput, 0};
+  Boolean hasProperty =
+      AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+  if (hasProperty) {
+    size = sizeof(vol);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+        _outputDeviceID, &propertyAddress, 0, NULL, &size, &vol));
+
+    // vol 0.0 to 1.0 -> convert to 0 - 255
+    volume = static_cast<uint32_t>(vol * 255 + 0.5);
+  } else {
+    // Otherwise get the average volume across channels.
+    vol = 0;
+    for (UInt32 i = 1; i <= _noOutputChannels; i++) {
+      channelVol = 0;
+      propertyAddress.mElement = i;
+      hasProperty = AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+      if (hasProperty) {
+        size = sizeof(channelVol);
+        WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+            _outputDeviceID, &propertyAddress, 0, NULL, &size, &channelVol));
+
+        vol += channelVol;
+        channels++;
+      }
+    }
+
+    if (channels == 0) {
+      RTC_LOG(LS_WARNING) << "Unable to get a volume on any channel";
+      return -1;
+    }
+
+    RTC_DCHECK_GT(channels, 0);
+    // vol 0.0 to 1.0 -> convert to 0 - 255
+    volume = static_cast<uint32_t>(255 * vol / channels + 0.5);
+  }
+
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SpeakerVolume() => vol=" << vol;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MaxSpeakerVolume(uint32_t& maxVolume) const {
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  // volume range is 0.0 to 1.0
+  // we convert that to 0 - 255
+  maxVolume = 255;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MinSpeakerVolume(uint32_t& minVolume) const {
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  // volume range is 0.0 to 1.0
+  // we convert that to 0 - 255
+  minVolume = 0;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::SpeakerVolumeIsAvailable(bool& available) {
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+
+  // Does the render device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeOutput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    available = true;
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
+ for (UInt32 i = 1; i <= _noOutputChannels; i++) { + propertyAddress.mElement = i; + isSettable = false; + err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress, + &isSettable); + if (err != noErr || !isSettable) { + available = false; + RTC_LOG(LS_WARNING) << "Volume cannot be set for output channel " << i + << ", err=" << err; + return -1; + } + } + + available = true; + return 0; +} + +int32_t AudioMixerManagerMac::SpeakerMuteIsAvailable(bool& available) { + if (_outputDeviceID == kAudioObjectUnknown) { + RTC_LOG(LS_WARNING) << "device ID has not been set"; + return -1; + } + + OSStatus err = noErr; + + // Does the capture device have a master mute control? + // If so, use it exclusively. + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyMute, kAudioDevicePropertyScopeOutput, 0}; + Boolean isSettable = false; + err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress, + &isSettable); + if (err == noErr && isSettable) { + available = true; + return 0; + } + + // Otherwise try to set each channel. + for (UInt32 i = 1; i <= _noOutputChannels; i++) { + propertyAddress.mElement = i; + isSettable = false; + err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress, + &isSettable); + if (err != noErr || !isSettable) { + available = false; + RTC_LOG(LS_WARNING) << "Mute cannot be set for output channel " << i + << ", err=" << err; + return -1; + } + } + + available = true; + return 0; +} + +int32_t AudioMixerManagerMac::SetSpeakerMute(bool enable) { + RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SetSpeakerMute(enable=" + << enable << ")"; + + MutexLock lock(&mutex_); + + if (_outputDeviceID == kAudioObjectUnknown) { + RTC_LOG(LS_WARNING) << "device ID has not been set"; + return -1; + } + + OSStatus err = noErr; + UInt32 size = 0; + UInt32 mute = enable ? 1 : 0; + bool success = false; + + // Does the render device have a master mute control? + // If so, use it exclusively. + AudioObjectPropertyAddress propertyAddress = { + kAudioDevicePropertyMute, kAudioDevicePropertyScopeOutput, 0}; + Boolean isSettable = false; + err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress, + &isSettable); + if (err == noErr && isSettable) { + size = sizeof(mute); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData( + _outputDeviceID, &propertyAddress, 0, NULL, size, &mute)); + + return 0; + } + + // Otherwise try to set each channel. + for (UInt32 i = 1; i <= _noOutputChannels; i++) { + propertyAddress.mElement = i; + isSettable = false; + err = AudioObjectIsPropertySettable(_outputDeviceID, &propertyAddress, + &isSettable); + if (err == noErr && isSettable) { + size = sizeof(mute); + WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData( + _outputDeviceID, &propertyAddress, 0, NULL, size, &mute)); + } + success = true; + } + + if (!success) { + RTC_LOG(LS_WARNING) << "Unable to set mute on any input channel"; + return -1; + } + + return 0; +} + +int32_t AudioMixerManagerMac::SpeakerMute(bool& enabled) const { + if (_outputDeviceID == kAudioObjectUnknown) { + RTC_LOG(LS_WARNING) << "device ID has not been set"; + return -1; + } + + OSStatus err = noErr; + UInt32 size = 0; + unsigned int channels = 0; + UInt32 channelMuted = 0; + UInt32 muted = 0; + + // Does the device have a master volume control? + // If so, use it exclusively. 
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyMute, kAudioDevicePropertyScopeOutput, 0};
+  Boolean hasProperty =
+      AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+  if (hasProperty) {
+    size = sizeof(muted);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+        _outputDeviceID, &propertyAddress, 0, NULL, &size, &muted));
+
+    // 1 means muted
+    enabled = static_cast<bool>(muted);
+  } else {
+    // Otherwise check if all channels are muted.
+    for (UInt32 i = 1; i <= _noOutputChannels; i++) {
+      muted = 0;
+      propertyAddress.mElement = i;
+      hasProperty = AudioObjectHasProperty(_outputDeviceID, &propertyAddress);
+      if (hasProperty) {
+        size = sizeof(channelMuted);
+        WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+            _outputDeviceID, &propertyAddress, 0, NULL, &size, &channelMuted));
+
+        muted = (muted && channelMuted);
+        channels++;
+      }
+    }
+
+    if (channels == 0) {
+      RTC_LOG(LS_WARNING) << "Unable to get mute for any channel";
+      return -1;
+    }
+
+    RTC_DCHECK_GT(channels, 0);
+    // 1 means muted
+    enabled = static_cast<bool>(muted);
+  }
+
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SpeakerMute() => enabled="
+                      << enabled;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::StereoPlayoutIsAvailable(bool& available) {
+  if (_outputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  available = (_noOutputChannels == 2);
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::StereoRecordingIsAvailable(bool& available) {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  available = (_noInputChannels == 2);
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MicrophoneMuteIsAvailable(bool& available) {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+
+  // Does the capture device have a master mute control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyMute, kAudioDevicePropertyScopeInput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    available = true;
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
+  for (UInt32 i = 1; i <= _noInputChannels; i++) {
+    propertyAddress.mElement = i;
+    isSettable = false;
+    err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                        &isSettable);
+    if (err != noErr || !isSettable) {
+      available = false;
+      RTC_LOG(LS_WARNING) << "Mute cannot be set for input channel " << i
+                          << ", err=" << err;
+      return -1;
+    }
+  }
+
+  available = true;
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::SetMicrophoneMute(bool enable) {
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SetMicrophoneMute(enable="
+                      << enable << ")";
+
+  MutexLock lock(&mutex_);
+
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  UInt32 mute = enable ? 1 : 0;
+  bool success = false;
+
+  // Does the capture device have a master mute control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyMute, kAudioDevicePropertyScopeInput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    size = sizeof(mute);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+        _inputDeviceID, &propertyAddress, 0, NULL, size, &mute));
+
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
+  for (UInt32 i = 1; i <= _noInputChannels; i++) {
+    propertyAddress.mElement = i;
+    isSettable = false;
+    err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                        &isSettable);
+    if (err == noErr && isSettable) {
+      size = sizeof(mute);
+      WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+          _inputDeviceID, &propertyAddress, 0, NULL, size, &mute));
+      // Only count the channel as handled when it was actually set.
+      success = true;
+    }
+  }
+
+  if (!success) {
+    RTC_LOG(LS_WARNING) << "Unable to set mute on any input channel";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MicrophoneMute(bool& enabled) const {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  unsigned int channels = 0;
+  UInt32 channelMuted = 0;
+  UInt32 muted = 0;
+
+  // Does the device have a master mute control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyMute, kAudioDevicePropertyScopeInput, 0};
+  Boolean hasProperty =
+      AudioObjectHasProperty(_inputDeviceID, &propertyAddress);
+  if (hasProperty) {
+    size = sizeof(muted);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+        _inputDeviceID, &propertyAddress, 0, NULL, &size, &muted));
+
+    // 1 means muted
+    enabled = static_cast<bool>(muted);
+  } else {
+    // Otherwise check if all channels are muted.
+    // Start from "muted" and clear the flag as soon as any channel reports
+    // itself unmuted.
+    muted = 1;
+    for (UInt32 i = 1; i <= _noInputChannels; i++) {
+      channelMuted = 0;
+      propertyAddress.mElement = i;
+      hasProperty = AudioObjectHasProperty(_inputDeviceID, &propertyAddress);
+      if (hasProperty) {
+        size = sizeof(channelMuted);
+        WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+            _inputDeviceID, &propertyAddress, 0, NULL, &size, &channelMuted));
+
+        muted = (muted && channelMuted);
+        channels++;
+      }
+    }
+
+    if (channels == 0) {
+      RTC_LOG(LS_WARNING) << "Unable to get mute for any channel";
+      return -1;
+    }
+
+    RTC_DCHECK_GT(channels, 0);
+    // 1 means muted
+    enabled = static_cast<bool>(muted);
+  }
+
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::MicrophoneMute() => enabled="
+                      << enabled;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MicrophoneVolumeIsAvailable(bool& available) {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+
+  // Does the capture device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeInput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    available = true;
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
+  for (UInt32 i = 1; i <= _noInputChannels; i++) {
+    propertyAddress.mElement = i;
+    isSettable = false;
+    err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                        &isSettable);
+    if (err != noErr || !isSettable) {
+      available = false;
+      RTC_LOG(LS_WARNING) << "Volume cannot be set for input channel " << i
+                          << ", err=" << err;
+      return -1;
+    }
+  }
+
+  available = true;
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::SetMicrophoneVolume(uint32_t volume) {
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::SetMicrophoneVolume(volume="
+                      << volume << ")";
+
+  MutexLock lock(&mutex_);
+
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  bool success = false;
+
+  // volume range is 0.0 - 1.0, convert from 0 - 255
+  const Float32 vol = (Float32)(volume / 255.0);
+
+  RTC_DCHECK(vol <= 1.0 && vol >= 0.0);
+
+  // Does the capture device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeInput, 0};
+  Boolean isSettable = false;
+  err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                      &isSettable);
+  if (err == noErr && isSettable) {
+    size = sizeof(vol);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+        _inputDeviceID, &propertyAddress, 0, NULL, size, &vol));
+
+    return 0;
+  }
+
+  // Otherwise try to set each channel.
+  for (UInt32 i = 1; i <= _noInputChannels; i++) {
+    propertyAddress.mElement = i;
+    isSettable = false;
+    err = AudioObjectIsPropertySettable(_inputDeviceID, &propertyAddress,
+                                        &isSettable);
+    if (err == noErr && isSettable) {
+      size = sizeof(vol);
+      WEBRTC_CA_RETURN_ON_ERR(AudioObjectSetPropertyData(
+          _inputDeviceID, &propertyAddress, 0, NULL, size, &vol));
+      // Only count the channel as handled when it was actually set.
+      success = true;
+    }
+  }
+
+  if (!success) {
+    RTC_LOG(LS_WARNING) << "Unable to set a level on any input channel";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MicrophoneVolume(uint32_t& volume) const {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  OSStatus err = noErr;
+  UInt32 size = 0;
+  unsigned int channels = 0;
+  Float32 channelVol = 0;
+  Float32 volFloat32 = 0;
+
+  // Does the device have a master volume control?
+  // If so, use it exclusively.
+  AudioObjectPropertyAddress propertyAddress = {
+      kAudioDevicePropertyVolumeScalar, kAudioDevicePropertyScopeInput, 0};
+  Boolean hasProperty =
+      AudioObjectHasProperty(_inputDeviceID, &propertyAddress);
+  if (hasProperty) {
+    size = sizeof(volFloat32);
+    WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+        _inputDeviceID, &propertyAddress, 0, NULL, &size, &volFloat32));
+
+    // vol 0.0 to 1.0 -> convert to 0 - 255
+    volume = static_cast<uint32_t>(volFloat32 * 255 + 0.5);
+  } else {
+    // Otherwise get the average volume across channels.
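+    // Worked example for the averaging below (illustrative, not upstream
+    // code): with two channels at scalar levels 0.25 and 0.75 the sum is
+    // 1.0, the average is 0.5, and 255 * 0.5 + 0.5 truncates to 128 on the
+    // 0 - 255 scale exposed by this API.
+    //
+    //   uint32_t ScaledAverage(Float32 sum, unsigned int channels) {
+    //     return static_cast<uint32_t>(255 * sum / channels + 0.5);
+    //   }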
+    volFloat32 = 0;
+    for (UInt32 i = 1; i <= _noInputChannels; i++) {
+      channelVol = 0;
+      propertyAddress.mElement = i;
+      hasProperty = AudioObjectHasProperty(_inputDeviceID, &propertyAddress);
+      if (hasProperty) {
+        size = sizeof(channelVol);
+        WEBRTC_CA_RETURN_ON_ERR(AudioObjectGetPropertyData(
+            _inputDeviceID, &propertyAddress, 0, NULL, &size, &channelVol));
+
+        volFloat32 += channelVol;
+        channels++;
+      }
+    }
+
+    if (channels == 0) {
+      RTC_LOG(LS_WARNING) << "Unable to get a level on any channel";
+      return -1;
+    }
+
+    RTC_DCHECK_GT(channels, 0);
+    // vol 0.0 to 1.0 -> convert to 0 - 255
+    volume = static_cast<uint32_t>(255 * volFloat32 / channels + 0.5);
+  }
+
+  RTC_LOG(LS_VERBOSE) << "AudioMixerManagerMac::MicrophoneVolume() => vol="
+                      << volume;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MaxMicrophoneVolume(uint32_t& maxVolume) const {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  // volume range is 0.0 to 1.0
+  // we convert that to 0 - 255
+  maxVolume = 255;
+
+  return 0;
+}
+
+int32_t AudioMixerManagerMac::MinMicrophoneVolume(uint32_t& minVolume) const {
+  if (_inputDeviceID == kAudioObjectUnknown) {
+    RTC_LOG(LS_WARNING) << "device ID has not been set";
+    return -1;
+  }
+
+  // volume range is 0.0 to 1.0
+  // we convert that to 0 - 255
+  minVolume = 0;
+
+  return 0;
+}
+
+// ============================================================================
+// Private Methods
+// ============================================================================
+
+// CoreAudio errors are best interpreted as four character strings.
+// logCAMsg() expects `err` to point at the four bytes of an OSStatus code;
+// on little-endian hosts those bytes are stored in reverse order, which is
+// why the characters are flipped in the non-big-endian branch below.
+void AudioMixerManagerMac::logCAMsg(const rtc::LoggingSeverity sev,
+                                    const char* msg,
+                                    const char* err) {
+  RTC_DCHECK(msg != NULL);
+  RTC_DCHECK(err != NULL);
+  RTC_DCHECK(sev == rtc::LS_ERROR || sev == rtc::LS_WARNING);
+
+#ifdef WEBRTC_ARCH_BIG_ENDIAN
+  switch (sev) {
+    case rtc::LS_ERROR:
+      RTC_LOG(LS_ERROR) << msg << ": " << err[0] << err[1] << err[2] << err[3];
+      break;
+    case rtc::LS_WARNING:
+      RTC_LOG(LS_WARNING) << msg << ": " << err[0] << err[1] << err[2]
+                          << err[3];
+      break;
+    default:
+      break;
+  }
+#else
+  // We need to flip the characters in this case.
+  switch (sev) {
+    case rtc::LS_ERROR:
+      RTC_LOG(LS_ERROR) << msg << ": " << err[3] << err[2] << err[1] << err[0];
+      break;
+    case rtc::LS_WARNING:
+      RTC_LOG(LS_WARNING) << msg << ": " << err[3] << err[2] << err[1]
+                          << err[0];
+      break;
+    default:
+      break;
+  }
+#endif
+}
+
+}  // namespace webrtc
+// EOF
diff --git a/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.h b/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.h
new file mode 100644
index 0000000000..0ccab4879b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/mac/audio_mixer_manager_mac.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_DEVICE_AUDIO_MIXER_MANAGER_MAC_H_
+#define AUDIO_DEVICE_AUDIO_MIXER_MANAGER_MAC_H_
+
+#include <CoreAudio/CoreAudio.h>
+
+#include "modules/audio_device/include/audio_device.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/synchronization/mutex.h"
+
+namespace webrtc {
+
+class AudioMixerManagerMac {
+ public:
+  int32_t OpenSpeaker(AudioDeviceID deviceID) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t OpenMicrophone(AudioDeviceID deviceID) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t SetSpeakerVolume(uint32_t volume) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t SpeakerVolume(uint32_t& volume) const;
+  int32_t MaxSpeakerVolume(uint32_t& maxVolume) const;
+  int32_t MinSpeakerVolume(uint32_t& minVolume) const;
+  int32_t SpeakerVolumeIsAvailable(bool& available);
+  int32_t SpeakerMuteIsAvailable(bool& available);
+  int32_t SetSpeakerMute(bool enable) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t SpeakerMute(bool& enabled) const;
+  int32_t StereoPlayoutIsAvailable(bool& available);
+  int32_t StereoRecordingIsAvailable(bool& available);
+  int32_t MicrophoneMuteIsAvailable(bool& available);
+  int32_t SetMicrophoneMute(bool enable) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t MicrophoneMute(bool& enabled) const;
+  int32_t MicrophoneVolumeIsAvailable(bool& available);
+  int32_t SetMicrophoneVolume(uint32_t volume) RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t MicrophoneVolume(uint32_t& volume) const;
+  int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const;
+  int32_t MinMicrophoneVolume(uint32_t& minVolume) const;
+  int32_t Close() RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t CloseSpeaker() RTC_LOCKS_EXCLUDED(mutex_);
+  int32_t CloseMicrophone() RTC_LOCKS_EXCLUDED(mutex_);
+  bool SpeakerIsInitialized() const;
+  bool MicrophoneIsInitialized() const;
+
+ public:
+  AudioMixerManagerMac();
+  ~AudioMixerManagerMac();
+
+ private:
+  int32_t CloseSpeakerLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t CloseMicrophoneLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  static void logCAMsg(rtc::LoggingSeverity sev,
+                       const char* msg,
+                       const char* err);
+
+ private:
+  Mutex mutex_;
+
+  AudioDeviceID _inputDeviceID;
+  AudioDeviceID _outputDeviceID;
+
+  uint16_t _noInputChannels;
+  uint16_t _noOutputChannels;
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_DEVICE_AUDIO_MIXER_MANAGER_MAC_H_
diff --git a/third_party/libwebrtc/modules/audio_device/mock_audio_device_buffer.h b/third_party/libwebrtc/modules/audio_device/mock_audio_device_buffer.h
new file mode 100644
index 0000000000..b0f54c20ff
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/mock_audio_device_buffer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_MOCK_AUDIO_DEVICE_BUFFER_H_
+#define MODULES_AUDIO_DEVICE_MOCK_AUDIO_DEVICE_BUFFER_H_
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockAudioDeviceBuffer : public AudioDeviceBuffer {
+ public:
+  using AudioDeviceBuffer::AudioDeviceBuffer;
+  virtual ~MockAudioDeviceBuffer() {}
+  MOCK_METHOD(int32_t, RequestPlayoutData, (size_t nSamples), (override));
+  MOCK_METHOD(int32_t, GetPlayoutData, (void* audioBuffer), (override));
+  MOCK_METHOD(int32_t,
+              SetRecordedBuffer,
+              (const void* audioBuffer, size_t nSamples),
+              (override));
+  MOCK_METHOD(void, SetVQEData, (int playDelayMS, int recDelayMS), (override));
+  MOCK_METHOD(int32_t, DeliverRecordedData, (), (override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_MOCK_AUDIO_DEVICE_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.cc b/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.cc
new file mode 100644
index 0000000000..1e3a94edf6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.cc
@@ -0,0 +1,4178 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#pragma warning(disable : 4995)  // name was marked as #pragma deprecated
+
+#if (_MSC_VER >= 1310) && (_MSC_VER < 1400)
+// Reports the major and minor versions of the compiler.
+// For example, 1310 for Microsoft Visual C++ .NET 2003. 1310 represents version
+// 13 and a 1.0 point release. The Visual C++ 2005 compiler version is 1400.
+// Type cl /? at the command line to see the major and minor versions of your
+// compiler along with the build number.
+#pragma message(">> INFO: Windows Core Audio is not supported in VS 2003")
+#endif
+
+#include "modules/audio_device/audio_device_config.h"
+
+#ifdef WEBRTC_WINDOWS_CORE_AUDIO_BUILD
+
+// clang-format off
+// To get Windows includes in the right order, this must come before the Windows
+// includes below.
+#include "modules/audio_device/win/audio_device_core_win.h"
+// clang-format on
+
+#include <string.h>
+
+#include <comdef.h>
+#include <dmo.h>
+#include <functiondiscoverykeys_devpkey.h>
+#include <mmsystem.h>
+#include <strsafe.h>
+#include <uuids.h>
+#include <windows.h>
+
+#include <iomanip>
+
+#include "api/make_ref_counted.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/string_utils.h"
+#include "rtc_base/thread_annotations.h"
+#include "system_wrappers/include/sleep.h"
+
+// Macro that calls a COM method returning HRESULT value.
+#define EXIT_ON_ERROR(hres) \
+  do {                      \
+    if (FAILED(hres))       \
+      goto Exit;            \
+  } while (0)
+
+// Macro that continues to a COM error.
+#define CONTINUE_ON_ERROR(hres) \
+  do {                          \
+    if (FAILED(hres))           \
+      goto Next;                \
+  } while (0)
+
+// Macro that releases a COM object if not NULL.
+#define SAFE_RELEASE(p) \
+  do {                  \
+    if ((p)) {          \
+      (p)->Release();   \
+      (p) = NULL;       \
+    }                   \
+  } while (0)
+
+#define ROUND(x) ((x) >= 0 ? (int)((x) + 0.5) : (int)((x)-0.5))
+
+// REFERENCE_TIME time units per millisecond
+#define REFTIMES_PER_MILLISEC 10000
+
+typedef struct tagTHREADNAME_INFO {
+  DWORD dwType;      // must be 0x1000
+  LPCSTR szName;     // pointer to name (in user addr space)
+  DWORD dwThreadID;  // thread ID (-1=caller thread)
+  DWORD dwFlags;     // reserved for future use, must be zero
+} THREADNAME_INFO;
+
+namespace webrtc {
+namespace {
+
+enum { COM_THREADING_MODEL = COINIT_MULTITHREADED };
+
+enum { kAecCaptureStreamIndex = 0, kAecRenderStreamIndex = 1 };
+
+// An implementation of IMediaBuffer, as required for
+// IMediaObject::ProcessOutput(). After consuming data provided by
+// ProcessOutput(), call SetLength() to update the buffer availability.
+//
+// Example implementation:
+// http://msdn.microsoft.com/en-us/library/dd376684(v=vs.85).aspx
+class MediaBufferImpl final : public IMediaBuffer {
+ public:
+  explicit MediaBufferImpl(DWORD maxLength)
+      : _data(new BYTE[maxLength]),
+        _length(0),
+        _maxLength(maxLength),
+        _refCount(0) {}
+
+  // IMediaBuffer methods.
+  STDMETHOD(GetBufferAndLength(BYTE** ppBuffer, DWORD* pcbLength)) {
+    if (!ppBuffer || !pcbLength) {
+      return E_POINTER;
+    }
+
+    *ppBuffer = _data;
+    *pcbLength = _length;
+
+    return S_OK;
+  }
+
+  STDMETHOD(GetMaxLength(DWORD* pcbMaxLength)) {
+    if (!pcbMaxLength) {
+      return E_POINTER;
+    }
+
+    *pcbMaxLength = _maxLength;
+    return S_OK;
+  }
+
+  STDMETHOD(SetLength(DWORD cbLength)) {
+    if (cbLength > _maxLength) {
+      return E_INVALIDARG;
+    }
+
+    _length = cbLength;
+    return S_OK;
+  }
+
+  // IUnknown methods.
+  STDMETHOD_(ULONG, AddRef()) { return InterlockedIncrement(&_refCount); }
+
+  STDMETHOD(QueryInterface(REFIID riid, void** ppv)) {
+    if (!ppv) {
+      return E_POINTER;
+    } else if (riid != IID_IMediaBuffer && riid != IID_IUnknown) {
+      return E_NOINTERFACE;
+    }
+
+    *ppv = static_cast<IMediaBuffer*>(this);
+    AddRef();
+    return S_OK;
+  }
+
+  STDMETHOD_(ULONG, Release()) {
+    LONG refCount = InterlockedDecrement(&_refCount);
+    if (refCount == 0) {
+      delete this;
+    }
+
+    return refCount;
+  }
+
+ private:
+  ~MediaBufferImpl() { delete[] _data; }
+
+  BYTE* _data;
+  DWORD _length;
+  const DWORD _maxLength;
+  LONG _refCount;
+};
+}  // namespace
+
+// ============================================================================
+// Static Methods
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// CoreAudioIsSupported
+// ----------------------------------------------------------------------------
+
+bool AudioDeviceWindowsCore::CoreAudioIsSupported() {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  bool MMDeviceIsAvailable(false);
+  bool coreAudioIsSupported(false);
+
+  HRESULT hr(S_OK);
+  wchar_t buf[MAXERRORLENGTH];
+  wchar_t errorText[MAXERRORLENGTH];
+
+  // 1) Check if Windows version is Vista SP1 or later.
+  //
+  // CoreAudio is only available on Vista SP1 and later.
+  //
+  OSVERSIONINFOEX osvi;
+  DWORDLONG dwlConditionMask = 0;
+  int op = VER_LESS_EQUAL;
+
+  // Initialize the OSVERSIONINFOEX structure.
+  ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
+  osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+  osvi.dwMajorVersion = 6;
+  osvi.dwMinorVersion = 0;
+  osvi.wServicePackMajor = 0;
+  osvi.wServicePackMinor = 0;
+  osvi.wProductType = VER_NT_WORKSTATION;
+
+  // Initialize the condition mask.
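+  // Illustrative note (not upstream code): each VER_SET_CONDITION call below
+  // records one comparison, and VerifyVersionInfo() returns nonzero only when
+  // the running OS satisfies all of them -- here, "version <= 6.0 with no
+  // service pack", i.e. Vista RTM or older. A reduced sketch:
+  //
+  //   DWORDLONG m = 0;
+  //   VER_SET_CONDITION(m, VER_MAJORVERSION, VER_LESS_EQUAL);
+  //   BOOL tooOld = VerifyVersionInfo(&osvi, VER_MAJORVERSION, m);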
+  VER_SET_CONDITION(dwlConditionMask, VER_MAJORVERSION, op);
+  VER_SET_CONDITION(dwlConditionMask, VER_MINORVERSION, op);
+  VER_SET_CONDITION(dwlConditionMask, VER_SERVICEPACKMAJOR, op);
+  VER_SET_CONDITION(dwlConditionMask, VER_SERVICEPACKMINOR, op);
+  VER_SET_CONDITION(dwlConditionMask, VER_PRODUCT_TYPE, VER_EQUAL);
+
+  DWORD dwTypeMask = VER_MAJORVERSION | VER_MINORVERSION |
+                     VER_SERVICEPACKMAJOR | VER_SERVICEPACKMINOR |
+                     VER_PRODUCT_TYPE;
+
+  // Perform the test.
+  BOOL isVistaRTMorXP = VerifyVersionInfo(&osvi, dwTypeMask, dwlConditionMask);
+  if (isVistaRTMorXP != 0) {
+    RTC_LOG(LS_VERBOSE)
+        << "*** Windows Core Audio is only supported on Vista SP1 or later";
+    return false;
+  }
+
+  // 2) Initializes the COM library for use by the calling thread.
+
+  // The COM init wrapper sets the thread's concurrency model to MTA,
+  // and creates a new apartment for the thread if one is required. The
+  // wrapper also ensures that each call to CoInitializeEx is balanced
+  // by a corresponding call to CoUninitialize.
+  //
+  ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
+  if (!comInit.Succeeded()) {
+    // Things will work even if an STA thread is calling this method, but we
+    // want to ensure that MTA is used and therefore return false here.
+    return false;
+  }
+
+  // 3) Check if the MMDevice API is available.
+  //
+  // The Windows Multimedia Device (MMDevice) API enables audio clients to
+  // discover audio endpoint devices, determine their capabilities, and create
+  // driver instances for those devices.
+  // Header file Mmdeviceapi.h defines the interfaces in the MMDevice API.
+  // The MMDevice API consists of several interfaces. The first of these is the
+  // IMMDeviceEnumerator interface. To access the interfaces in the MMDevice
+  // API, a client obtains a reference to the IMMDeviceEnumerator interface of a
+  // device-enumerator object by calling the CoCreateInstance function.
+  //
+  // Through the IMMDeviceEnumerator interface, the client can obtain references
+  // to the other interfaces in the MMDevice API. The MMDevice API implements
+  // the following interfaces:
+  //
+  // IMMDevice            Represents an audio device.
+  // IMMDeviceCollection  Represents a collection of audio devices.
+  // IMMDeviceEnumerator  Provides methods for enumerating audio devices.
+  // IMMEndpoint          Represents an audio endpoint device.
+  //
+  IMMDeviceEnumerator* pIMMD(NULL);
+  const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
+  const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
+
+  hr = CoCreateInstance(
+      CLSID_MMDeviceEnumerator,  // GUID value of MMDeviceEnumerator coclass
+      NULL, CLSCTX_ALL,
+      IID_IMMDeviceEnumerator,  // GUID value of the IMMDeviceEnumerator
+                                // interface
+      (void**)&pIMMD);
+
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "AudioDeviceWindowsCore::CoreAudioIsSupported()"
+                         " Failed to create the required COM object (hr="
+                      << hr << ")";
+    RTC_LOG(LS_VERBOSE) << "AudioDeviceWindowsCore::CoreAudioIsSupported()"
+                           " CoCreateInstance(MMDeviceEnumerator) failed (hr="
+                        << hr << ")";
+
+    const DWORD dwFlags =
+        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS;
+    const DWORD dwLangID = MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US);
+
+    // Gets the system's human readable message string for this HRESULT.
+    // All error messages are in English by default.
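+    // Illustrative sketch of the trim that follows (not upstream code):
+    // FormatMessageW() leaves a trailing CR-LF, so the loop walks backwards
+    // replacing trailing whitespace with NUL terminators. iswspace() would be
+    // the wide-character-correct test; the original uses isspace().
+    //
+    //   while (len && ::iswspace(text[len - 1])) {
+    //     text[--len] = L'\0';
+    //   }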
+    DWORD messageLength = ::FormatMessageW(dwFlags, 0, hr, dwLangID, errorText,
+                                           MAXERRORLENGTH, NULL);
+
+    RTC_DCHECK_LE(messageLength, MAXERRORLENGTH);
+
+    // Trims trailing white space (FormatMessage() leaves a trailing cr-lf.).
+    for (; messageLength && ::isspace(errorText[messageLength - 1]);
+         --messageLength) {
+      errorText[messageLength - 1] = '\0';
+    }
+
+    StringCchPrintfW(buf, MAXERRORLENGTH, L"Error details: ");
+    StringCchCatW(buf, MAXERRORLENGTH, errorText);
+    RTC_LOG(LS_VERBOSE) << buf;
+  } else {
+    MMDeviceIsAvailable = true;
+    RTC_LOG(LS_VERBOSE)
+        << "AudioDeviceWindowsCore::CoreAudioIsSupported()"
+           " CoCreateInstance(MMDeviceEnumerator) succeeded (hr="
+        << hr << ")";
+    SAFE_RELEASE(pIMMD);
+  }
+
+  // 4) Verify that we can create and initialize our Core Audio class.
+  //
+  if (MMDeviceIsAvailable) {
+    coreAudioIsSupported = false;
+
+    AudioDeviceWindowsCore* p = new (std::nothrow) AudioDeviceWindowsCore();
+    if (p == NULL) {
+      return false;
+    }
+
+    int ok(0);
+
+    if (p->Init() != InitStatus::OK) {
+      ok |= -1;
+    }
+
+    ok |= p->Terminate();
+
+    if (ok == 0) {
+      coreAudioIsSupported = true;
+    }
+
+    delete p;
+  }
+
+  if (coreAudioIsSupported) {
+    RTC_LOG(LS_VERBOSE) << "*** Windows Core Audio is supported ***";
+  } else {
+    RTC_LOG(LS_VERBOSE) << "*** Windows Core Audio is NOT supported";
+  }
+
+  return (coreAudioIsSupported);
+}
+
+// ============================================================================
+// Construction & Destruction
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// AudioDeviceWindowsCore() - ctor
+// ----------------------------------------------------------------------------
+
+AudioDeviceWindowsCore::AudioDeviceWindowsCore()
+    : _avrtLibrary(nullptr),
+      _winSupportAvrt(false),
+      _comInit(ScopedCOMInitializer::kMTA),
+      _ptrAudioBuffer(nullptr),
+      _ptrEnumerator(nullptr),
+      _ptrRenderCollection(nullptr),
+      _ptrCaptureCollection(nullptr),
+      _ptrDeviceOut(nullptr),
+      _ptrDeviceIn(nullptr),
+      _ptrClientOut(nullptr),
+      _ptrClientIn(nullptr),
+      _ptrRenderClient(nullptr),
+      _ptrCaptureClient(nullptr),
+      _ptrCaptureVolume(nullptr),
+      _ptrRenderSimpleVolume(nullptr),
+      _dmo(nullptr),
+      _mediaBuffer(nullptr),
+      _builtInAecEnabled(false),
+      _hRenderSamplesReadyEvent(nullptr),
+      _hPlayThread(nullptr),
+      _hRenderStartedEvent(nullptr),
+      _hShutdownRenderEvent(nullptr),
+      _hCaptureSamplesReadyEvent(nullptr),
+      _hRecThread(nullptr),
+      _hCaptureStartedEvent(nullptr),
+      _hShutdownCaptureEvent(nullptr),
+      _hMmTask(nullptr),
+      _playAudioFrameSize(0),
+      _playSampleRate(0),
+      _playBlockSize(0),
+      _playChannels(2),
+      _sndCardPlayDelay(0),
+      _writtenSamples(0),
+      _readSamples(0),
+      _recAudioFrameSize(0),
+      _recSampleRate(0),
+      _recBlockSize(0),
+      _recChannels(2),
+      _initialized(false),
+      _recording(false),
+      _playing(false),
+      _recIsInitialized(false),
+      _playIsInitialized(false),
+      _speakerIsInitialized(false),
+      _microphoneIsInitialized(false),
+      _usingInputDeviceIndex(false),
+      _usingOutputDeviceIndex(false),
+      _inputDevice(AudioDeviceModule::kDefaultCommunicationDevice),
+      _outputDevice(AudioDeviceModule::kDefaultCommunicationDevice),
+      _inputDeviceIndex(0),
+      _outputDeviceIndex(0) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " created";
+  RTC_DCHECK(_comInit.Succeeded());
+
+  // Try to load the Avrt DLL
+  if (!_avrtLibrary) {
+    // Get handle to the Avrt DLL module.
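+    // Illustrative sketch of the late-binding pattern used below (local names
+    // are hypothetical; the real code assigns member function pointers):
+    //
+    //   HMODULE lib = LoadLibrary(TEXT("Avrt.dll"));
+    //   if (lib) {
+    //     auto revert = reinterpret_cast<PAvRevertMmThreadCharacteristics>(
+    //         GetProcAddress(lib, "AvRevertMmThreadCharacteristics"));
+    //     // Only call through the pointer when it resolved; otherwise run
+    //     // without MMCSS support.
+    //   }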
+    _avrtLibrary = LoadLibrary(TEXT("Avrt.dll"));
+    if (_avrtLibrary) {
+      // The handle is valid; this should only happen on Vista and later,
+      // where Avrt.dll is available. Try to get the function addresses.
+      RTC_LOG(LS_VERBOSE) << "AudioDeviceWindowsCore::AudioDeviceWindowsCore()"
+                             " The Avrt DLL module is now loaded";
+
+      _PAvRevertMmThreadCharacteristics =
+          (PAvRevertMmThreadCharacteristics)GetProcAddress(
+              _avrtLibrary, "AvRevertMmThreadCharacteristics");
+      _PAvSetMmThreadCharacteristicsA =
+          (PAvSetMmThreadCharacteristicsA)GetProcAddress(
+              _avrtLibrary, "AvSetMmThreadCharacteristicsA");
+      _PAvSetMmThreadPriority = (PAvSetMmThreadPriority)GetProcAddress(
+          _avrtLibrary, "AvSetMmThreadPriority");
+
+      if (_PAvRevertMmThreadCharacteristics &&
+          _PAvSetMmThreadCharacteristicsA && _PAvSetMmThreadPriority) {
+        RTC_LOG(LS_VERBOSE)
+            << "AudioDeviceWindowsCore::AudioDeviceWindowsCore()"
+               " AvRevertMmThreadCharacteristics() is OK";
+        RTC_LOG(LS_VERBOSE)
+            << "AudioDeviceWindowsCore::AudioDeviceWindowsCore()"
+               " AvSetMmThreadCharacteristicsA() is OK";
+        RTC_LOG(LS_VERBOSE)
+            << "AudioDeviceWindowsCore::AudioDeviceWindowsCore()"
+               " AvSetMmThreadPriority() is OK";
+        _winSupportAvrt = true;
+      }
+    }
+  }
+
+  // Create our samples ready events - we want auto reset events that start in
+  // the not-signaled state. The state of an auto-reset event object remains
+  // signaled until a single waiting thread is released, at which time the
+  // system automatically sets the state to nonsignaled. If no threads are
+  // waiting, the event object's state remains signaled. (Except for
+  // _hShutdownCaptureEvent, which is used to shut down multiple threads).
+  _hRenderSamplesReadyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+  _hCaptureSamplesReadyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+  _hShutdownRenderEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+  _hShutdownCaptureEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+  _hRenderStartedEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+  _hCaptureStartedEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+
+  _perfCounterFreq.QuadPart = 1;
+  _perfCounterFactor = 0.0;
+
+  // list of number of channels to use on recording side
+  _recChannelsPrioList[0] = 2;  // stereo is prio 1
+  _recChannelsPrioList[1] = 1;  // mono is prio 2
+  _recChannelsPrioList[2] = 4;  // quad is prio 3
+
+  // list of number of channels to use on playout side
+  _playChannelsPrioList[0] = 2;  // stereo is prio 1
+  _playChannelsPrioList[1] = 1;  // mono is prio 2
+
+  HRESULT hr;
+
+  // We know that this API will work since it has already been verified in
+  // CoreAudioIsSupported, hence no need to check for errors here as well.
+
+  // Retrieve the IMMDeviceEnumerator API (should load the MMDevAPI.dll)
+  // TODO(henrika): we should probably move this allocation to Init() instead
+  // and deallocate in Terminate() to make the implementation more symmetric.
+  CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
+                   __uuidof(IMMDeviceEnumerator),
+                   reinterpret_cast<void**>(&_ptrEnumerator));
+  RTC_DCHECK(_ptrEnumerator);
+
+  // DMO initialization for built-in WASAPI AEC.
+  {
+    IMediaObject* ptrDMO = NULL;
+    hr = CoCreateInstance(CLSID_CWMAudioAEC, NULL, CLSCTX_INPROC_SERVER,
+                          IID_IMediaObject, reinterpret_cast<void**>(&ptrDMO));
+    if (FAILED(hr) || ptrDMO == NULL) {
+      // Since we check that _dmo is non-NULL in EnableBuiltInAEC(), the
+      // feature is prevented from being enabled.
+      _builtInAecEnabled = false;
+      _TraceCOMError(hr);
+    }
+    _dmo = ptrDMO;
+    SAFE_RELEASE(ptrDMO);
+  }
+}
+
+// ----------------------------------------------------------------------------
+// AudioDeviceWindowsCore() - dtor
+// ----------------------------------------------------------------------------
+
+AudioDeviceWindowsCore::~AudioDeviceWindowsCore() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " destroyed";
+
+  Terminate();
+
+  // The IMMDeviceEnumerator is created during construction. Must release
+  // it here and not in Terminate() since we don't recreate it in Init().
+  SAFE_RELEASE(_ptrEnumerator);
+
+  _ptrAudioBuffer = NULL;
+
+  if (NULL != _hRenderSamplesReadyEvent) {
+    CloseHandle(_hRenderSamplesReadyEvent);
+    _hRenderSamplesReadyEvent = NULL;
+  }
+
+  if (NULL != _hCaptureSamplesReadyEvent) {
+    CloseHandle(_hCaptureSamplesReadyEvent);
+    _hCaptureSamplesReadyEvent = NULL;
+  }
+
+  if (NULL != _hRenderStartedEvent) {
+    CloseHandle(_hRenderStartedEvent);
+    _hRenderStartedEvent = NULL;
+  }
+
+  if (NULL != _hCaptureStartedEvent) {
+    CloseHandle(_hCaptureStartedEvent);
+    _hCaptureStartedEvent = NULL;
+  }
+
+  if (NULL != _hShutdownRenderEvent) {
+    CloseHandle(_hShutdownRenderEvent);
+    _hShutdownRenderEvent = NULL;
+  }
+
+  if (NULL != _hShutdownCaptureEvent) {
+    CloseHandle(_hShutdownCaptureEvent);
+    _hShutdownCaptureEvent = NULL;
+  }
+
+  if (_avrtLibrary) {
+    BOOL freeOK = FreeLibrary(_avrtLibrary);
+    if (!freeOK) {
+      RTC_LOG(LS_WARNING)
+          << "AudioDeviceWindowsCore::~AudioDeviceWindowsCore()"
+             " failed to free the loaded Avrt DLL module correctly";
+    } else {
+      RTC_LOG(LS_WARNING) << "AudioDeviceWindowsCore::~AudioDeviceWindowsCore()"
+                             " the Avrt DLL module is now unloaded";
+    }
+  }
+}
+
+// ============================================================================
+// API
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// AttachAudioBuffer
+// ----------------------------------------------------------------------------
+
+void AudioDeviceWindowsCore::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
+  _ptrAudioBuffer = audioBuffer;
+
+  // Inform the AudioBuffer about default settings for this implementation.
+  // Set all values to zero here since the actual settings will be done by
+  // InitPlayout and InitRecording later.
+  _ptrAudioBuffer->SetRecordingSampleRate(0);
+  _ptrAudioBuffer->SetPlayoutSampleRate(0);
+  _ptrAudioBuffer->SetRecordingChannels(0);
+  _ptrAudioBuffer->SetPlayoutChannels(0);
+}
+
+// ----------------------------------------------------------------------------
+// ActiveAudioLayer
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::ActiveAudioLayer(
+    AudioDeviceModule::AudioLayer& audioLayer) const {
+  audioLayer = AudioDeviceModule::kWindowsCoreAudio;
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// Init
+// ----------------------------------------------------------------------------
+
+AudioDeviceGeneric::InitStatus AudioDeviceWindowsCore::Init() {
+  MutexLock lock(&mutex_);
+
+  if (_initialized) {
+    return InitStatus::OK;
+  }
+
+  // Enumerate all audio rendering and capturing endpoint devices.
+  // Note that some of these cannot be selected by the user.
+  // The complete collection is for internal use only.
+  _EnumerateEndpointDevicesAll(eRender);
+  _EnumerateEndpointDevicesAll(eCapture);
+
+  _initialized = true;
+
+  return InitStatus::OK;
+}
+
+// ----------------------------------------------------------------------------
+// Terminate
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::Terminate() {
+  MutexLock lock(&mutex_);
+
+  if (!_initialized) {
+    return 0;
+  }
+
+  _initialized = false;
+  _speakerIsInitialized = false;
+  _microphoneIsInitialized = false;
+  _playing = false;
+  _recording = false;
+
+  SAFE_RELEASE(_ptrRenderCollection);
+  SAFE_RELEASE(_ptrCaptureCollection);
+  SAFE_RELEASE(_ptrDeviceOut);
+  SAFE_RELEASE(_ptrDeviceIn);
+  SAFE_RELEASE(_ptrClientOut);
+  SAFE_RELEASE(_ptrClientIn);
+  SAFE_RELEASE(_ptrRenderClient);
+  SAFE_RELEASE(_ptrCaptureClient);
+  SAFE_RELEASE(_ptrCaptureVolume);
+  SAFE_RELEASE(_ptrRenderSimpleVolume);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// Initialized
+// ----------------------------------------------------------------------------
+
+bool AudioDeviceWindowsCore::Initialized() const {
+  return (_initialized);
+}
+
+// ----------------------------------------------------------------------------
+// InitSpeaker
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::InitSpeaker() {
+  MutexLock lock(&mutex_);
+  return InitSpeakerLocked();
+}
+
+int32_t AudioDeviceWindowsCore::InitSpeakerLocked() {
+  if (_playing) {
+    return -1;
+  }
+
+  if (_ptrDeviceOut == NULL) {
+    return -1;
+  }
+
+  if (_usingOutputDeviceIndex) {
+    int16_t nDevices = PlayoutDevicesLocked();
+    if (_outputDeviceIndex > (nDevices - 1)) {
+      RTC_LOG(LS_ERROR) << "current device selection is invalid => unable to"
+                           " initialize";
+      return -1;
+    }
+  }
+
+  int32_t ret(0);
+
+  SAFE_RELEASE(_ptrDeviceOut);
+  if (_usingOutputDeviceIndex) {
+    // Refresh the selected rendering endpoint device using current index
+    ret = _GetListDevice(eRender, _outputDeviceIndex, &_ptrDeviceOut);
+  } else {
+    ERole role;
+    (_outputDevice == AudioDeviceModule::kDefaultDevice)
+        ? role = eConsole
+        : role = eCommunications;
+    // Refresh the selected rendering endpoint device using role
+    ret = _GetDefaultDevice(eRender, role, &_ptrDeviceOut);
+  }
+
+  if (ret != 0 || (_ptrDeviceOut == NULL)) {
+    RTC_LOG(LS_ERROR) << "failed to initialize the rendering endpoint device";
+    SAFE_RELEASE(_ptrDeviceOut);
+    return -1;
+  }
+
+  IAudioSessionManager* pManager = NULL;
+  ret = _ptrDeviceOut->Activate(__uuidof(IAudioSessionManager), CLSCTX_ALL,
+                                NULL, (void**)&pManager);
+  if (ret != 0 || pManager == NULL) {
+    RTC_LOG(LS_ERROR) << "failed to initialize the render manager";
+    SAFE_RELEASE(pManager);
+    return -1;
+  }
+
+  SAFE_RELEASE(_ptrRenderSimpleVolume);
+  ret = pManager->GetSimpleAudioVolume(NULL, FALSE, &_ptrRenderSimpleVolume);
+  if (ret != 0 || _ptrRenderSimpleVolume == NULL) {
+    RTC_LOG(LS_ERROR) << "failed to initialize the render simple volume";
+    SAFE_RELEASE(pManager);
+    SAFE_RELEASE(_ptrRenderSimpleVolume);
+    return -1;
+  }
+  SAFE_RELEASE(pManager);
+
+  _speakerIsInitialized = true;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// InitMicrophone
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::InitMicrophone() {
+  MutexLock lock(&mutex_);
+  return InitMicrophoneLocked();
+}
+
+int32_t AudioDeviceWindowsCore::InitMicrophoneLocked() {
+  if (_recording) {
+    return -1;
+  }
+
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  if (_usingInputDeviceIndex) {
+    int16_t nDevices = RecordingDevicesLocked();
+    if (_inputDeviceIndex > (nDevices - 1)) {
+      RTC_LOG(LS_ERROR) << "current device selection is invalid => unable to"
+                           " initialize";
+      return -1;
+    }
+  }
+
+  int32_t ret(0);
+
+  SAFE_RELEASE(_ptrDeviceIn);
+  if (_usingInputDeviceIndex) {
+    // Refresh the selected capture endpoint device using current index
+    ret = _GetListDevice(eCapture, _inputDeviceIndex, &_ptrDeviceIn);
+  } else {
+    ERole role;
+    (_inputDevice == AudioDeviceModule::kDefaultDevice)
+        ? role = eConsole
+        : role = eCommunications;
+    // Refresh the selected capture endpoint device using role
+    ret = _GetDefaultDevice(eCapture, role, &_ptrDeviceIn);
+  }
+
+  if (ret != 0 || (_ptrDeviceIn == NULL)) {
+    RTC_LOG(LS_ERROR) << "failed to initialize the capturing endpoint device";
+    SAFE_RELEASE(_ptrDeviceIn);
+    return -1;
+  }
+
+  SAFE_RELEASE(_ptrCaptureVolume);
+  ret = _ptrDeviceIn->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                               reinterpret_cast<void**>(&_ptrCaptureVolume));
+  if (ret != 0 || _ptrCaptureVolume == NULL) {
+    RTC_LOG(LS_ERROR) << "failed to initialize the capture volume";
+    SAFE_RELEASE(_ptrCaptureVolume);
+    return -1;
+  }
+
+  _microphoneIsInitialized = true;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// SpeakerIsInitialized
+// ----------------------------------------------------------------------------
+
+bool AudioDeviceWindowsCore::SpeakerIsInitialized() const {
+  return (_speakerIsInitialized);
+}
+
+// ----------------------------------------------------------------------------
+// MicrophoneIsInitialized
+// ----------------------------------------------------------------------------
+
+bool AudioDeviceWindowsCore::MicrophoneIsInitialized() const {
+  return (_microphoneIsInitialized);
+}
+
+// ----------------------------------------------------------------------------
+// SpeakerVolumeIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SpeakerVolumeIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  if (_ptrDeviceOut == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioSessionManager* pManager = NULL;
+  ISimpleAudioVolume* pVolume = NULL;
+
+  hr = _ptrDeviceOut->Activate(__uuidof(IAudioSessionManager), CLSCTX_ALL, NULL,
+                               (void**)&pManager);
+  EXIT_ON_ERROR(hr);
+
+  hr = pManager->GetSimpleAudioVolume(NULL, FALSE, &pVolume);
+  EXIT_ON_ERROR(hr);
+
+  float volume(0.0f);
+  hr = pVolume->GetMasterVolume(&volume);
+  // Report the control as available only when the volume can be read.
+  if (FAILED(hr)) {
+    available = false;
+  } else {
+    available = true;
+  }
+
+  SAFE_RELEASE(pManager);
+  SAFE_RELEASE(pVolume);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pManager);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SetSpeakerVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetSpeakerVolume(uint32_t volume) {
+  {
+    MutexLock lock(&mutex_);
+
+    if (!_speakerIsInitialized) {
+      return -1;
+    }
+
+    if (_ptrDeviceOut == NULL) {
+      return -1;
+    }
+  }
+
+  if (volume < (uint32_t)MIN_CORE_SPEAKER_VOLUME ||
+      volume > (uint32_t)MAX_CORE_SPEAKER_VOLUME) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+
+  // scale input volume to valid range (0.0 to 1.0)
+  const float fLevel = (float)volume / MAX_CORE_SPEAKER_VOLUME;
+  volume_mutex_.Lock();
+  hr = _ptrRenderSimpleVolume->SetMasterVolume(fLevel, NULL);
+  volume_mutex_.Unlock();
+  EXIT_ON_ERROR(hr);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SpeakerVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SpeakerVolume(uint32_t& volume) const {
+  {
+    MutexLock lock(&mutex_);
+
+    if (!_speakerIsInitialized) {
+      return -1;
+    }
+
+    if (_ptrDeviceOut == NULL) {
+      return -1;
+    }
+  }
+
+  HRESULT hr = S_OK;
+  float fLevel(0.0f);
+
+  volume_mutex_.Lock();
+  hr = _ptrRenderSimpleVolume->GetMasterVolume(&fLevel);
+  volume_mutex_.Unlock();
+  EXIT_ON_ERROR(hr);
+
+  // scale input volume range [0.0,1.0] to valid output range
+  volume = static_cast<uint32_t>(fLevel * MAX_CORE_SPEAKER_VOLUME);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// MaxSpeakerVolume
+//
+// The internal range for Core Audio is 0.0 to 1.0, where 0.0 indicates
+// silence and 1.0 indicates full volume (no attenuation).
+// We add our (webrtc-internal) own max level to match the Wave API and
+// how it is used today in VoE.
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MaxSpeakerVolume(uint32_t& maxVolume) const {
+  if (!_speakerIsInitialized) {
+    return -1;
+  }
+
+  maxVolume = static_cast<uint32_t>(MAX_CORE_SPEAKER_VOLUME);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// MinSpeakerVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MinSpeakerVolume(uint32_t& minVolume) const {
+  if (!_speakerIsInitialized) {
+    return -1;
+  }
+
+  minVolume = static_cast<uint32_t>(MIN_CORE_SPEAKER_VOLUME);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// SpeakerMuteIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SpeakerMuteIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  if (_ptrDeviceOut == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Query the speaker system mute state.
+  hr = _ptrDeviceOut->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                               reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  BOOL mute;
+  hr = pVolume->GetMute(&mute);
+  if (FAILED(hr))
+    available = false;
+  else
+    available = true;
+
+  SAFE_RELEASE(pVolume);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SetSpeakerMute
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetSpeakerMute(bool enable) {
+  MutexLock lock(&mutex_);
+
+  if (!_speakerIsInitialized) {
+    return -1;
+  }
+
+  if (_ptrDeviceOut == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Set the speaker system mute state.
+  hr = _ptrDeviceOut->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                               reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  const BOOL mute(enable);
+  hr = pVolume->SetMute(mute, NULL);
+  EXIT_ON_ERROR(hr);
+
+  SAFE_RELEASE(pVolume);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SpeakerMute
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SpeakerMute(bool& enabled) const {
+  if (!_speakerIsInitialized) {
+    return -1;
+  }
+
+  if (_ptrDeviceOut == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Query the speaker system mute state.
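+  // Illustrative sketch of the WASAPI pattern that follows (not upstream
+  // code; error handling elided): activate IAudioEndpointVolume on the
+  // IMMDevice, read the mute flag, then release the interface.
+  //
+  //   IAudioEndpointVolume* vol = NULL;
+  //   device->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+  //                    reinterpret_cast<void**>(&vol));
+  //   BOOL mute = FALSE;
+  //   vol->GetMute(&mute);
+  //   vol->Release();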
+  hr = _ptrDeviceOut->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                               reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  BOOL mute;
+  hr = pVolume->GetMute(&mute);
+  EXIT_ON_ERROR(hr);
+
+  enabled = (mute == TRUE) ? true : false;
+
+  SAFE_RELEASE(pVolume);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// MicrophoneMuteIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MicrophoneMuteIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Query the microphone system mute state.
+  hr = _ptrDeviceIn->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                              reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  BOOL mute;
+  hr = pVolume->GetMute(&mute);
+  if (FAILED(hr))
+    available = false;
+  else
+    available = true;
+
+  SAFE_RELEASE(pVolume);
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SetMicrophoneMute
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetMicrophoneMute(bool enable) {
+  if (!_microphoneIsInitialized) {
+    return -1;
+  }
+
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Set the microphone system mute state.
+  hr = _ptrDeviceIn->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                              reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  const BOOL mute(enable);
+  hr = pVolume->SetMute(mute, NULL);
+  EXIT_ON_ERROR(hr);
+
+  SAFE_RELEASE(pVolume);
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// MicrophoneMute
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MicrophoneMute(bool& enabled) const {
+  if (!_microphoneIsInitialized) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  // Query the microphone system mute state.
+  hr = _ptrDeviceIn->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                              reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  BOOL mute;
+  hr = pVolume->GetMute(&mute);
+  EXIT_ON_ERROR(hr);
+
+  enabled = (mute == TRUE) ? true : false;
+
+  SAFE_RELEASE(pVolume);
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// StereoRecordingIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::StereoRecordingIsAvailable(bool& available) {
+  available = true;
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// SetStereoRecording
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetStereoRecording(bool enable) {
+  MutexLock lock(&mutex_);
+
+  if (enable) {
+    _recChannelsPrioList[0] = 2;  // try stereo first
+    _recChannelsPrioList[1] = 1;
+    _recChannels = 2;
+  } else {
+    _recChannelsPrioList[0] = 1;  // try mono first
+    _recChannelsPrioList[1] = 2;
+    _recChannels = 1;
+  }
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// StereoRecording
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::StereoRecording(bool& enabled) const {
+  if (_recChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// StereoPlayoutIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::StereoPlayoutIsAvailable(bool& available) {
+  available = true;
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// SetStereoPlayout
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetStereoPlayout(bool enable) {
+  MutexLock lock(&mutex_);
+
+  if (enable) {
+    _playChannelsPrioList[0] = 2;  // try stereo first
+    _playChannelsPrioList[1] = 1;
+    _playChannels = 2;
+  } else {
+    _playChannelsPrioList[0] = 1;  // try mono first
+    _playChannelsPrioList[1] = 2;
+    _playChannels = 1;
+  }
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// StereoPlayout
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::StereoPlayout(bool& enabled) const {
+  if (_playChannels == 2)
+    enabled = true;
+  else
+    enabled = false;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// MicrophoneVolumeIsAvailable
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MicrophoneVolumeIsAvailable(bool& available) {
+  MutexLock lock(&mutex_);
+
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  IAudioEndpointVolume* pVolume = NULL;
+
+  hr = _ptrDeviceIn->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                              reinterpret_cast<void**>(&pVolume));
+  EXIT_ON_ERROR(hr);
+
+  float volume(0.0f);
+  hr = pVolume->GetMasterVolumeLevelScalar(&volume);
+  // Report the control as available only when the volume can be read.
+  if (FAILED(hr)) {
+    available = false;
+  } else {
+    available = true;
+  }
+
+  SAFE_RELEASE(pVolume);
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  SAFE_RELEASE(pVolume);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SetMicrophoneVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetMicrophoneVolume(uint32_t volume) {
+  RTC_LOG(LS_VERBOSE) << "AudioDeviceWindowsCore::SetMicrophoneVolume(volume="
+                      << volume << ")";
+
+  {
+    MutexLock lock(&mutex_);
+
+    if (!_microphoneIsInitialized) {
+      return -1;
+    }
+
+    if (_ptrDeviceIn == NULL) {
+      return -1;
+    }
+  }
+
+  if (volume < static_cast<uint32_t>(MIN_CORE_MICROPHONE_VOLUME) ||
+      volume > static_cast<uint32_t>(MAX_CORE_MICROPHONE_VOLUME)) {
+    return -1;
+  }
+
+  HRESULT hr = S_OK;
+  // scale input volume to valid range (0.0 to 1.0)
+  const float fLevel = static_cast<float>(volume) / MAX_CORE_MICROPHONE_VOLUME;
+  volume_mutex_.Lock();
+  // Keep the result so that EXIT_ON_ERROR below actually sees failures.
+  hr = _ptrCaptureVolume->SetMasterVolumeLevelScalar(fLevel, NULL);
+  volume_mutex_.Unlock();
+  EXIT_ON_ERROR(hr);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// MicrophoneVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MicrophoneVolume(uint32_t& volume) const {
+  {
+    MutexLock lock(&mutex_);
+
+    if (!_microphoneIsInitialized) {
+      return -1;
+    }
+
+    if (_ptrDeviceIn == NULL) {
+      return -1;
+    }
+  }
+
+  HRESULT hr = S_OK;
+  float fLevel(0.0f);
+  volume = 0;
+  volume_mutex_.Lock();
+  hr = _ptrCaptureVolume->GetMasterVolumeLevelScalar(&fLevel);
+  volume_mutex_.Unlock();
+  EXIT_ON_ERROR(hr);
+
+  // scale input volume range [0.0,1.0] to valid output range
+  volume = static_cast<uint32_t>(fLevel * MAX_CORE_MICROPHONE_VOLUME);
+
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// MaxMicrophoneVolume
+//
+// The internal range for Core Audio is 0.0 to 1.0, where 0.0 indicates
+// silence and 1.0 indicates full volume (no attenuation).
+// We add our (webrtc-internal) own max level to match the Wave API and
+// how it is used today in VoE.
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MaxMicrophoneVolume(uint32_t& maxVolume) const {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  if (!_microphoneIsInitialized) {
+    return -1;
+  }
+
+  maxVolume = static_cast<uint32_t>(MAX_CORE_MICROPHONE_VOLUME);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// MinMicrophoneVolume
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::MinMicrophoneVolume(uint32_t& minVolume) const {
+  if (!_microphoneIsInitialized) {
+    return -1;
+  }
+
+  minVolume = static_cast<uint32_t>(MIN_CORE_MICROPHONE_VOLUME);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// PlayoutDevices
+// ----------------------------------------------------------------------------
+int16_t AudioDeviceWindowsCore::PlayoutDevices() {
+  MutexLock lock(&mutex_);
+  return PlayoutDevicesLocked();
+}
+
+int16_t AudioDeviceWindowsCore::PlayoutDevicesLocked() {
+  if (_RefreshDeviceList(eRender) != -1) {
+    return (_DeviceListCount(eRender));
+  }
+
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+// SetPlayoutDevice I (II)
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetPlayoutDevice(uint16_t index) {
+  if (_playIsInitialized) {
+    return -1;
+  }
+
+  // Get current number of available rendering endpoint devices and refresh the
+  // rendering collection.
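+  // Illustrative note (not upstream code): PlayoutDevices() returns int16_t
+  // and can be -1 on failure; assigning that to UINT wraps to a huge value,
+  // and "index < 0" is always false for an unsigned index. A signed check
+  // would be safer:
+  //
+  //   const int16_t n = PlayoutDevices();
+  //   if (n < 0 || index >= static_cast<uint16_t>(n)) {
+  //     return -1;  // enumeration failed or index out of range
+  //   }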
+  UINT nDevices = PlayoutDevices();
+
+  if (index < 0 || index > (nDevices - 1)) {
+    RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1)
+                      << "]";
+    return -1;
+  }
+
+  MutexLock lock(&mutex_);
+
+  HRESULT hr(S_OK);
+
+  RTC_DCHECK(_ptrRenderCollection);
+
+  // Select an endpoint rendering device given the specified index
+  SAFE_RELEASE(_ptrDeviceOut);
+  hr = _ptrRenderCollection->Item(index, &_ptrDeviceOut);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(_ptrDeviceOut);
+    return -1;
+  }
+
+  WCHAR szDeviceName[MAX_PATH];
+  const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0];
+
+  // Get the endpoint device's friendly-name
+  if (_GetDeviceName(_ptrDeviceOut, szDeviceName, bufferLen) == 0) {
+    RTC_LOG(LS_VERBOSE) << "friendly name: \"" << szDeviceName << "\"";
+  }
+
+  _usingOutputDeviceIndex = true;
+  _outputDeviceIndex = index;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// SetPlayoutDevice II (II)
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::SetPlayoutDevice(
+    AudioDeviceModule::WindowsDeviceType device) {
+  if (_playIsInitialized) {
+    return -1;
+  }
+
+  ERole role(eCommunications);
+
+  if (device == AudioDeviceModule::kDefaultDevice) {
+    role = eConsole;
+  } else if (device == AudioDeviceModule::kDefaultCommunicationDevice) {
+    role = eCommunications;
+  }
+
+  MutexLock lock(&mutex_);
+
+  // Refresh the list of rendering endpoint devices
+  _RefreshDeviceList(eRender);
+
+  HRESULT hr(S_OK);
+
+  RTC_DCHECK(_ptrEnumerator);
+
+  // Select an endpoint rendering device given the specified role
+  SAFE_RELEASE(_ptrDeviceOut);
+  hr = _ptrEnumerator->GetDefaultAudioEndpoint(eRender, role, &_ptrDeviceOut);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(_ptrDeviceOut);
+    return -1;
+  }
+
+  WCHAR szDeviceName[MAX_PATH];
+  const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0];
+
+  // Get the endpoint device's friendly-name
+  if (_GetDeviceName(_ptrDeviceOut, szDeviceName, bufferLen) == 0) {
+    RTC_LOG(LS_VERBOSE) << "friendly name: \"" << szDeviceName << "\"";
+  }
+
+  _usingOutputDeviceIndex = false;
+  _outputDevice = device;
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// PlayoutDeviceName
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::PlayoutDeviceName(
+    uint16_t index,
+    char name[kAdmMaxDeviceNameSize],
+    char guid[kAdmMaxGuidSize]) {
+  bool defaultCommunicationDevice(false);
+  const int16_t nDevices(PlayoutDevices());  // also updates the list of devices
+
+  // Special fix for the case when the user selects '-1' as index (<=> Default
+  // Communication Device)
+  if (index == (uint16_t)(-1)) {
+    defaultCommunicationDevice = true;
+    index = 0;
+    RTC_LOG(LS_VERBOSE) << "Default Communication endpoint device will be used";
+  }
+
+  if ((index > (nDevices - 1)) || (name == NULL)) {
+    return -1;
+  }
+
+  memset(name, 0, kAdmMaxDeviceNameSize);
+
+  if (guid != NULL) {
+    memset(guid, 0, kAdmMaxGuidSize);
+  }
+
+  MutexLock lock(&mutex_);
+
+  int32_t ret(-1);
+  WCHAR szDeviceName[MAX_PATH];
+  const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0];
+
+  // Get the endpoint device's friendly-name
+  if (defaultCommunicationDevice) {
+    ret = _GetDefaultDeviceName(eRender, eCommunications, szDeviceName,
+                                bufferLen);
+  } else {
bufferLen); + } + + if (ret == 0) { + // Convert the endpoint device's friendly-name to UTF-8 + if (WideCharToMultiByte(CP_UTF8, 0, szDeviceName, -1, name, + kAdmMaxDeviceNameSize, NULL, NULL) == 0) { + RTC_LOG(LS_ERROR) + << "WideCharToMultiByte(CP_UTF8) failed with error code " + << GetLastError(); + } + } + + // Get the endpoint ID string (uniquely identifies the device among all audio + // endpoint devices) + if (defaultCommunicationDevice) { + ret = + _GetDefaultDeviceID(eRender, eCommunications, szDeviceName, bufferLen); + } else { + ret = _GetListDeviceID(eRender, index, szDeviceName, bufferLen); + } + + if (guid != NULL && ret == 0) { + // Convert the endpoint device's ID string to UTF-8 + if (WideCharToMultiByte(CP_UTF8, 0, szDeviceName, -1, guid, kAdmMaxGuidSize, + NULL, NULL) == 0) { + RTC_LOG(LS_ERROR) + << "WideCharToMultiByte(CP_UTF8) failed with error code " + << GetLastError(); + } + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RecordingDeviceName +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::RecordingDeviceName( + uint16_t index, + char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) { + bool defaultCommunicationDevice(false); + const int16_t nDevices( + RecordingDevices()); // also updates the list of devices + + // Special fix for the case when the user selects '-1' as index (<=> Default + // Communication Device) + if (index == (uint16_t)(-1)) { + defaultCommunicationDevice = true; + index = 0; + RTC_LOG(LS_VERBOSE) << "Default Communication endpoint device will be used"; + } + + if ((index > (nDevices - 1)) || (name == NULL)) { + return -1; + } + + memset(name, 0, kAdmMaxDeviceNameSize); + + if (guid != NULL) { + memset(guid, 0, kAdmMaxGuidSize); + } + + MutexLock lock(&mutex_); + + int32_t ret(-1); + WCHAR szDeviceName[MAX_PATH]; + const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0]; + + // Get the endpoint device's friendly-name + if (defaultCommunicationDevice) { + ret = _GetDefaultDeviceName(eCapture, eCommunications, szDeviceName, + bufferLen); + } else { + ret = _GetListDeviceName(eCapture, index, szDeviceName, bufferLen); + } + + if (ret == 0) { + // Convert the endpoint device's friendly-name to UTF-8 + if (WideCharToMultiByte(CP_UTF8, 0, szDeviceName, -1, name, + kAdmMaxDeviceNameSize, NULL, NULL) == 0) { + RTC_LOG(LS_ERROR) + << "WideCharToMultiByte(CP_UTF8) failed with error code " + << GetLastError(); + } + } + + // Get the endpoint ID string (uniquely identifies the device among all audio + // endpoint devices) + if (defaultCommunicationDevice) { + ret = + _GetDefaultDeviceID(eCapture, eCommunications, szDeviceName, bufferLen); + } else { + ret = _GetListDeviceID(eCapture, index, szDeviceName, bufferLen); + } + + if (guid != NULL && ret == 0) { + // Convert the endpoint device's ID string to UTF-8 + if (WideCharToMultiByte(CP_UTF8, 0, szDeviceName, -1, guid, kAdmMaxGuidSize, + NULL, NULL) == 0) { + RTC_LOG(LS_ERROR) + << "WideCharToMultiByte(CP_UTF8) failed with error code " + << GetLastError(); + } + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RecordingDevices +// ---------------------------------------------------------------------------- + +int16_t AudioDeviceWindowsCore::RecordingDevices() { + MutexLock lock(&mutex_); + return RecordingDevicesLocked(); +} + +int16_t AudioDeviceWindowsCore::RecordingDevicesLocked() { + if 
(_RefreshDeviceList(eCapture) != -1) { + return (_DeviceListCount(eCapture)); + } + + return -1; +} + +// ---------------------------------------------------------------------------- +// SetRecordingDevice I (II) +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::SetRecordingDevice(uint16_t index) { + if (_recIsInitialized) { + return -1; + } + + // Get current number of available capture endpoint devices and refresh the + // capture collection. + UINT nDevices = RecordingDevices(); + + if (index < 0 || index > (nDevices - 1)) { + RTC_LOG(LS_ERROR) << "device index is out of range [0," << (nDevices - 1) + << "]"; + return -1; + } + + MutexLock lock(&mutex_); + + HRESULT hr(S_OK); + + RTC_DCHECK(_ptrCaptureCollection); + + // Select an endpoint capture device given the specified index + SAFE_RELEASE(_ptrDeviceIn); + hr = _ptrCaptureCollection->Item(index, &_ptrDeviceIn); + if (FAILED(hr)) { + _TraceCOMError(hr); + SAFE_RELEASE(_ptrDeviceIn); + return -1; + } + + WCHAR szDeviceName[MAX_PATH]; + const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0]; + + // Get the endpoint device's friendly-name + if (_GetDeviceName(_ptrDeviceIn, szDeviceName, bufferLen) == 0) { + RTC_LOG(LS_VERBOSE) << "friendly name: \"" << szDeviceName << "\""; + } + + _usingInputDeviceIndex = true; + _inputDeviceIndex = index; + + return 0; +} + +// ---------------------------------------------------------------------------- +// SetRecordingDevice II (II) +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::SetRecordingDevice( + AudioDeviceModule::WindowsDeviceType device) { + if (_recIsInitialized) { + return -1; + } + + ERole role(eCommunications); + + if (device == AudioDeviceModule::kDefaultDevice) { + role = eConsole; + } else if (device == AudioDeviceModule::kDefaultCommunicationDevice) { + role = eCommunications; + } + + MutexLock lock(&mutex_); + + // Refresh the list of capture endpoint devices + _RefreshDeviceList(eCapture); + + HRESULT hr(S_OK); + + RTC_DCHECK(_ptrEnumerator); + + // Select an endpoint capture device given the specified role + SAFE_RELEASE(_ptrDeviceIn); + hr = _ptrEnumerator->GetDefaultAudioEndpoint(eCapture, role, &_ptrDeviceIn); + if (FAILED(hr)) { + _TraceCOMError(hr); + SAFE_RELEASE(_ptrDeviceIn); + return -1; + } + + WCHAR szDeviceName[MAX_PATH]; + const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName)[0]; + + // Get the endpoint device's friendly-name + if (_GetDeviceName(_ptrDeviceIn, szDeviceName, bufferLen) == 0) { + RTC_LOG(LS_VERBOSE) << "friendly name: \"" << szDeviceName << "\""; + } + + _usingInputDeviceIndex = false; + _inputDevice = device; + + return 0; +} + +// ---------------------------------------------------------------------------- +// PlayoutIsAvailable +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::PlayoutIsAvailable(bool& available) { + available = false; + + // Try to initialize the playout side + int32_t res = InitPlayout(); + + // Cancel effect of initialization + StopPlayout(); + + if (res != -1) { + available = true; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// RecordingIsAvailable +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::RecordingIsAvailable(bool& available) { + available = false; + + // Try to initialize 
the recording side + int32_t res = InitRecording(); + + // Cancel effect of initialization + StopRecording(); + + if (res != -1) { + available = true; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// InitPlayout +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::InitPlayout() { + MutexLock lock(&mutex_); + + if (_playing) { + return -1; + } + + if (_playIsInitialized) { + return 0; + } + + if (_ptrDeviceOut == NULL) { + return -1; + } + + // Initialize the speaker (devices might have been added or removed) + if (InitSpeakerLocked() == -1) { + RTC_LOG(LS_WARNING) << "InitSpeaker() failed"; + } + + // Ensure that the updated rendering endpoint device is valid + if (_ptrDeviceOut == NULL) { + return -1; + } + + if (_builtInAecEnabled && _recIsInitialized) { + // Ensure the correct render device is configured in case + // InitRecording() was called before InitPlayout(). + if (SetDMOProperties() == -1) { + return -1; + } + } + + HRESULT hr = S_OK; + WAVEFORMATEX* pWfxOut = NULL; + WAVEFORMATEX Wfx = WAVEFORMATEX(); + WAVEFORMATEX* pWfxClosestMatch = NULL; + + // Create COM object with IAudioClient interface. + SAFE_RELEASE(_ptrClientOut); + hr = _ptrDeviceOut->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, + (void**)&_ptrClientOut); + EXIT_ON_ERROR(hr); + + // Retrieve the stream format that the audio engine uses for its internal + // processing (mixing) of shared-mode streams. + hr = _ptrClientOut->GetMixFormat(&pWfxOut); + if (SUCCEEDED(hr)) { + RTC_LOG(LS_VERBOSE) << "Audio Engine's current rendering mix format:"; + // format type + RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x" + << rtc::ToHex(pWfxOut->wFormatTag) << " (" + << pWfxOut->wFormatTag << ")"; + // number of channels (i.e. mono, stereo...) + RTC_LOG(LS_VERBOSE) << "nChannels : " << pWfxOut->nChannels; + // sample rate + RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << pWfxOut->nSamplesPerSec; + // for buffer estimation + RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec: " << pWfxOut->nAvgBytesPerSec; + // block size of data + RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << pWfxOut->nBlockAlign; + // number of bits per sample of mono data + RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << pWfxOut->wBitsPerSample; + RTC_LOG(LS_VERBOSE) << "cbSize : " << pWfxOut->cbSize; + } + + // Set wave format + Wfx.wFormatTag = WAVE_FORMAT_PCM; + Wfx.wBitsPerSample = 16; + Wfx.cbSize = 0; + + const int freqs[] = {48000, 44100, 16000, 96000, 32000, 8000}; + hr = S_FALSE; + + // Iterate over frequencies and channels, in order of priority + for (unsigned int freq = 0; freq < sizeof(freqs) / sizeof(freqs[0]); freq++) { + for (unsigned int chan = 0; chan < sizeof(_playChannelsPrioList) / + sizeof(_playChannelsPrioList[0]); + chan++) { + Wfx.nChannels = _playChannelsPrioList[chan]; + Wfx.nSamplesPerSec = freqs[freq]; + Wfx.nBlockAlign = Wfx.nChannels * Wfx.wBitsPerSample / 8; + Wfx.nAvgBytesPerSec = Wfx.nSamplesPerSec * Wfx.nBlockAlign; + // If the method succeeds and the audio endpoint device supports the + // specified stream format, it returns S_OK. If the method succeeds and + // provides a closest match to the specified format, it returns S_FALSE. + hr = _ptrClientOut->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, &Wfx, + &pWfxClosestMatch); + if (hr == S_OK) { + break; + } else { + if (pWfxClosestMatch) { + RTC_LOG(LS_INFO) << "nChannels=" << Wfx.nChannels + << ", nSamplesPerSec=" << Wfx.nSamplesPerSec + << " is not supported. 
Closest match: " + "nChannels=" + << pWfxClosestMatch->nChannels << ", nSamplesPerSec=" + << pWfxClosestMatch->nSamplesPerSec; + CoTaskMemFree(pWfxClosestMatch); + pWfxClosestMatch = NULL; + } else { + RTC_LOG(LS_INFO) << "nChannels=" << Wfx.nChannels + << ", nSamplesPerSec=" << Wfx.nSamplesPerSec + << " is not supported. No closest match."; + } + } + } + if (hr == S_OK) + break; + } + + // TODO(andrew): what happens in the event of failure in the above loop? + // Is _ptrClientOut->Initialize expected to fail? + // Same in InitRecording(). + if (hr == S_OK) { + _playAudioFrameSize = Wfx.nBlockAlign; + // Block size is the number of samples each channel in 10ms. + _playBlockSize = Wfx.nSamplesPerSec / 100; + _playSampleRate = Wfx.nSamplesPerSec; + _devicePlaySampleRate = + Wfx.nSamplesPerSec; // The device itself continues to run at 44.1 kHz. + _devicePlayBlockSize = Wfx.nSamplesPerSec / 100; + _playChannels = Wfx.nChannels; + + RTC_LOG(LS_VERBOSE) << "VoE selected this rendering format:"; + RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x" + << rtc::ToHex(Wfx.wFormatTag) << " (" << Wfx.wFormatTag + << ")"; + RTC_LOG(LS_VERBOSE) << "nChannels : " << Wfx.nChannels; + RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << Wfx.nSamplesPerSec; + RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec : " << Wfx.nAvgBytesPerSec; + RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << Wfx.nBlockAlign; + RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << Wfx.wBitsPerSample; + RTC_LOG(LS_VERBOSE) << "cbSize : " << Wfx.cbSize; + RTC_LOG(LS_VERBOSE) << "Additional settings:"; + RTC_LOG(LS_VERBOSE) << "_playAudioFrameSize: " << _playAudioFrameSize; + RTC_LOG(LS_VERBOSE) << "_playBlockSize : " << _playBlockSize; + RTC_LOG(LS_VERBOSE) << "_playChannels : " << _playChannels; + } + + // Create a rendering stream. + // + // **************************************************************************** + // For a shared-mode stream that uses event-driven buffering, the caller must + // set both hnsPeriodicity and hnsBufferDuration to 0. The Initialize method + // determines how large a buffer to allocate based on the scheduling period + // of the audio engine. Although the client's buffer processing thread is + // event driven, the basic buffer management process, as described previously, + // is unaltered. + // Each time the thread awakens, it should call + // IAudioClient::GetCurrentPadding to determine how much data to write to a + // rendering buffer or read from a capture buffer. In contrast to the two + // buffers that the Initialize method allocates for an exclusive-mode stream + // that uses event-driven buffering, a shared-mode stream requires a single + // buffer. + // **************************************************************************** + // + REFERENCE_TIME hnsBufferDuration = + 0; // ask for minimum buffer size (default) + if (_devicePlaySampleRate == 44100) { + // Ask for a larger buffer size (30ms) when using 44.1kHz as render rate. + // There seems to be a larger risk of underruns for 44.1 compared + // with the default rate (48kHz). When using default, we set the requested + // buffer duration to 0, which sets the buffer to the minimum size + // required by the engine thread. The actual buffer size can then be + // read by GetBufferSize() and it is 20ms on most machines. 
+    hnsBufferDuration = 30 * 10000;
+  }
+  hr = _ptrClientOut->Initialize(
+      AUDCLNT_SHAREMODE_SHARED,  // share Audio Engine with other applications
+      AUDCLNT_STREAMFLAGS_EVENTCALLBACK,  // processing of the audio buffer by
+                                          // the client will be event driven
+      hnsBufferDuration,  // requested buffer capacity as a time value (in
+                          // 100-nanosecond units)
+      0,                  // periodicity
+      &Wfx,               // selected wave format
+      NULL);              // session GUID
+
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::Initialize() failed:";
+  }
+  EXIT_ON_ERROR(hr);
+
+  if (_ptrAudioBuffer) {
+    // Update the audio buffer with the selected parameters
+    _ptrAudioBuffer->SetPlayoutSampleRate(_playSampleRate);
+    _ptrAudioBuffer->SetPlayoutChannels((uint8_t)_playChannels);
+  } else {
+    // We can enter this state during CoreAudioIsSupported() when no
+    // AudioDeviceImplementation has been created, hence the AudioDeviceBuffer
+    // does not exist. It is OK to end up here since we don't initiate any media
+    // in CoreAudioIsSupported().
+    RTC_LOG(LS_VERBOSE)
+        << "AudioDeviceBuffer must be attached before streaming can start";
+  }
+
+  // Get the actual size of the shared (endpoint buffer).
+  // Typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
+  UINT bufferFrameCount(0);
+  hr = _ptrClientOut->GetBufferSize(&bufferFrameCount);
+  if (SUCCEEDED(hr)) {
+    RTC_LOG(LS_VERBOSE) << "IAudioClient::GetBufferSize() => "
+                        << bufferFrameCount << " (<=> "
+                        << bufferFrameCount * _playAudioFrameSize << " bytes)";
+  }
+
+  // Set the event handle that the system signals when an audio buffer is ready
+  // to be processed by the client.
+  hr = _ptrClientOut->SetEventHandle(_hRenderSamplesReadyEvent);
+  EXIT_ON_ERROR(hr);
+
+  // Get an IAudioRenderClient interface.
+  SAFE_RELEASE(_ptrRenderClient);
+  hr = _ptrClientOut->GetService(__uuidof(IAudioRenderClient),
+                                 (void**)&_ptrRenderClient);
+  EXIT_ON_ERROR(hr);
+
+  // Mark playout side as initialized
+  _playIsInitialized = true;
+
+  CoTaskMemFree(pWfxOut);
+  CoTaskMemFree(pWfxClosestMatch);
+
+  RTC_LOG(LS_VERBOSE) << "render side is now initialized";
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  CoTaskMemFree(pWfxOut);
+  CoTaskMemFree(pWfxClosestMatch);
+  SAFE_RELEASE(_ptrClientOut);
+  SAFE_RELEASE(_ptrRenderClient);
+  return -1;
+}
+
+// Capture initialization when the built-in AEC DirectX Media Object (DMO) is
+// used. Called from InitRecording(), most of which is skipped over. The DMO
+// handles device initialization itself.
+// Reference: http://msdn.microsoft.com/en-us/library/ff819492(v=vs.85).aspx
+int32_t AudioDeviceWindowsCore::InitRecordingDMO() {
+  RTC_DCHECK(_builtInAecEnabled);
+  RTC_DCHECK(_dmo);
+
+  if (SetDMOProperties() == -1) {
+    return -1;
+  }
+
+  DMO_MEDIA_TYPE mt = {};
+  HRESULT hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
+  if (FAILED(hr)) {
+    MoFreeMediaType(&mt);
+    _TraceCOMError(hr);
+    return -1;
+  }
+  mt.majortype = MEDIATYPE_Audio;
+  mt.subtype = MEDIASUBTYPE_PCM;
+  mt.formattype = FORMAT_WaveFormatEx;
+
+  // Supported formats
+  // nChannels: 1 (in AEC-only mode)
+  // nSamplesPerSec: 8000, 11025, 16000, 22050
+  // wBitsPerSample: 16
+  WAVEFORMATEX* ptrWav = reinterpret_cast<WAVEFORMATEX*>(mt.pbFormat);
+  ptrWav->wFormatTag = WAVE_FORMAT_PCM;
+  ptrWav->nChannels = 1;
+  // 16000 is the highest we can support with our resampler.
+  ptrWav->nSamplesPerSec = 16000;
+  ptrWav->nAvgBytesPerSec = 32000;
+  ptrWav->nBlockAlign = 2;
+  ptrWav->wBitsPerSample = 16;
+  ptrWav->cbSize = 0;
+
+  // Set the VoE format equal to the AEC output format.
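+  // The fixed AEC output format above is internally consistent:
+  // nBlockAlign = nChannels * wBitsPerSample / 8 = 1 * 16 / 8 = 2 bytes,
+  // and nAvgBytesPerSec = nSamplesPerSec * nBlockAlign = 16000 * 2 = 32000.
+  // A sketch of the equivalent sanity checks (illustrative only, not part
+  // of the upstream code):
+#if 0
+  RTC_DCHECK_EQ(ptrWav->nBlockAlign,
+                ptrWav->nChannels * ptrWav->wBitsPerSample / 8);
+  RTC_DCHECK_EQ(ptrWav->nAvgBytesPerSec,
+                ptrWav->nSamplesPerSec * ptrWav->nBlockAlign);
+#endif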
+  _recAudioFrameSize = ptrWav->nBlockAlign;
+  _recSampleRate = ptrWav->nSamplesPerSec;
+  _recBlockSize = ptrWav->nSamplesPerSec / 100;
+  _recChannels = ptrWav->nChannels;
+
+  // Set the DMO output format parameters.
+  hr = _dmo->SetOutputType(kAecCaptureStreamIndex, &mt, 0);
+  MoFreeMediaType(&mt);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+
+  if (_ptrAudioBuffer) {
+    _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
+    _ptrAudioBuffer->SetRecordingChannels(_recChannels);
+  } else {
+    // Refer to InitRecording() for comments.
+    RTC_LOG(LS_VERBOSE)
+        << "AudioDeviceBuffer must be attached before streaming can start";
+  }
+
+  _mediaBuffer = rtc::make_ref_counted<MediaBufferImpl>(_recBlockSize *
+                                                        _recAudioFrameSize);
+
+  // Optional, but if called, must be after media types are set.
+  hr = _dmo->AllocateStreamingResources();
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+
+  _recIsInitialized = true;
+  RTC_LOG(LS_VERBOSE) << "Capture side is now initialized";
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// InitRecording
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::InitRecording() {
+  MutexLock lock(&mutex_);
+
+  if (_recording) {
+    return -1;
+  }
+
+  if (_recIsInitialized) {
+    return 0;
+  }
+
+  if (QueryPerformanceFrequency(&_perfCounterFreq) == 0) {
+    return -1;
+  }
+  _perfCounterFactor = 10000000.0 / (double)_perfCounterFreq.QuadPart;
+
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  // Initialize the microphone (devices might have been added or removed)
+  if (InitMicrophoneLocked() == -1) {
+    RTC_LOG(LS_WARNING) << "InitMicrophone() failed";
+  }
+
+  // Ensure that the updated capturing endpoint device is valid
+  if (_ptrDeviceIn == NULL) {
+    return -1;
+  }
+
+  if (_builtInAecEnabled) {
+    // The DMO will configure the capture device.
+    return InitRecordingDMO();
+  }
+
+  HRESULT hr = S_OK;
+  WAVEFORMATEX* pWfxIn = NULL;
+  WAVEFORMATEXTENSIBLE Wfx = WAVEFORMATEXTENSIBLE();
+  WAVEFORMATEX* pWfxClosestMatch = NULL;
+
+  // Create COM object with IAudioClient interface.
+  SAFE_RELEASE(_ptrClientIn);
+  hr = _ptrDeviceIn->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL,
+                              (void**)&_ptrClientIn);
+  EXIT_ON_ERROR(hr);
+
+  // Retrieve the stream format that the audio engine uses for its internal
+  // processing (mixing) of shared-mode streams.
+  hr = _ptrClientIn->GetMixFormat(&pWfxIn);
+  if (SUCCEEDED(hr)) {
+    RTC_LOG(LS_VERBOSE) << "Audio Engine's current capturing mix format:";
+    // format type
+    RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x"
+                        << rtc::ToHex(pWfxIn->wFormatTag) << " ("
+                        << pWfxIn->wFormatTag << ")";
+    // number of channels (i.e. mono, stereo...)
+ RTC_LOG(LS_VERBOSE) << "nChannels : " << pWfxIn->nChannels; + // sample rate + RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << pWfxIn->nSamplesPerSec; + // for buffer estimation + RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec: " << pWfxIn->nAvgBytesPerSec; + // block size of data + RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << pWfxIn->nBlockAlign; + // number of bits per sample of mono data + RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << pWfxIn->wBitsPerSample; + RTC_LOG(LS_VERBOSE) << "cbSize : " << pWfxIn->cbSize; + } + + // Set wave format + Wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; + Wfx.Format.wBitsPerSample = 16; + Wfx.Format.cbSize = 22; + Wfx.dwChannelMask = 0; + Wfx.Samples.wValidBitsPerSample = Wfx.Format.wBitsPerSample; + Wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + + const int freqs[6] = {48000, 44100, 16000, 96000, 32000, 8000}; + hr = S_FALSE; + + // Iterate over frequencies and channels, in order of priority + for (unsigned int freq = 0; freq < sizeof(freqs) / sizeof(freqs[0]); freq++) { + for (unsigned int chan = 0; + chan < sizeof(_recChannelsPrioList) / sizeof(_recChannelsPrioList[0]); + chan++) { + Wfx.Format.nChannels = _recChannelsPrioList[chan]; + Wfx.Format.nSamplesPerSec = freqs[freq]; + Wfx.Format.nBlockAlign = + Wfx.Format.nChannels * Wfx.Format.wBitsPerSample / 8; + Wfx.Format.nAvgBytesPerSec = + Wfx.Format.nSamplesPerSec * Wfx.Format.nBlockAlign; + // If the method succeeds and the audio endpoint device supports the + // specified stream format, it returns S_OK. If the method succeeds and + // provides a closest match to the specified format, it returns S_FALSE. + hr = _ptrClientIn->IsFormatSupported( + AUDCLNT_SHAREMODE_SHARED, (WAVEFORMATEX*)&Wfx, &pWfxClosestMatch); + if (hr == S_OK) { + break; + } else { + if (pWfxClosestMatch) { + RTC_LOG(LS_INFO) << "nChannels=" << Wfx.Format.nChannels + << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec + << " is not supported. Closest match: " + "nChannels=" + << pWfxClosestMatch->nChannels << ", nSamplesPerSec=" + << pWfxClosestMatch->nSamplesPerSec; + CoTaskMemFree(pWfxClosestMatch); + pWfxClosestMatch = NULL; + } else { + RTC_LOG(LS_INFO) << "nChannels=" << Wfx.Format.nChannels + << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec + << " is not supported. No closest match."; + } + } + } + if (hr == S_OK) + break; + } + + if (hr == S_OK) { + _recAudioFrameSize = Wfx.Format.nBlockAlign; + _recSampleRate = Wfx.Format.nSamplesPerSec; + _recBlockSize = Wfx.Format.nSamplesPerSec / 100; + _recChannels = Wfx.Format.nChannels; + + RTC_LOG(LS_VERBOSE) << "VoE selected this capturing format:"; + RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x" + << rtc::ToHex(Wfx.Format.wFormatTag) << " (" + << Wfx.Format.wFormatTag << ")"; + RTC_LOG(LS_VERBOSE) << "nChannels : " << Wfx.Format.nChannels; + RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << Wfx.Format.nSamplesPerSec; + RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec : " << Wfx.Format.nAvgBytesPerSec; + RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << Wfx.Format.nBlockAlign; + RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << Wfx.Format.wBitsPerSample; + RTC_LOG(LS_VERBOSE) << "cbSize : " << Wfx.Format.cbSize; + RTC_LOG(LS_VERBOSE) << "Additional settings:"; + RTC_LOG(LS_VERBOSE) << "_recAudioFrameSize: " << _recAudioFrameSize; + RTC_LOG(LS_VERBOSE) << "_recBlockSize : " << _recBlockSize; + RTC_LOG(LS_VERBOSE) << "_recChannels : " << _recChannels; + } + + // Create a capturing stream. 
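+  // Unlike the render side (which may ask for a 30 ms buffer at 44.1 kHz),
+  // the capture side requests the minimum buffer by passing a zero buffer
+  // duration, in line with the event-driven shared-mode guidance quoted in
+  // InitPlayout(). The flag combination below could be named up front, e.g.
+  // (illustrative only, not the upstream spelling):
+#if 0
+  const DWORD kCaptureStreamFlags =
+      AUDCLNT_STREAMFLAGS_EVENTCALLBACK |  // event-driven buffering
+      AUDCLNT_STREAMFLAGS_NOPERSIST;       // volume/mute not persisted
+#endif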
+ hr = _ptrClientIn->Initialize( + AUDCLNT_SHAREMODE_SHARED, // share Audio Engine with other applications + AUDCLNT_STREAMFLAGS_EVENTCALLBACK | // processing of the audio buffer by + // the client will be event driven + AUDCLNT_STREAMFLAGS_NOPERSIST, // volume and mute settings for an + // audio session will not persist + // across system restarts + 0, // required for event-driven shared mode + 0, // periodicity + (WAVEFORMATEX*)&Wfx, // selected wave format + NULL); // session GUID + + if (hr != S_OK) { + RTC_LOG(LS_ERROR) << "IAudioClient::Initialize() failed:"; + } + EXIT_ON_ERROR(hr); + + if (_ptrAudioBuffer) { + // Update the audio buffer with the selected parameters + _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate); + _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels); + } else { + // We can enter this state during CoreAudioIsSupported() when no + // AudioDeviceImplementation has been created, hence the AudioDeviceBuffer + // does not exist. It is OK to end up here since we don't initiate any media + // in CoreAudioIsSupported(). + RTC_LOG(LS_VERBOSE) + << "AudioDeviceBuffer must be attached before streaming can start"; + } + + // Get the actual size of the shared (endpoint buffer). + // Typical value is 960 audio frames <=> 20ms @ 48kHz sample rate. + UINT bufferFrameCount(0); + hr = _ptrClientIn->GetBufferSize(&bufferFrameCount); + if (SUCCEEDED(hr)) { + RTC_LOG(LS_VERBOSE) << "IAudioClient::GetBufferSize() => " + << bufferFrameCount << " (<=> " + << bufferFrameCount * _recAudioFrameSize << " bytes)"; + } + + // Set the event handle that the system signals when an audio buffer is ready + // to be processed by the client. + hr = _ptrClientIn->SetEventHandle(_hCaptureSamplesReadyEvent); + EXIT_ON_ERROR(hr); + + // Get an IAudioCaptureClient interface. + SAFE_RELEASE(_ptrCaptureClient); + hr = _ptrClientIn->GetService(__uuidof(IAudioCaptureClient), + (void**)&_ptrCaptureClient); + EXIT_ON_ERROR(hr); + + // Mark capture side as initialized + _recIsInitialized = true; + + CoTaskMemFree(pWfxIn); + CoTaskMemFree(pWfxClosestMatch); + + RTC_LOG(LS_VERBOSE) << "capture side is now initialized"; + return 0; + +Exit: + _TraceCOMError(hr); + CoTaskMemFree(pWfxIn); + CoTaskMemFree(pWfxClosestMatch); + SAFE_RELEASE(_ptrClientIn); + SAFE_RELEASE(_ptrCaptureClient); + return -1; +} + +// ---------------------------------------------------------------------------- +// StartRecording +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::StartRecording() { + if (!_recIsInitialized) { + return -1; + } + + if (_hRecThread != NULL) { + return 0; + } + + if (_recording) { + return 0; + } + + { + MutexLock lockScoped(&mutex_); + + // Create thread which will drive the capturing + LPTHREAD_START_ROUTINE lpStartAddress = WSAPICaptureThread; + if (_builtInAecEnabled) { + // Redirect to the DMO polling method. + lpStartAddress = WSAPICaptureThreadPollDMO; + + if (!_playing) { + // The DMO won't provide us captured output data unless we + // give it render data to process. 
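+        // In other words, a client that enables the built-in AEC must
+        // observe a strict start order. A minimal usage sketch, assuming
+        // `adm` is an initialized AudioDeviceModule with the built-in AEC
+        // enabled (hypothetical caller code, illustrative only):
+#if 0
+        adm->InitPlayout();
+        adm->StartPlayout();    // Render side first: it feeds the AEC DMO.
+        adm->InitRecording();
+        adm->StartRecording();  // Capture may only start once playout runs.
+#endif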
+ RTC_LOG(LS_ERROR) + << "Playout must be started before recording when using" + " the built-in AEC"; + return -1; + } + } + + RTC_DCHECK(_hRecThread == NULL); + _hRecThread = CreateThread(NULL, 0, lpStartAddress, this, 0, NULL); + if (_hRecThread == NULL) { + RTC_LOG(LS_ERROR) << "failed to create the recording thread"; + return -1; + } + + // Set thread priority to highest possible + SetThreadPriority(_hRecThread, THREAD_PRIORITY_TIME_CRITICAL); + } // critScoped + + DWORD ret = WaitForSingleObject(_hCaptureStartedEvent, 1000); + if (ret != WAIT_OBJECT_0) { + RTC_LOG(LS_VERBOSE) << "capturing did not start up properly"; + return -1; + } + RTC_LOG(LS_VERBOSE) << "capture audio stream has now started..."; + + _recording = true; + + return 0; +} + +// ---------------------------------------------------------------------------- +// StopRecording +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::StopRecording() { + int32_t err = 0; + + if (!_recIsInitialized) { + return 0; + } + + _Lock(); + + if (_hRecThread == NULL) { + RTC_LOG(LS_VERBOSE) + << "no capturing stream is active => close down WASAPI only"; + SAFE_RELEASE(_ptrClientIn); + SAFE_RELEASE(_ptrCaptureClient); + _recIsInitialized = false; + _recording = false; + _UnLock(); + return 0; + } + + // Stop the driving thread... + RTC_LOG(LS_VERBOSE) << "closing down the webrtc_core_audio_capture_thread..."; + // Manual-reset event; it will remain signalled to stop all capture threads. + SetEvent(_hShutdownCaptureEvent); + + _UnLock(); + DWORD ret = WaitForSingleObject(_hRecThread, 2000); + if (ret != WAIT_OBJECT_0) { + RTC_LOG(LS_ERROR) + << "failed to close down webrtc_core_audio_capture_thread"; + err = -1; + } else { + RTC_LOG(LS_VERBOSE) << "webrtc_core_audio_capture_thread is now closed"; + } + _Lock(); + + ResetEvent(_hShutdownCaptureEvent); // Must be manually reset. + // Ensure that the thread has released these interfaces properly. + RTC_DCHECK(err == -1 || _ptrClientIn == NULL); + RTC_DCHECK(err == -1 || _ptrCaptureClient == NULL); + + _recIsInitialized = false; + _recording = false; + + // These will create thread leaks in the result of an error, + // but we can at least resume the call. + CloseHandle(_hRecThread); + _hRecThread = NULL; + + if (_builtInAecEnabled) { + RTC_DCHECK(_dmo); + // This is necessary. Otherwise the DMO can generate garbage render + // audio even after rendering has stopped. 
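+    // FreeStreamingResources() undoes the AllocateStreamingResources() call
+    // made in InitRecordingDMO(), so every capture session re-allocates the
+    // DMO from scratch. A hypothetical RAII pairing of the two calls
+    // (illustrative only; the upstream code keeps the calls explicit and
+    // checks the returned HRESULTs, which this sketch omits):
+#if 0
+    class DmoStreamingGuard {
+     public:
+      explicit DmoStreamingGuard(IMediaObject* dmo) : dmo_(dmo) {
+        dmo_->AllocateStreamingResources();
+      }
+      ~DmoStreamingGuard() { dmo_->FreeStreamingResources(); }
+
+     private:
+      IMediaObject* const dmo_;
+    };
+#endif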
+ HRESULT hr = _dmo->FreeStreamingResources(); + if (FAILED(hr)) { + _TraceCOMError(hr); + err = -1; + } + } + + _UnLock(); + + return err; +} + +// ---------------------------------------------------------------------------- +// RecordingIsInitialized +// ---------------------------------------------------------------------------- + +bool AudioDeviceWindowsCore::RecordingIsInitialized() const { + return (_recIsInitialized); +} + +// ---------------------------------------------------------------------------- +// Recording +// ---------------------------------------------------------------------------- + +bool AudioDeviceWindowsCore::Recording() const { + return (_recording); +} + +// ---------------------------------------------------------------------------- +// PlayoutIsInitialized +// ---------------------------------------------------------------------------- + +bool AudioDeviceWindowsCore::PlayoutIsInitialized() const { + return (_playIsInitialized); +} + +// ---------------------------------------------------------------------------- +// StartPlayout +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::StartPlayout() { + if (!_playIsInitialized) { + return -1; + } + + if (_hPlayThread != NULL) { + return 0; + } + + if (_playing) { + return 0; + } + + { + MutexLock lockScoped(&mutex_); + + // Create thread which will drive the rendering. + RTC_DCHECK(_hPlayThread == NULL); + _hPlayThread = CreateThread(NULL, 0, WSAPIRenderThread, this, 0, NULL); + if (_hPlayThread == NULL) { + RTC_LOG(LS_ERROR) << "failed to create the playout thread"; + return -1; + } + + // Set thread priority to highest possible. + SetThreadPriority(_hPlayThread, THREAD_PRIORITY_TIME_CRITICAL); + } // critScoped + + DWORD ret = WaitForSingleObject(_hRenderStartedEvent, 1000); + if (ret != WAIT_OBJECT_0) { + RTC_LOG(LS_VERBOSE) << "rendering did not start up properly"; + return -1; + } + + _playing = true; + RTC_LOG(LS_VERBOSE) << "rendering audio stream has now started..."; + + return 0; +} + +// ---------------------------------------------------------------------------- +// StopPlayout +// ---------------------------------------------------------------------------- + +int32_t AudioDeviceWindowsCore::StopPlayout() { + if (!_playIsInitialized) { + return 0; + } + + { + MutexLock lockScoped(&mutex_); + + if (_hPlayThread == NULL) { + RTC_LOG(LS_VERBOSE) + << "no rendering stream is active => close down WASAPI only"; + SAFE_RELEASE(_ptrClientOut); + SAFE_RELEASE(_ptrRenderClient); + _playIsInitialized = false; + _playing = false; + return 0; + } + + // stop the driving thread... + RTC_LOG(LS_VERBOSE) + << "closing down the webrtc_core_audio_render_thread..."; + SetEvent(_hShutdownRenderEvent); + } // critScoped + + DWORD ret = WaitForSingleObject(_hPlayThread, 2000); + if (ret != WAIT_OBJECT_0) { + // the thread did not stop as it should + RTC_LOG(LS_ERROR) << "failed to close down webrtc_core_audio_render_thread"; + CloseHandle(_hPlayThread); + _hPlayThread = NULL; + _playIsInitialized = false; + _playing = false; + return -1; + } + + { + MutexLock lockScoped(&mutex_); + RTC_LOG(LS_VERBOSE) << "webrtc_core_audio_render_thread is now closed"; + + // to reset this event manually at each time we finish with it, + // in case that the render thread has exited before StopPlayout(), + // this event might be caught by the new render thread within same VoE + // instance. 
+    ResetEvent(_hShutdownRenderEvent);
+
+    SAFE_RELEASE(_ptrClientOut);
+    SAFE_RELEASE(_ptrRenderClient);
+
+    _playIsInitialized = false;
+    _playing = false;
+
+    CloseHandle(_hPlayThread);
+    _hPlayThread = NULL;
+
+    if (_builtInAecEnabled && _recording) {
+      // The DMO won't provide us captured output data unless we
+      // give it render data to process.
+      //
+      // We still permit the playout to shutdown, and trace a warning.
+      // Otherwise, VoE can get into a state which will never permit
+      // playout to stop properly.
+      RTC_LOG(LS_WARNING)
+          << "Recording should be stopped before playout when using the"
+             " built-in AEC";
+    }
+
+    // Reset the playout delay value.
+    _sndCardPlayDelay = 0;
+  }  // critScoped
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+// PlayoutDelay
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::PlayoutDelay(uint16_t& delayMS) const {
+  MutexLock lockScoped(&mutex_);
+  delayMS = static_cast<uint16_t>(_sndCardPlayDelay);
+  return 0;
+}
+
+bool AudioDeviceWindowsCore::BuiltInAECIsAvailable() const {
+  return _dmo != nullptr;
+}
+
+// ----------------------------------------------------------------------------
+// Playing
+// ----------------------------------------------------------------------------
+
+bool AudioDeviceWindowsCore::Playing() const {
+  return (_playing);
+}
+
+// ============================================================================
+// Private Methods
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// [static] WSAPIRenderThread
+// ----------------------------------------------------------------------------
+
+DWORD WINAPI AudioDeviceWindowsCore::WSAPIRenderThread(LPVOID context) {
+  return reinterpret_cast<AudioDeviceWindowsCore*>(context)->DoRenderThread();
+}
+
+// ----------------------------------------------------------------------------
+// [static] WSAPICaptureThread
+// ----------------------------------------------------------------------------
+
+DWORD WINAPI AudioDeviceWindowsCore::WSAPICaptureThread(LPVOID context) {
+  return reinterpret_cast<AudioDeviceWindowsCore*>(context)->DoCaptureThread();
+}
+
+DWORD WINAPI AudioDeviceWindowsCore::WSAPICaptureThreadPollDMO(LPVOID context) {
+  return reinterpret_cast<AudioDeviceWindowsCore*>(context)
+      ->DoCaptureThreadPollDMO();
+}
+
+// ----------------------------------------------------------------------------
+// DoRenderThread
+// ----------------------------------------------------------------------------
+
+DWORD AudioDeviceWindowsCore::DoRenderThread() {
+  bool keepPlaying = true;
+  HANDLE waitArray[2] = {_hShutdownRenderEvent, _hRenderSamplesReadyEvent};
+  HRESULT hr = S_OK;
+  HANDLE hMmTask = NULL;
+
+  // Initialize COM as MTA in this thread.
+  ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
+  if (!comInit.Succeeded()) {
+    RTC_LOG(LS_ERROR) << "failed to initialize COM in render thread";
+    return 1;
+  }
+
+  rtc::SetCurrentThreadName("webrtc_core_audio_render_thread");
+
+  // Use Multimedia Class Scheduler Service (MMCSS) to boost the thread
+  // priority.
+ // + if (_winSupportAvrt) { + DWORD taskIndex(0); + hMmTask = _PAvSetMmThreadCharacteristicsA("Pro Audio", &taskIndex); + if (hMmTask) { + if (FALSE == _PAvSetMmThreadPriority(hMmTask, AVRT_PRIORITY_CRITICAL)) { + RTC_LOG(LS_WARNING) << "failed to boost play-thread using MMCSS"; + } + RTC_LOG(LS_VERBOSE) + << "render thread is now registered with MMCSS (taskIndex=" + << taskIndex << ")"; + } else { + RTC_LOG(LS_WARNING) << "failed to enable MMCSS on render thread (err=" + << GetLastError() << ")"; + _TraceCOMError(GetLastError()); + } + } + + _Lock(); + + IAudioClock* clock = NULL; + + // Get size of rendering buffer (length is expressed as the number of audio + // frames the buffer can hold). This value is fixed during the rendering + // session. + // + UINT32 bufferLength = 0; + hr = _ptrClientOut->GetBufferSize(&bufferLength); + EXIT_ON_ERROR(hr); + RTC_LOG(LS_VERBOSE) << "[REND] size of buffer : " << bufferLength; + + // Get maximum latency for the current stream (will not change for the + // lifetime of the IAudioClient object). + // + REFERENCE_TIME latency; + _ptrClientOut->GetStreamLatency(&latency); + RTC_LOG(LS_VERBOSE) << "[REND] max stream latency : " << (DWORD)latency + << " (" << (double)(latency / 10000.0) << " ms)"; + + // Get the length of the periodic interval separating successive processing + // passes by the audio engine on the data in the endpoint buffer. + // + // The period between processing passes by the audio engine is fixed for a + // particular audio endpoint device and represents the smallest processing + // quantum for the audio engine. This period plus the stream latency between + // the buffer and endpoint device represents the minimum possible latency that + // an audio application can achieve. Typical value: 100000 <=> 0.01 sec = + // 10ms. + // + REFERENCE_TIME devPeriod = 0; + REFERENCE_TIME devPeriodMin = 0; + _ptrClientOut->GetDevicePeriod(&devPeriod, &devPeriodMin); + RTC_LOG(LS_VERBOSE) << "[REND] device period : " << (DWORD)devPeriod + << " (" << (double)(devPeriod / 10000.0) << " ms)"; + + // Derive initial rendering delay. + // Example: 10*(960/480) + 15 = 20 + 15 = 35ms + // + int playout_delay = 10 * (bufferLength / _playBlockSize) + + (int)((latency + devPeriod) / 10000); + _sndCardPlayDelay = playout_delay; + _writtenSamples = 0; + RTC_LOG(LS_VERBOSE) << "[REND] initial delay : " << playout_delay; + + double endpointBufferSizeMS = + 10.0 * ((double)bufferLength / (double)_devicePlayBlockSize); + RTC_LOG(LS_VERBOSE) << "[REND] endpointBufferSizeMS : " + << endpointBufferSizeMS; + + // Before starting the stream, fill the rendering buffer with silence. + // + BYTE* pData = NULL; + hr = _ptrRenderClient->GetBuffer(bufferLength, &pData); + EXIT_ON_ERROR(hr); + + hr = + _ptrRenderClient->ReleaseBuffer(bufferLength, AUDCLNT_BUFFERFLAGS_SILENT); + EXIT_ON_ERROR(hr); + + _writtenSamples += bufferLength; + + hr = _ptrClientOut->GetService(__uuidof(IAudioClock), (void**)&clock); + if (FAILED(hr)) { + RTC_LOG(LS_WARNING) + << "failed to get IAudioClock interface from the IAudioClient"; + } + + // Start up the rendering audio stream. + hr = _ptrClientOut->Start(); + EXIT_ON_ERROR(hr); + + _UnLock(); + + // Set event which will ensure that the calling thread modifies the playing + // state to true. 
+  //
+  SetEvent(_hRenderStartedEvent);
+
+  // >> ------------------ THREAD LOOP ------------------
+
+  while (keepPlaying) {
+    // Wait for a render notification event or a shutdown event
+    DWORD waitResult = WaitForMultipleObjects(2, waitArray, FALSE, 500);
+    switch (waitResult) {
+      case WAIT_OBJECT_0 + 0:  // _hShutdownRenderEvent
+        keepPlaying = false;
+        break;
+      case WAIT_OBJECT_0 + 1:  // _hRenderSamplesReadyEvent
+        break;
+      case WAIT_TIMEOUT:  // timeout notification
+        RTC_LOG(LS_WARNING) << "render event timed out after 0.5 seconds";
+        goto Exit;
+      default:  // unexpected error
+        RTC_LOG(LS_WARNING) << "unknown wait termination on render side";
+        goto Exit;
+    }
+
+    while (keepPlaying) {
+      _Lock();
+
+      // Sanity check to ensure that essential states are not modified
+      // during the unlocked period.
+      if (_ptrRenderClient == NULL || _ptrClientOut == NULL) {
+        _UnLock();
+        RTC_LOG(LS_ERROR)
+            << "output state has been modified during unlocked period";
+        goto Exit;
+      }
+
+      // Get the number of frames of padding (queued up to play) in the endpoint
+      // buffer.
+      UINT32 padding = 0;
+      hr = _ptrClientOut->GetCurrentPadding(&padding);
+      EXIT_ON_ERROR(hr);
+
+      // Derive the amount of available space in the output buffer
+      uint32_t framesAvailable = bufferLength - padding;
+
+      // Do we have 10 ms available in the render buffer?
+      if (framesAvailable < _playBlockSize) {
+        // Not enough space in render buffer to store next render packet.
+        _UnLock();
+        break;
+      }
+
+      // Write n*10ms buffers to the render buffer
+      const uint32_t n10msBuffers = (framesAvailable / _playBlockSize);
+      for (uint32_t n = 0; n < n10msBuffers; n++) {
+        // Get pointer (i.e., grab the buffer) to next space in the shared
+        // render buffer.
+        hr = _ptrRenderClient->GetBuffer(_playBlockSize, &pData);
+        EXIT_ON_ERROR(hr);
+
+        if (_ptrAudioBuffer) {
+          // Request data to be played out (#bytes =
+          // _playBlockSize*_audioFrameSize)
+          _UnLock();
+          int32_t nSamples =
+              _ptrAudioBuffer->RequestPlayoutData(_playBlockSize);
+          _Lock();
+
+          if (nSamples == -1) {
+            _UnLock();
+            RTC_LOG(LS_ERROR) << "failed to read data from render client";
+            goto Exit;
+          }
+
+          // Sanity check to ensure that essential states are not modified
+          // during the unlocked period
+          if (_ptrRenderClient == NULL || _ptrClientOut == NULL) {
+            _UnLock();
+            RTC_LOG(LS_ERROR)
+                << "output state has been modified during unlocked"
+                   " period";
+            goto Exit;
+          }
+          if (nSamples != static_cast<int32_t>(_playBlockSize)) {
+            RTC_LOG(LS_WARNING)
+                << "nSamples(" << nSamples << ") != _playBlockSize("
+                << _playBlockSize << ")";
+          }
+
+          // Get the actual (stored) data
+          nSamples = _ptrAudioBuffer->GetPlayoutData((int8_t*)pData);
+        }
+
+        DWORD dwFlags(0);
+        hr = _ptrRenderClient->ReleaseBuffer(_playBlockSize, dwFlags);
+        // See http://msdn.microsoft.com/en-us/library/dd316605(VS.85).aspx
+        // for more details regarding AUDCLNT_E_DEVICE_INVALIDATED.
+        EXIT_ON_ERROR(hr);
+
+        _writtenSamples += _playBlockSize;
+      }
+
+      // Check the current delay on the playout side.
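+      // The computation below takes the difference between how much audio
+      // has been written to the endpoint and how much the device clock says
+      // has actually been played out:
+      //   playout_delay_ms = (writtenSamples / sampleRate - pos / freq) * 1000
+      // Worked example with illustrative numbers: 48000 samples written at
+      // 48 kHz is 1.000 s; if IAudioClock reports 0.980 s played, the
+      // remaining delay is 20 ms.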
+      if (clock) {
+        UINT64 pos = 0;
+        UINT64 freq = 1;
+        clock->GetPosition(&pos, NULL);
+        clock->GetFrequency(&freq);
+        playout_delay = ROUND((double(_writtenSamples) / _devicePlaySampleRate -
+                               double(pos) / freq) *
+                              1000.0);
+        _sndCardPlayDelay = playout_delay;
+      }
+
+      _UnLock();
+    }
+  }
+
+  // ------------------ THREAD LOOP ------------------ <<
+
+  SleepMs(static_cast<DWORD>(endpointBufferSizeMS + 0.5));
+  hr = _ptrClientOut->Stop();
+
+Exit:
+  SAFE_RELEASE(clock);
+
+  if (FAILED(hr)) {
+    _ptrClientOut->Stop();
+    _UnLock();
+    _TraceCOMError(hr);
+  }
+
+  if (_winSupportAvrt) {
+    if (NULL != hMmTask) {
+      _PAvRevertMmThreadCharacteristics(hMmTask);
+    }
+  }
+
+  _Lock();
+
+  if (keepPlaying) {
+    if (_ptrClientOut != NULL) {
+      hr = _ptrClientOut->Stop();
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+      }
+      hr = _ptrClientOut->Reset();
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+      }
+    }
+    RTC_LOG(LS_ERROR)
+        << "Playout error: rendering thread has ended prematurely";
+  } else {
+    RTC_LOG(LS_VERBOSE) << "Rendering thread is now terminated properly";
+  }
+
+  _UnLock();
+
+  return (DWORD)hr;
+}
+
+DWORD AudioDeviceWindowsCore::InitCaptureThreadPriority() {
+  _hMmTask = NULL;
+
+  rtc::SetCurrentThreadName("webrtc_core_audio_capture_thread");
+
+  // Use Multimedia Class Scheduler Service (MMCSS) to boost the thread
+  // priority.
+  if (_winSupportAvrt) {
+    DWORD taskIndex(0);
+    _hMmTask = _PAvSetMmThreadCharacteristicsA("Pro Audio", &taskIndex);
+    if (_hMmTask) {
+      if (!_PAvSetMmThreadPriority(_hMmTask, AVRT_PRIORITY_CRITICAL)) {
+        RTC_LOG(LS_WARNING) << "failed to boost rec-thread using MMCSS";
+      }
+      RTC_LOG(LS_VERBOSE)
+          << "capture thread is now registered with MMCSS (taskIndex="
+          << taskIndex << ")";
+    } else {
+      RTC_LOG(LS_WARNING) << "failed to enable MMCSS on capture thread (err="
+                          << GetLastError() << ")";
+      _TraceCOMError(GetLastError());
+    }
+  }
+
+  return S_OK;
+}
+
+void AudioDeviceWindowsCore::RevertCaptureThreadPriority() {
+  if (_winSupportAvrt) {
+    if (NULL != _hMmTask) {
+      _PAvRevertMmThreadCharacteristics(_hMmTask);
+    }
+  }
+
+  _hMmTask = NULL;
+}
+
+DWORD AudioDeviceWindowsCore::DoCaptureThreadPollDMO() {
+  RTC_DCHECK(_mediaBuffer);
+  bool keepRecording = true;
+
+  // Initialize COM as MTA in this thread.
+  ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
+  if (!comInit.Succeeded()) {
+    RTC_LOG(LS_ERROR) << "failed to initialize COM in polling DMO thread";
+    return 1;
+  }
+
+  HRESULT hr = InitCaptureThreadPriority();
+  if (FAILED(hr)) {
+    return hr;
+  }
+
+  // Set event which will ensure that the calling thread modifies the
+  // recording state to true.
+  SetEvent(_hCaptureStartedEvent);
+
+  // >> ---------------------------- THREAD LOOP ----------------------------
+  while (keepRecording) {
+    // Poll the DMO every 5 ms.
+    // (The same interval used in the Wave implementation.)
+    DWORD waitResult = WaitForSingleObject(_hShutdownCaptureEvent, 5);
+    switch (waitResult) {
+      case WAIT_OBJECT_0:  // _hShutdownCaptureEvent
+        keepRecording = false;
+        break;
+      case WAIT_TIMEOUT:  // timeout notification
+        break;
+      default:  // unexpected error
+        RTC_LOG(LS_WARNING) << "Unknown wait termination on capture side";
+        hr = -1;  // To signal an error callback.
+        keepRecording = false;
+        break;
+    }
+
+    while (keepRecording) {
+      MutexLock lockScoped(&mutex_);
+
+      DWORD dwStatus = 0;
+      {
+        DMO_OUTPUT_DATA_BUFFER dmoBuffer = {0};
+        dmoBuffer.pBuffer = _mediaBuffer.get();
+        dmoBuffer.pBuffer->AddRef();
+
+        // Poll the DMO for AEC processed capture data. The DMO will
+        // copy available data to `dmoBuffer`, and should only return
+        // 10 ms frames. The value of `dwStatus` should be ignored.
+        hr = _dmo->ProcessOutput(0, 1, &dmoBuffer, &dwStatus);
+        SAFE_RELEASE(dmoBuffer.pBuffer);
+        dwStatus = dmoBuffer.dwStatus;
+      }
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+        keepRecording = false;
+        RTC_DCHECK_NOTREACHED();
+        break;
+      }
+
+      ULONG bytesProduced = 0;
+      BYTE* data;
+      // Get a pointer to the data buffer. This should be valid until
+      // the next call to ProcessOutput.
+      hr = _mediaBuffer->GetBufferAndLength(&data, &bytesProduced);
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+        keepRecording = false;
+        RTC_DCHECK_NOTREACHED();
+        break;
+      }
+
+      if (bytesProduced > 0) {
+        const int kSamplesProduced = bytesProduced / _recAudioFrameSize;
+        // TODO(andrew): verify that this is always satisfied. It might
+        // be that ProcessOutput will try to return more than 10 ms if
+        // we fail to call it frequently enough.
+        RTC_DCHECK_EQ(kSamplesProduced, static_cast<int>(_recBlockSize));
+        RTC_DCHECK_EQ(sizeof(BYTE), sizeof(int8_t));
+        _ptrAudioBuffer->SetRecordedBuffer(reinterpret_cast<int8_t*>(data),
+                                           kSamplesProduced);
+        _ptrAudioBuffer->SetVQEData(0, 0);
+
+        _UnLock();  // Release lock while making the callback.
+        _ptrAudioBuffer->DeliverRecordedData();
+        _Lock();
+      }
+
+      // Reset length to indicate buffer availability.
+      hr = _mediaBuffer->SetLength(0);
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+        keepRecording = false;
+        RTC_DCHECK_NOTREACHED();
+        break;
+      }
+
+      if (!(dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE)) {
+        // The DMO cannot currently produce more data. This is the
+        // normal case; otherwise it means the DMO had more than 10 ms
+        // of data available and ProcessOutput should be called again.
+        break;
+      }
+    }
+  }
+  // ---------------------------- THREAD LOOP ---------------------------- <<
+
+  RevertCaptureThreadPriority();
+
+  if (FAILED(hr)) {
+    RTC_LOG(LS_ERROR)
+        << "Recording error: capturing thread has ended prematurely";
+  } else {
+    RTC_LOG(LS_VERBOSE) << "Capturing thread is now terminated properly";
+  }
+
+  return hr;
+}
+
+// ----------------------------------------------------------------------------
+// DoCaptureThread
+// ----------------------------------------------------------------------------
+
+DWORD AudioDeviceWindowsCore::DoCaptureThread() {
+  bool keepRecording = true;
+  HANDLE waitArray[2] = {_hShutdownCaptureEvent, _hCaptureSamplesReadyEvent};
+  HRESULT hr = S_OK;
+
+  LARGE_INTEGER t1;
+
+  BYTE* syncBuffer = NULL;
+  UINT32 syncBufIndex = 0;
+
+  _readSamples = 0;
+
+  // Initialize COM as MTA in this thread.
+  ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
+  if (!comInit.Succeeded()) {
+    RTC_LOG(LS_ERROR) << "failed to initialize COM in capture thread";
+    return 1;
+  }
+
+  hr = InitCaptureThreadPriority();
+  if (FAILED(hr)) {
+    return hr;
+  }
+
+  _Lock();
+
+  // Get size of capturing buffer (length is expressed as the number of audio
+  // frames the buffer can hold). This value is fixed during the capturing
+  // session.
+  //
+  UINT32 bufferLength = 0;
+  if (_ptrClientIn == NULL) {
+    RTC_LOG(LS_ERROR)
+        << "input state has been modified before capture loop starts.";
+    return 1;
+  }
+  hr = _ptrClientIn->GetBufferSize(&bufferLength);
+  EXIT_ON_ERROR(hr);
+  RTC_LOG(LS_VERBOSE) << "[CAPT] size of buffer : " << bufferLength;
+
+  // Allocate memory for sync buffer.
+  // It is used for compensation between native 44.1 and internal 44.0 and
+  // for cases when the capture buffer is larger than 10ms.
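+  // The factor of two gives headroom so that a full endpoint buffer plus a
+  // not-yet-delivered remainder (always less than one 10 ms block) fits.
+  // Worked example with illustrative numbers: bufferLength = 480 frames and
+  // _recAudioFrameSize = 4 bytes (16-bit stereo) give a sync buffer of
+  // 2 * (480 * 4) = 3840 bytes.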
+ // + const UINT32 syncBufferSize = 2 * (bufferLength * _recAudioFrameSize); + syncBuffer = new BYTE[syncBufferSize]; + if (syncBuffer == NULL) { + return (DWORD)E_POINTER; + } + RTC_LOG(LS_VERBOSE) << "[CAPT] size of sync buffer : " << syncBufferSize + << " [bytes]"; + + // Get maximum latency for the current stream (will not change for the + // lifetime of the IAudioClient object). + // + REFERENCE_TIME latency; + _ptrClientIn->GetStreamLatency(&latency); + RTC_LOG(LS_VERBOSE) << "[CAPT] max stream latency : " << (DWORD)latency + << " (" << (double)(latency / 10000.0) << " ms)"; + + // Get the length of the periodic interval separating successive processing + // passes by the audio engine on the data in the endpoint buffer. + // + REFERENCE_TIME devPeriod = 0; + REFERENCE_TIME devPeriodMin = 0; + _ptrClientIn->GetDevicePeriod(&devPeriod, &devPeriodMin); + RTC_LOG(LS_VERBOSE) << "[CAPT] device period : " << (DWORD)devPeriod + << " (" << (double)(devPeriod / 10000.0) << " ms)"; + + double extraDelayMS = (double)((latency + devPeriod) / 10000.0); + RTC_LOG(LS_VERBOSE) << "[CAPT] extraDelayMS : " << extraDelayMS; + + double endpointBufferSizeMS = + 10.0 * ((double)bufferLength / (double)_recBlockSize); + RTC_LOG(LS_VERBOSE) << "[CAPT] endpointBufferSizeMS : " + << endpointBufferSizeMS; + + // Start up the capturing stream. + // + hr = _ptrClientIn->Start(); + EXIT_ON_ERROR(hr); + + _UnLock(); + + // Set event which will ensure that the calling thread modifies the recording + // state to true. + // + SetEvent(_hCaptureStartedEvent); + + // >> ---------------------------- THREAD LOOP ---------------------------- + + while (keepRecording) { + // Wait for a capture notification event or a shutdown event + DWORD waitResult = WaitForMultipleObjects(2, waitArray, FALSE, 500); + switch (waitResult) { + case WAIT_OBJECT_0 + 0: // _hShutdownCaptureEvent + keepRecording = false; + break; + case WAIT_OBJECT_0 + 1: // _hCaptureSamplesReadyEvent + break; + case WAIT_TIMEOUT: // timeout notification + RTC_LOG(LS_WARNING) << "capture event timed out after 0.5 seconds"; + goto Exit; + default: // unexpected error + RTC_LOG(LS_WARNING) << "unknown wait termination on capture side"; + goto Exit; + } + + while (keepRecording) { + BYTE* pData = 0; + UINT32 framesAvailable = 0; + DWORD flags = 0; + UINT64 recTime = 0; + UINT64 recPos = 0; + + _Lock(); + + // Sanity check to ensure that essential states are not modified + // during the unlocked period. + if (_ptrCaptureClient == NULL || _ptrClientIn == NULL) { + _UnLock(); + RTC_LOG(LS_ERROR) + << "input state has been modified during unlocked period"; + goto Exit; + } + + // Find out how much capture data is available + // + hr = _ptrCaptureClient->GetBuffer( + &pData, // packet which is ready to be read by used + &framesAvailable, // #frames in the captured packet (can be zero) + &flags, // support flags (check) + &recPos, // device position of first audio frame in data packet + &recTime); // value of performance counter at the time of recording + // the first audio frame + + if (SUCCEEDED(hr)) { + if (AUDCLNT_S_BUFFER_EMPTY == hr) { + // Buffer was empty => start waiting for a new capture notification + // event + _UnLock(); + break; + } + + if (flags & AUDCLNT_BUFFERFLAGS_SILENT) { + // Treat all of the data in the packet as silence and ignore the + // actual data values. 
+          RTC_LOG(LS_WARNING) << "AUDCLNT_BUFFERFLAGS_SILENT";
+          pData = NULL;
+        }
+
+        RTC_DCHECK_NE(framesAvailable, 0);
+
+        if (pData) {
+          CopyMemory(&syncBuffer[syncBufIndex * _recAudioFrameSize], pData,
+                     framesAvailable * _recAudioFrameSize);
+        } else {
+          ZeroMemory(&syncBuffer[syncBufIndex * _recAudioFrameSize],
+                     framesAvailable * _recAudioFrameSize);
+        }
+        RTC_DCHECK_GE(syncBufferSize, (syncBufIndex * _recAudioFrameSize) +
+                                          framesAvailable * _recAudioFrameSize);
+
+        // Release the capture buffer
+        //
+        hr = _ptrCaptureClient->ReleaseBuffer(framesAvailable);
+        EXIT_ON_ERROR(hr);
+
+        _readSamples += framesAvailable;
+        syncBufIndex += framesAvailable;
+
+        QueryPerformanceCounter(&t1);
+
+        // Get the current recording and playout delay.
+        uint32_t sndCardRecDelay = (uint32_t)(
+            ((((UINT64)t1.QuadPart * _perfCounterFactor) - recTime) / 10000) +
+            (10 * syncBufIndex) / _recBlockSize - 10);
+        uint32_t sndCardPlayDelay = static_cast<uint32_t>(_sndCardPlayDelay);
+
+        while (syncBufIndex >= _recBlockSize) {
+          if (_ptrAudioBuffer) {
+            _ptrAudioBuffer->SetRecordedBuffer((const int8_t*)syncBuffer,
+                                               _recBlockSize);
+            _ptrAudioBuffer->SetVQEData(sndCardPlayDelay, sndCardRecDelay);
+
+            _ptrAudioBuffer->SetTypingStatus(KeyPressed());
+
+            _UnLock();  // release lock while making the callback
+            _ptrAudioBuffer->DeliverRecordedData();
+            _Lock();  // restore the lock
+
+            // Sanity check to ensure that essential states are not modified
+            // during the unlocked period
+            if (_ptrCaptureClient == NULL || _ptrClientIn == NULL) {
+              _UnLock();
+              RTC_LOG(LS_ERROR) << "input state has been modified during"
+                                   " unlocked period";
+              goto Exit;
+            }
+          }
+
+          // Store remaining data that could not be delivered as a 10 ms
+          // segment.
+          MoveMemory(&syncBuffer[0],
+                     &syncBuffer[_recBlockSize * _recAudioFrameSize],
+                     (syncBufIndex - _recBlockSize) * _recAudioFrameSize);
+          syncBufIndex -= _recBlockSize;
+          sndCardRecDelay -= 10;
+        }
+      } else {
+        // If GetBuffer returns AUDCLNT_E_BUFFER_ERROR, the thread consuming the
+        // audio samples must wait for the next processing pass. The client
+        // might benefit from keeping a count of the failed GetBuffer calls. If
+        // GetBuffer returns this error repeatedly, the client can start a new
+        // processing loop after shutting down the current client by calling
+        // IAudioClient::Stop, IAudioClient::Reset, and releasing the audio
+        // client.
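+        // The recovery sequence described above would look roughly as
+        // follows (illustrative sketch only; this thread instead logs the
+        // error below and exits):
+#if 0
+        _ptrClientIn->Stop();
+        _ptrClientIn->Reset();
+        SAFE_RELEASE(_ptrCaptureClient);
+        SAFE_RELEASE(_ptrClientIn);
+        // ...followed by a fresh InitRecording()/StartRecording() cycle.
+#endif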
+        RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetBuffer returned"
+                             " AUDCLNT_E_BUFFER_ERROR, hr = 0x"
+                          << rtc::ToHex(hr);
+        goto Exit;
+      }
+
+      _UnLock();
+    }
+  }
+
+  // ---------------------------- THREAD LOOP ---------------------------- <<
+
+  if (_ptrClientIn) {
+    hr = _ptrClientIn->Stop();
+  }
+
+Exit:
+  if (FAILED(hr)) {
+    _ptrClientIn->Stop();
+    _UnLock();
+    _TraceCOMError(hr);
+  }
+
+  RevertCaptureThreadPriority();
+
+  _Lock();
+
+  if (keepRecording) {
+    if (_ptrClientIn != NULL) {
+      hr = _ptrClientIn->Stop();
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+      }
+      hr = _ptrClientIn->Reset();
+      if (FAILED(hr)) {
+        _TraceCOMError(hr);
+      }
+    }
+
+    RTC_LOG(LS_ERROR)
+        << "Recording error: capturing thread has ended prematurely";
+  } else {
+    RTC_LOG(LS_VERBOSE) << "Capturing thread is now terminated properly";
+  }
+
+  SAFE_RELEASE(_ptrClientIn);
+  SAFE_RELEASE(_ptrCaptureClient);
+
+  _UnLock();
+
+  if (syncBuffer) {
+    delete[] syncBuffer;
+  }
+
+  return (DWORD)hr;
+}
+
+int32_t AudioDeviceWindowsCore::EnableBuiltInAEC(bool enable) {
+  if (_recIsInitialized) {
+    RTC_LOG(LS_ERROR)
+        << "Attempt to set Windows AEC with recording already initialized";
+    return -1;
+  }
+
+  if (_dmo == NULL) {
+    RTC_LOG(LS_ERROR)
+        << "Built-in AEC DMO was not initialized properly at create time";
+    return -1;
+  }
+
+  _builtInAecEnabled = enable;
+  return 0;
+}
+
+void AudioDeviceWindowsCore::_Lock() RTC_NO_THREAD_SAFETY_ANALYSIS {
+  mutex_.Lock();
+}
+
+void AudioDeviceWindowsCore::_UnLock() RTC_NO_THREAD_SAFETY_ANALYSIS {
+  mutex_.Unlock();
+}
+
+int AudioDeviceWindowsCore::SetDMOProperties() {
+  HRESULT hr = S_OK;
+  RTC_DCHECK(_dmo);
+
+  rtc::scoped_refptr<IPropertyStore> ps;
+  {
+    IPropertyStore* ptrPS = NULL;
+    hr = _dmo->QueryInterface(IID_IPropertyStore,
+                              reinterpret_cast<void**>(&ptrPS));
+    if (FAILED(hr) || ptrPS == NULL) {
+      _TraceCOMError(hr);
+      return -1;
+    }
+    ps = ptrPS;
+    SAFE_RELEASE(ptrPS);
+  }
+
+  // Set the AEC system mode.
+  // SINGLE_CHANNEL_AEC - AEC processing only.
+  if (SetVtI4Property(ps.get(), MFPKEY_WMAAECMA_SYSTEM_MODE,
+                      SINGLE_CHANNEL_AEC)) {
+    return -1;
+  }
+
+  // Set the AEC source mode.
+  // VARIANT_TRUE - Source mode (we poll the AEC for captured data).
+  if (SetBoolProperty(ps.get(), MFPKEY_WMAAECMA_DMO_SOURCE_MODE,
+                      VARIANT_TRUE) == -1) {
+    return -1;
+  }
+
+  // Enable the feature mode.
+  // This lets us override all the default processing settings below.
+  if (SetBoolProperty(ps.get(), MFPKEY_WMAAECMA_FEATURE_MODE, VARIANT_TRUE) ==
+      -1) {
+    return -1;
+  }
+
+  // Disable analog AGC (default enabled).
+  if (SetBoolProperty(ps.get(), MFPKEY_WMAAECMA_MIC_GAIN_BOUNDER,
+                      VARIANT_FALSE) == -1) {
+    return -1;
+  }
+
+  // Disable noise suppression (default enabled).
+  // 0 - Disabled, 1 - Enabled
+  if (SetVtI4Property(ps.get(), MFPKEY_WMAAECMA_FEATR_NS, 0) == -1) {
+    return -1;
+  }
+
+  // Relevant parameters to leave at default settings:
+  // MFPKEY_WMAAECMA_FEATR_AGC - Digital AGC (disabled).
+  // MFPKEY_WMAAECMA_FEATR_CENTER_CLIP - AEC center clipping (enabled).
+  // MFPKEY_WMAAECMA_FEATR_ECHO_LENGTH - Filter length (256 ms).
+  //   TODO(andrew): investigate decreasing the length to 128 ms.
+  // MFPKEY_WMAAECMA_FEATR_FRAME_SIZE - Frame size (0).
+  //   0 is automatic; defaults to 160 samples (or 10 ms frames at the
+  //   selected 16 kHz) as long as mic array processing is disabled.
+  // MFPKEY_WMAAECMA_FEATR_NOISE_FILL - Comfort noise (enabled).
+  // MFPKEY_WMAAECMA_FEATR_VAD - VAD (disabled).
+
+  // Set the devices selected by VoE. If using a default device, we need to
+  // search for the device index.
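+  // MFPKEY_WMAAECMA_DEVICE_INDEXES packs both endpoint indexes into a single
+  // 32-bit value, as assembled below: the render (speaker) index goes in the
+  // high word and the capture (microphone) index in the low word. Worked
+  // example with illustrative indexes: outDevIndex = 1 and inDevIndex = 2
+  // yield (1 << 16) + 2 = 0x00010002.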
+  int inDevIndex = _inputDeviceIndex;
+  int outDevIndex = _outputDeviceIndex;
+  if (!_usingInputDeviceIndex) {
+    ERole role = eCommunications;
+    if (_inputDevice == AudioDeviceModule::kDefaultDevice) {
+      role = eConsole;
+    }
+
+    if (_GetDefaultDeviceIndex(eCapture, role, &inDevIndex) == -1) {
+      return -1;
+    }
+  }
+
+  if (!_usingOutputDeviceIndex) {
+    ERole role = eCommunications;
+    if (_outputDevice == AudioDeviceModule::kDefaultDevice) {
+      role = eConsole;
+    }
+
+    if (_GetDefaultDeviceIndex(eRender, role, &outDevIndex) == -1) {
+      return -1;
+    }
+  }
+
+  DWORD devIndex = static_cast<uint32_t>(outDevIndex << 16) +
+                   static_cast<uint32_t>(0x0000ffff & inDevIndex);
+  RTC_LOG(LS_VERBOSE) << "Capture device index: " << inDevIndex
+                      << ", render device index: " << outDevIndex;
+  if (SetVtI4Property(ps.get(), MFPKEY_WMAAECMA_DEVICE_INDEXES, devIndex) ==
+      -1) {
+    return -1;
+  }
+
+  return 0;
+}
+
+int AudioDeviceWindowsCore::SetBoolProperty(IPropertyStore* ptrPS,
+                                            REFPROPERTYKEY key,
+                                            VARIANT_BOOL value) {
+  PROPVARIANT pv;
+  PropVariantInit(&pv);
+  pv.vt = VT_BOOL;
+  pv.boolVal = value;
+  HRESULT hr = ptrPS->SetValue(key, pv);
+  PropVariantClear(&pv);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+  return 0;
+}
+
+int AudioDeviceWindowsCore::SetVtI4Property(IPropertyStore* ptrPS,
+                                            REFPROPERTYKEY key,
+                                            LONG value) {
+  PROPVARIANT pv;
+  PropVariantInit(&pv);
+  pv.vt = VT_I4;
+  pv.lVal = value;
+  HRESULT hr = ptrPS->SetValue(key, pv);
+  PropVariantClear(&pv);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _RefreshDeviceList
+//
+//  Creates a new list of endpoint rendering or capture devices after
+//  deleting any previously created (and possibly out-of-date) list of
+//  such devices.
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_RefreshDeviceList(EDataFlow dir) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  IMMDeviceCollection* pCollection = NULL;
+
+  RTC_DCHECK(dir == eRender || dir == eCapture);
+  RTC_DCHECK(_ptrEnumerator);
+
+  // Create a fresh list of devices using the specified direction
+  hr = _ptrEnumerator->EnumAudioEndpoints(dir, DEVICE_STATE_ACTIVE,
+                                          &pCollection);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pCollection);
+    return -1;
+  }
+
+  if (dir == eRender) {
+    SAFE_RELEASE(_ptrRenderCollection);
+    _ptrRenderCollection = pCollection;
+  } else {
+    SAFE_RELEASE(_ptrCaptureCollection);
+    _ptrCaptureCollection = pCollection;
+  }
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _DeviceListCount
+//
+//  Gets a count of the endpoint rendering or capture devices in the
+//  current list of such devices.
+// ----------------------------------------------------------------------------
+
+int16_t AudioDeviceWindowsCore::_DeviceListCount(EDataFlow dir) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  UINT count = 0;
+
+  RTC_DCHECK(eRender == dir || eCapture == dir);
+
+  if (eRender == dir && NULL != _ptrRenderCollection) {
+    hr = _ptrRenderCollection->GetCount(&count);
+  } else if (NULL != _ptrCaptureCollection) {
+    hr = _ptrCaptureCollection->GetCount(&count);
+  }
+
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+
+  return static_cast<int16_t>(count);
+}
+
+// ----------------------------------------------------------------------------
+//  _GetListDeviceName
+//
+//  Gets the friendly name of an endpoint rendering or capture device
+//  from the current list of such devices. The caller uses an index
+//  into the list to identify the device.
+//
+//  Uses: _ptrRenderCollection or _ptrCaptureCollection which is updated
+//  in _RefreshDeviceList().
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetListDeviceName(EDataFlow dir,
+                                                   int index,
+                                                   LPWSTR szBuffer,
+                                                   int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  IMMDevice* pDevice = NULL;
+
+  RTC_DCHECK(dir == eRender || dir == eCapture);
+
+  if (eRender == dir && NULL != _ptrRenderCollection) {
+    hr = _ptrRenderCollection->Item(index, &pDevice);
+  } else if (NULL != _ptrCaptureCollection) {
+    hr = _ptrCaptureCollection->Item(index, &pDevice);
+  }
+
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pDevice);
+    return -1;
+  }
+
+  int32_t res = _GetDeviceName(pDevice, szBuffer, bufferLen);
+  SAFE_RELEASE(pDevice);
+  return res;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetDefaultDeviceName
+//
+//  Gets the friendly name of an endpoint rendering or capture device
+//  given a specified device role.
+//
+//  Uses: _ptrEnumerator
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetDefaultDeviceName(EDataFlow dir,
+                                                      ERole role,
+                                                      LPWSTR szBuffer,
+                                                      int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  IMMDevice* pDevice = NULL;
+
+  RTC_DCHECK(dir == eRender || dir == eCapture);
+  RTC_DCHECK(role == eConsole || role == eCommunications);
+  RTC_DCHECK(_ptrEnumerator);
+
+  hr = _ptrEnumerator->GetDefaultAudioEndpoint(dir, role, &pDevice);
+
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pDevice);
+    return -1;
+  }
+
+  int32_t res = _GetDeviceName(pDevice, szBuffer, bufferLen);
+  SAFE_RELEASE(pDevice);
+  return res;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetListDeviceID
+//
+//  Gets the unique ID string of an endpoint rendering or capture device
+//  from the current list of such devices. The caller uses an index
+//  into the list to identify the device.
+//
+//  Uses: _ptrRenderCollection or _ptrCaptureCollection which is updated
+//  in _RefreshDeviceList().
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetListDeviceID(EDataFlow dir,
+                                                 int index,
+                                                 LPWSTR szBuffer,
+                                                 int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  IMMDevice* pDevice = NULL;
+
+  RTC_DCHECK(dir == eRender || dir == eCapture);
+
+  if (eRender == dir && NULL != _ptrRenderCollection) {
+    hr = _ptrRenderCollection->Item(index, &pDevice);
+  } else if (NULL != _ptrCaptureCollection) {
+    hr = _ptrCaptureCollection->Item(index, &pDevice);
+  }
+
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pDevice);
+    return -1;
+  }
+
+  int32_t res = _GetDeviceID(pDevice, szBuffer, bufferLen);
+  SAFE_RELEASE(pDevice);
+  return res;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetDefaultDeviceID
+//
+//  Gets the unique device ID of an endpoint rendering or capture device
+//  given a specified device role.
+//
+//  Uses: _ptrEnumerator
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetDefaultDeviceID(EDataFlow dir,
+                                                    ERole role,
+                                                    LPWSTR szBuffer,
+                                                    int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  IMMDevice* pDevice = NULL;
+
+  RTC_DCHECK(dir == eRender || dir == eCapture);
+  RTC_DCHECK(role == eConsole || role == eCommunications);
+  RTC_DCHECK(_ptrEnumerator);
+
+  hr = _ptrEnumerator->GetDefaultAudioEndpoint(dir, role, &pDevice);
+
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pDevice);
+    return -1;
+  }
+
+  int32_t res = _GetDeviceID(pDevice, szBuffer, bufferLen);
+  SAFE_RELEASE(pDevice);
+  return res;
+}
+
+int32_t AudioDeviceWindowsCore::_GetDefaultDeviceIndex(EDataFlow dir,
+                                                       ERole role,
+                                                       int* index) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr = S_OK;
+  WCHAR szDefaultDeviceID[MAX_PATH] = {0};
+  WCHAR szDeviceID[MAX_PATH] = {0};
+
+  const size_t kDeviceIDLength = sizeof(szDeviceID) / sizeof(szDeviceID[0]);
+  RTC_DCHECK_EQ(kDeviceIDLength,
+                sizeof(szDefaultDeviceID) / sizeof(szDefaultDeviceID[0]));
+
+  if (_GetDefaultDeviceID(dir, role, szDefaultDeviceID, kDeviceIDLength) ==
+      -1) {
+    return -1;
+  }
+
+  IMMDeviceCollection* collection = _ptrCaptureCollection;
+  if (dir == eRender) {
+    collection = _ptrRenderCollection;
+  }
+
+  if (!collection) {
+    RTC_LOG(LS_ERROR) << "Device collection not valid";
+    return -1;
+  }
+
+  UINT count = 0;
+  hr = collection->GetCount(&count);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+
+  *index = -1;
+  for (UINT i = 0; i < count; i++) {
+    memset(szDeviceID, 0, sizeof(szDeviceID));
+    rtc::scoped_refptr<IMMDevice> device;
+    {
+      IMMDevice* ptrDevice = NULL;
+      hr = collection->Item(i, &ptrDevice);
+      if (FAILED(hr) || ptrDevice == NULL) {
+        _TraceCOMError(hr);
+        return -1;
+      }
+      device = ptrDevice;
+      SAFE_RELEASE(ptrDevice);
+    }
+
+    if (_GetDeviceID(device.get(), szDeviceID, kDeviceIDLength) == -1) {
+      return -1;
+    }
+
+    if (wcsncmp(szDefaultDeviceID, szDeviceID, kDeviceIDLength) == 0) {
+      // Found a match.
+      *index = i;
+      break;
+    }
+  }
+
+  if (*index == -1) {
+    RTC_LOG(LS_ERROR) << "Unable to find collection index for default device";
+    return -1;
+  }
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetDeviceName
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetDeviceName(IMMDevice* pDevice,
+                                               LPWSTR pszBuffer,
+                                               int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  static const WCHAR szDefault[] = L"<Device not available>";
+
+  HRESULT hr = E_FAIL;
+  IPropertyStore* pProps = NULL;
+  PROPVARIANT varName;
+
+  RTC_DCHECK(pszBuffer);
+  RTC_DCHECK_GT(bufferLen, 0);
+
+  if (pDevice != NULL) {
+    hr = pDevice->OpenPropertyStore(STGM_READ, &pProps);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR) << "IMMDevice::OpenPropertyStore failed, hr = 0x"
+                        << rtc::ToHex(hr);
+    }
+  }
+
+  // Initialize container for property value.
+  PropVariantInit(&varName);
+
+  if (SUCCEEDED(hr)) {
+    // Get the endpoint device's friendly-name property.
+    hr = pProps->GetValue(PKEY_Device_FriendlyName, &varName);
+    if (FAILED(hr)) {
+      RTC_LOG(LS_ERROR) << "IPropertyStore::GetValue failed, hr = 0x"
+                        << rtc::ToHex(hr);
+    }
+  }
+
+  if ((SUCCEEDED(hr)) && (VT_EMPTY == varName.vt)) {
+    hr = E_FAIL;
+    RTC_LOG(LS_ERROR) << "IPropertyStore::GetValue returned no value,"
+                         " hr = 0x"
+                      << rtc::ToHex(hr);
+  }
+
+  if ((SUCCEEDED(hr)) && (VT_LPWSTR != varName.vt)) {
+    // The returned value is not a wide null-terminated string.
+    hr = E_UNEXPECTED;
+    RTC_LOG(LS_ERROR) << "IPropertyStore::GetValue returned unexpected"
+                         " type, hr = 0x"
+                      << rtc::ToHex(hr);
+  }
+
+  if (SUCCEEDED(hr) && (varName.pwszVal != NULL)) {
+    // Copy the valid device name to the provided output buffer.
+    wcsncpy_s(pszBuffer, bufferLen, varName.pwszVal, _TRUNCATE);
+  } else {
+    // Failed to find the device name.
+    wcsncpy_s(pszBuffer, bufferLen, szDefault, _TRUNCATE);
+  }
+
+  PropVariantClear(&varName);
+  SAFE_RELEASE(pProps);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetDeviceID
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetDeviceID(IMMDevice* pDevice,
+                                             LPWSTR pszBuffer,
+                                             int bufferLen) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  static const WCHAR szDefault[] = L"<Device not available>";
+
+  HRESULT hr = E_FAIL;
+  LPWSTR pwszID = NULL;
+
+  RTC_DCHECK(pszBuffer);
+  RTC_DCHECK_GT(bufferLen, 0);
+
+  if (pDevice != NULL) {
+    hr = pDevice->GetId(&pwszID);
+  }
+
+  if (hr == S_OK) {
+    // Found the device ID.
+    wcsncpy_s(pszBuffer, bufferLen, pwszID, _TRUNCATE);
+  } else {
+    // Failed to find the device ID.
+    wcsncpy_s(pszBuffer, bufferLen, szDefault, _TRUNCATE);
+  }
+
+  CoTaskMemFree(pwszID);
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetDefaultDevice
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetDefaultDevice(EDataFlow dir,
+                                                  ERole role,
+                                                  IMMDevice** ppDevice) {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  HRESULT hr(S_OK);
+
+  RTC_DCHECK(_ptrEnumerator);
+
+  hr = _ptrEnumerator->GetDefaultAudioEndpoint(dir, role, ppDevice);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    return -1;
+  }
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _GetListDevice
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_GetListDevice(EDataFlow dir,
+                                               int index,
+                                               IMMDevice** ppDevice) {
+  HRESULT hr(S_OK);
+
+  RTC_DCHECK(_ptrEnumerator);
+
+  IMMDeviceCollection* pCollection = NULL;
+
+  hr = _ptrEnumerator->EnumAudioEndpoints(
+      dir,
+      DEVICE_STATE_ACTIVE,  // only active endpoints are OK
+      &pCollection);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pCollection);
+    return -1;
+  }
+
+  hr = pCollection->Item(index, ppDevice);
+  if (FAILED(hr)) {
+    _TraceCOMError(hr);
+    SAFE_RELEASE(pCollection);
+    return -1;
+  }
+
+  SAFE_RELEASE(pCollection);
+
+  return 0;
+}
+
+// ----------------------------------------------------------------------------
+//  _EnumerateEndpointDevicesAll
+// ----------------------------------------------------------------------------
+
+int32_t AudioDeviceWindowsCore::_EnumerateEndpointDevicesAll(
+    EDataFlow dataFlow) const {
+  RTC_DLOG(LS_VERBOSE) << __FUNCTION__;
+
+  RTC_DCHECK(_ptrEnumerator);
+
+  HRESULT hr = S_OK;
+  IMMDeviceCollection* pCollection = NULL;
+  IMMDevice* pEndpoint = NULL;
+  IPropertyStore* pProps = NULL;
+  IAudioEndpointVolume* pEndpointVolume = NULL;
+  LPWSTR pwszID = NULL;
+
+  // Generate a collection of audio endpoint devices in the system.
+  // Get states for *all* endpoint devices.
+  // Output: IMMDeviceCollection interface.
+  hr = _ptrEnumerator->EnumAudioEndpoints(
+      dataFlow,  // data-flow direction (input parameter)
+      DEVICE_STATE_ACTIVE | DEVICE_STATE_DISABLED | DEVICE_STATE_UNPLUGGED,
+      &pCollection);  // release interface when done
+
+  EXIT_ON_ERROR(hr);
+
+  // use the IMMDeviceCollection interface...
+
+  UINT count = 0;
+
+  // Retrieve a count of the devices in the device collection.
+  hr = pCollection->GetCount(&count);
+  EXIT_ON_ERROR(hr);
+  if (dataFlow == eRender)
+    RTC_LOG(LS_VERBOSE) << "#rendering endpoint devices (counting all): "
+                        << count;
+  else if (dataFlow == eCapture)
+    RTC_LOG(LS_VERBOSE) << "#capturing endpoint devices (counting all): "
+                        << count;
+
+  if (count == 0) {
+    return 0;
+  }
+
+  // Each iteration of the loop below logs the properties of one endpoint
+  // device.
+  for (ULONG i = 0; i < count; i++) {
+    RTC_LOG(LS_VERBOSE) << "Endpoint " << i << ":";
+
+    // Get pointer to endpoint number i.
+    // Output: IMMDevice interface.
+    hr = pCollection->Item(i, &pEndpoint);
+    CONTINUE_ON_ERROR(hr);
+
+    // use the IMMDevice interface of the specified endpoint device...
+
+    // Get the endpoint ID string (uniquely identifies the device among all
+    // audio endpoint devices)
+    hr = pEndpoint->GetId(&pwszID);
+    CONTINUE_ON_ERROR(hr);
+    RTC_LOG(LS_VERBOSE) << "ID string : " << pwszID;
+
+    // Retrieve an interface to the device's property store.
+    // Output: IPropertyStore interface.
+    hr = pEndpoint->OpenPropertyStore(STGM_READ, &pProps);
+    CONTINUE_ON_ERROR(hr);
+
+    // use the IPropertyStore interface...
+
+    PROPVARIANT varName;
+    // Initialize container for property value.
+    PropVariantInit(&varName);
+
+    // Get the endpoint's friendly-name property.
+    // Example: "Speakers (Realtek High Definition Audio)"
+    hr = pProps->GetValue(PKEY_Device_FriendlyName, &varName);
+    CONTINUE_ON_ERROR(hr);
+    RTC_LOG(LS_VERBOSE) << "friendly name: \"" << varName.pwszVal << "\"";
+
+    // Get the endpoint's current device state
+    DWORD dwState;
+    hr = pEndpoint->GetState(&dwState);
+    CONTINUE_ON_ERROR(hr);
+    if (dwState & DEVICE_STATE_ACTIVE)
+      RTC_LOG(LS_VERBOSE) << "state (0x" << rtc::ToHex(dwState)
+                          << ") : *ACTIVE*";
+    if (dwState & DEVICE_STATE_DISABLED)
+      RTC_LOG(LS_VERBOSE) << "state (0x" << rtc::ToHex(dwState)
+                          << ") : DISABLED";
+    if (dwState & DEVICE_STATE_NOTPRESENT)
+      RTC_LOG(LS_VERBOSE) << "state (0x" << rtc::ToHex(dwState)
+                          << ") : NOTPRESENT";
+    if (dwState & DEVICE_STATE_UNPLUGGED)
+      RTC_LOG(LS_VERBOSE) << "state (0x" << rtc::ToHex(dwState)
+                          << ") : UNPLUGGED";
+
+    // Check the hardware volume capabilities.
+    DWORD dwHwSupportMask = 0;
+    hr = pEndpoint->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, NULL,
+                             (void**)&pEndpointVolume);
+    CONTINUE_ON_ERROR(hr);
+    hr = pEndpointVolume->QueryHardwareSupport(&dwHwSupportMask);
+    CONTINUE_ON_ERROR(hr);
+    if (dwHwSupportMask & ENDPOINT_HARDWARE_SUPPORT_VOLUME)
+      // The audio endpoint device supports a hardware volume control
+      RTC_LOG(LS_VERBOSE) << "hwmask (0x" << rtc::ToHex(dwHwSupportMask)
+                          << ") : HARDWARE_SUPPORT_VOLUME";
+    if (dwHwSupportMask & ENDPOINT_HARDWARE_SUPPORT_MUTE)
+      // The audio endpoint device supports a hardware mute control
+      RTC_LOG(LS_VERBOSE) << "hwmask (0x" << rtc::ToHex(dwHwSupportMask)
+                          << ") : HARDWARE_SUPPORT_MUTE";
+    if (dwHwSupportMask & ENDPOINT_HARDWARE_SUPPORT_METER)
+      // The audio endpoint device supports a hardware peak meter
+      RTC_LOG(LS_VERBOSE) << "hwmask (0x" << rtc::ToHex(dwHwSupportMask)
+                          << ") : HARDWARE_SUPPORT_METER";
+
+    // Check the channel count (#channels in the audio stream that enters or
+    // leaves the audio endpoint device)
+    UINT nChannelCount(0);
+    hr = pEndpointVolume->GetChannelCount(&nChannelCount);
+    CONTINUE_ON_ERROR(hr);
+    RTC_LOG(LS_VERBOSE) << "#channels : " << nChannelCount;
+
+    if (dwHwSupportMask & ENDPOINT_HARDWARE_SUPPORT_VOLUME) {
+      // Get the volume range.
+      float fLevelMinDB(0.0);
+      float fLevelMaxDB(0.0);
+      float fVolumeIncrementDB(0.0);
+      hr = pEndpointVolume->GetVolumeRange(&fLevelMinDB, &fLevelMaxDB,
+                                           &fVolumeIncrementDB);
+      CONTINUE_ON_ERROR(hr);
+      RTC_LOG(LS_VERBOSE) << "volume range : " << fLevelMinDB << " (min), "
+                          << fLevelMaxDB << " (max), " << fVolumeIncrementDB
+                          << " (inc) [dB]";
+
+      // The volume range from vmin = fLevelMinDB to vmax = fLevelMaxDB is
+      // divided into n uniform intervals of size vinc = fVolumeIncrementDB,
+      // where n = (vmax - vmin) / vinc. The values vmin, vmax, and vinc are
+      // measured in decibels. The client can set the volume level to one of
+      // n + 1 discrete values in the range from vmin to vmax.
+      int n = (int)((fLevelMaxDB - fLevelMinDB) / fVolumeIncrementDB);
+      RTC_LOG(LS_VERBOSE) << "#intervals : " << n;
+
+      // Get information about the current step in the volume range.
+      // This method represents the volume level of the audio stream that
+      // enters or leaves the audio endpoint device as an index or "step" in
+      // a range of discrete volume levels.
+      // Output value nStepCount is the number of steps in the range. Output
+      // value nStep is the step index of the current volume level. If the
+      // number of steps is n = nStepCount, then step index nStep can assume
+      // values from 0 (minimum volume) to n - 1 (maximum volume).
+      UINT nStep(0);
+      UINT nStepCount(0);
+      hr = pEndpointVolume->GetVolumeStepInfo(&nStep, &nStepCount);
+      CONTINUE_ON_ERROR(hr);
+      RTC_LOG(LS_VERBOSE) << "volume steps : " << nStep << " (nStep), "
+                          << nStepCount << " (nStepCount)";
+    }
+  Next:
+    if (FAILED(hr)) {
+      RTC_LOG(LS_VERBOSE) << "Error when logging device information";
+    }
+    CoTaskMemFree(pwszID);
+    pwszID = NULL;
+    PropVariantClear(&varName);
+    SAFE_RELEASE(pProps);
+    SAFE_RELEASE(pEndpoint);
+    SAFE_RELEASE(pEndpointVolume);
+  }
+  SAFE_RELEASE(pCollection);
+  return 0;
+
+Exit:
+  _TraceCOMError(hr);
+  CoTaskMemFree(pwszID);
+  pwszID = NULL;
+  SAFE_RELEASE(pCollection);
+  SAFE_RELEASE(pEndpoint);
+  SAFE_RELEASE(pEndpointVolume);
+  SAFE_RELEASE(pProps);
+  return -1;
+}
+
+// ----------------------------------------------------------------------------
+//  _TraceCOMError
+// ----------------------------------------------------------------------------
+
+void AudioDeviceWindowsCore::_TraceCOMError(HRESULT hr) const {
+  wchar_t buf[MAXERRORLENGTH];
+  wchar_t errorText[MAXERRORLENGTH];
+
+  const DWORD dwFlags =
+      FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS;
+  const DWORD dwLangID = MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US);
+
+  // Gets the system's human-readable message string for this HRESULT.
+  // All error messages are in English by default.
+  DWORD messageLength = ::FormatMessageW(dwFlags, 0, hr, dwLangID, errorText,
+                                         MAXERRORLENGTH, NULL);
+
+  RTC_DCHECK_LE(messageLength, MAXERRORLENGTH);
+
+  // Trims trailing white space (FormatMessage() leaves a trailing CR-LF).
+  for (; messageLength && ::isspace(errorText[messageLength - 1]);
+       --messageLength) {
+    errorText[messageLength - 1] = '\0';
+  }
+
+  RTC_LOG(LS_ERROR) << "Core Audio method failed (hr=" << hr << ")";
+  StringCchPrintfW(buf, MAXERRORLENGTH, L"Error details: ");
+  StringCchCatW(buf, MAXERRORLENGTH, errorText);
+  RTC_LOG(LS_ERROR) << rtc::ToUtf8(buf);
+}
+
+bool AudioDeviceWindowsCore::KeyPressed() const {
+  int key_down = 0;
+  for (int key = VK_SPACE; key < VK_NUMLOCK; key++) {
+    short res = GetAsyncKeyState(key);
+    key_down |= res & 0x1;  // Get the LSB
+  }
+  return (key_down > 0);
+}
+}  // namespace webrtc
+
+#endif  // WEBRTC_WINDOWS_CORE_AUDIO_BUILD
diff --git a/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.h b/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.h
new file mode 100644
index 0000000000..7e7ef21157
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/audio_device_core_win.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_CORE_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_CORE_WIN_H_
+
+#if (_MSC_VER >= 1400)  // only include for VS 2005 and higher
+
+#include "rtc_base/win32.h"
+
+#include "modules/audio_device/audio_device_generic.h"
+
+#include <wmcodecdsp.h>  // CLSID_CWMAudioAEC
+                         // (must be before audioclient.h)
+#include <audioclient.h>  // WASAPI
+#include <audiopolicy.h>
+#include <avrt.h>  // Avrt
+#include <endpointvolume.h>
+#include <mediaobj.h>  // IMediaObject
+#include <mmdeviceapi.h>  // MMDevice
+
+#include "api/scoped_refptr.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/win/scoped_com_initializer.h"
+
+// Use Multimedia Class Scheduler Service (MMCSS) to boost the thread priority
+#pragma comment(lib, "avrt.lib")
+// AVRT function pointers
+typedef BOOL(WINAPI* PAvRevertMmThreadCharacteristics)(HANDLE);
+typedef HANDLE(WINAPI* PAvSetMmThreadCharacteristicsA)(LPCSTR, LPDWORD);
+typedef BOOL(WINAPI* PAvSetMmThreadPriority)(HANDLE, AVRT_PRIORITY);
+
+namespace webrtc {
+
+const float MAX_CORE_SPEAKER_VOLUME = 255.0f;
+const float MIN_CORE_SPEAKER_VOLUME = 0.0f;
+const float MAX_CORE_MICROPHONE_VOLUME = 255.0f;
+const float MIN_CORE_MICROPHONE_VOLUME = 0.0f;
+const uint16_t CORE_SPEAKER_VOLUME_STEP_SIZE = 1;
+const uint16_t CORE_MICROPHONE_VOLUME_STEP_SIZE = 1;
+
+class AudioDeviceWindowsCore : public AudioDeviceGeneric {
+ public:
+  AudioDeviceWindowsCore();
+  ~AudioDeviceWindowsCore();
+
+  static bool CoreAudioIsSupported();
+
+  // Retrieve the currently utilized audio layer
+  virtual int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer& audioLayer) const;
+
+  // Main initialization and termination
+  virtual InitStatus Init() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t Terminate() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool Initialized() const;
+
+  // Device enumeration
+  virtual int16_t PlayoutDevices() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int16_t RecordingDevices() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t PlayoutDeviceName(uint16_t index,
+                                    char name[kAdmMaxDeviceNameSize],
+                                    char guid[kAdmMaxGuidSize])
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t RecordingDeviceName(uint16_t index,
+                                      char name[kAdmMaxDeviceNameSize],
+                                      char guid[kAdmMaxGuidSize])
+      RTC_LOCKS_EXCLUDED(mutex_);
+
+  // Device selection
+  virtual int32_t SetPlayoutDevice(uint16_t index) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetPlayoutDevice(AudioDeviceModule::WindowsDeviceType device);
+  virtual int32_t SetRecordingDevice(uint16_t index) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) RTC_LOCKS_EXCLUDED(mutex_);
+
+  // Audio transport initialization
+  virtual int32_t PlayoutIsAvailable(bool& available);
+  virtual int32_t InitPlayout() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool PlayoutIsInitialized() const;
+  virtual int32_t RecordingIsAvailable(bool& available);
+  virtual int32_t InitRecording() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool RecordingIsInitialized() const;
+
+  // Audio transport control
+  virtual int32_t StartPlayout() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t StopPlayout() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool Playing() const;
+  virtual int32_t StartRecording() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t StopRecording();
+  virtual bool Recording() const;
+
+  // Audio mixer initialization
+  virtual int32_t InitSpeaker() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool SpeakerIsInitialized() const;
+  virtual int32_t InitMicrophone() RTC_LOCKS_EXCLUDED(mutex_);
+  virtual bool MicrophoneIsInitialized() const;
+
+  // Speaker volume controls
+  virtual int32_t SpeakerVolumeIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetSpeakerVolume(uint32_t volume) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SpeakerVolume(uint32_t& volume) const
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t MaxSpeakerVolume(uint32_t& maxVolume) const;
+  virtual int32_t MinSpeakerVolume(uint32_t& minVolume) const;
+
+  // Microphone volume controls
+  virtual int32_t MicrophoneVolumeIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetMicrophoneVolume(uint32_t volume)
+      RTC_LOCKS_EXCLUDED(mutex_, volume_mutex_);
+  virtual int32_t MicrophoneVolume(uint32_t& volume) const
+      RTC_LOCKS_EXCLUDED(mutex_, volume_mutex_);
+  virtual int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const;
+  virtual int32_t MinMicrophoneVolume(uint32_t& minVolume) const;
+
+  // Speaker mute control
+  virtual int32_t SpeakerMuteIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetSpeakerMute(bool enable) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SpeakerMute(bool& enabled) const;
+
+  // Microphone mute control
+  virtual int32_t MicrophoneMuteIsAvailable(bool& available)
+      RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t SetMicrophoneMute(bool enable);
+  virtual int32_t MicrophoneMute(bool& enabled) const;
+
+  // Stereo support
+  virtual int32_t StereoPlayoutIsAvailable(bool& available);
+  virtual int32_t SetStereoPlayout(bool enable) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t StereoPlayout(bool& enabled) const;
+  virtual int32_t StereoRecordingIsAvailable(bool& available);
+  virtual int32_t SetStereoRecording(bool enable) RTC_LOCKS_EXCLUDED(mutex_);
+  virtual int32_t StereoRecording(bool& enabled) const
+      RTC_LOCKS_EXCLUDED(mutex_);
+
+  // Delay information and control
+  virtual int32_t PlayoutDelay(uint16_t& delayMS) const
+      RTC_LOCKS_EXCLUDED(mutex_);
+
+  virtual bool BuiltInAECIsAvailable() const;
+
+  virtual int32_t EnableBuiltInAEC(bool enable);
+
+ public:
+  virtual void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer);
+
+ private:
+  bool KeyPressed() const;
+
+ private:  // avrt function pointers
+  PAvRevertMmThreadCharacteristics _PAvRevertMmThreadCharacteristics;
+  PAvSetMmThreadCharacteristicsA _PAvSetMmThreadCharacteristicsA;
+  PAvSetMmThreadPriority _PAvSetMmThreadPriority;
+  HMODULE _avrtLibrary;
+  bool _winSupportAvrt;
+
+ private:  // thread functions
+  int32_t InitSpeakerLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int32_t InitMicrophoneLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int16_t PlayoutDevicesLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  int16_t RecordingDevicesLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  DWORD InitCaptureThreadPriority();
+  void RevertCaptureThreadPriority();
+  static DWORD WINAPI WSAPICaptureThread(LPVOID context);
+  DWORD DoCaptureThread();
+
+  static DWORD WINAPI WSAPICaptureThreadPollDMO(LPVOID context);
+  DWORD DoCaptureThreadPollDMO() RTC_LOCKS_EXCLUDED(mutex_);
+
+  static DWORD WINAPI WSAPIRenderThread(LPVOID context);
+  DWORD DoRenderThread();
+
+  void _Lock();
+  void _UnLock();
+
+  int SetDMOProperties();
+
+  int SetBoolProperty(IPropertyStore* ptrPS,
+                      REFPROPERTYKEY key,
+                      VARIANT_BOOL value);
+
+  int SetVtI4Property(IPropertyStore* ptrPS, REFPROPERTYKEY key, LONG value);
+
+  int32_t _EnumerateEndpointDevicesAll(EDataFlow dataFlow) const;
+  void _TraceCOMError(HRESULT hr) const;
+
+  int32_t _RefreshDeviceList(EDataFlow dir);
+  int16_t _DeviceListCount(EDataFlow dir);
+  int32_t _GetDefaultDeviceName(EDataFlow dir,
+                                ERole role,
+                                LPWSTR szBuffer,
+                                int bufferLen);
+  int32_t _GetListDeviceName(EDataFlow dir,
+                             int index,
+                             LPWSTR szBuffer,
+                             int bufferLen);
+  int32_t _GetDeviceName(IMMDevice* pDevice, LPWSTR pszBuffer, int bufferLen);
+  int32_t _GetListDeviceID(EDataFlow dir,
+                           int index,
+                           LPWSTR szBuffer,
+                           int bufferLen);
+  int32_t _GetDefaultDeviceID(EDataFlow dir,
+                              ERole role,
+                              LPWSTR szBuffer,
+                              int bufferLen);
+  int32_t _GetDefaultDeviceIndex(EDataFlow dir, ERole role, int* index);
+  int32_t _GetDeviceID(IMMDevice* pDevice, LPWSTR pszBuffer, int bufferLen);
+  int32_t _GetDefaultDevice(EDataFlow dir, ERole role, IMMDevice** ppDevice);
+  int32_t _GetListDevice(EDataFlow dir, int index, IMMDevice** ppDevice);
+
+  int32_t InitRecordingDMO();
+
+  ScopedCOMInitializer _comInit;
+  AudioDeviceBuffer* _ptrAudioBuffer;
+  mutable Mutex mutex_;
+  mutable Mutex volume_mutex_ RTC_ACQUIRED_AFTER(mutex_);
+
+  IMMDeviceEnumerator* _ptrEnumerator;
+  IMMDeviceCollection* _ptrRenderCollection;
+  IMMDeviceCollection* _ptrCaptureCollection;
+  IMMDevice* _ptrDeviceOut;
+  IMMDevice* _ptrDeviceIn;
+
+  IAudioClient* _ptrClientOut;
+  IAudioClient* _ptrClientIn;
+  IAudioRenderClient* _ptrRenderClient;
+  IAudioCaptureClient* _ptrCaptureClient;
+  IAudioEndpointVolume* _ptrCaptureVolume;
+  ISimpleAudioVolume* _ptrRenderSimpleVolume;
+
+  // DirectX Media Object (DMO) for the built-in AEC.
+  rtc::scoped_refptr<IMediaObject> _dmo;
+  rtc::scoped_refptr<IMediaBuffer> _mediaBuffer;
+  bool _builtInAecEnabled;
+
+  HANDLE _hRenderSamplesReadyEvent;
+  HANDLE _hPlayThread;
+  HANDLE _hRenderStartedEvent;
+  HANDLE _hShutdownRenderEvent;
+
+  HANDLE _hCaptureSamplesReadyEvent;
+  HANDLE _hRecThread;
+  HANDLE _hCaptureStartedEvent;
+  HANDLE _hShutdownCaptureEvent;
+
+  HANDLE _hMmTask;
+
+  UINT _playAudioFrameSize;
+  uint32_t _playSampleRate;
+  uint32_t _devicePlaySampleRate;
+  uint32_t _playBlockSize;
+  uint32_t _devicePlayBlockSize;
+  uint32_t _playChannels;
+  uint32_t _sndCardPlayDelay;
+  UINT64 _writtenSamples;
+  UINT64 _readSamples;
+
+  UINT _recAudioFrameSize;
+  uint32_t _recSampleRate;
+  uint32_t _recBlockSize;
+  uint32_t _recChannels;
+
+  uint16_t _recChannelsPrioList[3];
+  uint16_t _playChannelsPrioList[2];
+
+  LARGE_INTEGER _perfCounterFreq;
+  double _perfCounterFactor;
+
+ private:
+  bool _initialized;
+  bool _recording;
+  bool _playing;
+  bool _recIsInitialized;
+  bool _playIsInitialized;
+  bool _speakerIsInitialized;
+  bool _microphoneIsInitialized;
+
+  bool _usingInputDeviceIndex;
+  bool _usingOutputDeviceIndex;
+  AudioDeviceModule::WindowsDeviceType _inputDevice;
+  AudioDeviceModule::WindowsDeviceType _outputDevice;
+  uint16_t _inputDeviceIndex;
+  uint16_t _outputDeviceIndex;
+};
+
+#endif  // #if (_MSC_VER >= 1400)
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_CORE_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.cc b/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.cc
new file mode 100644
index 0000000000..a36c40735e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.cc
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/audio_device_module_win.h"
+
+#include <memory>
+#include <utility>
+
+#include "api/make_ref_counted.h"
+#include "api/sequence_checker.h"
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/include/audio_device.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/string_utils.h"
+
+namespace webrtc {
+namespace webrtc_win {
+namespace {
+
+#define RETURN_IF_OUTPUT_RESTARTS(...) \
+  do {                                 \
+    if (output_->Restarting()) {       \
+      return __VA_ARGS__;              \
+    }                                  \
+  } while (0)
+
+#define RETURN_IF_INPUT_RESTARTS(...) \
+  do {                                \
+    if (input_->Restarting()) {       \
+      return __VA_ARGS__;             \
+    }                                 \
+  } while (0)
+
+#define RETURN_IF_OUTPUT_IS_INITIALIZED(...) \
+  do {                                       \
+    if (output_->PlayoutIsInitialized()) {   \
+      return __VA_ARGS__;                    \
+    }                                        \
+  } while (0)
+
+#define RETURN_IF_INPUT_IS_INITIALIZED(...) \
+  do {                                      \
+    if (input_->RecordingIsInitialized()) { \
+      return __VA_ARGS__;                   \
+    }                                       \
+  } while (0)
+
+#define RETURN_IF_OUTPUT_IS_ACTIVE(...) \
+  do {                                  \
+    if (output_->Playing()) {           \
+      return __VA_ARGS__;               \
+    }                                   \
+  } while (0)
+
+#define RETURN_IF_INPUT_IS_ACTIVE(...) \
+  do {                                 \
+    if (input_->Recording()) {         \
+      return __VA_ARGS__;              \
+    }                                  \
+  } while (0)
+
+// This class combines a generic instance of an AudioInput and a generic
+// instance of an AudioOutput to create an AudioDeviceModule. This is mostly
+// done by delegating to the audio input/output with some glue code. This
+// class also directly implements some of the AudioDeviceModule methods with
+// dummy implementations.
+//
+// An instance must be created, destroyed and used on one and the same thread,
+// i.e., all public methods must also be called on the same thread. A thread
+// checker will RTC_DCHECK if any method is called on an invalid thread.
+// TODO(henrika): is thread checking needed in AudioInput and AudioOutput?
+class WindowsAudioDeviceModule : public AudioDeviceModuleForTest {
+ public:
+  enum class InitStatus {
+    OK = 0,
+    PLAYOUT_ERROR = 1,
+    RECORDING_ERROR = 2,
+    OTHER_ERROR = 3,
+    NUM_STATUSES = 4
+  };
+
+  WindowsAudioDeviceModule(std::unique_ptr<AudioInput> audio_input,
+                           std::unique_ptr<AudioOutput> audio_output,
+                           TaskQueueFactory* task_queue_factory)
+      : input_(std::move(audio_input)),
+        output_(std::move(audio_output)),
+        task_queue_factory_(task_queue_factory) {
+    RTC_CHECK(input_);
+    RTC_CHECK(output_);
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+  }
+
+  ~WindowsAudioDeviceModule() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    Terminate();
+  }
+
+  WindowsAudioDeviceModule(const WindowsAudioDeviceModule&) = delete;
+  WindowsAudioDeviceModule& operator=(const WindowsAudioDeviceModule&) = delete;
+
+  int32_t ActiveAudioLayer(
+      AudioDeviceModule::AudioLayer* audioLayer) const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    // TODO(henrika): it might be possible to remove this unique signature.
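+    // This ADM variant always reports kWindowsCoreAudio2; no other layer can
+    // be active while this module is in use.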
+    *audioLayer = AudioDeviceModule::kWindowsCoreAudio2;
+    return 0;
+  }
+
+  int32_t RegisterAudioCallback(AudioTransport* audioCallback) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK(audio_device_buffer_);
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return audio_device_buffer_->RegisterAudioCallback(audioCallback);
+  }
+
+  int32_t Init() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    RETURN_IF_INPUT_RESTARTS(0);
+    if (initialized_) {
+      return 0;
+    }
+    audio_device_buffer_ =
+        std::make_unique<AudioDeviceBuffer>(task_queue_factory_);
+    AttachAudioBuffer();
+    InitStatus status;
+    if (output_->Init() != 0) {
+      status = InitStatus::PLAYOUT_ERROR;
+    } else if (input_->Init() != 0) {
+      output_->Terminate();
+      status = InitStatus::RECORDING_ERROR;
+    } else {
+      initialized_ = true;
+      status = InitStatus::OK;
+    }
+    if (status != InitStatus::OK) {
+      RTC_LOG(LS_ERROR) << "Audio device initialization failed";
+      return -1;
+    }
+    return 0;
+  }
+
+  int32_t Terminate() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    RETURN_IF_INPUT_RESTARTS(0);
+    if (!initialized_)
+      return 0;
+    int32_t err = input_->Terminate();
+    err |= output_->Terminate();
+    initialized_ = false;
+    RTC_DCHECK_EQ(err, 0);
+    return err;
+  }
+
+  bool Initialized() const override {
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return initialized_;
+  }
+
+  int16_t PlayoutDevices() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    return output_->NumDevices();
+  }
+
+  int16_t RecordingDevices() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(0);
+    return input_->NumDevices();
+  }
+
+  int32_t PlayoutDeviceName(uint16_t index,
+                            char name[kAdmMaxDeviceNameSize],
+                            char guid[kAdmMaxGuidSize]) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    std::string name_str, guid_str;
+    int ret = -1;
+    if (guid != nullptr) {
+      ret = output_->DeviceName(index, &name_str, &guid_str);
+      rtc::strcpyn(guid, kAdmMaxGuidSize, guid_str.c_str());
+    } else {
+      ret = output_->DeviceName(index, &name_str, nullptr);
+    }
+    rtc::strcpyn(name, kAdmMaxDeviceNameSize, name_str.c_str());
+    return ret;
+  }
+
+  int32_t RecordingDeviceName(uint16_t index,
+                              char name[kAdmMaxDeviceNameSize],
+                              char guid[kAdmMaxGuidSize]) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(0);
+    std::string name_str, guid_str;
+    int ret = -1;
+    if (guid != nullptr) {
+      ret = input_->DeviceName(index, &name_str, &guid_str);
+      rtc::strcpyn(guid, kAdmMaxGuidSize, guid_str.c_str());
+    } else {
+      ret = input_->DeviceName(index, &name_str, nullptr);
+    }
+    rtc::strcpyn(name, kAdmMaxDeviceNameSize, name_str.c_str());
+    return ret;
+  }
+
+  int32_t SetPlayoutDevice(uint16_t index) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    return output_->SetDevice(index);
+  }
+
+  int32_t SetPlayoutDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    return output_->SetDevice(device);
+  }
+
+  int32_t SetRecordingDevice(uint16_t index) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return input_->SetDevice(index);
+  }
+
+  int32_t SetRecordingDevice(
+      AudioDeviceModule::WindowsDeviceType device) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return input_->SetDevice(device);
+  }
+
+  int32_t PlayoutIsAvailable(bool* available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = true;
+    return 0;
+  }
+
+  int32_t InitPlayout() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    RETURN_IF_OUTPUT_IS_INITIALIZED(0);
+    return output_->InitPlayout();
+  }
+
+  bool PlayoutIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(true);
+    return output_->PlayoutIsInitialized();
+  }
+
+  int32_t RecordingIsAvailable(bool* available) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = true;
+    return 0;
+  }
+
+  int32_t InitRecording() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(0);
+    RETURN_IF_INPUT_IS_INITIALIZED(0);
+    return input_->InitRecording();
+  }
+
+  bool RecordingIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(true);
+    return input_->RecordingIsInitialized();
+  }
+
+  int32_t StartPlayout() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    RETURN_IF_OUTPUT_IS_ACTIVE(0);
+    return output_->StartPlayout();
+  }
+
+  int32_t StopPlayout() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(-1);
+    return output_->StopPlayout();
+  }
+
+  bool Playing() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(true);
+    return output_->Playing();
+  }
+
+  int32_t StartRecording() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(0);
+    RETURN_IF_INPUT_IS_ACTIVE(0);
+    return input_->StartRecording();
+  }
+
+  int32_t StopRecording() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_INPUT_RESTARTS(-1);
+    return input_->StopRecording();
+  }
+
+  bool Recording() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RETURN_IF_INPUT_RESTARTS(true);
+    return input_->Recording();
+  }
+
+  int32_t InitSpeaker() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RTC_DLOG(LS_WARNING) << "This method has no effect";
+    return initialized_ ? 0 : -1;
+  }
+
+  bool SpeakerIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RTC_DLOG(LS_WARNING) << "This method has no effect";
+    return initialized_;
+  }
+
+  int32_t InitMicrophone() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RTC_DLOG(LS_WARNING) << "This method has no effect";
+    return initialized_ ? 0 : -1;
+  }
+
+  bool MicrophoneIsInitialized() const override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RTC_DLOG(LS_WARNING) << "This method has no effect";
+    return initialized_;
+  }
+
+  int32_t SpeakerVolumeIsAvailable(bool* available) override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = false;
+    return 0;
+  }
+
+  int32_t SetSpeakerVolume(uint32_t volume) override { return 0; }
+  int32_t SpeakerVolume(uint32_t* volume) const override { return 0; }
+  int32_t MaxSpeakerVolume(uint32_t* maxVolume) const override { return 0; }
+  int32_t MinSpeakerVolume(uint32_t* minVolume) const override { return 0; }
+
+  int32_t MicrophoneVolumeIsAvailable(bool* available) override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = false;
+    return 0;
+  }
+
+  int32_t SetMicrophoneVolume(uint32_t volume) override { return 0; }
+  int32_t MicrophoneVolume(uint32_t* volume) const override { return 0; }
+  int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const override { return 0; }
+  int32_t MinMicrophoneVolume(uint32_t* minVolume) const override { return 0; }
+
+  int32_t SpeakerMuteIsAvailable(bool* available) override { return 0; }
+  int32_t SetSpeakerMute(bool enable) override { return 0; }
+  int32_t SpeakerMute(bool* enabled) const override { return 0; }
+
+  int32_t MicrophoneMuteIsAvailable(bool* available) override { return 0; }
+  int32_t SetMicrophoneMute(bool enable) override { return 0; }
+  int32_t MicrophoneMute(bool* enabled) const override { return 0; }
+
+  int32_t StereoPlayoutIsAvailable(bool* available) const override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = true;
+    return 0;
+  }
+
+  int32_t SetStereoPlayout(bool enable) override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return 0;
+  }
+
+  int32_t StereoPlayout(bool* enabled) const override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *enabled = true;
+    return 0;
+  }
+
+  int32_t StereoRecordingIsAvailable(bool* available) const override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *available = true;
+    return 0;
+  }
+
+  int32_t SetStereoRecording(bool enable) override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return 0;
+  }
+
+  int32_t StereoRecording(bool* enabled) const override {
+    // TODO(henrika): improve support.
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    *enabled = true;
+    return 0;
+  }
+
+  int32_t PlayoutDelay(uint16_t* delayMS) const override { return 0; }
+
+  bool BuiltInAECIsAvailable() const override { return false; }
+  bool BuiltInAGCIsAvailable() const override { return false; }
+  bool BuiltInNSIsAvailable() const override { return false; }
+
+  int32_t EnableBuiltInAEC(bool enable) override { return 0; }
+  int32_t EnableBuiltInAGC(bool enable) override { return 0; }
+  int32_t EnableBuiltInNS(bool enable) override { return 0; }
+
+  int32_t AttachAudioBuffer() {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    output_->AttachAudioBuffer(audio_device_buffer_.get());
+    input_->AttachAudioBuffer(audio_device_buffer_.get());
+    return 0;
+  }
+
+  int RestartPlayoutInternally() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    RETURN_IF_OUTPUT_RESTARTS(0);
+    return output_->RestartPlayout();
+  }
+
+  int RestartRecordingInternally() override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return input_->RestartRecording();
+  }
+
+  int SetPlayoutSampleRate(uint32_t sample_rate) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return output_->SetSampleRate(sample_rate);
+  }
+
+  int SetRecordingSampleRate(uint32_t sample_rate) override {
+    RTC_DLOG(LS_INFO) << __FUNCTION__;
+    RTC_DCHECK_RUN_ON(&thread_checker_);
+    return input_->SetSampleRate(sample_rate);
+  }
+
+ private:
+  // Ensures that the class is used on the same thread as it is constructed
+  // and destroyed on.
+  SequenceChecker thread_checker_;
+
+  // Implements the AudioInput interface and deals with audio capturing parts.
+  const std::unique_ptr<AudioInput> input_;
+
+  // Implements the AudioOutput interface and deals with audio rendering
+  // parts.
+  const std::unique_ptr<AudioOutput> output_;
+
+  TaskQueueFactory* const task_queue_factory_;
+
+  // The AudioDeviceBuffer (ADB) instance is needed for sending/receiving
+  // audio to/from the WebRTC layer. Created and owned by this object. Used
+  // by both `input_` and `output_` but they use orthogonal parts of the ADB.
+  std::unique_ptr<AudioDeviceBuffer> audio_device_buffer_;
+
+  // Set to true after a successful call to Init(). Cleared by Terminate().
+  bool initialized_ = false;
+};
+
+}  // namespace
+
+rtc::scoped_refptr<AudioDeviceModuleForTest>
+CreateWindowsCoreAudioAudioDeviceModuleFromInputAndOutput(
+    std::unique_ptr<AudioInput> audio_input,
+    std::unique_ptr<AudioOutput> audio_output,
+    TaskQueueFactory* task_queue_factory) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  return rtc::make_ref_counted<WindowsAudioDeviceModule>(
+      std::move(audio_input), std::move(audio_output), task_queue_factory);
+}
+
+}  // namespace webrtc_win
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.h b/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.h
new file mode 100644
index 0000000000..1ed0b25620
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/audio_device_module_win.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_MODULE_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_MODULE_WIN_H_
+
+#include <memory>
+#include <string>
+
+#include "api/scoped_refptr.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/audio_device/include/audio_device.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+
+namespace webrtc_win {
+
+// This interface represents the main input-related parts of the complete
+// AudioDeviceModule interface.
+class AudioInput {
+ public:
+  virtual ~AudioInput() {}
+
+  virtual int Init() = 0;
+  virtual int Terminate() = 0;
+  virtual int NumDevices() const = 0;
+  virtual int SetDevice(int index) = 0;
+  virtual int SetDevice(AudioDeviceModule::WindowsDeviceType device) = 0;
+  virtual int DeviceName(int index, std::string* name, std::string* guid) = 0;
+  virtual void AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) = 0;
+  virtual bool RecordingIsInitialized() const = 0;
+  virtual int InitRecording() = 0;
+  virtual int StartRecording() = 0;
+  virtual int StopRecording() = 0;
+  virtual bool Recording() = 0;
+  virtual int VolumeIsAvailable(bool* available) = 0;
+  virtual int RestartRecording() = 0;
+  virtual bool Restarting() const = 0;
+  virtual int SetSampleRate(uint32_t sample_rate) = 0;
+};
+
+// This interface represents the main output-related parts of the complete
+// AudioDeviceModule interface.
+class AudioOutput {
+ public:
+  virtual ~AudioOutput() {}
+
+  virtual int Init() = 0;
+  virtual int Terminate() = 0;
+  virtual int NumDevices() const = 0;
+  virtual int SetDevice(int index) = 0;
+  virtual int SetDevice(AudioDeviceModule::WindowsDeviceType device) = 0;
+  virtual int DeviceName(int index, std::string* name, std::string* guid) = 0;
+  virtual void AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) = 0;
+  virtual bool PlayoutIsInitialized() const = 0;
+  virtual int InitPlayout() = 0;
+  virtual int StartPlayout() = 0;
+  virtual int StopPlayout() = 0;
+  virtual bool Playing() = 0;
+  virtual int VolumeIsAvailable(bool* available) = 0;
+  virtual int RestartPlayout() = 0;
+  virtual bool Restarting() const = 0;
+  virtual int SetSampleRate(uint32_t sample_rate) = 0;
+};
+
+// Combines an AudioInput and an AudioOutput implementation to build an
+// AudioDeviceModule. Hides most parts of the full ADM interface.
+rtc::scoped_refptr<AudioDeviceModuleForTest>
+CreateWindowsCoreAudioAudioDeviceModuleFromInputAndOutput(
+    std::unique_ptr<AudioInput> audio_input,
+    std::unique_ptr<AudioOutput> audio_output,
+    TaskQueueFactory* task_queue_factory);
+
+}  // namespace webrtc_win
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_WIN_AUDIO_DEVICE_MODULE_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.cc b/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.cc
new file mode 100644
index 0000000000..dc8526b625
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.cc
@@ -0,0 +1,948 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/core_audio_base_win.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_device/audio_device_buffer.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/time_utils.h"
+#include "rtc_base/win/scoped_com_initializer.h"
+#include "rtc_base/win/windows_version.h"
+
+using Microsoft::WRL::ComPtr;
+
+namespace webrtc {
+namespace webrtc_win {
+namespace {
+
+// Even if the device supports low latency and even if IAudioClient3 can be
+// used (requires Win10 or higher), we currently disable any attempts to
+// initialize the client for low-latency.
+// TODO(henrika): more research is needed before we can enable low-latency.
+const bool kEnableLowLatencyIfSupported = false;
+
+// Each unit of reference time is 100 nanoseconds, hence
+// `kReferenceTimesPerSecond` corresponds to one second.
+// TODO(henrika): possibly add usage in Init().
+// const REFERENCE_TIME kReferenceTimesPerSecond = 10000000;
+
+enum DefaultDeviceType {
+  kUndefined = -1,
+  kDefault = 0,
+  kDefaultCommunications = 1,
+  kDefaultDeviceTypeMaxCount = kDefaultCommunications + 1,
+};
+
+const char* DirectionToString(CoreAudioBase::Direction direction) {
+  switch (direction) {
+    case CoreAudioBase::Direction::kOutput:
+      return "Output";
+    case CoreAudioBase::Direction::kInput:
+      return "Input";
+    default:
+      return "Unknown";
+  }
+}
+
+const char* RoleToString(const ERole role) {
+  switch (role) {
+    case eConsole:
+      return "Console";
+    case eMultimedia:
+      return "Multimedia";
+    case eCommunications:
+      return "Communications";
+    default:
+      return "Unsupported";
+  }
+}
+
+std::string IndexToString(int index) {
+  std::string ss = std::to_string(index);
+  switch (index) {
+    case kDefault:
+      ss += " (Default)";
+      break;
+    case kDefaultCommunications:
+      ss += " (Communications)";
+      break;
+    default:
+      break;
+  }
+  return ss;
+}
+
+const char* SessionStateToString(AudioSessionState state) {
+  switch (state) {
+    case AudioSessionStateActive:
+      return "Active";
+    case AudioSessionStateInactive:
+      return "Inactive";
+    case AudioSessionStateExpired:
+      return "Expired";
+    default:
+      return "Invalid";
+  }
+}
+
+const char* SessionDisconnectReasonToString(
+    AudioSessionDisconnectReason reason) {
+  switch (reason) {
+    case DisconnectReasonDeviceRemoval:
+      return "DeviceRemoval";
+    case DisconnectReasonServerShutdown:
+      return "ServerShutdown";
+    case DisconnectReasonFormatChanged:
+      return "FormatChanged";
+    case DisconnectReasonSessionLogoff:
+      return "SessionLogoff";
+    case DisconnectReasonSessionDisconnected:
+      return "Disconnected";
+    case DisconnectReasonExclusiveModeOverride:
+      return "ExclusiveModeOverride";
+    default:
+      return "Invalid";
+  }
+}
+
+// Returns true if the selected audio device supports low latency, i.e, if it
+// is possible to initialize the engine using periods less than the default
+// period (10ms).
+bool IsLowLatencySupported(IAudioClient3* client3,
+                           const WAVEFORMATEXTENSIBLE* format,
+                           uint32_t* min_period_in_frames) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+
+  // Get the range of periodicities supported by the engine for the specified
+  // stream format.
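+  // The reported periods are in units of audio frames. As an illustrative
+  // example, a 48 kHz engine might report default = 480, fundamental = 8,
+  // min = 128 and max = 448 frames, i.e. a 10 ms default period and a
+  // minimum period of roughly 2.7 ms (128 / 48000).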
+  uint32_t default_period = 0;
+  uint32_t fundamental_period = 0;
+  uint32_t min_period = 0;
+  uint32_t max_period = 0;
+  if (FAILED(core_audio_utility::GetSharedModeEnginePeriod(
+          client3, format, &default_period, &fundamental_period, &min_period,
+          &max_period))) {
+    return false;
+  }
+
+  // Low latency is supported if the shortest allowed period is less than the
+  // default engine period.
+  // TODO(henrika): verify that this assumption is correct.
+  const bool low_latency = min_period < default_period;
+  RTC_LOG(LS_INFO) << "low_latency: " << low_latency;
+  *min_period_in_frames = low_latency ? min_period : 0;
+  return low_latency;
+}
+
+}  // namespace
+
+CoreAudioBase::CoreAudioBase(Direction direction,
+                             bool automatic_restart,
+                             OnDataCallback data_callback,
+                             OnErrorCallback error_callback)
+    : format_(),
+      direction_(direction),
+      automatic_restart_(automatic_restart),
+      on_data_callback_(data_callback),
+      on_error_callback_(error_callback),
+      device_index_(kUndefined),
+      is_restarting_(false) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction)
+                    << "]";
+  RTC_DLOG(LS_INFO) << "Automatic restart: " << automatic_restart;
+  RTC_DLOG(LS_INFO) << "Windows version: " << rtc::rtc_win::GetVersion();
+
+  // Create the event which the audio engine will signal each time a buffer
+  // becomes ready to be processed by the client.
+  audio_samples_event_.Set(CreateEvent(nullptr, false, false, nullptr));
+  RTC_DCHECK(audio_samples_event_.IsValid());
+
+  // Event to be set in Stop() when rendering/capturing shall stop.
+  stop_event_.Set(CreateEvent(nullptr, false, false, nullptr));
+  RTC_DCHECK(stop_event_.IsValid());
+
+  // Event to be set when it has been detected that an active device has been
+  // invalidated or the stream format has changed.
+  restart_event_.Set(CreateEvent(nullptr, false, false, nullptr));
+  RTC_DCHECK(restart_event_.IsValid());
+}
+
+CoreAudioBase::~CoreAudioBase() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_EQ(ref_count_, 1);
+}
+
+EDataFlow CoreAudioBase::GetDataFlow() const {
+  return direction_ == CoreAudioBase::Direction::kOutput ? eRender : eCapture;
+}
+
+bool CoreAudioBase::IsRestarting() const {
+  return is_restarting_;
+}
+
+int64_t CoreAudioBase::TimeSinceStart() const {
+  return rtc::TimeSince(start_time_);
+}
+
+int CoreAudioBase::NumberOfActiveDevices() const {
+  return core_audio_utility::NumberOfActiveDevices(GetDataFlow());
+}
+
+int CoreAudioBase::NumberOfEnumeratedDevices() const {
+  const int num_active = NumberOfActiveDevices();
+  return num_active > 0 ? num_active + kDefaultDeviceTypeMaxCount : 0;
+}
+
+void CoreAudioBase::ReleaseCOMObjects() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  // ComPtr::Reset() sets the ComPtr to nullptr releasing any previous
+  // reference.
+  if (audio_client_) {
+    audio_client_.Reset();
+  }
+  if (audio_clock_.Get()) {
+    audio_clock_.Reset();
+  }
+  if (audio_session_control_.Get()) {
+    audio_session_control_.Reset();
+  }
+}
+
+bool CoreAudioBase::IsDefaultDevice(int index) const {
+  return index == kDefault;
+}
+
+bool CoreAudioBase::IsDefaultCommunicationsDevice(int index) const {
+  return index == kDefaultCommunications;
+}
+
+bool CoreAudioBase::IsDefaultDeviceId(absl::string_view device_id) const {
+  // Returns true if `device_id` corresponds to the id of the default
+  // device. Note that, if only one device is available (or if the user has
+  // not explicitly set a default device), `device_id` will also match
+  // IsDefaultCommunicationsDeviceId().
+  return (IsInput() &&
+          (device_id == core_audio_utility::GetDefaultInputDeviceID())) ||
+         (IsOutput() &&
+          (device_id == core_audio_utility::GetDefaultOutputDeviceID()));
+}
+
+bool CoreAudioBase::IsDefaultCommunicationsDeviceId(
+    absl::string_view device_id) const {
+  // Returns true if `device_id` corresponds to the id of the default
+  // communication device. Note that, if only one device is available (or if
+  // the user has not explicitly set a communication device), `device_id` will
+  // also match IsDefaultDeviceId().
+  return (IsInput() &&
+          (device_id ==
+           core_audio_utility::GetCommunicationsInputDeviceID())) ||
+         (IsOutput() &&
+          (device_id ==
+           core_audio_utility::GetCommunicationsOutputDeviceID()));
+}
+
+bool CoreAudioBase::IsInput() const {
+  return direction_ == CoreAudioBase::Direction::kInput;
+}
+
+bool CoreAudioBase::IsOutput() const {
+  return direction_ == CoreAudioBase::Direction::kOutput;
+}
+
+std::string CoreAudioBase::GetDeviceID(int index) const {
+  if (index >= NumberOfEnumeratedDevices()) {
+    RTC_LOG(LS_ERROR) << "Invalid device index";
+    return std::string();
+  }
+
+  std::string device_id;
+  if (IsDefaultDevice(index)) {
+    device_id = IsInput() ? core_audio_utility::GetDefaultInputDeviceID()
+                          : core_audio_utility::GetDefaultOutputDeviceID();
+  } else if (IsDefaultCommunicationsDevice(index)) {
+    device_id = IsInput()
+                    ? core_audio_utility::GetCommunicationsInputDeviceID()
+                    : core_audio_utility::GetCommunicationsOutputDeviceID();
+  } else {
+    AudioDeviceNames device_names;
+    bool ok = IsInput()
+                  ? core_audio_utility::GetInputDeviceNames(&device_names)
+                  : core_audio_utility::GetOutputDeviceNames(&device_names);
+    if (ok) {
+      device_id = device_names[index].unique_id;
+    }
+  }
+  return device_id;
+}
+
+int CoreAudioBase::SetDevice(int index) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]: index=" << IndexToString(index);
+  if (initialized_) {
+    return -1;
+  }
+
+  std::string device_id = GetDeviceID(index);
+  RTC_DLOG(LS_INFO) << "index=" << IndexToString(index)
+                    << " => device_id: " << device_id;
+  device_index_ = index;
+  device_id_ = device_id;
+
+  return device_id_.empty() ? -1 : 0;
+}
+
+int CoreAudioBase::DeviceName(int index,
+                              std::string* name,
+                              std::string* guid) const {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]: index=" << IndexToString(index);
+  if (index > NumberOfEnumeratedDevices() - 1) {
+    RTC_LOG(LS_ERROR) << "Invalid device index";
+    return -1;
+  }
+
+  AudioDeviceNames device_names;
+  bool ok = IsInput()
+                ? core_audio_utility::GetInputDeviceNames(&device_names)
+                : core_audio_utility::GetOutputDeviceNames(&device_names);
+  // Validate the index one extra time in case the size of the generated list
+  // did not match NumberOfEnumeratedDevices().
+  if (!ok || static_cast<int>(device_names.size()) <= index) {
+    RTC_LOG(LS_ERROR) << "Failed to get the device name";
+    return -1;
+  }
+
+  *name = device_names[index].device_name;
+  RTC_DLOG(LS_INFO) << "name: " << *name;
+  if (guid != nullptr) {
+    *guid = device_names[index].unique_id;
+    RTC_DLOG(LS_INFO) << "guid: " << *guid;
+  }
+  return 0;
+}
+
+bool CoreAudioBase::Init() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  RTC_DCHECK_GE(device_index_, 0);
+  RTC_DCHECK(!device_id_.empty());
+  RTC_DCHECK(audio_device_buffer_);
+  RTC_DCHECK(!audio_client_);
+  RTC_DCHECK(!audio_session_control_.Get());
+
+  // Use the existing combination of `device_index_` and `device_id_` to set
+  // parameters which are required to create an audio client. It is up to the
+  // parent class to set `device_index_` and `device_id_`.
+  std::string device_id = AudioDeviceName::kDefaultDeviceId;
+  ERole role = ERole();
+  if (IsDefaultDevice(device_index_)) {
+    role = eConsole;
+  } else if (IsDefaultCommunicationsDevice(device_index_)) {
+    role = eCommunications;
+  } else {
+    device_id = device_id_;
+  }
+  RTC_LOG(LS_INFO) << "Unique device identifier: device_id=" << device_id
+                   << ", role=" << RoleToString(role);
+
+  // Create an IAudioClient interface which enables us to create and initialize
+  // an audio stream between an audio application and the audio engine.
+  ComPtr<IAudioClient> audio_client;
+  if (core_audio_utility::GetAudioClientVersion() == 3) {
+    RTC_DLOG(LS_INFO) << "Using IAudioClient3";
+    audio_client =
+        core_audio_utility::CreateClient3(device_id, GetDataFlow(), role);
+  } else if (core_audio_utility::GetAudioClientVersion() == 2) {
+    RTC_DLOG(LS_INFO) << "Using IAudioClient2";
+    audio_client =
+        core_audio_utility::CreateClient2(device_id, GetDataFlow(), role);
+  } else {
+    RTC_DLOG(LS_INFO) << "Using IAudioClient";
+    audio_client =
+        core_audio_utility::CreateClient(device_id, GetDataFlow(), role);
+  }
+  if (!audio_client) {
+    return false;
+  }
+
+  // Set extra client properties before initialization if the audio client
+  // supports it.
+  // TODO(henrika): evaluate effect(s) of making these changes. Also, perhaps
+  // these types of settings belong to the client and not the utility parts.
+  if (core_audio_utility::GetAudioClientVersion() >= 2) {
+    if (FAILED(core_audio_utility::SetClientProperties(
+            static_cast<IAudioClient2*>(audio_client.Get())))) {
+      return false;
+    }
+  }
+
+  // Retrieve preferred audio input or output parameters for the given client
+  // and the specified client properties. Override the preferred rate if a
+  // sample rate has been defined by the user. Rate conversion will be
+  // performed by the audio engine to match the client if needed.
+  AudioParameters params;
+  HRESULT res = sample_rate_ ? core_audio_utility::GetPreferredAudioParameters(
+                                   audio_client.Get(), &params, *sample_rate_)
+                             : core_audio_utility::GetPreferredAudioParameters(
+                                   audio_client.Get(), &params);
+  if (FAILED(res)) {
+    return false;
+  }
+
+  // Define the output WAVEFORMATEXTENSIBLE format in `format_`.
+  WAVEFORMATEX* format = &format_.Format;
+  format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
+  // Check the preferred channel configuration and request implicit channel
+  // upmixing (audio engine extends from 2 to N channels internally) if the
+  // preferred number of channels is larger than two; i.e., initialize the
+  // stream in stereo even if the preferred configuration is multi-channel.
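+  // Example (illustrative): for 16-bit stereo at 48 kHz the derived fields
+  // below become
+  //
+  //   nChannels       = 2
+  //   nSamplesPerSec  = 48000
+  //   wBitsPerSample  = 16
+  //   nBlockAlign     = (16 / 8) * 2 = 4 bytes per frame
+  //   nAvgBytesPerSec = 48000 * 4 = 192000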
+  if (params.channels() <= 2) {
+    format->nChannels = rtc::dchecked_cast<WORD>(params.channels());
+  } else {
+    // TODO(henrika): ensure that this approach works on different
+    // multi-channel devices. Verified on:
+    // - Corsair VOID PRO Surround USB Adapter (supports 7.1)
+    RTC_LOG(LS_WARNING)
+        << "Using channel upmixing in WASAPI audio engine (2 => "
+        << params.channels() << ")";
+    format->nChannels = 2;
+  }
+  format->nSamplesPerSec = params.sample_rate();
+  format->wBitsPerSample = rtc::dchecked_cast<WORD>(params.bits_per_sample());
+  format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
+  format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
+  format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
+  // Add the parts which are unique for the WAVE_FORMAT_EXTENSIBLE structure.
+  format_.Samples.wValidBitsPerSample =
+      rtc::dchecked_cast<WORD>(params.bits_per_sample());
+  format_.dwChannelMask =
+      format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO;
+  format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+  RTC_DLOG(LS_INFO) << core_audio_utility::WaveFormatToString(&format_);
+
+  // Verify that the format is supported but exclude the test if the default
+  // sample rate has been overridden. If so, the WASAPI audio engine will do
+  // any necessary conversions between the client format we have given it and
+  // the playback mix format or recording split format.
+  if (!sample_rate_) {
+    if (!core_audio_utility::IsFormatSupported(
+            audio_client.Get(), AUDCLNT_SHAREMODE_SHARED, &format_)) {
+      return false;
+    }
+  }
+
+  // Check if low-latency is supported and use special initialization if it is.
+  // Low-latency initialization requires these things:
+  // - IAudioClient3 (>= Win10)
+  // - HDAudio driver
+  // - kEnableLowLatencyIfSupported changed from false (default) to true.
+  // TODO(henrika): IsLowLatencySupported() returns AUDCLNT_E_UNSUPPORTED_FORMAT
+  // when `sample_rate_.has_value()` returns true if rate conversion is
+  // actually required (i.e., client asks for other than the default rate).
+  bool low_latency_support = false;
+  uint32_t min_period_in_frames = 0;
+  if (kEnableLowLatencyIfSupported &&
+      core_audio_utility::GetAudioClientVersion() >= 3) {
+    low_latency_support =
+        IsLowLatencySupported(static_cast<IAudioClient3*>(audio_client.Get()),
+                              &format_, &min_period_in_frames);
+  }
+
+  if (low_latency_support) {
+    RTC_DCHECK_GE(core_audio_utility::GetAudioClientVersion(), 3);
+    // Use the IAudioClient3::InitializeSharedAudioStream() API to initialize a
+    // low-latency event-driven client. Request the smallest possible
+    // periodicity.
+    // TODO(henrika): evaluate this scheme in terms of CPU etc.
+    if (FAILED(core_audio_utility::SharedModeInitializeLowLatency(
+            static_cast<IAudioClient3*>(audio_client.Get()), &format_,
+            audio_samples_event_, min_period_in_frames,
+            sample_rate_.has_value(), &endpoint_buffer_size_frames_))) {
+      return false;
+    }
+  } else {
+    // Initialize the audio stream between the client and the device in shared
+    // mode using event-driven buffer handling. Also, using 0 as requested
+    // buffer size results in a default (minimum) endpoint buffer size.
+    // TODO(henrika): possibly increase `requested_buffer_size` to add
+    // robustness.
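+    // Note (illustrative): `requested_buffer_size` is a REFERENCE_TIME, i.e.,
+    // expressed in 100-ns units, where 1 ms corresponds to 10000 units.
+    // Requesting e.g. a 20 ms endpoint buffer instead of the default minimum
+    // would therefore look like:
+    //
+    //   const REFERENCE_TIME requested_buffer_size = 20 * 10000;  // 20 ms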
+    const REFERENCE_TIME requested_buffer_size = 0;
+    if (FAILED(core_audio_utility::SharedModeInitialize(
+            audio_client.Get(), &format_, audio_samples_event_,
+            requested_buffer_size, sample_rate_.has_value(),
+            &endpoint_buffer_size_frames_))) {
+      return false;
+    }
+  }
+
+  // Check the device period and the preferred buffer size and log a warning
+  // if WebRTC's buffer size is not an even divisor of the preferred buffer
+  // size in Core Audio.
+  // TODO(henrika): sort out if a non-perfect match really is an issue.
+  // TODO(henrika): compare with IAudioClient3::GetSharedModeEnginePeriod().
+  REFERENCE_TIME device_period;
+  if (FAILED(core_audio_utility::GetDevicePeriod(
+          audio_client.Get(), AUDCLNT_SHAREMODE_SHARED, &device_period))) {
+    return false;
+  }
+  const double device_period_in_seconds =
+      static_cast<double>(
+          core_audio_utility::ReferenceTimeToTimeDelta(device_period).ms()) /
+      1000.0L;
+  const int preferred_frames_per_buffer =
+      static_cast<int>(params.sample_rate() * device_period_in_seconds + 0.5);
+  RTC_DLOG(LS_INFO) << "preferred_frames_per_buffer: "
+                    << preferred_frames_per_buffer;
+  if (preferred_frames_per_buffer % params.frames_per_buffer()) {
+    RTC_LOG(LS_WARNING) << "Buffer size of " << params.frames_per_buffer()
+                        << " is not an even divisor of "
+                        << preferred_frames_per_buffer;
+  }
+
+  // Create an AudioSessionControl interface given the initialized client.
+  // The IAudioSessionControl interface enables a client to configure the
+  // control parameters for an audio session and to monitor events in the
+  // session.
+  ComPtr<IAudioSessionControl> audio_session_control =
+      core_audio_utility::CreateAudioSessionControl(audio_client.Get());
+  if (!audio_session_control.Get()) {
+    return false;
+  }
+
+  // The Sndvol program displays volume and mute controls for sessions that
+  // are in the active and inactive states.
+  AudioSessionState state;
+  if (FAILED(audio_session_control->GetState(&state))) {
+    return false;
+  }
+  RTC_DLOG(LS_INFO) << "audio session state: " << SessionStateToString(state);
+  RTC_DCHECK_EQ(state, AudioSessionStateInactive);
+
+  // Register the client to receive notifications of session events, including
+  // changes in the stream state.
+  if (FAILED(audio_session_control->RegisterAudioSessionNotification(this))) {
+    return false;
+  }
+
+  // Store valid COM interfaces.
+  audio_client_ = audio_client;
+  audio_session_control_ = audio_session_control;
+
+  return true;
+}
+
+bool CoreAudioBase::Start() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  if (IsRestarting()) {
+    // The audio thread should be alive during internal restart since the
+    // restart callback is triggered on that thread; this also makes the
+    // restart sequence less complex.
+    RTC_DCHECK(!audio_thread_.empty());
+  }
+
+  // Start an audio thread but only if one does not already exist (which is the
+  // case during restart).
+  if (audio_thread_.empty()) {
+    const absl::string_view name =
+        IsInput() ? "wasapi_capture_thread" : "wasapi_render_thread";
+    audio_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] { ThreadRun(); }, name,
+        rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime));
+    RTC_DLOG(LS_INFO) << "Started thread with name: " << name
+                      << " and handle: " << *audio_thread_.GetHandle();
+  }
+
+  // Start streaming data between the endpoint buffer and the audio engine.
+  _com_error error = audio_client_->Start();
+  if (FAILED(error.Error())) {
+    StopThread();
+    RTC_LOG(LS_ERROR) << "IAudioClient::Start failed: "
+                      << core_audio_utility::ErrorToString(error);
+    return false;
+  }
+
+  start_time_ = rtc::TimeMillis();
+  num_data_callbacks_ = 0;
+
+  return true;
+}
+
+bool CoreAudioBase::Stop() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  RTC_DLOG(LS_INFO) << "total activity time: " << TimeSinceStart();
+
+  // Stop audio streaming.
+  _com_error error = audio_client_->Stop();
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::Stop failed: "
+                      << core_audio_utility::ErrorToString(error);
+  }
+  // Stop and destroy the audio thread but only when a restart attempt is not
+  // ongoing.
+  if (!IsRestarting()) {
+    StopThread();
+  }
+
+  // Flush all pending data and reset the audio clock stream position to 0.
+  error = audio_client_->Reset();
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::Reset failed: "
+                      << core_audio_utility::ErrorToString(error);
+  }
+
+  if (IsOutput()) {
+    // Extra safety check to ensure that the buffers are cleared.
+    // If the buffers are not cleared correctly, the next call to Start()
+    // would fail with AUDCLNT_E_BUFFER_ERROR at
+    // IAudioRenderClient::GetBuffer().
+    UINT32 num_queued_frames = 0;
+    audio_client_->GetCurrentPadding(&num_queued_frames);
+    RTC_DCHECK_EQ(0u, num_queued_frames);
+  }
+
+  // Delete the previous registration by the client to receive notifications
+  // about audio session events.
+  RTC_DLOG(LS_INFO) << "audio session state: "
+                    << SessionStateToString(GetAudioSessionState());
+  error = audio_session_control_->UnregisterAudioSessionNotification(this);
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR)
+        << "IAudioSessionControl::UnregisterAudioSessionNotification failed: "
+        << core_audio_utility::ErrorToString(error);
+  }
+
+  // To ensure that the restart process is as simple as possible, the audio
+  // thread is not destroyed during restart attempts triggered by internal
+  // error callbacks.
+  if (!IsRestarting()) {
+    thread_checker_audio_.Detach();
+  }
+
+  // Release all allocated COM interfaces to allow for a restart without
+  // intermediate destruction.
+  ReleaseCOMObjects();
+
+  return true;
+}
+
+bool CoreAudioBase::IsVolumeControlAvailable(bool* available) const {
+  // A valid IAudioClient is required to access the ISimpleAudioVolume
+  // interface properly. It is possible to use
+  // IAudioSessionManager::GetSimpleAudioVolume as well but we use the audio
+  // client here to ensure that the initialized audio session is visible under
+  // the group box labeled "Applications" in Sndvol.exe.
+  if (!audio_client_) {
+    return false;
+  }
+
+  // Try to create an ISimpleAudioVolume instance.
+  ComPtr<ISimpleAudioVolume> audio_volume =
+      core_audio_utility::CreateSimpleAudioVolume(audio_client_.Get());
+  if (!audio_volume.Get()) {
+    RTC_DLOG(LS_ERROR) << "Volume control is not supported";
+    return false;
+  }
+
+  // Try to use the valid volume control.
+  float volume = 0.0;
+  _com_error error = audio_volume->GetMasterVolume(&volume);
+  if (error.Error() != S_OK) {
+    RTC_LOG(LS_ERROR) << "ISimpleAudioVolume::GetMasterVolume failed: "
+                      << core_audio_utility::ErrorToString(error);
+    *available = false;
+  }
+  RTC_DLOG(LS_INFO) << "master volume for output audio session: " << volume;
+
+  *available = true;
+  return true;
+}
+
+// Internal test method which can be used in tests to emulate a restart signal.
+// It simply sets the same event which is normally triggered by session and
+// device notifications. Hence, the emulated restart sequence covers most parts
+// of a real sequence except the actual device switch.
+bool CoreAudioBase::Restart() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  if (!automatic_restart()) {
+    return false;
+  }
+  is_restarting_ = true;
+  SetEvent(restart_event_.Get());
+  return true;
+}
+
+void CoreAudioBase::StopThread() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK(!IsRestarting());
+  if (!audio_thread_.empty()) {
+    RTC_DLOG(LS_INFO) << "Sets stop_event...";
+    SetEvent(stop_event_.Get());
+    RTC_DLOG(LS_INFO) << "PlatformThread::Finalize...";
+    audio_thread_.Finalize();
+
+    // Ensure that we don't quit the main thread loop immediately next
+    // time Start() is called.
+    ResetEvent(stop_event_.Get());
+    ResetEvent(restart_event_.Get());
+  }
+}
+
+bool CoreAudioBase::HandleRestartEvent() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+  RTC_DCHECK(!audio_thread_.empty());
+  RTC_DCHECK(IsRestarting());
+  // Let each client (input and/or output) take care of its own restart
+  // sequence since each side might need unique actions.
+  // TODO(henrika): revisit and investigate if one common base implementation
+  // is possible.
+  bool restart_ok = on_error_callback_(ErrorType::kStreamDisconnected);
+  is_restarting_ = false;
+  return restart_ok;
+}
+
+bool CoreAudioBase::SwitchDeviceIfNeeded() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << "[" << DirectionToString(direction())
+                    << "]";
+  RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+  RTC_DCHECK(IsRestarting());
+
+  RTC_DLOG(LS_INFO) << "device_index=" << device_index_
+                    << " => device_id: " << device_id_;
+
+  // Ensure that at least one device exists and can be utilized. The most
+  // probable cause for ending up here is that a device has been removed.
+  if (core_audio_utility::NumberOfActiveDevices(IsInput() ? eCapture
+                                                          : eRender) < 1) {
+    RTC_DLOG(LS_ERROR) << "All devices are disabled or removed";
+    return false;
+  }
+
+  // Get the unique device ID for the index which is currently used. It seems
+  // safe to assume that if the ID is the same as the existing device ID, then
+  // the device configuration is the same as before.
+  std::string device_id = GetDeviceID(device_index_);
+  if (device_id != device_id_) {
+    RTC_LOG(LS_WARNING)
+        << "Device configuration has changed => changing device selection...";
+    // TODO(henrika): depending on the current state and how we got here, we
+    // must select a new device here.
+    if (SetDevice(kDefault) == -1) {
+      RTC_LOG(LS_WARNING) << "Failed to set new audio device";
+      return false;
+    }
+  } else {
+    RTC_LOG(LS_INFO)
+        << "Device configuration has not changed => keeping selected device";
+  }
+  return true;
+}
+
+AudioSessionState CoreAudioBase::GetAudioSessionState() const {
+  AudioSessionState state = AudioSessionStateInactive;
+  RTC_DCHECK(audio_session_control_.Get());
+  _com_error error = audio_session_control_->GetState(&state);
+  if (FAILED(error.Error())) {
+    RTC_DLOG(LS_ERROR) << "IAudioSessionControl::GetState failed: "
+                       << core_audio_utility::ErrorToString(error);
+  }
+  return state;
+}
+
+// TODO(henrika): only used for debugging purposes currently.
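+// Note (illustrative): this class starts with ref_count_ = 1 and never
+// deletes itself in Release(), so a registration/unregistration pair is
+// expected to leave the count unchanged, e.g.:
+//
+//   RegisterAudioSessionNotification(this);    // AddRef()  => ref_count_ == 2
+//   UnregisterAudioSessionNotification(this);  // Release() => ref_count_ == 1
+//
+// which matches the RTC_DCHECK_EQ(ref_count_, 1) in the destructor.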
+ULONG CoreAudioBase::AddRef() {
+  ULONG new_ref = InterlockedIncrement(&ref_count_);
+  // RTC_DLOG(LS_INFO) << "__AddRef => " << new_ref;
+  return new_ref;
+}
+
+// TODO(henrika): does not call delete this.
+ULONG CoreAudioBase::Release() {
+  ULONG new_ref = InterlockedDecrement(&ref_count_);
+  // RTC_DLOG(LS_INFO) << "__Release => " << new_ref;
+  return new_ref;
+}
+
+// TODO(henrika): can probably be replaced by "return S_OK" only.
+HRESULT CoreAudioBase::QueryInterface(REFIID iid, void** object) {
+  if (object == nullptr) {
+    return E_POINTER;
+  }
+  if (iid == IID_IUnknown || iid == __uuidof(IAudioSessionEvents)) {
+    *object = static_cast<IAudioSessionEvents*>(this);
+    return S_OK;
+  }
+  *object = nullptr;
+  return E_NOINTERFACE;
+}
+
+// IAudioSessionEvents::OnStateChanged.
+HRESULT CoreAudioBase::OnStateChanged(AudioSessionState new_state) {
+  RTC_DLOG(LS_INFO) << "___" << __FUNCTION__ << "["
+                    << DirectionToString(direction())
+                    << "] new_state: " << SessionStateToString(new_state);
+  return S_OK;
+}
+
+// When a session is disconnected because of a device removal or format change
+// event, we want to inform the audio thread about the lost audio session and
+// trigger an attempt to restart audio using a new (default) device.
+// This method is called on separate threads owned by the session manager and
+// it can happen that the same type of callback is called more than once for
+// the same event.
+HRESULT CoreAudioBase::OnSessionDisconnected(
+    AudioSessionDisconnectReason disconnect_reason) {
+  RTC_DLOG(LS_INFO) << "___" << __FUNCTION__ << "["
+                    << DirectionToString(direction()) << "] reason: "
+                    << SessionDisconnectReasonToString(disconnect_reason);
+  // Ignore changes in the audio session (don't try to restart) if the user
+  // has explicitly asked for this type of ADM during construction.
+  if (!automatic_restart()) {
+    RTC_DLOG(LS_WARNING) << "___Automatic restart is disabled";
+    return S_OK;
+  }
+
+  if (IsRestarting()) {
+    RTC_DLOG(LS_WARNING) << "___Ignoring since restart is already active";
+    return S_OK;
+  }
+
+  // By default, automatic restart is enabled and the restart event will be set
+  // below if the device was removed or the format was changed.
+  if (disconnect_reason == DisconnectReasonDeviceRemoval ||
+      disconnect_reason == DisconnectReasonFormatChanged) {
+    is_restarting_ = true;
+    SetEvent(restart_event_.Get());
+  }
+  return S_OK;
+}
+
+// IAudioSessionEvents::OnDisplayNameChanged
+HRESULT CoreAudioBase::OnDisplayNameChanged(LPCWSTR new_display_name,
+                                            LPCGUID event_context) {
+  return S_OK;
+}
+
+// IAudioSessionEvents::OnIconPathChanged
+HRESULT CoreAudioBase::OnIconPathChanged(LPCWSTR new_icon_path,
+                                         LPCGUID event_context) {
+  return S_OK;
+}
+
+// IAudioSessionEvents::OnSimpleVolumeChanged
+HRESULT CoreAudioBase::OnSimpleVolumeChanged(float new_simple_volume,
+                                             BOOL new_mute,
+                                             LPCGUID event_context) {
+  return S_OK;
+}
+
+// IAudioSessionEvents::OnChannelVolumeChanged
+HRESULT CoreAudioBase::OnChannelVolumeChanged(DWORD channel_count,
+                                              float new_channel_volumes[],
+                                              DWORD changed_channel,
+                                              LPCGUID event_context) {
+  return S_OK;
+}
+
+// IAudioSessionEvents::OnGroupingParamChanged
+HRESULT CoreAudioBase::OnGroupingParamChanged(LPCGUID new_grouping_param,
+                                              LPCGUID event_context) {
+  return S_OK;
+}
+
+void CoreAudioBase::ThreadRun() {
+  if (!core_audio_utility::IsMMCSSSupported()) {
+    RTC_LOG(LS_ERROR) << "MMCSS is not supported";
+    return;
+  }
+  RTC_DLOG(LS_INFO) << "[" << DirectionToString(direction())
+                    << "] ThreadRun starts...";
+  // TODO(henrika): difference between "Pro Audio" and "Audio"?
+  ScopedMMCSSRegistration mmcss_registration(L"Pro Audio");
+  ScopedCOMInitializer com_initializer(ScopedCOMInitializer::kMTA);
+  RTC_DCHECK(mmcss_registration.Succeeded());
+  RTC_DCHECK(com_initializer.Succeeded());
+  RTC_DCHECK(stop_event_.IsValid());
+  RTC_DCHECK(audio_samples_event_.IsValid());
+
+  bool streaming = true;
+  bool error = false;
+  HANDLE wait_array[] = {stop_event_.Get(), restart_event_.Get(),
+                         audio_samples_event_.Get()};
+
+  // The device frequency is the frequency generated by the hardware clock in
+  // the audio device. The GetFrequency() method reports a constant frequency.
+  UINT64 device_frequency = 0;
+  _com_error result(S_FALSE);
+  if (audio_clock_) {
+    RTC_DCHECK(IsOutput());
+    result = audio_clock_->GetFrequency(&device_frequency);
+    if (FAILED(result.Error())) {
+      RTC_LOG(LS_ERROR) << "IAudioClock::GetFrequency failed: "
+                        << core_audio_utility::ErrorToString(result);
+    }
+  }
+
+  // Keep streaming audio until the stop event or the stream-switch event
+  // is signaled. An error event can also break the main thread loop.
+  while (streaming && !error) {
+    // Wait for a close-down event, stream-switch event or a new render event.
+    DWORD wait_result = WaitForMultipleObjects(arraysize(wait_array),
+                                               wait_array, false, INFINITE);
+    switch (wait_result) {
+      case WAIT_OBJECT_0 + 0:
+        // `stop_event_` has been set.
+        streaming = false;
+        break;
+      case WAIT_OBJECT_0 + 1:
+        // `restart_event_` has been set.
+        error = !HandleRestartEvent();
+        break;
+      case WAIT_OBJECT_0 + 2:
+        // `audio_samples_event_` has been set.
+        error = !on_data_callback_(device_frequency);
+        break;
+      default:
+        error = true;
+        break;
+    }
+  }
+
+  if (streaming && error) {
+    RTC_LOG(LS_ERROR) << "[" << DirectionToString(direction())
+                      << "] WASAPI streaming failed.";
+    // Stop audio streaming since something has gone wrong in our main thread
+    // loop. Note that, we are still in a "started" state, hence a Stop() call
+    // is required to join the thread properly.
+    result = audio_client_->Stop();
+    if (FAILED(result.Error())) {
+      RTC_LOG(LS_ERROR) << "IAudioClient::Stop failed: "
+                        << core_audio_utility::ErrorToString(result);
+    }
+
+    // TODO(henrika): notify clients that something has gone wrong and that
+    // this stream should be destroyed instead of reused in the future.
+  }
+
+  RTC_DLOG(LS_INFO) << "[" << DirectionToString(direction())
+                    << "] ...ThreadRun stops";
+}
+
+}  // namespace webrtc_win
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.h b/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.h
new file mode 100644
index 0000000000..6c1357e059
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_base_win.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_BASE_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_BASE_WIN_H_
+
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/sequence_checker.h"
+#include "modules/audio_device/win/core_audio_utility_win.h"
+#include "rtc_base/platform_thread.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+class FineAudioBuffer;
+
+namespace webrtc_win {
+
+// Serves as base class for CoreAudioInput and CoreAudioOutput and supports
+// device handling and audio streaming where the direction (input or output)
+// is set at construction by the parent.
+// The IAudioSessionEvents interface provides notifications of session-related
+// events such as changes in the volume level, display name, and session state.
+// This class does not use the default ref-counting memory management method
+// provided by IUnknown: calling CoreAudioBase::Release() will not delete the
+// object. The client will receive notifications from the session manager on
+// a separate thread owned and controlled by the manager.
+// TODO(henrika): investigate if CoreAudioBase should implement
+// IMMNotificationClient as well (might improve support for device changes).
+class CoreAudioBase : public IAudioSessionEvents {
+ public:
+  enum class Direction {
+    kInput,
+    kOutput,
+  };
+
+  // TODO(henrika): add more error types.
+  enum class ErrorType {
+    kStreamDisconnected,
+  };
+
+  template <typename T>
+  auto as_integer(T const value) -> typename std::underlying_type<T>::type {
+    return static_cast<typename std::underlying_type<T>::type>(value);
+  }
+
+  // Callback definition for notifications of new audio data. For input
+  // clients, it means that "new audio data has now been captured", and for
+  // output clients, "the output layer now needs new audio data".
+  typedef std::function<bool(uint64_t)> OnDataCallback;
+
+  // Callback definition for notifications of run-time error messages. It can
+  // be called e.g. when an active audio device is removed and an audio stream
+  // is disconnected (`error` is then set to kStreamDisconnected). Both input
+  // and output clients implement OnErrorCallback() and will trigger an
+  // internal restart sequence for kStreamDisconnected.
+  // This method is currently always called on the audio thread.
+  // TODO(henrika): add support for more error types.
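+  // Example (illustrative): a derived client typically wires up both
+  // callbacks with lambdas that forward to its own handlers, e.g.:
+  //
+  //   CoreAudioBase(Direction::kInput, /*automatic_restart=*/true,
+  //                 [this](uint64_t freq) { return OnDataCallback(freq); },
+  //                 [this](ErrorType err) { return OnErrorCallback(err); })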
+  typedef std::function<bool(ErrorType)> OnErrorCallback;
+
+  void ThreadRun();
+
+  CoreAudioBase(const CoreAudioBase&) = delete;
+  CoreAudioBase& operator=(const CoreAudioBase&) = delete;
+
+ protected:
+  explicit CoreAudioBase(Direction direction,
+                         bool automatic_restart,
+                         OnDataCallback data_callback,
+                         OnErrorCallback error_callback);
+  ~CoreAudioBase();
+
+  std::string GetDeviceID(int index) const;
+  int SetDevice(int index);
+  int DeviceName(int index, std::string* name, std::string* guid) const;
+
+  // Checks if the current device ID is no longer in use (e.g. due to a
+  // disconnected stream), and if so, switches device to the default audio
+  // device. Called on the audio thread during restart attempts.
+  bool SwitchDeviceIfNeeded();
+
+  bool Init();
+  bool Start();
+  bool Stop();
+  bool IsVolumeControlAvailable(bool* available) const;
+  bool Restart();
+
+  Direction direction() const { return direction_; }
+  bool automatic_restart() const { return automatic_restart_; }
+
+  // Releases all allocated COM resources in the base class.
+  void ReleaseCOMObjects();
+
+  // Returns the number of active devices given the specified `direction_` set
+  // by the parent (input or output).
+  int NumberOfActiveDevices() const;
+
+  // Returns the total number of enumerated audio devices which is the sum of
+  // all active devices plus two extra (one default and one default
+  // communications). The value in `direction_` determines if capture or
+  // render devices are counted.
+  int NumberOfEnumeratedDevices() const;
+
+  bool IsInput() const;
+  bool IsOutput() const;
+  bool IsDefaultDevice(int index) const;
+  bool IsDefaultCommunicationsDevice(int index) const;
+  bool IsDefaultDeviceId(absl::string_view device_id) const;
+  bool IsDefaultCommunicationsDeviceId(absl::string_view device_id) const;
+  EDataFlow GetDataFlow() const;
+  bool IsRestarting() const;
+  int64_t TimeSinceStart() const;
+
+  // TODO(henrika): is the existing thread checker in WindowsAudioDeviceModule
+  // sufficient? As is, we have one top-level protection and then a second
+  // level here. In addition, calls to Init(), Start() and Stop() are not
+  // included to allow for support of internal restart (where these methods are
+  // called on the audio thread).
+  SequenceChecker thread_checker_;
+  SequenceChecker thread_checker_audio_;
+  AudioDeviceBuffer* audio_device_buffer_ = nullptr;
+  bool initialized_ = false;
+  WAVEFORMATEXTENSIBLE format_ = {};
+  uint32_t endpoint_buffer_size_frames_ = 0;
+  Microsoft::WRL::ComPtr<IAudioClock> audio_clock_;
+  Microsoft::WRL::ComPtr<IAudioClient> audio_client_;
+  bool is_active_ = false;
+  int64_t num_data_callbacks_ = 0;
+  int latency_ms_ = 0;
+  absl::optional<uint32_t> sample_rate_;
+
+ private:
+  const Direction direction_;
+  const bool automatic_restart_;
+  const OnDataCallback on_data_callback_;
+  const OnErrorCallback on_error_callback_;
+  ScopedHandle audio_samples_event_;
+  ScopedHandle stop_event_;
+  ScopedHandle restart_event_;
+  int64_t start_time_ = 0;
+  std::string device_id_;
+  int device_index_ = -1;
+  // Used by the IAudioSessionEvents implementations. Currently only utilized
+  // for debugging purposes.
+  LONG ref_count_ = 1;
+  // Set when the restart process starts and cleared when restart stops
+  // successfully. Accessed atomically.
+  std::atomic<bool> is_restarting_;
+  rtc::PlatformThread audio_thread_;
+  Microsoft::WRL::ComPtr<IAudioSessionControl> audio_session_control_;
+
+  void StopThread();
+  AudioSessionState GetAudioSessionState() const;
+
+  // Called on the audio thread when a restart event has been set.
+  // It will then trigger calls to the installed error callbacks with error
+  // type set to kStreamDisconnected.
+  bool HandleRestartEvent();
+
+  // IUnknown (required by IAudioSessionEvents and IMMNotificationClient).
+  ULONG __stdcall AddRef() override;
+  ULONG __stdcall Release() override;
+  HRESULT __stdcall QueryInterface(REFIID iid, void** object) override;
+
+  // IAudioSessionEvents implementation.
+  // These methods are called on separate threads owned by the session manager.
+  // More than one thread can be involved depending on the type of callback
+  // and audio session.
+  HRESULT __stdcall OnStateChanged(AudioSessionState new_state) override;
+  HRESULT __stdcall OnSessionDisconnected(
+      AudioSessionDisconnectReason disconnect_reason) override;
+  HRESULT __stdcall OnDisplayNameChanged(LPCWSTR new_display_name,
+                                         LPCGUID event_context) override;
+  HRESULT __stdcall OnIconPathChanged(LPCWSTR new_icon_path,
+                                      LPCGUID event_context) override;
+  HRESULT __stdcall OnSimpleVolumeChanged(float new_simple_volume,
+                                          BOOL new_mute,
+                                          LPCGUID event_context) override;
+  HRESULT __stdcall OnChannelVolumeChanged(DWORD channel_count,
+                                           float new_channel_volumes[],
+                                           DWORD changed_channel,
+                                           LPCGUID event_context) override;
+  HRESULT __stdcall OnGroupingParamChanged(LPCGUID new_grouping_param,
+                                           LPCGUID event_context) override;
+};
+
+}  // namespace webrtc_win
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_BASE_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.cc b/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.cc
new file mode 100644
index 0000000000..17790dafc4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.cc
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/core_audio_input_win.h"
+
+#include <memory>
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/fine_audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+using Microsoft::WRL::ComPtr;
+
+namespace webrtc {
+namespace webrtc_win {
+
+enum AudioDeviceMessageType : uint32_t {
+  kMessageInputStreamDisconnected,
+};
+
+CoreAudioInput::CoreAudioInput(bool automatic_restart)
+    : CoreAudioBase(
+          CoreAudioBase::Direction::kInput,
+          automatic_restart,
+          [this](uint64_t freq) { return OnDataCallback(freq); },
+          [this](ErrorType err) { return OnErrorCallback(err); }) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  thread_checker_audio_.Detach();
+}
+
+CoreAudioInput::~CoreAudioInput() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+}
+
+int CoreAudioInput::Init() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return 0;
+}
+
+int CoreAudioInput::Terminate() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  StopRecording();
+  return 0;
+}
+
+int CoreAudioInput::NumDevices() const {
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return core_audio_utility::NumberOfActiveDevices(eCapture);
+}
+
+int CoreAudioInput::SetDevice(int index) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << index;
+  RTC_DCHECK_GE(index, 0);
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return CoreAudioBase::SetDevice(index);
+}
+
+int CoreAudioInput::SetDevice(AudioDeviceModule::WindowsDeviceType device) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": "
+                    << ((device == AudioDeviceModule::kDefaultDevice)
+                            ? "Default"
+                            : "DefaultCommunication");
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return SetDevice((device == AudioDeviceModule::kDefaultDevice) ? 0 : 1);
+}
+
+int CoreAudioInput::DeviceName(int index,
+                               std::string* name,
+                               std::string* guid) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << index;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  RTC_DCHECK(name);
+  return CoreAudioBase::DeviceName(index, name, guid);
+}
+
+void CoreAudioInput::AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  audio_device_buffer_ = audio_buffer;
+}
+
+bool CoreAudioInput::RecordingIsInitialized() const {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << initialized_;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return initialized_;
+}
+
+int CoreAudioInput::InitRecording() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK(!initialized_);
+  RTC_DCHECK(!Recording());
+  RTC_DCHECK(!audio_capture_client_);
+
+  // Creates an IAudioClient instance and stores the valid interface pointer in
+  // `audio_client3_`, `audio_client2_`, or `audio_client_` depending on
+  // platform support. The base class will use optimal input parameters and do
+  // an event driven shared mode initialization. The utilized format will be
+  // stored in `format_` and can be used for configuration and allocation of
+  // audio buffers.
+  if (!CoreAudioBase::Init()) {
+    return -1;
+  }
+  RTC_DCHECK(audio_client_);
+
+  // Configure the recording side of the audio device buffer using `format_`
+  // after a trivial sanity check of the format structure.
+  RTC_DCHECK(audio_device_buffer_);
+  WAVEFORMATEX* format = &format_.Format;
+  RTC_DCHECK_EQ(format->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
+  audio_device_buffer_->SetRecordingSampleRate(format->nSamplesPerSec);
+  audio_device_buffer_->SetRecordingChannels(format->nChannels);
+
+  // Create a modified audio buffer class which allows us to supply any number
+  // of samples (and not only multiples of 10ms) to match the optimal buffer
+  // size per callback used by Core Audio.
+  // TODO(henrika): can we share one FineAudioBuffer with the output side?
+  fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+
+  // Create an IAudioCaptureClient for an initialized IAudioClient.
+  // The IAudioCaptureClient interface enables a client to read input data from
+  // a capture endpoint buffer.
+  ComPtr<IAudioCaptureClient> audio_capture_client =
+      core_audio_utility::CreateCaptureClient(audio_client_.Get());
+  if (!audio_capture_client) {
+    return -1;
+  }
+
+  // Query the performance frequency.
+  LARGE_INTEGER ticks_per_sec = {};
+  qpc_to_100ns_.reset();
+  if (::QueryPerformanceFrequency(&ticks_per_sec)) {
+    double qpc_ticks_per_second =
+        rtc::dchecked_cast<double>(ticks_per_sec.QuadPart);
+    qpc_to_100ns_ = 10000000.0 / qpc_ticks_per_second;
+  }
+
+  // Store valid COM interfaces.
+  audio_capture_client_ = audio_capture_client;
+
+  initialized_ = true;
+  return 0;
+}
+
+int CoreAudioInput::StartRecording() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK(!Recording());
+  RTC_DCHECK(fine_audio_buffer_);
+  RTC_DCHECK(audio_device_buffer_);
+  if (!initialized_) {
+    RTC_DLOG(LS_WARNING)
+        << "Recording can not start since InitRecording must succeed first";
+    return 0;
+  }
+
+  fine_audio_buffer_->ResetRecord();
+  if (!IsRestarting()) {
+    audio_device_buffer_->StartRecording();
+  }
+
+  if (!Start()) {
+    return -1;
+  }
+
+  is_active_ = true;
+  return 0;
+}
+
+int CoreAudioInput::StopRecording() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  if (!initialized_) {
+    return 0;
+  }
+
+  // Release resources allocated in InitRecording() and then return if this
+  // method is called without any active input audio.
+  if (!Recording()) {
+    RTC_DLOG(LS_WARNING) << "No input stream is active";
+    ReleaseCOMObjects();
+    initialized_ = false;
+    return 0;
+  }
+
+  if (!Stop()) {
+    RTC_LOG(LS_ERROR) << "StopRecording failed";
+    return -1;
+  }
+
+  if (!IsRestarting()) {
+    RTC_DCHECK(audio_device_buffer_);
+    audio_device_buffer_->StopRecording();
+  }
+
+  // Release all allocated resources to allow for a restart without
+  // intermediate destruction.
+  ReleaseCOMObjects();
+  qpc_to_100ns_.reset();
+
+  initialized_ = false;
+  is_active_ = false;
+  return 0;
+}
+
+bool CoreAudioInput::Recording() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << is_active_;
+  return is_active_;
+}
+
+// TODO(henrika): finalize support of audio session volume control. As is, we
+// are not compatible with the old ADM implementation since it allows accessing
+// the volume control with any active audio output stream.
+int CoreAudioInput::VolumeIsAvailable(bool* available) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return IsVolumeControlAvailable(available) ? 0 : -1;
+}
+
+// Triggers the restart sequence. Only used for testing purposes to emulate
+// a real event where e.g. an active input device is removed.
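+// Example (illustrative, assuming a gtest-based test): the restart path can
+// be exercised without unplugging hardware by calling the method directly
+// on an active stream:
+//
+//   CoreAudioInput input(/*automatic_restart=*/true);
+//   // ... attach buffer, InitRecording(), StartRecording() ...
+//   EXPECT_EQ(0, input.RestartRecording());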
+int CoreAudioInput::RestartRecording() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  if (!Recording()) {
+    return 0;
+  }
+
+  if (!Restart()) {
+    RTC_LOG(LS_ERROR) << "RestartRecording failed";
+    return -1;
+  }
+  return 0;
+}
+
+bool CoreAudioInput::Restarting() const {
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return IsRestarting();
+}
+
+int CoreAudioInput::SetSampleRate(uint32_t sample_rate) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  sample_rate_ = sample_rate;
+  return 0;
+}
+
+void CoreAudioInput::ReleaseCOMObjects() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  CoreAudioBase::ReleaseCOMObjects();
+  if (audio_capture_client_.Get()) {
+    audio_capture_client_.Reset();
+  }
+}
+
+bool CoreAudioInput::OnDataCallback(uint64_t device_frequency) {
+  RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+
+  if (!initialized_ || !is_active_) {
+    // This is a concurrent examination of state across multiple threads so it
+    // will be somewhat error prone, but we should still be defensive and not
+    // use audio_capture_client_ if we know it's not there.
+    return false;
+  }
+  if (num_data_callbacks_ == 0) {
+    RTC_LOG(LS_INFO) << "--- Input audio stream is alive ---";
+  }
+  UINT32 num_frames_in_next_packet = 0;
+  _com_error error =
+      audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
+  if (error.Error() == AUDCLNT_E_DEVICE_INVALIDATED) {
+    // Avoid breaking the thread loop implicitly by returning false; return
+    // true instead for AUDCLNT_E_DEVICE_INVALIDATED even if it is a valid
+    // error message. We will use notifications about device changes instead
+    // to stop data callbacks and attempt to restart streaming.
+    RTC_DLOG(LS_ERROR) << "AUDCLNT_E_DEVICE_INVALIDATED";
+    return true;
+  }
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetNextPacketSize failed: "
+                      << core_audio_utility::ErrorToString(error);
+    return false;
+  }
+
+  // Drain the WASAPI capture buffer fully if audio has been recorded.
+  while (num_frames_in_next_packet > 0) {
+    uint8_t* audio_data;
+    UINT32 num_frames_to_read = 0;
+    DWORD flags = 0;
+    UINT64 device_position_frames = 0;
+    UINT64 capture_time_100ns = 0;
+    error = audio_capture_client_->GetBuffer(&audio_data, &num_frames_to_read,
+                                             &flags, &device_position_frames,
+                                             &capture_time_100ns);
+    if (error.Error() == AUDCLNT_S_BUFFER_EMPTY) {
+      // The call succeeded but no capture data is available to be read.
+      // Return and start waiting for a new capture event.
+      RTC_DCHECK_EQ(num_frames_to_read, 0u);
+      return true;
+    }
+    if (FAILED(error.Error())) {
+      RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetBuffer failed: "
+                        << core_audio_utility::ErrorToString(error);
+      return false;
+    }
+
+    // Update the input delay estimate but only about once per second to save
+    // resources. The estimate is usually stable.
+    if (num_data_callbacks_ % 100 == 0) {
+      absl::optional<int> opt_record_delay_ms;
+      // TODO(henrika): note that FineAudioBuffer adds latency as well.
+      opt_record_delay_ms = EstimateLatencyMillis(capture_time_100ns);
+      if (opt_record_delay_ms) {
+        latency_ms_ = *opt_record_delay_ms;
+      } else {
+        RTC_DLOG(LS_WARNING) << "Input latency is set to fixed value";
+        latency_ms_ = 20;
+      }
+    }
+    if (num_data_callbacks_ % 500 == 0) {
+      RTC_DLOG(LS_INFO) << "latency: " << latency_ms_;
+    }
+
+    // The data in the packet is not correlated with the previous packet's
+    // device position; possibly due to a stream state transition or timing
+    // glitch.
+    // The behavior of the AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY flag is
+    // undefined on the application's first call to GetBuffer after Start.
+    if (device_position_frames != 0 &&
+        flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY) {
+      RTC_DLOG(LS_WARNING) << "AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY";
+    }
+    // The time at which the device's stream position was recorded is
+    // uncertain. Thus, the client might be unable to accurately set a time
+    // stamp for the current data packet.
+    if (flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR) {
+      RTC_DLOG(LS_WARNING) << "AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR";
+    }
+
+    // Treat all of the data in the packet as silence and ignore the actual
+    // data values when AUDCLNT_BUFFERFLAGS_SILENT is set.
+    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
+      rtc::ExplicitZeroMemory(audio_data,
+                              format_.Format.nBlockAlign * num_frames_to_read);
+      RTC_DLOG(LS_WARNING) << "Captured audio is replaced by silence";
+    } else {
+      // Copy recorded audio in `audio_data` to the WebRTC sink using the
+      // FineAudioBuffer object.
+      fine_audio_buffer_->DeliverRecordedData(
+          rtc::MakeArrayView(reinterpret_cast<const int16_t*>(audio_data),
+                             format_.Format.nChannels * num_frames_to_read),
+          latency_ms_);
+    }
+
+    error = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
+    if (FAILED(error.Error())) {
+      RTC_LOG(LS_ERROR) << "IAudioCaptureClient::ReleaseBuffer failed: "
+                        << core_audio_utility::ErrorToString(error);
+      return false;
+    }
+
+    error =
+        audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
+    if (FAILED(error.Error())) {
+      RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetNextPacketSize failed: "
+                        << core_audio_utility::ErrorToString(error);
+      return false;
+    }
+  }
+  ++num_data_callbacks_;
+  return true;
+}
+
+bool CoreAudioInput::OnErrorCallback(ErrorType error) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << as_integer(error);
+  RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+  if (error == CoreAudioBase::ErrorType::kStreamDisconnected) {
+    HandleStreamDisconnected();
+  } else {
+    RTC_DLOG(LS_WARNING) << "Unsupported error type";
+  }
+  return true;
+}
+
+absl::optional<int> CoreAudioInput::EstimateLatencyMillis(
+    uint64_t capture_time_100ns) {
+  if (!qpc_to_100ns_) {
+    return absl::nullopt;
+  }
+  // Input parameter `capture_time_100ns` contains the performance counter at
+  // the time that the audio endpoint device recorded the device position of
+  // the first audio frame in the data packet converted into 100ns units.
+  // We derive a delay estimate by:
+  // - sampling the current performance counter (qpc_now_raw),
+  // - converting it into 100ns time units (now_time_100ns), and
+  // - subtracting `capture_time_100ns` from now_time_100ns.
+  LARGE_INTEGER perf_counter_now = {};
+  if (!::QueryPerformanceCounter(&perf_counter_now)) {
+    return absl::nullopt;
+  }
+  uint64_t qpc_now_raw = perf_counter_now.QuadPart;
+  uint64_t now_time_100ns = qpc_now_raw * (*qpc_to_100ns_);
+  webrtc::TimeDelta delay_us = webrtc::TimeDelta::Micros(
+      0.1 * (now_time_100ns - capture_time_100ns) + 0.5);
+  return delay_us.ms();
+}
+
+// Called from OnErrorCallback() when the error type is kStreamDisconnected.
+// Note that this method is called on the audio thread and the internal restart
+// sequence is also executed on that same thread. The audio thread is therefore
+// not stopped during restart. Such a scheme also makes the restart process
+// less complex.
+// Note that none of the called methods are thread checked since they can also
+// be called on the main thread.
+// Thread checkers are instead added on one layer above (in
+// audio_device_module.cc) which ensures that the public API is thread safe.
+// TODO(henrika): add more details.
+bool CoreAudioInput::HandleStreamDisconnected() {
+  RTC_DLOG(LS_INFO) << "<<<--- " << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+  RTC_DCHECK(automatic_restart());
+
+  if (StopRecording() != 0) {
+    return false;
+  }
+
+  if (!SwitchDeviceIfNeeded()) {
+    return false;
+  }
+
+  if (InitRecording() != 0) {
+    return false;
+  }
+  if (StartRecording() != 0) {
+    return false;
+  }
+
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << " --->>>";
+  return true;
+}
+
+}  // namespace webrtc_win
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.h b/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.h
new file mode 100644
index 0000000000..be290f9f4e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_input_win.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_INPUT_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_INPUT_WIN_H_
+
+#include <memory>
+#include <string>
+
+#include "absl/types/optional.h"
+#include "modules/audio_device/win/audio_device_module_win.h"
+#include "modules/audio_device/win/core_audio_base_win.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+class FineAudioBuffer;
+
+namespace webrtc_win {
+
+// Windows specific AudioInput implementation using a CoreAudioBase class where
+// an input direction is set at construction. Supports capture device handling
+// and streaming of captured audio to a WebRTC client.
+class CoreAudioInput final : public CoreAudioBase, public AudioInput {
+ public:
+  CoreAudioInput(bool automatic_restart);
+  ~CoreAudioInput() override;
+
+  // AudioInput implementation.
+  int Init() override;
+  int Terminate() override;
+  int NumDevices() const override;
+  int SetDevice(int index) override;
+  int SetDevice(AudioDeviceModule::WindowsDeviceType device) override;
+  int DeviceName(int index, std::string* name, std::string* guid) override;
+  void AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) override;
+  bool RecordingIsInitialized() const override;
+  int InitRecording() override;
+  int StartRecording() override;
+  int StopRecording() override;
+  bool Recording() override;
+  int VolumeIsAvailable(bool* available) override;
+  int RestartRecording() override;
+  bool Restarting() const override;
+  int SetSampleRate(uint32_t sample_rate) override;
+
+  CoreAudioInput(const CoreAudioInput&) = delete;
+  CoreAudioInput& operator=(const CoreAudioInput&) = delete;
+
+ private:
+  void ReleaseCOMObjects();
+  bool OnDataCallback(uint64_t device_frequency);
+  bool OnErrorCallback(ErrorType error);
+  absl::optional<int> EstimateLatencyMillis(uint64_t capture_time_100ns);
+  bool HandleStreamDisconnected();
+
+  std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+  Microsoft::WRL::ComPtr<IAudioCaptureClient> audio_capture_client_;
+  absl::optional<double> qpc_to_100ns_;
+};
+
+}  // namespace webrtc_win
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_INPUT_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.cc b/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.cc
new file mode 100644
index 0000000000..c92fedf0e9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/core_audio_output_win.h"
+
+#include <memory>
+
+#include "modules/audio_device/audio_device_buffer.h"
+#include "modules/audio_device/fine_audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/time_utils.h"
+
+using Microsoft::WRL::ComPtr;
+
+namespace webrtc {
+namespace webrtc_win {
+
+CoreAudioOutput::CoreAudioOutput(bool automatic_restart)
+    : CoreAudioBase(
+          CoreAudioBase::Direction::kOutput,
+          automatic_restart,
+          [this](uint64_t freq) { return OnDataCallback(freq); },
+          [this](ErrorType err) { return OnErrorCallback(err); }) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  thread_checker_audio_.Detach();
+}
+
+CoreAudioOutput::~CoreAudioOutput() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  Terminate();
+}
+
+int CoreAudioOutput::Init() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return 0;
+}
+
+int CoreAudioOutput::Terminate() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  StopPlayout();
+  return 0;
+}
+
+int CoreAudioOutput::NumDevices() const {
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return core_audio_utility::NumberOfActiveDevices(eRender);
+}
+
+int CoreAudioOutput::SetDevice(int index) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << index;
+  RTC_DCHECK_GE(index, 0);
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return CoreAudioBase::SetDevice(index);
+}
+
+int CoreAudioOutput::SetDevice(AudioDeviceModule::WindowsDeviceType device) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": "
+                    << ((device == AudioDeviceModule::kDefaultDevice)
+                            ? "Default"
+                            : "DefaultCommunication");
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return SetDevice((device == AudioDeviceModule::kDefaultDevice) ? 0 : 1);
+}
+
+int CoreAudioOutput::DeviceName(int index,
+                                std::string* name,
+                                std::string* guid) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << index;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  RTC_DCHECK(name);
+  return CoreAudioBase::DeviceName(index, name, guid);
+}
+
+void CoreAudioOutput::AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  audio_device_buffer_ = audio_buffer;
+}
+
+bool CoreAudioOutput::PlayoutIsInitialized() const {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return initialized_;
+}
+
+int CoreAudioOutput::InitPlayout() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << IsRestarting();
+  RTC_DCHECK(!initialized_);
+  RTC_DCHECK(!Playing());
+  RTC_DCHECK(!audio_render_client_);
+
+  // Creates an IAudioClient instance and stores the valid interface pointer in
+  // `audio_client3_`, `audio_client2_`, or `audio_client_` depending on
+  // platform support. The base class will use optimal output parameters and do
+  // an event driven shared mode initialization. The utilized format will be
+  // stored in `format_` and can be used for configuration and allocation of
+  // audio buffers.
+  if (!CoreAudioBase::Init()) {
+    return -1;
+  }
+  RTC_DCHECK(audio_client_);
+
+  // Configure the playout side of the audio device buffer using `format_`
+  // after a trivial sanity check of the format structure.
+  RTC_DCHECK(audio_device_buffer_);
+  WAVEFORMATEX* format = &format_.Format;
+  RTC_DCHECK_EQ(format->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
+  audio_device_buffer_->SetPlayoutSampleRate(format->nSamplesPerSec);
+  audio_device_buffer_->SetPlayoutChannels(format->nChannels);
+
+  // Create a modified audio buffer class which allows us to ask for any number
+  // of samples (and not only multiples of 10ms) to match the optimal
+  // buffer size per callback used by Core Audio.
+  // TODO(henrika): can we share one FineAudioBuffer with the input side?
+  fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
+
+  // Create an IAudioRenderClient for an initialized IAudioClient.
+  // The IAudioRenderClient interface enables us to write output data to
+  // a rendering endpoint buffer.
+  ComPtr<IAudioRenderClient> audio_render_client =
+      core_audio_utility::CreateRenderClient(audio_client_.Get());
+  if (!audio_render_client.Get()) {
+    return -1;
+  }
+
+  ComPtr<IAudioClock> audio_clock =
+      core_audio_utility::CreateAudioClock(audio_client_.Get());
+  if (!audio_clock.Get()) {
+    return -1;
+  }
+
+  // Store valid COM interfaces.
+  audio_render_client_ = audio_render_client;
+  audio_clock_ = audio_clock;
+
+  initialized_ = true;
+  return 0;
+}
+
+int CoreAudioOutput::StartPlayout() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << IsRestarting();
+  RTC_DCHECK(!Playing());
+  RTC_DCHECK(fine_audio_buffer_);
+  RTC_DCHECK(audio_device_buffer_);
+  if (!initialized_) {
+    RTC_DLOG(LS_WARNING)
+        << "Playout can not start since InitPlayout must succeed first";
+    return 0;
+  }
+
+  fine_audio_buffer_->ResetPlayout();
+  if (!IsRestarting()) {
+    audio_device_buffer_->StartPlayout();
+  }
+
+  if (!core_audio_utility::FillRenderEndpointBufferWithSilence(
+          audio_client_.Get(), audio_render_client_.Get())) {
+    RTC_LOG(LS_WARNING) << "Failed to prepare output endpoint with silence";
+  }
+
+  num_frames_written_ = endpoint_buffer_size_frames_;
+
+  if (!Start()) {
+    return -1;
+  }
+
+  is_active_ = true;
+  return 0;
+}
+
+int CoreAudioOutput::StopPlayout() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << IsRestarting();
+  if (!initialized_) {
+    return 0;
+  }
+
+  // Release resources allocated in InitPlayout() and then return if this
+  // method is called without any active output audio.
+  if (!Playing()) {
+    RTC_DLOG(LS_WARNING) << "No output stream is active";
+    ReleaseCOMObjects();
+    initialized_ = false;
+    return 0;
+  }
+
+  if (!Stop()) {
+    RTC_LOG(LS_ERROR) << "StopPlayout failed";
+    return -1;
+  }
+
+  if (!IsRestarting()) {
+    RTC_DCHECK(audio_device_buffer_);
+    audio_device_buffer_->StopPlayout();
+  }
+
+  // Release all allocated resources to allow for a restart without
+  // intermediate destruction.
+  ReleaseCOMObjects();
+
+  initialized_ = false;
+  is_active_ = false;
+  return 0;
+}
+
+bool CoreAudioOutput::Playing() {
+  RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << is_active_;
+  return is_active_;
+}
+
+// TODO(henrika): finalize support of audio session volume control. As is, we
+// are not compatible with the old ADM implementation since it allows accessing
+// the volume control with any active audio output stream.
+int CoreAudioOutput::VolumeIsAvailable(bool* available) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+  RTC_DCHECK_RUN_ON(&thread_checker_);
+  return IsVolumeControlAvailable(available) ? 0 : -1;
+}
+
+// Triggers the restart sequence. Only used for testing purposes to emulate
+// a real event where e.g. an active output device is removed.
+int CoreAudioOutput::RestartPlayout() {
+ RTC_DLOG(LS_INFO) << __FUNCTION__;
+ RTC_DCHECK_RUN_ON(&thread_checker_);
+ if (!Playing()) {
+ return 0;
+ }
+ if (!Restart()) {
+ RTC_LOG(LS_ERROR) << "RestartPlayout failed";
+ return -1;
+ }
+ return 0;
+}
+
+bool CoreAudioOutput::Restarting() const {
+ RTC_DLOG(LS_INFO) << __FUNCTION__;
+ RTC_DCHECK_RUN_ON(&thread_checker_);
+ return IsRestarting();
+}
+
+int CoreAudioOutput::SetSampleRate(uint32_t sample_rate) {
+ RTC_DLOG(LS_INFO) << __FUNCTION__;
+ RTC_DCHECK_RUN_ON(&thread_checker_);
+ sample_rate_ = sample_rate;
+ return 0;
+}
+
+void CoreAudioOutput::ReleaseCOMObjects() {
+ RTC_DLOG(LS_INFO) << __FUNCTION__;
+ CoreAudioBase::ReleaseCOMObjects();
+ if (audio_render_client_.Get()) {
+ audio_render_client_.Reset();
+ }
+}
+
+bool CoreAudioOutput::OnErrorCallback(ErrorType error) {
+ RTC_DLOG(LS_INFO) << __FUNCTION__ << ": " << as_integer(error);
+ RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+ if (!initialized_ || !Playing()) {
+ return true;
+ }
+
+ if (error == CoreAudioBase::ErrorType::kStreamDisconnected) {
+ HandleStreamDisconnected();
+ } else {
+ RTC_DLOG(LS_WARNING) << "Unsupported error type";
+ }
+ return true;
+}
+
+bool CoreAudioOutput::OnDataCallback(uint64_t device_frequency) {
+ RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+ if (num_data_callbacks_ == 0) {
+ RTC_LOG(LS_INFO) << "--- Output audio stream is alive ---";
+ }
+ // Get the padding value which indicates the amount of valid unread data that
+ // the endpoint buffer currently contains.
+ UINT32 num_unread_frames = 0;
+ _com_error error = audio_client_->GetCurrentPadding(&num_unread_frames);
+ if (error.Error() == AUDCLNT_E_DEVICE_INVALIDATED) {
+ // Avoid breaking the thread loop implicitly by returning false; return
+ // true instead for AUDCLNT_E_DEVICE_INVALIDATED even though it is a valid
+ // error message. We will use notifications about device changes instead to
+ // stop data callbacks and attempt to restart streaming.
+ RTC_DLOG(LS_ERROR) << "AUDCLNT_E_DEVICE_INVALIDATED";
+ return true;
+ }
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::GetCurrentPadding failed: "
+ << core_audio_utility::ErrorToString(error);
+ return false;
+ }
+
+ // Contains how much new data we can write to the buffer without the risk of
+ // overwriting previously written data that the audio engine has not yet read
+ // from the buffer. I.e., it is the maximum buffer size we can request when
+ // calling IAudioRenderClient::GetBuffer().
+ UINT32 num_requested_frames =
+ endpoint_buffer_size_frames_ - num_unread_frames;
+ if (num_requested_frames == 0) {
+ RTC_DLOG(LS_WARNING)
+ << "Audio thread is signaled but no new audio samples are needed";
+ return true;
+ }
+
+ // Request all available space in the rendering endpoint buffer into which
+ // the client can later write an audio packet.
+ uint8_t* audio_data;
+ error = audio_render_client_->GetBuffer(num_requested_frames, &audio_data);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioRenderClient::GetBuffer failed: "
+ << core_audio_utility::ErrorToString(error);
+ return false;
+ }
+
+ // Update output delay estimate but only about once per second to save
+ // resources. The estimate is usually stable.
+ if (num_data_callbacks_ % 100 == 0) {
+ // TODO(henrika): note that FineAudioBuffer adds latency as well.
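+ // For illustration: with the typical 10 ms event-driven period, 100
+ // callbacks correspond to roughly one second, so the estimate below is
+ // refreshed about once per second and logged about every five seconds.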
+ latency_ms_ = EstimateOutputLatencyMillis(device_frequency);
+ if (num_data_callbacks_ % 500 == 0) {
+ RTC_DLOG(LS_INFO) << "latency: " << latency_ms_;
+ }
+ }
+
+ // Get audio data from WebRTC and write it to the allocated buffer in
+ // `audio_data`. The playout latency is not updated for each callback.
+ fine_audio_buffer_->GetPlayoutData(
+ rtc::MakeArrayView(reinterpret_cast<int16_t*>(audio_data),
+ num_requested_frames * format_.Format.nChannels),
+ latency_ms_);
+
+ // Release the buffer space acquired in IAudioRenderClient::GetBuffer.
+ error = audio_render_client_->ReleaseBuffer(num_requested_frames, 0);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioRenderClient::ReleaseBuffer failed: "
+ << core_audio_utility::ErrorToString(error);
+ return false;
+ }
+
+ num_frames_written_ += num_requested_frames;
+ ++num_data_callbacks_;
+
+ return true;
+}
+
+// TODO(henrika): IAudioClock2::GetDevicePosition could perhaps be used here
+// instead. Tried it once, but it crashed for capture devices.
+int CoreAudioOutput::EstimateOutputLatencyMillis(uint64_t device_frequency) {
+ UINT64 position = 0;
+ UINT64 qpc_position = 0;
+ int delay_ms = 0;
+ // Get the device position through output parameter `position`. This is the
+ // stream position of the sample that is currently playing through the
+ // speakers.
+ _com_error error = audio_clock_->GetPosition(&position, &qpc_position);
+ if (error.Error() == S_OK) {
+ // Number of frames already played out through the speaker.
+ const uint64_t num_played_out_frames =
+ format_.Format.nSamplesPerSec * position / device_frequency;
+
+ // Number of frames that have been written to the buffer but not yet
+ // played out corresponding to the estimated latency measured in number
+ // of audio frames.
+ const uint64_t delay_frames = num_frames_written_ - num_played_out_frames;
+
+ // Convert latency in number of frames into milliseconds.
+ webrtc::TimeDelta delay =
+ webrtc::TimeDelta::Micros(delay_frames * rtc::kNumMicrosecsPerSec /
+ format_.Format.nSamplesPerSec);
+ delay_ms = delay.ms();
+ }
+ return delay_ms;
+}
+
+// Called from OnErrorCallback() when error type is kStreamDisconnected.
+// Note that this method is called on the audio thread and the internal restart
+// sequence is also executed on that same thread. The audio thread is therefore
+// not stopped during restart. Such a scheme also makes the restart process less
+// complex.
+// Note that none of the called methods are thread checked since they can also
+// be called on the main thread. Thread checkers are instead added on one layer
+// above (in audio_device_module.cc) which ensures that the public API is thread
+// safe.
+// TODO(henrika): add more details.
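+// In short, the restart sequence below is StopPlayout() ->
+// SwitchDeviceIfNeeded() -> InitPlayout() -> StartPlayout(); any failing step
+// aborts the restart and leaves the stream stopped.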
+bool CoreAudioOutput::HandleStreamDisconnected() {
+ RTC_DLOG(LS_INFO) << "<<<--- " << __FUNCTION__;
+ RTC_DCHECK_RUN_ON(&thread_checker_audio_);
+ RTC_DCHECK(automatic_restart());
+
+ if (StopPlayout() != 0) {
+ return false;
+ }
+
+ if (!SwitchDeviceIfNeeded()) {
+ return false;
+ }
+
+ if (InitPlayout() != 0) {
+ return false;
+ }
+ if (StartPlayout() != 0) {
+ return false;
+ }
+
+ RTC_DLOG(LS_INFO) << __FUNCTION__ << " --->>>";
+ return true;
+}
+
+} // namespace webrtc_win
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.h b/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.h
new file mode 100644
index 0000000000..5a547498a3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_output_win.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_OUTPUT_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_OUTPUT_WIN_H_
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_device/win/audio_device_module_win.h"
+#include "modules/audio_device/win/core_audio_base_win.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+class FineAudioBuffer;
+
+namespace webrtc_win {
+
+// Windows specific AudioOutput implementation using a CoreAudioBase class where
+// an output direction is set at construction. Supports render device handling
+// and streaming of decoded audio from a WebRTC client to the native audio
+// layer.
+class CoreAudioOutput final : public CoreAudioBase, public AudioOutput {
+ public:
+ CoreAudioOutput(bool automatic_restart);
+ ~CoreAudioOutput() override;
+
+ // AudioOutput implementation.
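+ // A minimal usage sketch (illustrative only; `audio_buffer` is a
+ // hypothetical AudioDeviceBuffer*):
+ //   CoreAudioOutput output(/*automatic_restart=*/true);
+ //   output.AttachAudioBuffer(audio_buffer);
+ //   output.Init();
+ //   output.SetDevice(AudioDeviceModule::kDefaultDevice);
+ //   output.InitPlayout();
+ //   output.StartPlayout();
+ //   ...
+ //   output.StopPlayout();
+ //   output.Terminate();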
+ int Init() override;
+ int Terminate() override;
+ int NumDevices() const override;
+ int SetDevice(int index) override;
+ int SetDevice(AudioDeviceModule::WindowsDeviceType device) override;
+ int DeviceName(int index, std::string* name, std::string* guid) override;
+ void AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) override;
+ bool PlayoutIsInitialized() const override;
+ int InitPlayout() override;
+ int StartPlayout() override;
+ int StopPlayout() override;
+ bool Playing() override;
+ int VolumeIsAvailable(bool* available) override;
+ int RestartPlayout() override;
+ bool Restarting() const override;
+ int SetSampleRate(uint32_t sample_rate) override;
+
+ CoreAudioOutput(const CoreAudioOutput&) = delete;
+ CoreAudioOutput& operator=(const CoreAudioOutput&) = delete;
+
+ private:
+ void ReleaseCOMObjects();
+ bool OnDataCallback(uint64_t device_frequency);
+ bool OnErrorCallback(ErrorType error);
+ int EstimateOutputLatencyMillis(uint64_t device_frequency);
+ bool HandleStreamDisconnected();
+
+ std::unique_ptr<FineAudioBuffer> fine_audio_buffer_;
+ Microsoft::WRL::ComPtr<IAudioRenderClient> audio_render_client_;
+ Microsoft::WRL::ComPtr<IAudioClock> audio_clock_;
+ uint64_t num_frames_written_ = 0;
+};
+
+} // namespace webrtc_win
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_OUTPUT_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.cc b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.cc
new file mode 100644
index 0000000000..e4e2864db5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.cc
@@ -0,0 +1,1529 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/core_audio_utility_win.h"
+
+#include <functiondiscoverykeys_devpkey.h>
+#include <stdio.h>
+#include <tchar.h>
+
+#include <iomanip>
+#include <string>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/platform_thread_types.h"
+#include "rtc_base/string_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/win/windows_version.h"
+
+using Microsoft::WRL::ComPtr;
+using webrtc::AudioDeviceName;
+using webrtc::AudioParameters;
+
+namespace webrtc {
+namespace webrtc_win {
+namespace {
+
+using core_audio_utility::ErrorToString;
+
+// Converts from channel mask to list of included channels.
+// Each audio data format contains channels for one or more of the positions
+// listed below. The number of channels simply equals the number of nonzero
+// flag bits in the `channel_mask`. The relative positions of the channels
+// within each block of audio data always follow the same relative ordering
+// as the flag bits in the table below. For example, if `channel_mask` contains
+// the value 0x00000033, the format defines four audio channels that are
+// assigned for playback to the front-left, front-right, back-left,
+// and back-right speakers, respectively. The channel data should be interleaved
+// in that order within each block.
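+// For example, ChannelMaskToString(0x00000033) returns
+// "FRONT_LEFT | FRONT_RIGHT | BACK_LEFT | BACK_RIGHT (4)".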
+std::string ChannelMaskToString(DWORD channel_mask) { + std::string ss; + int n = 0; + if (channel_mask & SPEAKER_FRONT_LEFT) { + ss += "FRONT_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_RIGHT) { + ss += "FRONT_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_CENTER) { + ss += "FRONT_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_LOW_FREQUENCY) { + ss += "LOW_FREQUENCY | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_LEFT) { + ss += "BACK_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_RIGHT) { + ss += "BACK_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_LEFT_OF_CENTER) { + ss += "FRONT_LEFT_OF_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_RIGHT_OF_CENTER) { + ss += "RIGHT_OF_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_CENTER) { + ss += "BACK_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_SIDE_LEFT) { + ss += "SIDE_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_SIDE_RIGHT) { + ss += "SIDE_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_CENTER) { + ss += "TOP_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_LEFT) { + ss += "TOP_FRONT_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_CENTER) { + ss += "TOP_FRONT_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_RIGHT) { + ss += "TOP_FRONT_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_LEFT) { + ss += "TOP_BACK_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_CENTER) { + ss += "TOP_BACK_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_RIGHT) { + ss += "TOP_BACK_RIGHT | "; + ++n; + } + + if (!ss.empty()) { + // Delete last appended " | " substring. + ss.erase(ss.end() - 3, ss.end()); + } + ss += " ("; + ss += std::to_string(n); + ss += ")"; + return ss; +} + +#if !defined(KSAUDIO_SPEAKER_1POINT1) +// These values are only defined in ksmedia.h after a certain version, to build +// cleanly for older windows versions this just defines the ones that are +// missing. +#define KSAUDIO_SPEAKER_1POINT1 (SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY) +#define KSAUDIO_SPEAKER_2POINT1 \ + (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_LOW_FREQUENCY) +#define KSAUDIO_SPEAKER_3POINT0 \ + (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER) +#define KSAUDIO_SPEAKER_3POINT1 \ + (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER | \ + SPEAKER_LOW_FREQUENCY) +#define KSAUDIO_SPEAKER_5POINT0 \ + (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER | \ + SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT) +#define KSAUDIO_SPEAKER_7POINT0 \ + (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER | \ + SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT | SPEAKER_SIDE_LEFT | \ + SPEAKER_SIDE_RIGHT) +#endif + +#if !defined(AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY) +#define AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY 0x08000000 +#define AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM 0x80000000 +#endif + +// Converts the most common format tags defined in mmreg.h into string +// equivalents. Mainly intended for log messages. 
+const char* WaveFormatTagToString(WORD format_tag) {
+ switch (format_tag) {
+ case WAVE_FORMAT_UNKNOWN:
+ return "WAVE_FORMAT_UNKNOWN";
+ case WAVE_FORMAT_PCM:
+ return "WAVE_FORMAT_PCM";
+ case WAVE_FORMAT_IEEE_FLOAT:
+ return "WAVE_FORMAT_IEEE_FLOAT";
+ case WAVE_FORMAT_EXTENSIBLE:
+ return "WAVE_FORMAT_EXTENSIBLE";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+const char* RoleToString(const ERole role) {
+ switch (role) {
+ case eConsole:
+ return "Console";
+ case eMultimedia:
+ return "Multimedia";
+ case eCommunications:
+ return "Communications";
+ default:
+ return "Unsupported";
+ }
+}
+
+const char* FlowToString(const EDataFlow flow) {
+ switch (flow) {
+ case eRender:
+ return "Render";
+ case eCapture:
+ return "Capture";
+ case eAll:
+ return "Render or Capture";
+ default:
+ return "Unsupported";
+ }
+}
+
+bool LoadAudiosesDll() {
+ static const wchar_t* const kAudiosesDLL =
+ L"%WINDIR%\\system32\\audioses.dll";
+ wchar_t path[MAX_PATH] = {0};
+ ExpandEnvironmentStringsW(kAudiosesDLL, path, arraysize(path));
+ RTC_DLOG(LS_INFO) << rtc::ToUtf8(path);
+ return (LoadLibraryExW(path, nullptr, LOAD_WITH_ALTERED_SEARCH_PATH) !=
+ nullptr);
+}
+
+bool LoadAvrtDll() {
+ static const wchar_t* const kAvrtDLL = L"%WINDIR%\\system32\\Avrt.dll";
+ wchar_t path[MAX_PATH] = {0};
+ ExpandEnvironmentStringsW(kAvrtDLL, path, arraysize(path));
+ RTC_DLOG(LS_INFO) << rtc::ToUtf8(path);
+ return (LoadLibraryExW(path, nullptr, LOAD_WITH_ALTERED_SEARCH_PATH) !=
+ nullptr);
+}
+
+ComPtr<IMMDeviceEnumerator> CreateDeviceEnumeratorInternal(
+ bool allow_reinitialize) {
+ ComPtr<IMMDeviceEnumerator> device_enumerator;
+ _com_error error =
+ ::CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL,
+ IID_PPV_ARGS(&device_enumerator));
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "CoCreateInstance failed: " << ErrorToString(error);
+ }
+
+ if (error.Error() == CO_E_NOTINITIALIZED && allow_reinitialize) {
+ RTC_LOG(LS_ERROR) << "CoCreateInstance failed with CO_E_NOTINITIALIZED";
+ // We have seen crashes which indicates that this method can in fact
+ // fail with CO_E_NOTINITIALIZED in combination with certain 3rd party
+ // modules. Calling CoInitializeEx() is an attempt to resolve the reported
+ // issues. See http://crbug.com/378465 for details.
+ error = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+ if (FAILED(error.Error())) {
+ error = ::CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr,
+ CLSCTX_ALL, IID_PPV_ARGS(&device_enumerator));
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "CoCreateInstance failed: "
+ << ErrorToString(error);
+ }
+ }
+ }
+ return device_enumerator;
+}
+
+bool IsSupportedInternal() {
+ // The Core Audio APIs are implemented in the user-mode system components
+ // Audioses.dll and Mmdevapi.dll. Dependency Walker shows that it is
+ // enough to verify the possibility to load the Audioses DLL since it depends
+ // on Mmdevapi.dll. See http://crbug.com/166397 why this extra step is
+ // required to guarantee Core Audio support.
+ if (!LoadAudiosesDll())
+ return false;
+
+ // Being able to load the Audioses.dll does not seem to be sufficient for
+ // all devices to guarantee Core Audio support. To be 100% sure, we also
+ // verify that it is possible to create the IMMDeviceEnumerator interface. If
+ // this works as well we should be home free.
+ ComPtr<IMMDeviceEnumerator> device_enumerator =
+ CreateDeviceEnumeratorInternal(false);
+ if (!device_enumerator) {
+ RTC_LOG(LS_ERROR)
+ << "Failed to create Core Audio device enumerator on thread with ID "
+ << rtc::CurrentThreadId();
+ return false;
+ }
+
+ return true;
+}
+
+bool IsDeviceActive(IMMDevice* device) {
+ DWORD state = DEVICE_STATE_DISABLED;
+ return SUCCEEDED(device->GetState(&state)) && (state & DEVICE_STATE_ACTIVE);
+}
+
+// Retrieve an audio device specified by `device_id` or a default device
+// specified by data-flow direction and role if `device_id` is default.
+ComPtr<IMMDevice> CreateDeviceInternal(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "CreateDeviceInternal: "
+ "id="
+ << device_id << ", flow=" << FlowToString(data_flow)
+ << ", role=" << RoleToString(role);
+ ComPtr<IMMDevice> audio_endpoint_device;
+
+ // Create the IMMDeviceEnumerator interface.
+ ComPtr<IMMDeviceEnumerator> device_enum(CreateDeviceEnumeratorInternal(true));
+ if (!device_enum.Get())
+ return audio_endpoint_device;
+
+ _com_error error(S_FALSE);
+ if (device_id == AudioDeviceName::kDefaultDeviceId) {
+ // Get the default audio endpoint for the specified data-flow direction and
+ // role. Note that, if only a single rendering or capture device is
+ // available, the system always assigns all three rendering or capture roles
+ // to that device. If the method fails to find a rendering or capture device
+ // for the specified role, this means that no rendering or capture device is
+ // available at all. If no device is available, the method sets the output
+ // pointer to NULL and returns ERROR_NOT_FOUND.
+ error = device_enum->GetDefaultAudioEndpoint(
+ data_flow, role, audio_endpoint_device.GetAddressOf());
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR)
+ << "IMMDeviceEnumerator::GetDefaultAudioEndpoint failed: "
+ << ErrorToString(error);
+ }
+ } else {
+ // Ask for an audio endpoint device that is identified by an endpoint ID
+ // string.
+ error = device_enum->GetDevice(rtc::ToUtf16(device_id).c_str(),
+ audio_endpoint_device.GetAddressOf());
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDeviceEnumerator::GetDevice failed: "
+ << ErrorToString(error);
+ }
+ }
+
+ // Verify that the audio endpoint device is active, i.e., that the audio
+ // adapter that connects to the endpoint device is present and enabled.
+ if (SUCCEEDED(error.Error()) && audio_endpoint_device.Get() &&
+ !IsDeviceActive(audio_endpoint_device.Get())) {
+ RTC_LOG(LS_WARNING) << "Selected endpoint device is not active";
+ audio_endpoint_device.Reset();
+ }
+
+ return audio_endpoint_device;
+}
+
+std::string GetDeviceIdInternal(IMMDevice* device) {
+ // Retrieve unique name of endpoint device.
+ // Example: "{0.0.1.00000000}.{8db6020f-18e3-4f25-b6f5-7726c9122574}".
+ LPWSTR device_id;
+ if (SUCCEEDED(device->GetId(&device_id))) {
+ std::string device_id_utf8 = rtc::ToUtf8(device_id, wcslen(device_id));
+ CoTaskMemFree(device_id);
+ return device_id_utf8;
+ } else {
+ return std::string();
+ }
+}
+
+std::string GetDeviceFriendlyNameInternal(IMMDevice* device) {
+ // Retrieve user-friendly name of endpoint device.
+ // Example: "Microphone (Realtek High Definition Audio)".
+ ComPtr<IPropertyStore> properties;
+ HRESULT hr = device->OpenPropertyStore(STGM_READ, properties.GetAddressOf());
+ if (FAILED(hr))
+ return std::string();
+
+ ScopedPropVariant friendly_name_pv;
+ hr = properties->GetValue(PKEY_Device_FriendlyName,
+ friendly_name_pv.Receive());
+ if (FAILED(hr))
+ return std::string();
+
+ if (friendly_name_pv.get().vt == VT_LPWSTR &&
+ friendly_name_pv.get().pwszVal) {
+ return rtc::ToUtf8(friendly_name_pv.get().pwszVal,
+ wcslen(friendly_name_pv.get().pwszVal));
+ } else {
+ return std::string();
+ }
+}
+
+ComPtr<IAudioSessionManager2> CreateSessionManager2Internal(
+ IMMDevice* audio_device) {
+ if (!audio_device)
+ return ComPtr<IAudioSessionManager2>();
+
+ ComPtr<IAudioSessionManager2> audio_session_manager;
+ _com_error error =
+ audio_device->Activate(__uuidof(IAudioSessionManager2), CLSCTX_ALL,
+ nullptr, &audio_session_manager);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDevice::Activate(IAudioSessionManager2) failed: "
+ << ErrorToString(error);
+ }
+ return audio_session_manager;
+}
+
+ComPtr<IAudioSessionEnumerator> CreateSessionEnumeratorInternal(
+ IMMDevice* audio_device) {
+ if (!audio_device) {
+ return ComPtr<IAudioSessionEnumerator>();
+ }
+
+ ComPtr<IAudioSessionEnumerator> audio_session_enumerator;
+ ComPtr<IAudioSessionManager2> audio_session_manager =
+ CreateSessionManager2Internal(audio_device);
+ if (!audio_session_manager.Get()) {
+ return audio_session_enumerator;
+ }
+ _com_error error =
+ audio_session_manager->GetSessionEnumerator(&audio_session_enumerator);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR)
+ << "IAudioSessionManager2::GetSessionEnumerator failed: "
+ << ErrorToString(error);
+ return ComPtr<IAudioSessionEnumerator>();
+ }
+ return audio_session_enumerator;
+}
+
+// Creates and activates an IAudioClient COM object given the selected
+// endpoint device.
+ComPtr<IAudioClient> CreateClientInternal(IMMDevice* audio_device) {
+ if (!audio_device)
+ return ComPtr<IAudioClient>();
+
+ ComPtr<IAudioClient> audio_client;
+ _com_error error = audio_device->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
+ nullptr, &audio_client);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDevice::Activate(IAudioClient) failed: "
+ << ErrorToString(error);
+ }
+ return audio_client;
+}
+
+ComPtr<IAudioClient2> CreateClient2Internal(IMMDevice* audio_device) {
+ if (!audio_device)
+ return ComPtr<IAudioClient2>();
+
+ ComPtr<IAudioClient2> audio_client;
+ _com_error error = audio_device->Activate(__uuidof(IAudioClient2), CLSCTX_ALL,
+ nullptr, &audio_client);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDevice::Activate(IAudioClient2) failed: "
+ << ErrorToString(error);
+ }
+ return audio_client;
+}
+
+ComPtr<IAudioClient3> CreateClient3Internal(IMMDevice* audio_device) {
+ if (!audio_device)
+ return ComPtr<IAudioClient3>();
+
+ ComPtr<IAudioClient3> audio_client;
+ _com_error error = audio_device->Activate(__uuidof(IAudioClient3), CLSCTX_ALL,
+ nullptr, &audio_client);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDevice::Activate(IAudioClient3) failed: "
+ << ErrorToString(error);
+ }
+ return audio_client;
+}
+
+ComPtr<IMMDeviceCollection> CreateCollectionInternal(EDataFlow data_flow) {
+ ComPtr<IMMDeviceEnumerator> device_enumerator(
+ CreateDeviceEnumeratorInternal(true));
+ if (!device_enumerator) {
+ return ComPtr<IMMDeviceCollection>();
+ }
+
+ // Generate a collection of active (present and not disabled) audio endpoint
+ // devices for the specified data-flow direction.
+ // This method will succeed even if all devices are disabled.
+ ComPtr<IMMDeviceCollection> collection;
+ _com_error error = device_enumerator->EnumAudioEndpoints(
+ data_flow, DEVICE_STATE_ACTIVE, collection.GetAddressOf());
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDeviceCollection::EnumAudioEndpoints failed: "
+ << ErrorToString(error);
+ }
+ return collection;
+}
+
+bool GetDeviceNamesInternal(EDataFlow data_flow,
+ webrtc::AudioDeviceNames* device_names) {
+ RTC_DLOG(LS_INFO) << "GetDeviceNamesInternal: flow="
+ << FlowToString(data_flow);
+
+ // Generate a collection of active audio endpoint devices for the specified
+ // direction.
+ ComPtr<IMMDeviceCollection> collection = CreateCollectionInternal(data_flow);
+ if (!collection.Get()) {
+ RTC_LOG(LS_ERROR) << "Failed to create a collection of active devices";
+ return false;
+ }
+
+ // Retrieve the number of active (present, not disabled and plugged in) audio
+ // devices for the specified direction.
+ UINT number_of_active_devices = 0;
+ _com_error error = collection->GetCount(&number_of_active_devices);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDeviceCollection::GetCount failed: "
+ << ErrorToString(error);
+ return false;
+ }
+
+ if (number_of_active_devices == 0) {
+ RTC_DLOG(LS_WARNING) << "Found no active devices";
+ return false;
+ }
+
+ // Loop over all active devices and add friendly name and unique id to the
+ // `device_names` queue. For now, devices are added at indexes 0, 1, ..., N-1
+ // but they will be moved to 2,3,..., N+1 at the next stage when default and
+ // default communication devices are added at index 0 and 1.
+ ComPtr<IMMDevice> audio_device;
+ for (UINT i = 0; i < number_of_active_devices; ++i) {
+ // Retrieve a pointer to the specified item in the device collection.
+ error = collection->Item(i, audio_device.GetAddressOf());
+ if (FAILED(error.Error())) {
+ // Skip this item and try to get the next item instead; will result in an
+ // incomplete list of devices.
+ RTC_LOG(LS_WARNING) << "IMMDeviceCollection::Item failed: "
+ << ErrorToString(error);
+ continue;
+ }
+ if (!audio_device.Get()) {
+ RTC_LOG(LS_WARNING) << "Invalid audio device";
+ continue;
+ }
+
+ // Retrieve the complete device name for the given audio device endpoint.
+ AudioDeviceName device_name(
+ GetDeviceFriendlyNameInternal(audio_device.Get()),
+ GetDeviceIdInternal(audio_device.Get()));
+ // Add combination of user-friendly and unique name to the output list.
+ device_names->push_back(device_name);
+ }
+
+ // Log a warning if the list of devices is not complete but keep on trying
+ // to add the default and default communications devices at the front.
+ if (device_names->size() != number_of_active_devices) {
+ RTC_DLOG(LS_WARNING)
+ << "List of device names does not contain all active devices";
+ }
+
+ // Avoid adding default and default communication devices if no active device
+ // could be added to the queue. We might as well break here and return false
+ // since no active devices were identified.
+ if (device_names->empty()) {
+ RTC_DLOG(LS_ERROR) << "List of active devices is empty";
+ return false;
+ }
+
+ // Prepend the queue with two more elements: one for the default device and
+ // one for the default communication device (can correspond to the same unique
+ // id if only one active device exists). The first element (index 0) is the
+ // default device and the second element (index 1) is the default
+ // communication device.
+ ERole role[] = {eCommunications, eConsole};
+ ComPtr<IMMDevice> default_device;
+ AudioDeviceName default_device_name;
+ for (size_t i = 0; i < arraysize(role); ++i) {
+ default_device = CreateDeviceInternal(AudioDeviceName::kDefaultDeviceId,
+ data_flow, role[i]);
+ if (!default_device.Get()) {
+ // Add empty strings to device name if the device could not be created.
+ RTC_DLOG(LS_WARNING) << "Failed to add device with role: "
+ << RoleToString(role[i]);
+ default_device_name.device_name = std::string();
+ default_device_name.unique_id = std::string();
+ } else {
+ // Populate the device name with friendly name and unique id.
+ std::string device_name;
+ device_name += (role[i] == eConsole ? "Default - " : "Communication - ");
+ device_name += GetDeviceFriendlyNameInternal(default_device.Get());
+ std::string unique_id = GetDeviceIdInternal(default_device.Get());
+ default_device_name.device_name = std::move(device_name);
+ default_device_name.unique_id = std::move(unique_id);
+ }
+
+ // Add combination of user-friendly and unique name to the output queue.
+ // The last element (<=> eConsole) will be at the front of the queue, hence
+ // at index 0. Empty strings will be added for cases where no default
+ // devices were found.
+ device_names->push_front(default_device_name);
+ }
+
+ // Example of log output when only one device is active. Note that the queue
+ // contains two extra elements at index 0 (Default) and 1 (Communication) to
+ // allow selection of device by role instead of id. All elements correspond
+ // to the same unique id.
+ // [0] friendly name: Default - Headset Microphone (2- Arctis 7 Chat)
+ // [0] unique id : {0.0.1.00000000}.{ff9eed76-196e-467a-b295-26986e69451c}
+ // [1] friendly name: Communication - Headset Microphone (2- Arctis 7 Chat)
+ // [1] unique id : {0.0.1.00000000}.{ff9eed76-196e-467a-b295-26986e69451c}
+ // [2] friendly name: Headset Microphone (2- Arctis 7 Chat)
+ // [2] unique id : {0.0.1.00000000}.{ff9eed76-196e-467a-b295-26986e69451c}
+ for (size_t i = 0; i < device_names->size(); ++i) {
+ RTC_DLOG(LS_INFO) << "[" << i
+ << "] friendly name: " << (*device_names)[i].device_name;
+ RTC_DLOG(LS_INFO) << "[" << i
+ << "] unique id : " << (*device_names)[i].unique_id;
+ }
+
+ return true;
+}
+
+HRESULT GetPreferredAudioParametersInternal(IAudioClient* client,
+ AudioParameters* params,
+ int fixed_sample_rate) {
+ WAVEFORMATPCMEX mix_format;
+ HRESULT hr = core_audio_utility::GetSharedModeMixFormat(client, &mix_format);
+ if (FAILED(hr))
+ return hr;
+
+ REFERENCE_TIME default_period = 0;
+ hr = core_audio_utility::GetDevicePeriod(client, AUDCLNT_SHAREMODE_SHARED,
+ &default_period);
+ if (FAILED(hr))
+ return hr;
+
+ int sample_rate = mix_format.Format.nSamplesPerSec;
+ // Override default sample rate if `fixed_sample_rate` is set and different
+ // from the default rate.
+ if (fixed_sample_rate > 0 && fixed_sample_rate != sample_rate) {
+ RTC_DLOG(LS_INFO) << "Using fixed sample rate instead of the preferred: "
+ << sample_rate << " is replaced by " << fixed_sample_rate;
+ sample_rate = fixed_sample_rate;
+ }
+ // TODO(henrika): utilize full mix_format.Format.wBitsPerSample.
+ // const size_t bits_per_sample = AudioParameters::kBitsPerSample;
+ // TODO(henrika): improve channel layout support.
+ const size_t channels = mix_format.Format.nChannels;
+
+ // Use the native device period to derive the smallest possible buffer size
+ // in shared mode.
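+ // For illustration: with a 48000 Hz mix format and a default device period
+ // of 10 ms, the computation below yields
+ // 48000 * 0.010 + 0.5 = 480.5 -> 480 frames per buffer.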
+ double device_period_in_seconds =
+ static_cast<double>(
+ core_audio_utility::ReferenceTimeToTimeDelta(default_period).ms()) /
+ 1000.0L;
+ const size_t frames_per_buffer =
+ static_cast<size_t>(sample_rate * device_period_in_seconds + 0.5);
+
+ AudioParameters audio_params(sample_rate, channels, frames_per_buffer);
+ *params = audio_params;
+ RTC_DLOG(LS_INFO) << audio_params.ToString();
+
+ return hr;
+}
+
+} // namespace
+
+namespace core_audio_utility {
+
+// core_audio_utility::WaveFormatWrapper implementation.
+WAVEFORMATEXTENSIBLE* WaveFormatWrapper::GetExtensible() const {
+ RTC_CHECK(IsExtensible());
+ return reinterpret_cast<WAVEFORMATEXTENSIBLE*>(ptr_);
+}
+
+bool WaveFormatWrapper::IsExtensible() const {
+ return ptr_->wFormatTag == WAVE_FORMAT_EXTENSIBLE && ptr_->cbSize >= 22;
+}
+
+bool WaveFormatWrapper::IsPcm() const {
+ return IsExtensible() ? GetExtensible()->SubFormat == KSDATAFORMAT_SUBTYPE_PCM
+ : ptr_->wFormatTag == WAVE_FORMAT_PCM;
+}
+
+bool WaveFormatWrapper::IsFloat() const {
+ return IsExtensible()
+ ? GetExtensible()->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT
+ : ptr_->wFormatTag == WAVE_FORMAT_IEEE_FLOAT;
+}
+
+size_t WaveFormatWrapper::size() const {
+ return sizeof(*ptr_) + ptr_->cbSize;
+}
+
+bool IsSupported() {
+ RTC_DLOG(LS_INFO) << "IsSupported";
+ static bool g_is_supported = IsSupportedInternal();
+ return g_is_supported;
+}
+
+bool IsMMCSSSupported() {
+ RTC_DLOG(LS_INFO) << "IsMMCSSSupported";
+ return LoadAvrtDll();
+}
+
+int NumberOfActiveDevices(EDataFlow data_flow) {
+ // Generate a collection of active audio endpoint devices for the specified
+ // data-flow direction.
+ ComPtr<IMMDeviceCollection> collection = CreateCollectionInternal(data_flow);
+ if (!collection.Get()) {
+ return 0;
+ }
+
+ // Retrieve the number of active audio devices for the specified direction.
+ UINT number_of_active_devices = 0;
+ collection->GetCount(&number_of_active_devices);
+ std::string str;
+ if (data_flow == eCapture) {
+ str = "Number of capture devices: ";
+ } else if (data_flow == eRender) {
+ str = "Number of render devices: ";
+ } else if (data_flow == eAll) {
+ str = "Total number of devices: ";
+ }
+ RTC_DLOG(LS_INFO) << str << number_of_active_devices;
+ return static_cast<int>(number_of_active_devices);
+}
+
+uint32_t GetAudioClientVersion() {
+ uint32_t version = 1;
+ if (rtc::rtc_win::GetVersion() >= rtc::rtc_win::VERSION_WIN10) {
+ version = 3;
+ } else if (rtc::rtc_win::GetVersion() >= rtc::rtc_win::VERSION_WIN8) {
+ version = 2;
+ }
+ return version;
+}
+
+ComPtr<IMMDeviceEnumerator> CreateDeviceEnumerator() {
+ RTC_DLOG(LS_INFO) << "CreateDeviceEnumerator";
+ return CreateDeviceEnumeratorInternal(true);
+}
+
+std::string GetDefaultInputDeviceID() {
+ RTC_DLOG(LS_INFO) << "GetDefaultInputDeviceID";
+ ComPtr<IMMDevice> device(
+ CreateDevice(AudioDeviceName::kDefaultDeviceId, eCapture, eConsole));
+ return device.Get() ? GetDeviceIdInternal(device.Get()) : std::string();
+}
+
+std::string GetDefaultOutputDeviceID() {
+ RTC_DLOG(LS_INFO) << "GetDefaultOutputDeviceID";
+ ComPtr<IMMDevice> device(
+ CreateDevice(AudioDeviceName::kDefaultDeviceId, eRender, eConsole));
+ return device.Get() ? GetDeviceIdInternal(device.Get()) : std::string();
+}
+
+std::string GetCommunicationsInputDeviceID() {
+ RTC_DLOG(LS_INFO) << "GetCommunicationsInputDeviceID";
+ ComPtr<IMMDevice> device(CreateDevice(AudioDeviceName::kDefaultDeviceId,
+ eCapture, eCommunications));
+ return device.Get() ? GetDeviceIdInternal(device.Get()) : std::string();
+}
+
+std::string GetCommunicationsOutputDeviceID() {
+ RTC_DLOG(LS_INFO) << "GetCommunicationsOutputDeviceID";
+ ComPtr<IMMDevice> device(CreateDevice(AudioDeviceName::kDefaultDeviceId,
+ eRender, eCommunications));
+ return device.Get() ? GetDeviceIdInternal(device.Get()) : std::string();
+}
+
+ComPtr<IMMDevice> CreateDevice(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "CreateDevice";
+ return CreateDeviceInternal(device_id, data_flow, role);
+}
+
+AudioDeviceName GetDeviceName(IMMDevice* device) {
+ RTC_DLOG(LS_INFO) << "GetDeviceName";
+ RTC_DCHECK(device);
+ AudioDeviceName device_name(GetDeviceFriendlyNameInternal(device),
+ GetDeviceIdInternal(device));
+ RTC_DLOG(LS_INFO) << "friendly name: " << device_name.device_name;
+ RTC_DLOG(LS_INFO) << "unique id : " << device_name.unique_id;
+ return device_name;
+}
+
+std::string GetFriendlyName(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "GetFriendlyName";
+ ComPtr<IMMDevice> audio_device = CreateDevice(device_id, data_flow, role);
+ if (!audio_device.Get())
+ return std::string();
+
+ AudioDeviceName device_name = GetDeviceName(audio_device.Get());
+ return device_name.device_name;
+}
+
+EDataFlow GetDataFlow(IMMDevice* device) {
+ RTC_DLOG(LS_INFO) << "GetDataFlow";
+ RTC_DCHECK(device);
+ ComPtr<IMMEndpoint> endpoint;
+ _com_error error = device->QueryInterface(endpoint.GetAddressOf());
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMDevice::QueryInterface failed: "
+ << ErrorToString(error);
+ return eAll;
+ }
+
+ EDataFlow data_flow;
+ error = endpoint->GetDataFlow(&data_flow);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IMMEndpoint::GetDataFlow failed: "
+ << ErrorToString(error);
+ return eAll;
+ }
+ return data_flow;
+}
+
+bool GetInputDeviceNames(webrtc::AudioDeviceNames* device_names) {
+ RTC_DLOG(LS_INFO) << "GetInputDeviceNames";
+ RTC_DCHECK(device_names);
+ RTC_DCHECK(device_names->empty());
+ return GetDeviceNamesInternal(eCapture, device_names);
+}
+
+bool GetOutputDeviceNames(webrtc::AudioDeviceNames* device_names) {
+ RTC_DLOG(LS_INFO) << "GetOutputDeviceNames";
+ RTC_DCHECK(device_names);
+ RTC_DCHECK(device_names->empty());
+ return GetDeviceNamesInternal(eRender, device_names);
+}
+
+ComPtr<IAudioSessionManager2> CreateSessionManager2(IMMDevice* device) {
+ RTC_DLOG(LS_INFO) << "CreateSessionManager2";
+ return CreateSessionManager2Internal(device);
+}
+
+Microsoft::WRL::ComPtr<IAudioSessionEnumerator> CreateSessionEnumerator(
+ IMMDevice* device) {
+ RTC_DLOG(LS_INFO) << "CreateSessionEnumerator";
+ return CreateSessionEnumeratorInternal(device);
+}
+
+int NumberOfActiveSessions(IMMDevice* device) {
+ RTC_DLOG(LS_INFO) << "NumberOfActiveSessions";
+ ComPtr<IAudioSessionEnumerator> session_enumerator =
+ CreateSessionEnumerator(device);
+
+ // Iterate over all audio sessions for the given device.
+ int session_count = 0;
+ _com_error error = session_enumerator->GetCount(&session_count);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioSessionEnumerator::GetCount failed: "
+ << ErrorToString(error);
+ return 0;
+ }
+ RTC_DLOG(LS_INFO) << "Total number of audio sessions: " << session_count;
+
+ int num_active = 0;
+ for (int session = 0; session < session_count; session++) {
+ // Acquire the session control interface.
+ ComPtr<IAudioSessionControl> session_control;
+ error = session_enumerator->GetSession(session, &session_control);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioSessionEnumerator::GetSession failed: "
+ << ErrorToString(error);
+ return 0;
+ }
+
+ // Log the display name of the audio session for debugging purposes.
+ LPWSTR display_name;
+ if (SUCCEEDED(session_control->GetDisplayName(&display_name))) {
+ RTC_DLOG(LS_INFO) << "display name: "
+ << rtc::ToUtf8(display_name, wcslen(display_name));
+ CoTaskMemFree(display_name);
+ }
+
+ // Get the current state and check if the state is active or not.
+ AudioSessionState state;
+ error = session_control->GetState(&state);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioSessionControl::GetState failed: "
+ << ErrorToString(error);
+ return 0;
+ }
+ if (state == AudioSessionStateActive) {
+ ++num_active;
+ }
+ }
+
+ RTC_DLOG(LS_INFO) << "Number of active audio sessions: " << num_active;
+ return num_active;
+}
+
+ComPtr<IAudioClient> CreateClient(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "CreateClient";
+ ComPtr<IMMDevice> device(CreateDevice(device_id, data_flow, role));
+ return CreateClientInternal(device.Get());
+}
+
+ComPtr<IAudioClient2> CreateClient2(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "CreateClient2";
+ ComPtr<IMMDevice> device(CreateDevice(device_id, data_flow, role));
+ return CreateClient2Internal(device.Get());
+}
+
+ComPtr<IAudioClient3> CreateClient3(absl::string_view device_id,
+ EDataFlow data_flow,
+ ERole role) {
+ RTC_DLOG(LS_INFO) << "CreateClient3";
+ ComPtr<IMMDevice> device(CreateDevice(device_id, data_flow, role));
+ return CreateClient3Internal(device.Get());
+}
+
+HRESULT SetClientProperties(IAudioClient2* client) {
+ RTC_DLOG(LS_INFO) << "SetClientProperties";
+ RTC_DCHECK(client);
+ if (GetAudioClientVersion() < 2) {
+ RTC_LOG(LS_WARNING) << "Requires IAudioClient2 or higher";
+ return AUDCLNT_E_UNSUPPORTED_FORMAT;
+ }
+ AudioClientProperties props = {0};
+ props.cbSize = sizeof(AudioClientProperties);
+ // Real-time VoIP communication.
+ // TODO(henrika): other categories?
+ props.eCategory = AudioCategory_Communications;
+ // Hardware-offloaded audio processing allows the main audio processing tasks
+ // to be performed outside the computer's main CPU. Check support and log the
+ // result but hard-code `bIsOffload` to FALSE for now.
+ // TODO(henrika): evaluate hardware-offloading. Might complicate usage of
+ // IAudioClient::GetMixFormat().
+ BOOL supports_offload = FALSE;
+ _com_error error =
+ client->IsOffloadCapable(props.eCategory, &supports_offload);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient2::IsOffloadCapable failed: "
+ << ErrorToString(error);
+ }
+ RTC_DLOG(LS_INFO) << "supports_offload: " << supports_offload;
+ props.bIsOffload = false;
+#if (NTDDI_VERSION < NTDDI_WINBLUE)
+ RTC_DLOG(LS_INFO) << "options: Not supported in this build";
+#else
+ // TODO(henrika): pros and cons compared with AUDCLNT_STREAMOPTIONS_NONE?
+ props.Options |= AUDCLNT_STREAMOPTIONS_NONE;
+ // Requires System.Devices.AudioDevice.RawProcessingSupported.
+ // The application can choose to *always ignore* the OEM AEC/AGC by setting
+ // the AUDCLNT_STREAMOPTIONS_RAW flag in the call to SetClientProperties.
+ // This flag will preserve the user experience aspect of Communications
+ // streams, but will not insert any OEM provided communications specific
+ // processing in the audio signal path.
+ // props.Options |= AUDCLNT_STREAMOPTIONS_RAW;
+
+ // If it is important to avoid resampling in the audio engine, set this flag.
+ // AUDCLNT_STREAMOPTIONS_MATCH_FORMAT (or anything in IAudioClient3) is not
+ // an appropriate interface to use for communications scenarios.
+ // This interface is mainly meant for pro audio scenarios.
+ // props.Options |= AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
+ RTC_DLOG(LS_INFO) << "options: 0x" << rtc::ToHex(props.Options);
+#endif
+ error = client->SetClientProperties(&props);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient2::SetClientProperties failed: "
+ << ErrorToString(error);
+ }
+ return error.Error();
+}
+
+HRESULT GetBufferSizeLimits(IAudioClient2* client,
+ const WAVEFORMATEXTENSIBLE* format,
+ REFERENCE_TIME* min_buffer_duration,
+ REFERENCE_TIME* max_buffer_duration) {
+ RTC_DLOG(LS_INFO) << "GetBufferSizeLimits";
+ RTC_DCHECK(client);
+ if (GetAudioClientVersion() < 2) {
+ RTC_LOG(LS_WARNING) << "Requires IAudioClient2 or higher";
+ return AUDCLNT_E_UNSUPPORTED_FORMAT;
+ }
+ REFERENCE_TIME min_duration = 0;
+ REFERENCE_TIME max_duration = 0;
+ _com_error error = client->GetBufferSizeLimits(
+ reinterpret_cast<const WAVEFORMATEX*>(format), TRUE, &min_duration,
+ &max_duration);
+ if (error.Error() == AUDCLNT_E_OFFLOAD_MODE_ONLY) {
+ // This API seems to be supported in off-load mode only but it is not
+ // documented as a valid error code. Making a special note about it here.
+ RTC_LOG(LS_ERROR) << "IAudioClient2::GetBufferSizeLimits failed: "
+ "AUDCLNT_E_OFFLOAD_MODE_ONLY";
+ } else if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient2::GetBufferSizeLimits failed: "
+ << ErrorToString(error);
+ } else {
+ *min_buffer_duration = min_duration;
+ *max_buffer_duration = max_duration;
+ RTC_DLOG(LS_INFO) << "min_buffer_duration: " << *min_buffer_duration;
+ RTC_DLOG(LS_INFO) << "max_buffer_duration: " << *max_buffer_duration;
+ }
+ return error.Error();
+}
+
+HRESULT GetSharedModeMixFormat(IAudioClient* client,
+ WAVEFORMATEXTENSIBLE* format) {
+ RTC_DLOG(LS_INFO) << "GetSharedModeMixFormat";
+ RTC_DCHECK(client);
+
+ // The GetMixFormat method retrieves the stream format that the audio engine
+ // uses for its internal processing of shared-mode streams. The method
+ // allocates the storage for the structure and this memory will be released
+ // when `mix_format` goes out of scope. The GetMixFormat method retrieves a
+ // format descriptor that is in the form of a WAVEFORMATEXTENSIBLE structure
+ // instead of a standalone WAVEFORMATEX structure. The method outputs a
+ // pointer to the WAVEFORMATEX structure that is embedded at the start of
+ // this WAVEFORMATEXTENSIBLE structure.
+ // Note that crbug/803056 indicates that some devices can return a format
+ // where only the WAVEFORMATEX part is initialized and we must be able to
+ // account for that.
+ ScopedCoMem<WAVEFORMATEXTENSIBLE> mix_format;
+ _com_error error =
+ client->GetMixFormat(reinterpret_cast<WAVEFORMATEX**>(&mix_format));
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::GetMixFormat failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ // Use a wave format wrapper to make things simpler.
+ WaveFormatWrapper wrapped_format(mix_format.Get());
+
+ // Verify that the reported format can be mixed by the audio engine in
+ // shared mode.
+ if (!wrapped_format.IsPcm() && !wrapped_format.IsFloat()) {
+ RTC_DLOG(LS_ERROR)
+ << "Only pure PCM or float audio streams can be mixed in shared mode";
+ return AUDCLNT_E_UNSUPPORTED_FORMAT;
+ }
+
+ // Log a warning for the rare case where `mix_format` only contains a
+ // stand-alone WAVEFORMATEX structure but don't return.
+ if (!wrapped_format.IsExtensible()) {
+ RTC_DLOG(LS_WARNING)
+ << "The returned format contains no extended information. "
+ "The size is "
+ << wrapped_format.size() << " bytes.";
+ }
+
+ // Copy the correct number of bytes into `*format` taking into account if
+ // the returned structure is correctly extended or not.
+ RTC_CHECK_LE(wrapped_format.size(), sizeof(WAVEFORMATEXTENSIBLE));
+ memcpy(format, wrapped_format.get(), wrapped_format.size());
+ RTC_DLOG(LS_INFO) << WaveFormatToString(format);
+
+ return error.Error();
+}
+
+bool IsFormatSupported(IAudioClient* client,
+ AUDCLNT_SHAREMODE share_mode,
+ const WAVEFORMATEXTENSIBLE* format) {
+ RTC_DLOG(LS_INFO) << "IsFormatSupported";
+ RTC_DCHECK(client);
+ ScopedCoMem<WAVEFORMATEX> closest_match;
+ // This method provides a way for a client to determine, before calling
+ // IAudioClient::Initialize, whether the audio engine supports a particular
+ // stream format or not. In shared mode, the audio engine always supports
+ // the mix format (see GetSharedModeMixFormat).
+ // TODO(henrika): verify support for exclusive mode as well?
+ _com_error error = client->IsFormatSupported(
+ share_mode, reinterpret_cast<const WAVEFORMATEX*>(format),
+ &closest_match);
+ RTC_LOG(LS_INFO) << WaveFormatToString(
+ const_cast<WAVEFORMATEXTENSIBLE*>(format));
+ if ((error.Error() == S_OK) && (closest_match == nullptr)) {
+ RTC_DLOG(LS_INFO)
+ << "The audio endpoint device supports the specified stream format";
+ } else if ((error.Error() == S_FALSE) && (closest_match != nullptr)) {
+ // Call succeeded with a closest match to the specified format. This log
+ // can only be triggered for shared mode.
+ RTC_LOG(LS_WARNING)
+ << "Exact format is not supported, but a closest match exists";
+ RTC_LOG(LS_INFO) << WaveFormatToString(closest_match.Get());
+ } else if ((error.Error() == AUDCLNT_E_UNSUPPORTED_FORMAT) &&
+ (closest_match == nullptr)) {
+ // The audio engine does not support the caller-specified format or any
+ // similar format.
+ RTC_DLOG(LS_INFO) << "The audio endpoint device does not support the "
+ "specified stream format";
+ } else {
+ RTC_LOG(LS_ERROR) << "IAudioClient::IsFormatSupported failed: "
+ << ErrorToString(error);
+ }
+
+ return (error.Error() == S_OK);
+}
+
+HRESULT GetDevicePeriod(IAudioClient* client,
+ AUDCLNT_SHAREMODE share_mode,
+ REFERENCE_TIME* device_period) {
+ RTC_DLOG(LS_INFO) << "GetDevicePeriod";
+ RTC_DCHECK(client);
+ // The `default_period` parameter specifies the default scheduling period
+ // for a shared-mode stream. The `minimum_period` parameter specifies the
+ // minimum scheduling period for an exclusive-mode stream.
+ // The time is expressed in 100-nanosecond units.
+ REFERENCE_TIME default_period = 0;
+ REFERENCE_TIME minimum_period = 0;
+ _com_error error = client->GetDevicePeriod(&default_period, &minimum_period);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::GetDevicePeriod failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ *device_period = (share_mode == AUDCLNT_SHAREMODE_SHARED) ? default_period
+ : minimum_period;
+ RTC_LOG(LS_INFO) << "device_period: "
+ << ReferenceTimeToTimeDelta(*device_period).ms() << " [ms]";
+ RTC_LOG(LS_INFO) << "minimum_period: "
+ << ReferenceTimeToTimeDelta(minimum_period).ms() << " [ms]";
+ return error.Error();
+}
+
+HRESULT GetSharedModeEnginePeriod(IAudioClient3* client3,
+ const WAVEFORMATEXTENSIBLE* format,
+ uint32_t* default_period_in_frames,
+ uint32_t* fundamental_period_in_frames,
+ uint32_t* min_period_in_frames,
+ uint32_t* max_period_in_frames) {
+ RTC_DLOG(LS_INFO) << "GetSharedModeEnginePeriod";
+ RTC_DCHECK(client3);
+
+ UINT32 default_period = 0;
+ UINT32 fundamental_period = 0;
+ UINT32 min_period = 0;
+ UINT32 max_period = 0;
+ _com_error error = client3->GetSharedModeEnginePeriod(
+ reinterpret_cast<const WAVEFORMATEX*>(format), &default_period,
+ &fundamental_period, &min_period, &max_period);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient3::GetSharedModeEnginePeriod failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ WAVEFORMATEX format_ex = format->Format;
+ const WORD sample_rate = format_ex.nSamplesPerSec;
+ RTC_LOG(LS_INFO) << "default_period_in_frames: " << default_period << " ("
+ << FramesToMilliseconds(default_period, sample_rate)
+ << " ms)";
+ RTC_LOG(LS_INFO) << "fundamental_period_in_frames: " << fundamental_period
+ << " ("
+ << FramesToMilliseconds(fundamental_period, sample_rate)
+ << " ms)";
+ RTC_LOG(LS_INFO) << "min_period_in_frames: " << min_period << " ("
+ << FramesToMilliseconds(min_period, sample_rate) << " ms)";
+ RTC_LOG(LS_INFO) << "max_period_in_frames: " << max_period << " ("
+ << FramesToMilliseconds(max_period, sample_rate) << " ms)";
+ *default_period_in_frames = default_period;
+ *fundamental_period_in_frames = fundamental_period;
+ *min_period_in_frames = min_period;
+ *max_period_in_frames = max_period;
+ return error.Error();
+}
+
+HRESULT GetPreferredAudioParameters(IAudioClient* client,
+ AudioParameters* params) {
+ RTC_DLOG(LS_INFO) << "GetPreferredAudioParameters";
+ RTC_DCHECK(client);
+ return GetPreferredAudioParametersInternal(client, params, -1);
+}
+
+HRESULT GetPreferredAudioParameters(IAudioClient* client,
+ webrtc::AudioParameters* params,
+ uint32_t sample_rate) {
+ RTC_DLOG(LS_INFO) << "GetPreferredAudioParameters: " << sample_rate;
+ RTC_DCHECK(client);
+ return GetPreferredAudioParametersInternal(client, params, sample_rate);
+}
+
+HRESULT SharedModeInitialize(IAudioClient* client,
+ const WAVEFORMATEXTENSIBLE* format,
+ HANDLE event_handle,
+ REFERENCE_TIME buffer_duration,
+ bool auto_convert_pcm,
+ uint32_t* endpoint_buffer_size) {
+ RTC_DLOG(LS_INFO) << "SharedModeInitialize: buffer_duration="
+ << buffer_duration
+ << ", auto_convert_pcm=" << auto_convert_pcm;
+ RTC_DCHECK(client);
+ RTC_DCHECK_GE(buffer_duration, 0);
+ if (buffer_duration != 0) {
+ RTC_DLOG(LS_WARNING) << "Non-default buffer size is used";
+ }
+ if (auto_convert_pcm) {
+ RTC_DLOG(LS_WARNING) << "Sample rate converter can be utilized";
+ }
+ // The AUDCLNT_STREAMFLAGS_NOPERSIST flag disables persistence of the volume
+ // and mute settings for a session that contains rendering streams.
+ // By default, the volume level and muting state for a rendering session are
+ // persistent across system restarts. The volume level and muting state for a
+ // capture session are never persistent.
+ DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST;
+
+ // Enable event-driven streaming if a valid event handle is provided.
+ // After the stream starts, the audio engine will signal the event handle
+ // to notify the client each time a buffer becomes ready to process.
+ // Event-driven buffering is supported for both rendering and capturing.
+ // Both shared-mode and exclusive-mode streams can use event-driven buffering.
+ bool use_event =
+ (event_handle != nullptr && event_handle != INVALID_HANDLE_VALUE);
+ if (use_event) {
+ stream_flags |= AUDCLNT_STREAMFLAGS_EVENTCALLBACK;
+ RTC_DLOG(LS_INFO) << "The stream is initialized to be event driven";
+ }
+
+ // Check if sample-rate conversion is requested.
+ if (auto_convert_pcm) {
+ // Add channel matrixer (not utilized here) and rate converter to convert
+ // from our (the client's) format to the audio engine mix format.
+ // Currently only supported for testing, i.e., not possible to enable using
+ // public APIs.
+ RTC_DLOG(LS_INFO) << "The stream is initialized to support rate conversion";
+ stream_flags |= AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM;
+ stream_flags |= AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
+ }
+ RTC_DLOG(LS_INFO) << "stream_flags: 0x" << rtc::ToHex(stream_flags);
+
+ // Initialize the shared mode client for minimal delay if `buffer_duration`
+ // is 0 or possibly a higher delay (more robust) if `buffer_duration` is
+ // larger than 0. The actual size is given by IAudioClient::GetBufferSize().
+ _com_error error = client->Initialize(
+ AUDCLNT_SHAREMODE_SHARED, stream_flags, buffer_duration, 0,
+ reinterpret_cast<const WAVEFORMATEX*>(format), nullptr);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::Initialize failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ // If a stream is initialized to be event driven and in shared mode, the
+ // associated application must also obtain a handle by making a call to
+ // IAudioClient::SetEventHandle.
+ if (use_event) {
+ error = client->SetEventHandle(event_handle);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::SetEventHandle failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+ }
+
+ UINT32 buffer_size_in_frames = 0;
+ // Retrieves the size (maximum capacity) of the endpoint buffer. The size is
+ // expressed as the number of audio frames the buffer can hold.
+ // For rendering clients, the buffer length determines the maximum amount of
+ // rendering data that the application can write to the endpoint buffer
+ // during a single processing pass. For capture clients, the buffer length
+ // determines the maximum amount of capture data that the audio engine can
+ // read from the endpoint buffer during a single processing pass.
+ error = client->GetBufferSize(&buffer_size_in_frames);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::GetBufferSize failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ *endpoint_buffer_size = buffer_size_in_frames;
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: " << buffer_size_in_frames
+ << " [audio frames]";
+ const double size_in_ms = static_cast<double>(buffer_size_in_frames) /
+ (format->Format.nSamplesPerSec / 1000.0);
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: "
+ << static_cast<int>(size_in_ms + 0.5) << " [ms]";
+ RTC_DLOG(LS_INFO) << "bytes per audio frame: " << format->Format.nBlockAlign;
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: "
+ << buffer_size_in_frames * format->Format.nChannels *
+ (format->Format.wBitsPerSample / 8)
+ << " [bytes]";
+
+ // TODO(henrika): utilize when delay measurements are added.
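+ // For reference, GetStreamLatency() reports the maximum latency of the
+ // stream in 100-nanosecond REFERENCE_TIME units; the value is only logged
+ // here.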
+ REFERENCE_TIME latency = 0;
+ error = client->GetStreamLatency(&latency);
+ RTC_DLOG(LS_INFO) << "stream latency: "
+ << ReferenceTimeToTimeDelta(latency).ms() << " [ms]";
+ return error.Error();
+}
+
+HRESULT SharedModeInitializeLowLatency(IAudioClient3* client,
+ const WAVEFORMATEXTENSIBLE* format,
+ HANDLE event_handle,
+ uint32_t period_in_frames,
+ bool auto_convert_pcm,
+ uint32_t* endpoint_buffer_size) {
+ RTC_DLOG(LS_INFO) << "SharedModeInitializeLowLatency: period_in_frames="
+ << period_in_frames
+ << ", auto_convert_pcm=" << auto_convert_pcm;
+ RTC_DCHECK(client);
+ RTC_DCHECK_GT(period_in_frames, 0);
+ if (auto_convert_pcm) {
+ RTC_DLOG(LS_WARNING) << "Sample rate converter is enabled";
+ }
+
+ // Define stream flags.
+ DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST;
+ bool use_event =
+ (event_handle != nullptr && event_handle != INVALID_HANDLE_VALUE);
+ if (use_event) {
+ stream_flags |= AUDCLNT_STREAMFLAGS_EVENTCALLBACK;
+ RTC_DLOG(LS_INFO) << "The stream is initialized to be event driven";
+ }
+ if (auto_convert_pcm) {
+ stream_flags |= AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM;
+ stream_flags |= AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
+ }
+ RTC_DLOG(LS_INFO) << "stream_flags: 0x" << rtc::ToHex(stream_flags);
+
+ // Initialize the shared mode client for lowest possible latency.
+ // It is assumed that GetSharedModeEnginePeriod() has been used to query the
+ // smallest possible engine period and that it is given by `period_in_frames`.
+ _com_error error = client->InitializeSharedAudioStream(
+ stream_flags, period_in_frames,
+ reinterpret_cast<const WAVEFORMATEX*>(format), nullptr);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient3::InitializeSharedAudioStream failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ // Set the event handle.
+ if (use_event) {
+ error = client->SetEventHandle(event_handle);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::SetEventHandle failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+ }
+
+ UINT32 buffer_size_in_frames = 0;
+ // Retrieve the size (maximum capacity) of the endpoint buffer.
+ error = client->GetBufferSize(&buffer_size_in_frames);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_ERROR) << "IAudioClient::GetBufferSize failed: "
+ << ErrorToString(error);
+ return error.Error();
+ }
+
+ *endpoint_buffer_size = buffer_size_in_frames;
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: " << buffer_size_in_frames
+ << " [audio frames]";
+ const double size_in_ms = static_cast<double>(buffer_size_in_frames) /
+ (format->Format.nSamplesPerSec / 1000.0);
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: "
+ << static_cast<int>(size_in_ms + 0.5) << " [ms]";
+ RTC_DLOG(LS_INFO) << "bytes per audio frame: " << format->Format.nBlockAlign;
+ RTC_DLOG(LS_INFO) << "endpoint buffer size: "
+ << buffer_size_in_frames * format->Format.nChannels *
+ (format->Format.wBitsPerSample / 8)
+ << " [bytes]";
+
+ // TODO(henrika): utilize when delay measurements are added.
+ REFERENCE_TIME latency = 0;
+ error = client->GetStreamLatency(&latency);
+ if (FAILED(error.Error())) {
+ RTC_LOG(LS_WARNING) << "IAudioClient::GetStreamLatency failed: "
+ << ErrorToString(error);
+ } else {
+ RTC_DLOG(LS_INFO) << "stream latency: "
+ << ReferenceTimeToTimeDelta(latency).ms() << " [ms]";
+ }
+ return error.Error();
+}
+
+ComPtr<IAudioRenderClient> CreateRenderClient(IAudioClient* client) {
+ RTC_DLOG(LS_INFO) << "CreateRenderClient";
+ RTC_DCHECK(client);
+ // Get access to the IAudioRenderClient interface. This interface
+  // enables us to write output data to a rendering endpoint buffer.
+  ComPtr<IAudioRenderClient> audio_render_client;
+  _com_error error = client->GetService(IID_PPV_ARGS(&audio_render_client));
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR)
+        << "IAudioClient::GetService(IID_IAudioRenderClient) failed: "
+        << ErrorToString(error);
+    return ComPtr<IAudioRenderClient>();
+  }
+  return audio_render_client;
+}
+
+ComPtr<IAudioCaptureClient> CreateCaptureClient(IAudioClient* client) {
+  RTC_DLOG(LS_INFO) << "CreateCaptureClient";
+  RTC_DCHECK(client);
+  // Get access to the IAudioCaptureClient interface. This interface
+  // enables us to read input data from a capturing endpoint buffer.
+  ComPtr<IAudioCaptureClient> audio_capture_client;
+  _com_error error = client->GetService(IID_PPV_ARGS(&audio_capture_client));
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR)
+        << "IAudioClient::GetService(IID_IAudioCaptureClient) failed: "
+        << ErrorToString(error);
+    return ComPtr<IAudioCaptureClient>();
+  }
+  return audio_capture_client;
+}
+
+ComPtr<IAudioClock> CreateAudioClock(IAudioClient* client) {
+  RTC_DLOG(LS_INFO) << "CreateAudioClock";
+  RTC_DCHECK(client);
+  // Get access to the IAudioClock interface. This interface enables us to
+  // monitor a stream's data rate and the current position in the stream.
+  ComPtr<IAudioClock> audio_clock;
+  _com_error error = client->GetService(IID_PPV_ARGS(&audio_clock));
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::GetService(IID_IAudioClock) failed: "
+                      << ErrorToString(error);
+    return ComPtr<IAudioClock>();
+  }
+  return audio_clock;
+}
+
+ComPtr<IAudioSessionControl> CreateAudioSessionControl(IAudioClient* client) {
+  RTC_DLOG(LS_INFO) << "CreateAudioSessionControl";
+  RTC_DCHECK(client);
+  ComPtr<IAudioSessionControl> audio_session_control;
+  _com_error error = client->GetService(IID_PPV_ARGS(&audio_session_control));
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR)
+        << "IAudioClient::GetService(IID_IAudioSessionControl) failed: "
+        << ErrorToString(error);
+    return ComPtr<IAudioSessionControl>();
+  }
+  return audio_session_control;
+}
+
+ComPtr<ISimpleAudioVolume> CreateSimpleAudioVolume(IAudioClient* client) {
+  RTC_DLOG(LS_INFO) << "CreateSimpleAudioVolume";
+  RTC_DCHECK(client);
+  // Get access to the ISimpleAudioVolume interface. This interface enables a
+  // client to control the master volume level of an audio session.
+  ComPtr<ISimpleAudioVolume> simple_audio_volume;
+  _com_error error = client->GetService(IID_PPV_ARGS(&simple_audio_volume));
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR)
+        << "IAudioClient::GetService(IID_ISimpleAudioVolume) failed: "
+        << ErrorToString(error);
+    return ComPtr<ISimpleAudioVolume>();
+  }
+  return simple_audio_volume;
+}
+
+bool FillRenderEndpointBufferWithSilence(IAudioClient* client,
+                                         IAudioRenderClient* render_client) {
+  RTC_DLOG(LS_INFO) << "FillRenderEndpointBufferWithSilence";
+  RTC_DCHECK(client);
+  RTC_DCHECK(render_client);
+  UINT32 endpoint_buffer_size = 0;
+  _com_error error = client->GetBufferSize(&endpoint_buffer_size);
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::GetBufferSize failed: "
+                      << ErrorToString(error);
+    return false;
+  }
+
+  UINT32 num_queued_frames = 0;
+  // Get number of audio frames that are queued up to play in the endpoint
+  // buffer.
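+  // Added worked example (assumed numbers, for illustration only): with an
+  // endpoint buffer of 480 frames and 96 frames reported as queued by
+  // GetCurrentPadding(), 480 - 96 = 384 silent frames are requested from
+  // GetBuffer() below.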
+  error = client->GetCurrentPadding(&num_queued_frames);
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioClient::GetCurrentPadding failed: "
+                      << ErrorToString(error);
+    return false;
+  }
+  RTC_DLOG(LS_INFO) << "num_queued_frames: " << num_queued_frames;
+
+  BYTE* data = nullptr;
+  int num_frames_to_fill = endpoint_buffer_size - num_queued_frames;
+  RTC_DLOG(LS_INFO) << "num_frames_to_fill: " << num_frames_to_fill;
+  error = render_client->GetBuffer(num_frames_to_fill, &data);
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioRenderClient::GetBuffer failed: "
+                      << ErrorToString(error);
+    return false;
+  }
+
+  // Using the AUDCLNT_BUFFERFLAGS_SILENT flag eliminates the need to
+  // explicitly write silence data to the rendering buffer.
+  error = render_client->ReleaseBuffer(num_frames_to_fill,
+                                       AUDCLNT_BUFFERFLAGS_SILENT);
+  if (FAILED(error.Error())) {
+    RTC_LOG(LS_ERROR) << "IAudioRenderClient::ReleaseBuffer failed: "
+                      << ErrorToString(error);
+    return false;
+  }
+
+  return true;
+}
+
+std::string WaveFormatToString(const WaveFormatWrapper format) {
+  char ss_buf[1024];
+  rtc::SimpleStringBuilder ss(ss_buf);
+  // Start with the WAVEFORMATEX part (which always exists).
+  ss.AppendFormat("wFormatTag: %s (0x%X)",
+                  WaveFormatTagToString(format->wFormatTag),
+                  format->wFormatTag);
+  ss.AppendFormat(", nChannels: %d", format->nChannels);
+  ss.AppendFormat(", nSamplesPerSec: %d", format->nSamplesPerSec);
+  ss.AppendFormat(", nAvgBytesPerSec: %d", format->nAvgBytesPerSec);
+  ss.AppendFormat(", nBlockAlign: %d", format->nBlockAlign);
+  ss.AppendFormat(", wBitsPerSample: %d", format->wBitsPerSample);
+  ss.AppendFormat(", cbSize: %d", format->cbSize);
+  if (!format.IsExtensible())
+    return ss.str();
+
+  // Append the WAVEFORMATEXTENSIBLE part (which we know exists).
+  ss.AppendFormat(
+      " [+] wValidBitsPerSample: %d, dwChannelMask: %s",
+      format.GetExtensible()->Samples.wValidBitsPerSample,
+      ChannelMaskToString(format.GetExtensible()->dwChannelMask).c_str());
+  if (format.IsPcm()) {
+    ss.AppendFormat("%s", ", SubFormat: KSDATAFORMAT_SUBTYPE_PCM");
+  } else if (format.IsFloat()) {
+    ss.AppendFormat("%s", ", SubFormat: KSDATAFORMAT_SUBTYPE_IEEE_FLOAT");
+  } else {
+    ss.AppendFormat("%s", ", SubFormat: NOT_SUPPORTED");
+  }
+  return ss.str();
+}
+
+webrtc::TimeDelta ReferenceTimeToTimeDelta(REFERENCE_TIME time) {
+  // Each unit of reference time is 100 nanoseconds <=> 0.1 microsecond.
+  return webrtc::TimeDelta::Micros(0.1 * time + 0.5);
+}
+
+double FramesToMilliseconds(uint32_t num_frames, uint16_t sample_rate) {
+  // Convert the current period in frames into milliseconds.
+  return static_cast<double>(num_frames) / (sample_rate / 1000.0);
+}
+
+std::string ErrorToString(const _com_error& error) {
+  char ss_buf[1024];
+  rtc::SimpleStringBuilder ss(ss_buf);
+  ss.AppendFormat("(HRESULT: 0x%08X)", error.Error());
+  return ss.str();
+}
+
+}  // namespace core_audio_utility
+}  // namespace webrtc_win
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.h b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.h
new file mode 100644
index 0000000000..454e60bf31
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_UTILITY_WIN_H_
+#define MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_UTILITY_WIN_H_
+
+#include <audioclient.h>
+#include <audiopolicy.h>
+#include <avrt.h>
+#include <comdef.h>
+#include <mmdeviceapi.h>
+#include <objbase.h>
+#include <propidl.h>
+#include <wrl/client.h>
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "api/units/time_delta.h"
+#include "modules/audio_device/audio_device_name.h"
+#include "modules/audio_device/include/audio_device_defines.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/string_utils.h"
+
+#pragma comment(lib, "Avrt.lib")
+
+namespace webrtc {
+namespace webrtc_win {
+
+// Utility class which registers a thread with MMCSS in the constructor and
+// deregisters MMCSS in the destructor. The task name is given by `task_name`.
+// The Multimedia Class Scheduler service (MMCSS) enables multimedia
+// applications to ensure that their time-sensitive processing receives
+// prioritized access to CPU resources without denying CPU resources to
+// lower-priority applications.
+class ScopedMMCSSRegistration {
+ public:
+  const char* PriorityClassToString(DWORD priority_class) {
+    switch (priority_class) {
+      case ABOVE_NORMAL_PRIORITY_CLASS:
+        return "ABOVE_NORMAL";
+      case BELOW_NORMAL_PRIORITY_CLASS:
+        return "BELOW_NORMAL";
+      case HIGH_PRIORITY_CLASS:
+        return "HIGH";
+      case IDLE_PRIORITY_CLASS:
+        return "IDLE";
+      case NORMAL_PRIORITY_CLASS:
+        return "NORMAL";
+      case REALTIME_PRIORITY_CLASS:
+        return "REALTIME";
+      default:
+        return "INVALID";
+    }
+  }
+
+  const char* PriorityToString(int priority) {
+    switch (priority) {
+      case THREAD_PRIORITY_ABOVE_NORMAL:
+        return "ABOVE_NORMAL";
+      case THREAD_PRIORITY_BELOW_NORMAL:
+        return "BELOW_NORMAL";
+      case THREAD_PRIORITY_HIGHEST:
+        return "HIGHEST";
+      case THREAD_PRIORITY_IDLE:
+        return "IDLE";
+      case THREAD_PRIORITY_LOWEST:
+        return "LOWEST";
+      case THREAD_PRIORITY_NORMAL:
+        return "NORMAL";
+      case THREAD_PRIORITY_TIME_CRITICAL:
+        return "TIME_CRITICAL";
+      default:
+        // Can happen in combination with REALTIME_PRIORITY_CLASS.
+        return "INVALID";
+    }
+  }
+
+  explicit ScopedMMCSSRegistration(const wchar_t* task_name) {
+    RTC_DLOG(LS_INFO) << "ScopedMMCSSRegistration: " << rtc::ToUtf8(task_name);
+    // Register the calling thread with MMCSS for the supplied `task_name`.
+    DWORD mmcss_task_index = 0;
+    mmcss_handle_ = AvSetMmThreadCharacteristicsW(task_name, &mmcss_task_index);
+    if (mmcss_handle_ == nullptr) {
+      RTC_LOG(LS_ERROR) << "Failed to enable MMCSS on this thread: "
+                        << GetLastError();
+    } else {
+      const DWORD priority_class = GetPriorityClass(GetCurrentProcess());
+      const int priority = GetThreadPriority(GetCurrentThread());
+      RTC_DLOG(LS_INFO) << "priority class: "
+                        << PriorityClassToString(priority_class) << "("
+                        << priority_class << ")";
+      RTC_DLOG(LS_INFO) << "priority: " << PriorityToString(priority) << "("
+                        << priority << ")";
+    }
+  }
+
+  ~ScopedMMCSSRegistration() {
+    if (Succeeded()) {
+      // Deregister with MMCSS.
+      RTC_DLOG(LS_INFO) << "~ScopedMMCSSRegistration";
+      AvRevertMmThreadCharacteristics(mmcss_handle_);
+    }
+  }
+
+  ScopedMMCSSRegistration(const ScopedMMCSSRegistration&) = delete;
+  ScopedMMCSSRegistration& operator=(const ScopedMMCSSRegistration&) = delete;
+
+  bool Succeeded() const { return mmcss_handle_ != nullptr; }
+
+ private:
+  HANDLE mmcss_handle_ = nullptr;
+};
+
+// A PROPVARIANT that is automatically initialized and cleared upon respective
+// construction and destruction of this class.
+class ScopedPropVariant {
+ public:
+  ScopedPropVariant() { PropVariantInit(&pv_); }
+
+  ~ScopedPropVariant() { Reset(); }
+
+  ScopedPropVariant(const ScopedPropVariant&) = delete;
+  ScopedPropVariant& operator=(const ScopedPropVariant&) = delete;
+  bool operator==(const ScopedPropVariant&) const = delete;
+  bool operator!=(const ScopedPropVariant&) const = delete;
+
+  // Returns a pointer to the underlying PROPVARIANT for use as an out param in
+  // a function call.
+  PROPVARIANT* Receive() {
+    RTC_DCHECK_EQ(pv_.vt, VT_EMPTY);
+    return &pv_;
+  }
+
+  // Clears the instance to prepare it for re-use (e.g., via Receive).
+  void Reset() {
+    if (pv_.vt != VT_EMPTY) {
+      HRESULT result = PropVariantClear(&pv_);
+      RTC_DCHECK_EQ(result, S_OK);
+    }
+  }
+
+  const PROPVARIANT& get() const { return pv_; }
+  const PROPVARIANT* ptr() const { return &pv_; }
+
+ private:
+  PROPVARIANT pv_;
+};
+
+// Simple scoped memory releaser class for COM allocated memory.
+template <typename T>
+class ScopedCoMem {
+ public:
+  ScopedCoMem() : mem_ptr_(nullptr) {}
+
+  ~ScopedCoMem() { Reset(nullptr); }
+
+  ScopedCoMem(const ScopedCoMem&) = delete;
+  ScopedCoMem& operator=(const ScopedCoMem&) = delete;
+
+  T** operator&() {  // NOLINT
+    RTC_DCHECK(mem_ptr_ == nullptr);  // To catch memory leaks.
+    return &mem_ptr_;
+  }
+
+  operator T*() { return mem_ptr_; }
+
+  T* operator->() {
+    RTC_DCHECK(mem_ptr_ != nullptr);
+    return mem_ptr_;
+  }
+
+  const T* operator->() const {
+    RTC_DCHECK(mem_ptr_ != nullptr);
+    return mem_ptr_;
+  }
+
+  explicit operator bool() const { return mem_ptr_; }
+
+  friend bool operator==(const ScopedCoMem& lhs, std::nullptr_t) {
+    return lhs.Get() == nullptr;
+  }
+
+  friend bool operator==(std::nullptr_t, const ScopedCoMem& rhs) {
+    return rhs.Get() == nullptr;
+  }
+
+  friend bool operator!=(const ScopedCoMem& lhs, std::nullptr_t) {
+    return lhs.Get() != nullptr;
+  }
+
+  friend bool operator!=(std::nullptr_t, const ScopedCoMem& rhs) {
+    return rhs.Get() != nullptr;
+  }
+
+  void Reset(T* ptr) {
+    if (mem_ptr_)
+      CoTaskMemFree(mem_ptr_);
+    mem_ptr_ = ptr;
+  }
+
+  T* Get() const { return mem_ptr_; }
+
+ private:
+  T* mem_ptr_;
+};
+
+// A HANDLE that is automatically initialized and closed upon respective
+// construction and destruction of this class.
+class ScopedHandle {
+ public:
+  ScopedHandle() : handle_(nullptr) {}
+  explicit ScopedHandle(HANDLE h) : handle_(nullptr) { Set(h); }
+
+  ~ScopedHandle() { Close(); }
+
+  ScopedHandle& operator=(const ScopedHandle&) = delete;
+  bool operator==(const ScopedHandle&) const = delete;
+  bool operator!=(const ScopedHandle&) const = delete;
+
+  // Use this instead of comparing to INVALID_HANDLE_VALUE.
+  bool IsValid() const { return handle_ != nullptr; }
+
+  void Set(HANDLE new_handle) {
+    Close();
+    // Windows is inconsistent about invalid handles.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20040302-00/?p=40443
+    // for details.
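+    // Added note (illustration, not part of the original change): normalizing
+    // INVALID_HANDLE_VALUE to nullptr here is what lets IsValid() get away
+    // with a single nullptr comparison above; e.g.
+    // ScopedHandle(INVALID_HANDLE_VALUE).IsValid() evaluates to false.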
+    if (new_handle != INVALID_HANDLE_VALUE) {
+      handle_ = new_handle;
+    }
+  }
+
+  HANDLE Get() const { return handle_; }
+
+  operator HANDLE() const { return handle_; }
+
+  void Close() {
+    if (handle_) {
+      if (!::CloseHandle(handle_)) {
+        RTC_DCHECK_NOTREACHED();
+      }
+      handle_ = nullptr;
+    }
+  }
+
+ private:
+  HANDLE handle_;
+};
+
+// Utility methods for the Core Audio API on Windows.
+// Always ensure that Core Audio is supported before using these methods.
+// Use webrtc_win::core_audio_utility::IsSupported() for this purpose.
+// Also, all methods must be called on a valid COM thread. This can be done
+// by using the ScopedCOMInitializer helper class.
+// These methods are based on media::CoreAudioUtil in Chrome.
+namespace core_audio_utility {
+
+// Helper class which automates casting between WAVEFORMATEX and
+// WAVEFORMATEXTENSIBLE raw pointers using implicit constructors and
+// operator overloading. Note that no memory is allocated by this utility
+// structure. It only serves as a handle (or a wrapper) of the structure
+// provided to it at construction.
+class WaveFormatWrapper {
+ public:
+  WaveFormatWrapper(WAVEFORMATEXTENSIBLE* p)
+      : ptr_(reinterpret_cast<WAVEFORMATEX*>(p)) {}
+  WaveFormatWrapper(WAVEFORMATEX* p) : ptr_(p) {}
+  ~WaveFormatWrapper() = default;
+
+  operator WAVEFORMATEX*() const { return ptr_; }
+  WAVEFORMATEX* operator->() const { return ptr_; }
+  WAVEFORMATEX* get() const { return ptr_; }
+  WAVEFORMATEXTENSIBLE* GetExtensible() const;
+
+  bool IsExtensible() const;
+  bool IsPcm() const;
+  bool IsFloat() const;
+  size_t size() const;
+
+ private:
+  WAVEFORMATEX* ptr_;
+};
+
+// Returns true if Windows Core Audio is supported.
+// Always verify that this method returns true before using any of the
+// other methods in this class.
+bool IsSupported();
+
+// Returns true if Multimedia Class Scheduler service (MMCSS) is supported.
+// The MMCSS enables multimedia applications to ensure that their
+// time-sensitive processing receives prioritized access to CPU resources
+// without denying CPU resources to lower-priority applications.
+bool IsMMCSSSupported();
+
+// The MMDevice API lets clients discover the audio endpoint devices in the
+// system and determine which devices are suitable for the application to use.
+// Header file Mmdeviceapi.h defines the interfaces in the MMDevice API.
+
+// Number of active audio devices in the specified data flow direction.
+// Set `data_flow` to eAll to retrieve the total number of active audio
+// devices.
+int NumberOfActiveDevices(EDataFlow data_flow);
+
+// Returns 1, 2, or 3 depending on what version of IAudioClient the platform
+// supports.
+// Example: IAudioClient2 is supported on Windows 8 and higher => 2 is
+// returned.
+uint32_t GetAudioClientVersion();
+
+// Creates an IMMDeviceEnumerator interface which provides methods for
+// enumerating audio endpoint devices.
+// TODO(henrika): IMMDeviceEnumerator::RegisterEndpointNotificationCallback.
+Microsoft::WRL::ComPtr<IMMDeviceEnumerator> CreateDeviceEnumerator();
+
+// These functions return the unique device id of the default or
+// communications input/output device, or an empty string if no such device
+// exists or if the device has been disabled.
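+// Illustrative usage (sketch, not part of the original documentation):
+//   std::string id = GetDefaultOutputDeviceID();
+//   if (id.empty()) {
+//     // No default render device exists, or it has been disabled.
+//   }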
+std::string GetDefaultInputDeviceID();
+std::string GetDefaultOutputDeviceID();
+std::string GetCommunicationsInputDeviceID();
+std::string GetCommunicationsOutputDeviceID();
+
+// Creates an IMMDevice interface corresponding to the unique device id in
+// `device_id`, or by data-flow direction and role if `device_id` is set to
+// AudioDeviceName::kDefaultDeviceId.
+Microsoft::WRL::ComPtr<IMMDevice> CreateDevice(absl::string_view device_id,
+                                               EDataFlow data_flow,
+                                               ERole role);
+
+// Returns the unique ID and user-friendly name of a given endpoint device.
+// Example: "{0.0.1.00000000}.{8db6020f-18e3-4f25-b6f5-7726c9122574}", and
+// "Microphone (Realtek High Definition Audio)".
+webrtc::AudioDeviceName GetDeviceName(IMMDevice* device);
+
+// Gets the user-friendly name of the endpoint device which is represented
+// by a unique id in `device_id`, or by data-flow direction and role if
+// `device_id` is set to AudioDeviceName::kDefaultDeviceId.
+std::string GetFriendlyName(absl::string_view device_id,
+                            EDataFlow data_flow,
+                            ERole role);
+
+// Query if the audio device is a rendering device or a capture device.
+EDataFlow GetDataFlow(IMMDevice* device);
+
+// Enumerates all input devices and adds the names (friendly name and unique
+// device id) to the list in `device_names`.
+bool GetInputDeviceNames(webrtc::AudioDeviceNames* device_names);
+
+// Enumerates all output devices and adds the names (friendly name and unique
+// device id) to the list in `device_names`.
+bool GetOutputDeviceNames(webrtc::AudioDeviceNames* device_names);
+
+// The Windows Audio Session API (WASAPI) enables client applications to
+// manage the flow of audio data between the application and an audio endpoint
+// device. Header files Audioclient.h and Audiopolicy.h define the WASAPI
+// interfaces.
+
+// Creates an IAudioSessionManager2 interface for the specified `device`.
+// This interface provides access to e.g. the IAudioSessionEnumerator.
+Microsoft::WRL::ComPtr<IAudioSessionManager2> CreateSessionManager2(
+    IMMDevice* device);
+
+// Creates an IAudioSessionEnumerator interface for the specified `device`.
+// The client can use the interface to enumerate audio sessions on the audio
+// device.
+Microsoft::WRL::ComPtr<IAudioSessionEnumerator> CreateSessionEnumerator(
+    IMMDevice* device);
+
+// Number of active audio sessions for the given `device`. Expired or inactive
+// sessions are not included.
+int NumberOfActiveSessions(IMMDevice* device);
+
+// Creates an IAudioClient instance for a specific device or the default
+// device specified by data-flow direction and role.
+Microsoft::WRL::ComPtr<IAudioClient> CreateClient(absl::string_view device_id,
+                                                  EDataFlow data_flow,
+                                                  ERole role);
+Microsoft::WRL::ComPtr<IAudioClient2> CreateClient2(
+    absl::string_view device_id,
+    EDataFlow data_flow,
+    ERole role);
+Microsoft::WRL::ComPtr<IAudioClient3> CreateClient3(
+    absl::string_view device_id,
+    EDataFlow data_flow,
+    ERole role);
+
+// Sets the AudioCategory_Communications category. Should be called before
+// GetSharedModeMixFormat() and IsFormatSupported(). The `client` argument must
+// be an IAudioClient2 or IAudioClient3 interface pointer, hence only supported
+// on Windows 8 and above.
+// TODO(henrika): evaluate effect (if any).
+HRESULT SetClientProperties(IAudioClient2* client);
+
+// Returns the buffer size limits of the hardware audio engine in
+// 100-nanosecond units given a specified `format`. Does not require prior
+// audio stream initialization. The `client` argument must be an IAudioClient2
+// or IAudioClient3 interface pointer, hence only supported on Windows 8 and
+// above.
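+// (Added note, assumption: GetBufferSizeLimits appears to be intended for
+// hardware-offloaded streams, which may explain the
+// AUDCLNT_E_OFFLOAD_MODE_ONLY result mentioned in the TODO below.)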
+// TODO(henrika): always fails with AUDCLNT_E_OFFLOAD_MODE_ONLY.
+HRESULT GetBufferSizeLimits(IAudioClient2* client,
+                            const WAVEFORMATEXTENSIBLE* format,
+                            REFERENCE_TIME* min_buffer_duration,
+                            REFERENCE_TIME* max_buffer_duration);
+
+// Get the mix format that the audio engine uses internally for processing
+// of shared-mode streams. The client can call this method before calling
+// IAudioClient::Initialize. When creating a shared-mode stream for an audio
+// endpoint device, the Initialize method always accepts the stream format
+// obtained by this method.
+HRESULT GetSharedModeMixFormat(IAudioClient* client,
+                               WAVEFORMATEXTENSIBLE* format);
+
+// Returns true if the specified `client` supports the format in `format`
+// for the given `share_mode` (shared or exclusive). The client can call this
+// method before calling IAudioClient::Initialize.
+bool IsFormatSupported(IAudioClient* client,
+                       AUDCLNT_SHAREMODE share_mode,
+                       const WAVEFORMATEXTENSIBLE* format);
+
+// For a shared-mode stream, the audio engine periodically processes the
+// data in the endpoint buffer at the period obtained in `device_period`.
+// For an exclusive-mode stream, `device_period` corresponds to the minimum
+// time interval between successive processing passes by the endpoint device.
+// This period plus the stream latency between the buffer and endpoint device
+// represents the minimum possible latency that an audio application can
+// achieve. The time in `device_period` is expressed in 100-nanosecond units.
+HRESULT GetDevicePeriod(IAudioClient* client,
+                        AUDCLNT_SHAREMODE share_mode,
+                        REFERENCE_TIME* device_period);
+
+// Returns the range of periodicities supported by the engine for the
+// specified stream `format`. The periodicity of the engine is the rate at
+// which the engine wakes an event-driven audio client to transfer audio data
+// to or from the engine. Can be used for low-latency support on some devices.
+// The `client` argument must be an IAudioClient3 interface pointer, hence
+// only supported on Windows 10 and above.
+HRESULT GetSharedModeEnginePeriod(IAudioClient3* client3,
+                                  const WAVEFORMATEXTENSIBLE* format,
+                                  uint32_t* default_period_in_frames,
+                                  uint32_t* fundamental_period_in_frames,
+                                  uint32_t* min_period_in_frames,
+                                  uint32_t* max_period_in_frames);
+
+// Get the preferred audio parameters for the given `client` corresponding to
+// the stream format that the audio engine uses for its internal processing of
+// shared-mode streams. The acquired values should only be utilized for
+// shared-mode streams since there are no preferred settings for an
+// exclusive-mode stream.
+HRESULT GetPreferredAudioParameters(IAudioClient* client,
+                                    webrtc::AudioParameters* params);
+// As above but override the preferred sample rate and use `sample_rate`
+// instead. Intended mainly for testing purposes and in combination with rate
+// conversion.
+HRESULT GetPreferredAudioParameters(IAudioClient* client,
+                                    webrtc::AudioParameters* params,
+                                    uint32_t sample_rate);
+
+// After activating an IAudioClient interface on an audio endpoint device,
+// the client must initialize it once, and only once, to initialize the audio
+// stream between the client and the device. In shared mode, the client
+// connects indirectly through the audio engine which does the mixing.
+// If a valid event is provided in `event_handle`, the client will be
+// initialized for event-driven buffer handling. If `event_handle` is set to
+// nullptr, event-driven buffer handling is not utilized. To achieve the
+// minimum stream latency between the client application and audio endpoint
+// device, set `buffer_duration` to 0. A client has the option of requesting a
+// buffer size that is larger than what is strictly necessary to make timing
+// glitches rare or nonexistent. Increasing the buffer size does not
+// necessarily increase the stream latency. Each unit of reference time is
+// 100 nanoseconds.
+// The `auto_convert_pcm` parameter can be used for testing purposes to ensure
+// that the sample rate of the client side does not have to match the audio
+// engine mix format. If `auto_convert_pcm` is set to true, a rate converter
+// will be inserted to convert between the sample rate in `format` and the
+// preferred rate given by GetPreferredAudioParameters().
+// The output parameter `endpoint_buffer_size` contains the size of the
+// endpoint buffer and it is expressed as the number of audio frames the
+// buffer can hold.
+HRESULT SharedModeInitialize(IAudioClient* client,
+                             const WAVEFORMATEXTENSIBLE* format,
+                             HANDLE event_handle,
+                             REFERENCE_TIME buffer_duration,
+                             bool auto_convert_pcm,
+                             uint32_t* endpoint_buffer_size);
+
+// Works as SharedModeInitialize() but adds support for using smaller engine
+// periods than the default period.
+// The `client` argument must be an IAudioClient3 interface pointer, hence
+// only supported on Windows 10 and above.
+// TODO(henrika): can probably be merged into SharedModeInitialize() to avoid
+// duplicating code. Keeping as separate method for now until decided if we
+// need low-latency support.
+HRESULT SharedModeInitializeLowLatency(IAudioClient3* client,
+                                       const WAVEFORMATEXTENSIBLE* format,
+                                       HANDLE event_handle,
+                                       uint32_t period_in_frames,
+                                       bool auto_convert_pcm,
+                                       uint32_t* endpoint_buffer_size);
+
+// Creates an IAudioRenderClient client for an existing IAudioClient given by
+// `client`. The IAudioRenderClient interface enables a client to write
+// output data to a rendering endpoint buffer. The methods in this interface
+// manage the movement of data packets that contain audio-rendering data.
+Microsoft::WRL::ComPtr<IAudioRenderClient> CreateRenderClient(
+    IAudioClient* client);
+
+// Creates an IAudioCaptureClient client for an existing IAudioClient given by
+// `client`. The IAudioCaptureClient interface enables a client to read
+// input data from a capture endpoint buffer. The methods in this interface
+// manage the movement of data packets that contain capture data.
+Microsoft::WRL::ComPtr<IAudioCaptureClient> CreateCaptureClient(
+    IAudioClient* client);
+
+// Creates an IAudioClock interface for an existing IAudioClient given by
+// `client`. The IAudioClock interface enables a client to monitor a stream's
+// data rate and the current position in the stream.
+Microsoft::WRL::ComPtr<IAudioClock> CreateAudioClock(IAudioClient* client);
+
+// Creates an IAudioSessionControl interface for an existing IAudioClient
+// given by `client`. The IAudioSessionControl interface enables a client to
+// configure the control parameters for an audio session and to monitor events
+// in the session.
+Microsoft::WRL::ComPtr<IAudioSessionControl> CreateAudioSessionControl(
+    IAudioClient* client);
+
+// Creates an ISimpleAudioVolume interface for an existing IAudioClient given
+// by `client`. This interface enables a client to control the master volume
+// level of an active audio session.
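+// Illustrative call sequence (sketch, assuming `client` is an initialized
+// IAudioClient; not part of the original documentation):
+//   Microsoft::WRL::ComPtr<ISimpleAudioVolume> volume =
+//       CreateSimpleAudioVolume(client.Get());
+//   if (volume) {
+//     volume->SetMasterVolume(0.5f, nullptr);
+//   }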
+Microsoft::WRL::ComPtr<ISimpleAudioVolume> CreateSimpleAudioVolume(
+    IAudioClient* client);
+
+// Fills up the endpoint rendering buffer with silence for an existing
+// IAudioClient given by `client` and a corresponding IAudioRenderClient
+// given by `render_client`.
+bool FillRenderEndpointBufferWithSilence(IAudioClient* client,
+                                         IAudioRenderClient* render_client);
+
+// Prints/logs all fields of the format structure in `format`.
+// Also supports extended versions (WAVEFORMATEXTENSIBLE).
+std::string WaveFormatToString(WaveFormatWrapper format);
+
+// Converts Windows internal REFERENCE_TIME (100 nanosecond units) into
+// generic webrtc::TimeDelta which then can be converted to any time unit.
+webrtc::TimeDelta ReferenceTimeToTimeDelta(REFERENCE_TIME time);
+
+// Converts size expressed in number of audio frames, `num_frames`, into
+// milliseconds given a specified `sample_rate`.
+double FramesToMilliseconds(uint32_t num_frames, uint16_t sample_rate);
+
+// Converts a COM error into a human-readable string.
+std::string ErrorToString(const _com_error& error);
+
+}  // namespace core_audio_utility
+}  // namespace webrtc_win
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_DEVICE_WIN_CORE_AUDIO_UTILITY_WIN_H_
diff --git a/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win_unittest.cc b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win_unittest.cc
new file mode 100644
index 0000000000..277f54eb35
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_device/win/core_audio_utility_win_unittest.cc
@@ -0,0 +1,876 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_device/win/core_audio_utility_win.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/win/scoped_com_initializer.h"
+#include "rtc_base/win/windows_version.h"
+#include "test/gtest.h"
+
+using Microsoft::WRL::ComPtr;
+using webrtc::AudioDeviceName;
+
+namespace webrtc {
+namespace webrtc_win {
+namespace {
+
+#define ABORT_TEST_IF_NOT(requirements_satisfied)                        \
+  do {                                                                   \
+    bool fail = false;                                                   \
+    if (ShouldAbortTest(requirements_satisfied, #requirements_satisfied, \
+                        &fail)) {                                        \
+      if (fail)                                                          \
+        FAIL();                                                          \
+      else                                                               \
+        return;                                                          \
+    }                                                                    \
+  } while (false)
+
+bool ShouldAbortTest(bool requirements_satisfied,
+                     const char* requirements_expression,
+                     bool* should_fail) {
+  if (!requirements_satisfied) {
+    RTC_LOG(LS_ERROR) << "Requirement(s) not satisfied ("
+                      << requirements_expression << ")";
+    // TODO(henrika): improve hard-coded condition to determine if test should
+    // fail or be ignored. Could use e.g. a command-line argument here to
+    // determine if the test should fail or not.
+    *should_fail = false;
+    return true;
+  }
+  *should_fail = false;
+  return false;
+}
+
+}  // namespace
+
+// CoreAudioUtilityWinTest test fixture.
+class CoreAudioUtilityWinTest : public ::testing::Test {
+ protected:
+  CoreAudioUtilityWinTest() : com_init_(ScopedCOMInitializer::kMTA) {
+    // We must initialize the COM library on a thread before calling any of
+    // the library functions. All COM functions will return CO_E_NOTINITIALIZED
+    // otherwise.
+    EXPECT_TRUE(com_init_.Succeeded());
+
+    // Configure logging.
+    rtc::LogMessage::LogToDebug(rtc::LS_INFO);
+    rtc::LogMessage::LogTimestamps();
+    rtc::LogMessage::LogThreads();
+  }
+
+  virtual ~CoreAudioUtilityWinTest() {}
+
+  bool DevicesAvailable() {
+    return core_audio_utility::IsSupported() &&
+           core_audio_utility::NumberOfActiveDevices(eCapture) > 0 &&
+           core_audio_utility::NumberOfActiveDevices(eRender) > 0;
+  }
+
+ private:
+  ScopedCOMInitializer com_init_;
+};
+
+TEST_F(CoreAudioUtilityWinTest, WaveFormatWrapper) {
+  // Use default constructor for WAVEFORMATEX and verify its size.
+  WAVEFORMATEX format = {};
+  core_audio_utility::WaveFormatWrapper wave_format(&format);
+  EXPECT_FALSE(wave_format.IsExtensible());
+  EXPECT_EQ(wave_format.size(), sizeof(WAVEFORMATEX));
+  EXPECT_EQ(wave_format->cbSize, 0);
+
+  // Ensure that the stand-alone WAVEFORMATEX structure has a valid format tag
+  // and that all accessors work.
+  format.wFormatTag = WAVE_FORMAT_PCM;
+  EXPECT_FALSE(wave_format.IsExtensible());
+  EXPECT_EQ(wave_format.size(), sizeof(WAVEFORMATEX));
+  EXPECT_EQ(wave_format.get()->wFormatTag, WAVE_FORMAT_PCM);
+  EXPECT_EQ(wave_format->wFormatTag, WAVE_FORMAT_PCM);
+
+  // Next, ensure that the size is valid. Stand-alone is not extended.
+  EXPECT_EQ(wave_format.size(), sizeof(WAVEFORMATEX));
+
+  // Verify format types for the stand-alone version.
+  EXPECT_TRUE(wave_format.IsPcm());
+  EXPECT_FALSE(wave_format.IsFloat());
+  format.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
+  EXPECT_TRUE(wave_format.IsFloat());
+}
+
+TEST_F(CoreAudioUtilityWinTest, WaveFormatWrapperExtended) {
+  // Use default constructor for WAVEFORMATEXTENSIBLE and verify that it
+  // reports the same size as WAVEFORMATEX until cbSize is set, even though
+  // the allocated `format_ex` is as large as WAVEFORMATEXTENSIBLE.
+  WAVEFORMATEXTENSIBLE format_ex = {};
+  core_audio_utility::WaveFormatWrapper wave_format_ex(&format_ex);
+  EXPECT_FALSE(wave_format_ex.IsExtensible());
+  EXPECT_EQ(wave_format_ex.size(), sizeof(WAVEFORMATEX));
+  EXPECT_EQ(wave_format_ex->cbSize, 0);
+
+  // Ensure that the extended structure has a valid format tag and that all
+  // accessors work.
+  format_ex.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
+  EXPECT_FALSE(wave_format_ex.IsExtensible());
+  EXPECT_EQ(wave_format_ex.size(), sizeof(WAVEFORMATEX));
+  EXPECT_EQ(wave_format_ex->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
+  EXPECT_EQ(wave_format_ex.get()->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
+
+  // Next, ensure that the size is valid (sum of stand-alone and extended).
+  // Now the structure qualifies as extended.
+  format_ex.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
+  EXPECT_TRUE(wave_format_ex.IsExtensible());
+  EXPECT_EQ(wave_format_ex.size(), sizeof(WAVEFORMATEXTENSIBLE));
+  EXPECT_TRUE(wave_format_ex.GetExtensible());
+  EXPECT_EQ(wave_format_ex.GetExtensible()->Format.wFormatTag,
+            WAVE_FORMAT_EXTENSIBLE);
+
+  // Verify format types for the extended version.
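+  // Added explanatory note: in the extensible layout the sample format is
+  // identified by the SubFormat GUID rather than by wFormatTag, which is
+  // what the checks below exercise.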
+  EXPECT_FALSE(wave_format_ex.IsPcm());
+  format_ex.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+  EXPECT_TRUE(wave_format_ex.IsPcm());
+  EXPECT_FALSE(wave_format_ex.IsFloat());
+  format_ex.SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
+  EXPECT_TRUE(wave_format_ex.IsFloat());
+}
+
+TEST_F(CoreAudioUtilityWinTest, NumberOfActiveDevices) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  int render_devices = core_audio_utility::NumberOfActiveDevices(eRender);
+  EXPECT_GT(render_devices, 0);
+  int capture_devices = core_audio_utility::NumberOfActiveDevices(eCapture);
+  EXPECT_GT(capture_devices, 0);
+  int total_devices = core_audio_utility::NumberOfActiveDevices(eAll);
+  EXPECT_EQ(total_devices, render_devices + capture_devices);
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetAudioClientVersion) {
+  uint32_t client_version = core_audio_utility::GetAudioClientVersion();
+  EXPECT_GE(client_version, 1u);
+  EXPECT_LE(client_version, 3u);
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateDeviceEnumerator) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  ComPtr<IMMDeviceEnumerator> enumerator =
+      core_audio_utility::CreateDeviceEnumerator();
+  EXPECT_TRUE(enumerator.Get());
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetDefaultInputDeviceID) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  std::string default_device_id = core_audio_utility::GetDefaultInputDeviceID();
+  EXPECT_FALSE(default_device_id.empty());
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetDefaultOutputDeviceID) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  std::string default_device_id =
+      core_audio_utility::GetDefaultOutputDeviceID();
+  EXPECT_FALSE(default_device_id.empty());
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetCommunicationsInputDeviceID) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  std::string default_device_id =
+      core_audio_utility::GetCommunicationsInputDeviceID();
+  EXPECT_FALSE(default_device_id.empty());
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetCommunicationsOutputDeviceID) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+  std::string default_device_id =
+      core_audio_utility::GetCommunicationsOutputDeviceID();
+  EXPECT_FALSE(default_device_id.empty());
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateDefaultDevice) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  struct {
+    EDataFlow flow;
+    ERole role;
+  } data[] = {{eRender, eConsole},         {eRender, eCommunications},
+              {eRender, eMultimedia},      {eCapture, eConsole},
+              {eCapture, eCommunications}, {eCapture, eMultimedia}};
+
+  // Create default devices for all flow/role combinations above.
+  ComPtr<IMMDevice> audio_device;
+  for (size_t i = 0; i < arraysize(data); ++i) {
+    audio_device = core_audio_utility::CreateDevice(
+        AudioDeviceName::kDefaultDeviceId, data[i].flow, data[i].role);
+    EXPECT_TRUE(audio_device.Get());
+    EXPECT_EQ(data[i].flow,
+              core_audio_utility::GetDataFlow(audio_device.Get()));
+  }
+
+  // Only eRender and eCapture are allowed as flow parameter.
+  audio_device = core_audio_utility::CreateDevice(
+      AudioDeviceName::kDefaultDeviceId, eAll, eConsole);
+  EXPECT_FALSE(audio_device.Get());
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateDevice) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  // Get name and ID of default device used for playback.
+  ComPtr<IMMDevice> default_render_device = core_audio_utility::CreateDevice(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  AudioDeviceName default_render_name =
+      core_audio_utility::GetDeviceName(default_render_device.Get());
+  EXPECT_TRUE(default_render_name.IsValid());
+
+  // Use the unique ID as input to CreateDevice() and create a corresponding
+  // IMMDevice. The data-flow direction and role parameters are ignored for
+  // this scenario.
+  ComPtr<IMMDevice> audio_device = core_audio_utility::CreateDevice(
+      default_render_name.unique_id, EDataFlow(), ERole());
+  EXPECT_TRUE(audio_device.Get());
+
+  // Verify that the two IMMDevice interfaces represent the same endpoint
+  // by comparing their unique IDs.
+  AudioDeviceName device_name =
+      core_audio_utility::GetDeviceName(audio_device.Get());
+  EXPECT_EQ(default_render_name.unique_id, device_name.unique_id);
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetDefaultDeviceName) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  struct {
+    EDataFlow flow;
+    ERole role;
+  } data[] = {{eRender, eConsole},
+              {eRender, eCommunications},
+              {eCapture, eConsole},
+              {eCapture, eCommunications}};
+
+  // Get name and ID of default devices for all flow/role combinations above.
+  ComPtr<IMMDevice> audio_device;
+  AudioDeviceName device_name;
+  for (size_t i = 0; i < arraysize(data); ++i) {
+    audio_device = core_audio_utility::CreateDevice(
+        AudioDeviceName::kDefaultDeviceId, data[i].flow, data[i].role);
+    device_name = core_audio_utility::GetDeviceName(audio_device.Get());
+    EXPECT_TRUE(device_name.IsValid());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetFriendlyName) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  // Get name and ID of default device used for recording.
+  ComPtr<IMMDevice> audio_device = core_audio_utility::CreateDevice(
+      AudioDeviceName::kDefaultDeviceId, eCapture, eConsole);
+  AudioDeviceName device_name =
+      core_audio_utility::GetDeviceName(audio_device.Get());
+  EXPECT_TRUE(device_name.IsValid());
+
+  // Use unique ID as input to GetFriendlyName() and compare the result
+  // with the already obtained friendly name for the default capture device.
+  std::string friendly_name = core_audio_utility::GetFriendlyName(
+      device_name.unique_id, eCapture, eConsole);
+  EXPECT_EQ(friendly_name, device_name.device_name);
+
+  // Same test as above but for playback.
+  audio_device = core_audio_utility::CreateDevice(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  device_name = core_audio_utility::GetDeviceName(audio_device.Get());
+  friendly_name = core_audio_utility::GetFriendlyName(device_name.unique_id,
+                                                      eRender, eConsole);
+  EXPECT_EQ(friendly_name, device_name.device_name);
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetInputDeviceNames) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  webrtc::AudioDeviceNames device_names;
+  EXPECT_TRUE(core_audio_utility::GetInputDeviceNames(&device_names));
+  // Number of elements in the list should be two more than the number of
+  // active devices since we always add default and default communication
+  // devices on index 0 and 1.
+  EXPECT_EQ(static_cast<int>(device_names.size()),
+            2 + core_audio_utility::NumberOfActiveDevices(eCapture));
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetOutputDeviceNames) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  webrtc::AudioDeviceNames device_names;
+  EXPECT_TRUE(core_audio_utility::GetOutputDeviceNames(&device_names));
+  // Number of elements in the list should be two more than the number of
+  // active devices since we always add default and default communication
+  // devices on index 0 and 1.
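+  // Added worked example (illustrative): with three active render devices the
+  // list holds 3 + 2 = 5 entries, since index 0 and 1 are reserved for the
+  // default and default-communications devices.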
+  EXPECT_EQ(static_cast<int>(device_names.size()),
+            2 + core_audio_utility::NumberOfActiveDevices(eRender));
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateSessionManager2) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    rtc::rtc_win::GetVersion() >= rtc::rtc_win::VERSION_WIN7);
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Obtain reference to an IAudioSessionManager2 interface for a default audio
+  // endpoint device specified by two different data flows and the `eConsole`
+  // role.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IMMDevice> device(core_audio_utility::CreateDevice(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole));
+    EXPECT_TRUE(device.Get());
+    ComPtr<IAudioSessionManager2> session_manager =
+        core_audio_utility::CreateSessionManager2(device.Get());
+    EXPECT_TRUE(session_manager.Get());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateSessionEnumerator) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    rtc::rtc_win::GetVersion() >= rtc::rtc_win::VERSION_WIN7);
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Obtain reference to an IAudioSessionEnumerator interface for a default
+  // audio endpoint device specified by two different data flows and the
+  // `eConsole` role.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IMMDevice> device(core_audio_utility::CreateDevice(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole));
+    EXPECT_TRUE(device.Get());
+    ComPtr<IAudioSessionEnumerator> session_enumerator =
+        core_audio_utility::CreateSessionEnumerator(device.Get());
+    EXPECT_TRUE(session_enumerator.Get());
+
+    // Perform a sanity test of the interface by asking for the total number
+    // of audio sessions that are open on the audio device. Note that we do
+    // not check if the session is active or not.
+    int session_count = 0;
+    EXPECT_TRUE(SUCCEEDED(session_enumerator->GetCount(&session_count)));
+    EXPECT_GE(session_count, 0);
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, NumberOfActiveSessions) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    rtc::rtc_win::GetVersion() >= rtc::rtc_win::VERSION_WIN7);
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Count number of active audio sessions for a default audio endpoint device
+  // specified by two different data flows and the `eConsole` role.
+  // Ensure that the number of active audio sessions is less than or equal to
+  // the total number of audio sessions on that same device.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    // Create an audio endpoint device.
+    ComPtr<IMMDevice> device(core_audio_utility::CreateDevice(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole));
+    EXPECT_TRUE(device.Get());
+
+    // Ask for total number of audio sessions on the created device.
+    ComPtr<IAudioSessionEnumerator> session_enumerator =
+        core_audio_utility::CreateSessionEnumerator(device.Get());
+    EXPECT_TRUE(session_enumerator.Get());
+    int total_session_count = 0;
+    EXPECT_TRUE(SUCCEEDED(session_enumerator->GetCount(&total_session_count)));
+    EXPECT_GE(total_session_count, 0);
+
+    // Use NumberOfActiveSessions and get number of active audio sessions.
+    int active_session_count =
+        core_audio_utility::NumberOfActiveSessions(device.Get());
+    EXPECT_LE(active_session_count, total_session_count);
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateClient) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Obtain reference to an IAudioClient interface for a default audio endpoint
+  // device specified by two different data flows and the `eConsole` role.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient> client = core_audio_utility::CreateClient(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client.Get());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateClient2) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    core_audio_utility::GetAudioClientVersion() >= 2);
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Obtain reference to an IAudioClient2 interface for a default audio
+  // endpoint device specified by two different data flows and the `eConsole`
+  // role.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient2> client2 = core_audio_utility::CreateClient2(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client2.Get());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateClient3) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    core_audio_utility::GetAudioClientVersion() >= 3);
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Obtain reference to an IAudioClient3 interface for a default audio
+  // endpoint device specified by two different data flows and the `eConsole`
+  // role.
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient3> client3 = core_audio_utility::CreateClient3(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client3.Get());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, SetClientProperties) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    core_audio_utility::GetAudioClientVersion() >= 2);
+
+  ComPtr<IAudioClient2> client2 = core_audio_utility::CreateClient2(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client2.Get());
+  EXPECT_TRUE(
+      SUCCEEDED(core_audio_utility::SetClientProperties(client2.Get())));
+
+  ComPtr<IAudioClient3> client3 = core_audio_utility::CreateClient3(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client3.Get());
+  EXPECT_TRUE(
+      SUCCEEDED(core_audio_utility::SetClientProperties(client3.Get())));
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetSharedModeEnginePeriod) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    core_audio_utility::GetAudioClientVersion() >= 3);
+
+  ComPtr<IAudioClient3> client3 = core_audio_utility::CreateClient3(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client3.Get());
+
+  WAVEFORMATPCMEX format;
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client3.Get(), &format)));
+
+  uint32_t default_period = 0;
+  uint32_t fundamental_period = 0;
+  uint32_t min_period = 0;
+  uint32_t max_period = 0;
+  EXPECT_TRUE(SUCCEEDED(core_audio_utility::GetSharedModeEnginePeriod(
+      client3.Get(), &format, &default_period, &fundamental_period,
+      &min_period, &max_period)));
+}
+
+// TODO(henrika): figure out why usage of this API always reports
+// AUDCLNT_E_OFFLOAD_MODE_ONLY.
+TEST_F(CoreAudioUtilityWinTest, DISABLED_GetBufferSizeLimits) {
+  ABORT_TEST_IF_NOT(DevicesAvailable() &&
+                    core_audio_utility::GetAudioClientVersion() >= 2);
+
+  ComPtr<IAudioClient2> client2 = core_audio_utility::CreateClient2(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client2.Get());
+
+  WAVEFORMATPCMEX format;
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client2.Get(), &format)));
+
+  REFERENCE_TIME min_buffer_duration = 0;
+  REFERENCE_TIME max_buffer_duration = 0;
+  EXPECT_TRUE(SUCCEEDED(core_audio_utility::GetBufferSizeLimits(
+      client2.Get(), &format, &min_buffer_duration, &max_buffer_duration)));
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetSharedModeMixFormat) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  ComPtr<IAudioClient> client = core_audio_utility::CreateClient(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+
+  // Perform a simple sanity test of the acquired format structure.
+  WAVEFORMATEXTENSIBLE format;
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+  core_audio_utility::WaveFormatWrapper wformat(&format);
+  EXPECT_GE(wformat->nChannels, 1);
+  EXPECT_GE(wformat->nSamplesPerSec, 8000u);
+  EXPECT_GE(wformat->wBitsPerSample, 16);
+  if (wformat.IsExtensible()) {
+    EXPECT_EQ(wformat->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
+    EXPECT_GE(wformat->cbSize, 22);
+    EXPECT_GE(wformat.GetExtensible()->Samples.wValidBitsPerSample, 16);
+  } else {
+    EXPECT_EQ(wformat->cbSize, 0);
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, IsFormatSupported) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  // Create a default render client.
+  ComPtr<IAudioClient> client = core_audio_utility::CreateClient(
+      AudioDeviceName::kDefaultDeviceId, eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+
+  // Get the default, shared mode, mixing format.
+  WAVEFORMATEXTENSIBLE format;
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+  // In shared mode, the audio engine always supports the mix format.
+  EXPECT_TRUE(core_audio_utility::IsFormatSupported(
+      client.Get(), AUDCLNT_SHAREMODE_SHARED, &format));
+
+  // Use an invalid format and verify that it is not supported.
+  format.Format.nSamplesPerSec += 1;
+  EXPECT_FALSE(core_audio_utility::IsFormatSupported(
+      client.Get(), AUDCLNT_SHAREMODE_SHARED, &format));
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetDevicePeriod) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  // Verify that the device periods are valid for the default render and
+  // capture devices.
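+  // Added context (typical values, an assumption not asserted by this test):
+  // the shared-mode engine period is commonly 10 ms, i.e. 100000
+  // REFERENCE_TIME units of 100 ns each, and the exclusive-mode period is
+  // usually equal to or smaller than that.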
+  ComPtr<IAudioClient> client;
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    REFERENCE_TIME shared_time_period = 0;
+    REFERENCE_TIME exclusive_time_period = 0;
+    client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                              data_flow[i], eConsole);
+    EXPECT_TRUE(client.Get());
+    EXPECT_TRUE(SUCCEEDED(core_audio_utility::GetDevicePeriod(
+        client.Get(), AUDCLNT_SHAREMODE_SHARED, &shared_time_period)));
+    EXPECT_GT(shared_time_period, 0);
+    EXPECT_TRUE(SUCCEEDED(core_audio_utility::GetDevicePeriod(
+        client.Get(), AUDCLNT_SHAREMODE_EXCLUSIVE, &exclusive_time_period)));
+    EXPECT_GT(exclusive_time_period, 0);
+    EXPECT_LE(exclusive_time_period, shared_time_period);
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, GetPreferredAudioParameters) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  struct {
+    EDataFlow flow;
+    ERole role;
+  } data[] = {{eRender, eConsole},
+              {eRender, eCommunications},
+              {eCapture, eConsole},
+              {eCapture, eCommunications}};
+
+  // Verify that the preferred audio parameters are OK for all flow/role
+  // combinations above.
+  ComPtr<IAudioClient> client;
+  webrtc::AudioParameters params;
+  for (size_t i = 0; i < arraysize(data); ++i) {
+    client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                              data[i].flow, data[i].role);
+    EXPECT_TRUE(client.Get());
+    EXPECT_TRUE(SUCCEEDED(core_audio_utility::GetPreferredAudioParameters(
+        client.Get(), &params)));
+    EXPECT_TRUE(params.is_valid());
+    EXPECT_TRUE(params.is_complete());
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, SharedModeInitialize) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  ComPtr<IAudioClient> client;
+  client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                            eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+
+  WAVEFORMATPCMEX format;
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+  // Perform a shared-mode initialization without event-driven buffer handling.
+  uint32_t endpoint_buffer_size = 0;
+  HRESULT hr = core_audio_utility::SharedModeInitialize(
+      client.Get(), &format, nullptr, 0, false, &endpoint_buffer_size);
+  EXPECT_TRUE(SUCCEEDED(hr));
+  EXPECT_GT(endpoint_buffer_size, 0u);
+
+  // It is only possible to initialize a client once.
+  hr = core_audio_utility::SharedModeInitialize(
+      client.Get(), &format, nullptr, 0, false, &endpoint_buffer_size);
+  EXPECT_FALSE(SUCCEEDED(hr));
+  EXPECT_EQ(hr, AUDCLNT_E_ALREADY_INITIALIZED);
+
+  // Verify that it is possible to reinitialize the client after releasing it
+  // and then creating a new client.
+  client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                            eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+  hr = core_audio_utility::SharedModeInitialize(
+      client.Get(), &format, nullptr, 0, false, &endpoint_buffer_size);
+  EXPECT_TRUE(SUCCEEDED(hr));
+  EXPECT_GT(endpoint_buffer_size, 0u);
+
+  // Use a non-supported format and verify that initialization fails.
+  // A simple way to emulate an invalid format is to use the shared-mode
+  // mixing format and modify the preferred sample rate.
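+  // (Added illustration: e.g. bumping 48000 Hz to 48001 Hz below produces a
+  // rate the shared-mode engine will not accept, so Initialize is expected to
+  // fail with E_INVALIDARG, as asserted further down.)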
+  client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                            eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+  format.Format.nSamplesPerSec = format.Format.nSamplesPerSec + 1;
+  EXPECT_FALSE(core_audio_utility::IsFormatSupported(
+      client.Get(), AUDCLNT_SHAREMODE_SHARED, &format));
+  hr = core_audio_utility::SharedModeInitialize(
+      client.Get(), &format, nullptr, 0, false, &endpoint_buffer_size);
+  EXPECT_TRUE(FAILED(hr));
+  EXPECT_EQ(hr, E_INVALIDARG);
+
+  // Finally, perform a shared-mode initialization using event-driven buffer
+  // handling. The event handle will be signaled when an audio buffer is ready
+  // to be processed by the client (not verified here). The event handle should
+  // be in the non-signaled state.
+  ScopedHandle event_handle(::CreateEvent(nullptr, TRUE, FALSE, nullptr));
+  client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+                                            eRender, eConsole);
+  EXPECT_TRUE(client.Get());
+  EXPECT_TRUE(SUCCEEDED(
+      core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+  EXPECT_TRUE(core_audio_utility::IsFormatSupported(
+      client.Get(), AUDCLNT_SHAREMODE_SHARED, &format));
+  hr = core_audio_utility::SharedModeInitialize(
+      client.Get(), &format, event_handle, 0, false, &endpoint_buffer_size);
+  EXPECT_TRUE(SUCCEEDED(hr));
+  EXPECT_GT(endpoint_buffer_size, 0u);
+
+  // TODO(henrika): possibly add test for signature which overrides the default
+  // sample rate.
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateRenderAndCaptureClients) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  WAVEFORMATPCMEX format;
+  uint32_t endpoint_buffer_size = 0;
+
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient> client;
+    ComPtr<IAudioRenderClient> render_client;
+    ComPtr<IAudioCaptureClient> capture_client;
+
+    // Create a default client for the given data-flow direction.
+    client = core_audio_utility::CreateClient(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client.Get());
+    EXPECT_TRUE(SUCCEEDED(
+        core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+    if (data_flow[i] == eRender) {
+      // It is not possible to create a render client using an uninitialized
+      // client interface.
+      render_client = core_audio_utility::CreateRenderClient(client.Get());
+      EXPECT_FALSE(render_client.Get());
+
+      // Do a proper initialization and verify that it works this time.
+      core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr,
+                                               0, false, &endpoint_buffer_size);
+      render_client = core_audio_utility::CreateRenderClient(client.Get());
+      EXPECT_TRUE(render_client.Get());
+      EXPECT_GT(endpoint_buffer_size, 0u);
+    } else if (data_flow[i] == eCapture) {
+      // It is not possible to create a capture client using an uninitialized
+      // client interface.
+      capture_client = core_audio_utility::CreateCaptureClient(client.Get());
+      EXPECT_FALSE(capture_client.Get());
+
+      // Do a proper initialization and verify that it works this time.
+      core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr,
+                                               0, false, &endpoint_buffer_size);
+      capture_client = core_audio_utility::CreateCaptureClient(client.Get());
+      EXPECT_TRUE(capture_client.Get());
+      EXPECT_GT(endpoint_buffer_size, 0u);
+    }
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateAudioClock) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  WAVEFORMATPCMEX format;
+  uint32_t endpoint_buffer_size = 0;
+
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient> client;
+    ComPtr<IAudioClock> audio_clock;
+
+    // Create a default client for the given data-flow direction.
+    client = core_audio_utility::CreateClient(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client.Get());
+    EXPECT_TRUE(SUCCEEDED(
+        core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+    // It is not possible to create an audio clock using an uninitialized
+    // client interface.
+    audio_clock = core_audio_utility::CreateAudioClock(client.Get());
+    EXPECT_FALSE(audio_clock.Get());
+
+    // Do a proper initialization and verify that it works this time.
+    core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr, 0,
+                                             false, &endpoint_buffer_size);
+    audio_clock = core_audio_utility::CreateAudioClock(client.Get());
+    EXPECT_TRUE(audio_clock.Get());
+    EXPECT_GT(endpoint_buffer_size, 0u);
+
+    // Use the audio clock and verify that querying the device frequency works.
+    UINT64 frequency = 0;
+    EXPECT_TRUE(SUCCEEDED(audio_clock->GetFrequency(&frequency)));
+    EXPECT_GT(frequency, 0u);
+  }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateAudioSessionControl) {
+  ABORT_TEST_IF_NOT(DevicesAvailable());
+
+  EDataFlow data_flow[] = {eRender, eCapture};
+
+  WAVEFORMATPCMEX format;
+  uint32_t endpoint_buffer_size = 0;
+
+  for (size_t i = 0; i < arraysize(data_flow); ++i) {
+    ComPtr<IAudioClient> client;
+    ComPtr<IAudioSessionControl> audio_session_control;
+
+    // Create a default client for the given data-flow direction.
+    client = core_audio_utility::CreateClient(
+        AudioDeviceName::kDefaultDeviceId, data_flow[i], eConsole);
+    EXPECT_TRUE(client.Get());
+    EXPECT_TRUE(SUCCEEDED(
+        core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+    // It is not possible to create an audio session control using an
+    // uninitialized client interface.
+    audio_session_control =
+        core_audio_utility::CreateAudioSessionControl(client.Get());
+    EXPECT_FALSE(audio_session_control.Get());
+
+    // Do a proper initialization and verify that it works this time.
+    core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr, 0,
+                                             false, &endpoint_buffer_size);
+    audio_session_control =
+        core_audio_utility::CreateAudioSessionControl(client.Get());
+    EXPECT_TRUE(audio_session_control.Get());
+    EXPECT_GT(endpoint_buffer_size, 0u);
+
+    // Use the audio session control and verify that the session state can be
+    // queried. When a client opens a session by assigning the first stream to
+    // the session (by calling the IAudioClient::Initialize method), the
+    // initial session state is inactive. The session state changes from
+    // inactive to active when a stream in the session begins running (because
+    // the client has called the IAudioClient::Start method).
+TEST_F(CoreAudioUtilityWinTest, CreateAudioSessionControl) {
+ ABORT_TEST_IF_NOT(DevicesAvailable());
+
+ EDataFlow data_flow[] = {eRender, eCapture};
+
+ WAVEFORMATPCMEX format;
+ uint32_t endpoint_buffer_size = 0;
+
+ for (size_t i = 0; i < arraysize(data_flow); ++i) {
+ ComPtr<IAudioClient> client;
+ ComPtr<IAudioSessionControl> audio_session_control;
+
+ // Create a default client for the given data-flow direction.
+ client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+ data_flow[i], eConsole);
+ EXPECT_TRUE(client.Get());
+ EXPECT_TRUE(SUCCEEDED(
+ core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+ // It is not possible to create an audio session control using an
+ // uninitialized client interface.
+ audio_session_control =
+ core_audio_utility::CreateAudioSessionControl(client.Get());
+ EXPECT_FALSE(audio_session_control.Get());
+
+ // Do a proper initialization and verify that it works this time.
+ core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr, 0,
+ false, &endpoint_buffer_size);
+ audio_session_control =
+ core_audio_utility::CreateAudioSessionControl(client.Get());
+ EXPECT_TRUE(audio_session_control.Get());
+ EXPECT_GT(endpoint_buffer_size, 0u);
+
+ // Use the audio session control and verify that the session state can be
+ // queried. When a client opens a session by assigning the first stream to
+ // the session (by calling the IAudioClient::Initialize method), the initial
+ // session state is inactive. The session state changes from inactive to
+ // active when a stream in the session begins running (because the client
+ // has called the IAudioClient::Start method).
+ AudioSessionState state;
+ EXPECT_TRUE(SUCCEEDED(audio_session_control->GetState(&state)));
+ EXPECT_EQ(state, AudioSessionStateInactive);
+ }
+}
+
+TEST_F(CoreAudioUtilityWinTest, CreateSimpleAudioVolume) {
+ ABORT_TEST_IF_NOT(DevicesAvailable());
+
+ EDataFlow data_flow[] = {eRender, eCapture};
+
+ WAVEFORMATPCMEX format;
+ uint32_t endpoint_buffer_size = 0;
+
+ for (size_t i = 0; i < arraysize(data_flow); ++i) {
+ ComPtr<IAudioClient> client;
+ ComPtr<ISimpleAudioVolume> simple_audio_volume;
+
+ // Create a default client for the given data-flow direction.
+ client = core_audio_utility::CreateClient(AudioDeviceName::kDefaultDeviceId,
+ data_flow[i], eConsole);
+ EXPECT_TRUE(client.Get());
+ EXPECT_TRUE(SUCCEEDED(
+ core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+
+ // It is not possible to create an audio volume using an uninitialized
+ // client interface.
+ simple_audio_volume =
+ core_audio_utility::CreateSimpleAudioVolume(client.Get());
+ EXPECT_FALSE(simple_audio_volume.Get());
+
+ // Do a proper initialization and verify that it works this time.
+ core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr, 0,
+ false, &endpoint_buffer_size);
+ simple_audio_volume =
+ core_audio_utility::CreateSimpleAudioVolume(client.Get());
+ EXPECT_TRUE(simple_audio_volume.Get());
+ EXPECT_GT(endpoint_buffer_size, 0u);
+
+ // Use the audio volume interface and validate that it works. The volume
+ // level should be a value in the range 0.0 to 1.0 at the first call.
+ float volume = 0.0;
+ EXPECT_TRUE(SUCCEEDED(simple_audio_volume->GetMasterVolume(&volume)));
+ EXPECT_GE(volume, 0.0);
+ EXPECT_LE(volume, 1.0);
+
+ // Next, set a new volume and verify that the setter does its job.
+ const float target_volume = 0.5;
+ EXPECT_TRUE(SUCCEEDED(
+ simple_audio_volume->SetMasterVolume(target_volume, nullptr)));
+ EXPECT_TRUE(SUCCEEDED(simple_audio_volume->GetMasterVolume(&volume)));
+ EXPECT_EQ(volume, target_volume);
+ }
+}
+
+TEST_F(CoreAudioUtilityWinTest, FillRenderEndpointBufferWithSilence) {
+ ABORT_TEST_IF_NOT(DevicesAvailable());
+
+ // Create a default client using the default mixing format for shared mode.
+ ComPtr<IAudioClient> client(core_audio_utility::CreateClient(
+ AudioDeviceName::kDefaultDeviceId, eRender, eConsole));
+ EXPECT_TRUE(client.Get());
+
+ WAVEFORMATPCMEX format;
+ uint32_t endpoint_buffer_size = 0;
+ EXPECT_TRUE(SUCCEEDED(
+ core_audio_utility::GetSharedModeMixFormat(client.Get(), &format)));
+ core_audio_utility::SharedModeInitialize(client.Get(), &format, nullptr, 0,
+ false, &endpoint_buffer_size);
+ EXPECT_GT(endpoint_buffer_size, 0u);
+
+ ComPtr<IAudioRenderClient> render_client(
+ core_audio_utility::CreateRenderClient(client.Get()));
+ EXPECT_TRUE(render_client.Get());
+
+ // The endpoint audio buffer should not be filled up by default after being
+ // created.
+ UINT32 num_queued_frames = 0;
+ client->GetCurrentPadding(&num_queued_frames);
+ EXPECT_EQ(num_queued_frames, 0u);
+
+ // Fill it up with zeros and verify that the buffer is full.
+ // It is not possible to verify that the actual data consists of zeros
+ // since we can't access data that has already been sent to the endpoint
+ // buffer.
+ EXPECT_TRUE(core_audio_utility::FillRenderEndpointBufferWithSilence( + client.Get(), render_client.Get())); + client->GetCurrentPadding(&num_queued_frames); + EXPECT_EQ(num_queued_frames, endpoint_buffer_size); +} + +} // namespace webrtc_win +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_mixer/BUILD.gn b/third_party/libwebrtc/modules/audio_mixer/BUILD.gn new file mode 100644 index 0000000000..fe20f3d6c7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/BUILD.gn @@ -0,0 +1,144 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +group("audio_mixer") { + deps = [ + ":audio_frame_manipulator", + ":audio_mixer_impl", + ] +} + +rtc_library("audio_mixer_impl") { + visibility = [ "*" ] + sources = [ + "audio_mixer_impl.cc", + "audio_mixer_impl.h", + "default_output_rate_calculator.cc", + "default_output_rate_calculator.h", + "frame_combiner.cc", + "frame_combiner.h", + "output_rate_calculator.h", + ] + + public = [ + "audio_mixer_impl.h", + "default_output_rate_calculator.h", # For creating a mixer with limiter + # disabled. + "frame_combiner.h", + ] + + configs += [ "../audio_processing:apm_debug_dump" ] + + deps = [ + ":audio_frame_manipulator", + "../../api:array_view", + "../../api:rtp_packet_info", + "../../api:scoped_refptr", + "../../api/audio:audio_frame_api", + "../../api/audio:audio_mixer_api", + "../../audio/utility:audio_frame_operations", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:race_checker", + "../../rtc_base:refcount", + "../../rtc_base:safe_conversions", + "../../rtc_base/synchronization:mutex", + "../../system_wrappers", + "../../system_wrappers:metrics", + "../audio_processing:api", + "../audio_processing:apm_logging", + "../audio_processing:audio_frame_view", + "../audio_processing/agc2:fixed_digital", + ] +} + +rtc_library("audio_frame_manipulator") { + visibility = [ + ":*", + "../../modules:*", + ] + + sources = [ + "audio_frame_manipulator.cc", + "audio_frame_manipulator.h", + ] + + deps = [ + "../../api/audio:audio_frame_api", + "../../audio/utility:audio_frame_operations", + "../../rtc_base:checks", + ] +} + +if (rtc_include_tests) { + rtc_library("audio_mixer_test_utils") { + testonly = true + + sources = [ + "gain_change_calculator.cc", + "gain_change_calculator.h", + "sine_wave_generator.cc", + "sine_wave_generator.h", + ] + + deps = [ + ":audio_frame_manipulator", + ":audio_mixer_impl", + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../rtc_base:checks", + "../../rtc_base:safe_conversions", + ] + } + + rtc_library("audio_mixer_unittests") { + testonly = true + + sources = [ + "audio_frame_manipulator_unittest.cc", + "audio_mixer_impl_unittest.cc", + "frame_combiner_unittest.cc", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + deps = [ + ":audio_frame_manipulator", + ":audio_mixer_impl", + ":audio_mixer_test_utils", + "../../api:array_view", + "../../api:rtp_packet_info", + "../../api/audio:audio_mixer_api", + "../../api/units:timestamp", + "../../audio/utility:audio_frame_operations", + 
"../../rtc_base:checks", + "../../rtc_base:stringutils", + "../../rtc_base:task_queue_for_test", + "../../test:test_support", + ] + } + + if (!build_with_chromium) { + rtc_executable("audio_mixer_test") { + testonly = true + sources = [ "audio_mixer_test.cc" ] + + deps = [ + ":audio_mixer_impl", + "../../api/audio:audio_mixer_api", + "../../common_audio", + "../../rtc_base:stringutils", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_mixer/DEPS b/third_party/libwebrtc/modules/audio_mixer/DEPS new file mode 100644 index 0000000000..46f29bccf8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/DEPS @@ -0,0 +1,13 @@ +include_rules = [ + "+audio/utility/audio_frame_operations.h", + "+audio/utility/channel_mixer.h", + "+call", + "+common_audio", + "+modules/audio_coding", + "+modules/audio_device", + "+modules/audio_processing", + "+modules/pacing", + "+modules/rtp_rtcp", + "+modules/utility", + "+system_wrappers", +] diff --git a/third_party/libwebrtc/modules/audio_mixer/OWNERS b/third_party/libwebrtc/modules/audio_mixer/OWNERS new file mode 100644 index 0000000000..5edc304ab3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/OWNERS @@ -0,0 +1,2 @@ +alessiob@webrtc.org +henrik.lundin@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.cc b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.cc new file mode 100644 index 0000000000..3100271cfb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_mixer/audio_frame_manipulator.h" + +#include "audio/utility/audio_frame_operations.h" +#include "audio/utility/channel_mixer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame) { + if (audio_frame.muted()) { + return 0; + } + + uint32_t energy = 0; + const int16_t* frame_data = audio_frame.data(); + for (size_t position = 0; + position < audio_frame.samples_per_channel_ * audio_frame.num_channels_; + position++) { + // TODO(aleloi): This can overflow. Convert to floats. + energy += frame_data[position] * frame_data[position]; + } + return energy; +} + +void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) { + RTC_DCHECK(audio_frame); + RTC_DCHECK_GE(start_gain, 0.0f); + RTC_DCHECK_GE(target_gain, 0.0f); + if (start_gain == target_gain || audio_frame->muted()) { + return; + } + + size_t samples = audio_frame->samples_per_channel_; + RTC_DCHECK_LT(0, samples); + float increment = (target_gain - start_gain) / samples; + float gain = start_gain; + int16_t* frame_data = audio_frame->mutable_data(); + for (size_t i = 0; i < samples; ++i) { + // If the audio is interleaved of several channels, we want to + // apply the same gain change to the ith sample of every channel. 
+ for (size_t ch = 0; ch < audio_frame->num_channels_; ++ch) {
+ frame_data[audio_frame->num_channels_ * i + ch] *= gain;
+ }
+ gain += increment;
+ }
+}
+
+void RemixFrame(size_t target_number_of_channels, AudioFrame* frame) {
+ RTC_DCHECK_GE(target_number_of_channels, 1);
+ // TODO(bugs.webrtc.org/10783): take channel layout into account as well.
+ if (frame->num_channels() == target_number_of_channels) {
+ return;
+ }
+
+ // Use legacy components for the simplest cases (mono <-> stereo) to ensure
+ // that native WebRTC clients are not affected when support for multi-channel
+ // audio is added to Chrome.
+ // TODO(bugs.webrtc.org/10783): utilize channel mixer for mono/stereo as well.
+ if (target_number_of_channels < 3 && frame->num_channels() < 3) {
+ if (frame->num_channels() > target_number_of_channels) {
+ AudioFrameOperations::DownmixChannels(target_number_of_channels, frame);
+ } else {
+ AudioFrameOperations::UpmixChannels(target_number_of_channels, frame);
+ }
+ } else {
+ // Use the generic channel mixer when the number of channels for input or
+ // output is larger than two. E.g. stereo -> 5.1 channel up-mixing.
+ // TODO(bugs.webrtc.org/10783): ensure that actual channel layouts are used
+ // instead of guessing based on the number of channels.
+ const ChannelLayout output_layout(
+ GuessChannelLayout(target_number_of_channels));
+ ChannelMixer mixer(GuessChannelLayout(frame->num_channels()),
+ output_layout);
+ mixer.Transform(frame);
+ RTC_DCHECK_EQ(frame->channel_layout(), output_layout);
+ }
+ RTC_DCHECK_EQ(frame->num_channels(), target_number_of_channels)
+ << "Wrong number of channels, " << frame->num_channels() << " vs "
+ << target_number_of_channels;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.h b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.h
new file mode 100644
index 0000000000..ab3633d266
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_AUDIO_FRAME_MANIPULATOR_H_
+#define MODULES_AUDIO_MIXER_AUDIO_FRAME_MANIPULATOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "api/audio/audio_frame.h"
+
+namespace webrtc {
+
+// Computes the audio frame's energy (based on its samples).
+uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame);
+
+// Ramps up or down the provided audio frame. Ramp(0, 1, frame) will
+// linearly increase the samples in the frame from 0 to full volume.
+void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame);
+
+// Downmixes or upmixes a frame between stereo and mono.
+void RemixFrame(size_t target_number_of_channels, AudioFrame* frame); + +} // namespace webrtc + +#endif // MODULES_AUDIO_MIXER_AUDIO_FRAME_MANIPULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_gn/moz.build b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_gn/moz.build new file mode 100644 index 0000000000..c6a45f9a37 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + 
DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_manipulator_gn") diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc new file mode 100644 index 0000000000..cfb3f2c230 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/audio_frame_manipulator.h"
+
+#include <algorithm>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+void FillFrameWithConstants(size_t samples_per_channel,
+ size_t number_of_channels,
+ int16_t value,
+ AudioFrame* frame) {
+ frame->num_channels_ = number_of_channels;
+ frame->samples_per_channel_ = samples_per_channel;
+ int16_t* frame_data = frame->mutable_data();
+ std::fill(frame_data, frame_data + samples_per_channel * number_of_channels,
+ value);
+}
+} // namespace
+
+TEST(AudioFrameManipulator, CompareForwardRampWithExpectedResultStereo) {
+ constexpr int kSamplesPerChannel = 5;
+ constexpr int kNumberOfChannels = 2;
+
+ // Create a frame with values 5, 5, 5, ... and channels & samples as above.
+ AudioFrame frame;
+ FillFrameWithConstants(kSamplesPerChannel, kNumberOfChannels, 5, &frame);
+
+ Ramp(0.0f, 1.0f, &frame);
+
+ const int total_samples = kSamplesPerChannel * kNumberOfChannels;
+ const int16_t expected_result[total_samples] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
+ const int16_t* frame_data = frame.data();
+ EXPECT_TRUE(
+ std::equal(frame_data, frame_data + total_samples, expected_result));
+}
+
+TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) {
+ constexpr int kSamplesPerChannel = 5;
+ constexpr int kNumberOfChannels = 1;
+
+ // Create a frame with values 5, 5, 5, ... and channels & samples as above.
+ AudioFrame frame;
+ FillFrameWithConstants(kSamplesPerChannel, kNumberOfChannels, 5, &frame);
+
+ Ramp(1.0f, 0.0f, &frame);
+
+ const int total_samples = kSamplesPerChannel * kNumberOfChannels;
+ const int16_t expected_result[total_samples] = {5, 4, 3, 2, 1};
+ const int16_t* frame_data = frame.data();
+ EXPECT_TRUE(
+ std::equal(frame_data, frame_data + total_samples, expected_result));
+}
+
+} // namespace webrtc
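The expected vectors in these tests follow directly from the per-sample increment computed in Ramp(). A quick sanity check of the forward stereo case (illustration only, derived from the implementation above):

    // 5 constant samples of value 5 per channel, ramped from gain 0.0 to 1.0:
    //   increment = (1.0 - 0.0) / 5 = 0.2, so gain(i) = i * 0.2
    //   sample(i) = int16_t(5 * gain(i)) -> {0, 1, 2, 3, 4}
    // With two interleaved channels each scaled value appears twice:
    //   {0, 0, 1, 1, 2, 2, 3, 3, 4, 4}
    // The backward mono case runs the same arithmetic from 1.0 down to 0.0,
    // yielding {5, 4, 3, 2, 1}.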
diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.cc b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.cc
new file mode 100644
index 0000000000..0c203a1d9f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.cc
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/audio_mixer_impl.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <utility>
+
+#include "modules/audio_mixer/audio_frame_manipulator.h"
+#include "modules/audio_mixer/default_output_rate_calculator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/trace_event.h"
+
+namespace webrtc {
+
+struct AudioMixerImpl::SourceStatus {
+ SourceStatus(Source* audio_source, bool is_mixed, float gain)
+ : audio_source(audio_source), is_mixed(is_mixed), gain(gain) {}
+ Source* audio_source = nullptr;
+ bool is_mixed = false;
+ float gain = 0.0f;
+
+ // A frame that will be passed to audio_source->GetAudioFrameWithInfo.
+ AudioFrame audio_frame;
+};
+
+namespace {
+
+struct SourceFrame {
+ SourceFrame() = default;
+
+ SourceFrame(AudioMixerImpl::SourceStatus* source_status,
+ AudioFrame* audio_frame,
+ bool muted)
+ : source_status(source_status), audio_frame(audio_frame), muted(muted) {
+ RTC_DCHECK(source_status);
+ RTC_DCHECK(audio_frame);
+ if (!muted) {
+ energy = AudioMixerCalculateEnergy(*audio_frame);
+ }
+ }
+
+ SourceFrame(AudioMixerImpl::SourceStatus* source_status,
+ AudioFrame* audio_frame,
+ bool muted,
+ uint32_t energy)
+ : source_status(source_status),
+ audio_frame(audio_frame),
+ muted(muted),
+ energy(energy) {
+ RTC_DCHECK(source_status);
+ RTC_DCHECK(audio_frame);
+ }
+
+ AudioMixerImpl::SourceStatus* source_status = nullptr;
+ AudioFrame* audio_frame = nullptr;
+ bool muted = true;
+ uint32_t energy = 0;
+};
+
+// ShouldMixBefore(a, b) is used to select mixer sources.
+// Returns true if `a` is preferred over `b` as a source to be mixed.
+bool ShouldMixBefore(const SourceFrame& a, const SourceFrame& b) {
+ if (a.muted != b.muted) {
+ return b.muted;
+ }
+
+ const auto a_activity = a.audio_frame->vad_activity_;
+ const auto b_activity = b.audio_frame->vad_activity_;
+
+ if (a_activity != b_activity) {
+ return a_activity == AudioFrame::kVadActive;
+ }
+
+ return a.energy > b.energy;
+}
+
+void RampAndUpdateGain(
+ rtc::ArrayView<SourceFrame> mixed_sources_and_frames) {
+ for (const auto& source_frame : mixed_sources_and_frames) {
+ float target_gain = source_frame.source_status->is_mixed ? 1.0f : 0.0f;
+ Ramp(source_frame.source_status->gain, target_gain,
+ source_frame.audio_frame);
+ source_frame.source_status->gain = target_gain;
+ }
+}
+
+std::vector<std::unique_ptr<AudioMixerImpl::SourceStatus>>::const_iterator
+FindSourceInList(
+ AudioMixerImpl::Source const* audio_source,
+ std::vector<std::unique_ptr<AudioMixerImpl::SourceStatus>> const*
+ audio_source_list) {
+ return std::find_if(
+ audio_source_list->begin(), audio_source_list->end(),
+ [audio_source](const std::unique_ptr<AudioMixerImpl::SourceStatus>& p) {
+ return p->audio_source == audio_source;
+ });
+}
+} // namespace
+
+struct AudioMixerImpl::HelperContainers {
+ void resize(size_t size) {
+ audio_to_mix.resize(size);
+ audio_source_mixing_data_list.resize(size);
+ ramp_list.resize(size);
+ preferred_rates.resize(size);
+ }
+
+ std::vector<AudioFrame*> audio_to_mix;
+ std::vector<SourceFrame> audio_source_mixing_data_list;
+ std::vector<SourceFrame> ramp_list;
+ std::vector<int> preferred_rates;
+};
+
+AudioMixerImpl::AudioMixerImpl(
+ std::unique_ptr<OutputRateCalculator> output_rate_calculator,
+ bool use_limiter,
+ int max_sources_to_mix)
+ : max_sources_to_mix_(max_sources_to_mix),
+ output_rate_calculator_(std::move(output_rate_calculator)),
+ audio_source_list_(),
+ helper_containers_(std::make_unique<HelperContainers>()),
+ frame_combiner_(use_limiter) {
+ RTC_CHECK_GE(max_sources_to_mix, 1) << "At least one source must be mixed";
+ audio_source_list_.reserve(max_sources_to_mix);
+ helper_containers_->resize(max_sources_to_mix);
+}
+
+AudioMixerImpl::~AudioMixerImpl() {}
+
+rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create(
+ int max_sources_to_mix) {
+ return Create(std::unique_ptr<DefaultOutputRateCalculator>(
+ new DefaultOutputRateCalculator()),
+ /*use_limiter=*/true, max_sources_to_mix);
+}
+
+rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create(
+ std::unique_ptr<OutputRateCalculator> output_rate_calculator,
+ bool use_limiter,
+ int max_sources_to_mix) {
+ return rtc::make_ref_counted<AudioMixerImpl>(
+ std::move(output_rate_calculator), use_limiter, max_sources_to_mix);
+}
+
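Taken together, the two Create() overloads and the AudioMixer interface give the whole public workflow. A minimal usage sketch (illustration only; `MySource` is a hypothetical implementation of AudioMixer::Source, not a class in this patch):

    // Mix two sources into 10 ms stereo output frames.
    rtc::scoped_refptr<AudioMixerImpl> mixer = AudioMixerImpl::Create();
    MySource source_a;  // implements GetAudioFrameWithInfo(), Ssrc(),
    MySource source_b;  // PreferredSampleRate(), ...
    mixer->AddSource(&source_a);
    mixer->AddSource(&source_b);
    AudioFrame mixed;
    mixer->Mix(/*number_of_channels=*/2, &mixed);  // one 10 ms mix iteration
    mixer->RemoveSource(&source_b);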
+void AudioMixerImpl::Mix(size_t number_of_channels,
+ AudioFrame* audio_frame_for_mixing) {
+ TRACE_EVENT0("webrtc", "AudioMixerImpl::Mix");
+ RTC_DCHECK(number_of_channels >= 1);
+ MutexLock lock(&mutex_);
+
+ size_t number_of_streams = audio_source_list_.size();
+
+ std::transform(audio_source_list_.begin(), audio_source_list_.end(),
+ helper_containers_->preferred_rates.begin(),
+ [&](std::unique_ptr<SourceStatus>& a) {
+ return a->audio_source->PreferredSampleRate();
+ });
+
+ int output_frequency = output_rate_calculator_->CalculateOutputRateFromRange(
+ rtc::ArrayView<const int>(helper_containers_->preferred_rates.data(),
+ number_of_streams));
+
+ frame_combiner_.Combine(GetAudioFromSources(output_frequency),
+ number_of_channels, output_frequency,
+ number_of_streams, audio_frame_for_mixing);
+}
+
+bool AudioMixerImpl::AddSource(Source* audio_source) {
+ RTC_DCHECK(audio_source);
+ MutexLock lock(&mutex_);
+ RTC_DCHECK(FindSourceInList(audio_source, &audio_source_list_) ==
+ audio_source_list_.end())
+ << "Source already added to mixer";
+ audio_source_list_.emplace_back(new SourceStatus(audio_source, false, 0));
+ helper_containers_->resize(audio_source_list_.size());
+ return true;
+}
+
+void AudioMixerImpl::RemoveSource(Source* audio_source) {
+ RTC_DCHECK(audio_source);
+ MutexLock lock(&mutex_);
+ const auto iter = FindSourceInList(audio_source, &audio_source_list_);
+ RTC_DCHECK(iter != audio_source_list_.end()) << "Source not present in mixer";
+ audio_source_list_.erase(iter);
+}
+
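GetAudioFromSources() below is where the ShouldMixBefore() policy plays out. To illustrate the ordering with a hypothetical candidate set: given a muted source a, an unmuted VAD-passive source b with energy 9, and unmuted VAD-active sources c and d with energies 1 and 7, the sort produces d, c, b, a (unmuted before muted, active before passive, then higher energy first). With max_sources_to_mix_ == 3, d, c and b are mixed; a is excluded in any case, since muted frames are filtered out before counting.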
+rtc::ArrayView<AudioFrame* const> AudioMixerImpl::GetAudioFromSources(
+ int output_frequency) {
+ // Get audio from the audio sources and put it in the SourceFrame vector.
+ int audio_source_mixing_data_count = 0;
+ for (auto& source_and_status : audio_source_list_) {
+ const auto audio_frame_info =
+ source_and_status->audio_source->GetAudioFrameWithInfo(
+ output_frequency, &source_and_status->audio_frame);
+
+ if (audio_frame_info == Source::AudioFrameInfo::kError) {
+ RTC_LOG_F(LS_WARNING) << "failed to GetAudioFrameWithInfo() from source";
+ continue;
+ }
+ helper_containers_
+ ->audio_source_mixing_data_list[audio_source_mixing_data_count++] =
+ SourceFrame(source_and_status.get(), &source_and_status->audio_frame,
+ audio_frame_info == Source::AudioFrameInfo::kMuted);
+ }
+ rtc::ArrayView<SourceFrame> audio_source_mixing_data_view(
+ helper_containers_->audio_source_mixing_data_list.data(),
+ audio_source_mixing_data_count);
+
+ // Sort frames by the sorting function.
+ std::sort(audio_source_mixing_data_view.begin(),
+ audio_source_mixing_data_view.end(), ShouldMixBefore);
+
+ int max_audio_frame_counter = max_sources_to_mix_;
+ int ramp_list_length = 0;
+ int audio_to_mix_count = 0;
+ // Go through the list in order and put unmuted frames in the result list.
+ for (const auto& p : audio_source_mixing_data_view) {
+ // Filter muted.
+ if (p.muted) {
+ p.source_status->is_mixed = false;
+ continue;
+ }
+
+ // Add the frame to the result vector for mixing.
+ bool is_mixed = false;
+ if (max_audio_frame_counter > 0) {
+ --max_audio_frame_counter;
+ helper_containers_->audio_to_mix[audio_to_mix_count++] = p.audio_frame;
+ helper_containers_->ramp_list[ramp_list_length++] =
+ SourceFrame(p.source_status, p.audio_frame, false, -1);
+ is_mixed = true;
+ }
+ p.source_status->is_mixed = is_mixed;
+ }
+ RampAndUpdateGain(rtc::ArrayView<SourceFrame>(
+ helper_containers_->ramp_list.data(), ramp_list_length));
+ return rtc::ArrayView<AudioFrame* const>(
+ helper_containers_->audio_to_mix.data(), audio_to_mix_count);
+}
+
+bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest(
+ AudioMixerImpl::Source* audio_source) const {
+ MutexLock lock(&mutex_);
+
+ const auto iter = FindSourceInList(audio_source, &audio_source_list_);
+ if (iter != audio_source_list_.end()) {
+ return (*iter)->is_mixed;
+ }
+
+ RTC_LOG(LS_ERROR) << "Audio source unknown";
+ return false;
+}
+} // namespace webrtc
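Callers who need a policy other than the default choice (the tests below show that DefaultOutputRateCalculator picks a native rate at least as high as every source's preferred rate) can inject their own calculator through the second Create() overload. A minimal sketch, assuming the OutputRateCalculator interface matches the CustomRateCalculator helper used by the unit tests later in this patch:

    // Always mix at a fixed rate, ignoring the sources' preferred rates.
    class FixedRateCalculator : public OutputRateCalculator {
     public:
      explicit FixedRateCalculator(int rate_hz) : rate_hz_(rate_hz) {}
      int CalculateOutputRateFromRange(
          rtc::ArrayView<const int> preferred_sample_rates) override {
        return rate_hz_;
      }

     private:
      const int rate_hz_;
    };

    auto mixer = AudioMixerImpl::Create(
        std::make_unique<FixedRateCalculator>(48000), /*use_limiter=*/true);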
diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.h b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.h
new file mode 100644
index 0000000000..76b1131777
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_AUDIO_MIXER_IMPL_H_
+#define MODULES_AUDIO_MIXER_AUDIO_MIXER_IMPL_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/audio_frame.h"
+#include "api/audio/audio_mixer.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_mixer/frame_combiner.h"
+#include "modules/audio_mixer/output_rate_calculator.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+class AudioMixerImpl : public AudioMixer {
+ public:
+ struct SourceStatus;
+
+ // AudioProcessing only accepts 10 ms frames.
+ static const int kFrameDurationInMs = 10;
+
+ static const int kDefaultNumberOfMixedAudioSources = 3;
+
+ static rtc::scoped_refptr<AudioMixerImpl> Create(
+ int max_sources_to_mix = kDefaultNumberOfMixedAudioSources);
+
+ static rtc::scoped_refptr<AudioMixerImpl> Create(
+ std::unique_ptr<OutputRateCalculator> output_rate_calculator,
+ bool use_limiter,
+ int max_sources_to_mix = kDefaultNumberOfMixedAudioSources);
+
+ ~AudioMixerImpl() override;
+
+ AudioMixerImpl(const AudioMixerImpl&) = delete;
+ AudioMixerImpl& operator=(const AudioMixerImpl&) = delete;
+
+ // AudioMixer functions
+ bool AddSource(Source* audio_source) override;
+ void RemoveSource(Source* audio_source) override;
+
+ void Mix(size_t number_of_channels,
+ AudioFrame* audio_frame_for_mixing) override
+ RTC_LOCKS_EXCLUDED(mutex_);
+
+ // Returns true if the source was mixed last round. Returns
+ // false and logs an error if the source was never added to the
+ // mixer.
+ bool GetAudioSourceMixabilityStatusForTest(Source* audio_source) const;
+
+ protected:
+ AudioMixerImpl(std::unique_ptr<OutputRateCalculator> output_rate_calculator,
+ bool use_limiter,
+ int max_sources_to_mix);
+
+ private:
+ struct HelperContainers;
+
+ // Compute what audio sources to mix from audio_source_list_. Ramp
+ // in and out. Update mixed status. Mixes up to max_sources_to_mix_
+ // audio sources.
+ rtc::ArrayView<AudioFrame* const> GetAudioFromSources(int output_frequency)
+ RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // The critical section lock guards audio source insertion and
+ // removal, which can be done from any thread. The race checker
+ // checks that mixing is done sequentially.
+ mutable Mutex mutex_;
+
+ const int max_sources_to_mix_;
+
+ std::unique_ptr<OutputRateCalculator> output_rate_calculator_;
+
+ // List of all audio sources.
+ std::vector<std::unique_ptr<SourceStatus>> audio_source_list_
+ RTC_GUARDED_BY(mutex_);
+ const std::unique_ptr<HelperContainers> helper_containers_
+ RTC_GUARDED_BY(mutex_);
+
+ // Component that handles actual adding of audio frames.
+ FrameCombiner frame_combiner_;
+};
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_MIXER_AUDIO_MIXER_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_gn/moz.build b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_gn/moz.build
new file mode 100644
index 0000000000..32a47e6b06
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_gn/moz.build
@@ -0,0 +1,235 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+ ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+ ### DO NOT edit it by hand. ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0"
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+ "!/ipc/ipdl/_ipdlheaders",
+ "!/third_party/libwebrtc/gen",
+ "/ipc/chromium/src",
+ "/third_party/libwebrtc/",
+ "/third_party/libwebrtc/third_party/abseil-cpp/",
+ "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+ "/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl.cc",
+ "/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.cc",
+ "/third_party/libwebrtc/modules/audio_mixer/frame_combiner.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+ DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+ DEFINES["NDEBUG"] = True
+ DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+ DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+ DEFINES["ANDROID"] = True
+ DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+ DEFINES["HAVE_SYS_UIO_H"] = True
+ DEFINES["WEBRTC_ANDROID"] = True
+ DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+ DEFINES["WEBRTC_LINUX"] = True
+ DEFINES["WEBRTC_POSIX"] = True
+ DEFINES["_GNU_SOURCE"] = True
+ DEFINES["__STDC_CONSTANT_MACROS"] = True
+ DEFINES["__STDC_FORMAT_MACROS"] = True
+
+ OS_LIBS += [
+ "log"
+ ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+ DEFINES["WEBRTC_MAC"] = True
+ DEFINES["WEBRTC_POSIX"] = True
+ DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+ DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+ DEFINES["__STDC_CONSTANT_MACROS"] = True
+ DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["USE_AURA"] = "1"
+ DEFINES["USE_GLIB"] = "1"
+ DEFINES["USE_NSS_CERTS"] = "1"
+ DEFINES["USE_OZONE"] = "1"
+ DEFINES["USE_UDEV"] = True
+ DEFINES["WEBRTC_LINUX"]
= True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == 
"Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_mixer_impl_gn") diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc new file mode 100644 index 0000000000..e4b12a7000 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -0,0 +1,790 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_mixer/audio_mixer_impl.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/audio/audio_mixer.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" +#include "api/units/timestamp.h" +#include "modules/audio_mixer/default_output_rate_calculator.h" +#include "rtc_base/checks.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::Exactly; +using ::testing::Invoke; +using ::testing::Return; +using ::testing::UnorderedElementsAre; + +namespace webrtc { + +namespace { + +constexpr int kDefaultSampleRateHz = 48000; + +// Utility function that resets the frame member variables with +// sensible defaults. +void ResetFrame(AudioFrame* frame) { + frame->sample_rate_hz_ = kDefaultSampleRateHz; + frame->num_channels_ = 1; + + // Frame duration 10ms. 
+ frame->samples_per_channel_ = kDefaultSampleRateHz / 100;
+ frame->vad_activity_ = AudioFrame::kVadActive;
+ frame->speech_type_ = AudioFrame::kNormalSpeech;
+}
+
+std::string ProduceDebugText(int sample_rate_hz,
+ int number_of_channels,
+ int number_of_sources) {
+ rtc::StringBuilder ss;
+ ss << "Sample rate: " << sample_rate_hz << " ";
+ ss << "Number of channels: " << number_of_channels << " ";
+ ss << "Number of sources: " << number_of_sources;
+ return ss.Release();
+}
+
+AudioFrame frame_for_mixing;
+
+} // namespace
+
+class MockMixerAudioSource
+ : public ::testing::NiceMock<AudioMixer::Source> {
+ public:
+ MockMixerAudioSource()
+ : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
+ ON_CALL(*this, GetAudioFrameWithInfo(_, _))
+ .WillByDefault(
+ Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
+ ON_CALL(*this, PreferredSampleRate())
+ .WillByDefault(Return(kDefaultSampleRateHz));
+ }
+
+ MOCK_METHOD(AudioFrameInfo,
+ GetAudioFrameWithInfo,
+ (int sample_rate_hz, AudioFrame* audio_frame),
+ (override));
+
+ MOCK_METHOD(int, PreferredSampleRate, (), (const, override));
+ MOCK_METHOD(int, Ssrc, (), (const, override));
+
+ AudioFrame* fake_frame() { return &fake_frame_; }
+ AudioFrameInfo fake_info() { return fake_audio_frame_info_; }
+ void set_fake_info(const AudioFrameInfo audio_frame_info) {
+ fake_audio_frame_info_ = audio_frame_info;
+ }
+
+ void set_packet_infos(const RtpPacketInfos& packet_infos) {
+ packet_infos_ = packet_infos;
+ }
+
+ private:
+ AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
+ AudioFrame* audio_frame) {
+ audio_frame->CopyFrom(fake_frame_);
+ audio_frame->sample_rate_hz_ = sample_rate_hz;
+ audio_frame->samples_per_channel_ =
+ rtc::CheckedDivExact(sample_rate_hz, 100);
+ audio_frame->packet_infos_ = packet_infos_;
+ return fake_info();
+ }
+
+ AudioFrame fake_frame_;
+ AudioFrameInfo fake_audio_frame_info_;
+ RtpPacketInfos packet_infos_;
+};
+
+class CustomRateCalculator : public OutputRateCalculator {
+ public:
+ explicit CustomRateCalculator(int rate) : rate_(rate) {}
+ int CalculateOutputRateFromRange(
+ rtc::ArrayView<const int> preferred_rates) override {
+ return rate_;
+ }
+
+ private:
+ const int rate_;
+};
+
+// Creates participants from `frames` and `frame_info` and adds them
+// to the mixer. Compares mixed status with `expected_status`.
+void MixAndCompare(
+ const std::vector<AudioFrame>& frames,
+ const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
+ const std::vector<bool>& expected_status) {
+ const size_t num_audio_sources = frames.size();
+ RTC_DCHECK(frames.size() == frame_info.size());
+ RTC_DCHECK(frame_info.size() == expected_status.size());
+
+ const auto mixer = AudioMixerImpl::Create();
+ std::vector<MockMixerAudioSource> participants(num_audio_sources);
+
+ for (size_t i = 0; i < num_audio_sources; ++i) {
+ participants[i].fake_frame()->CopyFrom(frames[i]);
+ participants[i].set_fake_info(frame_info[i]);
+ }
+
+ for (size_t i = 0; i < num_audio_sources; ++i) {
+ EXPECT_TRUE(mixer->AddSource(&participants[i]));
+ EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
+ .Times(Exactly(1));
+ }
+
+ mixer->Mix(1, &frame_for_mixing);
+
+ for (size_t i = 0; i < num_audio_sources; ++i) {
+ EXPECT_EQ(expected_status[i],
+ mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
+ << "Mixed status of AudioSource #" << i << " wrong.";
+ }
+}
+
+void MixMonoAtGivenNativeRate(int native_sample_rate,
+ AudioFrame* mix_frame,
+ rtc::scoped_refptr<AudioMixerImpl> mixer,
+ MockMixerAudioSource* audio_source) {
+ ON_CALL(*audio_source, PreferredSampleRate())
+ .WillByDefault(Return(native_sample_rate));
+ audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
+ audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
+
+ mixer->Mix(1, mix_frame);
+}
+
+TEST(AudioMixer, LargestEnergyVadActiveMixed) {
+ constexpr int kAudioSources =
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 3;
+
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource participants[kAudioSources];
+
+ for (int i = 0; i < kAudioSources; ++i) {
+ ResetFrame(participants[i].fake_frame());
+
+ // We set the 80-th sample value since the first 80 samples may be
+ // modified by a ramped-in window.
+ participants[i].fake_frame()->mutable_data()[80] = i;
+
+ EXPECT_TRUE(mixer->AddSource(&participants[i]));
+ EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
+ }
+
+ // Last participant gives audio frame with passive VAD, although it has the
+ // largest energy.
+ participants[kAudioSources - 1].fake_frame()->vad_activity_ =
+ AudioFrame::kVadPassive;
+
+ AudioFrame audio_frame;
+ mixer->Mix(1, // number of channels
+ &audio_frame);
+
+ for (int i = 0; i < kAudioSources; ++i) {
+ bool is_mixed =
+ mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
+ if (i == kAudioSources - 1 ||
+ i < kAudioSources - 1 -
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources) {
+ EXPECT_FALSE(is_mixed)
+ << "Mixing status of AudioSource #" << i << " wrong.";
+ } else {
+ EXPECT_TRUE(is_mixed)
+ << "Mixing status of AudioSource #" << i << " wrong.";
+ }
+ }
+}
+
+TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource participant;
+
+ ResetFrame(participant.fake_frame());
+ const size_t n_samples = participant.fake_frame()->samples_per_channel_;
+
+ // Modify the frame so that it's not zero.
+ int16_t* fake_frame_data = participant.fake_frame()->mutable_data();
+ for (size_t j = 0; j < n_samples; ++j) {
+ fake_frame_data[j] = static_cast<int16_t>(j);
+ }
+
+ EXPECT_TRUE(mixer->AddSource(&participant));
+ EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));
+
+ AudioFrame audio_frame;
+ // Two mix iterations to compare after the ramp-up step.
+ for (int i = 0; i < 2; ++i) {
+ mixer->Mix(1, // number of channels
+ &audio_frame);
+ }
+
+ EXPECT_EQ(0, memcmp(participant.fake_frame()->data(), audio_frame.data(),
+ n_samples));
+}
+
+TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource audio_source;
+ ResetFrame(audio_source.fake_frame());
+
+ mixer->AddSource(&audio_source);
+
+ for (auto frequency : {8000, 16000, 32000, 48000}) {
+ EXPECT_CALL(audio_source, GetAudioFrameWithInfo(frequency, _))
+ .Times(Exactly(1));
+
+ MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
+ &audio_source);
+ }
+}
+
+TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource audio_source;
+ ResetFrame(audio_source.fake_frame());
+
+ mixer->AddSource(&audio_source);
+
+ for (auto frequency : {8000, 16000, 32000, 48000}) {
+ MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
+ &audio_source);
+
+ EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_);
+ }
+}
+
+TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource participant;
+ ResetFrame(participant.fake_frame());
+ mixer->AddSource(&participant);
+
+ const int needed_frequency = 44100;
+ ON_CALL(participant, PreferredSampleRate())
+ .WillByDefault(Return(needed_frequency));
+
+ // We expect mixing frequency to be native and >= needed_frequency.
+ const int expected_mix_frequency = 48000;
+ EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _))
+ .Times(Exactly(1));
+ participant.fake_frame()->sample_rate_hz_ = expected_mix_frequency;
+ participant.fake_frame()->samples_per_channel_ = expected_mix_frequency / 100;
+
+ mixer->Mix(1, &frame_for_mixing);
+
+ EXPECT_EQ(48000, frame_for_mixing.sample_rate_hz_);
+}
+
+// Check that the mixing rate is always >= the participants' preferred rates.
+TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ std::vector<MockMixerAudioSource> audio_sources(2);
+ const std::vector<int> source_sample_rates = {8000, 16000};
+ for (int i = 0; i < 2; ++i) {
+ auto& source = audio_sources[i];
+ ResetFrame(source.fake_frame());
+ mixer->AddSource(&source);
+ const auto sample_rate = source_sample_rates[i];
+ EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
+
+ EXPECT_CALL(source, GetAudioFrameWithInfo(::testing::Ge(sample_rate), _));
+ }
+ mixer->Mix(1, &frame_for_mixing);
+}
+
+TEST(AudioMixer, ParticipantNumberOfChannels) {
+ const auto mixer = AudioMixerImpl::Create();
+
+ MockMixerAudioSource participant;
+ ResetFrame(participant.fake_frame());
+
+ EXPECT_TRUE(mixer->AddSource(&participant));
+ for (size_t number_of_channels : {1, 2}) {
+ EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
+ .Times(Exactly(1));
+ mixer->Mix(number_of_channels, &frame_for_mixing);
+ EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
+ }
+}
+
+// The maximal number of participants is mixed in one iteration, then
+// another participant with higher energy is added.
+TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { + constexpr int kAudioSources = + AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1; + + const auto mixer = AudioMixerImpl::Create(); + MockMixerAudioSource participants[kAudioSources]; + + for (int i = 0; i < kAudioSources; ++i) { + ResetFrame(participants[i].fake_frame()); + // Set the participant audio energy to increase with the index + // `i`. + participants[i].fake_frame()->mutable_data()[0] = 100 * i; + } + + // Add all participants but the loudest for mixing. + for (int i = 0; i < kAudioSources - 1; ++i) { + EXPECT_TRUE(mixer->AddSource(&participants[i])); + EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) + .Times(Exactly(1)); + } + + // First mixer iteration + mixer->Mix(1, &frame_for_mixing); + + // All participants but the loudest should have been mixed. + for (int i = 0; i < kAudioSources - 1; ++i) { + EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) + << "Mixed status of AudioSource #" << i << " wrong."; + } + + // Add new participant with higher energy. + EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1])); + for (int i = 0; i < kAudioSources; ++i) { + EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) + .Times(Exactly(1)); + } + + mixer->Mix(1, &frame_for_mixing); + + // The most quiet participant should not have been mixed. + EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0])) + << "Mixed status of AudioSource #0 wrong."; + + // The loudest participants should have been mixed. + for (int i = 1; i < kAudioSources; ++i) { + EXPECT_EQ(true, + mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) + << "Mixed status of AudioSource #" << i << " wrong."; + } +} + +// This test checks that the initialization and participant addition +// can be done on a different thread. 
+TEST(AudioMixer, ConstructFromOtherThread) {
+ TaskQueueForTest init_queue("init");
+ rtc::scoped_refptr<AudioMixerImpl> mixer;
+ init_queue.SendTask([&mixer]() { mixer = AudioMixerImpl::Create(); });
+
+ MockMixerAudioSource participant;
+ EXPECT_CALL(participant, PreferredSampleRate())
+ .WillRepeatedly(Return(kDefaultSampleRateHz));
+
+ ResetFrame(participant.fake_frame());
+
+ TaskQueueForTest participant_queue("participant");
+ participant_queue.SendTask(
+ [&mixer, &participant]() { mixer->AddSource(&participant); });
+
+ EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
+ .Times(Exactly(1));
+
+ // Do one mixer iteration
+ mixer->Mix(1, &frame_for_mixing);
+}
+
+TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
+ constexpr int kAudioSources =
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+ frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted;
+ std::vector<bool> expected_status(kAudioSources, true);
+ expected_status[0] = false;
+
+ MixAndCompare(frames, frame_info, expected_status);
+}
+
+TEST(AudioMixer, PassiveShouldMixAfterNormal) {
+ constexpr int kAudioSources =
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+ frames[0].vad_activity_ = AudioFrame::kVadPassive;
+ std::vector<bool> expected_status(kAudioSources, true);
+ expected_status[0] = false;
+
+ MixAndCompare(frames, frame_info, expected_status);
+}
+
+TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
+ constexpr int kAudioSources =
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+ frames[0].vad_activity_ = AudioFrame::kVadPassive;
+ int16_t* frame_data = frames[0].mutable_data();
+ std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
+ std::numeric_limits<int16_t>::max());
+ std::vector<bool> expected_status(kAudioSources, true);
+ expected_status[0] = false;
+
+ MixAndCompare(frames, frame_info, expected_status);
+}
+
+TEST(AudioMixer, ShouldMixUpToSpecifiedNumberOfSourcesToMix) {
+ constexpr int kAudioSources = 5;
+ constexpr int kSourcesToMix = 2;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+ // Set up to kSourcesToMix sources with kVadActive so that they're mixed.
+ const std::vector<AudioFrame::VADActivity> kVadActivities = {
+ AudioFrame::kVadUnknown, AudioFrame::kVadPassive, AudioFrame::kVadPassive,
+ AudioFrame::kVadActive, AudioFrame::kVadActive};
+ // Populate VAD and frame for all sources.
+ for (int i = 0; i < kAudioSources; i++) {
+ frames[i].vad_activity_ = kVadActivities[i];
+ }
+
+ std::vector<MockMixerAudioSource> participants(kAudioSources);
+ for (int i = 0; i < kAudioSources; ++i) {
+ participants[i].fake_frame()->CopyFrom(frames[i]);
+ participants[i].set_fake_info(frame_info[i]);
+ }
+
+ const auto mixer = AudioMixerImpl::Create(kSourcesToMix);
+ for (int i = 0; i < kAudioSources; ++i) {
+ EXPECT_TRUE(mixer->AddSource(&participants[i]));
+ EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
+ .Times(Exactly(1));
+ }
+
+ mixer->Mix(1, &frame_for_mixing);
+
+ std::vector<bool> expected_status = {false, false, false, true, true};
+ for (int i = 0; i < kAudioSources; ++i) {
+ EXPECT_EQ(expected_status[i],
+ mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
+ << "Wrong mix status for source #" << i;
+ }
+}
+
+TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
+ constexpr int kAudioSources =
+ AudioMixerImpl::kDefaultNumberOfMixedAudioSources + 1;
+
+ std::vector<AudioFrame> frames(kAudioSources);
+ for (auto& frame : frames) {
+ ResetFrame(&frame);
+ }
+
+ std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
+ kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
+ frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted;
+ int16_t* frame_data = frames[0].mutable_data();
+ std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
+ std::numeric_limits<int16_t>::max());
+ std::vector<bool> expected_status(kAudioSources, true);
+ expected_status[0] = false;
+
+ MixAndCompare(frames, frame_info, expected_status);
+}
+
+TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
+ constexpr int kOutputRate = 22000;
+ const auto mixer =
+ AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
+ new CustomRateCalculator(kOutputRate)),
+ true);
+ MockMixerAudioSource audio_source;
+ mixer->AddSource(&audio_source);
+ ResetFrame(audio_source.fake_frame());
+
+ EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _))
+ .Times(Exactly(1));
+
+ mixer->Mix(1, &frame_for_mixing);
+}
+
+TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
+ constexpr int kOutputRate = 8000;
+ const auto mixer =
+ AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
+ new CustomRateCalculator(kOutputRate)),
+ true);
+
+ mixer->Mix(1, &frame_for_mixing);
+
+ EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
+}
+
+TEST(AudioMixer, NoLimiterBasicApiCalls) {
+ const auto mixer = AudioMixerImpl::Create(
+ std::unique_ptr<OutputRateCalculator>(new DefaultOutputRateCalculator()),
+ false);
+ mixer->Mix(1, &frame_for_mixing);
+}
+
+TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
+ // No APM limiter means no AudioProcessing::NativeRate restriction
+ // on mixing rate. The rate has to be divisible by 100 since we use
+ // 10 ms frames, though.
+  for (const auto rate : {8000, 20000, 24000, 32000, 44100}) {
+    for (const size_t number_of_channels : {1, 2}) {
+      for (const auto number_of_sources : {0, 1, 2, 3, 4}) {
+        SCOPED_TRACE(
+            ProduceDebugText(rate, number_of_channels, number_of_sources));
+        const auto mixer =
+            AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
+                                       new CustomRateCalculator(rate)),
+                                   false);
+
+        std::vector<MockMixerAudioSource> sources(number_of_sources);
+        for (auto& source : sources) {
+          ResetFrame(source.fake_frame());
+          mixer->AddSource(&source);
+        }
+
+        mixer->Mix(number_of_channels, &frame_for_mixing);
+        EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_);
+        EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
+      }
+    }
+  }
+}
+
+TEST(AudioMixer, MultipleChannelsOneParticipant) {
+  // Set up a participant with a 6-channel frame, and make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  ResetFrame(source.fake_frame());
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+TEST(AudioMixer, MultipleChannelsManyParticipants) {
+  // Sets up 2 participants. One has a 6-channel frame. Make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  mixer->AddSource(&other_source);
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+TEST(AudioMixer, ShouldIncludeRtpPacketInfoFromAllMixedSources) {
+  const uint32_t kSsrc0 = 10;
+  const uint32_t kSsrc1 = 11;
+  const uint32_t kSsrc2 = 12;
+  const uint32_t kCsrc0 = 20;
+  const uint32_t kCsrc1 = 21;
+  const uint32_t kCsrc2 = 22;
+  const uint32_t kCsrc3 = 23;
+  const int kAudioLevel0 = 10;
+  const int kAudioLevel1 = 40;
+  const absl::optional<uint8_t> kAudioLevel2 = absl::nullopt;
+  const uint32_t kRtpTimestamp0 = 300;
+  const uint32_t kRtpTimestamp1 = 400;
+  const Timestamp kReceiveTime0 = Timestamp::Millis(10);
+  const Timestamp kReceiveTime1 = Timestamp::Millis(20);
+
+  RtpPacketInfo p0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, kReceiveTime0);
+  p0.set_audio_level(kAudioLevel0);
+  RtpPacketInfo p1(kSsrc1, {kCsrc2}, kRtpTimestamp1, kReceiveTime1);
+  p1.set_audio_level(kAudioLevel1);
+  RtpPacketInfo p2(kSsrc2, {kCsrc3}, kRtpTimestamp1, kReceiveTime1);
+  p2.set_audio_level(kAudioLevel2);
+
+  const auto mixer = AudioMixerImpl::Create();
+
+  MockMixerAudioSource source;
+  source.set_packet_infos(RtpPacketInfos({p0}));
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+
+  MockMixerAudioSource other_source;
+  other_source.set_packet_infos(RtpPacketInfos({p1, p2}));
+  ResetFrame(other_source.fake_frame());
+  mixer->AddSource(&other_source);
+
+  mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing);
+
+  EXPECT_THAT(frame_for_mixing.packet_infos_, UnorderedElementsAre(p0, p1, p2));
+}
+
+TEST(AudioMixer, MixerShouldIncludeRtpPacketInfoFromMixedSourcesOnly) {
+  const uint32_t kSsrc0 = 10;
+  const uint32_t kSsrc1 = 11;
+  const uint32_t kSsrc2 = 21;
+  const uint32_t kCsrc0 = 30;
+  const uint32_t kCsrc1 = 31;
+  const uint32_t kCsrc2 = 32;
+  const uint32_t kCsrc3 = 33;
+  const int kAudioLevel0 = 10;
+  const absl::optional<uint8_t> kAudioLevelMissing = absl::nullopt;
+  const uint32_t kRtpTimestamp0 = 300;
+  const uint32_t kRtpTimestamp1 = 400;
+  const Timestamp kReceiveTime0 = Timestamp::Millis(10);
+  const Timestamp kReceiveTime1 = Timestamp::Millis(20);
+
+  RtpPacketInfo p0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, kReceiveTime0);
+  p0.set_audio_level(kAudioLevel0);
+  RtpPacketInfo p1(kSsrc1, {kCsrc2}, kRtpTimestamp1, kReceiveTime1);
+  p1.set_audio_level(kAudioLevelMissing);
+  RtpPacketInfo p2(kSsrc2, {kCsrc3}, kRtpTimestamp1, kReceiveTime1);
+  p2.set_audio_level(kAudioLevelMissing);
+
+  const auto mixer = AudioMixerImpl::Create(/*max_sources_to_mix=*/2);
+
+  MockMixerAudioSource source1;
+  source1.set_packet_infos(RtpPacketInfos({p0}));
+  mixer->AddSource(&source1);
+  ResetFrame(source1.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+
+  MockMixerAudioSource source2;
+  source2.set_packet_infos(RtpPacketInfos({p1}));
+  ResetFrame(source2.fake_frame());
+  mixer->AddSource(&source2);
+
+  // The mixer prioritizes kVadActive over kVadPassive.
+  // We limit the number of sources to mix to 2 and set the third source's VAD
+  // activity to kVadPassive so that it will not be added to the mix.
+  MockMixerAudioSource source3;
+  source3.set_packet_infos(RtpPacketInfos({p2}));
+  ResetFrame(source3.fake_frame());
+  source3.fake_frame()->vad_activity_ = AudioFrame::kVadPassive;
+  mixer->AddSource(&source3);
+
+  mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing);
+
+  EXPECT_THAT(frame_for_mixing.packet_infos_, UnorderedElementsAre(p0, p1));
+}
+
+class HighOutputRateCalculator : public OutputRateCalculator {
+ public:
+  static const int kDefaultFrequency = 76000;
+  int CalculateOutputRateFromRange(
+      rtc::ArrayView<const int> preferred_sample_rates) override {
+    return kDefaultFrequency;
+  }
+  ~HighOutputRateCalculator() override {}
+};
+const int HighOutputRateCalculator::kDefaultFrequency;
+
+TEST(AudioMixerDeathTest, MultipleChannelsAndHighRate) {
+  constexpr size_t kSamplesPerChannel =
+      HighOutputRateCalculator::kDefaultFrequency / 100;
+  // As many channels as an AudioFrame can fit:
+  constexpr size_t kNumberOfChannels =
+      AudioFrame::kMaxDataSizeSamples / kSamplesPerChannel;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create(
+      std::make_unique<HighOutputRateCalculator>(), true);
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  frame->samples_per_channel_ = kSamplesPerChannel;
+
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  auto* other_frame = other_source.fake_frame();
+  other_frame->num_channels_ = kNumberOfChannels;
+  other_frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  other_frame->samples_per_channel_ = kSamplesPerChannel;
+  mixer->AddSource(&other_source);
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  EXPECT_DEATH(mixer->Mix(kNumberOfChannels, &frame_for_mixing), "");
+#elif !RTC_DCHECK_IS_ON
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  EXPECT_EQ(frame_for_mixing.sample_rate_hz_,
+            HighOutputRateCalculator::kDefaultFrequency);
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/audio_mixer_test.cc b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_test.cc
new file mode 100644
index 0000000000..3ee28a7937
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/audio_mixer_test.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/audio/audio_mixer.h"
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/string_view.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_mixer/audio_mixer_impl.h"
+#include "modules/audio_mixer/default_output_rate_calculator.h"
+#include "rtc_base/strings/string_builder.h"
+
+ABSL_FLAG(int,
+          sampling_rate,
+          16000,
+          "Rate at which to mix (all input streams must have this rate)");
+
+ABSL_FLAG(bool,
+          stereo,
+          false,
+          "Enable stereo (interleaved). Inputs need not match this parameter.");
+
+ABSL_FLAG(bool, limiter, true, "Enable limiter.");
+ABSL_FLAG(std::string,
+          output_file,
+          "mixed_file.wav",
+          "File in which to store the mixed result.");
+ABSL_FLAG(std::string, input_file_1, "", "First input. Default none.");
+ABSL_FLAG(std::string, input_file_2, "", "Second input. Default none.");
+ABSL_FLAG(std::string, input_file_3, "", "Third input. Default none.");
+ABSL_FLAG(std::string, input_file_4, "", "Fourth input. Default none.");
+
+namespace webrtc {
+namespace test {
+
+class FilePlayingSource : public AudioMixer::Source {
+ public:
+  explicit FilePlayingSource(absl::string_view filename)
+      : wav_reader_(new WavReader(filename)),
+        sample_rate_hz_(wav_reader_->sample_rate()),
+        samples_per_channel_(sample_rate_hz_ / 100),
+        number_of_channels_(wav_reader_->num_channels()) {}
+
+  AudioFrameInfo GetAudioFrameWithInfo(int target_rate_hz,
+                                       AudioFrame* frame) override {
+    frame->samples_per_channel_ = samples_per_channel_;
+    frame->num_channels_ = number_of_channels_;
+    frame->sample_rate_hz_ = target_rate_hz;
+
+    RTC_CHECK_EQ(target_rate_hz, sample_rate_hz_);
+
+    const size_t num_to_read = number_of_channels_ * samples_per_channel_;
+    const size_t num_read =
+        wav_reader_->ReadSamples(num_to_read, frame->mutable_data());
+
+    file_has_ended_ = num_to_read != num_read;
+    if (file_has_ended_) {
+      frame->Mute();
+    }
+    return file_has_ended_ ? AudioFrameInfo::kMuted : AudioFrameInfo::kNormal;
+  }
+
+  int Ssrc() const override { return 0; }
+
+  int PreferredSampleRate() const override { return sample_rate_hz_; }
+
+  bool FileHasEnded() const { return file_has_ended_; }
+
+  std::string ToString() const {
+    rtc::StringBuilder ss;
+    ss << "{rate: " << sample_rate_hz_ << ", channels: " << number_of_channels_
+       << ", samples_tot: " << wav_reader_->num_samples() << "}";
+    return ss.Release();
+  }
+
+ private:
+  std::unique_ptr<WavReader> wav_reader_;
+  int sample_rate_hz_;
+  int samples_per_channel_;
+  int number_of_channels_;
+  bool file_has_ended_ = false;
+};
+}  // namespace test
+}  // namespace webrtc
+
+namespace {
+
+const std::vector<std::string> parse_input_files() {
+  std::vector<std::string> result;
+  for (auto& x :
+       {absl::GetFlag(FLAGS_input_file_1), absl::GetFlag(FLAGS_input_file_2),
+        absl::GetFlag(FLAGS_input_file_3), absl::GetFlag(FLAGS_input_file_4)}) {
+    if (!x.empty()) {
+      result.push_back(x);
+    }
+  }
+  return result;
+}
+}  // namespace
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+
+  rtc::scoped_refptr<webrtc::AudioMixer> mixer(
+      webrtc::AudioMixerImpl::Create(
+          std::unique_ptr<webrtc::OutputRateCalculator>(
+              new webrtc::DefaultOutputRateCalculator()),
+          absl::GetFlag(FLAGS_limiter)));
+
+  const std::vector<std::string> input_files = parse_input_files();
+  std::vector<webrtc::test::FilePlayingSource> sources;
+  const int num_channels = absl::GetFlag(FLAGS_stereo) ? 2 : 1;
+  sources.reserve(input_files.size());
+  for (const auto& input_file : input_files) {
+    sources.emplace_back(input_file);
+  }
+
+  for (auto& source : sources) {
+    auto error = mixer->AddSource(&source);
+    RTC_CHECK(error);
+  }
+
+  if (sources.empty()) {
+    std::cout << "Need at least one source!\n";
+    return 1;
+  }
+
+  const size_t sample_rate = sources[0].PreferredSampleRate();
+  for (const auto& source : sources) {
+    RTC_CHECK_EQ(sample_rate, source.PreferredSampleRate());
+  }
+
+  // Print stats.
+  std::cout << "Limiting is: " << (absl::GetFlag(FLAGS_limiter) ? "on" : "off")
+            << "\n"
+               "Channels: "
+            << num_channels
+            << "\n"
+               "Rate: "
+            << sample_rate
+            << "\n"
+               "Number of input streams: "
+            << input_files.size() << "\n";
+  for (const auto& source : sources) {
+    std::cout << "\t" << source.ToString() << "\n";
+  }
+  std::cout << "Now mixing\n...\n";
+
+  webrtc::WavWriter wav_writer(absl::GetFlag(FLAGS_output_file), sample_rate,
+                               num_channels);
+
+  webrtc::AudioFrame frame;
+
+  bool all_streams_finished = false;
+  while (!all_streams_finished) {
+    mixer->Mix(num_channels, &frame);
+    RTC_CHECK_EQ(sample_rate / 100, frame.samples_per_channel_);
+    RTC_CHECK_EQ(sample_rate, frame.sample_rate_hz_);
+    RTC_CHECK_EQ(num_channels, frame.num_channels_);
+    wav_writer.WriteSamples(frame.data(),
+                            num_channels * frame.samples_per_channel_);
+
+    all_streams_finished =
+        std::all_of(sources.begin(), sources.end(),
+                    [](const webrtc::test::FilePlayingSource& source) {
+                      return source.FileHasEnded();
+                    });
+  }
+
+  std::cout << "Done!\n" << std::endl;
+}
diff --git a/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.cc b/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.cc
new file mode 100644
index 0000000000..5f24b653a3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/default_output_rate_calculator.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+int DefaultOutputRateCalculator::CalculateOutputRateFromRange(
+    rtc::ArrayView<const int> preferred_sample_rates) {
+  if (preferred_sample_rates.empty()) {
+    return DefaultOutputRateCalculator::kDefaultFrequency;
+  }
+  using NativeRate = AudioProcessing::NativeRate;
+  const int maximal_frequency = *std::max_element(
+      preferred_sample_rates.cbegin(), preferred_sample_rates.cend());
+
+  RTC_DCHECK_LE(NativeRate::kSampleRate8kHz, maximal_frequency);
+  RTC_DCHECK_GE(NativeRate::kSampleRate48kHz, maximal_frequency);
+
+  static constexpr NativeRate native_rates[] = {
+      NativeRate::kSampleRate8kHz, NativeRate::kSampleRate16kHz,
+      NativeRate::kSampleRate32kHz, NativeRate::kSampleRate48kHz};
+  const auto* rounded_up_index = std::lower_bound(
+      std::begin(native_rates), std::end(native_rates), maximal_frequency);
+  RTC_DCHECK(rounded_up_index != std::end(native_rates));
+  return *rounded_up_index;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.h b/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.h
new file mode 100644
index 0000000000..02a3b5c37b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/default_output_rate_calculator.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_DEFAULT_OUTPUT_RATE_CALCULATOR_H_
+#define MODULES_AUDIO_MIXER_DEFAULT_OUTPUT_RATE_CALCULATOR_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_mixer/output_rate_calculator.h"
+
+namespace webrtc {
+
+class DefaultOutputRateCalculator : public OutputRateCalculator {
+ public:
+  static const int kDefaultFrequency = 48000;
+
+  // Produces the least native rate that is greater than or equal to the
+  // maximal preferred sample rate (e.g., a maximal preferred rate of
+  // 22000 Hz yields 32000 Hz). A native rate is one in
+  // AudioProcessing::NativeRate. If `preferred_sample_rates` is
+  // empty, returns `kDefaultFrequency`.
+  int CalculateOutputRateFromRange(
+      rtc::ArrayView<const int> preferred_sample_rates) override;
+  ~DefaultOutputRateCalculator() override {}
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_MIXER_DEFAULT_OUTPUT_RATE_CALCULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_mixer/frame_combiner.cc b/third_party/libwebrtc/modules/audio_mixer/frame_combiner.cc
new file mode 100644
index 0000000000..96c62f6b0d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/frame_combiner.cc
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/frame_combiner.h"
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/rtp_packet_info.h"
+#include "api/rtp_packet_infos.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_mixer/audio_frame_manipulator.h"
+#include "modules/audio_mixer/audio_mixer_impl.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+using MixingBuffer =
+    std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
+               FrameCombiner::kMaximumNumberOfChannels>;
+
+void SetAudioFrameFields(rtc::ArrayView<AudioFrame* const> mix_list,
+                         size_t number_of_channels,
+                         int sample_rate,
+                         size_t number_of_streams,
+                         AudioFrame* audio_frame_for_mixing) {
+  const size_t samples_per_channel = static_cast<size_t>(
+      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
+
+  // TODO(minyue): Issue bugs.webrtc.org/3390.
+  // Audio frame timestamp. The 'timestamp_' field is set to dummy
+  // value '0', because it is only supported in the one channel case and
+  // is then updated in the helper functions.
+  audio_frame_for_mixing->UpdateFrame(
+      0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
+      AudioFrame::kVadUnknown, number_of_channels);
+
+  if (mix_list.empty()) {
+    audio_frame_for_mixing->elapsed_time_ms_ = -1;
+  } else {
+    audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_;
+    audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_;
+    audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_;
+    std::vector<RtpPacketInfo> packet_infos;
+    for (const auto& frame : mix_list) {
+      audio_frame_for_mixing->timestamp_ =
+          std::min(audio_frame_for_mixing->timestamp_, frame->timestamp_);
+      audio_frame_for_mixing->ntp_time_ms_ =
+          std::min(audio_frame_for_mixing->ntp_time_ms_, frame->ntp_time_ms_);
+      audio_frame_for_mixing->elapsed_time_ms_ = std::max(
+          audio_frame_for_mixing->elapsed_time_ms_, frame->elapsed_time_ms_);
+      packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
+                          frame->packet_infos_.end());
+    }
+    audio_frame_for_mixing->packet_infos_ =
+        RtpPacketInfos(std::move(packet_infos));
+  }
+}
+
+void MixFewFramesWithNoLimiter(rtc::ArrayView<AudioFrame* const> mix_list,
+                               AudioFrame* audio_frame_for_mixing) {
+  if (mix_list.empty()) {
+    audio_frame_for_mixing->Mute();
+    return;
+  }
+  RTC_DCHECK_LE(mix_list.size(), 1);
+  std::copy(mix_list[0]->data(),
+            mix_list[0]->data() +
+                mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_,
+            audio_frame_for_mixing->mutable_data());
+}
+
+void MixToFloatFrame(rtc::ArrayView<AudioFrame* const> mix_list,
+                     size_t samples_per_channel,
+                     size_t number_of_channels,
+                     MixingBuffer* mixing_buffer) {
+  RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
+  RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
+  // Clear the mixing buffer.
+  *mixing_buffer = {};
+
+  // Convert to FloatS16 and mix.
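+  // Accumulation happens in FloatS16 scale (the int16_t numeric range,
+  // stored as float), so the per-channel sums below may exceed +/-32767
+  // without wrapping; saturation is applied later, by the limiter and by
+  // FloatS16ToS16() in InterleaveToAudioFrame().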
+  for (size_t i = 0; i < mix_list.size(); ++i) {
+    const AudioFrame* const frame = mix_list[i];
+    const int16_t* const frame_data = frame->data();
+    for (size_t j = 0; j < std::min(number_of_channels,
+                                    FrameCombiner::kMaximumNumberOfChannels);
+         ++j) {
+      for (size_t k = 0; k < std::min(samples_per_channel,
+                                      FrameCombiner::kMaximumChannelSize);
+           ++k) {
+        (*mixing_buffer)[j][k] += frame_data[number_of_channels * k + j];
+      }
+    }
+  }
+}
+
+void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
+  const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 /
+                             AudioMixerImpl::kFrameDurationInMs;
+  // TODO(alessiob): Avoid calling SetSampleRate every time.
+  limiter->SetSampleRate(sample_rate);
+  limiter->Process(mixing_buffer_view);
+}
+
+// Both interleaves and rounds.
+void InterleaveToAudioFrame(AudioFrameView<float> mixing_buffer_view,
+                            AudioFrame* audio_frame_for_mixing) {
+  const size_t number_of_channels = mixing_buffer_view.num_channels();
+  const size_t samples_per_channel = mixing_buffer_view.samples_per_channel();
+  int16_t* const mixing_data = audio_frame_for_mixing->mutable_data();
+  // Put data in the result frame.
+  for (size_t i = 0; i < number_of_channels; ++i) {
+    for (size_t j = 0; j < samples_per_channel; ++j) {
+      mixing_data[number_of_channels * j + i] =
+          FloatS16ToS16(mixing_buffer_view.channel(i)[j]);
+    }
+  }
+}
+}  // namespace
+
+constexpr size_t FrameCombiner::kMaximumNumberOfChannels;
+constexpr size_t FrameCombiner::kMaximumChannelSize;
+
+FrameCombiner::FrameCombiner(bool use_limiter)
+    : data_dumper_(new ApmDataDumper(0)),
+      mixing_buffer_(
+          std::make_unique<std::array<std::array<float, kMaximumChannelSize>,
+                                      kMaximumNumberOfChannels>>()),
+      limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
+      use_limiter_(use_limiter) {
+  static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
+                    AudioFrame::kMaxDataSizeSamples,
+                "");
+}
+
+FrameCombiner::~FrameCombiner() = default;
+
+void FrameCombiner::Combine(rtc::ArrayView<AudioFrame* const> mix_list,
+                            size_t number_of_channels,
+                            int sample_rate,
+                            size_t number_of_streams,
+                            AudioFrame* audio_frame_for_mixing) {
+  RTC_DCHECK(audio_frame_for_mixing);
+
+  SetAudioFrameFields(mix_list, number_of_channels, sample_rate,
+                      number_of_streams, audio_frame_for_mixing);
+
+  const size_t samples_per_channel = static_cast<size_t>(
+      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
+
+  for (const auto* frame : mix_list) {
+    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
+    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
+  }
+
+  // The 'num_channels_' field of frames in 'mix_list' could be
+  // different from 'number_of_channels'.
+  for (auto* frame : mix_list) {
+    RemixFrame(number_of_channels, frame);
+  }
+
+  if (number_of_streams <= 1) {
+    MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing);
+    return;
+  }
+
+  MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
+                  mixing_buffer_.get());
+
+  const size_t output_number_of_channels =
+      std::min(number_of_channels, kMaximumNumberOfChannels);
+  const size_t output_samples_per_channel =
+      std::min(samples_per_channel, kMaximumChannelSize);
+
+  // Put float data in an AudioFrameView.
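+  // channel_pointers is zero-initialized; only the first
+  // output_number_of_channels entries are set below, and the view is
+  // created with exactly that many channels, so the null tail is never
+  // dereferenced.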
+  std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
+  for (size_t i = 0; i < output_number_of_channels; ++i) {
+    channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
+  }
+  AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
+                                           output_number_of_channels,
+                                           output_samples_per_channel);
+
+  if (use_limiter_) {
+    RunLimiter(mixing_buffer_view, &limiter_);
+  }
+
+  InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/frame_combiner.h b/third_party/libwebrtc/modules/audio_mixer/frame_combiner.h
new file mode 100644
index 0000000000..6185b29f8a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/frame_combiner.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
+#define MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
+
+#include <array>
+#include <memory>
+
+#include "api/array_view.h"
+#include "api/audio/audio_frame.h"
+#include "modules/audio_processing/agc2/limiter.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+class FrameCombiner {
+ public:
+  explicit FrameCombiner(bool use_limiter);
+  ~FrameCombiner();
+
+  // Combines several frames into one. Assumes that the sample_rate and
+  // samples_per_channel of the input frames match the parameters. The
+  // parameters 'number_of_channels' and 'sample_rate' are needed
+  // because 'mix_list' can be empty. The parameter
+  // 'number_of_streams' is used for determining whether to pass the
+  // data through a limiter.
+  void Combine(rtc::ArrayView<AudioFrame* const> mix_list,
+               size_t number_of_channels,
+               int sample_rate,
+               size_t number_of_streams,
+               AudioFrame* audio_frame_for_mixing);
+
+  // Up to 8 channels of 10 ms at 48 kHz (480 samples per channel).
+  static constexpr size_t kMaximumNumberOfChannels = 8;
+  static constexpr size_t kMaximumChannelSize = 48 * 10;
+
+  using MixingBuffer = std::array<std::array<float, kMaximumChannelSize>,
+                                  kMaximumNumberOfChannels>;
+
+ private:
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  std::unique_ptr<MixingBuffer> mixing_buffer_;
+  Limiter limiter_;
+  const bool use_limiter_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
diff --git a/third_party/libwebrtc/modules/audio_mixer/frame_combiner_unittest.cc b/third_party/libwebrtc/modules/audio_mixer/frame_combiner_unittest.cc
new file mode 100644
index 0000000000..6c64d0852a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/frame_combiner_unittest.cc
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/frame_combiner.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/rtp_packet_info.h"
+#include "api/rtp_packet_infos.h"
+#include "api/units/timestamp.h"
+#include "audio/utility/audio_frame_operations.h"
+#include "modules/audio_mixer/gain_change_calculator.h"
+#include "modules/audio_mixer/sine_wave_generator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+using ::testing::ElementsAreArray;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAreArray;
+
+struct FrameCombinerConfig {
+  bool use_limiter;
+  int sample_rate_hz;
+  int number_of_channels;
+  float wave_frequency;
+};
+
+std::string ProduceDebugText(int sample_rate_hz,
+                             int number_of_channels,
+                             int number_of_sources) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz << ", ";
+  ss << "number of channels: " << number_of_channels << ", ";
+  ss << "number of sources: " << number_of_sources;
+  return ss.Release();
+}
+
+std::string ProduceDebugText(const FrameCombinerConfig& config) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << config.sample_rate_hz << ", ";
+  ss << "number of channels: " << config.number_of_channels << ", ";
+  ss << "limiter active: " << (config.use_limiter ? "on" : "off") << ", ";
+  ss << "wave frequency: " << config.wave_frequency;
+  return ss.Release();
+}
+
+AudioFrame frame1;
+AudioFrame frame2;
+
+void SetUpFrames(int sample_rate_hz, int number_of_channels) {
+  RtpPacketInfo packet_info1(/*ssrc=*/1001, /*csrcs=*/{},
+                             /*rtp_timestamp=*/1000,
+                             /*receive_time=*/Timestamp::Millis(1));
+  RtpPacketInfo packet_info2(/*ssrc=*/4004, /*csrcs=*/{},
+                             /*rtp_timestamp=*/1234,
+                             /*receive_time=*/Timestamp::Millis(2));
+  RtpPacketInfo packet_info3(/*ssrc=*/7007, /*csrcs=*/{},
+                             /*rtp_timestamp=*/1333,
+                             /*receive_time=*/Timestamp::Millis(2));
+
+  frame1.packet_infos_ = RtpPacketInfos({packet_info1});
+  frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});
+
+  for (auto* frame : {&frame1, &frame2}) {
+    frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
+                       sample_rate_hz, AudioFrame::kNormalSpeech,
+                       AudioFrame::kVadActive, number_of_channels);
+  }
+}
+}  // namespace
+
+// The limiter requires sample rate divisible by 2000.
+TEST(FrameCombiner, BasicApiCallsLimiter) {
+  FrameCombiner combiner(true);
+  for (const int rate : {8000, 18000, 34000, 48000}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      for (const int number_of_frames : {0, 1, 2}) {
+        SCOPED_TRACE(
+            ProduceDebugText(rate, number_of_channels, number_of_frames));
+        const std::vector<AudioFrame*> frames_to_combine(
+            all_frames.begin(), all_frames.begin() + number_of_frames);
+        AudioFrame audio_frame_for_mixing;
+        combiner.Combine(frames_to_combine, number_of_channels, rate,
+                         frames_to_combine.size(), &audio_frame_for_mixing);
+      }
+    }
+  }
+}
+
+// The RtpPacketInfos field of the mixed packet should contain the union of the
+// RtpPacketInfos from the frames that were actually mixed.
+TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
+  static constexpr int kSampleRateHz = 48000;
+  static constexpr int kNumChannels = 1;
+  FrameCombiner combiner(true);
+  const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+  SetUpFrames(kSampleRateHz, kNumChannels);
+
+  for (const int number_of_frames : {0, 1, 2}) {
+    SCOPED_TRACE(
+        ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
+    const std::vector<AudioFrame*> frames_to_combine(
+        all_frames.begin(), all_frames.begin() + number_of_frames);
+
+    std::vector<RtpPacketInfo> packet_infos;
+    for (const auto& frame : frames_to_combine) {
+      packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
+                          frame->packet_infos_.end());
+    }
+
+    AudioFrame audio_frame_for_mixing;
+    combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
+                     frames_to_combine.size(), &audio_frame_for_mixing);
+    EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
+                UnorderedElementsAreArray(packet_infos));
+  }
+}
+
+// There are DCHECKs in place to check for invalid parameters.
+TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
+  FrameCombiner combiner(true);
+  for (const int rate : {8000, 18000, 34000, 48000}) {
+    for (const int number_of_channels : {10, 20, 21}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+      AudioFrame audio_frame_for_mixing;
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
+TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
+  FrameCombiner combiner(true);
+  for (const int rate : {50000, 96000, 128000, 196000}) {
+    for (const int number_of_channels : {1, 2, 3}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+      AudioFrame audio_frame_for_mixing;
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
+// With no limiter, the rate has to be divisible by 100 since we use
+// 10 ms frames.
+TEST(FrameCombiner, BasicApiCallsNoLimiter) {
+  FrameCombiner combiner(false);
+  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      for (const int number_of_frames : {0, 1, 2}) {
+        SCOPED_TRACE(
+            ProduceDebugText(rate, number_of_channels, number_of_frames));
+        const std::vector<AudioFrame*> frames_to_combine(
+            all_frames.begin(), all_frames.begin() + number_of_frames);
+        AudioFrame audio_frame_for_mixing;
+        combiner.Combine(frames_to_combine, number_of_channels, rate,
+                         frames_to_combine.size(), &audio_frame_for_mixing);
+      }
+    }
+  }
+}
+
+TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
+  FrameCombiner combiner(false);
+  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
+    for (const int number_of_channels : {1, 2}) {
+      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));
+
+      AudioFrame audio_frame_for_mixing;
+
+      const std::vector<AudioFrame*> frames_to_combine;
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
+      const std::vector<int16_t> mixed_data(
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);
+
+      const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
+      EXPECT_EQ(mixed_data, expected);
+      EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
+    }
+  }
+}
+
+TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
+  FrameCombiner combiner(false);
+  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
+    for (const int number_of_channels : {1, 2, 4, 8, 10}) {
+      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
+
+      AudioFrame audio_frame_for_mixing;
+
+      SetUpFrames(rate, number_of_channels);
+      int16_t* frame1_data = frame1.mutable_data();
+      std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
+      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+
+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
+      const std::vector<int16_t> mixed_data(
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);
+
+      std::vector<int16_t> expected(number_of_channels * rate / 100);
+      std::iota(expected.begin(), expected.end(), 0);
+      EXPECT_EQ(mixed_data, expected);
+      EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
+                  ElementsAreArray(frame1.packet_infos_));
+    }
+  }
+}
+
+// Send a sine wave through the FrameCombiner, and check that the
+// difference between input and output varies smoothly. Also check
+// that it is inside reasonable bounds. This is to catch issues like
+// chromium:695993 and chromium:816875.
+TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
+  // Rates are divisible by 2000 when limiter is active.
+  std::vector<FrameCombinerConfig> configs = {
+      {false, 30100, 2, 50.f},  {false, 16500, 1, 3200.f},
+      {true, 8000, 1, 3200.f},  {true, 16000, 1, 50.f},
+      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
+  };
+
+  for (const auto& config : configs) {
+    SCOPED_TRACE(ProduceDebugText(config));
+
+    FrameCombiner combiner(config.use_limiter);
+
+    constexpr int16_t wave_amplitude = 30000;
+    SineWaveGenerator wave_generator(config.wave_frequency, wave_amplitude);
+
+    GainChangeCalculator change_calculator;
+    float cumulative_change = 0.f;
+
+    constexpr size_t iterations = 100;
+
+    for (size_t i = 0; i < iterations; ++i) {
+      SetUpFrames(config.sample_rate_hz, config.number_of_channels);
+      wave_generator.GenerateNextFrame(&frame1);
+      AudioFrameOperations::Mute(&frame2);
+
+      std::vector<AudioFrame*> frames_to_combine = {&frame1};
+      if (i % 2 == 0) {
+        frames_to_combine.push_back(&frame2);
+      }
+      const size_t number_of_samples =
+          frame1.samples_per_channel_ * config.number_of_channels;
+
+      // Ensures limiter is on if 'use_limiter'.
+      constexpr size_t number_of_streams = 2;
+      AudioFrame audio_frame_for_mixing;
+      combiner.Combine(frames_to_combine, config.number_of_channels,
+                       config.sample_rate_hz, number_of_streams,
+                       &audio_frame_for_mixing);
+      cumulative_change += change_calculator.CalculateGainChange(
+          rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
+          rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
+                                        number_of_samples));
+    }
+
+    // Check that the gain doesn't vary too much.
+    EXPECT_LT(cumulative_change, 10);
+
+    // Check that the latest gain is within reasonable bounds. It
+    // should be slightly less than 1.
+    EXPECT_LT(0.9f, change_calculator.LatestGain());
+    EXPECT_LT(change_calculator.LatestGain(), 1.01f);
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/g3doc/index.md b/third_party/libwebrtc/modules/audio_mixer/g3doc/index.md
new file mode 100644
index 0000000000..6b48378fcb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/g3doc/index.md
@@ -0,0 +1,54 @@
+
+
+
+# The WebRTC Audio Mixer Module
+
+The WebRTC audio mixer module is responsible for mixing multiple incoming audio
+streams (sources) into a single audio stream (mix). It works with 10 ms frames
+and supports sample rates up to 48 kHz and up to 8 audio channels. The API is
+defined in
+[`api/audio/audio_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/audio/audio_mixer.h)
+and it includes the definition of
+[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h),
+which describes an incoming audio stream, and the definition of
+[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h),
+which operates on a collection of
+[`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+objects to produce a mix.
+
+## AudioMixer::Source
+
+Each source has its own characteristics (e.g., sample rate, number of channels,
+muted state) and is identified by an SSRC[^1].
+[`AudioMixer::Source::GetAudioFrameWithInfo()`](https://source.chromium.org/search?q=symbol:AudioMixer::Source::GetAudioFrameWithInfo%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+is used to retrieve the next 10 ms chunk of audio to be mixed.
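+
+As an illustrative sketch only (this example is not part of the upstream
+documentation; the `SilentSource` class, its SSRC value, and the fixed 48 kHz
+preferred rate are assumptions made for the example), a minimal source
+implementation and one mixing iteration look roughly like this:
+
+```cpp
+#include "api/audio/audio_mixer.h"
+#include "modules/audio_mixer/audio_mixer_impl.h"
+
+// A toy source that produces 10 ms of silence, mono, at the requested rate.
+class SilentSource : public webrtc::AudioMixer::Source {
+ public:
+  AudioFrameInfo GetAudioFrameWithInfo(int target_rate_hz,
+                                       webrtc::AudioFrame* frame) override {
+    // A 10 ms frame holds target_rate_hz / 100 samples per channel.
+    // Passing nullptr as data yields a muted (all-zero) frame.
+    frame->UpdateFrame(/*timestamp=*/0, /*data=*/nullptr,
+                       target_rate_hz / 100, target_rate_hz,
+                       webrtc::AudioFrame::kNormalSpeech,
+                       webrtc::AudioFrame::kVadPassive,
+                       /*num_channels=*/1);
+    return AudioFrameInfo::kNormal;
+  }
+  int Ssrc() const override { return 1; }
+  int PreferredSampleRate() const override { return 48000; }
+};
+
+void MixOneFrame() {
+  auto mixer = webrtc::AudioMixerImpl::Create();
+  SilentSource source;
+  mixer->AddSource(&source);
+  webrtc::AudioFrame mixed;
+  mixer->Mix(/*number_of_channels=*/1, &mixed);  // One 10 ms iteration.
+  mixer->RemoveSource(&source);
+}
+```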
+
+[^1]: A synchronization source (SSRC) is the source of a stream of RTP packets,
+    identified by a 32-bit numeric SSRC identifier carried in the RTP header
+    so as not to be dependent upon the network address (see
+    [RFC 3550](https://tools.ietf.org/html/rfc3550#section-3)).
+
+## AudioMixer
+
+The interface allows adding and removing sources, and the
+[`AudioMixer::Mix()`](https://source.chromium.org/search?q=symbol:AudioMixer::Mix%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+method generates a mix with the desired number of channels.
+
+## WebRTC implementation
+
+The interface is implemented in different parts of WebRTC:
+
+* [`AudioMixer::Source`](https://source.chromium.org/search?q=symbol:AudioMixer::Source%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h):
+  [`audio/audio_receive_stream.h`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/audio/audio_receive_stream.h)
+* [`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h):
+  [`modules/audio_mixer/audio_mixer_impl.h`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_mixer/audio_mixer_impl.h)
+
+[`AudioMixer`](https://source.chromium.org/search?q=symbol:AudioMixer%20file:third_party%2Fwebrtc%2Fapi%2Faudio%2Faudio_mixer.h)
+is thread-safe. The output sample rate of the generated mix is automatically
+assigned depending on the sample rate of the sources, whereas the number of
+output channels is defined by the caller[^2]. Samples from the non-muted sources
+are summed up, and then a limiter is used to apply soft-clipping when needed.
+
+[^2]: [`audio/utility/channel_mixer.h`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/audio/utility/channel_mixer.h)
+    is used to mix channels in the non-trivial cases, i.e., if the number of
+    channels for a source or the mix is greater than 3.
diff --git a/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.cc b/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.cc
new file mode 100644
index 0000000000..dbd0945239
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/gain_change_calculator.h"
+
+#include <math.h>
+
+#include <cstdlib>
+#include <vector>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int16_t kReliabilityThreshold = 100;
+}  // namespace
+
+float GainChangeCalculator::CalculateGainChange(
+    rtc::ArrayView<const int16_t> in,
+    rtc::ArrayView<const int16_t> out) {
+  RTC_DCHECK_EQ(in.size(), out.size());
+
+  std::vector<float> gain(in.size());
+  CalculateGain(in, out, gain);
+  return CalculateDifferences(gain);
+}
+
+float GainChangeCalculator::LatestGain() const {
+  return last_reliable_gain_;
+}
+
+void GainChangeCalculator::CalculateGain(rtc::ArrayView<const int16_t> in,
+                                         rtc::ArrayView<const int16_t> out,
+                                         rtc::ArrayView<float> gain) {
+  RTC_DCHECK_EQ(in.size(), out.size());
+  RTC_DCHECK_EQ(in.size(), gain.size());
+
+  for (size_t i = 0; i < in.size(); ++i) {
+    if (std::abs(in[i]) >= kReliabilityThreshold) {
+      last_reliable_gain_ = out[i] / static_cast<float>(in[i]);
+    }
+    gain[i] = last_reliable_gain_;
+  }
+}
+
+float GainChangeCalculator::CalculateDifferences(
+    rtc::ArrayView<const float> values) {
+  float res = 0;
+  for (float f : values) {
+    res += fabs(f - last_value_);
+    last_value_ = f;
+  }
+  return res;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.h b/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.h
new file mode 100644
index 0000000000..3dde9be61e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/gain_change_calculator.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_
+#define MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_
+
+#include <stdint.h>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+class GainChangeCalculator {
+ public:
+  // The 'out' signal is assumed to be produced from 'in' by applying
+  // a smoothly varying gain. This method computes variations of the
+  // gain and handles special cases when the samples are small.
+  float CalculateGainChange(rtc::ArrayView<const int16_t> in,
+                            rtc::ArrayView<const int16_t> out);
+
+  float LatestGain() const;
+
+ private:
+  void CalculateGain(rtc::ArrayView<const int16_t> in,
+                     rtc::ArrayView<const int16_t> out,
+                     rtc::ArrayView<float> gain);
+
+  float CalculateDifferences(rtc::ArrayView<const float> values);
+  float last_value_ = 0.f;
+  float last_reliable_gain_ = 1.0f;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_mixer/output_rate_calculator.h b/third_party/libwebrtc/modules/audio_mixer/output_rate_calculator.h
new file mode 100644
index 0000000000..46b65a8b57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/output_rate_calculator.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_OUTPUT_RATE_CALCULATOR_H_
+#define MODULES_AUDIO_MIXER_OUTPUT_RATE_CALCULATOR_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Decides the sample rate of a mixing iteration given the preferred
+// sample rates of the sources.
+class OutputRateCalculator {
+ public:
+  virtual int CalculateOutputRateFromRange(
+      rtc::ArrayView<const int> preferred_sample_rates) = 0;
+
+  virtual ~OutputRateCalculator() {}
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_MIXER_OUTPUT_RATE_CALCULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.cc b/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.cc
new file mode 100644
index 0000000000..591fe14e8c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_mixer/sine_wave_generator.h"
+
+#include <math.h>
+#include <stddef.h>
+
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+namespace {
+constexpr float kPi = 3.14159265f;
+}  // namespace
+
+void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) {
+  RTC_DCHECK(frame);
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel_; ++i) {
+    for (size_t ch = 0; ch < frame->num_channels_; ++ch) {
+      frame_data[frame->num_channels_ * i + ch] =
+          rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_));
+    }
+    phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_;
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.h b/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.h
new file mode 100644
index 0000000000..ec0fcd24bd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_mixer/sine_wave_generator.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_
+#define MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_
+
+#include <stdint.h>
+
+#include "api/audio/audio_frame.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+class SineWaveGenerator {
+ public:
+  SineWaveGenerator(float wave_frequency_hz, int16_t amplitude)
+      : wave_frequency_hz_(wave_frequency_hz), amplitude_(amplitude) {
+    RTC_DCHECK_GT(wave_frequency_hz, 0);
+  }
+
+  // Produces appropriate output based on frame->num_channels_,
+  // frame->sample_rate_hz_.
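+  // The caller is expected to set frame->samples_per_channel_,
+  // frame->num_channels_ and frame->sample_rate_hz_ before the call;
+  // GenerateNextFrame() only writes the sample data and advances the
+  // wave phase across calls.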
+ void GenerateNextFrame(AudioFrame* frame); + + private: + float phase_ = 0.f; + const float wave_frequency_hz_; + const int16_t amplitude_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/BUILD.gn new file mode 100644 index 0000000000..64e83a006b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/BUILD.gn @@ -0,0 +1,677 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") +if (rtc_enable_protobuf) { + import("//third_party/protobuf/proto_library.gni") +} + +config("apm_debug_dump") { + if (apm_debug_dump) { + defines = [ "WEBRTC_APM_DEBUG_DUMP=1" ] + } else { + defines = [ "WEBRTC_APM_DEBUG_DUMP=0" ] + } +} + +rtc_library("api") { + visibility = [ "*" ] + sources = [ + "include/audio_processing.cc", + "include/audio_processing.h", + ] + deps = [ + ":audio_frame_view", + ":audio_processing_statistics", + "../../api:array_view", + "../../api:scoped_refptr", + "../../api/audio:aec3_config", + "../../api/audio:audio_frame_api", + "../../api/audio:echo_control", + "../../rtc_base:macromagic", + "../../rtc_base:refcount", + "../../rtc_base:stringutils", + "../../rtc_base/system:arch", + "../../rtc_base/system:file_wrapper", + "../../rtc_base/system:rtc_export", + "agc:gain_control_interface", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("audio_frame_proxies") { + visibility = [ "*" ] + sources = [ + "include/audio_frame_proxies.cc", + "include/audio_frame_proxies.h", + ] + deps = [ + ":api", + ":audio_frame_view", + "../../api/audio:audio_frame_api", + ] +} + +rtc_library("audio_buffer") { + visibility = [ "*" ] + + configs += [ ":apm_debug_dump" ] + + sources = [ + "audio_buffer.cc", + "audio_buffer.h", + "splitting_filter.cc", + "splitting_filter.h", + "three_band_filter_bank.cc", + "three_band_filter_bank.h", + ] + + defines = [] + + deps = [ + ":api", + "../../api:array_view", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:checks", + ] +} + +rtc_library("high_pass_filter") { + visibility = [ "*" ] + + sources = [ + "high_pass_filter.cc", + "high_pass_filter.h", + ] + + defines = [] + + deps = [ + ":audio_buffer", + "../../api:array_view", + "../../rtc_base:checks", + "utility:cascaded_biquad_filter", + ] +} + +rtc_source_set("aec_dump_interface") { + visibility = [ "*" ] + sources = [ + "include/aec_dump.cc", + "include/aec_dump.h", + ] + + deps = [ + ":api", + ":audio_frame_view", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("gain_controller2") { + configs += [ ":apm_debug_dump" ] + sources = [ + "gain_controller2.cc", + "gain_controller2.h", + ] + defines = [] + deps = [ + ":aec_dump_interface", + ":api", + ":apm_logging", + ":audio_buffer", + ":audio_frame_view", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:stringutils", + "../../system_wrappers:field_trial", + 
"agc2:adaptive_digital_gain_controller", + "agc2:common", + "agc2:cpu_features", + "agc2:fixed_digital", + "agc2:gain_applier", + "agc2:input_volume_controller", + "agc2:noise_level_estimator", + "agc2:saturation_protector", + "agc2:speech_level_estimator", + "agc2:vad_wrapper", + ] +} + +rtc_library("audio_processing") { + visibility = [ "*" ] + configs += [ ":apm_debug_dump" ] + sources = [ + "audio_processing_builder_impl.cc", + "audio_processing_impl.cc", + "audio_processing_impl.h", + "echo_control_mobile_impl.cc", + "echo_control_mobile_impl.h", + "gain_control_impl.cc", + "gain_control_impl.h", + "render_queue_item_verifier.h", + ] + + defines = [] + deps = [ + ":aec_dump_interface", + ":api", + ":apm_logging", + ":audio_buffer", + ":audio_frame_proxies", + ":audio_frame_view", + ":audio_processing_statistics", + ":gain_controller2", + ":high_pass_filter", + ":optionally_built_submodule_creators", + ":rms_level", + "../../api:array_view", + "../../api:function_view", + "../../api:make_ref_counted", + "../../api/audio:aec3_config", + "../../api/audio:audio_frame_api", + "../../api/audio:echo_control", + "../../audio/utility:audio_frame_operations", + "../../common_audio:common_audio_c", + "../../common_audio/third_party/ooura:fft_size_256", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:gtest_prod", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:safe_minmax", + "../../rtc_base:sanitizer", + "../../rtc_base:swap_queue", + "../../rtc_base:timeutils", + "../../rtc_base/experiments:field_trial_parser", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:rtc_export", + "../../system_wrappers", + "../../system_wrappers:denormal_disabler", + "../../system_wrappers:field_trial", + "../../system_wrappers:metrics", + "aec3", + "aec_dump:aec_dump", + "aecm:aecm_core", + "agc", + "agc:gain_control_interface", + "agc:legacy_agc", + "agc2:input_volume_stats_reporter", + "capture_levels_adjuster", + "ns", + "transient:transient_suppressor_api", + "vad", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + deps += [ + "../../common_audio", + "../../common_audio:fir_filter", + "../../common_audio:fir_filter_factory", + "../../system_wrappers", + ] + + if (rtc_enable_protobuf) { + deps += [ "aec_dump:aec_dump_impl" ] + } else { + deps += [ "aec_dump:null_aec_dump_factory" ] + } +} + +rtc_library("residual_echo_detector") { + poisonous = [ "default_echo_detector" ] + configs += [ ":apm_debug_dump" ] + sources = [ + "echo_detector/circular_buffer.cc", + "echo_detector/circular_buffer.h", + "echo_detector/mean_variance_estimator.cc", + "echo_detector/mean_variance_estimator.h", + "echo_detector/moving_max.cc", + "echo_detector/moving_max.h", + "echo_detector/normalized_covariance_estimator.cc", + "echo_detector/normalized_covariance_estimator.h", + "residual_echo_detector.cc", + "residual_echo_detector.h", + ] + deps = [ + ":api", + ":apm_logging", + "../../api:array_view", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("optionally_built_submodule_creators") { + sources = [ + "optionally_built_submodule_creators.cc", + "optionally_built_submodule_creators.h", + ] + deps = [ + "transient:transient_suppressor_api", + "transient:transient_suppressor_impl", + ] +} + +rtc_source_set("rms_level") { + 
visibility = [ "*" ] + sources = [ + "rms_level.cc", + "rms_level.h", + ] + deps = [ + "../../api:array_view", + "../../rtc_base:checks", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_processing_statistics") { + visibility = [ "*" ] + sources = [ + "include/audio_processing_statistics.cc", + "include/audio_processing_statistics.h", + ] + deps = [ "../../rtc_base/system:rtc_export" ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("audio_frame_view") { + sources = [ "include/audio_frame_view.h" ] + deps = [ "../../api:array_view" ] +} + +if (rtc_enable_protobuf) { + proto_library("audioproc_debug_proto") { + sources = [ "debug.proto" ] + + proto_out_dir = "modules/audio_processing" + } +} + +rtc_library("apm_logging") { + configs += [ ":apm_debug_dump" ] + sources = [ + "logging/apm_data_dumper.cc", + "logging/apm_data_dumper.h", + ] + deps = [ + "../../api:array_view", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:stringutils", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + defines = [] +} + +if (rtc_include_tests) { + rtc_source_set("mocks") { + testonly = true + sources = [ "include/mock_audio_processing.h" ] + deps = [ + ":aec_dump_interface", + ":api", + ":audio_buffer", + ":audio_processing", + ":audio_processing_statistics", + "../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + if (!build_with_chromium) { + group("audio_processing_tests") { + testonly = true + deps = [ + ":audioproc_test_utils", + "transient:click_annotate", + "transient:transient_suppression_test", + ] + + if (rtc_enable_protobuf) { + deps += [ + ":audioproc_unittest_proto", + "aec_dump:aec_dump_unittests", + "test/conversational_speech", + "test/py_quality_assessment", + ] + } + } + + rtc_library("audio_processing_unittests") { + testonly = true + + configs += [ ":apm_debug_dump" ] + sources = [ + "audio_buffer_unittest.cc", + "audio_frame_view_unittest.cc", + "echo_control_mobile_unittest.cc", + "gain_controller2_unittest.cc", + "splitting_filter_unittest.cc", + "test/fake_recording_device_unittest.cc", + ] + + deps = [ + ":analog_mic_simulation", + ":api", + ":apm_logging", + ":audio_buffer", + ":audio_frame_view", + ":audio_processing", + ":audioproc_test_utils", + ":gain_controller2", + ":high_pass_filter", + ":mocks", + "../../api:array_view", + "../../api:make_ref_counted", + "../../api:scoped_refptr", + "../../api/audio:aec3_config", + "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:checks", + "../../rtc_base:gtest_prod", + "../../rtc_base:ignore_wundef", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread", + "../../rtc_base:protobuf_utils", + "../../rtc_base:random", + "../../rtc_base:rtc_base_tests_utils", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + "../../rtc_base:stringutils", + "../../rtc_base:swap_queue", + "../../rtc_base:task_queue_for_test", + "../../rtc_base:threading", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:arch", + "../../rtc_base/system:file_wrapper", + "../../system_wrappers", + "../../system_wrappers:denormal_disabler", + "../../test:field_trial", + "../../test:fileutils", + "../../test:rtc_expect_death", + "../../test:test_support", + 
"../audio_coding:neteq_input_audio_tools", + "aec_dump:mock_aec_dump_unittests", + "agc:agc_unittests", + "agc2:adaptive_digital_gain_controller_unittest", + "agc2:biquad_filter_unittests", + "agc2:fixed_digital_unittests", + "agc2:gain_applier_unittest", + "agc2:input_volume_controller_unittests", + "agc2:input_volume_stats_reporter_unittests", + "agc2:noise_estimator_unittests", + "agc2:saturation_protector_unittest", + "agc2:speech_level_estimator_unittest", + "agc2:test_utils", + "agc2:vad_wrapper_unittests", + "agc2/rnn_vad:unittests", + "capture_levels_adjuster", + "capture_levels_adjuster:capture_levels_adjuster_unittests", + "test/conversational_speech:unittest", + "transient:transient_suppression_unittests", + "utility:legacy_delay_estimator_unittest", + "utility:pffft_wrapper_unittest", + "vad:vad_unittests", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + defines = [] + + if (rtc_prefer_fixed_point) { + defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ] + } else { + defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ] + } + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ] + deps += [ + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + ":audioproc_unittest_proto", + ":optionally_built_submodule_creators", + ":residual_echo_detector", + ":rms_level", + ":runtime_settings_protobuf_utils", + "../../api/audio:audio_frame_api", + "../../api/audio:echo_control", + "../../rtc_base:rtc_base_tests_utils", + "../../rtc_base:rtc_task_queue", + "aec_dump", + "aec_dump:aec_dump_unittests", + ] + absl_deps += [ "//third_party/abseil-cpp/absl/flags:flag" ] + sources += [ + "audio_processing_impl_locking_unittest.cc", + "audio_processing_impl_unittest.cc", + "audio_processing_unittest.cc", + "echo_control_mobile_bit_exact_unittest.cc", + "echo_detector/circular_buffer_unittest.cc", + "echo_detector/mean_variance_estimator_unittest.cc", + "echo_detector/moving_max_unittest.cc", + "echo_detector/normalized_covariance_estimator_unittest.cc", + "gain_control_unittest.cc", + "high_pass_filter_unittest.cc", + "residual_echo_detector_unittest.cc", + "rms_level_unittest.cc", + "test/debug_dump_replayer.cc", + "test/debug_dump_replayer.h", + "test/debug_dump_test.cc", + "test/echo_canceller_test_tools.cc", + "test/echo_canceller_test_tools.h", + "test/echo_canceller_test_tools_unittest.cc", + "test/echo_control_mock.h", + "test/test_utils.h", + ] + } + } + } + + rtc_library("audio_processing_perf_tests") { + testonly = true + configs += [ ":apm_debug_dump" ] + + sources = [ "audio_processing_performance_unittest.cc" ] + deps = [ + ":audio_processing", + ":audioproc_test_utils", + "../../api:array_view", + "../../api/numerics", + "../../api/test/metrics:global_metrics_logger_and_exporter", + "../../api/test/metrics:metric", + "../../rtc_base:platform_thread", + "../../rtc_base:protobuf_utils", + "../../rtc_base:random", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../system_wrappers", + "../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + rtc_library("analog_mic_simulation") { + sources = [ + "test/fake_recording_device.cc", + "test/fake_recording_device.h", + ] + deps = [ + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + 
"agc2:gain_map", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + } + + if (rtc_enable_protobuf && !build_with_chromium) { + rtc_library("audioproc_f_impl") { + testonly = true + configs += [ ":apm_debug_dump" ] + sources = [ + "test/aec_dump_based_simulator.cc", + "test/aec_dump_based_simulator.h", + "test/api_call_statistics.cc", + "test/api_call_statistics.h", + "test/audio_processing_simulator.cc", + "test/audio_processing_simulator.h", + "test/audioproc_float_impl.cc", + "test/audioproc_float_impl.h", + "test/wav_based_simulator.cc", + "test/wav_based_simulator.h", + ] + + deps = [ + ":analog_mic_simulation", + ":api", + ":apm_logging", + ":audio_processing", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + ":runtime_settings_protobuf_utils", + "../../api/audio:aec3_config_json", + "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_json", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../rtc_base:task_queue_for_test", + "../../rtc_base:timeutils", + "../../rtc_base/system:file_wrapper", + "../../system_wrappers", + "../../system_wrappers:field_trial", + "../../test:test_support", + "aec_dump", + "aec_dump:aec_dump_impl", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } # audioproc_f_impl + } + + if (rtc_enable_protobuf) { + proto_library("audioproc_unittest_proto") { + sources = [ "test/unittest.proto" ] + proto_out_dir = "modules/audio_processing/test" + } + + rtc_library("audioproc_protobuf_utils") { + sources = [ + "test/protobuf_utils.cc", + "test/protobuf_utils.h", + ] + + deps = [ + ":audioproc_debug_proto", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:protobuf_utils", + "../../rtc_base/system:arch", + ] + } + + rtc_library("runtime_settings_protobuf_utils") { + testonly = true + sources = [ + "test/runtime_setting_util.cc", + "test/runtime_setting_util.h", + ] + + deps = [ + ":api", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + "../../rtc_base:checks", + ] + } + } +} + +rtc_library("audioproc_test_utils") { + visibility = [ "*" ] + testonly = true + sources = [ + "test/audio_buffer_tools.cc", + "test/audio_buffer_tools.h", + "test/audio_processing_builder_for_testing.cc", + "test/audio_processing_builder_for_testing.h", + "test/bitexactness_tools.cc", + "test/bitexactness_tools.h", + "test/performance_timer.cc", + "test/performance_timer.h", + "test/simulator_buffers.cc", + "test/simulator_buffers.h", + "test/test_utils.cc", + "test/test_utils.h", + ] + + configs += [ ":apm_debug_dump" ] + + deps = [ + ":api", + ":audio_buffer", + ":audio_processing", + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:random", + "../../rtc_base/system:arch", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + "../audio_coding:neteq_input_audio_tools", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/DEPS 
b/third_party/libwebrtc/modules/audio_processing/DEPS new file mode 100644 index 0000000000..79fd071785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/DEPS @@ -0,0 +1,14 @@ +include_rules = [ + "+audio/utility/audio_frame_operations.h", + "+common_audio", + "+system_wrappers", +] + +specific_include_rules = { + ".*test\.cc": [ + "+rtc_tools", + # Android platform build has different paths. + "+gtest", + "+external/webrtc", + ], +} diff --git a/third_party/libwebrtc/modules/audio_processing/OWNERS b/third_party/libwebrtc/modules/audio_processing/OWNERS new file mode 100644 index 0000000000..f5dc59ea35 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/OWNERS @@ -0,0 +1,8 @@ +alessiob@webrtc.org +devicentepena@webrtc.org +gustaf@webrtc.org +henrik.lundin@webrtc.org +ivoc@webrtc.org +lionelk@webrtc.org +peah@webrtc.org +saza@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn new file mode 100644 index 0000000000..c29b893b7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn @@ -0,0 +1,384 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_library("aec3") { + visibility = [ "*" ] + configs += [ "..:apm_debug_dump" ] + sources = [ + "adaptive_fir_filter.cc", + "adaptive_fir_filter_erl.cc", + "aec3_common.cc", + "aec3_fft.cc", + "aec_state.cc", + "aec_state.h", + "alignment_mixer.cc", + "alignment_mixer.h", + "api_call_jitter_metrics.cc", + "api_call_jitter_metrics.h", + "block.h", + "block_buffer.cc", + "block_delay_buffer.cc", + "block_delay_buffer.h", + "block_framer.cc", + "block_framer.h", + "block_processor.cc", + "block_processor.h", + "block_processor_metrics.cc", + "block_processor_metrics.h", + "clockdrift_detector.cc", + "clockdrift_detector.h", + "coarse_filter_update_gain.cc", + "coarse_filter_update_gain.h", + "comfort_noise_generator.cc", + "comfort_noise_generator.h", + "config_selector.cc", + "config_selector.h", + "decimator.cc", + "decimator.h", + "delay_estimate.h", + "dominant_nearend_detector.cc", + "dominant_nearend_detector.h", + "downsampled_render_buffer.cc", + "downsampled_render_buffer.h", + "echo_audibility.cc", + "echo_audibility.h", + "echo_canceller3.cc", + "echo_canceller3.h", + "echo_path_delay_estimator.cc", + "echo_path_delay_estimator.h", + "echo_path_variability.cc", + "echo_path_variability.h", + "echo_remover.cc", + "echo_remover.h", + "echo_remover_metrics.cc", + "echo_remover_metrics.h", + "erl_estimator.cc", + "erl_estimator.h", + "erle_estimator.cc", + "erle_estimator.h", + "fft_buffer.cc", + "filter_analyzer.cc", + "filter_analyzer.h", + "frame_blocker.cc", + "frame_blocker.h", + "fullband_erle_estimator.cc", + "fullband_erle_estimator.h", + "matched_filter.cc", + "matched_filter_lag_aggregator.cc", + "matched_filter_lag_aggregator.h", + "moving_average.cc", + "moving_average.h", + "multi_channel_content_detector.cc", + "multi_channel_content_detector.h", + "nearend_detector.h", + "refined_filter_update_gain.cc", + "refined_filter_update_gain.h", + "render_buffer.cc", + "render_delay_buffer.cc", + 
"render_delay_buffer.h", + "render_delay_controller.cc", + "render_delay_controller.h", + "render_delay_controller_metrics.cc", + "render_delay_controller_metrics.h", + "render_signal_analyzer.cc", + "render_signal_analyzer.h", + "residual_echo_estimator.cc", + "residual_echo_estimator.h", + "reverb_decay_estimator.cc", + "reverb_decay_estimator.h", + "reverb_frequency_response.cc", + "reverb_frequency_response.h", + "reverb_model.cc", + "reverb_model.h", + "reverb_model_estimator.cc", + "reverb_model_estimator.h", + "signal_dependent_erle_estimator.cc", + "signal_dependent_erle_estimator.h", + "spectrum_buffer.cc", + "stationarity_estimator.cc", + "stationarity_estimator.h", + "subband_erle_estimator.cc", + "subband_erle_estimator.h", + "subband_nearend_detector.cc", + "subband_nearend_detector.h", + "subtractor.cc", + "subtractor.h", + "subtractor_output.cc", + "subtractor_output.h", + "subtractor_output_analyzer.cc", + "subtractor_output_analyzer.h", + "suppression_filter.cc", + "suppression_filter.h", + "suppression_gain.cc", + "suppression_gain.h", + "transparent_mode.cc", + "transparent_mode.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":matched_filter", + ":render_buffer", + ":vector_math", + "..:apm_logging", + "..:audio_buffer", + "..:high_pass_filter", + "../../../api:array_view", + "../../../api/audio:aec3_config", + "../../../api/audio:echo_control", + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:race_checker", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:swap_queue", + "../../../rtc_base/experiments:field_trial_parser", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + if (target_cpu == "x86" || target_cpu == "x64") { + deps += [ ":aec3_avx2" ] + } +} + +rtc_source_set("aec3_common") { + sources = [ "aec3_common.h" ] +} + +rtc_source_set("aec3_fft") { + sources = [ "aec3_fft.h" ] + deps = [ + ":aec3_common", + ":fft_data", + "../../../api:array_view", + "../../../common_audio/third_party/ooura:fft_size_128", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("render_buffer") { + sources = [ + "block.h", + "block_buffer.h", + "fft_buffer.h", + "render_buffer.h", + "spectrum_buffer.h", + ] + deps = [ + ":aec3_common", + ":fft_data", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("adaptive_fir_filter") { + sources = [ "adaptive_fir_filter.h" ] + deps = [ + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":render_buffer", + "..:apm_logging", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("adaptive_fir_filter_erl") { + sources = [ "adaptive_fir_filter_erl.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("matched_filter") { + sources = [ "matched_filter.h" ] + deps = [ + 
":aec3_common", + "../../../api:array_view", + "../../../rtc_base:gtest_prod", + "../../../rtc_base/system:arch", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("vector_math") { + sources = [ "vector_math.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("fft_data") { + sources = [ "fft_data.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] +} + +if (target_cpu == "x86" || target_cpu == "x64") { + rtc_library("aec3_avx2") { + configs += [ "..:apm_debug_dump" ] + sources = [ + "adaptive_fir_filter_avx2.cc", + "adaptive_fir_filter_erl_avx2.cc", + "fft_data_avx2.cc", + "matched_filter_avx2.cc", + "vector_math_avx2.cc", + ] + + cflags = [ + "-mavx", + "-mavx2", + "-mfma", + ] + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":fft_data", + ":matched_filter", + ":vector_math", + "../../../api:array_view", + "../../../rtc_base:checks", + ] + } +} + +if (rtc_include_tests) { + rtc_library("aec3_unittests") { + testonly = true + + configs += [ "..:apm_debug_dump" ] + sources = [ + "mock/mock_block_processor.cc", + "mock/mock_block_processor.h", + "mock/mock_echo_remover.cc", + "mock/mock_echo_remover.h", + "mock/mock_render_delay_buffer.cc", + "mock/mock_render_delay_buffer.h", + "mock/mock_render_delay_controller.cc", + "mock/mock_render_delay_controller.h", + ] + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":aec3", + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":matched_filter", + ":render_buffer", + ":vector_math", + "..:apm_logging", + "..:audio_buffer", + "..:audio_processing", + "..:high_pass_filter", + "../../../api:array_view", + "../../../api/audio:aec3_config", + "../../../rtc_base:checks", + "../../../rtc_base:macromagic", + "../../../rtc_base:random", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../system_wrappers:metrics", + "../../../test:field_trial", + "../../../test:test_support", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + + defines = [] + + if (rtc_enable_protobuf) { + sources += [ + "adaptive_fir_filter_erl_unittest.cc", + "adaptive_fir_filter_unittest.cc", + "aec3_fft_unittest.cc", + "aec_state_unittest.cc", + "alignment_mixer_unittest.cc", + "api_call_jitter_metrics_unittest.cc", + "block_delay_buffer_unittest.cc", + "block_framer_unittest.cc", + "block_processor_metrics_unittest.cc", + "block_processor_unittest.cc", + "clockdrift_detector_unittest.cc", + "coarse_filter_update_gain_unittest.cc", + "comfort_noise_generator_unittest.cc", + "config_selector_unittest.cc", + "decimator_unittest.cc", + "echo_canceller3_unittest.cc", + "echo_path_delay_estimator_unittest.cc", + "echo_path_variability_unittest.cc", + "echo_remover_metrics_unittest.cc", + "echo_remover_unittest.cc", + "erl_estimator_unittest.cc", + "erle_estimator_unittest.cc", + "fft_data_unittest.cc", + "filter_analyzer_unittest.cc", + "frame_blocker_unittest.cc", + "matched_filter_lag_aggregator_unittest.cc", + "matched_filter_unittest.cc", + "moving_average_unittest.cc", + "multi_channel_content_detector_unittest.cc", + "refined_filter_update_gain_unittest.cc", + "render_buffer_unittest.cc", + "render_delay_buffer_unittest.cc", + "render_delay_controller_metrics_unittest.cc", + 
"render_delay_controller_unittest.cc", + "render_signal_analyzer_unittest.cc", + "residual_echo_estimator_unittest.cc", + "reverb_model_estimator_unittest.cc", + "signal_dependent_erle_estimator_unittest.cc", + "subtractor_unittest.cc", + "suppression_filter_unittest.cc", + "suppression_gain_unittest.cc", + "vector_math_unittest.cc", + ] + } + + if (!build_with_chromium) { + deps += [ "..:audio_processing_unittests" ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc new file mode 100644 index 0000000000..917aa951ee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -0,0 +1,744 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_HAS_NEON) +#include +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include + +#include +#include + +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace aec3 { + +// Computes and stores the frequency response of the filter. +void ComputeFrequencyResponse( + size_t num_partitions, + const std::vector>& H, + std::vector>* H2) { + for (auto& H2_ch : *H2) { + H2_ch.fill(0.f); + } + + const size_t num_render_channels = H[0].size(); + RTC_DCHECK_EQ(H.size(), H2->capacity()); + for (size_t p = 0; p < num_partitions; ++p) { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size()); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + float tmp = + H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j]; + (*H2)[p][j] = std::max((*H2)[p][j], tmp); + } + } + } +} + +#if defined(WEBRTC_HAS_NEON) +// Computes and stores the frequency response of the filter. +void ComputeFrequencyResponse_Neon( + size_t num_partitions, + const std::vector>& H, + std::vector>* H2) { + for (auto& H2_ch : *H2) { + H2_ch.fill(0.f); + } + + const size_t num_render_channels = H[0].size(); + RTC_DCHECK_EQ(H.size(), H2->capacity()); + for (size_t p = 0; p < num_partitions; ++p) { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size()); + auto& H2_p = (*H2)[p]; + for (size_t ch = 0; ch < num_render_channels; ++ch) { + const FftData& H_p_ch = H[p][ch]; + for (size_t j = 0; j < kFftLengthBy2; j += 4) { + const float32x4_t re = vld1q_f32(&H_p_ch.re[j]); + const float32x4_t im = vld1q_f32(&H_p_ch.im[j]); + float32x4_t H2_new = vmulq_f32(re, re); + H2_new = vmlaq_f32(H2_new, im, im); + float32x4_t H2_p_j = vld1q_f32(&H2_p[j]); + H2_p_j = vmaxq_f32(H2_p_j, H2_new); + vst1q_f32(&H2_p[j], H2_p_j); + } + float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] + + H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2]; + H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new); + } + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Computes and stores the frequency response of the filter. 
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse_Sse2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    auto& H2_p = (*H2)[p];
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
+        const __m128 re = _mm_loadu_ps(&H_p_ch.re[j]);
+        const __m128 re2 = _mm_mul_ps(re, re);
+        const __m128 im = _mm_loadu_ps(&H_p_ch.im[j]);
+        const __m128 im2 = _mm_mul_ps(im, im);
+        const __m128 H2_new = _mm_add_ps(re2, im2);
+        __m128 H2_k_j = _mm_loadu_ps(&H2_p[j]);
+        H2_k_j = _mm_max_ps(H2_k_j, H2_new);
+        _mm_storeu_ps(&H2_p[j], H2_k_j);
+      }
+      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
+                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
+    }
+  }
+}
+#endif
+
+// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
+void AdaptPartitions(const RenderBuffer& render_buffer,
+                     const FftData& G,
+                     size_t num_partitions,
+                     std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  size_t index = render_buffer.Position();
+  const size_t num_render_channels = render_buffer_data[index].size();
+  for (size_t p = 0; p < num_partitions; ++p) {
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& X_p_ch = render_buffer_data[index][ch];
+      FftData& H_p_ch = (*H)[p][ch];
+      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+        H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
+        H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
+      }
+    }
+    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
+  }
+}
+
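+// For reference, the per-bin update implemented above (and vectorized in the
+// variants below) is the complex multiply-accumulate H_p[k] += G[k] *
+// conj(X_p[k]), which in real/imaginary parts reads
+//   H.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k];
+//   H.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k];
+// matching the update comment H(t+1) = H(t) + G(t) * conj(X(t)) above.
+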
+#if defined(WEBRTC_HAS_NEON)
+// Adapts the filter partitions. (Neon variant)
+void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const float32x4_t G_re = vld1q_f32(&G.re[k]);
+          const float32x4_t G_im = vld1q_f32(&G.im[k]);
+          const float32x4_t X_re = vld1q_f32(&X.re[k]);
+          const float32x4_t X_im = vld1q_f32(&X.im[k]);
+          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
+          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
+          const float32x4_t a = vmulq_f32(X_re, G_re);
+          const float32x4_t e = vmlaq_f32(a, X_im, G_im);
+          const float32x4_t c = vmulq_f32(X_re, G_im);
+          const float32x4_t f = vmlsq_f32(c, X_im, G_re);
+          const float32x4_t g = vaddq_f32(H_re, e);
+          const float32x4_t h = vaddq_f32(H_im, f);
+          vst1q_f32(&H_p_ch.re[k], g);
+          vst1q_f32(&H_p_ch.im[k], h);
+        }
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Adapts the filter partitions. (SSE2 variant)
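+// (The NEON path fuses the multiply-add steps: vmlaq_f32(a, x, y) computes
+// a + x * y and vmlsq_f32(c, x, y) computes c - x * y, replacing the
+// separate mul/add/sub instructions used by the SSE2 variant below.)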
+void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const __m128 G_re = _mm_loadu_ps(&G.re[k]);
+          const __m128 G_im = _mm_loadu_ps(&G.im[k]);
+          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
+          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
+          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
+          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
+          const __m128 a = _mm_mul_ps(X_re, G_re);
+          const __m128 b = _mm_mul_ps(X_im, G_im);
+          const __m128 c = _mm_mul_ps(X_re, G_im);
+          const __m128 d = _mm_mul_ps(X_im, G_re);
+          const __m128 e = _mm_add_ps(a, b);
+          const __m128 f = _mm_sub_ps(c, d);
+          const __m128 g = _mm_add_ps(H_re, e);
+          const __m128 h = _mm_add_ps(H_im, f);
+          _mm_storeu_ps(&H_p_ch.re[k], g);
+          _mm_storeu_ps(&H_p_ch.im[k], h);
+        }
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+#endif
+
+// Produces the filter output.
+void ApplyFilter(const RenderBuffer& render_buffer,
+                 size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H,
+                 FftData* S) {
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  size_t index = render_buffer.Position();
+  const size_t num_render_channels = render_buffer_data[index].size();
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(num_render_channels, H[p].size());
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& X_p_ch = render_buffer_data[index][ch];
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+        S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
+        S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
+      }
+    }
+    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
+  }
+}
+
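+// For reference, the output accumulated above is, per bin k, the partition
+// and channel sum of complex products S[k] = sum_p sum_ch X_p,ch[k] *
+// H_p,ch[k], i.e.
+//   S.re[k] += X.re[k] * H.re[k] - X.im[k] * H.im[k];
+//   S.im[k] += X.re[k] * H.im[k] + X.im[k] * H.re[k];
+// The SIMD variants below unroll this over 4 (NEON/SSE2) or 8 (AVX2) bins.
+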
+#if defined(WEBRTC_HAS_NEON)
+// Produces the filter output (Neon variant).
+void ApplyFilter_Neon(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  RTC_DCHECK_GE(H.size(), num_partitions);
+  S->Clear();
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const float32x4_t X_re = vld1q_f32(&X.re[k]);
+          const float32x4_t X_im = vld1q_f32(&X.im[k]);
+          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
+          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
+          const float32x4_t S_re = vld1q_f32(&S->re[k]);
+          const float32x4_t S_im = vld1q_f32(&S->im[k]);
+          const float32x4_t a = vmulq_f32(X_re, H_re);
+          const float32x4_t e = vmlsq_f32(a, X_im, H_im);
+          const float32x4_t c = vmulq_f32(X_re, H_im);
+          const float32x4_t f = vmlaq_f32(c, X_im, H_re);
+          const float32x4_t g = vaddq_f32(S_re, e);
+          const float32x4_t h = vaddq_f32(S_im, f);
+          vst1q_f32(&S->re[k], g);
+          vst1q_f32(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Produces the filter output (SSE2 variant).
+void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  RTC_DCHECK_GE(H.size(), num_partitions);
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
+          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
+          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
+          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
+          const __m128 S_re = _mm_loadu_ps(&S->re[k]);
+          const __m128 S_im = _mm_loadu_ps(&S->im[k]);
+          const __m128 a = _mm_mul_ps(X_re, H_re);
+          const __m128 b = _mm_mul_ps(X_im, H_im);
+          const __m128 c = _mm_mul_ps(X_re, H_im);
+          const __m128 d = _mm_mul_ps(X_im, H_re);
+          const __m128 e = _mm_sub_ps(a, b);
+          const __m128 f = _mm_add_ps(c, d);
+          const __m128 g = _mm_add_ps(S_re, e);
+          const __m128 h = _mm_add_ps(S_im, f);
+          _mm_storeu_ps(&S->re[k], g);
+          _mm_storeu_ps(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+#endif
+
+}  // namespace aec3
+
+namespace {
+
+// Ensures that the newly added filter partitions after a size increase are set
+// to zero.
+void ZeroFilter(size_t old_size,
+                size_t new_size,
+                std::vector<std::vector<FftData>>* H) {
+  RTC_DCHECK_GE(H->size(), old_size);
+  RTC_DCHECK_GE(H->size(), new_size);
+
+  for (size_t p = old_size; p < new_size; ++p) {
+    RTC_DCHECK_EQ((*H)[p].size(), (*H)[0].size());
+    for (size_t ch = 0; ch < (*H)[0].size(); ++ch) {
+      (*H)[p][ch].Clear();
+    }
+  }
+}
+
+}  // namespace
+
+AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
+                                     size_t initial_size_partitions,
+                                     size_t size_change_duration_blocks,
+                                     size_t num_render_channels,
+                                     Aec3Optimization optimization,
+                                     ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper),
+      fft_(),
+      optimization_(optimization),
+      num_render_channels_(num_render_channels),
+      max_size_partitions_(max_size_partitions),
+      size_change_duration_blocks_(
+          static_cast<int>(size_change_duration_blocks)),
+      current_size_partitions_(initial_size_partitions),
+      target_size_partitions_(initial_size_partitions),
+      old_target_size_partitions_(initial_size_partitions),
+      H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
+  RTC_DCHECK(data_dumper_);
+  RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);
+
+  RTC_DCHECK_LT(0, size_change_duration_blocks_);
+  one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;
+
+  ZeroFilter(0, max_size_partitions_, &H_);
+
+  SetSizePartitions(current_size_partitions_, true);
+}
+
+AdaptiveFirFilter::~AdaptiveFirFilter() = default;
+
+void AdaptiveFirFilter::HandleEchoPathChange() {
+  // TODO(peah): Check the value and purpose of the code below.
+  ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
+}
+
+void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
+  RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
+  RTC_DCHECK_LE(size, max_size_partitions_);
+
+  target_size_partitions_ = std::min(max_size_partitions_, size);
+  if (immediate_effect) {
+    size_t old_size_partitions_ = current_size_partitions_;
+    current_size_partitions_ = old_target_size_partitions_ =
+        target_size_partitions_;
+    ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
+
+    partition_to_constrain_ =
+        std::min(partition_to_constrain_, current_size_partitions_ - 1);
+    size_change_counter_ = 0;
+  } else {
+    size_change_counter_ = size_change_duration_blocks_;
+  }
+}
+
+void AdaptiveFirFilter::UpdateSize() {
+  RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
+  size_t old_size_partitions_ = current_size_partitions_;
+  if (size_change_counter_ > 0) {
+    --size_change_counter_;
+
+    auto average = [](float from, float to, float from_weight) {
+      return from * from_weight + to * (1.f - from_weight);
+    };
+
+    float change_factor =
+        size_change_counter_ * one_by_size_change_duration_blocks_;
+
+    current_size_partitions_ = average(old_target_size_partitions_,
+                                       target_size_partitions_, change_factor);
+
+    partition_to_constrain_ =
+        std::min(partition_to_constrain_, current_size_partitions_ - 1);
+  } else {
+    current_size_partitions_ = old_target_size_partitions_ =
+        target_size_partitions_;
+  }
+  ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
+  RTC_DCHECK_LE(0, size_change_counter_);
+}
+
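+// For reference, the transition in UpdateSize() is linear over
+// size_change_duration_blocks_ calls. E.g. with a duration of 4 blocks, an
+// old target of 12 partitions and a new target of 20, change_factor takes
+// the values 0.75, 0.5, 0.25 and 0, so successive calls yield 14, 16, 18
+// and finally 20 partitions.
+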
+void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
+                               FftData* S) const {
+  RTC_DCHECK(S);
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
+      break;
+#endif
+    default:
+      aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
+  }
+}
+
+void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
+                              const FftData& G) {
+  // Adapt the filter and update the filter size.
+  AdaptAndUpdateSize(render_buffer, G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  Constrain();
+}
+
+void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
+                              const FftData& G,
+                              std::vector<float>* impulse_response) {
+  // Adapt the filter and update the filter size.
+  AdaptAndUpdateSize(render_buffer, G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  ConstrainAndUpdateImpulseResponse(impulse_response);
+}
+
+void AdaptiveFirFilter::ComputeFrequencyResponse(
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
+  RTC_DCHECK_GE(max_size_partitions_, H2->capacity());
+
+  H2->resize(current_size_partitions_);
+
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
+      break;
+#endif
+    default:
+      aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
+  }
+}
+
+void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
+                                           const FftData& G) {
+  // Update the filter size if needed.
+  UpdateSize();
+
+  // Adapt the filter.
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+#endif
+    default:
+      aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
+  }
+}
+
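+// For reference, the constraint below works on one partition per call: the
+// partition is inverse-transformed, the first kFftLengthBy2 samples are
+// scaled by 1 / kFftLengthBy2, the remaining samples are zeroed (limiting
+// each partition to an impulse response of kFftLengthBy2 samples), and the
+// result is transformed back to the frequency domain.
+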
+// Constrains the partition of the frequency domain filter to be limited in
+// time via setting the relevant time-domain coefficients to zero and updates
+// the corresponding values in an externally stored impulse response estimate.
+void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
+    std::vector<float>* impulse_response) {
+  RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
+                impulse_response->capacity());
+  std::array<float, kFftLength> h;
+  impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
+  std::fill(
+      impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
+      impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
+      0.f);
+
+  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+    fft_.Ifft(H_[partition_to_constrain_][ch], &h);
+
+    static constexpr float kScale = 1.0f / kFftLengthBy2;
+    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                  [](float& a) { a *= kScale; });
+    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+    if (ch == 0) {
+      std::copy(
+          h.begin(), h.begin() + kFftLengthBy2,
+          impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
+    } else {
+      for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
+           k < kFftLengthBy2; ++k, ++j) {
+        if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
+          (*impulse_response)[j] = h[k];
+        }
+      }
+    }
+
+    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
+  }
+
+  partition_to_constrain_ =
+      partition_to_constrain_ < (current_size_partitions_ - 1)
+          ? partition_to_constrain_ + 1
+          : 0;
+}
+
+// Constrains a partition of the frequency domain filter to be limited in
+// time via setting the relevant time-domain coefficients to zero.
+void AdaptiveFirFilter::Constrain() {
+  std::array<float, kFftLength> h;
+  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+    fft_.Ifft(H_[partition_to_constrain_][ch], &h);
+
+    static constexpr float kScale = 1.0f / kFftLengthBy2;
+    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                  [](float& a) { a *= kScale; });
+    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
+  }
+
+  partition_to_constrain_ =
+      partition_to_constrain_ < (current_size_partitions_ - 1)
+          ? partition_to_constrain_ + 1
+          : 0;
+}
+
+void AdaptiveFirFilter::ScaleFilter(float factor) {
+  for (auto& H_p : H_) {
+    for (auto& H_p_ch : H_p) {
+      for (auto& re : H_p_ch.re) {
+        re *= factor;
+      }
+      for (auto& im : H_p_ch.im) {
+        im *= factor;
+      }
+    }
+  }
+}
+
+// Set the filter coefficients.
+void AdaptiveFirFilter::SetFilter(size_t num_partitions,
+                                  const std::vector<std::vector<FftData>>& H) {
+  const size_t min_num_partitions =
+      std::min(current_size_partitions_, num_partitions);
+  for (size_t p = 0; p < min_num_partitions; ++p) {
+    RTC_DCHECK_EQ(H_[p].size(), H[p].size());
+    RTC_DCHECK_EQ(num_render_channels_, H_[p].size());
+
+    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+      std::copy(H[p][ch].re.begin(), H[p][ch].re.end(), H_[p][ch].re.begin());
+      std::copy(H[p][ch].im.begin(), H[p][ch].im.end(), H_[p][ch].im.begin());
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
new file mode 100644
index 0000000000..34c06f4367
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace aec3 {
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#if defined(WEBRTC_HAS_NEON)
+void ComputeFrequencyResponse_Neon(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ComputeFrequencyResponse_Sse2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+
+void ComputeFrequencyResponse_Avx2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#endif
+
+// Adapts the filter partitions.
+void AdaptPartitions(const RenderBuffer& render_buffer,
+                     const FftData& G,
+                     size_t num_partitions,
+                     std::vector<std::vector<FftData>>* H);
+#if defined(WEBRTC_HAS_NEON)
+void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+
+void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+#endif
+
+// Produces the filter output.
+void ApplyFilter(const RenderBuffer& render_buffer,
+                 size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H,
+                 FftData* S);
+#if defined(WEBRTC_HAS_NEON)
+void ApplyFilter_Neon(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+
+void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+#endif
+
+}  // namespace aec3
+
+// Provides frequency domain adaptive filter functionality.
+class AdaptiveFirFilter {
+ public:
+  AdaptiveFirFilter(size_t max_size_partitions,
+                    size_t initial_size_partitions,
+                    size_t size_change_duration_blocks,
+                    size_t num_render_channels,
+                    Aec3Optimization optimization,
+                    ApmDataDumper* data_dumper);
+
+  ~AdaptiveFirFilter();
+
+  AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
+  AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;
+
+  // Produces the output of the filter.
+  void Filter(const RenderBuffer& render_buffer, FftData* S) const;
+
+  // Adapts the filter and updates an externally stored impulse response
+  // estimate.
+  void Adapt(const RenderBuffer& render_buffer,
+             const FftData& G,
+             std::vector<float>* impulse_response);
+
+  // Adapts the filter.
+  void Adapt(const RenderBuffer& render_buffer, const FftData& G);
+
+  // Receives reports that known echo path changes have occurred and adjusts
+  // the filter adaptation accordingly.
+  void HandleEchoPathChange();
+
+  // Returns the filter size.
+  size_t SizePartitions() const { return current_size_partitions_; }
+
+  // Sets the filter size.
+  void SetSizePartitions(size_t size, bool immediate_effect);
+
+  // Computes the frequency responses for the filter partitions.
+  void ComputeFrequencyResponse(
+      std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;
+
+  // Returns the maximum number of partitions for the filter.
+  size_t max_filter_size_partitions() const { return max_size_partitions_; }
+
+  void DumpFilter(absl::string_view name_frequency_domain) {
+    for (size_t p = 0; p < max_size_partitions_; ++p) {
+      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
+      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
+    }
+  }
+
+  // Scale the filter impulse response and spectrum by a factor.
+  void ScaleFilter(float factor);
+
+  // Set the filter coefficients.
+  void SetFilter(size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H);
+
+  // Gets the filter coefficients.
+  const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }
+
+ private:
+  // Adapts the filter and updates the filter size.
+  void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  void Constrain();
+  // Constrains the filter in a cyclic manner and updates the corresponding
+  // values in the supplied impulse response.
+  void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);
+
+  // Gradually updates the current filter size towards the target size.
+  void UpdateSize();
+
+  ApmDataDumper* const data_dumper_;
+  const Aec3Fft fft_;
+  const Aec3Optimization optimization_;
+  const size_t num_render_channels_;
+  const size_t max_size_partitions_;
+  const int size_change_duration_blocks_;
+  float one_by_size_change_duration_blocks_;
+  size_t current_size_partitions_;
+  size_t target_size_partitions_;
+  size_t old_target_size_partitions_;
+  int size_change_counter_ = 0;
+  std::vector<std::vector<FftData>> H_;
+  size_t partition_to_constrain_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
new file mode 100644
index 0000000000..44d4514275
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+
+#include "common_audio/intrin.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace aec3 {
+
+// Computes and stores the frequency response of the filter.
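+// (Relative to the SSE2 variant, the AVX2 path processes eight bins per
+// __m256 register and uses the fused multiply-add _mm256_fmadd_ps(im, im,
+// re2), which computes im * im + re2 in one step.)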
+void ComputeFrequencyResponse_Avx2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    auto& H2_p = (*H2)[p];
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t j = 0; j < kFftLengthBy2; j += 8) {
+        __m256 re = _mm256_loadu_ps(&H_p_ch.re[j]);
+        __m256 re2 = _mm256_mul_ps(re, re);
+        __m256 im = _mm256_loadu_ps(&H_p_ch.im[j]);
+        re2 = _mm256_fmadd_ps(im, im, re2);
+        __m256 H2_k_j = _mm256_loadu_ps(&H2_p[j]);
+        H2_k_j = _mm256_max_ps(H2_k_j, re2);
+        _mm256_storeu_ps(&H2_p[j], H2_k_j);
+      }
+      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
+                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
+    }
+  }
+}
+
+// Adapts the filter partitions.
+void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
+          const __m256 G_re = _mm256_loadu_ps(&G.re[k]);
+          const __m256 G_im = _mm256_loadu_ps(&G.im[k]);
+          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
+          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
+          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
+          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
+          const __m256 a = _mm256_mul_ps(X_re, G_re);
+          const __m256 b = _mm256_mul_ps(X_im, G_im);
+          const __m256 c = _mm256_mul_ps(X_re, G_im);
+          const __m256 d = _mm256_mul_ps(X_im, G_re);
+          const __m256 e = _mm256_add_ps(a, b);
+          const __m256 f = _mm256_sub_ps(c, d);
+          const __m256 g = _mm256_add_ps(H_re, e);
+          const __m256 h = _mm256_add_ps(H_im, f);
+          _mm256_storeu_ps(&H_p_ch.re[k], g);
+          _mm256_storeu_ps(&H_p_ch.im[k], h);
+        }
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+
+// Produces the filter output (AVX2 variant).
+void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  RTC_DCHECK_GE(H.size(), num_partitions);
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
+          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
+          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
+          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
+          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
+          const __m256 S_re = _mm256_loadu_ps(&S->re[k]);
+          const __m256 S_im = _mm256_loadu_ps(&S->im[k]);
+          const __m256 a = _mm256_mul_ps(X_re, H_re);
+          const __m256 b = _mm256_mul_ps(X_im, H_im);
+          const __m256 c = _mm256_mul_ps(X_re, H_im);
+          const __m256 d = _mm256_mul_ps(X_im, H_re);
+          const __m256 e = _mm256_sub_ps(a, b);
+          const __m256 f = _mm256_add_ps(c, d);
+          const __m256 g = _mm256_add_ps(S_re, e);
+          const __m256 h = _mm256_add_ps(S_im, f);
+          _mm256_storeu_ps(&S->re[k], g);
+          _mm256_storeu_ps(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
new file mode 100644
index 0000000000..45b8813979
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+
+#include <algorithm>
+#include <functional>
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+namespace webrtc {
+
+namespace aec3 {
+
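+// For reference, the ERL estimate computed in this file is, per bin k,
+//   erl[k] = sum_j H2[j][k],
+// accumulated with std::transform(..., std::plus<float>()) in the generic
+// path and with packed adds in the NEON/SSE2/AVX2 paths.
+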
+// Computes and stores the echo return loss estimate of the filter, which is the
+// sum of the partition frequency responses.
+void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                 rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
+                   std::plus<float>());
+  }
+}
+
+#if defined(WEBRTC_HAS_NEON)
+// Computes and stores the echo return loss estimate of the filter, which is the
+// sum of the partition frequency responses.
+void ErlComputer_NEON(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
+      const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
+      float32x4_t erl_k = vld1q_f32(&erl[k]);
+      erl_k = vaddq_f32(erl_k, H2_j_k);
+      vst1q_f32(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Computes and stores the echo return loss estimate of the filter, which is the
+// sum of the partition frequency responses.
+void ErlComputer_SSE2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
+      const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
+      __m128 erl_k = _mm_loadu_ps(&erl[k]);
+      erl_k = _mm_add_ps(erl_k, H2_j_k);
+      _mm_storeu_ps(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+#endif
+
+}  // namespace aec3
+
+void ComputeErl(const Aec3Optimization& optimization,
+                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                rtc::ArrayView<float> erl) {
+  RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
+  // Update the frequency response and echo return loss for the filter.
+  switch (optimization) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ErlComputer_SSE2(H2, erl);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ErlComputer_AVX2(H2, erl);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ErlComputer_NEON(H2, erl);
+      break;
+#endif
+    default:
+      aec3::ErlComputer(H2, erl);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
new file mode 100644
index 0000000000..4ac13b1bc3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is the
+// sum of the partition frequency responses.
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
new file mode 100644
index 0000000000..4ac13b1bc3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                 rtc::ArrayView<float> erl);
+#if defined(WEBRTC_HAS_NEON)
+void ErlComputer_NEON(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ErlComputer_SSE2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+
+void ErlComputer_AVX2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+#endif
+
+}  // namespace aec3
+
+// Computes the echo return loss based on a frequency response.
+void ComputeErl(const Aec3Optimization& optimization,
+                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                rtc::ArrayView<float> erl);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc
new file mode 100644
index 0000000000..5fe7514db1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+
+#include <immintrin.h>
+
+namespace webrtc {
+
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer_AVX2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 8) {
+      const __m256 H2_j_k = _mm256_loadu_ps(&H2_j[k]);
+      __m256 erl_k = _mm256_loadu_ps(&erl[k]);
+      erl_k = _mm256_add_ps(erl_k, H2_j_k);
+      _mm256_storeu_ps(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build
new file mode 100644
index 0000000000..60ecc93ab9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build
@@ -0,0 +1,205 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_fir_filter_erl_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc new file mode 100644 index 0000000000..d2af70a9f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" + +#include +#include + +#include "rtc_base/system/arch.h" +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif + +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { + +#if defined(WEBRTC_HAS_NEON) +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. 
+TEST(AdaptiveFirFilter, UpdateErlNeonOptimization) {
+  const size_t kNumPartitions = 12;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
+  std::array<float, kFftLengthBy2Plus1> erl;
+  std::array<float, kFftLengthBy2Plus1> erl_NEON;
+
+  for (size_t j = 0; j < H2.size(); ++j) {
+    for (size_t k = 0; k < H2[j].size(); ++k) {
+      H2[j][k] = k + j / 3.f;
+    }
+  }
+
+  ErlComputer(H2, erl);
+  ErlComputer_NEON(H2, erl_NEON);
+
+  for (size_t j = 0; j < erl.size(); ++j) {
+    EXPECT_FLOAT_EQ(erl[j], erl_NEON[j]);
+  }
+}
+
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Verifies that the optimized method for echo return loss computation is
+// bitexact to the reference counterpart.
+TEST(AdaptiveFirFilter, UpdateErlSse2Optimization) {
+  bool use_sse2 = (GetCPUInfo(kSSE2) != 0);
+  if (use_sse2) {
+    const size_t kNumPartitions = 12;
+    std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
+    std::array<float, kFftLengthBy2Plus1> erl;
+    std::array<float, kFftLengthBy2Plus1> erl_SSE2;
+
+    for (size_t j = 0; j < H2.size(); ++j) {
+      for (size_t k = 0; k < H2[j].size(); ++k) {
+        H2[j][k] = k + j / 3.f;
+      }
+    }
+
+    ErlComputer(H2, erl);
+    ErlComputer_SSE2(H2, erl_SSE2);
+
+    for (size_t j = 0; j < erl.size(); ++j) {
+      EXPECT_FLOAT_EQ(erl[j], erl_SSE2[j]);
+    }
+  }
+}
+
+// Verifies that the optimized method for echo return loss computation is
+// bitexact to the reference counterpart.
+TEST(AdaptiveFirFilter, UpdateErlAvx2Optimization) {
+  bool use_avx2 = (GetCPUInfo(kAVX2) != 0);
+  if (use_avx2) {
+    const size_t kNumPartitions = 12;
+    std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
+    std::array<float, kFftLengthBy2Plus1> erl;
+    std::array<float, kFftLengthBy2Plus1> erl_AVX2;
+
+    for (size_t j = 0; j < H2.size(); ++j) {
+      for (size_t k = 0; k < H2[j].size(); ++k) {
+        H2[j][k] = k + j / 3.f;
+      }
+    }
+
+    ErlComputer(H2, erl);
+    ErlComputer_AVX2(H2, erl_AVX2);
+
+    for (size_t j = 0; j < erl.size(); ++j) {
+      EXPECT_FLOAT_EQ(erl[j], erl_AVX2[j]);
+    }
+  }
+}
+
+#endif
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build
new file mode 100644
index 0000000000..fd78a43560
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build
@@ -0,0 +1,216 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_fir_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc new file mode 100644 index 0000000000..a13764c109 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. 
+#include <math.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "rtc_base/system/arch.h"
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+std::string ProduceDebugText(size_t num_render_channels, size_t delay) {
+  rtc::StringBuilder ss;
+  ss << "delay: " << delay << ", ";
+  ss << "num_render_channels:" << num_render_channels;
+  return ss.Release();
+}
+
+}  // namespace
+
+class AdaptiveFirFilterOneTwoFourEightRenderChannels
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<size_t> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         AdaptiveFirFilterOneTwoFourEightRenderChannels,
+                         ::testing::Values(1, 2, 4, 8));
+
+#if defined(WEBRTC_HAS_NEON)
+// Verifies that the optimized methods for filter adaptation are similar to
+// their reference counterparts.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       FilterAdaptationNeonOptimizations) {
+  const size_t num_render_channels = GetParam();
+  for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+    constexpr int kSampleRateHz = 48000;
+    constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+    std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+        RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
+                                  num_render_channels));
+    Random random_generator(42U);
+    Block x(kNumBands, num_render_channels);
+    FftData S_C;
+    FftData S_Neon;
+    FftData G;
+    Aec3Fft fft;
+    std::vector<std::vector<FftData>> H_C(
+        num_partitions, std::vector<FftData>(num_render_channels));
+    std::vector<std::vector<FftData>> H_Neon(
+        num_partitions, std::vector<FftData>(num_render_channels));
+    for (size_t p = 0; p < num_partitions; ++p) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        H_C[p][ch].Clear();
+        H_Neon[p][ch].Clear();
+      }
+    }
+
+    for (int k = 0; k < 30; ++k) {
+      for (int band = 0; band < x.NumBands(); ++band) {
+        for (int ch = 0; ch < x.NumChannels(); ++ch) {
+          RandomizeSampleVector(&random_generator, x.View(band, ch));
+        }
+      }
+      render_delay_buffer->Insert(x);
+      if (k == 0) {
+        render_delay_buffer->Reset();
+      }
+      render_delay_buffer->PrepareCaptureProcessing();
+    }
+    auto* const render_buffer = render_delay_buffer->GetRenderBuffer();
+
+    for (size_t j = 0; j < G.re.size(); ++j) {
+      G.re[j] = j / 10001.f;
+    }
+    for (size_t j = 1; j < G.im.size() - 1; ++j) {
+      G.im[j] = j / 20001.f;
+    }
+    G.im[0] = 0.f;
+    G.im[G.im.size() - 1] = 0.f;
+
+    AdaptPartitions_Neon(*render_buffer, G, num_partitions, &H_Neon);
+    AdaptPartitions(*render_buffer, G, num_partitions, &H_C);
+    AdaptPartitions_Neon(*render_buffer, G, num_partitions, &H_Neon);
+    AdaptPartitions(*render_buffer, G, num_partitions, &H_C);
+
+    for (size_t p = 0; p < num_partitions; ++p) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) {
+          EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Neon[p][ch].re[j]);
+          EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Neon[p][ch].im[j]);
+        }
+      }
+    }
+
+    ApplyFilter_Neon(*render_buffer, num_partitions, H_Neon, &S_Neon);
+    ApplyFilter(*render_buffer, num_partitions, H_C, &S_C);
+    for (size_t j = 0; j < S_C.re.size(); ++j) {
+      EXPECT_NEAR(S_C.re[j], S_Neon.re[j], fabs(S_C.re[j] * 0.00001f));
+      EXPECT_NEAR(S_C.im[j], S_Neon.im[j], fabs(S_C.re[j] * 0.00001f));
+    }
+  }
+}
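+
+// Note that the ApplyFilter comparison above uses EXPECT_NEAR with a relative
+// tolerance (|S_C.re[j]| * 1e-5) rather than EXPECT_FLOAT_EQ: the vectorized
+// kernel may combine and round the complex multiply-accumulates in a
+// different order than the scalar code, so the outputs are only required to
+// be similar, not bitexact.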
+
+// Verifies that the optimized method for frequency response computation is
+// bitexact to the reference counterpart.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       ComputeFrequencyResponseNeonOptimization) {
+  const size_t num_render_channels = GetParam();
+  for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+    std::vector<std::vector<FftData>> H(
+        num_partitions, std::vector<FftData>(num_render_channels));
+    std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions);
+    std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Neon(num_partitions);
+
+    for (size_t p = 0; p < num_partitions; ++p) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        for (size_t k = 0; k < H[p][ch].re.size(); ++k) {
+          H[p][ch].re[k] = k + p / 3.f + ch;
+          H[p][ch].im[k] = p + k / 7.f - ch;
+        }
+      }
+    }
+
+    ComputeFrequencyResponse(num_partitions, H, &H2);
+    ComputeFrequencyResponse_Neon(num_partitions, H, &H2_Neon);
+
+    for (size_t p = 0; p < num_partitions; ++p) {
+      for (size_t k = 0; k < H2[p].size(); ++k) {
+        EXPECT_FLOAT_EQ(H2[p][k], H2_Neon[p][k]);
+      }
+    }
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Verifies that the optimized methods for filter adaptation are bitexact to
+// their reference counterparts.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       FilterAdaptationSse2Optimizations) {
+  const size_t num_render_channels = GetParam();
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+  bool use_sse2 = (GetCPUInfo(kSSE2) != 0);
+  if (use_sse2) {
+    for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+      std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+          RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
+                                    num_render_channels));
+      Random random_generator(42U);
+      Block x(kNumBands, num_render_channels);
+      FftData S_C;
+      FftData S_Sse2;
+      FftData G;
+      Aec3Fft fft;
+      std::vector<std::vector<FftData>> H_C(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      std::vector<std::vector<FftData>> H_Sse2(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t ch = 0; ch < num_render_channels; ++ch) {
+          H_C[p][ch].Clear();
+          H_Sse2[p][ch].Clear();
+        }
+      }
+
+      for (size_t k = 0; k < 500; ++k) {
+        for (int band = 0; band < x.NumBands(); ++band) {
+          for (int ch = 0; ch < x.NumChannels(); ++ch) {
+            RandomizeSampleVector(&random_generator, x.View(band, ch));
+          }
+        }
+        render_delay_buffer->Insert(x);
+        if (k == 0) {
+          render_delay_buffer->Reset();
+        }
+        render_delay_buffer->PrepareCaptureProcessing();
+        auto* const render_buffer = render_delay_buffer->GetRenderBuffer();
+
+        ApplyFilter_Sse2(*render_buffer, num_partitions, H_Sse2, &S_Sse2);
+        ApplyFilter(*render_buffer, num_partitions, H_C, &S_C);
+        for (size_t j = 0; j < S_C.re.size(); ++j) {
+          EXPECT_FLOAT_EQ(S_C.re[j], S_Sse2.re[j]);
+          EXPECT_FLOAT_EQ(S_C.im[j], S_Sse2.im[j]);
+        }
+
+        std::for_each(G.re.begin(), G.re.end(),
+                      [&](float& a) { a = random_generator.Rand<float>(); });
+        std::for_each(G.im.begin(), G.im.end(),
+                      [&](float& a) { a = random_generator.Rand<float>(); });
+
+        AdaptPartitions_Sse2(*render_buffer, G, num_partitions, &H_Sse2);
+        AdaptPartitions(*render_buffer, G, num_partitions, &H_C);
+
+        for (size_t p = 0; p < num_partitions; ++p) {
+          for (size_t ch = 0; ch < num_render_channels; ++ch) {
+            for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) {
+              EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Sse2[p][ch].re[j]);
+              EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Sse2[p][ch].im[j]);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the optimized methods for filter adaptation are bitexact to
+// their reference counterparts.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       FilterAdaptationAvx2Optimizations) {
+  const size_t num_render_channels = GetParam();
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+  bool use_avx2 = (GetCPUInfo(kAVX2) != 0);
+  if (use_avx2) {
+    for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+      std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+          RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
+                                    num_render_channels));
+      Random random_generator(42U);
+      Block x(kNumBands, num_render_channels);
+      FftData S_C;
+      FftData S_Avx2;
+      FftData G;
+      Aec3Fft fft;
+      std::vector<std::vector<FftData>> H_C(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      std::vector<std::vector<FftData>> H_Avx2(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t ch = 0; ch < num_render_channels; ++ch) {
+          H_C[p][ch].Clear();
+          H_Avx2[p][ch].Clear();
+        }
+      }
+
+      for (size_t k = 0; k < 500; ++k) {
+        for (int band = 0; band < x.NumBands(); ++band) {
+          for (int ch = 0; ch < x.NumChannels(); ++ch) {
+            RandomizeSampleVector(&random_generator, x.View(band, ch));
+          }
+        }
+        render_delay_buffer->Insert(x);
+        if (k == 0) {
+          render_delay_buffer->Reset();
+        }
+        render_delay_buffer->PrepareCaptureProcessing();
+        auto* const render_buffer = render_delay_buffer->GetRenderBuffer();
+
+        ApplyFilter_Avx2(*render_buffer, num_partitions, H_Avx2, &S_Avx2);
+        ApplyFilter(*render_buffer, num_partitions, H_C, &S_C);
+        for (size_t j = 0; j < S_C.re.size(); ++j) {
+          EXPECT_FLOAT_EQ(S_C.re[j], S_Avx2.re[j]);
+          EXPECT_FLOAT_EQ(S_C.im[j], S_Avx2.im[j]);
+        }
+
+        std::for_each(G.re.begin(), G.re.end(),
+                      [&](float& a) { a = random_generator.Rand<float>(); });
+        std::for_each(G.im.begin(), G.im.end(),
+                      [&](float& a) { a = random_generator.Rand<float>(); });
+
+        AdaptPartitions_Avx2(*render_buffer, G, num_partitions, &H_Avx2);
+        AdaptPartitions(*render_buffer, G, num_partitions, &H_C);
+
+        for (size_t p = 0; p < num_partitions; ++p) {
+          for (size_t ch = 0; ch < num_render_channels; ++ch) {
+            for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) {
+              EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Avx2[p][ch].re[j]);
+              EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Avx2[p][ch].im[j]);
+            }
+          }
+        }
+      }
+    }
+  }
+}
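+
+// For reference, the quantity checked by the frequency response tests below
+// is derived, per partition p, channel ch and bin k, from the squared
+// magnitude
+//   |H[p][ch](k)|^2 = H.re[k] * H.re[k] + H.im[k] * H.im[k],
+// with the aggregation across channels defined by ComputeFrequencyResponse.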
+
+// Verifies that the optimized method for frequency response computation is
+// bitexact to the reference counterpart.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       ComputeFrequencyResponseSse2Optimization) {
+  const size_t num_render_channels = GetParam();
+  bool use_sse2 = (GetCPUInfo(kSSE2) != 0);
+  if (use_sse2) {
+    for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+      std::vector<std::vector<FftData>> H(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions);
+      std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Sse2(
+          num_partitions);
+
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t ch = 0; ch < num_render_channels; ++ch) {
+          for (size_t k = 0; k < H[p][ch].re.size(); ++k) {
+            H[p][ch].re[k] = k + p / 3.f + ch;
+            H[p][ch].im[k] = p + k / 7.f - ch;
+          }
+        }
+      }
+
+      ComputeFrequencyResponse(num_partitions, H, &H2);
+      ComputeFrequencyResponse_Sse2(num_partitions, H, &H2_Sse2);
+
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t k = 0; k < H2[p].size(); ++k) {
+          EXPECT_FLOAT_EQ(H2[p][k], H2_Sse2[p][k]);
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the optimized method for frequency response computation is
+// bitexact to the reference counterpart.
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels,
+       ComputeFrequencyResponseAvx2Optimization) {
+  const size_t num_render_channels = GetParam();
+  bool use_avx2 = (GetCPUInfo(kAVX2) != 0);
+  if (use_avx2) {
+    for (size_t num_partitions : {2, 5, 12, 30, 50}) {
+      std::vector<std::vector<FftData>> H(
+          num_partitions, std::vector<FftData>(num_render_channels));
+      std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions);
+      std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Avx2(
+          num_partitions);
+
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t ch = 0; ch < num_render_channels; ++ch) {
+          for (size_t k = 0; k < H[p][ch].re.size(); ++k) {
+            H[p][ch].re[k] = k + p / 3.f + ch;
+            H[p][ch].im[k] = p + k / 7.f - ch;
+          }
+        }
+      }
+
+      ComputeFrequencyResponse(num_partitions, H, &H2);
+      ComputeFrequencyResponse_Avx2(num_partitions, H, &H2_Avx2);
+
+      for (size_t p = 0; p < num_partitions; ++p) {
+        for (size_t k = 0; k < H2[p].size(); ++k) {
+          EXPECT_FLOAT_EQ(H2[p][k], H2_Avx2[p][k]);
+        }
+      }
+    }
+  }
+}
+
+#endif
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the check for non-null data dumper works.
+TEST(AdaptiveFirFilterDeathTest, NullDataDumper) {
+  EXPECT_DEATH(AdaptiveFirFilter(9, 9, 250, 1, DetectOptimization(), nullptr),
+               "");
+}
+
+// Verifies that the check for non-null filter output works.
+TEST(AdaptiveFirFilterDeathTest, NullFilterOutput) {
+  ApmDataDumper data_dumper(42);
+  AdaptiveFirFilter filter(9, 9, 250, 1, DetectOptimization(), &data_dumper);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1));
+  EXPECT_DEATH(filter.Filter(*render_delay_buffer->GetRenderBuffer(), nullptr),
+               "");
+}
+
+#endif
+
+// Verifies that the filter statistics can be accessed when filter statistics
+// are turned on.
+TEST(AdaptiveFirFilterTest, FilterStatisticsAccess) {
+  ApmDataDumper data_dumper(42);
+  Aec3Optimization optimization = DetectOptimization();
+  AdaptiveFirFilter filter(9, 9, 250, 1, optimization, &data_dumper);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> H2(
+      filter.max_filter_size_partitions(),
+      std::array<float, kFftLengthBy2Plus1>());
+  for (auto& H2_k : H2) {
+    H2_k.fill(0.f);
+  }
+
+  std::array<float, kFftLengthBy2Plus1> erl;
+  ComputeErl(optimization, H2, erl);
+  filter.ComputeFrequencyResponse(&H2);
+}
+
+// Verifies that the filter size is correctly reported.
+TEST(AdaptiveFirFilterTest, FilterSize) {
+  ApmDataDumper data_dumper(42);
+  for (size_t filter_size = 1; filter_size < 5; ++filter_size) {
+    AdaptiveFirFilter filter(filter_size, filter_size, 250, 1,
+                             DetectOptimization(), &data_dumper);
+    EXPECT_EQ(filter_size, filter.SizePartitions());
+  }
+}
+
+class AdaptiveFirFilterMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         AdaptiveFirFilterMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 4),
+                                            ::testing::Values(1, 8)));
+
+// Verifies that the filter is able to properly filter a signal and to adapt
+// its coefficients.
+TEST_P(AdaptiveFirFilterMultiChannel, FilterAndAdapt) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  constexpr size_t kNumBlocksToProcessPerRenderChannel = 1000;
+
+  ApmDataDumper data_dumper(42);
+  EchoCanceller3Config config;
+
+  if (num_render_channels == 33) {
+    config.filter.refined = {13, 0.00005f, 0.0005f, 0.0001f, 2.f, 20075344.f};
+    config.filter.coarse = {13, 0.1f, 20075344.f};
+    config.filter.refined_initial = {12, 0.005f, 0.5f, 0.001f, 2.f,
+                                     20075344.f};
+    config.filter.coarse_initial = {12, 0.7f, 20075344.f};
+  }
+
+  AdaptiveFirFilter filter(
+      config.filter.refined.length_blocks, config.filter.refined.length_blocks,
+      config.filter.config_change_duration_blocks, num_render_channels,
+      DetectOptimization(), &data_dumper);
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> H2(
+      num_capture_channels,
+      std::vector<std::array<float, kFftLengthBy2Plus1>>(
+          filter.max_filter_size_partitions(),
+          std::array<float, kFftLengthBy2Plus1>()));
+  std::vector<std::vector<float>> h(
+      num_capture_channels,
+      std::vector<float>(
+          GetTimeDomainLength(filter.max_filter_size_partitions()), 0.f));
+  Aec3Fft fft;
+  config.delay.default_delay = 1;
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
+  CoarseFilterUpdateGain gain(config.filter.coarse,
+                              config.filter.config_change_duration_blocks);
+  Random random_generator(42U);
+  Block x(kNumBands, num_render_channels);
+  std::vector<float> n(kBlockSize, 0.f);
+  std::vector<float> y(kBlockSize, 0.f);
+  AecState aec_state(EchoCanceller3Config{}, num_capture_channels);
+  RenderSignalAnalyzer render_signal_analyzer(config);
+  absl::optional<DelayEstimate> delay_estimate;
+  std::vector<float> e(kBlockSize, 0.f);
+  std::array<float, kFftLength> s_scratch;
+  std::vector<SubtractorOutput> output(num_capture_channels);
+  FftData S;
+  FftData G;
+  FftData E;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_refined(
+      num_capture_channels);
+  std::array<float, kFftLengthBy2Plus1> E2_coarse;
+  // [B,A] = butter(2,100/8000,'high')
+  constexpr CascadedBiQuadFilter::BiQuadCoefficients
+      kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f},
+                                     {-1.94448f, 0.94598f}};
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(0.f);
+  }
+  for (auto& E2_refined_ch : E2_refined) {
+    E2_refined_ch.fill(0.f);
+  }
+  E2_coarse.fill(0.f);
+  for (auto& subtractor_output : output) {
+    subtractor_output.Reset();
+  }
+
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+
+  for (size_t delay_samples : {0, 64, 150, 200, 301}) {
+    std::vector<DelayBuffer<float>> delay_buffer(
+        num_render_channels, DelayBuffer<float>(delay_samples));
+    std::vector<std::unique_ptr<CascadedBiQuadFilter>> x_hp_filter(
+        num_render_channels);
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      x_hp_filter[ch] = std::make_unique<CascadedBiQuadFilter>(
+          kHighPassFilterCoefficients, 1);
+    }
+    CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1);
+
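+    // The block loop below synthesizes the capture signal y as the average of
+    // the delayed render channels plus weak noise (a factor 100, i.e. 40 dB,
+    // below the echo), with both render and capture high-pass filtered, so
+    // that the adaptive filter has a well-conditioned echo path to identify.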
+    SCOPED_TRACE(ProduceDebugText(num_render_channels, delay_samples));
+    const size_t num_blocks_to_process =
+        kNumBlocksToProcessPerRenderChannel * num_render_channels;
+    for (size_t j = 0; j < num_blocks_to_process; ++j) {
+      std::fill(y.begin(), y.end(), 0.f);
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        RandomizeSampleVector(&random_generator, x.View(/*band=*/0, ch));
+        std::array<float, kBlockSize> y_channel;
+        delay_buffer[ch].Delay(x.View(/*band=*/0, ch), y_channel);
+        for (size_t k = 0; k < y.size(); ++k) {
+          y[k] += y_channel[k] / num_render_channels;
+        }
+      }
+
+      RandomizeSampleVector(&random_generator, n);
+      const float noise_scaling = 1.f / 100.f / num_render_channels;
+      for (size_t k = 0; k < y.size(); ++k) {
+        y[k] += n[k] * noise_scaling;
+      }
+
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        x_hp_filter[ch]->Process(x.View(/*band=*/0, ch));
+      }
+      y_hp_filter.Process(y);
+
+      render_delay_buffer->Insert(x);
+      if (j == 0) {
+        render_delay_buffer->Reset();
+      }
+      render_delay_buffer->PrepareCaptureProcessing();
+      auto* const render_buffer = render_delay_buffer->GetRenderBuffer();
+
+      render_signal_analyzer.Update(*render_buffer,
+                                    aec_state.MinDirectPathFilterDelay());
+
+      filter.Filter(*render_buffer, &S);
+      fft.Ifft(S, &s_scratch);
+      std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                     e.begin(),
+                     [&](float a, float b) { return a - b * kScale; });
+      std::for_each(e.begin(), e.end(),
+                    [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
+      fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E);
+      for (auto& o : output) {
+        for (size_t k = 0; k < kBlockSize; ++k) {
+          o.s_refined[k] = kScale * s_scratch[k + kFftLengthBy2];
+        }
+      }
+
+      std::array<float, kFftLengthBy2Plus1> render_power;
+      render_buffer->SpectralSum(filter.SizePartitions(), &render_power);
+      gain.Compute(render_power, render_signal_analyzer, E,
+                   filter.SizePartitions(), false, &G);
+      filter.Adapt(*render_buffer, G, &h[0]);
+      aec_state.HandleEchoPathChange(EchoPathVariability(
+          false, EchoPathVariability::DelayAdjustment::kNone, false));
+
+      filter.ComputeFrequencyResponse(&H2[0]);
+      aec_state.Update(delay_estimate, H2, h, *render_buffer, E2_refined, Y2,
+                       output);
+    }
+    // Verify that the filter is able to perform well: the residual echo energy
+    // must be at least a factor 1000 (30 dB) below the echo energy.
+    EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
+              std::inner_product(y.begin(), y.end(), y.begin(), 0.f));
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build
new file mode 100644
index 0000000000..6f67bd6fad
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build
@@ -0,0 +1,190 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +CXXFLAGS += [ + "-mavx2", + "-mfma" +] + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + 
DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +Library("aec3_avx2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc new file mode 100644 index 0000000000..3ba10d5baf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_common.h" + +#include + +#include "rtc_base/checks.h" +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +Aec3Optimization DetectOptimization() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (GetCPUInfo(kAVX2) != 0) { + return Aec3Optimization::kAvx2; + } else if (GetCPUInfo(kSSE2) != 0) { + return Aec3Optimization::kSse2; + } +#endif + +#if defined(WEBRTC_HAS_NEON) + return Aec3Optimization::kNeon; +#else + return Aec3Optimization::kNone; +#endif +} + +float FastApproxLog2f(const float in) { + RTC_DCHECK_GT(in, .0f); + // Read and interpret float as uint32_t and then cast to float. + // This is done to extract the exponent (bits 30 - 23). + // "Right shift" of the exponent is then performed by multiplying + // with the constant (1/2^23). Finally, we subtract a constant to + // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias). + union { + float dummy; + uint32_t a; + } x = {in}; + float out = x.a; + out *= 1.1920929e-7f; // 1/2^23 + out -= 126.942695f; // Remove bias. 
+  return out;
+}
+
+// Converts a log2 quantity to dB: dB = 10 * log10(x) = 10 * log10(2) * log2(x),
+// and 10 * log10(2) = 3.0102999566398121.
+float Log2TodB(const float in_log2) {
+  return 3.0102999566398121 * in_log2;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h
new file mode 100644
index 0000000000..32b564f14b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
+
+constexpr int kNumBlocksPerSecond = 250;
+
+constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
+constexpr int kMetricsComputationBlocks = 3;
+constexpr int kMetricsCollectionBlocks =
+    kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
+
+constexpr size_t kFftLengthBy2 = 64;
+constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
+constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
+constexpr size_t kFftLength = 2 * kFftLengthBy2;
+constexpr size_t kFftLengthBy2Log2 = 6;
+
+constexpr int kRenderTransferQueueSizeFrames = 100;
+
+constexpr size_t kMaxNumBands = 3;
+constexpr size_t kFrameSize = 160;
+constexpr size_t kSubFrameLength = kFrameSize / 2;
+
+constexpr size_t kBlockSize = kFftLengthBy2;
+constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2;
+
+constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2;
+constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
+constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
+    kMatchedFilterWindowSizeSubBlocks * 3 / 4;
+
+// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
+constexpr size_t NumBandsForRate(int sample_rate_hz) {
+  return static_cast<size_t>(sample_rate_hz / 16000);
+}
+
+constexpr bool ValidFullBandRate(int sample_rate_hz) {
+  return sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
+         sample_rate_hz == 48000;
+}
+
+constexpr int GetTimeDomainLength(int filter_length_blocks) {
+  return filter_length_blocks * kFftLengthBy2;
+}
+
+constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
+                                          size_t num_matched_filters) {
+  return kBlockSize / down_sampling_factor *
+         (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
+          kMatchedFilterWindowSizeSubBlocks + 1);
+}
+
+constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
+                                          size_t num_matched_filters,
+                                          size_t filter_length_blocks) {
+  return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
+             (kBlockSize / down_sampling_factor) +
+         filter_length_blocks + 1;
+}
+
+// Detects what kind of optimizations to use for the code.
+Aec3Optimization DetectOptimization();
+
+// Computes the log2 of the input in a fast and approximate manner.
+float FastApproxLog2f(float in);
+
+// Returns dB from a power quantity expressed in log2.
+float Log2TodB(float in_log2); + +static_assert(1 << kBlockSizeLog2 == kBlockSize, + "Proper number of shifts for blocksize"); + +static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2, + "Proper number of shifts for the fft length"); + +static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz"); +static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz"); +static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz"); + +static_assert(ValidFullBandRate(16000), + "Test that 16 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(32000), + "Test that 32 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(48000), + "Test that 48 kHz is a valid sample rate"); +static_assert(!ValidFullBandRate(8001), + "Test that 8001 Hz is not a valid sample rate"); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build new file mode 100644 index 0000000000..b0952a7d0c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + 
DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc new file mode 100644 index 0000000000..9cc8016f0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc @@ -0,0 
+1,144 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +namespace { + +const float kHanning64[kFftLengthBy2] = { + 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f, + 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f, + 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f, + 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f, + 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f, + 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f, + 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f, + 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f, + 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f, + 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f, + 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f, + 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f, + 0.0222136f, 0.00991376f, 0.00248461f, 0.f}; + +// Hanning window from Matlab command win = sqrt(hanning(128)). +const float kSqrtHanning128[kFftLength] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 
+    0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
+    0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f,
+    0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f,
+    0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
+    0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f,
+    0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
+    0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
+
+bool IsSse2Available() {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  return GetCPUInfo(kSSE2) != 0;
+#else
+  return false;
+#endif
+}
+
+}  // namespace
+
+Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {}
+
+// TODO(peah): Change x to be std::array once the rest of the code allows this.
+void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
+                            Window window,
+                            FftData* X) const {
+  RTC_DCHECK(X);
+  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
+  std::array<float, kFftLength> fft;
+  std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
+  switch (window) {
+    case Window::kRectangular:
+      std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+      break;
+    case Window::kHanning:
+      std::transform(x.begin(), x.end(), std::begin(kHanning64),
+                     fft.begin() + kFftLengthBy2,
+                     [](float a, float b) { return a * b; });
+      break;
+    case Window::kSqrtHanning:
+      RTC_DCHECK_NOTREACHED();
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+
+  Fft(&fft, X);
+}
+
+void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
+                        rtc::ArrayView<const float> x_old,
+                        Window window,
+                        FftData* X) const {
+  RTC_DCHECK(X);
+  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
+  RTC_DCHECK_EQ(kFftLengthBy2, x_old.size());
+  std::array<float, kFftLength> fft;
+
+  switch (window) {
+    case Window::kRectangular:
+      std::copy(x_old.begin(), x_old.end(), fft.begin());
+      std::copy(x.begin(), x.end(), fft.begin() + x_old.size());
+      break;
+    case Window::kHanning:
+      RTC_DCHECK_NOTREACHED();
+      break;
+    case Window::kSqrtHanning:
+      std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
+                     fft.begin(), std::multiplies<float>());
+      std::transform(x.begin(), x.end(),
+                     std::begin(kSqrtHanning128) + x_old.size(),
+                     fft.begin() + x_old.size(), std::multiplies<float>());
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+
+  Fft(&fft, X);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h
new file mode 100644
index 0000000000..c68de53963
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Wrapper class that provides 128 point real valued FFT functionality with the
+// FftData type.
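+//
+// Minimal usage sketch (all names are from this header and aec3_common.h):
+// fill a std::array<float, kFftLength> with samples, call Fft() to obtain an
+// FftData, and Ifft() to transform back. Note that the Ooura FFT/IFFT round
+// trip scales the signal by kFftLengthBy2 (64), as exercised by the unit
+// tests further below in this patch.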
+class Aec3Fft {
+ public:
+  enum class Window { kRectangular, kHanning, kSqrtHanning };
+
+  Aec3Fft();
+
+  Aec3Fft(const Aec3Fft&) = delete;
+  Aec3Fft& operator=(const Aec3Fft&) = delete;
+
+  // Computes the FFT. Note that both the input and output are modified.
+  void Fft(std::array<float, kFftLength>* x, FftData* X) const {
+    RTC_DCHECK(x);
+    RTC_DCHECK(X);
+    ooura_fft_.Fft(x->data());
+    X->CopyFromPackedArray(*x);
+  }
+  // Computes the inverse Fft.
+  void Ifft(const FftData& X, std::array<float, kFftLength>* x) const {
+    RTC_DCHECK(x);
+    X.CopyToPackedArray(x);
+    ooura_fft_.InverseFft(x->data());
+  }
+
+  // Windows the input using a Hanning window, and then adds padding of
+  // kFftLengthBy2 initial zeros before computing the Fft.
+  void ZeroPaddedFft(rtc::ArrayView<const float> x,
+                     Window window,
+                     FftData* X) const;
+
+  // Concatenates the kFftLengthBy2 values long x and x_old before computing
+  // the Fft. After that, x is copied to x_old.
+  void PaddedFft(rtc::ArrayView<const float> x,
+                 rtc::ArrayView<const float> x_old,
+                 FftData* X) const {
+    PaddedFft(x, x_old, Window::kRectangular, X);
+  }
+
+  // Padded Fft using a time-domain window.
+  void PaddedFft(rtc::ArrayView<const float> x,
+                 rtc::ArrayView<const float> x_old,
+                 Window window,
+                 FftData* X) const;
+
+ private:
+  const OouraFft ooura_fft_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build
new file mode 100644
index 0000000000..97bbc43539
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build
@@ -0,0 +1,216 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_fft_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc new file mode 100644 index 0000000000..e60ef5b713 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3FftDeathTest, NullFftInput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Fft(nullptr, &X), ""); +} + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3FftDeathTest, NullFftOutput) { + Aec3Fft fft; + std::array x; + EXPECT_DEATH(fft.Fft(&x, nullptr), ""); +} + +// Verifies that the check for non-null output in Ifft works. +TEST(Aec3FftDeathTest, NullIfftOutput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Ifft(X, nullptr), ""); +} + +// Verifies that the check for non-null output in ZeroPaddedFft works. 
+TEST(Aec3FftDeathTest, NullZeroPaddedFftOutput) {
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x;
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr),
+               "");
+}
+
+// Verifies that the check for input length in ZeroPaddedFft works.
+TEST(Aec3FftDeathTest, ZeroPaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), "");
+}
+
+// Verifies that the check for non-null output in PaddedFft works.
+TEST(Aec3FftDeathTest, NullPaddedFftOutput) {
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, nullptr), "");
+}
+
+// Verifies that the check for input length in PaddedFft works.
+TEST(Aec3FftDeathTest, PaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+// Verifies that the check for length in the old value in PaddedFft works.
+TEST(Aec3FftDeathTest, PaddedFftWrongOldValuesLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2 - 1> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+#endif
+
+// Verifies that Fft works as intended.
+TEST(Aec3Fft, Fft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  x.fill(0.f);
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(0.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(0.f);
+  x[0] = 1.f;
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(1.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(1.f);
+  fft.Fft(&x, &X);
+  EXPECT_EQ(128.f, X.re[0]);
+  std::for_each(X.re.begin() + 1, X.re.end(),
+                [](float a) { EXPECT_EQ(0.f, a); });
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+}
+
+// Verifies that InverseFft works as intended.
+TEST(Aec3Fft, Ifft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+
+  X.re.fill(0.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(0.f));
+
+  X.re.fill(1.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_EQ(64.f, x[0]);
+  std::for_each(x.begin() + 1, x.end(), [](float a) { EXPECT_EQ(0.f, a); });
+
+  X.re.fill(0.f);
+  X.re[0] = 128;
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(64.f));
+}
+
+// Verifies that InverseFft and Fft work as intended.
+TEST(Aec3Fft, FftAndIfft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  std::array<float, kFftLength> x_ref;
+
+  int v = 0;
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x.size(); ++j) {
+      x[j] = v++;
+      x_ref[j] = x[j] * 64.f;
+    }
+    fft.Fft(&x, &X);
+    fft.Ifft(X, &x);
+    for (size_t j = 0; j < x.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x[j], 0.001f);
+    }
+  }
+}
+
+// Verifies that ZeroPaddedFft works as intended.
+TEST(Aec3Fft, ZeroPaddedFft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x_in;
+  std::array<float, kFftLength> x_ref;
+  std::array<float, kFftLength> x_out;
+
+  int v = 0;
+  x_ref.fill(0.f);
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x_in.size(); ++j) {
+      x_in[j] = v++;
+      x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
+    }
+    fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X);
+    fft.Ifft(X, &x_out);
+    for (size_t j = 0; j < x_out.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
+    }
+  }
+}
+
+// Verifies that PaddedFft works as intended.
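+// The reference output is the previous block concatenated with the current
+// one (rectangular window), scaled by 64 to match the Ooura FFT/IFFT
+// round-trip gain observed in the tests above.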
+TEST(Aec3Fft, PaddedFft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x_in;
+  std::array<float, kFftLength> x_out;
+  std::array<float, kFftLengthBy2> x_old;
+  std::array<float, kFftLengthBy2> x_old_ref;
+  std::array<float, kFftLength> x_ref;
+
+  int v = 0;
+  x_old.fill(0.f);
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x_in.size(); ++j) {
+      x_in[j] = v++;
+    }
+
+    std::copy(x_old.begin(), x_old.end(), x_ref.begin());
+    std::copy(x_in.begin(), x_in.end(), x_ref.begin() + kFftLengthBy2);
+    std::copy(x_in.begin(), x_in.end(), x_old_ref.begin());
+    std::for_each(x_ref.begin(), x_ref.end(), [](float& a) { a *= 64.f; });
+
+    fft.PaddedFft(x_in, x_old, &X);
+    std::copy(x_in.begin(), x_in.end(), x_old.begin());
+    fft.Ifft(X, &x_out);
+
+    for (size_t j = 0; j < x_out.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
+    }
+
+    EXPECT_EQ(x_old_ref, x_old);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build
new file mode 100644
index 0000000000..6646d41ff3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build
@@ -0,0 +1,289 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0"
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc",
"/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = 
True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + 
DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc new file mode 100644 index 0000000000..81fd91fab9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec_state.h" + +#include + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +bool DeactivateInitialStateResetAtEchoPathChange() { + return field_trial::IsEnabled( + "WebRTC-Aec3DeactivateInitialStateResetKillSwitch"); +} + +bool FullResetAtEchoPathChange() { + return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch"); +} + +bool SubtractorAnalyzerResetAtEchoPathChange() { + return !field_trial::IsEnabled( + "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch"); +} + +void ComputeAvgRenderReverb( + const SpectrumBuffer& spectrum_buffer, + int delay_blocks, + float reverb_decay, + ReverbModel* reverb_model, + rtc::ArrayView reverb_power_spectrum) { + RTC_DCHECK(reverb_model); + const size_t num_render_channels = spectrum_buffer.buffer[0].size(); + int idx_at_delay = + spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks); + int idx_past = spectrum_buffer.IncIndex(idx_at_delay); + + std::array X2_data; + rtc::ArrayView X2; + if (num_render_channels > 1) { + auto average_channels = + [](size_t num_render_channels, + rtc::ArrayView> + spectrum_band_0, + rtc::ArrayView render_power) { + std::fill(render_power.begin(), render_power.end(), 0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power[k] += spectrum_band_0[ch][k]; + } + } + const float normalizer = 1.f / num_render_channels; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power[k] *= normalizer; + } + }; + average_channels(num_render_channels, 
+                     spectrum_buffer.buffer[idx_past],
+                     X2_data);
+    reverb_model->UpdateReverbNoFreqShaping(
+        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
+
+    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
+                     X2_data);
+    X2 = X2_data;
+  } else {
+    reverb_model->UpdateReverbNoFreqShaping(
+        spectrum_buffer.buffer[idx_past][/*channel=*/0],
+        /*power_spectrum_scaling=*/1.0f, reverb_decay);
+
+    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
+  }
+
+  rtc::ArrayView<const float> reverb_power = reverb_model->reverb();
+  for (size_t k = 0; k < X2.size(); ++k) {
+    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
+  }
+}
+
+}  // namespace
+
+std::atomic<int> AecState::instance_count_(0);
+
+void AecState::GetResidualEchoScaling(
+    rtc::ArrayView<float> residual_scaling) const {
+  bool filter_has_had_time_to_converge;
+  if (config_.filter.conservative_initial_phase) {
+    filter_has_had_time_to_converge =
+        strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
+  } else {
+    filter_has_had_time_to_converge =
+        strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
+  }
+  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
+                                          residual_scaling);
+}
+
+AecState::AecState(const EchoCanceller3Config& config,
+                   size_t num_capture_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_(config),
+      num_capture_channels_(num_capture_channels),
+      deactivate_initial_state_reset_at_echo_path_change_(
+          DeactivateInitialStateResetAtEchoPathChange()),
+      full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
+      subtractor_analyzer_reset_at_echo_path_change_(
+          SubtractorAnalyzerResetAtEchoPathChange()),
+      initial_state_(config_),
+      delay_state_(config_, num_capture_channels_),
+      transparent_state_(TransparentMode::Create(config_)),
+      filter_quality_state_(config_, num_capture_channels_),
+      erl_estimator_(2 * kNumBlocksPerSecond),
+      erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
+      filter_analyzer_(config_, num_capture_channels_),
+      echo_audibility_(
+          config_.echo_audibility.use_stationarity_properties_at_init),
+      reverb_model_estimator_(config_, num_capture_channels_),
+      subtractor_output_analyzer_(num_capture_channels_) {}
+
+AecState::~AecState() = default;
+
+void AecState::HandleEchoPathChange(
+    const EchoPathVariability& echo_path_variability) {
+  const auto full_reset = [&]() {
+    filter_analyzer_.Reset();
+    capture_signal_saturation_ = false;
+    strong_not_saturated_render_blocks_ = 0;
+    blocks_with_active_render_ = 0;
+    if (!deactivate_initial_state_reset_at_echo_path_change_) {
+      initial_state_.Reset();
+    }
+    if (transparent_state_) {
+      transparent_state_->Reset();
+    }
+    erle_estimator_.Reset(true);
+    erl_estimator_.Reset();
+    filter_quality_state_.Reset();
+  };
+
+  // TODO(peah): Refine the reset scheme according to the type of gain and
+  // delay adjustment.
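+  // A detected delay change triggers a full reset (unless resetting at echo
+  // path changes has been disabled via the kill switch above), whereas a
+  // pure gain change only resets the ERLE estimate.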
+
+  if (full_reset_at_echo_path_change_ &&
+      echo_path_variability.delay_change !=
+          EchoPathVariability::DelayAdjustment::kNone) {
+    full_reset();
+  } else if (echo_path_variability.gain_change) {
+    erle_estimator_.Reset(false);
+  }
+  if (subtractor_analyzer_reset_at_echo_path_change_) {
+    subtractor_output_analyzer_.HandleEchoPathChange();
+  }
+}
+
+void AecState::Update(
+    const absl::optional<DelayEstimate>& external_delay,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        adaptive_filter_frequency_responses,
+    rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
+    const RenderBuffer& render_buffer,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+    rtc::ArrayView<const SubtractorOutput> subtractor_output) {
+  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
+  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
+  RTC_DCHECK_EQ(num_capture_channels_,
+                adaptive_filter_frequency_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels_,
+                adaptive_filter_impulse_responses.size());
+
+  // Analyze the filter outputs and filters.
+  bool any_filter_converged;
+  bool any_coarse_filter_converged;
+  bool all_filters_diverged;
+  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
+                                     &any_coarse_filter_converged,
+                                     &all_filters_diverged);
+
+  bool any_filter_consistent;
+  float max_echo_path_gain;
+  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
+                          &any_filter_consistent, &max_echo_path_gain);
+
+  // Estimate the direct path delay of the filter.
+  if (config_.filter.use_linear_filter) {
+    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
+                        strong_not_saturated_render_blocks_);
+  }
+
+  const Block& aligned_render_block =
+      render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
+
+  // Update render counters.
+  bool active_render = false;
+  for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
+    const float render_energy =
+        std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
+                           aligned_render_block.end(/*block=*/0, ch),
+                           aligned_render_block.begin(/*block=*/0, ch), 0.f);
+    if (render_energy > (config_.render_levels.active_render_limit *
+                         config_.render_levels.active_render_limit) *
+                            kFftLengthBy2) {
+      active_render = true;
+      break;
+    }
+  }
+  blocks_with_active_render_ += active_render ? 1 : 0;
+  strong_not_saturated_render_blocks_ +=
+      active_render && !SaturatedCapture() ? 1 : 0;
+
+  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
+
+  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
+                         delay_state_.MinDirectPathFilterDelay(),
+                         ReverbDecay(/*mild=*/false), &avg_render_reverb_,
+                         avg_render_spectrum_with_reverb);
+
+  if (config_.echo_audibility.use_stationarity_properties) {
+    // Update the echo audibility evaluator.
+    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
+                            delay_state_.MinDirectPathFilterDelay(),
+                            delay_state_.ExternalDelayReported());
+  }
+
+  // Update the ERL and ERLE measures.
+  if (initial_state_.TransitionTriggered()) {
+    erle_estimator_.Reset(false);
+  }
+
+  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
+                         avg_render_spectrum_with_reverb, Y2, E2_refined,
+                         subtractor_output_analyzer_.ConvergedFilters());
+
+  erl_estimator_.Update(
+      subtractor_output_analyzer_.ConvergedFilters(),
+      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
+
+  // Detect and flag echo saturation.
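+  // When the linear filter estimate is usable, saturation is detected from
+  // the linear echo estimates in the subtractor output; otherwise a
+  // conservative bound based on the render peak level and the estimated echo
+  // path gain is applied (see SaturationDetector::Update below).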
+ if (config_.ep_strength.echo_can_saturate) { + saturation_detector_.Update(aligned_render_block, SaturatedCapture(), + UsableLinearEstimate(), subtractor_output, + max_echo_path_gain); + } else { + RTC_DCHECK(!saturation_detector_.SaturatedEcho()); + } + + // Update the decision on whether to use the initial state parameter set. + initial_state_.Update(active_render, SaturatedCapture()); + + // Detect whether the transparent mode should be activated. + if (transparent_state_) { + transparent_state_->Update( + delay_state_.MinDirectPathFilterDelay(), any_filter_consistent, + any_filter_converged, any_coarse_filter_converged, all_filters_diverged, + active_render, SaturatedCapture()); + } + + // Analyze the quality of the filter. + filter_quality_state_.Update(active_render, TransparentModeActive(), + SaturatedCapture(), external_delay, + any_filter_converged); + + // Update the reverb estimate. + const bool stationary_block = + config_.echo_audibility.use_stationarity_properties && + echo_audibility_.IsBlockStationary(); + + reverb_model_estimator_.Update( + filter_analyzer_.GetAdjustedFilters(), + adaptive_filter_frequency_responses, + erle_estimator_.GetInstLinearQualityEstimates(), + delay_state_.DirectPathFilterDelays(), + filter_quality_state_.UsableLinearFilterOutputs(), stationary_block); + + erle_estimator_.Dump(data_dumper_); + reverb_model_estimator_.Dump(data_dumper_.get()); + data_dumper_->DumpRaw("aec3_active_render", active_render); + data_dumper_->DumpRaw("aec3_erl", Erl()); + data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); + data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]); + data_dumper_->DumpRaw("aec3_erle_onset_compensated", + Erle(/*onset_compensated=*/true)[0]); + data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); + data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive()); + data_dumper_->DumpRaw("aec3_filter_delay", + filter_analyzer_.MinFilterDelayBlocks()); + + data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent); + data_dumper_->DumpRaw("aec3_initial_state", + initial_state_.InitialStateActive()); + data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture()); + data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho()); + data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged); + data_dumper_->DumpRaw("aec3_any_coarse_filter_converged", + any_coarse_filter_converged); + data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged); + + data_dumper_->DumpRaw("aec3_external_delay_avaliable", + external_delay ? 1 : 0); + data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est", + GetReverbFrequencyResponse()); + data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2); + data_dumper_->DumpRaw("aec3_subtractor_e2_coarse", + subtractor_output[0].e2_coarse); + data_dumper_->DumpRaw("aec3_subtractor_e2_refined", + subtractor_output[0].e2_refined); +} + +AecState::InitialState::InitialState(const EchoCanceller3Config& config) + : conservative_initial_phase_(config.filter.conservative_initial_phase), + initial_state_seconds_(config.filter.initial_state_seconds) { + Reset(); +} +void AecState::InitialState::InitialState::Reset() { + initial_state_ = true; + strong_not_saturated_render_blocks_ = 0; +} +void AecState::InitialState::InitialState::Update(bool active_render, + bool saturated_capture) { + strong_not_saturated_render_blocks_ += + active_render && !saturated_capture ? 
1 : 0;
+
+  // Flag whether the initial state is still active.
+  bool prev_initial_state = initial_state_;
+  if (conservative_initial_phase_) {
+    initial_state_ =
+        strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
+  } else {
+    initial_state_ = strong_not_saturated_render_blocks_ <
+                     initial_state_seconds_ * kNumBlocksPerSecond;
+  }
+
+  // Flag whether the transition from the initial state has started.
+  transition_triggered_ = !initial_state_ && prev_initial_state;
+}
+
+AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
+                                   size_t num_capture_channels)
+    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
+      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
+      min_filter_delay_(delay_headroom_blocks_) {}
+
+void AecState::FilterDelay::Update(
+    rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
+    const absl::optional<DelayEstimate>& external_delay,
+    size_t blocks_with_proper_filter_adaptation) {
+  // Update the delay based on the external delay.
+  if (external_delay &&
+      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
+    external_delay_ = external_delay;
+    external_delay_reported_ = true;
+  }
+
+  // Override the estimated delay if it is not certain that the filter has had
+  // time to converge.
+  const bool delay_estimator_may_not_have_converged =
+      blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
+  if (delay_estimator_may_not_have_converged && external_delay_) {
+    const int delay_guess = delay_headroom_blocks_;
+    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
+              delay_guess);
+  } else {
+    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
+                  analyzer_filter_delay_estimates_blocks.size());
+    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
+              analyzer_filter_delay_estimates_blocks.end(),
+              filter_delays_blocks_.begin());
+  }
+
+  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
+                                        filter_delays_blocks_.end());
+}
+
+AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
+    const EchoCanceller3Config& config,
+    size_t num_capture_channels)
+    : use_linear_filter_(config.filter.use_linear_filter),
+      usable_linear_filter_estimates_(num_capture_channels, false) {}
+
+void AecState::FilteringQualityAnalyzer::Reset() {
+  std::fill(usable_linear_filter_estimates_.begin(),
+            usable_linear_filter_estimates_.end(), false);
+  overall_usable_linear_estimates_ = false;
+  filter_update_blocks_since_reset_ = 0;
+}
+
+void AecState::FilteringQualityAnalyzer::Update(
+    bool active_render,
+    bool transparent_mode,
+    bool saturated_capture,
+    const absl::optional<DelayEstimate>& external_delay,
+    bool any_filter_converged) {
+  // Update blocks counter.
+  const bool filter_update = active_render && !saturated_capture;
+  filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
+  filter_update_blocks_since_start_ += filter_update ? 1 : 0;
+
+  // Store convergence flag when observed.
+  convergence_seen_ = convergence_seen_ || any_filter_converged;
+
+  // Verify requirements for achieving a decent filter. The requirements for
+  // filter adaptation at call startup are more restrictive than after an
+  // in-call reset.
+  const bool sufficient_data_to_converge_at_startup =
+      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
+  const bool sufficient_data_to_converge_at_reset =
+      sufficient_data_to_converge_at_startup &&
+      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
+
+  // The linear filter can only be used if it has had time to converge.
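+  // Both the startup and the post-reset adaptation counters must have passed
+  // their thresholds.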
+  overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
+                                     sufficient_data_to_converge_at_reset;
+
+  // The linear filter can only be used if an external delay or convergence
+  // has been identified.
+  overall_usable_linear_estimates_ =
+      overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
+
+  // If transparent mode is on, deactivate using the linear filter.
+  overall_usable_linear_estimates_ =
+      overall_usable_linear_estimates_ && !transparent_mode;
+
+  if (use_linear_filter_) {
+    std::fill(usable_linear_filter_estimates_.begin(),
+              usable_linear_filter_estimates_.end(),
+              overall_usable_linear_estimates_);
+  }
+}
+
+void AecState::SaturationDetector::Update(
+    const Block& x,
+    bool saturated_capture,
+    bool usable_linear_estimate,
+    rtc::ArrayView<const SubtractorOutput> subtractor_output,
+    float echo_path_gain) {
+  saturated_echo_ = false;
+  if (!saturated_capture) {
+    return;
+  }
+
+  if (usable_linear_estimate) {
+    constexpr float kSaturationThreshold = 20000.f;
+    for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
+      saturated_echo_ =
+          saturated_echo_ ||
+          (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
+           subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
+    }
+  } else {
+    float max_sample = 0.f;
+    for (int ch = 0; ch < x.NumChannels(); ++ch) {
+      rtc::ArrayView<const float> x_ch = x.View(/*band=*/0, ch);
+      for (float sample : x_ch) {
+        max_sample = std::max(max_sample, fabsf(sample));
+      }
+    }
+
+    const float kMargin = 10.f;
+    float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
+    saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h
new file mode 100644
index 0000000000..a39325c8b8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/echo_audibility.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/erl_estimator.h"
+#include "modules/audio_processing/aec3/erle_estimator.h"
+#include "modules/audio_processing/aec3/filter_analyzer.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/reverb_model_estimator.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
+#include "modules/audio_processing/aec3/transparent_mode.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Handles the state and the conditions for the echo removal functionality.
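+// The state is refreshed once per processed capture block via Update().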
+class AecState {
+ public:
+  AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
+  ~AecState();
+
+  // Returns whether the echo subtractor can be used to determine the residual
+  // echo.
+  bool UsableLinearEstimate() const {
+    return filter_quality_state_.LinearFilterUsable() &&
+           config_.filter.use_linear_filter;
+  }
+
+  // Returns whether the echo subtractor output should be used as output.
+  bool UseLinearFilterOutput() const {
+    return filter_quality_state_.LinearFilterUsable() &&
+           config_.filter.use_linear_filter;
+  }
+
+  // Returns whether the render signal is currently active.
+  bool ActiveRender() const { return blocks_with_active_render_ > 200; }
+
+  // Returns the appropriate scaling of the residual echo to match the
+  // audibility.
+  void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
+
+  // Returns whether the stationary properties of the signals are used in the
+  // aec.
+  bool UseStationarityProperties() const {
+    return config_.echo_audibility.use_stationarity_properties;
+  }
+
+  // Returns the ERLE.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+      bool onset_compensated) const {
+    return erle_estimator_.Erle(onset_compensated);
+  }
+
+  // Returns the non-capped ERLE.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+      const {
+    return erle_estimator_.ErleUnbounded();
+  }
+
+  // Returns the fullband ERLE estimate in log2 units.
+  float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
+
+  // Returns the ERL.
+  const std::array<float, kFftLengthBy2Plus1>& Erl() const {
+    return erl_estimator_.Erl();
+  }
+
+  // Returns the time-domain ERL.
+  float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
+
+  // Returns the delay estimate based on the linear filter.
+  int MinDirectPathFilterDelay() const {
+    return delay_state_.MinDirectPathFilterDelay();
+  }
+
+  // Returns whether the capture signal is saturated.
+  bool SaturatedCapture() const { return capture_signal_saturation_; }
+
+  // Returns whether the echo signal is saturated.
+  bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
+
+  // Updates the capture signal saturation.
+  void UpdateCaptureSaturation(bool capture_signal_saturation) {
+    capture_signal_saturation_ = capture_signal_saturation;
+  }
+
+  // Returns whether the transparent mode is active.
+  bool TransparentModeActive() const {
+    return transparent_state_ && transparent_state_->Active();
+  }
+
+  // Takes appropriate action at an echo path change.
+  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
+
+  // Returns the decay factor for the echo reverberation. The parameter `mild`
+  // indicates which exponential decay to return: the default one, or a milder
+  // one that can be used during nearend regions.
+  float ReverbDecay(bool mild) const {
+    return reverb_model_estimator_.ReverbDecay(mild);
+  }
+
+  // Returns the frequency response of the reverberant echo.
+  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
+    return reverb_model_estimator_.GetReverbFrequencyResponse();
+  }
+
+  // Returns whether the transition for going out of the initial state has
+  // been triggered.
+  bool TransitionTriggered() const {
+    return initial_state_.TransitionTriggered();
+  }
+
+  // Updates the aec state.
+  // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
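+  // The per-capture-channel arguments must have num_capture_channels_
+  // entries (checked via RTC_DCHECKs in the implementation).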
+  void Update(
+      const absl::optional<DelayEstimate>& external_delay,
+      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+          adaptive_filter_frequency_responses,
+      rtc::ArrayView<const std::vector<float>>
+          adaptive_filter_impulse_responses,
+      const RenderBuffer& render_buffer,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+      rtc::ArrayView<const SubtractorOutput> subtractor_output);
+
+  // Returns filter length in blocks.
+  int FilterLengthBlocks() const {
+    // All filters have the same length, so arbitrarily return channel 0
+    // length.
+    return filter_analyzer_.FilterLengthBlocks();
+  }
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
+  const bool deactivate_initial_state_reset_at_echo_path_change_;
+  const bool full_reset_at_echo_path_change_;
+  const bool subtractor_analyzer_reset_at_echo_path_change_;
+
+  // Class for controlling the transition from the initial state, which in
+  // turn controls when the filter parameters for the initial state should be
+  // used.
+  class InitialState {
+   public:
+    explicit InitialState(const EchoCanceller3Config& config);
+    // Resets the state to again begin in the initial state.
+    void Reset();
+
+    // Updates the state based on new data.
+    void Update(bool active_render, bool saturated_capture);
+
+    // Returns whether the initial state is active or not.
+    bool InitialStateActive() const { return initial_state_; }
+
+    // Returns whether the transition from the initial state has been started.
+    bool TransitionTriggered() const { return transition_triggered_; }
+
+   private:
+    const bool conservative_initial_phase_;
+    const float initial_state_seconds_;
+    bool transition_triggered_ = false;
+    bool initial_state_ = true;
+    size_t strong_not_saturated_render_blocks_ = 0;
+  } initial_state_;
+
+  // Class for choosing the direct-path delay relative to the beginning of the
+  // filter, as well as any other data related to the delay used within
+  // AecState.
+  class FilterDelay {
+   public:
+    FilterDelay(const EchoCanceller3Config& config,
+                size_t num_capture_channels);
+
+    // Returns whether an external delay has been reported to the AecState
+    // (from the delay estimator).
+    bool ExternalDelayReported() const { return external_delay_reported_; }
+
+    // Returns the delay in blocks relative to the beginning of the filter
+    // that corresponds to the direct path of the echo.
+    rtc::ArrayView<const int> DirectPathFilterDelays() const {
+      return filter_delays_blocks_;
+    }
+
+    // Returns the minimum delay among the direct path delays relative to the
+    // beginning of the filter.
+    int MinDirectPathFilterDelay() const { return min_filter_delay_; }
+
+    // Updates the delay estimates based on new data.
+    void Update(
+        rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
+        const absl::optional<DelayEstimate>& external_delay,
+        size_t blocks_with_proper_filter_adaptation);
+
+   private:
+    const int delay_headroom_blocks_;
+    bool external_delay_reported_ = false;
+    std::vector<int> filter_delays_blocks_;
+    int min_filter_delay_;
+    absl::optional<DelayEstimate> external_delay_;
+  } delay_state_;
+
+  // Classifier for toggling transparent mode when there is no echo.
+  std::unique_ptr<TransparentMode> transparent_state_;
+
+  // Class for analyzing how well the linear filter is performing, and can be
+  // expected to perform, on the current signals. The result is used to select
+  // the echo suppression functionality as well as the input to the echo
+  // suppressor.
+  class FilteringQualityAnalyzer {
+   public:
+    FilteringQualityAnalyzer(const EchoCanceller3Config& config,
+                             size_t num_capture_channels);
+
+    // Returns whether the linear filter can be used for the echo
+    // canceller output.
+    bool LinearFilterUsable() const {
+      return overall_usable_linear_estimates_;
+    }
+
+    // Returns whether an individual filter output can be used for the echo
+    // canceller output.
+    const std::vector<bool>& UsableLinearFilterOutputs() const {
+      return usable_linear_filter_estimates_;
+    }
+
+    // Resets the state of the analyzer.
+    void Reset();
+
+    // Updates the analysis based on new data.
+    void Update(bool active_render,
+                bool transparent_mode,
+                bool saturated_capture,
+                const absl::optional<DelayEstimate>& external_delay,
+                bool any_filter_converged);
+
+   private:
+    const bool use_linear_filter_;
+    bool overall_usable_linear_estimates_ = false;
+    size_t filter_update_blocks_since_reset_ = 0;
+    size_t filter_update_blocks_since_start_ = 0;
+    bool convergence_seen_ = false;
+    std::vector<bool> usable_linear_filter_estimates_;
+  } filter_quality_state_;
+
+  // Class for detecting whether the echo is to be considered to be
+  // saturated.
+  class SaturationDetector {
+   public:
+    // Returns whether the echo is to be considered saturated.
+    bool SaturatedEcho() const { return saturated_echo_; }
+
+    // Updates the detection decision based on new data.
+    void Update(const Block& x,
+                bool saturated_capture,
+                bool usable_linear_estimate,
+                rtc::ArrayView<const SubtractorOutput> subtractor_output,
+                float echo_path_gain);
+
+   private:
+    bool saturated_echo_ = false;
+  } saturation_detector_;
+
+  ErlEstimator erl_estimator_;
+  ErleEstimator erle_estimator_;
+  size_t strong_not_saturated_render_blocks_ = 0;
+  size_t blocks_with_active_render_ = 0;
+  bool capture_signal_saturation_ = false;
+  FilterAnalyzer filter_analyzer_;
+  EchoAudibility echo_audibility_;
+  ReverbModelEstimator reverb_model_estimator_;
+  ReverbModel avg_render_reverb_;
+  SubtractorOutputAnalyzer subtractor_output_analyzer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc
new file mode 100644
index 0000000000..6662c8fb1a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/aec_state.h"
+
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+void RunNormalUsageTest(size_t num_render_channels,
+                        size_t num_capture_channels) {
+  // TODO(bugs.webrtc.org/10913): Test with different content in different
+  // channels.
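+  // For now, every render and capture channel is fed identical content.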
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  ApmDataDumper data_dumper(42);
+  EchoCanceller3Config config;
+  AecState state(config, num_capture_channels);
+  absl::optional<DelayEstimate> delay_estimate =
+      DelayEstimate(DelayEstimate::Quality::kRefined, 10);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_refined(
+      num_capture_channels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
+  Block x(kNumBands, num_render_channels);
+  EchoPathVariability echo_path_variability(
+      false, EchoPathVariability::DelayAdjustment::kNone, false);
+  std::vector<std::array<float, kBlockSize>> y(num_capture_channels);
+  std::vector<SubtractorOutput> subtractor_output(num_capture_channels);
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    subtractor_output[ch].Reset();
+    subtractor_output[ch].s_refined.fill(100.f);
+    subtractor_output[ch].e_refined.fill(100.f);
+    y[ch].fill(1000.f);
+    E2_refined[ch].fill(0.f);
+    Y2[ch].fill(0.f);
+  }
+  Aec3Fft fft;
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
+      converged_filter_frequency_response(
+          num_capture_channels,
+          std::vector<std::array<float, kFftLengthBy2Plus1>>(10));
+  for (auto& v_ch : converged_filter_frequency_response) {
+    for (auto& v : v_ch) {
+      v.fill(0.01f);
+    }
+  }
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
+      diverged_filter_frequency_response = converged_filter_frequency_response;
+  converged_filter_frequency_response[0][2].fill(100.f);
+  converged_filter_frequency_response[0][2][0] = 1.f;
+  std::vector<std::vector<float>> impulse_response(
+      num_capture_channels,
+      std::vector<float>(
+          GetTimeDomainLength(config.filter.refined.length_blocks), 0.f));
+
+  // Verify that linear AEC usability is true when the filter is converged.
+  for (size_t band = 0; band < kNumBands; ++band) {
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      std::fill(x.begin(band, ch), x.end(band, ch), 101.f);
+    }
+  }
+  for (int k = 0; k < 3000; ++k) {
+    render_delay_buffer->Insert(x);
+    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+      subtractor_output[ch].ComputeMetrics(y[ch]);
+    }
+    state.Update(delay_estimate, converged_filter_frequency_response,
+                 impulse_response, *render_delay_buffer->GetRenderBuffer(),
+                 E2_refined, Y2, subtractor_output);
+  }
+  EXPECT_TRUE(state.UsableLinearEstimate());
+
+  // Verify that linear AEC usability becomes false after an echo path
+  // change is reported.
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    subtractor_output[ch].ComputeMetrics(y[ch]);
+  }
+  state.HandleEchoPathChange(EchoPathVariability(
+      false, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false));
+  state.Update(delay_estimate, converged_filter_frequency_response,
+               impulse_response, *render_delay_buffer->GetRenderBuffer(),
+               E2_refined, Y2, subtractor_output);
+  EXPECT_FALSE(state.UsableLinearEstimate());
+
+  // Verify that the active render detection works as intended.
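+  // A single inserted render block must not flip the detector; the sustained
+  // activity over the 1000-block loop below is required.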
+ for (size_t ch = 0; ch < num_render_channels; ++ch) { + std::fill(x.begin(0, ch), x.end(0, ch), 101.f); + } + render_delay_buffer->Insert(x); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.HandleEchoPathChange(EchoPathVariability( + true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false)); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + EXPECT_FALSE(state.ActiveRender()); + + for (int k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + } + EXPECT_TRUE(state.ActiveRender()); + + // Verify that the ERL is properly estimated + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + std::fill(x.begin(band, channel), x.end(band, channel), 0.0f); + } + } + + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x.View(/*band=*/0, ch)[0] = 5000.f; + } + for (size_t k = 0; + k < render_delay_buffer->GetRenderBuffer()->GetFftBuffer().size(); ++k) { + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + } + + for (auto& Y2_ch : Y2) { + Y2_ch.fill(10.f * 10000.f * 10000.f); + } + for (size_t k = 0; k < 1000; ++k) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + } + + ASSERT_TRUE(state.UsableLinearEstimate()); + const std::array& erl = state.Erl(); + EXPECT_EQ(erl[0], erl[1]); + for (size_t k = 1; k < erl.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1); + } + EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]); + + // Verify that the ERLE is properly estimated + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(1.f * 10000.f * 10000.f); + } + for (auto& Y2_ch : Y2) { + Y2_ch.fill(10.f * E2_refined[0][0]); + } + for (size_t k = 0; k < 1000; ++k) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + } + ASSERT_TRUE(state.UsableLinearEstimate()); + { + // Note that the render spectrum is built so it does not have energy in + // the odd bands but just in the even bands. 
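+    // Consequently, only the even-indexed ERLE bins carry meaningful
+    // estimates in the checks below, while the odd-indexed bins are expected
+    // to remain close to 1.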
+ const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) { + EXPECT_NEAR(4.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; k = k + 2) { + EXPECT_NEAR(1.5f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(1.f * 10000.f * 10000.f); + } + for (auto& Y2_ch : Y2) { + Y2_ch.fill(5.f * E2_refined[0][0]); + } + for (size_t k = 0; k < 1000; ++k) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + } + + ASSERT_TRUE(state.UsableLinearEstimate()); + { + const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 4.f : 1.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } +} + +} // namespace + +class AecStateMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + AecStateMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(1, 2, 8))); + +// Verify the general functionality of AecState +TEST_P(AecStateMultiChannel, NormalUsage) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + RunNormalUsageTest(num_render_channels, num_capture_channels); +} + +// Verifies the delay for a converged filter is correctly identified. +TEST(AecState, ConvergedFilterDelay) { + constexpr int kFilterLengthBlocks = 10; + constexpr size_t kNumCaptureChannels = 1; + EchoCanceller3Config config; + AecState state(config, kNumCaptureChannels); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, 48000, 1)); + absl::optional delay_estimate; + std::vector> E2_refined( + kNumCaptureChannels); + std::vector> Y2(kNumCaptureChannels); + std::array x; + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + std::vector subtractor_output(kNumCaptureChannels); + for (auto& output : subtractor_output) { + output.Reset(); + output.s_refined.fill(100.f); + } + std::array y; + x.fill(0.f); + y.fill(0.f); + + std::vector>> + frequency_response(kNumCaptureChannels, + std::vector>( + kFilterLengthBlocks)); + for (auto& v_ch : frequency_response) { + for (auto& v : v_ch) { + v.fill(0.01f); + } + } + + std::vector> impulse_response( + kNumCaptureChannels, + std::vector( + GetTimeDomainLength(config.filter.refined.length_blocks), 0.f)); + + // Verify that the filter delay for a converged filter is properly + // identified. 
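+  // Each iteration below places a single impulse one sample into block k of
+  // the impulse response, which corresponds to a filter delay of k blocks.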
+ for (int k = 0; k < kFilterLengthBlocks; ++k) { + for (auto& ir : impulse_response) { + std::fill(ir.begin(), ir.end(), 0.f); + ir[k * kBlockSize + 1] = 1.f; + } + + state.HandleEchoPathChange(echo_path_variability); + subtractor_output[0].ComputeMetrics(y); + state.Update(delay_estimate, frequency_response, impulse_response, + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + subtractor_output); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc new file mode 100644 index 0000000000..7f076dea8e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/alignment_mixer.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix, + bool adaptive_selection, + int num_channels) { + RTC_DCHECK(!(adaptive_selection && downmix)); + RTC_DCHECK_LT(0, num_channels); + + if (num_channels == 1) { + return AlignmentMixer::MixingVariant::kFixed; + } + if (downmix) { + return AlignmentMixer::MixingVariant::kDownmix; + } + if (adaptive_selection) { + return AlignmentMixer::MixingVariant::kAdaptive; + } + return AlignmentMixer::MixingVariant::kFixed; +} + +} // namespace + +AlignmentMixer::AlignmentMixer( + size_t num_channels, + const EchoCanceller3Config::Delay::AlignmentMixing& config) + : AlignmentMixer(num_channels, + config.downmix, + config.adaptive_selection, + config.activity_power_threshold, + config.prefer_first_two_channels) {} + +AlignmentMixer::AlignmentMixer(size_t num_channels, + bool downmix, + bool adaptive_selection, + float activity_power_threshold, + bool prefer_first_two_channels) + : num_channels_(num_channels), + one_by_num_channels_(1.f / num_channels_), + excitation_energy_threshold_(kBlockSize * activity_power_threshold), + prefer_first_two_channels_(prefer_first_two_channels), + selection_variant_( + ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) { + if (selection_variant_ == MixingVariant::kAdaptive) { + std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0); + cumulative_energies_.resize(num_channels_); + std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f); + } +} + +void AlignmentMixer::ProduceOutput(const Block& x, + rtc::ArrayView y) { + RTC_DCHECK_EQ(x.NumChannels(), num_channels_); + + if (selection_variant_ == MixingVariant::kDownmix) { + Downmix(x, y); + return; + } + + int ch = selection_variant_ == MixingVariant::kFixed ? 
0 : SelectChannel(x);
+
+  RTC_DCHECK_GT(x.NumChannels(), ch);
+  std::copy(x.begin(/*band=*/0, ch), x.end(/*band=*/0, ch), y.begin());
+}
+
+void AlignmentMixer::Downmix(const Block& x,
+                             rtc::ArrayView<float, kBlockSize> y) const {
+  RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
+  RTC_DCHECK_GE(num_channels_, 2);
+  std::memcpy(&y[0], x.View(/*band=*/0, /*channel=*/0).data(),
+              kBlockSize * sizeof(y[0]));
+  for (size_t ch = 1; ch < num_channels_; ++ch) {
+    const auto x_ch = x.View(/*band=*/0, ch);
+    for (size_t i = 0; i < kBlockSize; ++i) {
+      y[i] += x_ch[i];
+    }
+  }
+
+  for (size_t i = 0; i < kBlockSize; ++i) {
+    y[i] *= one_by_num_channels_;
+  }
+}
+
+int AlignmentMixer::SelectChannel(const Block& x) {
+  RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
+  RTC_DCHECK_GE(num_channels_, 2);
+  RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);
+
+  constexpr size_t kBlocksToChooseLeftOrRight =
+      static_cast<size_t>(0.5f * kNumBlocksPerSecond);
+  const bool good_signal_in_left_or_right =
+      prefer_first_two_channels_ &&
+      (strong_block_counters_[0] > kBlocksToChooseLeftOrRight ||
+       strong_block_counters_[1] > kBlocksToChooseLeftOrRight);
+
+  const int num_ch_to_analyze =
+      good_signal_in_left_or_right ? 2 : num_channels_;
+
+  constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond;
+  ++block_counter_;
+
+  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
+    float x2_sum = 0.f;
+    rtc::ArrayView<const float> x_ch = x.View(/*band=*/0, ch);
+    for (size_t i = 0; i < kBlockSize; ++i) {
+      x2_sum += x_ch[i] * x_ch[i];
+    }
+
+    if (ch < 2 && x2_sum > excitation_energy_threshold_) {
+      ++strong_block_counters_[ch];
+    }
+
+    if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) {
+      cumulative_energies_[ch] += x2_sum;
+    } else {
+      constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond);
+      cumulative_energies_[ch] +=
+          kSmoothing * (x2_sum - cumulative_energies_[ch]);
+    }
+  }
+
+  // Normalize the energies so that the energy computations from now on can
+  // be based on smoothing.
+  if (block_counter_ == kNumBlocksBeforeEnergySmoothing) {
+    constexpr float kOneByNumBlocksBeforeEnergySmoothing =
+        1.f / kNumBlocksBeforeEnergySmoothing;
+    for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
+      cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing;
+    }
+  }
+
+  int strongest_ch = 0;
+  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
+    if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) {
+      strongest_ch = ch;
+    }
+  }
+
+  if ((good_signal_in_left_or_right && selected_channel_ > 1) ||
+      cumulative_energies_[strongest_ch] >
+          2.f * cumulative_energies_[selected_channel_]) {
+    selected_channel_ = strongest_ch;
+  }
+
+  return selected_channel_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h
new file mode 100644
index 0000000000..b3ed04755c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block.h"
+
+namespace webrtc {
+
+// Performs channel conversion to mono for the purpose of providing a decent
+// mono input for the delay estimation. This is achieved by analyzing all
+// incoming channels and producing a single channel output.
+class AlignmentMixer {
+ public:
+  AlignmentMixer(size_t num_channels,
+                 const EchoCanceller3Config::Delay::AlignmentMixing& config);
+
+  AlignmentMixer(size_t num_channels,
+                 bool downmix,
+                 bool adaptive_selection,
+                 float excitation_limit,
+                 bool prefer_first_two_channels);
+
+  void ProduceOutput(const Block& x, rtc::ArrayView<float, kBlockSize> y);
+
+  enum class MixingVariant { kDownmix, kAdaptive, kFixed };
+
+ private:
+  const size_t num_channels_;
+  const float one_by_num_channels_;
+  const float excitation_energy_threshold_;
+  const bool prefer_first_two_channels_;
+  const MixingVariant selection_variant_;
+  std::array<size_t, 2> strong_block_counters_;
+  std::vector<float> cumulative_energies_;
+  int selected_channel_ = 0;
+  size_t block_counter_ = 0;
+
+  void Downmix(const Block& x, rtc::ArrayView<float, kBlockSize> y) const;
+  int SelectChannel(const Block& x);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc
new file mode 100644
index 0000000000..eaf6dcb235
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/alignment_mixer.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::AllOf; +using ::testing::Each; + +namespace webrtc { +namespace { +std::string ProduceDebugText(bool initial_silence, + bool huge_activity_threshold, + bool prefer_first_two_channels, + int num_channels, + int strongest_ch) { + rtc::StringBuilder ss; + ss << ", Initial silence: " << initial_silence; + ss << ", Huge activity threshold: " << huge_activity_threshold; + ss << ", Prefer first two channels: " << prefer_first_two_channels; + ss << ", Number of channels: " << num_channels; + ss << ", Strongest channel: " << strongest_ch; + return ss.Release(); +} + +} // namespace + +TEST(AlignmentMixer, GeneralAdaptiveMode) { + constexpr int kChannelOffset = 100; + constexpr int kMaxChannelsToTest = 8; + constexpr float kStrongestSignalScaling = + kMaxChannelsToTest * kChannelOffset * 100; + + for (bool initial_silence : {false, true}) { + for (bool huge_activity_threshold : {false, true}) { + for (bool prefer_first_two_channels : {false, true}) { + for (int num_channels = 2; num_channels < 8; ++num_channels) { + for (int strongest_ch = 0; strongest_ch < num_channels; + ++strongest_ch) { + SCOPED_TRACE(ProduceDebugText( + initial_silence, huge_activity_threshold, + prefer_first_two_channels, num_channels, strongest_ch)); + const float excitation_limit = + huge_activity_threshold ? 1000000000.f : 0.001f; + AlignmentMixer am(num_channels, /*downmix*/ false, + /*adaptive_selection*/ true, excitation_limit, + prefer_first_two_channels); + + Block x( + /*num_bands=*/1, num_channels); + if (initial_silence) { + std::array y; + for (int frame = 0; frame < 10 * kNumBlocksPerSecond; ++frame) { + am.ProduceOutput(x, y); + } + } + + for (int frame = 0; frame < 2 * kNumBlocksPerSecond; ++frame) { + const auto channel_value = [&](int frame_index, + int channel_index) { + return static_cast(frame_index + + channel_index * kChannelOffset); + }; + + for (int ch = 0; ch < num_channels; ++ch) { + float scaling = + ch == strongest_ch ? 
kStrongestSignalScaling : 1.f; + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), + channel_value(frame, ch) * scaling); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + + if (frame > 1 * kNumBlocksPerSecond) { + if (!prefer_first_two_channels || huge_activity_threshold) { + EXPECT_THAT(y, + AllOf(Each(x.View(/*band=*/0, strongest_ch)[0]))); + } else { + bool left_or_right_chosen; + for (int ch = 0; ch < 2; ++ch) { + left_or_right_chosen = true; + const auto x_ch = x.View(/*band=*/0, ch); + for (size_t k = 0; k < kBlockSize; ++k) { + if (y[k] != x_ch[k]) { + left_or_right_chosen = false; + break; + } + } + if (left_or_right_chosen) { + break; + } + } + EXPECT_TRUE(left_or_right_chosen); + } + } + } + } + } + } + } + } +} + +TEST(AlignmentMixer, DownmixMode) { + for (int num_channels = 1; num_channels < 8; ++num_channels) { + AlignmentMixer am(num_channels, /*downmix*/ true, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + + Block x(/*num_bands=*/1, num_channels); + const auto channel_value = [](int frame_index, int channel_index) { + return static_cast(frame_index + channel_index); + }; + for (int frame = 0; frame < 10; ++frame) { + for (int ch = 0; ch < num_channels; ++ch) { + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), channel_value(frame, ch)); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + + float expected_mixed_value = 0.f; + for (int ch = 0; ch < num_channels; ++ch) { + expected_mixed_value += channel_value(frame, ch); + } + expected_mixed_value *= 1.f / num_channels; + + EXPECT_THAT(y, AllOf(Each(expected_mixed_value))); + } + } +} + +TEST(AlignmentMixer, FixedMode) { + for (int num_channels = 1; num_channels < 8; ++num_channels) { + AlignmentMixer am(num_channels, /*downmix*/ false, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + + Block x(/*num_band=*/1, num_channels); + const auto channel_value = [](int frame_index, int channel_index) { + return static_cast(frame_index + channel_index); + }; + for (int frame = 0; frame < 10; ++frame) { + for (int ch = 0; ch < num_channels; ++ch) { + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), channel_value(frame, ch)); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + EXPECT_THAT(y, AllOf(Each(x.View(/*band=*/0, /*channel=*/0)[0]))); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +TEST(AlignmentMixerDeathTest, ZeroNumChannels) { + EXPECT_DEATH( + AlignmentMixer(/*num_channels*/ 0, /*downmix*/ false, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + , ""); +} + +TEST(AlignmentMixerDeathTest, IncorrectVariant) { + EXPECT_DEATH( + AlignmentMixer(/*num_channels*/ 1, /*downmix*/ true, + /*adaptive_selection*/ true, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + , ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc new file mode 100644 index 0000000000..45f56a5dce --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+bool TimeToReportMetrics(int frames_since_last_report) {
+  constexpr int kNumFramesPerSecond = 100;
+  constexpr int kReportingIntervalFrames = 10 * kNumFramesPerSecond;
+  return frames_since_last_report == kReportingIntervalFrames;
+}
+
+}  // namespace
+
+ApiCallJitterMetrics::Jitter::Jitter()
+    : max_(0), min_(std::numeric_limits<int>::max()) {}
+
+void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) {
+  min_ = std::min(min_, num_api_calls_in_a_row);
+  max_ = std::max(max_, num_api_calls_in_a_row);
+}
+
+void ApiCallJitterMetrics::Jitter::Reset() {
+  min_ = std::numeric_limits<int>::max();
+  max_ = 0;
+}
+
+void ApiCallJitterMetrics::Reset() {
+  render_jitter_.Reset();
+  capture_jitter_.Reset();
+  num_api_calls_in_a_row_ = 0;
+  frames_since_last_report_ = 0;
+  last_call_was_render_ = false;
+  proper_call_observed_ = false;
+}
+
+void ApiCallJitterMetrics::ReportRenderCall() {
+  if (!last_call_was_render_) {
+    // If the previous call was a capture and a proper call has been observed
+    // (containing both render and capture data), store the last number of
+    // capture calls into the metrics.
+    if (proper_call_observed_) {
+      capture_jitter_.Update(num_api_calls_in_a_row_);
+    }
+
+    // Reset the call counter to start counting render calls.
+    num_api_calls_in_a_row_ = 0;
+  }
+  ++num_api_calls_in_a_row_;
+  last_call_was_render_ = true;
+}
+
+void ApiCallJitterMetrics::ReportCaptureCall() {
+  if (last_call_was_render_) {
+    // If the previous call was a render and a proper call has been observed
+    // (containing both render and capture data), store the last number of
+    // render calls into the metrics.
+    if (proper_call_observed_) {
+      render_jitter_.Update(num_api_calls_in_a_row_);
+    }
+    // Reset the call counter to start counting capture calls.
+    num_api_calls_in_a_row_ = 0;
+
+    // If this statement is reached, at least one render and one capture call
+    // have been observed.
+    proper_call_observed_ = true;
+  }
+  ++num_api_calls_in_a_row_;
+  last_call_was_render_ = false;
+
+  // Only report and update jitter metrics when a proper call, containing
+  // both render and capture data, has been observed.
+  if (proper_call_observed_ &&
+      TimeToReportMetrics(++frames_since_last_report_)) {
+    // Report jitter, where the basic unit is frames.
+    constexpr int kMaxJitterToReport = 50;
+
+    // Report max and min jitter for render and capture, in units of 20 ms.
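+    // Values above kMaxJitterToReport are clamped into the top bucket of
+    // the linear histograms below.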
+ RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MaxRenderJitter", + std::min(kMaxJitterToReport, render_jitter().max()), 1, + kMaxJitterToReport, kMaxJitterToReport); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MinRenderJitter", + std::min(kMaxJitterToReport, render_jitter().min()), 1, + kMaxJitterToReport, kMaxJitterToReport); + + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MaxCaptureJitter", + std::min(kMaxJitterToReport, capture_jitter().max()), 1, + kMaxJitterToReport, kMaxJitterToReport); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MinCaptureJitter", + std::min(kMaxJitterToReport, capture_jitter().min()), 1, + kMaxJitterToReport, kMaxJitterToReport); + + frames_since_last_report_ = 0; + Reset(); + } +} + +bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const { + return TimeToReportMetrics(frames_since_last_report_ + 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h new file mode 100644 index 0000000000..dd1fa82e93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ + +namespace webrtc { + +// Stores data for reporting metrics on the API call jitter. +class ApiCallJitterMetrics { + public: + class Jitter { + public: + Jitter(); + void Update(int num_api_calls_in_a_row); + void Reset(); + + int min() const { return min_; } + int max() const { return max_; } + + private: + int max_; + int min_; + }; + + ApiCallJitterMetrics() { Reset(); } + + // Update metrics for render API call. + void ReportRenderCall(); + + // Update and periodically report metrics for capture API call. + void ReportCaptureCall(); + + // Methods used only for testing. + const Jitter& render_jitter() const { return render_jitter_; } + const Jitter& capture_jitter() const { return capture_jitter_; } + bool WillReportMetricsAtNextCapture() const; + + private: + void Reset(); + + Jitter render_jitter_; + Jitter capture_jitter_; + + int num_api_calls_in_a_row_ = 0; + int frames_since_last_report_ = 0; + bool last_call_was_render_ = false; + bool proper_call_observed_ = false; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc new file mode 100644 index 0000000000..b902487152 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/api_call_jitter_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verify constant jitter. +TEST(ApiCallJitterMetrics, ConstantJitter) { + for (int jitter = 1; jitter < 20; ++jitter) { + ApiCallJitterMetrics metrics; + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + for (int j = 0; j < jitter; ++j) { + metrics.ReportRenderCall(); + } + + for (int j = 0; j < jitter; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(jitter, metrics.render_jitter().min()); + EXPECT_EQ(jitter, metrics.render_jitter().max()); + EXPECT_EQ(jitter, metrics.capture_jitter().min()); + EXPECT_EQ(jitter, metrics.capture_jitter().max()); + } + } + } + } +} + +// Verify peaky jitter for the render. +TEST(ApiCallJitterMetrics, JitterPeakRender) { + constexpr int kMinJitter = 2; + constexpr int kJitterPeak = 10; + constexpr int kPeakInterval = 100; + + ApiCallJitterMetrics metrics; + int render_surplus = 0; + + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + const int num_render_calls = + k % kPeakInterval == 0 ? kJitterPeak : kMinJitter; + for (int j = 0; j < num_render_calls; ++j) { + metrics.ReportRenderCall(); + ++render_surplus; + } + + ASSERT_LE(kMinJitter, render_surplus); + const int num_capture_calls = + render_surplus == kMinJitter ? kMinJitter : kMinJitter + 1; + for (int j = 0; j < num_capture_calls; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(kMinJitter, metrics.render_jitter().min()); + EXPECT_EQ(kJitterPeak, metrics.render_jitter().max()); + EXPECT_EQ(kMinJitter, metrics.capture_jitter().min()); + EXPECT_EQ(kMinJitter + 1, metrics.capture_jitter().max()); + } + --render_surplus; + } + } +} + +// Verify peaky jitter for the capture. +TEST(ApiCallJitterMetrics, JitterPeakCapture) { + constexpr int kMinJitter = 2; + constexpr int kJitterPeak = 10; + constexpr int kPeakInterval = 100; + + ApiCallJitterMetrics metrics; + int capture_surplus = kMinJitter; + + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + ASSERT_LE(kMinJitter, capture_surplus); + const int num_render_calls = + capture_surplus == kMinJitter ? kMinJitter : kMinJitter + 1; + for (int j = 0; j < num_render_calls; ++j) { + metrics.ReportRenderCall(); + --capture_surplus; + } + + const int num_capture_calls = + k % kPeakInterval == 0 ? kJitterPeak : kMinJitter; + for (int j = 0; j < num_capture_calls; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(kMinJitter, metrics.render_jitter().min()); + EXPECT_EQ(kMinJitter + 1, metrics.render_jitter().max()); + EXPECT_EQ(kMinJitter, metrics.capture_jitter().min()); + EXPECT_EQ(kJitterPeak, metrics.capture_jitter().max()); + } + ++capture_surplus; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block.h b/third_party/libwebrtc/modules/audio_processing/aec3/block.h new file mode 100644 index 0000000000..c1fc70722d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Contains one or more channels of 4 milliseconds of audio data. +// The audio is split in one or more frequency bands, each with a sampling +// rate of 16 kHz. +class Block { + public: + Block(int num_bands, int num_channels, float default_value = 0.0f) + : num_bands_(num_bands), + num_channels_(num_channels), + data_(num_bands * num_channels * kBlockSize, default_value) {} + + // Returns the number of bands. + int NumBands() const { return num_bands_; } + + // Returns the number of channels. + int NumChannels() const { return num_channels_; } + + // Modifies the number of channels and sets all samples to zero. + void SetNumChannels(int num_channels) { + num_channels_ = num_channels; + data_.resize(num_bands_ * num_channels_ * kBlockSize); + std::fill(data_.begin(), data_.end(), 0.0f); + } + + // Iterators for accessing the data. + auto begin(int band, int channel) { + return data_.begin() + GetIndex(band, channel); + } + + auto begin(int band, int channel) const { + return data_.begin() + GetIndex(band, channel); + } + + auto end(int band, int channel) { return begin(band, channel) + kBlockSize; } + + auto end(int band, int channel) const { + return begin(band, channel) + kBlockSize; + } + + // Access data via ArrayView. + rtc::ArrayView View(int band, int channel) { + return rtc::ArrayView(&data_[GetIndex(band, channel)], + kBlockSize); + } + + rtc::ArrayView View(int band, int channel) const { + return rtc::ArrayView( + &data_[GetIndex(band, channel)], kBlockSize); + } + + // Lets two Blocks swap audio data. + void Swap(Block& b) { + std::swap(num_bands_, b.num_bands_); + std::swap(num_channels_, b.num_channels_); + data_.swap(b.data_); + } + + private: + // Returns the index of the first sample of the requested |band| and + // |channel|. + int GetIndex(int band, int channel) const { + return (band * num_channels_ + channel) * kBlockSize; + } + + int num_bands_; + int num_channels_; + std::vector data_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc new file mode 100644 index 0000000000..289c3f0d10 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_buffer.h" + +#include + +namespace webrtc { + +BlockBuffer::BlockBuffer(size_t size, size_t num_bands, size_t num_channels) + : size(static_cast(size)), + buffer(size, Block(num_bands, num_channels)) {} + +BlockBuffer::~BlockBuffer() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h new file mode 100644 index 0000000000..3489d51646 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ + +#include + +#include + +#include "modules/audio_processing/aec3/block.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Struct for bundling a circular buffer of two dimensional vector objects +// together with the read and write indices. +struct BlockBuffer { + BlockBuffer(size_t size, size_t num_bands, size_t num_channels); + ~BlockBuffer(); + + int IncIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index < size - 1 ? index + 1 : 0; + } + + int DecIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index > 0 ? index - 1 : size - 1; + } + + int OffsetIndex(int index, int offset) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + RTC_DCHECK_GE(size, offset); + return (size + index + offset) % size; + } + + void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); } + void IncWriteIndex() { write = IncIndex(write); } + void DecWriteIndex() { write = DecIndex(write); } + void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); } + void IncReadIndex() { read = IncIndex(read); } + void DecReadIndex() { read = DecIndex(read); } + + const int size; + std::vector buffer; + int write = 0; + int read = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc new file mode 100644 index 0000000000..059bbafcdb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/block_delay_buffer.h" + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockDelayBuffer::BlockDelayBuffer(size_t num_channels, + size_t num_bands, + size_t frame_length, + size_t delay_samples) + : frame_length_(frame_length), + delay_(delay_samples), + buf_(num_channels, + std::vector>(num_bands, + std::vector(delay_, 0.f))) {} + +BlockDelayBuffer::~BlockDelayBuffer() = default; + +void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) { + RTC_DCHECK_EQ(buf_.size(), frame->num_channels()); + if (delay_ == 0) { + return; + } + + const size_t num_bands = buf_[0].size(); + const size_t num_channels = buf_.size(); + + const size_t i_start = last_insert_; + size_t i = 0; + for (size_t ch = 0; ch < num_channels; ++ch) { + RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands()); + RTC_DCHECK_EQ(buf_[ch].size(), num_bands); + rtc::ArrayView frame_ch(frame->split_bands(ch), num_bands); + const size_t delay = delay_; + + for (size_t band = 0; band < num_bands; ++band) { + RTC_DCHECK_EQ(delay_, buf_[ch][band].size()); + i = i_start; + + // Offloading these pointers and class variables to local variables allows + // the compiler to optimize the below loop when compiling with + // '-fno-strict-aliasing'. + float* buf_ch_band = buf_[ch][band].data(); + float* frame_ch_band = frame_ch[band]; + + for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) { + const float tmp = buf_ch_band[i]; + buf_ch_band[i] = frame_ch_band[k]; + frame_ch_band[k] = tmp; + + i = i < delay - 1 ? i + 1 : 0; + } + } + } + + last_insert_ = i; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h new file mode 100644 index 0000000000..711a790bfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_ + +#include + +#include + +#include "modules/audio_processing/audio_buffer.h" + +namespace webrtc { + +// Class for applying a fixed delay to the samples in a signal partitioned using +// the audiobuffer band-splitting scheme. +class BlockDelayBuffer { + public: + BlockDelayBuffer(size_t num_channels, + size_t num_bands, + size_t frame_length, + size_t delay_samples); + ~BlockDelayBuffer(); + + // Delays the samples by the specified delay. 
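+  // The delay is realized as a per-band, per-channel circular buffer: each
+  // incoming sample is swapped with the stored sample at the current buffer
+  // position, so the output lags the input by `delay_samples` samples.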
+  void DelaySignal(AudioBuffer* frame);
+
+ private:
+  const size_t frame_length_;
+  const size_t delay_;
+  std::vector<std::vector<std::vector<float>>> buf_;
+  size_t last_insert_ = 0;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc
new file mode 100644
index 0000000000..011ab49651
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/block_delay_buffer.h"
+
+#include <string>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+float SampleValue(size_t sample_index) {
+  return sample_index % 32768;
+}
+
+// Populates the frame with linearly increasing sample values for each band.
+void PopulateInputFrame(size_t frame_length,
+                        size_t num_bands,
+                        size_t first_sample_index,
+                        float* const* frame) {
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      frame[k][i] = SampleValue(first_sample_index + i);
+    }
+  }
+}
+
+std::string ProduceDebugText(int sample_rate_hz, size_t delay) {
+  char log_stream_buffer[8 * 1024];
+  rtc::SimpleStringBuilder ss(log_stream_buffer);
+  ss << "Sample rate: " << sample_rate_hz;
+  ss << ", Delay: " << delay;
+  return ss.str();
+}
+
+}  // namespace
+
+class BlockDelayBufferTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, int, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    ParameterCombinations,
+    BlockDelayBufferTest,
+    ::testing::Combine(::testing::Values(0, 1, 27, 160, 4321, 7021),
+                       ::testing::Values(16000, 32000, 48000),
+                       ::testing::Values(1, 2, 4)));
+
+// Verifies that the correct signal delay is achieved.
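+// For every (delay, rate, channel count) combination the first `delay`
+// samples of the output are expected to be exactly zero, after which the
+// linearly increasing input sequence reappears shifted by the delay.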
+TEST_P(BlockDelayBufferTest, CorrectDelayApplied) { + const size_t delay = std::get<0>(GetParam()); + const int rate = std::get<1>(GetParam()); + const size_t num_channels = std::get<2>(GetParam()); + + SCOPED_TRACE(ProduceDebugText(rate, delay)); + size_t num_bands = NumBandsForRate(rate); + size_t subband_frame_length = 160; + + BlockDelayBuffer delay_buffer(num_channels, num_bands, subband_frame_length, + delay); + + static constexpr size_t kNumFramesToProcess = 20; + for (size_t frame_index = 0; frame_index < kNumFramesToProcess; + ++frame_index) { + AudioBuffer audio_buffer(rate, num_channels, rate, num_channels, rate, + num_channels); + if (rate > 16000) { + audio_buffer.SplitIntoFrequencyBands(); + } + size_t first_sample_index = frame_index * subband_frame_length; + for (size_t ch = 0; ch < num_channels; ++ch) { + PopulateInputFrame(subband_frame_length, num_bands, first_sample_index, + &audio_buffer.split_bands(ch)[0]); + } + delay_buffer.DelaySignal(&audio_buffer); + + for (size_t ch = 0; ch < num_channels; ++ch) { + for (size_t band = 0; band < num_bands; ++band) { + size_t sample_index = first_sample_index; + for (size_t i = 0; i < subband_frame_length; ++i, ++sample_index) { + if (sample_index < delay) { + EXPECT_EQ(0.f, audio_buffer.split_bands(ch)[band][i]); + } else { + EXPECT_EQ(SampleValue(sample_index - delay), + audio_buffer.split_bands(ch)[band][i]); + } + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc new file mode 100644 index 0000000000..4243ddeba0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockFramer::BlockFramer(size_t num_bands, size_t num_channels) + : num_bands_(num_bands), + num_channels_(num_channels), + buffer_(num_bands_, + std::vector>( + num_channels, + std::vector(kBlockSize, 0.f))) { + RTC_DCHECK_LT(0, num_bands); + RTC_DCHECK_LT(0, num_channels); +} + +BlockFramer::~BlockFramer() = default; + +// All the constants are chosen so that the buffer is either empty or has enough +// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to +// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need +// to be called in the correct order. 
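+//
+// With 80-sample sub-frames and 64-sample blocks, each
+// InsertBlockAndExtractSubFrame() call drains 16 more samples than it
+// inserts, so after four such calls the buffer is empty and exactly one
+// InsertBlock() call is required before the next extraction. A correct
+// call sequence thus looks like:
+//
+//   framer.InsertBlockAndExtractSubFrame(block, &sub_frame);  // 4 times
+//   framer.InsertBlock(block);                                // then once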
+void BlockFramer::InsertBlock(const Block& block) {
+  RTC_DCHECK_EQ(num_bands_, block.NumBands());
+  RTC_DCHECK_EQ(num_channels_, block.NumChannels());
+  for (size_t band = 0; band < num_bands_; ++band) {
+    for (size_t channel = 0; channel < num_channels_; ++channel) {
+      RTC_DCHECK_EQ(0, buffer_[band][channel].size());
+
+      buffer_[band][channel].insert(buffer_[band][channel].begin(),
+                                    block.begin(band, channel),
+                                    block.end(band, channel));
+    }
+  }
+}
+
+void BlockFramer::InsertBlockAndExtractSubFrame(
+    const Block& block,
+    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
+  RTC_DCHECK(sub_frame);
+  RTC_DCHECK_EQ(num_bands_, block.NumBands());
+  RTC_DCHECK_EQ(num_channels_, block.NumChannels());
+  RTC_DCHECK_EQ(num_bands_, sub_frame->size());
+  for (size_t band = 0; band < num_bands_; ++band) {
+    RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
+    for (size_t channel = 0; channel < num_channels_; ++channel) {
+      RTC_DCHECK_LE(kSubFrameLength,
+                    buffer_[band][channel].size() + kBlockSize);
+      RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
+      RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());
+
+      const int samples_to_frame =
+          kSubFrameLength - buffer_[band][channel].size();
+      std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
+                (*sub_frame)[band][channel].begin());
+      std::copy(
+          block.begin(band, channel),
+          block.begin(band, channel) + samples_to_frame,
+          (*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
+      buffer_[band][channel].clear();
+      buffer_[band][channel].insert(
+          buffer_[band][channel].begin(),
+          block.begin(band, channel) + samples_to_frame,
+          block.end(band, channel));
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h
new file mode 100644
index 0000000000..e2cdd5a17c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block.h"
+
+namespace webrtc {
+
+// Class for producing frames consisting of 2 subframes of 80 samples each
+// from 64 sample blocks. The class is designed to work together with the
+// FrameBlocker class which performs the reverse conversion. Used together
+// with that, this class produces output frames at the same rate as frames
+// are received by the FrameBlocker class. Note that the internal buffers
+// will overrun if any other rate of packet insertion is used.
+class BlockFramer {
+ public:
+  BlockFramer(size_t num_bands, size_t num_channels);
+  ~BlockFramer();
+  BlockFramer(const BlockFramer&) = delete;
+  BlockFramer& operator=(const BlockFramer&) = delete;
+
+  // Adds a 64 sample block into the data that will form the next output frame.
+  void InsertBlock(const Block& block);
+  // Adds a 64 sample block and extracts an 80 sample subframe.
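+  // See the call-pattern note above InsertBlock() in block_framer.cc for
+  // how these two methods must be interleaved.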
+ void InsertBlockAndExtractSubFrame( + const Block& block, + std::vector>>* sub_frame); + + private: + const size_t num_bands_; + const size_t num_channels_; + std::vector>> buffer_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc new file mode 100644 index 0000000000..9439623f72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +void SetupSubFrameView( + std::vector>>* sub_frame, + std::vector>>* sub_frame_view) { + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[band].size(); + ++channel) { + (*sub_frame_view)[band][channel] = + rtc::ArrayView((*sub_frame)[band][channel].data(), + (*sub_frame)[band][channel].size()); + } + } +} + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t channel, + size_t sample_index, + int offset) { + float value = static_cast(100 + chunk_counter * chunk_size + + sample_index + channel) + + offset; + return 5000 * band + value; +} + +bool VerifySubFrame( + size_t sub_frame_counter, + int offset, + const std::vector>>& sub_frame_view) { + for (size_t band = 0; band < sub_frame_view.size(); ++band) { + for (size_t channel = 0; channel < sub_frame_view[band].size(); ++channel) { + for (size_t sample = 0; sample < sub_frame_view[band][channel].size(); + ++sample) { + const float reference_value = ComputeSampleValue( + sub_frame_counter, kSubFrameLength, band, channel, sample, offset); + if (reference_value != sub_frame_view[band][channel][sample]) { + return false; + } + } + } + } + return true; +} + +void FillBlock(size_t block_counter, Block* block) { + for (int band = 0; band < block->NumBands(); ++band) { + for (int channel = 0; channel < block->NumChannels(); ++channel) { + auto b = block->View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + b[sample] = ComputeSampleValue(block_counter, kBlockSize, band, channel, + sample, 0); + } + } + } +} + +// Verifies that the BlockFramer is able to produce the expected frame content. 
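+// The loop below mirrors the required call pattern: four
+// InsertBlockAndExtractSubFrame() calls followed by one InsertBlock() call.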
+void RunFramerTest(int sample_rate_hz, size_t num_channels) { + constexpr size_t kNumSubFramesToProcess = 10; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> output_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + num_bands, std::vector>(num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(num_bands, num_channels); + + size_t block_index = 0; + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillBlock(block_index++, &block); + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + if (sub_frame_index > 1) { + EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view)); + } + + if ((sub_frame_index + 1) % 4 == 0) { + FillBlock(block_index++, &block); + framer.InsertBlock(block); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the BlockFramer crashes if the InsertBlockAndExtractSubFrame +// method is called for inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertAndExtractParametersTest( + int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels, + size_t num_sub_frame_bands, + size_t num_sub_frame_channels, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_block_bands, num_block_channels); + std::vector>> output_sub_frame( + num_sub_frame_bands, + std::vector>( + num_sub_frame_channels, std::vector(sub_frame_length, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(num_sub_frame_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, correct_num_channels); + EXPECT_DEATH( + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called for +// inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertParameterTest(int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block correct_block(correct_num_bands, correct_num_channels); + Block wrong_block(num_block_bands, num_block_channels); + std::vector>> output_sub_frame( + correct_num_bands, + std::vector>( + correct_num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(correct_num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, correct_num_channels); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + + EXPECT_DEATH(framer.InsertBlock(wrong_block), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called +// after a wrong number of previous InsertBlockAndExtractSubFrame method calls +// have been made. 
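+// The framer is constructed with one block of zeros already buffered, so
+// InsertBlock() is only valid once that buffer has been drained, i.e. after
+// every fourth InsertBlockAndExtractSubFrame() call; zero to three preceding
+// calls are therefore expected to trigger the DCHECK.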
+ +void RunWronglyInsertOrderTest(int sample_rate_hz, + size_t num_channels, + size_t num_preceeding_api_calls) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(correct_num_bands, num_channels); + std::vector>> output_sub_frame( + correct_num_bands, + std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, num_channels); + for (size_t k = 0; k < num_preceeding_api_calls; ++k) { + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + } + + EXPECT_DEATH(framer.InsertBlock(block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz, size_t num_channels) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + ss << ", number of channels: " << num_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(BlockFramerDeathTest, + WrongNumberOfBandsInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfChannelsInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfBandsInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + wrong_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfChannelsInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, wrong_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfSamplesInSubFrameForInsertBlockAndExtractSubFrame) { + const size_t correct_num_channels = 1; + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate, 
correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength - 1); + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfBandsInBlockForInsertBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertParameterTest(rate, correct_num_channels, + wrong_num_bands, correct_num_channels); + } + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfChannelsInBlockForInsertBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertParameterTest(rate, correct_num_channels, + correct_num_bands, wrong_num_channels); + } + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfPreceedingApiCallsForInsertBlock) { + for (size_t num_channels : {1, 2, 8}) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_calls = 0; num_calls < 4; ++num_calls) { + rtc::StringBuilder ss; + ss << "Sample rate: " << rate; + ss << ", Num channels: " << num_channels; + ss << ", Num preceeding InsertBlockAndExtractSubFrame calls: " + << num_calls; + + SCOPED_TRACE(ss.str()); + RunWronglyInsertOrderTest(rate, num_channels, num_calls); + } + } + } +} + +// Verifies that the verification for 0 number of channels works. +TEST(BlockFramerDeathTest, ZeroNumberOfChannelsParameter) { + EXPECT_DEATH(BlockFramer(16000, 0), ""); +} + +// Verifies that the verification for 0 number of bands works. +TEST(BlockFramerDeathTest, ZeroNumberOfBandsParameter) { + EXPECT_DEATH(BlockFramer(0, 1), ""); +} + +// Verifies that the verification for null sub_frame pointer works. +TEST(BlockFramerDeathTest, NullSubFrameParameter) { + EXPECT_DEATH( + BlockFramer(1, 1).InsertBlockAndExtractSubFrame(Block(1, 1), nullptr), + ""); +} + +#endif + +TEST(BlockFramer, FrameBitexactness) { + for (auto rate : {16000, 32000, 48000}) { + for (auto num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunFramerTest(rate, num_channels); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc new file mode 100644 index 0000000000..63e3d9cc7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/block_processor.h" + +#include <stddef.h> + +#include <atomic> +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "api/audio/echo_control.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor_metrics.h" +#include "modules/audio_processing/aec3/delay_estimate.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +enum class BlockProcessorApiCall { kCapture, kRender }; + +class BlockProcessorImpl final : public BlockProcessor { + public: + BlockProcessorImpl(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover); + + BlockProcessorImpl() = delete; + + ~BlockProcessorImpl() override; + + void ProcessCapture(bool echo_path_gain_change, + bool capture_signal_saturation, + Block* linear_output, + Block* capture_block) override; + + void BufferRender(const Block& block) override; + + void UpdateEchoLeakageStatus(bool leakage_detected) override; + + void GetMetrics(EchoControl::Metrics* metrics) const override; + + void SetAudioBufferDelay(int delay_ms) override; + void SetCaptureOutputUsage(bool capture_output_used) override; + + private: + static std::atomic<int> instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + const EchoCanceller3Config config_; + bool capture_properly_started_ = false; + bool render_properly_started_ = false; + const size_t sample_rate_hz_; + std::unique_ptr<RenderDelayBuffer> render_buffer_; + std::unique_ptr<RenderDelayController> delay_controller_; + std::unique_ptr<EchoRemover> echo_remover_; + BlockProcessorMetrics metrics_; + RenderDelayBuffer::BufferingEvent render_event_; + size_t capture_call_counter_ = 0; + absl::optional<DelayEstimate> estimated_delay_; +}; + +std::atomic<int> BlockProcessorImpl::instance_count_(0); + +BlockProcessorImpl::BlockProcessorImpl( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + config_(config), + sample_rate_hz_(sample_rate_hz), + render_buffer_(std::move(render_buffer)), + delay_controller_(std::move(delay_controller)), + echo_remover_(std::move(echo_remover)), + render_event_(RenderDelayBuffer::BufferingEvent::kNone) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); +} + +BlockProcessorImpl::~BlockProcessorImpl() = default; + +void BlockProcessorImpl::ProcessCapture(bool echo_path_gain_change, + bool capture_signal_saturation, + Block* linear_output, + Block* capture_block) { + RTC_DCHECK(capture_block); + RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->NumBands()); + + capture_call_counter_++; + + data_dumper_->DumpRaw("aec3_processblock_call_order", + static_cast<int>(BlockProcessorApiCall::kCapture)); + data_dumper_->DumpWav("aec3_processblock_capture_input", + capture_block->View(/*band=*/0, /*channel=*/0), 16000, + 1); + + if (render_properly_started_) { + if
(!capture_properly_started_) { + capture_properly_started_ = true; + render_buffer_->Reset(); + if (delay_controller_) + delay_controller_->Reset(true); + } + } else { + // If no render data has yet arrived, do not process the capture signal. + render_buffer_->HandleSkippedCaptureProcessing(); + return; + } + + EchoPathVariability echo_path_variability( + echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone, + false); + + if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun && + render_properly_started_) { + echo_path_variability.delay_change = + EchoPathVariability::DelayAdjustment::kBufferFlush; + if (delay_controller_) + delay_controller_->Reset(true); + RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block " + << capture_call_counter_; + } + render_event_ = RenderDelayBuffer::BufferingEvent::kNone; + + // Update the render buffers with any newly arrived render blocks and prepare + // the render buffers for reading the render data corresponding to the current + // capture block. + RenderDelayBuffer::BufferingEvent buffer_event = + render_buffer_->PrepareCaptureProcessing(); + // Reset the delay controller at render buffer underrun. + if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) { + if (delay_controller_) + delay_controller_->Reset(false); + } + + data_dumper_->DumpWav("aec3_processblock_capture_input2", + capture_block->View(/*band=*/0, /*channel=*/0), 16000, + 1); + + bool has_delay_estimator = !config_.delay.use_external_delay_estimator; + if (has_delay_estimator) { + RTC_DCHECK(delay_controller_); + // Compute and apply the render delay required to achieve proper signal + // alignment. + estimated_delay_ = delay_controller_->GetDelay( + render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(), + *capture_block); + + if (estimated_delay_) { + bool delay_change = + render_buffer_->AlignFromDelay(estimated_delay_->delay); + if (delay_change) { + rtc::LoggingSeverity log_level = + config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING + : rtc::LS_INFO; + RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay + << " at block " << capture_call_counter_; + echo_path_variability.delay_change = + EchoPathVariability::DelayAdjustment::kNewDetectedDelay; + } + } + + echo_path_variability.clock_drift = delay_controller_->HasClockdrift(); + + } else { + render_buffer_->AlignFromExternalDelay(); + } + + // Remove the echo from the capture signal. + if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) { + echo_remover_->ProcessCapture( + echo_path_variability, capture_signal_saturation, estimated_delay_, + render_buffer_->GetRenderBuffer(), linear_output, capture_block); + } + + // Update the metrics. 
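+  // The `false` argument reports that no render-buffer underrun occurred for
+  // this capture block; BlockProcessorMetrics aggregates these flags and
+  // emits histograms once every kMetricsReportingIntervalBlocks blocks.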
+ metrics_.UpdateCapture(false); +} + +void BlockProcessorImpl::BufferRender(const Block& block) { + RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.NumBands()); + data_dumper_->DumpRaw("aec3_processblock_call_order", + static_cast<int>(BlockProcessorApiCall::kRender)); + data_dumper_->DumpWav("aec3_processblock_render_input", + block.View(/*band=*/0, /*channel=*/0), 16000, 1); + + render_event_ = render_buffer_->Insert(block); + + metrics_.UpdateRender(render_event_ != + RenderDelayBuffer::BufferingEvent::kNone); + + render_properly_started_ = true; + if (delay_controller_) + delay_controller_->LogRenderCall(); +} + +void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) { + echo_remover_->UpdateEchoLeakageStatus(leakage_detected); +} + +void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const { + echo_remover_->GetMetrics(metrics); + constexpr int block_size_ms = 4; + absl::optional<size_t> delay = render_buffer_->Delay(); + metrics->delay_ms = delay ? static_cast<int>(*delay) * block_size_ms : 0; +} + +void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) { + render_buffer_->SetAudioBufferDelay(delay_ms); +} + +void BlockProcessorImpl::SetCaptureOutputUsage(bool capture_output_used) { + echo_remover_->SetCaptureOutputUsage(capture_output_used); +} + +} // namespace + +BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels) { + std::unique_ptr<RenderDelayBuffer> render_buffer( + RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels)); + std::unique_ptr<RenderDelayController> delay_controller; + if (!config.delay.use_external_delay_estimator) { + delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz, + num_capture_channels)); + } + std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create( + config, sample_rate_hz, num_render_channels, num_capture_channels)); + return Create(config, sample_rate_hz, num_render_channels, + num_capture_channels, std::move(render_buffer), + std::move(delay_controller), std::move(echo_remover)); +} + +BlockProcessor* BlockProcessor::Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer) { + std::unique_ptr<RenderDelayController> delay_controller; + if (!config.delay.use_external_delay_estimator) { + delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz, + num_capture_channels)); + } + std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create( + config, sample_rate_hz, num_render_channels, num_capture_channels)); + return Create(config, sample_rate_hz, num_render_channels, + num_capture_channels, std::move(render_buffer), + std::move(delay_controller), std::move(echo_remover)); +} + +BlockProcessor* BlockProcessor::Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover) { + return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels, + num_capture_channels, std::move(render_buffer), + std::move(delay_controller), + std::move(echo_remover)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h new file mode 100644 index 0000000000..01a83ae5f7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h @@ -0,0
+1,81 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ + +#include <stddef.h> + +#include <memory> +#include <vector> + +#include "api/audio/echo_canceller3_config.h" +#include "api/audio/echo_control.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" + +namespace webrtc { + +// Class for performing echo cancellation on 64 sample blocks of audio data. +class BlockProcessor { + public: + static BlockProcessor* Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels); + // Only used for testing purposes. + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer); + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover); + + virtual ~BlockProcessor() = default; + + // Get current metrics. + virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0; + + // Provides an optional external estimate of the audio buffer delay. + virtual void SetAudioBufferDelay(int delay_ms) = 0; + + // Processes a block of capture data. + virtual void ProcessCapture(bool echo_path_gain_change, + bool capture_signal_saturation, + Block* linear_output, + Block* capture_block) = 0; + + // Buffers a block of render data supplied by a FrameBlocker object. + virtual void BufferRender(const Block& render_block) = 0; + + // Reports whether echo leakage has been detected in the echo canceller + // output. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; + + // Specifies whether the capture output will be used. The purpose of this is + // to allow the block processor to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + virtual void SetCaptureOutputUsage(bool capture_output_used) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc new file mode 100644 index 0000000000..deac1fcd22 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +enum class RenderUnderrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +enum class RenderOverrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +} // namespace + +void BlockProcessorMetrics::UpdateCapture(bool underrun) { + ++capture_block_counter_; + if (underrun) { + ++render_buffer_underruns_; + } + + if (capture_block_counter_ == kMetricsReportingIntervalBlocks) { + metrics_reported_ = true; + + RenderUnderrunCategory underrun_category; + if (render_buffer_underruns_ == 0) { + underrun_category = RenderUnderrunCategory::kNone; + } else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) { + underrun_category = RenderUnderrunCategory::kConstant; + } else if (render_buffer_underruns_ > 100) { + underrun_category = RenderUnderrunCategory::kMany; + } else if (render_buffer_underruns_ > 10) { + underrun_category = RenderUnderrunCategory::kSeveral; + } else { + underrun_category = RenderUnderrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderUnderruns", + static_cast<int>(underrun_category), + static_cast<int>(RenderUnderrunCategory::kNumCategories)); + + RenderOverrunCategory overrun_category; + if (render_buffer_overruns_ == 0) { + overrun_category = RenderOverrunCategory::kNone; + } else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) { + overrun_category = RenderOverrunCategory::kConstant; + } else if (render_buffer_overruns_ > 100) { + overrun_category = RenderOverrunCategory::kMany; + } else if (render_buffer_overruns_ > 10) { + overrun_category = RenderOverrunCategory::kSeveral; + } else { + overrun_category = RenderOverrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderOverruns", + static_cast<int>(overrun_category), + static_cast<int>(RenderOverrunCategory::kNumCategories)); + + ResetMetrics(); + capture_block_counter_ = 0; + } else { + metrics_reported_ = false; + } +} + +void BlockProcessorMetrics::UpdateRender(bool overrun) { + ++buffer_render_calls_; + if (overrun) { + ++render_buffer_overruns_; + } +} + +void BlockProcessorMetrics::ResetMetrics() { + render_buffer_underruns_ = 0; + render_buffer_overruns_ = 0; + buffer_render_calls_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h new file mode 100644 index 0000000000..a70d0dac5b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ + +namespace webrtc { + +// Handles the reporting of metrics for the block_processor. +class BlockProcessorMetrics { + public: + BlockProcessorMetrics() = default; + + BlockProcessorMetrics(const BlockProcessorMetrics&) = delete; + BlockProcessorMetrics& operator=(const BlockProcessorMetrics&) = delete; + + // Updates the metric with new capture data. + void UpdateCapture(bool underrun); + + // Updates the metric with new render data. + void UpdateRender(bool overrun); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int capture_block_counter_ = 0; + bool metrics_reported_ = false; + int render_buffer_underruns_ = 0; + int render_buffer_overruns_ = 0; + int buffer_render_calls_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc new file mode 100644 index 0000000000..3e23c2499d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verify the general functionality of BlockProcessorMetrics. +TEST(BlockProcessorMetrics, NormalUsage) { + BlockProcessorMetrics metrics; + + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.UpdateRender(false); + metrics.UpdateRender(false); + metrics.UpdateCapture(false); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.UpdateCapture(false); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc new file mode 100644 index 0000000000..aba5c4186d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_processor.h" + +#include <memory> +#include <string> +#include <utility> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/mock/mock_echo_remover.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_buffer.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_controller.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::StrictMock; + +// Verifies that the basic BlockProcessor functionality works and that the API +// methods are callable. +void RunBasicSetupAndApiCallTest(int sample_rate_hz, int num_iterations) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(NumBandsForRate(sample_rate_hz), kNumRenderChannels, 1000.f); + for (int k = 0; k < num_iterations; ++k) { + block_processor->BufferRender(block); + block_processor->ProcessCapture(false, false, nullptr, &block); + block_processor->UpdateEchoLeakageStatus(false); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +void RunRenderBlockSizeVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(NumBandsForRate(sample_rate_hz), kNumRenderChannels); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunRenderNumBandsVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ? NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(wrong_num_bands, kNumRenderChannels); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunCaptureNumBandsVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ?
NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(wrong_num_bands, kNumRenderChannels); + + EXPECT_DEATH(block_processor->ProcessCapture(false, false, nullptr, &block), + ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +void FillSampleVector(int call_counter, + int delay, + rtc::ArrayView<float> samples) { + for (size_t i = 0; i < samples.size(); ++i) { + samples[i] = (call_counter - delay) * 10000.0f + i; + } +} + +} // namespace + +// Verifies that the delay controller functionality is properly integrated with +// the render delay buffer inside block processor. +// TODO(peah): Activate the unittest once the required code has been landed. +TEST(BlockProcessor, DISABLED_DelayControllerIntegration) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + constexpr size_t kNumBlocks = 310; + constexpr size_t kDelayInSamples = 640; + constexpr size_t kDelayHeadroom = 1; + constexpr size_t kDelayInBlocks = + kDelayInSamples / kBlockSize - kDelayHeadroom; + Random random_generator(42U); + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<testing::StrictMock<webrtc::test::MockRenderDelayBuffer>> + render_delay_buffer_mock( + new StrictMock<webrtc::test::MockRenderDelayBuffer>(rate, 1)); + EXPECT_CALL(*render_delay_buffer_mock, Insert(_)) + .Times(kNumBlocks) + .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone)); + EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(kDelayInBlocks)) + .Times(AtLeast(1)); + EXPECT_CALL(*render_delay_buffer_mock, MaxDelay()).WillOnce(Return(30)); + EXPECT_CALL(*render_delay_buffer_mock, Delay()) + .Times(kNumBlocks + 1) + .WillRepeatedly(Return(0)); + std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create( + EchoCanceller3Config(), rate, kNumRenderChannels, kNumCaptureChannels, + std::move(render_delay_buffer_mock))); + + Block render_block(NumBandsForRate(rate), kNumRenderChannels); + Block capture_block(NumBandsForRate(rate), kNumCaptureChannels); + DelayBuffer<float> signal_delay_buffer(kDelayInSamples); + for (size_t k = 0; k < kNumBlocks; ++k) { + RandomizeSampleVector(&random_generator, + render_block.View(/*band=*/0, /*capture=*/0)); + signal_delay_buffer.Delay(render_block.View(/*band=*/0, /*capture=*/0), + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->BufferRender(render_block); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + } + } +} + +// Verifies that BlockProcessor submodules are called in a proper manner.
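+// The test below uses StrictMock, so any submodule call that is not matched
+// by an EXPECT_CALL clause fails the test. A minimal sketch of the pattern,
+// assuming a gMock-style mock of one submodule:
+//
+//   StrictMock<webrtc::test::MockRenderDelayBuffer> buffer(16000, 1);
+//   EXPECT_CALL(buffer, Insert(_)).Times(kNumBlocks - 1);
+//   // ... exercise the code under test; any unexpected call fails.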
+TEST(BlockProcessor, DISABLED_SubmoduleIntegration) { + constexpr size_t kNumBlocks = 310; + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + Random random_generator(42U); + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<testing::StrictMock<webrtc::test::MockRenderDelayBuffer>> + render_delay_buffer_mock( + new StrictMock<webrtc::test::MockRenderDelayBuffer>(rate, 1)); + std::unique_ptr< + ::testing::StrictMock<webrtc::test::MockRenderDelayController>> + render_delay_controller_mock( + new StrictMock<webrtc::test::MockRenderDelayController>()); + std::unique_ptr<testing::StrictMock<webrtc::test::MockEchoRemover>> + echo_remover_mock(new StrictMock<webrtc::test::MockEchoRemover>()); + + EXPECT_CALL(*render_delay_buffer_mock, Insert(_)) + .Times(kNumBlocks - 1) + .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone)); + EXPECT_CALL(*render_delay_buffer_mock, PrepareCaptureProcessing()) + .Times(kNumBlocks); + EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(9)).Times(AtLeast(1)); + EXPECT_CALL(*render_delay_buffer_mock, Delay()) + .Times(kNumBlocks) + .WillRepeatedly(Return(0)); + EXPECT_CALL(*render_delay_controller_mock, GetDelay(_, _, _)) + .Times(kNumBlocks); + EXPECT_CALL(*echo_remover_mock, ProcessCapture(_, _, _, _, _, _)) + .Times(kNumBlocks); + EXPECT_CALL(*echo_remover_mock, UpdateEchoLeakageStatus(_)) + .Times(kNumBlocks); + + std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create( + EchoCanceller3Config(), rate, kNumRenderChannels, kNumCaptureChannels, + std::move(render_delay_buffer_mock), + std::move(render_delay_controller_mock), std::move(echo_remover_mock))); + + Block render_block(NumBandsForRate(rate), kNumRenderChannels); + Block capture_block(NumBandsForRate(rate), kNumCaptureChannels); + DelayBuffer<float> signal_delay_buffer(640); + for (size_t k = 0; k < kNumBlocks; ++k) { + RandomizeSampleVector(&random_generator, + render_block.View(/*band=*/0, /*capture=*/0)); + signal_delay_buffer.Delay(render_block.View(/*band=*/0, /*capture=*/0), + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->BufferRender(render_block); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + block_processor->UpdateEchoLeakageStatus(false); + } + } +} + +TEST(BlockProcessor, BasicSetupAndApiCalls) { + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunBasicSetupAndApiCallTest(rate, 1); + } +} + +TEST(BlockProcessor, TestLongerCall) { + RunBasicSetupAndApiCallTest(16000, 20 * kNumBlocksPerSecond); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// TODO(gustaf): Re-enable the test once the issue with memory leaks during +// DEATH tests on test bots has been fixed. +TEST(BlockProcessorDeathTest, DISABLED_VerifyRenderBlockSizeCheck) { + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunRenderBlockSizeVerificationTest(rate); + } +} + +TEST(BlockProcessorDeathTest, VerifyRenderNumBandsCheck) { + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunRenderNumBandsVerificationTest(rate); + } +} + +// TODO(peah): Verify the check for correct number of bands in the capture +// signal. +TEST(BlockProcessorDeathTest, VerifyCaptureNumBandsCheck) { + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunCaptureNumBandsVerificationTest(rate); + } +} + +// Verifies that the verification for null ProcessCapture input works.
+TEST(BlockProcessorDeathTest, NullProcessCaptureParameter) { + EXPECT_DEATH(std::unique_ptr<BlockProcessor>( + BlockProcessor::Create(EchoCanceller3Config(), 16000, 1, 1)) + ->ProcessCapture(false, false, nullptr, nullptr), + ""); +} + +// Verifies the check for correct sample rate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(BlockProcessor, DISABLED_WrongSampleRate) { + EXPECT_DEATH(std::unique_ptr<BlockProcessor>( + BlockProcessor::Create(EchoCanceller3Config(), 8001, 1, 1)), + ""); +} + +#endif + +// Verifies that external delay estimator delays are applied correctly when a +// call begins with a sequence of capture blocks. +TEST(BlockProcessor, ExternalDelayAppliedCorrectlyWithInitialCaptureCalls) { + constexpr int kNumRenderChannels = 1; + constexpr int kNumCaptureChannels = 1; + constexpr int kSampleRateHz = 16000; + + EchoCanceller3Config config; + config.delay.use_external_delay_estimator = true; + + std::unique_ptr<RenderDelayBuffer> delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); + + std::unique_ptr<::testing::NiceMock<webrtc::test::MockEchoRemover>> + echo_remover_mock(new NiceMock<webrtc::test::MockEchoRemover>()); + webrtc::test::MockEchoRemover* echo_remover_mock_pointer = + echo_remover_mock.get(); + + std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create( + config, kSampleRateHz, kNumRenderChannels, kNumCaptureChannels, + std::move(delay_buffer), /*delay_controller=*/nullptr, + std::move(echo_remover_mock))); + + Block render_block(NumBandsForRate(kSampleRateHz), kNumRenderChannels); + Block capture_block(NumBandsForRate(kSampleRateHz), kNumCaptureChannels); + + // Process... + // - 10 capture calls, where no render data is available, + // - 10 render calls, populating the buffer, + // - 2 capture calls, verifying that the delay was applied correctly.
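+  // The constants below are consistent with each other: at 16 kHz, the 20 ms
+  // kDelayInMs corresponds to 20 * 16 = 320 samples, i.e. 320 / kBlockSize =
+  // 320 / 64 = 5 blocks, which is the kDelayInBlocks offset passed to
+  // FillSampleVector.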
+ constexpr int kDelayInBlocks = 5; + constexpr int kDelayInMs = 20; + block_processor->SetAudioBufferDelay(kDelayInMs); + + int capture_call_counter = 0; + int render_call_counter = 0; + for (size_t k = 0; k < 10; ++k) { + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + } + for (size_t k = 0; k < 10; ++k) { + FillSampleVector(++render_call_counter, 0, + render_block.View(/*band=*/0, /*capture=*/0)); + block_processor->BufferRender(render_block); + } + + EXPECT_CALL(*echo_remover_mock_pointer, ProcessCapture) + .WillRepeatedly( + [](EchoPathVariability /*echo_path_variability*/, + bool /*capture_signal_saturation*/, + const absl::optional<DelayEstimate>& /*external_delay*/, + RenderBuffer* render_buffer, Block* /*linear_output*/, + Block* capture) { + const auto& render = render_buffer->GetBlock(0); + const auto render_view = render.View(/*band=*/0, /*channel=*/0); + const auto capture_view = capture->View(/*band=*/0, /*channel=*/0); + for (size_t i = 0; i < kBlockSize; ++i) { + EXPECT_FLOAT_EQ(render_view[i], capture_view[i]); + } + }); + + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc new file mode 100644 index 0000000000..2c49b795c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/clockdrift_detector.h" + +namespace webrtc { + +ClockdriftDetector::ClockdriftDetector() + : level_(Level::kNone), stability_counter_(0) { + delay_history_.fill(0); +} + +ClockdriftDetector::~ClockdriftDetector() = default; + +void ClockdriftDetector::Update(int delay_estimate) { + if (delay_estimate == delay_history_[0]) { + // Reset clockdrift level if delay estimate is stable for 7500 blocks (30 + // seconds). + if (++stability_counter_ > 7500) + level_ = Level::kNone; + return; + } + + stability_counter_ = 0; + const int d1 = delay_history_[0] - delay_estimate; + const int d2 = delay_history_[1] - delay_estimate; + const int d3 = delay_history_[2] - delay_estimate; + + // Patterns recognized as positive clockdrift: + // [x-3], x-2, x-1, x. + // [x-3], x-1, x-2, x. + const bool probable_drift_up = + (d1 == -1 && d2 == -2) || (d1 == -2 && d2 == -1); + const bool drift_up = probable_drift_up && d3 == -3; + + // Patterns recognized as negative clockdrift: + // [x+3], x+2, x+1, x. + // [x+3], x+1, x+2, x. + const bool probable_drift_down = (d1 == 1 && d2 == 2) || (d1 == 2 && d2 == 1); + const bool drift_down = probable_drift_down && d3 == 3; + + // Set clockdrift level.
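+  // Worked example, assuming delay_history_ = [x - 1, x - 2, x - 3] (most
+  // recent first) and a new estimate x: d1 = -1, d2 = -2, d3 = -3, so both
+  // probable_drift_up and drift_up hold and the level becomes kVerified.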
+ if (drift_up || drift_down) { + level_ = Level::kVerified; + } else if ((probable_drift_up || probable_drift_down) && + level_ == Level::kNone) { + level_ = Level::kProbable; + } + + // Shift delay history one step. + delay_history_[2] = delay_history_[1]; + delay_history_[1] = delay_history_[0]; + delay_history_[0] = delay_estimate; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h new file mode 100644 index 0000000000..2ba90bb889 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_ + +#include <stddef.h> + +#include <array> + +namespace webrtc { + +class ApmDataDumper; +struct DownsampledRenderBuffer; +struct EchoCanceller3Config; + +// Detects clockdrift by analyzing the estimated delay. +class ClockdriftDetector { + public: + enum class Level { kNone, kProbable, kVerified, kNumCategories }; + ClockdriftDetector(); + ~ClockdriftDetector(); + void Update(int delay_estimate); + Level ClockdriftLevel() const { return level_; } + + private: + std::array<int, 3> delay_history_; + Level level_; + size_t stability_counter_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc new file mode 100644 index 0000000000..0f98b01d3a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/clockdrift_detector.h" + +#include "test/gtest.h" + +namespace webrtc { +TEST(ClockdriftDetector, ClockdriftDetector) { + ClockdriftDetector c; + // No clockdrift at start. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone); + + // Monotonically increasing delay. + for (int i = 0; i < 100; i++) + c.Update(1000); + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone); + for (int i = 0; i < 100; i++) + c.Update(1001); + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone); + for (int i = 0; i < 100; i++) + c.Update(1002); + // Probable clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kProbable); + for (int i = 0; i < 100; i++) + c.Update(1003); + // Verified clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kVerified); + + // Stable delay.
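+  // 10000 updates with an unchanged delay exceed the 7500-block stability
+  // threshold in ClockdriftDetector::Update() (30 seconds at 250 blocks per
+  // second), so the detector is expected to fall back to Level::kNone.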
+ for (int i = 0; i < 10000; i++) + c.Update(1003); + // No clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone); + + // Decreasing delay. + for (int i = 0; i < 100; i++) + c.Update(1001); + for (int i = 0; i < 100; i++) + c.Update(999); + // Probable clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kProbable); + for (int i = 0; i < 100; i++) + c.Update(1000); + for (int i = 0; i < 100; i++) + c.Update(998); + // Verified clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kVerified); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc new file mode 100644 index 0000000000..f4fb74d20d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" + +#include <algorithm> +#include <functional> + +#include "rtc_base/checks.h" + +namespace webrtc { + +CoarseFilterUpdateGain::CoarseFilterUpdateGain( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + size_t config_change_duration_blocks) + : config_change_duration_blocks_( + static_cast<int>(config_change_duration_blocks)) { + SetConfig(config, true); + RTC_DCHECK_LT(0, config_change_duration_blocks_); + one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_; +} + +void CoarseFilterUpdateGain::HandleEchoPathChange() { + poor_signal_excitation_counter_ = 0; + call_counter_ = 0; +} + +void CoarseFilterUpdateGain::Compute( + const std::array<float, kFftLengthBy2Plus1>& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const FftData& E_coarse, + size_t size_partitions, + bool saturated_capture_signal, + FftData* G) { + RTC_DCHECK(G); + ++call_counter_; + + UpdateCurrentConfig(); + + if (render_signal_analyzer.PoorSignalExcitation()) { + poor_signal_excitation_counter_ = 0; + } + + // Do not update the filter if the render is not sufficiently excited. + if (++poor_signal_excitation_counter_ < size_partitions || + saturated_capture_signal || call_counter_ <= size_partitions) { + G->re.fill(0.f); + G->im.fill(0.f); + return; + } + + // Compute mu. + std::array<float, kFftLengthBy2Plus1> mu; + const auto& X2 = render_power; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (X2[k] > current_config_.noise_gate) { + mu[k] = current_config_.rate / X2[k]; + } else { + mu[k] = 0.f; + } + } + + // Avoid updating the filter close to narrow bands in the render signals. + render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu); + + // G = mu * E * X2.
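+  // With mu[k] = rate / X2[k] from above (zero below the noise gate), this
+  // is a normalized-LMS style step: G[k] = rate * E_coarse[k] / X2[k].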
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + G->re[k] = mu[k] * E_coarse.re[k]; + G->im[k] = mu[k] * E_coarse.im[k]; + } +} + +void CoarseFilterUpdateGain::UpdateCurrentConfig() { + RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_); + if (config_change_counter_ > 0) { + if (--config_change_counter_ > 0) { + auto average = [](float from, float to, float from_weight) { + return from * from_weight + to * (1.f - from_weight); + }; + + float change_factor = + config_change_counter_ * one_by_config_change_duration_blocks_; + + current_config_.rate = + average(old_target_config_.rate, target_config_.rate, change_factor); + current_config_.noise_gate = + average(old_target_config_.noise_gate, target_config_.noise_gate, + change_factor); + } else { + current_config_ = old_target_config_ = target_config_; + } + } + RTC_DCHECK_LE(0, config_change_counter_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h new file mode 100644 index 0000000000..a1a1399b2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ + +#include <stddef.h> + +#include <array> + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" + +namespace webrtc { + +// Provides functionality for computing the fixed gain for the coarse filter. +class CoarseFilterUpdateGain { + public: + explicit CoarseFilterUpdateGain( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + size_t config_change_duration_blocks); + + // Takes action in the case of a known echo path change. + void HandleEchoPathChange(); + + // Computes the gain. + void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const FftData& E_coarse, + size_t size_partitions, + bool saturated_capture_signal, + FftData* G); + + // Sets a new config. + void SetConfig( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + bool immediate_effect) { + if (immediate_effect) { + old_target_config_ = current_config_ = target_config_ = config; + config_change_counter_ = 0; + } else { + old_target_config_ = current_config_; + target_config_ = config; + config_change_counter_ = config_change_duration_blocks_; + } + } + + private: + EchoCanceller3Config::Filter::CoarseConfiguration current_config_; + EchoCanceller3Config::Filter::CoarseConfiguration target_config_; + EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_; + const int config_change_duration_blocks_; + float one_by_config_change_duration_blocks_; + // TODO(peah): Check whether this counter should instead be initialized to a + // large value.
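+  // Counts blocks since the render signal was last flagged as poorly
+  // exciting; Compute() halts adaptation while this is below the number of
+  // filter partitions.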
+ size_t poor_signal_excitation_counter_ = 0; + size_t call_counter_ = 0; + int config_change_counter_ = 0; + + void UpdateCurrentConfig(); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc new file mode 100644 index 0000000000..55b79bb812 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" + +#include <algorithm> +#include <array> +#include <numeric> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +// Method for performing the simulations needed to test the coarse filter +// update gain functionality. +void RunFilterUpdateTest(int num_blocks_to_process, + size_t delay_samples, + size_t num_render_channels, + int filter_length_blocks, + const std::vector<int>& blocks_with_saturation, + std::array<float, kBlockSize>* e_last_block, + std::array<float, kBlockSize>* y_last_block, + FftData* G_last_block) { + ApmDataDumper data_dumper(42); + EchoCanceller3Config config; + config.filter.refined.length_blocks = filter_length_blocks; + AdaptiveFirFilter refined_filter( + config.filter.refined.length_blocks, config.filter.refined.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + DetectOptimization(), &data_dumper); + AdaptiveFirFilter coarse_filter( + config.filter.coarse.length_blocks, config.filter.coarse.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + DetectOptimization(), &data_dumper); + Aec3Fft fft; + + constexpr int kSampleRateHz = 48000; + config.delay.default_delay = 1; + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + CoarseFilterUpdateGain coarse_gain( + config.filter.coarse, config.filter.config_change_duration_blocks); + Random random_generator(42U); + Block x(NumBandsForRate(kSampleRateHz), num_render_channels); + std::array<float, kBlockSize> y; + RenderSignalAnalyzer render_signal_analyzer(config); + std::array<float, kFftLength> s; + FftData S; + FftData G; + FftData E_coarse; + std::array<float, kBlockSize> e_coarse; + + constexpr float kScale = 1.0f / kFftLengthBy2; + + DelayBuffer<float> delay_buffer(delay_samples); + for (int k = 0; k < num_blocks_to_process; ++k) { + // Handle saturation. + bool saturation = + std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(), + k) != blocks_with_saturation.end(); + + // Create the render signal.
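+    // Simulation sketch: x is random render audio, y is x delayed by
+    // delay_samples (standing in for the echo), s is the coarse filter's
+    // echo estimate, and e_coarse = y - s is the residual that the computed
+    // gain should drive towards zero.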
+ for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + } + delay_buffer.Delay(x.View(/*band=*/0, /*channel*/ 0), y); + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + delay_samples / kBlockSize); + + coarse_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S); + fft.Ifft(S, &s); + std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, + e_coarse.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_coarse.begin(), e_coarse.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kRectangular, &E_coarse); + + std::array<float, kFftLengthBy2Plus1> render_power; + render_delay_buffer->GetRenderBuffer()->SpectralSum( + coarse_filter.SizePartitions(), &render_power); + coarse_gain.Compute(render_power, render_signal_analyzer, E_coarse, + coarse_filter.SizePartitions(), saturation, &G); + coarse_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G); + } + + std::copy(e_coarse.begin(), e_coarse.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Length: " << filter_length_blocks; + return ss.Release(); +} + +std::string ProduceDebugText(size_t delay, int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Delay: " << delay << ", "; + ss << ProduceDebugText(filter_length_blocks); + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(CoarseFilterUpdateGainDeathTest, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + FftBuffer fft_buffer(1, 1); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + FftData E; + const EchoCanceller3Config::Filter::CoarseConfiguration& config = { + 12, 0.5f, 220075344.f}; + CoarseFilterUpdateGain gain(config, 250); + std::array<float, kFftLengthBy2Plus1> render_power; + render_power.fill(0.f); + EXPECT_DEATH(gain.Compute(render_power, analyzer, E, 1, false, nullptr), ""); +} + +#endif + +class CoarseFilterUpdateGainOneTwoEightRenderChannels + : public ::testing::Test, + public ::testing::WithParamInterface<size_t> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + CoarseFilterUpdateGainOneTwoEightRenderChannels, + ::testing::Values(1, 2, 8)); + +// Verifies that the gain formed causes the filter using it to converge. +TEST_P(CoarseFilterUpdateGainOneTwoEightRenderChannels, + GainCausesFilterToConverge) { + const size_t num_render_channels = GetParam(); + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks)); + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G; + + RunFilterUpdateTest(5000, delay_samples, num_render_channels, + filter_length_blocks, blocks_with_saturation, &e, &y, + &G); + + // Verify that the refined filter is able to perform well. + // Use different criteria to take overmodelling into account.
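+      // For the 12-block filter the echo path (a pure delay) is modelled
+      // without excess taps, so roughly 30 dB of suppression (a factor 1000
+      // in power) is demanded; the longer filters overmodel the path and
+      // only need to reduce the power at all.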
+ if (filter_length_blocks == 12) { + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } else { + EXPECT_LT(std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } + } + } +} + +// Verifies that the gain is zero when there is saturation. +TEST_P(CoarseFilterUpdateGainOneTwoEightRenderChannels, SaturationBehavior) { + const size_t num_render_channels = GetParam(); + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + RunFilterUpdateTest(100, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); + } +} + +class CoarseFilterUpdateGainOneTwoFourRenderChannels + : public ::testing::Test, + public ::testing::WithParamInterface<size_t> {}; + +INSTANTIATE_TEST_SUITE_P( + MultiChannel, + CoarseFilterUpdateGainOneTwoFourRenderChannels, + ::testing::Values(1, 2, 4), + [](const ::testing::TestParamInfo< + CoarseFilterUpdateGainOneTwoFourRenderChannels::ParamType>& info) { + return (rtc::StringBuilder() << "Render" << info.param).str(); + }); + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. +TEST_P(CoarseFilterUpdateGainOneTwoFourRenderChannels, DecreasingGain) { + const size_t num_render_channels = GetParam(); + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array<float, kFftLengthBy2Plus1> G_a_power; + std::array<float, kFftLengthBy2Plus1> G_b_power; + std::array<float, kFftLengthBy2Plus1> G_c_power; + + RunFilterUpdateTest(100, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_a); + RunFilterUpdateTest(200, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_b); + RunFilterUpdateTest(300, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc new file mode 100644 index 0000000000..de5227c089 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/aec3/vector_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
+float GetNoiseFloorFactor(float noise_floor_dbfs) {
+  // kdBfsNormalization = 20.f*log10(32768.f).
+  constexpr float kdBfsNormalization = 90.30899869919436f;
+  return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
+}
+
+// Table of sqrt(2) * sin(2*pi*i/32).
+constexpr float kSqrt2Sin[32] = {
+    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
+    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
+    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
+    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
+    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
+    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
+    -0.5411961f, -0.2758994f};
+
+void GenerateComfortNoise(Aec3Optimization optimization,
+                          const std::array<float, kFftLengthBy2Plus1>& N2,
+                          uint32_t* seed,
+                          FftData* lower_band_noise,
+                          FftData* upper_band_noise) {
+  FftData* N_low = lower_band_noise;
+  FftData* N_high = upper_band_noise;
+
+  // Compute square root spectrum.
+  std::array<float, kFftLengthBy2Plus1> N;
+  std::copy(N2.begin(), N2.end(), N.begin());
+  aec3::VectorMath(optimization).Sqrt(N);
+
+  // Compute the noise level for the upper bands.
+  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
+  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
+  const float high_band_noise_level =
+      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
+      kOneByNumBands;
+
+  // The analysis and synthesis windowing cause loss of power when
+  // cross-fading the noise where frames are completely uncorrelated
+  // (generated with random phase), hence the factor sqrt(2).
+  // This is not the case for the speech signal where the input is overlapping
+  // (strong correlation).
+  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
+      N_high->re[kFftLengthBy2] = 0.f;
+  for (size_t k = 1; k < kFftLengthBy2; k++) {
+    constexpr int kIndexMask = 32 - 1;
+    // Generate a random 31-bit integer.
+    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
+    // Convert to a 5-bit index.
+    int i = seed[0] >> 26;
+
+    // x = sqrt(2) * sin(a)
+    const float x = kSqrt2Sin[i];
+    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
+    const float y = kSqrt2Sin[(i + 8) & kIndexMask];
+
+    // Form low-frequency noise via spectral shaping.
+    N_low->re[k] = N[k] * x;
+    N_low->im[k] = N[k] * y;
+
+    // Form the high-frequency noise via simple levelling.
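+    // ("Levelling" means that every upper-band bin gets the same magnitude,
+    // high_band_noise_level, instead of the per-bin shaping applied above.)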
+ N_high->re[k] = high_band_noise_level * x; + N_high->im[k] = high_band_noise_level * y; + } +} + +} // namespace + +ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config, + Aec3Optimization optimization, + size_t num_capture_channels) + : optimization_(optimization), + seed_(42), + num_capture_channels_(num_capture_channels), + noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)), + N2_initial_( + std::make_unique>>( + num_capture_channels_)), + Y2_smoothed_(num_capture_channels_), + N2_(num_capture_channels_) { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + (*N2_initial_)[ch].fill(0.f); + Y2_smoothed_[ch].fill(0.f); + N2_[ch].fill(1.0e6f); + } +} + +ComfortNoiseGenerator::~ComfortNoiseGenerator() = default; + +void ComfortNoiseGenerator::Compute( + bool saturated_capture, + rtc::ArrayView> + capture_spectrum, + rtc::ArrayView lower_band_noise, + rtc::ArrayView upper_band_noise) { + const auto& Y2 = capture_spectrum; + + if (!saturated_capture) { + // Smooth Y2. + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(), + Y2[ch].begin(), Y2_smoothed_[ch].begin(), + [](float a, float b) { return a + 0.1f * (b - a); }); + } + + if (N2_counter_ > 50) { + // Update N2 from Y2_smoothed. + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(), + N2_[ch].begin(), [](float a, float b) { + return b < a ? (0.9f * b + 0.1f * a) * 1.0002f + : a * 1.0002f; + }); + } + } + + if (N2_initial_) { + if (++N2_counter_ == 1000) { + N2_initial_.reset(); + } else { + // Compute the N2_initial from N2. + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::transform(N2_[ch].begin(), N2_[ch].end(), + (*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(), + [](float a, float b) { + return a > b ? b + 0.001f * (a - b) : a; + }); + } + } + } + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + for (auto& n : N2_[ch]) { + n = std::max(n, noise_floor_); + } + if (N2_initial_) { + for (auto& n : (*N2_initial_)[ch]) { + n = std::max(n, noise_floor_); + } + } + } + } + + // Choose N2 estimate to use. + const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_; + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch], + &upper_band_noise[ch]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h new file mode 100644 index 0000000000..2785b765c5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ + +#include + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace aec3 { +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void EstimateComfortNoise_SSE2(const std::array& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); +#endif +void EstimateComfortNoise(const std::array& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); + +} // namespace aec3 + +// Generates the comfort noise. +class ComfortNoiseGenerator { + public: + ComfortNoiseGenerator(const EchoCanceller3Config& config, + Aec3Optimization optimization, + size_t num_capture_channels); + ComfortNoiseGenerator() = delete; + ~ComfortNoiseGenerator(); + ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete; + + // Computes the comfort noise. + void Compute(bool saturated_capture, + rtc::ArrayView> + capture_spectrum, + rtc::ArrayView lower_band_noise, + rtc::ArrayView upper_band_noise); + + // Returns the estimate of the background noise spectrum. + rtc::ArrayView> NoiseSpectrum() + const { + return N2_; + } + + private: + const Aec3Optimization optimization_; + uint32_t seed_; + const size_t num_capture_channels_; + const float noise_floor_; + std::unique_ptr>> + N2_initial_; + std::vector> Y2_smoothed_; + std::vector> N2_; + int N2_counter_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc new file mode 100644 index 0000000000..a9da17559a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "rtc_base/random.h"
+#include "rtc_base/system/arch.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+float Power(const FftData& N) {
+  std::array<float, kFftLengthBy2Plus1> N2;
+  N.Spectrum(Aec3Optimization::kNone, N2);
+  return std::accumulate(N2.begin(), N2.end(), 0.f) / N2.size();
+}
+
+}  // namespace
+
+TEST(ComfortNoiseGenerator, CorrectLevel) {
+  constexpr size_t kNumChannels = 5;
+  EchoCanceller3Config config;
+  ComfortNoiseGenerator cng(config, DetectOptimization(), kNumChannels);
+  AecState aec_state(config, kNumChannels);
+
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumChannels);
+  std::vector<FftData> n_lower(kNumChannels);
+  std::vector<FftData> n_upper(kNumChannels);
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    N2[ch].fill(1000.f * 1000.f / (ch + 1));
+    n_lower[ch].re.fill(0.f);
+    n_lower[ch].im.fill(0.f);
+    n_upper[ch].re.fill(0.f);
+    n_upper[ch].im.fill(0.f);
+  }
+
+  // Ensure instantaneous update to nonzero noise.
+  cng.Compute(false, N2, n_lower, n_upper);
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    EXPECT_LT(0.f, Power(n_lower[ch]));
+    EXPECT_LT(0.f, Power(n_upper[ch]));
+  }
+
+  for (int k = 0; k < 10000; ++k) {
+    cng.Compute(false, N2, n_lower, n_upper);
+  }
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    EXPECT_NEAR(2.f * N2[ch][0], Power(n_lower[ch]), N2[ch][0] / 10.f);
+    EXPECT_NEAR(2.f * N2[ch][0], Power(n_upper[ch]), N2[ch][0] / 10.f);
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc
new file mode 100644
index 0000000000..c55344da79
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc
@@ -0,0 +1,71 @@
+
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/config_selector.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Validates that the mono and the multichannel configs have compatible fields.
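+// Only configs that agree on these fields can safely be swapped at runtime,
+// since they affect state that is set up once rather than per processed
+// block.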
+bool CompatibleConfigs(const EchoCanceller3Config& mono_config, + const EchoCanceller3Config& multichannel_config) { + if (mono_config.delay.fixed_capture_delay_samples != + multichannel_config.delay.fixed_capture_delay_samples) { + return false; + } + if (mono_config.filter.export_linear_aec_output != + multichannel_config.filter.export_linear_aec_output) { + return false; + } + if (mono_config.filter.high_pass_filter_echo_reference != + multichannel_config.filter.high_pass_filter_echo_reference) { + return false; + } + if (mono_config.multi_channel.detect_stereo_content != + multichannel_config.multi_channel.detect_stereo_content) { + return false; + } + if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds != + multichannel_config.multi_channel + .stereo_detection_timeout_threshold_seconds) { + return false; + } + return true; +} + +} // namespace + +ConfigSelector::ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels) + : config_(config), multichannel_config_(multichannel_config) { + if (multichannel_config_.has_value()) { + RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_)); + } + + Update(!config_.multi_channel.detect_stereo_content && + num_render_input_channels > 1); + + RTC_DCHECK(active_config_); +} + +void ConfigSelector::Update(bool multichannel_content) { + if (multichannel_content && multichannel_config_.has_value()) { + active_config_ = &(*multichannel_config_); + } else { + active_config_ = &config_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h new file mode 100644 index 0000000000..3b3f94e5ac --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" + +namespace webrtc { + +// Selects the config to use. +class ConfigSelector { + public: + ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels); + + // Updates the config selection based on the detection of multichannel + // content. 
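+  // When no separate multichannel config was supplied, the normal config
+  // remains active regardless of the detected content.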
+ void Update(bool multichannel_content); + + const EchoCanceller3Config& active_config() const { return *active_config_; } + + private: + const EchoCanceller3Config config_; + const absl::optional multichannel_config_; + const EchoCanceller3Config* active_config_ = nullptr; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc new file mode 100644 index 0000000000..1826bfcace --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/config_selector.h" + +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "test/gtest.h" + +namespace webrtc { + +class ConfigSelectorChannelsAndContentDetection + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannelsAndContentDetection, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(false, true))); + +class ConfigSelectorChannels : public ::testing::Test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannels, + ::testing::Values(1, 2, 8)); + +TEST_P(ConfigSelectorChannelsAndContentDetection, + MonoConfigIsSelectedWhenNoMultiChannelConfigPresent) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config; + + config.delay.default_delay = config.delay.default_delay + 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); +} + +TEST_P(ConfigSelectorChannelsAndContentDetection, + CorrectInitialConfigIsSelected) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + if (num_channels == 1 || detect_stereo_content) { + EXPECT_EQ(cs.active_config().delay.default_delay, + 
custom_delay_value_in_config); + } else { + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +TEST_P(ConfigSelectorChannels, CorrectConfigUpdateBehavior) { + const int num_channels = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = true; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + if (num_channels == 1) { + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + } else { + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc new file mode 100644 index 0000000000..bd03237ca0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/decimator.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// signal.butter(2, 3400/8000.0, 'lowpass', analog=False) +const std::vector GetLowPassFilterDS2() { + return std::vector{ + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}, + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}, + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}}; +} + +// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False) +const std::vector GetLowPassFilterDS4() { + return std::vector{ + {{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f}, + {{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f}, + {{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}}; +} + +// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False) +const std::vector GetBandPassFilterDS8() { + return std::vector{ + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}}; +} + +// signal.butter(2, 1000/8000.0, 'highpass', analog=False) +const std::vector GetHighPassFilter() { + return std::vector{ + {{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}}; +} + +const std::vector GetPassThroughFilter() { + return std::vector{}; +} +} // namespace + +Decimator::Decimator(size_t down_sampling_factor) + : down_sampling_factor_(down_sampling_factor), + anti_aliasing_filter_(down_sampling_factor_ == 4 + ? GetLowPassFilterDS4() + : (down_sampling_factor_ == 8 + ? GetBandPassFilterDS8() + : GetLowPassFilterDS2())), + noise_reduction_filter_(down_sampling_factor_ == 8 + ? GetPassThroughFilter() + : GetHighPassFilter()) { + RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 || + down_sampling_factor_ == 8); +} + +void Decimator::Decimate(rtc::ArrayView in, + rtc::ArrayView out) { + RTC_DCHECK_EQ(kBlockSize, in.size()); + RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size()); + std::array x; + + // Limit the frequency content of the signal to avoid aliasing. + anti_aliasing_filter_.Process(in, x); + + // Reduce the impact of near-end noise. + noise_reduction_filter_.Process(x); + + // Downsample the signal. + for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) { + RTC_DCHECK_GT(kBlockSize, k); + out[j] = x[k]; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h new file mode 100644 index 0000000000..dbff3d9fff --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" + +namespace webrtc { + +// Provides functionality for decimating a signal. +class Decimator { + public: + explicit Decimator(size_t down_sampling_factor); + + Decimator(const Decimator&) = delete; + Decimator& operator=(const Decimator&) = delete; + + // Downsamples the signal. + void Decimate(rtc::ArrayView in, rtc::ArrayView out); + + private: + const size_t down_sampling_factor_; + CascadedBiQuadFilter anti_aliasing_filter_; + CascadedBiQuadFilter noise_reduction_filter_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc new file mode 100644 index 0000000000..e6f5ea0403 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/decimator.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; +constexpr float kPi = 3.141592f; +constexpr size_t kNumStartupBlocks = 50; +constexpr size_t kNumBlocks = 1000; + +void ProduceDecimatedSinusoidalOutputPower(int sample_rate_hz, + size_t down_sampling_factor, + float sinusoidal_frequency_hz, + float* input_power, + float* output_power) { + float input[kBlockSize * kNumBlocks]; + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + // Produce a sinusoid of the specified frequency. 
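+  // (In the aliasing test below this is 3/8 of the sample rate, e.g. a 6 kHz
+  // tone at 16 kHz, well above the 3.4 kHz, 1.8 kHz, and 1-2 kHz cut-offs
+  // quoted for the anti-aliasing filters above.)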
+ for (size_t k = 0; k < kBlockSize * kNumBlocks; ++k) { + input[k] = 32767.f * std::sin(2.f * kPi * sinusoidal_frequency_hz * k / + sample_rate_hz); + } + + Decimator decimator(down_sampling_factor); + std::vector output(sub_block_size * kNumBlocks); + + for (size_t k = 0; k < kNumBlocks; ++k) { + std::vector sub_block(sub_block_size); + decimator.Decimate( + rtc::ArrayView(&input[k * kBlockSize], kBlockSize), + sub_block); + + std::copy(sub_block.begin(), sub_block.end(), + output.begin() + k * sub_block_size); + } + + ASSERT_GT(kNumBlocks, kNumStartupBlocks); + rtc::ArrayView input_to_evaluate( + &input[kNumStartupBlocks * kBlockSize], + (kNumBlocks - kNumStartupBlocks) * kBlockSize); + rtc::ArrayView output_to_evaluate( + &output[kNumStartupBlocks * sub_block_size], + (kNumBlocks - kNumStartupBlocks) * sub_block_size); + *input_power = + std::inner_product(input_to_evaluate.begin(), input_to_evaluate.end(), + input_to_evaluate.begin(), 0.f) / + input_to_evaluate.size(); + *output_power = + std::inner_product(output_to_evaluate.begin(), output_to_evaluate.end(), + output_to_evaluate.begin(), 0.f) / + output_to_evaluate.size(); +} + +} // namespace + +// Verifies that there is little aliasing from upper frequencies in the +// downsampling. +TEST(Decimator, NoLeakageFromUpperFrequencies) { + float input_power; + float output_power; + for (auto rate : {16000, 32000, 48000}) { + for (auto down_sampling_factor : kDownSamplingFactors) { + ProduceDebugText(rate); + ProduceDecimatedSinusoidalOutputPower(rate, down_sampling_factor, + 3.f / 8.f * rate, &input_power, + &output_power); + EXPECT_GT(0.0001f * input_power, output_power); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies the check for the input size. +TEST(DecimatorDeathTest, WrongInputSize) { + Decimator decimator(4); + std::vector x(kBlockSize - 1, 0.f); + std::array x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for non-null output parameter. +TEST(DecimatorDeathTest, NullOutput) { + Decimator decimator(4); + std::vector x(kBlockSize, 0.f); + EXPECT_DEATH(decimator.Decimate(x, nullptr), ""); +} + +// Verifies the check for the output size. +TEST(DecimatorDeathTest, WrongOutputSize) { + Decimator decimator(4); + std::vector x(kBlockSize, 0.f); + std::array x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for the correct downsampling factor. +TEST(DecimatorDeathTest, CorrectDownSamplingFactor) { + EXPECT_DEATH(Decimator(3), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h b/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h new file mode 100644 index 0000000000..7838a0c255 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ + +#include + +namespace webrtc { + +// Stores delay_estimates. 
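+// `quality` separates a first coarse estimate from a later refined one, and
+// the block counters let clients track how recently the estimate changed or
+// was updated.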
+struct DelayEstimate { + enum class Quality { kCoarse, kRefined }; + + DelayEstimate(Quality quality, size_t delay) + : quality(quality), delay(delay) {} + + Quality quality; + size_t delay; + size_t blocks_since_last_change = 0; + size_t blocks_since_last_update = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc new file mode 100644 index 0000000000..40073cf615 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/dominant_nearend_detector.h" + +#include + +namespace webrtc { +DominantNearendDetector::DominantNearendDetector( + const EchoCanceller3Config::Suppressor::DominantNearendDetection& config, + size_t num_capture_channels) + : enr_threshold_(config.enr_threshold), + enr_exit_threshold_(config.enr_exit_threshold), + snr_threshold_(config.snr_threshold), + hold_duration_(config.hold_duration), + trigger_threshold_(config.trigger_threshold), + use_during_initial_phase_(config.use_during_initial_phase), + num_capture_channels_(num_capture_channels), + trigger_counters_(num_capture_channels_), + hold_counters_(num_capture_channels_) {} + +void DominantNearendDetector::Update( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) { + nearend_state_ = false; + + auto low_frequency_energy = [](rtc::ArrayView spectrum) { + RTC_DCHECK_LE(16, spectrum.size()); + return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); + }; + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + const float ne_sum = low_frequency_energy(nearend_spectrum[ch]); + const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]); + const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]); + + // Detect strong active nearend if the nearend is sufficiently stronger than + // the echo and the nearend noise. + if ((!initial_state || use_during_initial_phase_) && + echo_sum < enr_threshold_ * ne_sum && + ne_sum > snr_threshold_ * noise_sum) { + if (++trigger_counters_[ch] >= trigger_threshold_) { + // After a period of strong active nearend activity, flag nearend mode. + hold_counters_[ch] = hold_duration_; + trigger_counters_[ch] = trigger_threshold_; + } + } else { + // Forget previously detected strong active nearend activity. + trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1); + } + + // Exit nearend-state early at strong echo. + if (echo_sum > enr_exit_threshold_ * ne_sum && + echo_sum > snr_threshold_ * noise_sum) { + hold_counters_[ch] = 0; + } + + // Remain in any nearend mode for a certain duration. 
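+    // The detector-wide state is the OR over all capture channels, so a
+    // single channel in nearend mode keeps the whole detector in nearend
+    // mode.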
+ hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1); + nearend_state_ = nearend_state_ || hold_counters_[ch] > 0; + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h new file mode 100644 index 0000000000..046d1488d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/nearend_detector.h" + +namespace webrtc { +// Class for selecting whether the suppressor is in the nearend or echo state. +class DominantNearendDetector : public NearendDetector { + public: + DominantNearendDetector( + const EchoCanceller3Config::Suppressor::DominantNearendDetection& config, + size_t num_capture_channels); + + // Returns whether the current state is the nearend state. + bool IsNearendState() const override { return nearend_state_; } + + // Updates the state selection based on latest spectral estimates. + void Update(rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) override; + + private: + const float enr_threshold_; + const float enr_exit_threshold_; + const float snr_threshold_; + const int hold_duration_; + const int trigger_threshold_; + const bool use_during_initial_phase_; + const size_t num_capture_channels_; + + bool nearend_state_ = false; + std::vector trigger_counters_; + std::vector hold_counters_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc new file mode 100644 index 0000000000..c105911aa8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+
+#include <algorithm>
+
+namespace webrtc {
+
+DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size)
+    : size(static_cast<int>(downsampled_buffer_size)),
+      buffer(downsampled_buffer_size, 0.f) {
+  std::fill(buffer.begin(), buffer.end(), 0.f);
+}
+
+DownsampledRenderBuffer::~DownsampledRenderBuffer() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h
new file mode 100644
index 0000000000..fbdc9b4e93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Holds the circular buffer of the downsampled render data.
+struct DownsampledRenderBuffer {
+  explicit DownsampledRenderBuffer(size_t downsampled_buffer_size);
+  ~DownsampledRenderBuffer();
+
+  int IncIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index < size - 1 ? index + 1 : 0;
+  }
+
+  int DecIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index > 0 ? index - 1 : size - 1;
+  }
+
+  int OffsetIndex(int index, int offset) const {
+    RTC_DCHECK_GE(buffer.size(), offset);
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return (size + index + offset) % size;
+  }
+
+  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
+  void IncWriteIndex() { write = IncIndex(write); }
+  void DecWriteIndex() { write = DecIndex(write); }
+  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
+  void IncReadIndex() { read = IncIndex(read); }
+  void DecReadIndex() { read = DecIndex(read); }
+
+  const int size;
+  std::vector<float> buffer;
+  int write = 0;
+  int read = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc
new file mode 100644
index 0000000000..142a33d5e0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/echo_audibility.h" + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/aec3/stationarity_estimator.h" + +namespace webrtc { + +EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init) + : use_render_stationarity_at_init_(use_render_stationarity_at_init) { + Reset(); +} + +EchoAudibility::~EchoAudibility() = default; + +void EchoAudibility::Update(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int delay_blocks, + bool external_delay_seen) { + UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(), + render_buffer.GetBlockBuffer(), + external_delay_seen); + + if (external_delay_seen || use_render_stationarity_at_init_) { + UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks); + } +} + +void EchoAudibility::Reset() { + render_stationarity_.Reset(); + non_zero_render_seen_ = false; + render_spectrum_write_prev_ = absl::nullopt; +} + +void EchoAudibility::UpdateRenderStationarityFlags( + const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int min_channel_delay_blocks) { + const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read, + min_channel_delay_blocks); + + int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1; + num_lookahead = std::max(0, num_lookahead); + + render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb, + idx_at_delay, num_lookahead); +} + +void EchoAudibility::UpdateRenderNoiseEstimator( + const SpectrumBuffer& spectrum_buffer, + const BlockBuffer& block_buffer, + bool external_delay_seen) { + if (!render_spectrum_write_prev_) { + render_spectrum_write_prev_ = spectrum_buffer.write; + render_block_write_prev_ = block_buffer.write; + return; + } + int render_spectrum_write_current = spectrum_buffer.write; + if (!non_zero_render_seen_ && !external_delay_seen) { + non_zero_render_seen_ = !IsRenderTooLow(block_buffer); + } + if (non_zero_render_seen_) { + for (int idx = render_spectrum_write_prev_.value(); + idx != render_spectrum_write_current; + idx = spectrum_buffer.DecIndex(idx)) { + render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]); + } + } + render_spectrum_write_prev_ = render_spectrum_write_current; +} + +bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) { + const int num_render_channels = + static_cast(block_buffer.buffer[0].NumChannels()); + bool too_low = false; + const int render_block_write_current = block_buffer.write; + if (render_block_write_current == render_block_write_prev_) { + too_low = true; + } else { + for (int idx = render_block_write_prev_; idx != render_block_write_current; + idx = block_buffer.IncIndex(idx)) { + float max_abs_over_channels = 0.f; + for (int ch = 0; ch < num_render_channels; ++ch) { + rtc::ArrayView block = + block_buffer.buffer[idx].View(/*band=*/0, /*channel=*/ch); + auto r = std::minmax_element(block.cbegin(), block.cend()); + float max_abs_channel = + std::max(std::fabs(*r.first), std::fabs(*r.second)); + max_abs_over_channels = + std::max(max_abs_over_channels, max_abs_channel); + } + if (max_abs_over_channels < 10.f) { + too_low = true; // Discards all blocks if one of them is too low. 
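+        // (An absolute sample peak below 10, i.e. roughly -70 dBFS relative
+        // to 16-bit full scale, counts as near-zero render.)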
+ break; + } + } + } + render_block_write_prev_ = render_block_write_current; + return too_low; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h new file mode 100644 index 0000000000..b9d6f87d2a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ + +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/aec3/stationarity_estimator.h" + +namespace webrtc { + +class EchoAudibility { + public: + explicit EchoAudibility(bool use_render_stationarity_at_init); + ~EchoAudibility(); + + EchoAudibility(const EchoAudibility&) = delete; + EchoAudibility& operator=(const EchoAudibility&) = delete; + + // Feed new render data to the echo audibility estimator. + void Update(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int min_channel_delay_blocks, + bool external_delay_seen); + // Get the residual echo scaling. + void GetResidualEchoScaling(bool filter_has_had_time_to_converge, + rtc::ArrayView residual_scaling) const { + for (size_t band = 0; band < residual_scaling.size(); ++band) { + if (render_stationarity_.IsBandStationary(band) && + (filter_has_had_time_to_converge || + use_render_stationarity_at_init_)) { + residual_scaling[band] = 0.f; + } else { + residual_scaling[band] = 1.0f; + } + } + } + + // Returns true if the current render block is estimated as stationary. + bool IsBlockStationary() const { + return render_stationarity_.IsBlockStationary(); + } + + private: + // Reset the EchoAudibility class. + void Reset(); + + // Updates the render stationarity flags for the current frame. + void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int delay_blocks); + + // Updates the noise estimator with the new render data since the previous + // call to this method. + void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer, + const BlockBuffer& block_buffer, + bool external_delay_seen); + + // Returns a bool being true if the render signal contains just close to zero + // values. 
+ bool IsRenderTooLow(const BlockBuffer& block_buffer); + + absl::optional render_spectrum_write_prev_; + int render_block_write_prev_; + bool non_zero_render_seen_; + const bool use_render_stationarity_at_init_; + StationarityEstimator render_stationarity_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc new file mode 100644 index 0000000000..e8e2175994 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/echo_canceller3.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/high_pass_filter.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +enum class EchoCanceller3ApiCall { kCapture, kRender }; + +bool DetectSaturation(rtc::ArrayView y) { + for (size_t k = 0; k < y.size(); ++k) { + if (y[k] >= 32700.0f || y[k] <= -32700.0f) { + return true; + } + } + return false; +} + +// Retrieves a value from a field trial if it is available. If no value is +// present, the default value is returned. If the retrieved value is beyond the +// specified limits, the default value is returned instead. 
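+// Example (with a hypothetical trial name): if "WebRTC-SomeAec3Param" is
+// registered as "0.3", then RetrieveFieldTrialValue("WebRTC-SomeAec3Param",
+// 0.f, 1.f, &param) overwrites param with 0.3f, while a missing or
+// out-of-range trial value leaves param unchanged.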
+void RetrieveFieldTrialValue(absl::string_view trial_name, + float min, + float max, + float* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = static_cast(field_trial_param.Get()); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void RetrieveFieldTrialValue(absl::string_view trial_name, + int min, + int max, + int* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = field_trial_param.Get(); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void FillSubFrameView( + AudioBuffer* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_LE(0, sub_frame_index); + RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size()); + RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size()); + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } +} + +void FillSubFrameView( + bool proper_downmix_needed, + std::vector>>* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_EQ(frame->size(), sub_frame_view->size()); + const size_t frame_num_channels = (*frame)[0].size(); + const size_t sub_frame_num_channels = (*sub_frame_view)[0].size(); + if (frame_num_channels > sub_frame_num_channels) { + RTC_DCHECK_EQ(sub_frame_num_channels, 1u); + if (proper_downmix_needed) { + // When a proper downmix is needed (which is the case when proper stereo + // is present in the echo reference signal but the echo canceller does the + // processing in mono) downmix the echo reference by averaging the channel + // content (otherwise downmixing is done by selecting channel 0). 
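+      // E.g., for a stereo reference the two channels are summed into
+      // channel 0 and scaled by 1/2: out[k] = (left[k] + right[k]) / 2.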
+ for (size_t band = 0; band < frame->size(); ++band) { + for (size_t ch = 1; ch < frame_num_channels; ++ch) { + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0] + [sub_frame_index * kSubFrameLength + k] += + (*frame)[band][ch][sub_frame_index * kSubFrameLength + k]; + } + } + const float one_by_num_channels = 1.0f / frame_num_channels; + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength + + k] *= one_by_num_channels; + } + } + } + for (size_t band = 0; band < frame->size(); ++band) { + (*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView( + &(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } else { + RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels); + for (size_t band = 0; band < frame->size(); ++band) { + for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*frame)[band][channel][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } + } +} + +void ProcessCaptureFrameContent( + AudioBuffer* linear_output, + AudioBuffer* capture, + bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + size_t sub_frame_index, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + std::vector>>* + linear_output_sub_frame_view, + Block* capture_block, + std::vector>>* capture_sub_frame_view) { + FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + RTC_DCHECK(linear_output_block); + RTC_DCHECK(linear_output_sub_frame_view); + FillSubFrameView(linear_output, sub_frame_index, + linear_output_sub_frame_view); + } + + capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view, + capture_block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, capture_block); + output_framer->InsertBlockAndExtractSubFrame(*capture_block, + capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + linear_output_framer->InsertBlockAndExtractSubFrame( + *linear_output_block, linear_output_sub_frame_view); + } +} + +void ProcessRemainingCaptureFrameContent(bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + Block* block) { + if (!capture_blocker->IsBlockAvailable()) { + return; + } + + capture_blocker->ExtractBlock(block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, block); + output_framer->InsertBlock(*block); + + if (linear_output_framer) { + RTC_DCHECK(linear_output_block); + linear_output_framer->InsertBlock(*linear_output_block); + } +} + +void BufferRenderFrameContent( + bool proper_downmix_needed, + std::vector>>* render_frame, + size_t sub_frame_index, + FrameBlocker* render_blocker, + BlockProcessor* block_processor, + Block* block, + std::vector>>* sub_frame_view) { + FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index, + sub_frame_view); + 
render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block); + block_processor->BufferRender(*block); +} + +void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker, + BlockProcessor* block_processor, + Block* block) { + if (!render_blocker->IsBlockAvailable()) { + return; + } + render_blocker->ExtractBlock(block); + block_processor->BufferRender(*block); +} + +void CopyBufferIntoFrame(const AudioBuffer& buffer, + size_t num_bands, + size_t num_channels, + std::vector>>* frame) { + RTC_DCHECK_EQ(num_bands, frame->size()); + RTC_DCHECK_EQ(num_channels, (*frame)[0].size()); + RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size()); + for (size_t band = 0; band < num_bands; ++band) { + for (size_t channel = 0; channel < num_channels; ++channel) { + rtc::ArrayView buffer_view( + &buffer.split_bands_const(channel)[band][0], + AudioBuffer::kSplitBandSize); + std::copy(buffer_view.begin(), buffer_view.end(), + (*frame)[band][channel].begin()); + } + } +} + +} // namespace + +// TODO(webrtc:5298): Move this to a separate file. +EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { + EchoCanceller3Config adjusted_cfg = config; + + if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) { + adjusted_cfg.multi_channel.detect_stereo_content = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) { + adjusted_cfg.suppressor.high_bands_suppression + .anti_howling_activation_threshold = 25.f; + adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) { + adjusted_cfg.filter.config_change_duration_blocks = 10; + } + + if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 0.f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .1f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .2f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .3f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .6f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .9f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 1.2f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 1.6f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 2.0f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) { + adjusted_cfg.filter.high_pass_filter_echo_reference = true; + } + + if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) { + adjusted_cfg.ep_strength.echo_can_saturate = false; + } + + const std::string use_nearend_reverb_len_tunings = + field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen"); + FieldTrialParameter nearend_reverb_default_len( + "default_len", adjusted_cfg.ep_strength.default_len); + FieldTrialParameter nearend_reverb_nearend_len( 
+ "nearend_len", adjusted_cfg.ep_strength.nearend_len); + + ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len}, + use_nearend_reverb_len_tunings); + float default_len = static_cast(nearend_reverb_default_len.Get()); + float nearend_len = static_cast(nearend_reverb_nearend_len.Get()); + if (default_len > -1 && default_len < 1 && nearend_len > -1 && + nearend_len < 1) { + adjusted_cfg.ep_strength.default_len = + static_cast(nearend_reverb_default_len.Get()); + adjusted_cfg.ep_strength.nearend_len = + static_cast(nearend_reverb_nearend_len.Get()); + } + + if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) { + adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true; + } + + if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) { + adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) { + // Two blocks headroom. + adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) { + adjusted_cfg.erle.clamp_quality_estimate_to_zero = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) { + adjusted_cfg.erle.clamp_quality_estimate_to_one = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) { + adjusted_cfg.erle.onset_detection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) { + adjusted_cfg.delay.render_alignment_mixing.downmix = true; + adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) { + adjusted_cfg.delay.capture_alignment_mixing.downmix = true; + adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) { + adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels = + true; + } + + if (field_trial::IsEnabled( + "WebRTC-" + "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) { + adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels = + false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) { + adjusted_cfg.delay.detect_pre_echo = true; + } + + if (field_trial::IsDisabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) { + adjusted_cfg.delay.detect_pre_echo = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) { + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3VerySensitiveDominantNearendActivation")) { + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) { + adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) { + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f; + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) { + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f; + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f; + } + + if 
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
+    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
+    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
+    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
+    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
+    adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
+    adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
+    adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
+    adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) {
+    adjusted_cfg.suppressor.conservative_hf_suppression = true;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
+    adjusted_cfg.echo_audibility.use_stationarity_properties = true;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
+    adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
+    adjusted_cfg.render_levels.active_render_limit = 50.f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
+    adjusted_cfg.render_levels.active_render_limit = 30.f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
+    adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
+  }
+
+  // Field-trial based override for the whole suppressor tuning.
+  const std::string suppressor_tuning_override_trial_name =
+      field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
+
+  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
+      "nearend_tuning_mask_lf_enr_transparent",
+      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
+      "nearend_tuning_mask_lf_enr_suppress",
+      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
+      "nearend_tuning_mask_hf_enr_transparent",
+      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
+      "nearend_tuning_mask_hf_enr_suppress",
+      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  FieldTrialParameter<double> nearend_tuning_max_inc_factor(
+      "nearend_tuning_max_inc_factor",
+      adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
+  FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
+      "nearend_tuning_max_dec_factor_lf",
+      adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
+  FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
+      "normal_tuning_mask_lf_enr_transparent",
+      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
+  FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
+      "normal_tuning_mask_lf_enr_suppress",
+      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
+  FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
+      "normal_tuning_mask_hf_enr_transparent",
+      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
+  FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
+      "normal_tuning_mask_hf_enr_suppress",
+      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
+  FieldTrialParameter<double> normal_tuning_max_inc_factor(
+      "normal_tuning_max_inc_factor",
+      adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
+  FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
+      "normal_tuning_max_dec_factor_lf",
+      adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
+  FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
+      "dominant_nearend_detection_enr_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
+  FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
+      "dominant_nearend_detection_enr_exit_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
+      "dominant_nearend_detection_snr_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
+  FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
+      "dominant_nearend_detection_hold_duration",
+      adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
+  FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
+      "dominant_nearend_detection_trigger_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  ParseFieldTrial(
+      {&nearend_tuning_mask_lf_enr_transparent,
+       &nearend_tuning_mask_lf_enr_suppress,
+       &nearend_tuning_mask_hf_enr_transparent,
+       &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
+       &nearend_tuning_max_dec_factor_lf,
+       &normal_tuning_mask_lf_enr_transparent,
+       &normal_tuning_mask_lf_enr_suppress,
+       &normal_tuning_mask_hf_enr_transparent,
+       &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
+       &normal_tuning_max_dec_factor_lf,
+       &dominant_nearend_detection_enr_threshold,
+       &dominant_nearend_detection_enr_exit_threshold,
+       &dominant_nearend_detection_snr_threshold,
+       &dominant_nearend_detection_hold_duration,
+       &dominant_nearend_detection_trigger_threshold},
+      suppressor_tuning_override_trial_name);
+
+  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
+      static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
+      static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
+      static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
+      static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
+  adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
+      static_cast<float>(nearend_tuning_max_inc_factor.Get());
+  adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
+      static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
+      static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
+      static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
+      static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
+      static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
+  adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
+      static_cast<float>(normal_tuning_max_inc_factor.Get());
+  adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
+      static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
+      static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
+      static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
+      static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
+      dominant_nearend_detection_hold_duration.Get();
+  adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
+      dominant_nearend_detection_trigger_threshold.Get();
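For orientation, a minimal sketch of how a "key:value,key:value" override string of this kind is consumed, using only the rtc_base field-trial parser already relied on above; the parameter name and default below are illustrative, not part of the patch:

    #include <string>
    #include "rtc_base/experiments/field_trial_parser.h"

    // Returns the tuned value carried by e.g. "max_inc_factor:2.5", or the
    // supplied default when the key is absent or unparsable.
    double ParsedMaxIncFactor(const std::string& trial_string) {
      webrtc::FieldTrialParameter<double> max_inc_factor("max_inc_factor", 2.0);
      webrtc::ParseFieldTrial({&max_inc_factor}, trial_string);
      return max_inc_factor.Get();
    }

The unit test Aec3SuppressorTuningOverrideAllParams near the end of this patch exercises exactly this string format against the full parameter list.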
+
+  // Field trial-based overrides of individual suppressor parameters.
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
+      100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
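RetrieveFieldTrialValue is a file-local helper defined earlier in echo_canceller3.cc, outside this hunk. A plausible shape for its float overload, shown only as a sketch under that assumption (not quoted from the patch):

    #include <string>
    #include "rtc_base/experiments/field_trial_parser.h"
    #include "system_wrappers/include/field_trial.h"

    // Reads a scalar from the named field trial and applies it only when it
    // falls inside [min, max]; otherwise *value_to_update is left untouched.
    void RetrieveFieldTrialValueSketch(const std::string& trial_name,
                                       float min,
                                       float max,
                                       float* value_to_update) {
      const std::string str = webrtc::field_trial::FindFullName(trial_name);
      // An empty key makes the parser accept a bare value such as "0.02".
      webrtc::FieldTrialParameter<double> param(/*key=*/"", *value_to_update);
      webrtc::ParseFieldTrial({&param}, str);
      const float v = static_cast<float>(param.Get());
      if (v >= min && v <= max) {
        *value_to_update = v;
      }
    }

This matches how the Aec3SuppressorAntiHowlingGainOverride test below passes a bare "0.02" as the trial payload.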
+ RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing); + RetrieveFieldTrialValue( + "WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing_delay_found); + + return adjusted_cfg; +} + +class EchoCanceller3::RenderWriter { + public: + RenderWriter(ApmDataDumper* data_dumper, + const EchoCanceller3Config& config, + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue, + size_t num_bands, + size_t num_channels); + + RenderWriter() = delete; + RenderWriter(const RenderWriter&) = delete; + RenderWriter& operator=(const RenderWriter&) = delete; + + ~RenderWriter(); + void Insert(const AudioBuffer& input); + + private: + ApmDataDumper* data_dumper_; + const size_t num_bands_; + const size_t num_channels_; + std::unique_ptr high_pass_filter_; + std::vector>> render_queue_input_frame_; + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue_; +}; + +EchoCanceller3::RenderWriter::RenderWriter( + ApmDataDumper* data_dumper, + const EchoCanceller3Config& config, + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue, + size_t num_bands, + size_t num_channels) + : data_dumper_(data_dumper), + num_bands_(num_bands), + num_channels_(num_channels), + render_queue_input_frame_( + num_bands_, + std::vector>( + num_channels_, + std::vector(AudioBuffer::kSplitBandSize, 0.f))), + render_transfer_queue_(render_transfer_queue) { + RTC_DCHECK(data_dumper); + if (config.filter.high_pass_filter_echo_reference) { + high_pass_filter_ = std::make_unique(16000, num_channels); + } +} + +EchoCanceller3::RenderWriter::~RenderWriter() = default; + +void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) { + RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band()); + RTC_DCHECK_EQ(num_bands_, input.num_bands()); + RTC_DCHECK_EQ(num_channels_, input.num_channels()); + + // TODO(bugs.webrtc.org/8759) Temporary work-around. 
+
+std::atomic<int> EchoCanceller3::instance_count_(0);
+
+EchoCanceller3::EchoCanceller3(
+    const EchoCanceller3Config& config,
+    const absl::optional<EchoCanceller3Config>& multichannel_config,
+    int sample_rate_hz,
+    size_t num_render_channels,
+    size_t num_capture_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_(AdjustConfig(config)),
+      sample_rate_hz_(sample_rate_hz),
+      num_bands_(NumBandsForRate(sample_rate_hz_)),
+      num_render_input_channels_(num_render_channels),
+      num_capture_channels_(num_capture_channels),
+      config_selector_(AdjustConfig(config),
+                       multichannel_config,
+                       num_render_input_channels_),
+      multichannel_content_detector_(
+          config_selector_.active_config().multi_channel.detect_stereo_content,
+          num_render_input_channels_,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_threshold,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_timeout_threshold_seconds,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_hysteresis_seconds),
+      output_framer_(num_bands_, num_capture_channels_),
+      capture_blocker_(num_bands_, num_capture_channels_),
+      render_transfer_queue_(
+          kRenderTransferQueueSizeFrames,
+          std::vector<std::vector<std::vector<float>>>(
+              num_bands_,
+              std::vector<std::vector<float>>(
+                  num_render_input_channels_,
+                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
+          Aec3RenderQueueItemVerifier(num_bands_,
+                                      num_render_input_channels_,
+                                      AudioBuffer::kSplitBandSize)),
+      render_queue_output_frame_(
+          num_bands_,
+          std::vector<std::vector<float>>(
+              num_render_input_channels_,
+              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
+      render_block_(num_bands_, num_render_input_channels_),
+      capture_block_(num_bands_, num_capture_channels_),
+      capture_sub_frame_view_(
+          num_bands_,
+          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
+
+  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
+    block_delay_buffer_.reset(new BlockDelayBuffer(
+        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
+        config_.delay.fixed_capture_delay_samples));
+  }
+
+  render_writer_.reset(new RenderWriter(
+      data_dumper_.get(), config_selector_.active_config(),
+      &render_transfer_queue_, num_bands_, num_render_input_channels_));
+
+  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
+  RTC_DCHECK_GE(kMaxNumBands, num_bands_);
+
+  if (config_selector_.active_config().filter.export_linear_aec_output) {
+    linear_output_framer_.reset(
+        new BlockFramer(/*num_bands=*/1, num_capture_channels_));
+    linear_output_block_ =
+        std::make_unique<Block>(/*num_bands=*/1, num_capture_channels_);
+    linear_output_sub_frame_view_ =
+        std::vector<std::vector<rtc::ArrayView<float>>>(
+            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
+  }
+
+  Initialize();
+
+  RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
+                   << " Hz, num render channels: "
+                   << num_render_input_channels_
+                   << ", num capture channels: " << num_capture_channels_;
+}
+
+EchoCanceller3::~EchoCanceller3() = default;
+
+void EchoCanceller3::Initialize() {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+
+  num_render_channels_to_aec_ =
+      multichannel_content_detector_.IsProperMultiChannelContentDetected()
+          ? num_render_input_channels_
+          : 1;
+
+  config_selector_.Update(
+      multichannel_content_detector_.IsProperMultiChannelContentDetected());
+
+  render_block_.SetNumChannels(num_render_channels_to_aec_);
+
+  render_blocker_.reset(
+      new FrameBlocker(num_bands_, num_render_channels_to_aec_));
+
+  block_processor_.reset(BlockProcessor::Create(
+      config_selector_.active_config(), sample_rate_hz_,
+      num_render_channels_to_aec_, num_capture_channels_));
+
+  render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
+      num_bands_,
+      std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
+}
+
+void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
+  RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
+
+  RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
+  data_dumper_->DumpRaw("aec3_call_order",
+                        static_cast<int>(EchoCanceller3ApiCall::kRender));
+
+  return render_writer_->Insert(render);
+}
+
+void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
+                        capture.channels_const()[0], sample_rate_hz_, 1);
+  saturated_microphone_signal_ = false;
+  for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
+    saturated_microphone_signal_ |=
+        DetectSaturation(rtc::ArrayView<const float>(
+            capture.channels_const()[channel], capture.num_frames()));
+    if (saturated_microphone_signal_) {
+      break;
+    }
+  }
+}
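DetectSaturation is defined earlier in this file and is not part of the hunk; its role is to flag samples at or near the int16 rails so that ProcessCapture below can tell the block processor about clipping. A sketch of such a check, where the exact guard value is an assumption:

    #include "api/array_view.h"

    // Flags a frame as saturated when any sample sits close to the int16
    // full-scale limits; the 32700.f threshold here is assumed, not quoted.
    bool DetectSaturationSketch(rtc::ArrayView<const float> y) {
      for (float sample : y) {
        if (sample >= 32700.f || sample <= -32700.f) {
          return true;
        }
      }
      return false;
    }

The CaptureSaturation unit test below injects -32768.f and 32767.f samples and expects exactly one frame's worth of blocks to be reported as saturated.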
+
+void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
+  ProcessCapture(capture, nullptr, level_change);
+}
+
+void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
+                                    AudioBuffer* linear_output,
+                                    bool level_change) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  RTC_DCHECK(capture);
+  RTC_DCHECK_EQ(num_bands_, capture->num_bands());
+  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
+  RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
+  data_dumper_->DumpRaw("aec3_call_order",
+                        static_cast<int>(EchoCanceller3ApiCall::kCapture));
+
+  if (linear_output && !linear_output_framer_) {
+    RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
+                         "properly configuring AEC3.";
+    RTC_DCHECK_NOTREACHED();
+  }
+
+  // Report capture call in the metrics and periodically update API call
+  // metrics.
+  api_call_metrics_.ReportCaptureCall();
+
+  // Optionally delay the capture signal.
+  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
+    RTC_DCHECK(block_delay_buffer_);
+    block_delay_buffer_->DelaySignal(capture);
+  }
+
+  rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
+      &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
+
+  data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
+
+  EmptyRenderQueue();
+
+  ProcessCaptureFrameContent(
+      linear_output, capture, level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, 0, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &linear_output_sub_frame_view_,
+      &capture_block_, &capture_sub_frame_view_);
+
+  ProcessCaptureFrameContent(
+      linear_output, capture, level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, 1, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &linear_output_sub_frame_view_,
+      &capture_block_, &capture_sub_frame_view_);
+
+  ProcessRemainingCaptureFrameContent(
+      level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &capture_block_);
+
+  data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
+                        &capture->split_bands(0)[0][0], 16000, 1);
+}
+
+EchoControl::Metrics EchoCanceller3::GetMetrics() const {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  Metrics metrics;
+  block_processor_->GetMetrics(&metrics);
+  return metrics;
+}
+
+void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  block_processor_->SetAudioBufferDelay(delay_ms);
+}
+
+void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  block_processor_->SetCaptureOutputUsage(capture_output_used);
+}
+
+bool EchoCanceller3::ActiveProcessing() const {
+  return true;
+}
+
+EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
+  EchoCanceller3Config cfg;
+  // Use a shorter and more rapidly adapting coarse filter to compensate for
+  // the increased number of total filter parameters to adapt.
+  cfg.filter.coarse.length_blocks = 11;
+  cfg.filter.coarse.rate = 0.95f;
+  cfg.filter.coarse_initial.length_blocks = 11;
+  cfg.filter.coarse_initial.rate = 0.95f;
+
+  // Use a more conservative suppressor behavior for non-nearend speech.
+  cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
+  cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
+  return cfg;
+}
+
+void EchoCanceller3::SetBlockProcessorForTesting(
+    std::unique_ptr<BlockProcessor> block_processor) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  RTC_DCHECK(block_processor);
+  block_processor_ = std::move(block_processor);
+}
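Taken together, the intended driving pattern is render first, then capture, once per 10 ms frame; the unit test helpers later in this patch (RunAecInStereo and friends) exercise exactly this order. A condensed usage sketch:

    #include "modules/audio_processing/aec3/echo_canceller3.h"
    #include "modules/audio_processing/audio_buffer.h"

    // One 10 ms iteration; both buffers are assumed to be band-split already
    // where the sample rate requires it.
    void ProcessOneFrame(webrtc::EchoCanceller3& aec3,
                         webrtc::AudioBuffer& render,
                         webrtc::AudioBuffer& capture) {
      aec3.AnalyzeRender(&render);    // Queue the far-end (loudspeaker) frame.
      aec3.AnalyzeCapture(&capture);  // Full-band saturation analysis.
      aec3.ProcessCapture(&capture, /*level_change=*/false);  // Remove echo.
    }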
+
+void EchoCanceller3::EmptyRenderQueue() {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  bool frame_to_buffer =
+      render_transfer_queue_.Remove(&render_queue_output_frame_);
+  while (frame_to_buffer) {
+    // Report render call in the metrics.
+    api_call_metrics_.ReportRenderCall();
+
+    if (multichannel_content_detector_.UpdateDetection(
+            render_queue_output_frame_)) {
+      // Reinitialize the AEC when proper stereo is detected.
+      Initialize();
+    }
+
+    // Buffer frame content.
+    BufferRenderFrameContent(
+        /*proper_downmix_needed=*/multichannel_content_detector_
+            .IsTemporaryMultiChannelContentDetected(),
+        &render_queue_output_frame_, 0, render_blocker_.get(),
+        block_processor_.get(), &render_block_, &render_sub_frame_view_);
+
+    BufferRenderFrameContent(
+        /*proper_downmix_needed=*/multichannel_content_detector_
+            .IsTemporaryMultiChannelContentDetected(),
+        &render_queue_output_frame_, 1, render_blocker_.get(),
+        block_processor_.get(), &render_block_, &render_sub_frame_view_);
+
+    BufferRemainingRenderFrameContent(render_blocker_.get(),
+                                      block_processor_.get(), &render_block_);
+
+    frame_to_buffer =
+        render_transfer_queue_.Remove(&render_queue_output_frame_);
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h
new file mode 100644
index 0000000000..7bf8e51a4b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h
@@ -0,0 +1,230 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
+
+#include <stddef.h>
+
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
+#include "modules/audio_processing/aec3/block_delay_buffer.h"
+#include "modules/audio_processing/aec3/block_framer.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/config_selector.h"
+#include "modules/audio_processing/aec3/frame_blocker.h"
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/swap_queue.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+// Method for adjusting config parameter dependencies.
+// Only to be used externally to AEC3 for testing purposes.
+// TODO(webrtc:5298): Move this to a separate file.
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
+
+// Functor for verifying the invariance of the frames being put into the render
+// queue.
+class Aec3RenderQueueItemVerifier {
+ public:
+  Aec3RenderQueueItemVerifier(size_t num_bands,
+                              size_t num_channels,
+                              size_t frame_length)
+      : num_bands_(num_bands),
+        num_channels_(num_channels),
+        frame_length_(frame_length) {}
+
+  bool operator()(
+      const std::vector<std::vector<std::vector<float>>>& v) const {
+    if (v.size() != num_bands_) {
+      return false;
+    }
+    for (const auto& band : v) {
+      if (band.size() != num_channels_) {
+        return false;
+      }
+      for (const auto& channel : band) {
+        if (channel.size() != frame_length_) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  const size_t num_bands_;
+  const size_t num_channels_;
+  const size_t frame_length_;
+};
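A small sketch of what this verifier guarantees once plugged into the SwapQueue: only frames with exactly the configured bands/channels/length shape are admitted.

    #include <vector>
    #include "modules/audio_processing/aec3/echo_canceller3.h"

    void VerifierShapeSketch() {
      webrtc::Aec3RenderQueueItemVerifier verifier(/*num_bands=*/3,
                                                   /*num_channels=*/2,
                                                   /*frame_length=*/160);
      std::vector<std::vector<std::vector<float>>> frame(
          3, std::vector<std::vector<float>>(2, std::vector<float>(160)));
      bool ok = verifier(frame);   // true: the shape matches.
      frame[0].pop_back();         // Drop one channel from band 0...
      bool bad = verifier(frame);  // ...and the frame is now rejected.
      (void)ok;
      (void)bad;
    }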
+
+// Main class for the echo canceller3.
+// It does three things:
+// -Receives 10 ms frames of band-split audio.
+// -Provides the lower level echo canceller functionality with
+//  blocks of 64 samples of audio data.
+// -Partially handles the jitter in the render and capture API
+//  call sequence.
+//
+// The class is supposed to be used in a non-concurrent manner apart from the
+// AnalyzeRender call, which can be called concurrently with the other methods.
+class EchoCanceller3 : public EchoControl {
+ public:
+  EchoCanceller3(
+      const EchoCanceller3Config& config,
+      const absl::optional<EchoCanceller3Config>& multichannel_config,
+      int sample_rate_hz,
+      size_t num_render_channels,
+      size_t num_capture_channels);
+
+  ~EchoCanceller3() override;
+
+  EchoCanceller3(const EchoCanceller3&) = delete;
+  EchoCanceller3& operator=(const EchoCanceller3&) = delete;
+
+  // Analyzes and stores an internal copy of the split-band domain render
+  // signal.
+  void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
+  // Analyzes the full-band domain capture signal to detect signal saturation.
+  void AnalyzeCapture(AudioBuffer* capture) override {
+    AnalyzeCapture(*capture);
+  }
+  // Processes the split-band domain capture signal in order to remove any echo
+  // present in the signal.
+  void ProcessCapture(AudioBuffer* capture, bool level_change) override;
+  // As above, but also returns the linear filter output.
+  void ProcessCapture(AudioBuffer* capture,
+                      AudioBuffer* linear_output,
+                      bool level_change) override;
+  // Collect current metrics from the echo canceller.
+  Metrics GetMetrics() const override;
+  // Provides an optional external estimate of the audio buffer delay.
+  void SetAudioBufferDelay(int delay_ms) override;
+
+  // Specifies whether the capture output will be used. The purpose of this is
+  // to allow the echo controller to deactivate some of the processing when the
+  // resulting output is anyway not used, for instance when the endpoint is
+  // muted.
+  void SetCaptureOutputUsage(bool capture_output_used) override;
+
+  bool ActiveProcessing() const override;
+
+  // Signals whether an external detector has detected echo leakage from the
+  // echo canceller.
+  // Note that in the case echo leakage has been flagged, it should be
+  // unflagged once it is no longer occurring.
+  void UpdateEchoLeakageStatus(bool leakage_detected) {
+    RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+    block_processor_->UpdateEchoLeakageStatus(leakage_detected);
+  }
+
+  // Produces a default configuration for multichannel.
+  static EchoCanceller3Config CreateDefaultMultichannelConfig();
+
+ private:
+  friend class EchoCanceller3Tester;
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           DetectionOfProperStereoUsingThreshold);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           DetectionOfProperStereoUsingHysteresis);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           StereoContentDetectionForMonoSignals);
+
+  class RenderWriter;
+
+  // (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
+  // creation as well as during reconfiguration.
+  void Initialize();
+
+  // Only for testing. Replaces the internal block processor.
+  void SetBlockProcessorForTesting(
+      std::unique_ptr<BlockProcessor> block_processor);
+
+  // Only for testing. Returns whether stereo processing is active.
+  bool StereoRenderProcessingActiveForTesting() const {
+    return multichannel_content_detector_
+        .IsProperMultiChannelContentDetected();
+  }
+
+  // Only for testing.
+  const EchoCanceller3Config& GetActiveConfigForTesting() const {
+    return config_selector_.active_config();
+  }
+
+  // Empties the render SwapQueue.
+  void EmptyRenderQueue();
+
+  // Analyzes and stores an internal copy of the split-band domain render
+  // signal.
+  void AnalyzeRender(const AudioBuffer& render);
+  // Analyzes the full-band domain capture signal to detect signal saturation.
+  void AnalyzeCapture(const AudioBuffer& capture);
+
+  rtc::RaceChecker capture_race_checker_;
+  rtc::RaceChecker render_race_checker_;
+
+  // State that is accessed by the AnalyzeRender call.
+  std::unique_ptr<RenderWriter> render_writer_
+      RTC_GUARDED_BY(render_race_checker_);
+
+  // State that may be accessed by the capture thread.
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  const int sample_rate_hz_;
+  const int num_bands_;
+  const size_t num_render_input_channels_;
+  size_t num_render_channels_to_aec_;
+  const size_t num_capture_channels_;
+  ConfigSelector config_selector_;
+  MultiChannelContentDetector multichannel_content_detector_;
+  std::unique_ptr<BlockFramer> linear_output_framer_
+      RTC_GUARDED_BY(capture_race_checker_);
+  BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
+  FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<FrameBlocker> render_blocker_
+      RTC_GUARDED_BY(capture_race_checker_);
+  SwapQueue<std::vector<std::vector<std::vector<float>>>,
+            Aec3RenderQueueItemVerifier>
+      render_transfer_queue_;
+  std::unique_ptr<BlockProcessor> block_processor_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
+      RTC_GUARDED_BY(capture_race_checker_);
+  bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
+      false;
+  Block render_block_ RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<Block> linear_output_block_
+      RTC_GUARDED_BY(capture_race_checker_);
+  Block capture_block_ RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
+      RTC_GUARDED_BY(capture_race_checker_);
+  ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc
new file mode 100644
index 0000000000..ad126af4d3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -0,0 +1,1160 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_canceller3.h"
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/frame_blocker.h"
+#include "modules/audio_processing/aec3/mock/mock_block_processor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/high_pass_filter.h"
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::_;
+using ::testing::StrictMock;
+
+// Populates the frame with linearly increasing sample values for each band,
+// with a band-specific offset, in order to allow simple bitexactness
+// verification for each band.
+void PopulateInputFrame(size_t frame_length,
+                        size_t num_bands,
+                        size_t frame_index,
+                        float* const* frame,
+                        int offset) {
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      float value = static_cast<float>(frame_index * frame_length + i) + offset;
+      frame[k][i] = (value > 0 ? 5000 * k + value : 0);
+    }
+  }
+}
+
+// Populates the frame with linearly increasing sample values.
+void PopulateInputFrame(size_t frame_length,
+                        size_t frame_index,
+                        float* frame,
+                        int offset) {
+  for (size_t i = 0; i < frame_length; ++i) {
+    float value = static_cast<float>(frame_index * frame_length + i) + offset;
+    frame[i] = std::max(value, 0.f);
+  }
+}
+
+// Verifies that the samples in the output frame are identical to the samples
+// that were produced for the input frame, shifted by an offset that
+// compensates for buffering delays.
+bool VerifyOutputFrameBitexactness(size_t frame_length,
+                                   size_t num_bands,
+                                   size_t frame_index,
+                                   const float* const* frame,
+                                   int offset) {
+  float reference_frame_data[kMaxNumBands][2 * kSubFrameLength];
+  float* reference_frame[kMaxNumBands];
+  for (size_t k = 0; k < num_bands; ++k) {
+    reference_frame[k] = &reference_frame_data[k][0];
+  }
+
+  PopulateInputFrame(frame_length, num_bands, frame_index, reference_frame,
+                     offset);
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      if (reference_frame[k][i] != frame[k][i]) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool VerifyOutputFrameBitexactness(rtc::ArrayView<const float> reference,
+                                   rtc::ArrayView<const float> frame,
+                                   int offset) {
+  for (size_t k = 0; k < frame.size(); ++k) {
+    int reference_index = static_cast<int>(k) + offset;
+    if (reference_index >= 0) {
+      if (reference[reference_index] != frame[k]) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
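Worked example for the helpers above: the multi-band variant writes 5000*k + (frame_index*frame_length + i + offset) into band k, clamped at zero, so each band carries a distinct, predictable ramp. The -64 offsets used by the tests below account for the one-block (64-sample) latency of the FrameBlocker/BlockFramer pipeline.

    #include "rtc_base/checks.h"

    void PopulateInputFrameWorkedExample() {
      // frame_length = 160, offset = 0: band 1, frame 2, sample 3 receives
      // 5000 * 1 + (2 * 160 + 3) = 5323.f.
      const float expected = 5000 * 1 + (2 * 160 + 3);
      RTC_CHECK_EQ(5323.f, expected);
    }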
+
+// Class for testing that the capture data is properly received by the block
+// processor and that the processor data is properly passed to the
+// EchoCanceller3 output.
+class CaptureTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit CaptureTransportVerificationProcessor(size_t num_bands) {}
+
+  CaptureTransportVerificationProcessor() = delete;
+  CaptureTransportVerificationProcessor(
+      const CaptureTransportVerificationProcessor&) = delete;
+  CaptureTransportVerificationProcessor& operator=(
+      const CaptureTransportVerificationProcessor&) = delete;
+
+  ~CaptureTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      Block* linear_output,
+                      Block* capture_block) override {}
+
+  void BufferRender(const Block& block) override {}
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+  void SetAudioBufferDelay(int delay_ms) override {}
+
+  void SetCaptureOutputUsage(bool capture_output_used) {}
+};
+
+// Class for testing that the render data is properly received by the block
+// processor.
+class RenderTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit RenderTransportVerificationProcessor(size_t num_bands) {}
+
+  RenderTransportVerificationProcessor() = delete;
+  RenderTransportVerificationProcessor(
+      const RenderTransportVerificationProcessor&) = delete;
+  RenderTransportVerificationProcessor& operator=(
+      const RenderTransportVerificationProcessor&) = delete;
+
+  ~RenderTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      Block* linear_output,
+                      Block* capture_block) override {
+    Block render_block = received_render_blocks_.front();
+    received_render_blocks_.pop_front();
+    capture_block->Swap(render_block);
+  }
+
+  void BufferRender(const Block& block) override {
+    received_render_blocks_.push_back(block);
+  }
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+  void SetAudioBufferDelay(int delay_ms) override {}
+
+  void SetCaptureOutputUsage(bool capture_output_used) {}
+
+ private:
+  std::deque<Block> received_render_blocks_;
+};
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+std::string ProduceDebugText(int sample_rate_hz, int variant) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz << ", variant: " << variant;
+  return ss.Release();
+}
+
+void RunAecInStereo(AudioBuffer& buffer,
+                    EchoCanceller3& aec3,
+                    float channel_0_value,
+                    float channel_1_value) {
+  rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+  rtc::ArrayView<float> data_channel_1(&buffer.channels()[1][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_1.begin(), data_channel_1.end(), channel_1_value);
+  aec3.AnalyzeRender(&buffer);
+  aec3.AnalyzeCapture(&buffer);
+  aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
+void RunAecInSMono(AudioBuffer& buffer,
+                   EchoCanceller3& aec3,
+                   float channel_0_value) {
+  rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+  aec3.AnalyzeRender(&buffer);
+  aec3.AnalyzeCapture(&buffer);
+  aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
+}  // namespace
+
+class EchoCanceller3Tester {
+ public:
+  explicit EchoCanceller3Tester(int sample_rate_hz)
+      : sample_rate_hz_(sample_rate_hz),
+        num_bands_(NumBandsForRate(sample_rate_hz_)),
+        frame_length_(160),
+        fullband_frame_length_(rtc::CheckedDivExact(sample_rate_hz_, 100)),
+        capture_buffer_(fullband_frame_length_ * 100,
+                        1,
+                        fullband_frame_length_ * 100,
+                        1,
+                        fullband_frame_length_ * 100,
+                        1),
+        render_buffer_(fullband_frame_length_ * 100,
+                       1,
+                       fullband_frame_length_ * 100,
+                       1,
+                       fullband_frame_length_ * 100,
+                       1) {}
+
+  EchoCanceller3Tester() = delete;
+  EchoCanceller3Tester(const EchoCanceller3Tester&) = delete;
+  EchoCanceller3Tester& operator=(const EchoCanceller3Tester&) = delete;
+
+  // Verifies that the capture data is properly received by the block processor
+  // and that the processor data is properly passed to the EchoCanceller3
+  // output.
+  void RunCaptureTransportVerificationTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<CaptureTransportVerificationProcessor>(num_bands_));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      EXPECT_TRUE(VerifyOutputFrameBitexactness(
+          frame_length_, num_bands_, frame_index,
+          &capture_buffer_.split_bands(0)[0], -64));
+    }
+  }
+
+  // Test method for testing that the render data is properly received by the
+  // block processor.
+  void RunRenderTransportVerificationTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
+
+    std::vector<std::vector<float>> render_input(1);
+    std::vector<float> capture_output;
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 100);
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      for (size_t k = 0; k < frame_length_; ++k) {
+        render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
+      }
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      for (size_t k = 0; k < frame_length_; ++k) {
+        capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
+      }
+    }
+
+    EXPECT_TRUE(
+        VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
+  }
+
+  // Verifies that information about echo path changes is properly propagated
+  // to the block processor.
+  // The cases tested are:
+  // -That no set echo path change flags are received when there is no echo
+  //  path change.
+  // -That set echo path change flags are received and continue to be received
+  //  as long as echo path changes are flagged.
+  // -That set echo path change flags are no longer received when echo path
+  //  change events stop being flagged.
+  enum class EchoPathChangeTestVariant { kNone, kOneSticky, kOneNonSticky };
+
+  void RunEchoPathChangeVerificationTest(
+      EchoPathChangeTestVariant echo_path_change_test_variant) {
+    constexpr size_t kNumFullBlocksPerFrame = 160 / kBlockSize;
+    constexpr size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+
+    switch (echo_path_change_test_variant) {
+      case EchoPathChangeTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case EchoPathChangeTestVariant::kOneSticky:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case EchoPathChangeTestVariant::kOneNonSticky:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+        break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      bool echo_path_change = false;
+      switch (echo_path_change_test_variant) {
+        case EchoPathChangeTestVariant::kNone:
+          break;
+        case EchoPathChangeTestVariant::kOneSticky:
+          echo_path_change = true;
+          break;
+        case EchoPathChangeTestVariant::kOneNonSticky:
+          if (frame_index == 0) {
+            echo_path_change = true;
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, echo_path_change);
+    }
+  }
+
+  // Test for verifying that echo leakage information is being properly passed
+  // to the processor.
+  // The cases tested are:
+  // -That no method calls are received when they should not.
+  // -That false values are received each time they are flagged.
+  // -That true values are received each time they are flagged.
+  // -That a false value is received when flagged after a true value has been
+  //  flagged.
+  enum class EchoLeakageTestVariant {
+    kNone,
+    kFalseSticky,
+    kTrueSticky,
+    kTrueNonSticky
+  };
+
+  void RunEchoLeakageVerificationTest(
+      EchoLeakageTestVariant leakage_report_variant) {
+    constexpr size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, ProcessCapture(_, _, _, _))
+        .Times(kExpectedNumBlocksToProcess);
+
+    switch (leakage_report_variant) {
+      case EchoLeakageTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+        break;
+      case EchoLeakageTestVariant::kFalseSticky:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false))
+            .Times(1);
+        break;
+      case EchoLeakageTestVariant::kTrueSticky:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true))
+            .Times(1);
+        break;
+      case EchoLeakageTestVariant::kTrueNonSticky: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true))
+            .Times(1);
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false))
+            .Times(kNumFramesToProcess - 1);
+      } break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      switch (leakage_report_variant) {
+        case EchoLeakageTestVariant::kNone:
+          break;
+        case EchoLeakageTestVariant::kFalseSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(false);
+          }
+          break;
+        case EchoLeakageTestVariant::kTrueSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(true);
+          }
+          break;
+        case EchoLeakageTestVariant::kTrueNonSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(true);
+          } else {
+            aec3.UpdateEchoLeakageStatus(false);
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+    }
+  }
+
+  // This verifies that saturation information is properly passed to the
+  // BlockProcessor.
+  // The cases tested are:
+  // -That no saturation event is passed to the processor if there is no
+  //  saturation.
+  // -That one frame with one negative saturated sample value is reported to be
+  //  saturated and that following non-saturated frames are properly reported
+  //  as not being saturated.
+  // -That one frame with one positive saturated sample value is reported to be
+  //  saturated and that following non-saturated frames are properly reported
+  //  as not being saturated.
+  enum class SaturationTestVariant { kNone, kOneNegative, kOnePositive };
+
+  void RunCaptureSaturationVerificationTest(
+      SaturationTestVariant saturation_variant) {
+    const size_t kNumFullBlocksPerFrame = 160 / kBlockSize;
+    const size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+
+    switch (saturation_variant) {
+      case SaturationTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case SaturationTestVariant::kOneNegative: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+      } break;
+      case SaturationTestVariant::kOnePositive: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+      } break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      for (int k = 0; k < fullband_frame_length_; ++k) {
+        capture_buffer_.channels()[0][k] = 0.f;
+      }
+      switch (saturation_variant) {
+        case SaturationTestVariant::kNone:
+          break;
+        case SaturationTestVariant::kOneNegative:
+          if (frame_index == 0) {
+            capture_buffer_.channels()[0][10] = -32768.f;
+          }
+          break;
+        case SaturationTestVariant::kOnePositive:
+          if (frame_index == 0) {
+            capture_buffer_.channels()[0][10] = 32767.f;
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+    }
+  }
+
+  // This test verifies that the swapqueue is able to handle jitter in the
+  // capture and render API calls.
+  void RunRenderSwapQueueVerificationTest() {
+    const EchoCanceller3Config config;
+    EchoCanceller3 aec3(config, /*multichannel_config=*/absl::nullopt,
+                        sample_rate_hz_, 1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
+
+    std::vector<std::vector<float>> render_input(1);
+    std::vector<float> capture_output;
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames;
+         ++frame_index) {
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+
+      for (size_t k = 0; k < frame_length_; ++k) {
+        render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
+      }
+      aec3.AnalyzeRender(&render_buffer_);
+    }
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      if (sample_rate_hz_ > 16000) {
+        capture_buffer_.SplitIntoFrequencyBands();
+      }
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+
+      aec3.ProcessCapture(&capture_buffer_, false);
+      for (size_t k = 0; k < frame_length_; ++k) {
+        capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
+      }
+    }
+
+    EXPECT_TRUE(
+        VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
+  }
+
+  // This test verifies that a buffer overrun in the render swapqueue is
+  // properly reported.
+  void RunRenderPipelineSwapQueueOverrunReturnValueTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+
+    constexpr size_t kRenderTransferQueueSize = 30;
+    for (size_t k = 0; k < 2; ++k) {
+      for (size_t frame_index = 0; frame_index < kRenderTransferQueueSize;
+           ++frame_index) {
+        if (sample_rate_hz_ > 16000) {
+          render_buffer_.SplitIntoFrequencyBands();
+        }
+        PopulateInputFrame(frame_length_, frame_index,
+                           &render_buffer_.channels()[0][0], 0);
+
+        aec3.AnalyzeRender(&render_buffer_);
+      }
+    }
+  }
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  // Verifies that the check for the number of bands in the AnalyzeRender
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunAnalyzeRenderNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt,
+                        aec3_sample_rate_hz, 1, 1);
+    PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0],
+                       0);
+
+    EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
+  }
+
+  // Verifies that the check for the number of bands in the ProcessCapture
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunProcessCaptureNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  // Verifies that the check for the number of bands in the AnalyzeRender
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunAnalyzeRenderNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt,
+                        aec3_sample_rate_hz, 1, 1);
+    PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0);
+
+    EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
+  }
+
+  // Verifies that the check for the number of bands in the ProcessCapture
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunProcessCaptureNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt,
+                        aec3_sample_rate_hz, 1, 1);
+    PopulateInputFrame(frame_length_, num_bands_, 0,
+                       &capture_buffer_.split_bands_f(0)[0], 100);
+    EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
+  }
+
+#endif
+
+ private:
+  void OptionalBandSplit() {
+    if (sample_rate_hz_ > 16000) {
+      capture_buffer_.SplitIntoFrequencyBands();
+      render_buffer_.SplitIntoFrequencyBands();
+    }
+  }
+
+  static constexpr size_t kNumFramesToProcess = 20;
+  const int sample_rate_hz_;
+  const size_t num_bands_;
+  const size_t frame_length_;
+  const int fullband_frame_length_;
+  AudioBuffer capture_buffer_;
+  AudioBuffer render_buffer_;
+};
+
+TEST(EchoCanceller3Buffering, CaptureBitexactness) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunCaptureTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderBitexactness) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunRenderTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueue) {
+  EchoCanceller3Tester(16000).RunRenderSwapQueueVerificationTest();
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueueOverrunReturnValue) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate)
+        .RunRenderPipelineSwapQueueOverrunReturnValueTest();
+  }
+}
+
+TEST(EchoCanceller3Messaging, CaptureSaturation) {
+  auto variants = {EchoCanceller3Tester::SaturationTestVariant::kNone,
+                   EchoCanceller3Tester::SaturationTestVariant::kOneNegative,
+                   EchoCanceller3Tester::SaturationTestVariant::kOnePositive};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunCaptureSaturationVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoPathChange) {
+  auto variants = {
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kNone,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneSticky,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneNonSticky};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoPathChangeVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoLeakage) {
+  auto variants = {
+      EchoCanceller3Tester::EchoLeakageTestVariant::kNone,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kFalseSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueNonSticky};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoLeakageVerificationTest(variant);
+    }
+  }
+}
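+// Illustrative sketch only (not an upstream helper; the trial name is a
+// placeholder): the field-trial tests below share one pattern, namely
+// evaluating AdjustConfig() once without and once within the lifetime of a
+// webrtc::test::ScopedFieldTrials object and comparing the results.
+static void SketchScopedFieldTrialPattern() {
+  EchoCanceller3Config before = AdjustConfig(EchoCanceller3Config());
+  {
+    webrtc::test::ScopedFieldTrials trials("WebRTC-SomeAec3Trial/Enabled/");
+    EchoCanceller3Config after = AdjustConfig(EchoCanceller3Config());
+    // `after` reflects the override only while `trials` is alive.
+    static_cast<void>(after);
+  }
+  static_cast<void>(before);
+}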
+// Tests the field trial override for the anti-howling gain.
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorAntiHowlingGainOverride) {
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  ASSERT_EQ(
+      default_config.suppressor.high_bands_suppression.anti_howling_gain,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorAntiHowlingGainOverride/0.02/");
+  adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(
+      default_config.suppressor.high_bands_suppression.anti_howling_gain,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+  EXPECT_FLOAT_EQ(
+      0.02f,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+}
+
+// Tests the field trial override for the enforcement of a low active render
+// limit.
+TEST(EchoCanceller3FieldTrials, Aec3EnforceLowActiveRenderLimit) {
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  ASSERT_EQ(default_config.render_levels.active_render_limit,
+            adjusted_config.render_levels.active_render_limit);
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3EnforceLowActiveRenderLimit/Enabled/");
+  adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(default_config.render_levels.active_render_limit,
+            adjusted_config.render_levels.active_render_limit);
+  EXPECT_FLOAT_EQ(50.f, adjusted_config.render_levels.active_render_limit);
+}
+
+// Tests the field trial-based override of the suppressor parameters when all
+// parameters are passed jointly.
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorTuningOverrideAllParams) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorTuningOverride/"
+      "nearend_tuning_mask_lf_enr_transparent:0.1,nearend_tuning_mask_lf_enr_"
+      "suppress:0.2,nearend_tuning_mask_hf_enr_transparent:0.3,nearend_tuning_"
+      "mask_hf_enr_suppress:0.4,nearend_tuning_max_inc_factor:0.5,nearend_"
+      "tuning_max_dec_factor_lf:0.6,normal_tuning_mask_lf_enr_transparent:0.7,"
+      "normal_tuning_mask_lf_enr_suppress:0.8,normal_tuning_mask_hf_enr_"
+      "transparent:0.9,normal_tuning_mask_hf_enr_suppress:1.0,normal_tuning_"
+      "max_inc_factor:1.1,normal_tuning_max_dec_factor_lf:1.2,dominant_nearend_"
+      "detection_enr_threshold:1.3,dominant_nearend_detection_enr_exit_"
+      "threshold:1.4,dominant_nearend_detection_snr_threshold:1.5,dominant_"
+      "nearend_detection_hold_duration:10,dominant_nearend_detection_trigger_"
+      "threshold:11/");
+
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+            default_config.suppressor.nearend_tuning.max_inc_factor);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+            default_config.suppressor.nearend_tuning.max_dec_factor_lf);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_lf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_lf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_hf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_hf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.max_inc_factor,
+            default_config.suppressor.normal_tuning.max_inc_factor);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+            default_config.suppressor.normal_tuning.max_dec_factor_lf);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.enr_threshold,
+            default_config.suppressor.dominant_nearend_detection.enr_threshold);
+  ASSERT_NE(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      default_config.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.snr_threshold,
+            default_config.suppressor.dominant_nearend_detection.snr_threshold);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            default_config.suppressor.dominant_nearend_detection.hold_duration);
+  ASSERT_NE(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      default_config.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent, 0.1);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress, 0.2);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent, 0.3);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress, 0.4);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+                  0.5);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+                  0.6);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent, 0.7);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+                  0.8);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent, 0.9);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+                  1.0);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.max_inc_factor, 1.1);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+                  1.2);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_threshold, 1.3);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      1.4);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.snr_threshold, 1.5);
+  EXPECT_EQ(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            10);
+  EXPECT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      11);
+}
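+// Illustrative sketch only (our own helper, not the upstream field-trial
+// parser; assumes <string> and <cstdlib> are available): the override strings
+// used in these tests follow a "TrialName/key1:value1,key2:value2/" layout,
+// and a single "key:value" token can be split as follows:
+static bool SketchParseKeyValue(const std::string& token,
+                                std::string* key,
+                                float* value) {
+  const size_t colon = token.find(':');
+  if (colon == std::string::npos) {
+    return false;
+  }
+  *key = token.substr(0, colon);
+  *value = std::strtof(token.substr(colon + 1).c_str(), nullptr);
+  return true;
+}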
+// Tests the field trial-based override of the suppressor parameters when a
+// single parameter is passed.
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorTuningOverrideOneParam) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorTuningOverride/nearend_tuning_max_inc_factor:0.5/");
+
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+            default_config.suppressor.nearend_tuning.max_dec_factor_lf);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_lf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_lf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_hf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_hf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.max_inc_factor,
+            default_config.suppressor.normal_tuning.max_inc_factor);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+            default_config.suppressor.normal_tuning.max_dec_factor_lf);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.enr_threshold,
+            default_config.suppressor.dominant_nearend_detection.enr_threshold);
+  ASSERT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      default_config.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.snr_threshold,
+            default_config.suppressor.dominant_nearend_detection.snr_threshold);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            default_config.suppressor.dominant_nearend_detection.hold_duration);
+  ASSERT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      default_config.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+            default_config.suppressor.nearend_tuning.max_inc_factor);
+
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+                  0.5);
+}
+
+// Tests the field trial-based override of the exponential decay parameters.
+TEST(EchoCanceller3FieldTrials, Aec3UseNearendReverb) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3UseNearendReverbLen/default_len:0.9,nearend_len:0.8/");
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  EXPECT_FLOAT_EQ(adjusted_config.ep_strength.default_len, 0.9);
+  EXPECT_FLOAT_EQ(adjusted_config.ep_strength.nearend_len, 0.8);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereo) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_threshold = 0.0f;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
+  multichannel_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  multichannel_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, multichannel_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  constexpr float kStereoDetectionThreshold = 2.0f;
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_threshold =
+      kStereoDetectionThreshold;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
+  multichannel_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  multichannel_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, multichannel_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f,
+                 100.0f + kStereoDetectionThreshold - 1.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f,
+                 100.0f + kStereoDetectionThreshold + 10.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> surround_config;
+
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
+  surround_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  surround_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, surround_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  constexpr int kNumFramesPerSecond = 100;
+  for (int k = 0;
+       k < static_cast<int>(
+               kNumFramesPerSecond *
+               mono_config.multi_channel.stereo_detection_hysteresis_seconds);
+       ++k) {
+    RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+
+  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  for (bool detect_stereo_content : {false, true}) {
+    mono_config.multi_channel.detect_stereo_content = detect_stereo_content;
+    multichannel_config = mono_config;
+    mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+    multichannel_config->filter.coarse_initial.length_blocks =
+        kNumBlocksForSurroundConfig;
+
+    AudioBuffer mono_buffer(/*input_rate=*/kSampleRateHz,
+                            /*input_num_channels=*/1,
+                            /*buffer_rate=*/kSampleRateHz,
+                            /*buffer_num_channels=*/1,
+                            /*output_rate=*/kSampleRateHz,
+                            /*output_num_channels=*/1);
+
+    EchoCanceller3 aec3(mono_config, multichannel_config,
+                        /*sample_rate_hz=*/kSampleRateHz,
+                        /*num_render_channels=*/1,
+                        /*num_capture_input_channels=*/1);
+
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+
+    RunAecInSMono(mono_buffer, aec3, 100.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunProcessCaptureNumBandsCheckVerification();
+  }
+}
+
+// Verifies that the check for null input to the capture processing API call
+// works.
+TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) {
+  EXPECT_DEATH(
+      EchoCanceller3(EchoCanceller3Config(),
+                     /*multichannel_config_=*/absl::nullopt, 16000, 1, 1)
+          .ProcessCapture(nullptr, false),
+      "");
+}
+
+// Verifies the check for correct sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoCanceller3InputCheckDeathTest, DISABLED_WrongSampleRate) {
+  ApmDataDumper data_dumper(0);
+  EXPECT_DEATH(
+      EchoCanceller3(EchoCanceller3Config(),
+                     /*multichannel_config_=*/absl::nullopt, 8001, 1, 1),
+      "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc
new file mode 100644
index 0000000000..510e4b8a8d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+
+#include <array>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+EchoPathDelayEstimator::EchoPathDelayEstimator(
+    ApmDataDumper* data_dumper,
+    const EchoCanceller3Config& config,
+    size_t num_capture_channels)
+    : data_dumper_(data_dumper),
+      down_sampling_factor_(config.delay.down_sampling_factor),
+      sub_block_size_(down_sampling_factor_ != 0
+                          ? 
kBlockSize / down_sampling_factor_
+                          : kBlockSize),
+      capture_mixer_(num_capture_channels,
+                     config.delay.capture_alignment_mixing),
+      capture_decimator_(down_sampling_factor_),
+      matched_filter_(
+          data_dumper_,
+          DetectOptimization(),
+          sub_block_size_,
+          kMatchedFilterWindowSizeSubBlocks,
+          config.delay.num_filters,
+          kMatchedFilterAlignmentShiftSizeSubBlocks,
+          config.delay.down_sampling_factor == 8
+              ? config.render_levels.poor_excitation_render_limit_ds8
+              : config.render_levels.poor_excitation_render_limit,
+          config.delay.delay_estimate_smoothing,
+          config.delay.delay_estimate_smoothing_delay_found,
+          config.delay.delay_candidate_detection_threshold,
+          config.delay.detect_pre_echo),
+      matched_filter_lag_aggregator_(data_dumper_,
+                                     matched_filter_.GetMaxFilterLag(),
+                                     config.delay) {
+  RTC_DCHECK(data_dumper);
+  RTC_DCHECK(down_sampling_factor_ > 0);
+}
+
+EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
+
+void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
+  Reset(true, reset_delay_confidence);
+}
+
+absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
+    const DownsampledRenderBuffer& render_buffer,
+    const Block& capture) {
+  std::array<float, kBlockSize> downsampled_capture_data;
+  rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
+                                            sub_block_size_);
+
+  std::array<float, kBlockSize> downmixed_capture;
+  capture_mixer_.ProduceOutput(capture, downmixed_capture);
+  capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
+  data_dumper_->DumpWav("aec3_capture_decimator_output",
+                        downsampled_capture.size(), downsampled_capture.data(),
+                        16000 / down_sampling_factor_, 1);
+  matched_filter_.Update(render_buffer, downsampled_capture,
+                         matched_filter_lag_aggregator_.ReliableDelayFound());
+
+  absl::optional<DelayEstimate> aggregated_matched_filter_lag =
+      matched_filter_lag_aggregator_.Aggregate(
+          matched_filter_.GetBestLagEstimate());
+
+  // Run clockdrift detection.
+  if (aggregated_matched_filter_lag &&
+      (*aggregated_matched_filter_lag).quality ==
+          DelayEstimate::Quality::kRefined)
+    clockdrift_detector_.Update(
+        matched_filter_lag_aggregator_.GetDelayAtHighestPeak());
+
+  // TODO(peah): Move this logging outside of this class once EchoCanceller3
+  // development is done.
+  data_dumper_->DumpRaw(
+      "aec3_echo_path_delay_estimator_delay",
+      aggregated_matched_filter_lag
+          ? static_cast<int>(aggregated_matched_filter_lag->delay *
+                             down_sampling_factor_)
+          : -1);
+
+  // Return the detected delay in samples: the aggregated matched filter lag,
+  // compensated by the down-sampling factor of the correlated signal.
+  if (aggregated_matched_filter_lag) {
+    aggregated_matched_filter_lag->delay *= down_sampling_factor_;
+  }
+
+  if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
+      old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
+    ++consistent_estimate_counter_;
+  } else {
+    consistent_estimate_counter_ = 0;
+  }
+  old_aggregated_lag_ = aggregated_matched_filter_lag;
+  constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
+  if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
+    Reset(false, false);
+  }
+
+  return aggregated_matched_filter_lag;
+}
+
+void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
+                                   bool reset_delay_confidence) {
+  if (reset_lag_aggregator) {
+    matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
+  }
+  matched_filter_.Reset(/*full_reset=*/reset_lag_aggregator);
+  old_aggregated_lag_ = absl::nullopt;
+  consistent_estimate_counter_ = 0;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
new file mode 100644
index 0000000000..b24d0a29ec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/alignment_mixer.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/clockdrift_detector.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/matched_filter.h"
+#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+struct DownsampledRenderBuffer;
+struct EchoCanceller3Config;
+
+// Estimates the delay of the echo path.
+class EchoPathDelayEstimator {
+ public:
+  EchoPathDelayEstimator(ApmDataDumper* data_dumper,
+                         const EchoCanceller3Config& config,
+                         size_t num_capture_channels);
+  ~EchoPathDelayEstimator();
+
+  EchoPathDelayEstimator(const EchoPathDelayEstimator&) = delete;
+  EchoPathDelayEstimator& operator=(const EchoPathDelayEstimator&) = delete;
+
+  // Resets the estimation. If the delay confidence is reset, the reset
+  // behavior is as if the call is restarted.
+  void Reset(bool reset_delay_confidence);
+
+  // Produces a delay estimate if one is available.
+  absl::optional<DelayEstimate> EstimateDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      const Block& capture);
+
+  // Logs delay estimator properties.
+  void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
+    matched_filter_.LogFilterProperties(sample_rate_hz, shift,
+                                        down_sampling_factor_);
+  }
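+  // Illustrative sketch only (this helper is ours and does not exist
+  // upstream): EstimateDelay() reports the matched-filter lag in the
+  // downsampled domain and converts it back to fullband samples by scaling
+  // with the down-sampling factor, e.g. a lag of 25 at a factor of 4 maps to
+  // 100 fullband samples.
+  static constexpr size_t SketchLagToFullbandSamples(
+      size_t lag, size_t down_sampling_factor) {
+    return lag * down_sampling_factor;
+  }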
+  // Returns the level of detected clockdrift.
+  ClockdriftDetector::Level Clockdrift() const {
+    return clockdrift_detector_.ClockdriftLevel();
+  }
+
+ private:
+  ApmDataDumper* const data_dumper_;
+  const size_t down_sampling_factor_;
+  const size_t sub_block_size_;
+  AlignmentMixer capture_mixer_;
+  Decimator capture_decimator_;
+  MatchedFilter matched_filter_;
+  MatchedFilterLagAggregator matched_filter_lag_aggregator_;
+  absl::optional<DelayEstimate> old_aggregated_lag_;
+  size_t consistent_estimate_counter_ = 0;
+  ClockdriftDetector clockdrift_detector_;
+
+  // Internal reset method with more granularity.
+  void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
new file mode 100644
index 0000000000..e2c101fb04
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+
+#include <algorithm>
+#include <string>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(size_t delay, size_t down_sampling_factor) {
+  rtc::StringBuilder ss;
+  ss << "Delay: " << delay;
+  ss << ", Down sampling factor: " << down_sampling_factor;
+  return ss.Release();
+}
+
+}  // namespace
+
+class EchoPathDelayEstimatorMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         EchoPathDelayEstimatorMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 2, 3, 6, 8),
+                                            ::testing::Values(1, 2, 4)));
+
+// Verifies that the basic API calls work.
+TEST_P(EchoPathDelayEstimatorMultiChannel, BasicApiCalls) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
+  EchoPathDelayEstimator estimator(&data_dumper, config, num_capture_channels);
+  Block render(kNumBands, num_render_channels);
+  Block capture(/*num_bands=*/1, num_capture_channels);
+  for (size_t k = 0; k < 100; ++k) {
+    render_delay_buffer->Insert(render);
+    estimator.EstimateDelay(render_delay_buffer->GetDownsampledRenderBuffer(),
+                            capture);
+  }
+}
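+// Illustrative sketch only (our own type, not the DelayBuffer implementation
+// from echo_canceller_test_tools.h; assumes <vector> is available): the
+// DelayEstimation test below pushes the capture signal through a DelayBuffer,
+// which behaves like a plain delay line. A minimal circular-buffer stand-in:
+class SketchDelayLine {
+ public:
+  explicit SketchDelayLine(size_t delay_samples)
+      : buffer_(delay_samples, 0.f) {}
+
+  // Returns the input sample delayed by `delay_samples`.
+  float Process(float x) {
+    if (buffer_.empty()) {
+      return x;
+    }
+    const float y = buffer_[next_];
+    buffer_[next_] = x;
+    next_ = (next_ + 1) % buffer_.size();
+    return y;
+  }
+
+ private:
+  std::vector<float> buffer_;
+  size_t next_ = 0;
+};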
+// Verifies that the delay estimator produces correct delay for artificially
+// delayed signals.
+TEST(EchoPathDelayEstimator, DelayEstimation) {
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  Random random_generator(42U);
+  Block render(kNumBands, kNumRenderChannels);
+  Block capture(/*num_bands=*/1, kNumCaptureChannels);
+  ApmDataDumper data_dumper(0);
+  constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    EchoCanceller3Config config;
+    config.delay.delay_headroom_samples = 0;
+    config.delay.down_sampling_factor = down_sampling_factor;
+    config.delay.num_filters = 10;
+    for (size_t delay_samples : {30, 64, 150, 200, 800, 4000}) {
+      SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor));
+      std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+          RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
+      DelayBuffer<float> signal_delay_buffer(delay_samples);
+      EchoPathDelayEstimator estimator(&data_dumper, config,
+                                       kNumCaptureChannels);
+
+      absl::optional<DelayEstimate> estimated_delay_samples;
+      for (size_t k = 0; k < (500 + (delay_samples) / kBlockSize); ++k) {
+        RandomizeSampleVector(&random_generator,
+                              render.View(/*band=*/0, /*channel=*/0));
+        signal_delay_buffer.Delay(render.View(/*band=*/0, /*channel=*/0),
+                                  capture.View(/*band=*/0, /*channel=*/0));
+        render_delay_buffer->Insert(render);
+
+        if (k == 0) {
+          render_delay_buffer->Reset();
+        }
+
+        render_delay_buffer->PrepareCaptureProcessing();
+
+        auto estimate = estimator.EstimateDelay(
+            render_delay_buffer->GetDownsampledRenderBuffer(), capture);
+
+        if (estimate) {
+          estimated_delay_samples = estimate;
+        }
+      }
+
+      if (estimated_delay_samples) {
+        // Allow the estimated delay to be off by one block, as the delay is
+        // internally quantized with an error of up to one block.
+        size_t delay_ds = delay_samples / down_sampling_factor;
+        size_t estimated_delay_ds =
+            estimated_delay_samples->delay / down_sampling_factor;
+        EXPECT_NEAR(delay_ds, estimated_delay_ds,
+                    kBlockSize / down_sampling_factor);
+      } else {
+        ADD_FAILURE();
+      }
+    }
+  }
+}
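+// Illustrative sketch only (our own helper; assumes <cmath> is available):
+// the test below scales the render signal by 100 / 32767, which is roughly
+// 20 * log10(100 / 32768), i.e. about -50 dBFS, a level low enough that the
+// matched filter should refuse to report a delay.
+static float SketchLevelDbfs(float amplitude, float full_scale) {
+  return 20.f * std::log10(amplitude / full_scale);
+}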
+// Verifies that the delay estimator does not produce delay estimates for
+// low-level render signals.
+TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) {
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  Random random_generator(42U);
+  EchoCanceller3Config config;
+  Block render(kNumBands, kNumRenderChannels);
+  Block capture(/*num_bands=*/1, kNumCaptureChannels);
+  ApmDataDumper data_dumper(0);
+  EchoPathDelayEstimator estimator(&data_dumper, config, kNumCaptureChannels);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
+                                kNumRenderChannels));
+  for (size_t k = 0; k < 100; ++k) {
+    RandomizeSampleVector(&random_generator,
+                          render.View(/*band=*/0, /*channel=*/0));
+    for (auto& render_k : render.View(/*band=*/0, /*channel=*/0)) {
+      render_k *= 100.f / 32767.f;
+    }
+    std::copy(render.begin(/*band=*/0, /*channel=*/0),
+              render.end(/*band=*/0, /*channel=*/0),
+              capture.begin(/*band=*/0, /*channel=*/0));
+    render_delay_buffer->Insert(render);
+    render_delay_buffer->PrepareCaptureProcessing();
+    EXPECT_FALSE(estimator.EstimateDelay(
+        render_delay_buffer->GetDownsampledRenderBuffer(), capture));
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for the render block size.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoPathDelayEstimatorDeathTest, DISABLED_WrongRenderBlockSize) {
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  EchoPathDelayEstimator estimator(&data_dumper, config, 1);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, 48000, 1));
+  Block capture(/*num_bands=*/1, /*num_channels=*/1);
+  EXPECT_DEATH(estimator.EstimateDelay(
+                   render_delay_buffer->GetDownsampledRenderBuffer(), capture),
+               "");
+}
+
+// Verifies the check for non-null data dumper.
+TEST(EchoPathDelayEstimatorDeathTest, NullDataDumper) {
+  EXPECT_DEATH(EchoPathDelayEstimator(nullptr, EchoCanceller3Config(), 1), "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc
new file mode 100644
index 0000000000..0ae9cff98e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/echo_path_variability.h" + +namespace webrtc { + +EchoPathVariability::EchoPathVariability(bool gain_change, + DelayAdjustment delay_change, + bool clock_drift) + : gain_change(gain_change), + delay_change(delay_change), + clock_drift(clock_drift) {} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h new file mode 100644 index 0000000000..78e4f64b2b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ + +namespace webrtc { + +struct EchoPathVariability { + enum class DelayAdjustment { + kNone, + kBufferFlush, + kNewDetectedDelay + }; + + EchoPathVariability(bool gain_change, + DelayAdjustment delay_change, + bool clock_drift); + + bool AudioPathChanged() const { + return gain_change || delay_change != DelayAdjustment::kNone; + } + bool gain_change; + DelayAdjustment delay_change; + bool clock_drift; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc new file mode 100644 index 0000000000..0f10f95f72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_path_variability.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(EchoPathVariability, CorrectBehavior) { + // Test correct passing and reporting of the gain change information. 
+  EchoPathVariability v(
+      true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false);
+  EXPECT_TRUE(v.gain_change);
+  EXPECT_TRUE(v.delay_change ==
+              EchoPathVariability::DelayAdjustment::kNewDetectedDelay);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(true, EchoPathVariability::DelayAdjustment::kNone,
+                          false);
+  EXPECT_TRUE(v.gain_change);
+  EXPECT_TRUE(v.delay_change == EchoPathVariability::DelayAdjustment::kNone);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(
+      false, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false);
+  EXPECT_FALSE(v.gain_change);
+  EXPECT_TRUE(v.delay_change ==
+              EchoPathVariability::DelayAdjustment::kNewDetectedDelay);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(false, EchoPathVariability::DelayAdjustment::kNone,
+                          false);
+  EXPECT_FALSE(v.gain_change);
+  EXPECT_TRUE(v.delay_change == EchoPathVariability::DelayAdjustment::kNone);
+  EXPECT_FALSE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc
new file mode 100644
index 0000000000..673d88af03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/echo_remover.h"
+
+#include <math.h>
+#include <stddef.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/echo_remover_metrics.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/aec3/residual_echo_estimator.h"
+#include "modules/audio_processing/aec3/subtractor.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/aec3/suppression_filter.h"
+#include "modules/audio_processing/aec3/suppression_gain.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+namespace {
+
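+// Illustrative sketch only (our own helper, not upstream code): the
+// stack/heap partitioning used by EchoRemoverImpl::ProcessCapture() below
+// amounts to choosing which pre-existing storage an ArrayView points at, so
+// no allocation happens on the audio thread.
+static rtc::ArrayView<float> SketchPickChannelStorage(
+    rtc::ArrayView<float> stack_storage,
+    rtc::ArrayView<float> heap_storage,
+    size_t num_channels,
+    size_t max_channels_on_stack) {
+  // Common low channel counts use the stack buffer; higher counts fall back
+  // to the buffer that was pre-allocated on the heap in the constructor.
+  return num_channels <= max_channels_on_stack
+             ? stack_storage.subview(0, num_channels)
+             : heap_storage.subview(0, num_channels);
+}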
+// Maximum number of channels for which the capture channel data is stored on
+// the stack. If the number of channels is larger than this, the data is
+// stored using scratch memory that is pre-allocated on the heap. The reason
+// for this partitioning is to avoid wasting heap space for the common low
+// numbers of channels, while at the same time not limiting the support for
+// higher numbers of channels by forcing the capture channel data into a
+// fixed-size stack allocation.
+constexpr size_t kMaxNumChannelsOnStack = 2;
+
+// Chooses the number of channels to store on the heap when that is required
+// because the number of capture channels is larger than the pre-defined
+// number of channels to store on the stack.
+size_t NumChannelsOnHeap(size_t num_capture_channels) {
+  return num_capture_channels > kMaxNumChannelsOnStack ? num_capture_channels
+                                                       : 0;
+}
+
+void LinearEchoPower(const FftData& E,
+                     const FftData& Y,
+                     std::array<float, kFftLengthBy2Plus1>* S2) {
+  for (size_t k = 0; k < E.re.size(); ++k) {
+    (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +
+               (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);
+  }
+}
+
+// Fades between two input signals using a fixed-size transition.
+void SignalTransition(rtc::ArrayView<const float> from,
+                      rtc::ArrayView<const float> to,
+                      rtc::ArrayView<float> out) {
+  if (from == to) {
+    RTC_DCHECK_EQ(to.size(), out.size());
+    std::copy(to.begin(), to.end(), out.begin());
+  } else {
+    constexpr size_t kTransitionSize = 30;
+    constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);
+
+    RTC_DCHECK_EQ(from.size(), to.size());
+    RTC_DCHECK_EQ(from.size(), out.size());
+    RTC_DCHECK_LE(kTransitionSize, out.size());
+
+    for (size_t k = 0; k < kTransitionSize; ++k) {
+      float a = (k + 1) * kOneByTransitionSizePlusOne;
+      out[k] = a * to[k] + (1.f - a) * from[k];
+    }
+
+    std::copy(to.begin() + kTransitionSize, to.end(),
+              out.begin() + kTransitionSize);
+  }
+}
+
+// Computes a windowed (square-root Hanning) padded FFT and updates the related
+// memory.
+void WindowedPaddedFft(const Aec3Fft& fft,
+                       rtc::ArrayView<const float> v,
+                       rtc::ArrayView<float> v_old,
+                       FftData* V) {
+  fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
+  std::copy(v.begin(), v.end(), v_old.begin());
+}
+
+// Class for removing the echo from the capture signal.
+class EchoRemoverImpl final : public EchoRemover {
+ public:
+  EchoRemoverImpl(const EchoCanceller3Config& config,
+                  int sample_rate_hz,
+                  size_t num_render_channels,
+                  size_t num_capture_channels);
+  ~EchoRemoverImpl() override;
+  EchoRemoverImpl(const EchoRemoverImpl&) = delete;
+  EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override;
+
+  // Removes the echo from a block of samples from the capture signal. The
+  // supplied render signal is assumed to be pre-aligned with the capture
+  // signal.
+  void ProcessCapture(EchoPathVariability echo_path_variability,
+                      bool capture_signal_saturation,
+                      const absl::optional<DelayEstimate>& external_delay,
+                      RenderBuffer* render_buffer,
+                      Block* linear_output,
+                      Block* capture) override;
+
+  // Updates the status on whether echo leakage is detected in the output of
+  // the echo remover.
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {
+    echo_leakage_detected_ = leakage_detected;
+  }
+
+  void SetCaptureOutputUsage(bool capture_output_used) override {
+    capture_output_used_ = capture_output_used;
+  }
+
+ private:
+  // Selects whichever of the coarse and refined linear filter outputs is most
+  // appropriate to pass to the suppressor, and forms the linear filter output
+  // by smoothly transitioning between them.
+  void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
+                              rtc::ArrayView<float> output);
+
+  static std::atomic<int> instance_count_;
+  const EchoCanceller3Config config_;
+  const Aec3Fft fft_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const Aec3Optimization optimization_;
+  const int sample_rate_hz_;
+  const size_t num_render_channels_;
+  const size_t num_capture_channels_;
+  const bool use_coarse_filter_output_;
+  Subtractor subtractor_;
+  SuppressionGain suppression_gain_;
+  ComfortNoiseGenerator cng_;
+  SuppressionFilter suppression_filter_;
+  RenderSignalAnalyzer render_signal_analyzer_;
+  ResidualEchoEstimator residual_echo_estimator_;
+  bool echo_leakage_detected_ = false;
+  bool capture_output_used_ = true;
+  AecState aec_state_;
+  EchoRemoverMetrics metrics_;
+  std::vector<std::array<float, kBlockSize>> e_old_;
+  std::vector<std::array<float, kBlockSize>> y_old_;
+  size_t block_counter_ = 0;
+  int gain_change_hangover_ = 0;
+  bool refined_filter_output_last_selected_ = true;
+
+  std::vector<std::array<float, kBlockSize>> e_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
+  std::vector<FftData> Y_heap_;
+  std::vector<FftData> E_heap_;
+  std::vector<FftData> comfort_noise_heap_;
+  std::vector<FftData> high_band_comfort_noise_heap_;
+  std::vector<SubtractorOutput> subtractor_output_heap_;
+};
+
+std::atomic<int> EchoRemoverImpl::instance_count_(0);
+
+EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
+                                 int sample_rate_hz,
+                                 size_t num_render_channels,
+                                 size_t num_capture_channels)
+    : config_(config),
+      fft_(),
+      data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      optimization_(DetectOptimization()),
+      sample_rate_hz_(sample_rate_hz),
+      num_render_channels_(num_render_channels),
+      num_capture_channels_(num_capture_channels),
+      use_coarse_filter_output_(
+          config_.filter.enable_coarse_filter_output_usage),
+      subtractor_(config,
+                  num_render_channels_,
+                  num_capture_channels_,
+                  data_dumper_.get(),
+                  optimization_),
+      suppression_gain_(config_,
+                        optimization_,
+                        sample_rate_hz,
+                        num_capture_channels),
+      cng_(config_, optimization_, num_capture_channels_),
+      suppression_filter_(optimization_,
+                          sample_rate_hz_,
+                          num_capture_channels_),
+      render_signal_analyzer_(config_),
+      residual_echo_estimator_(config_, num_render_channels),
+      aec_state_(config_, num_capture_channels_),
+      e_old_(num_capture_channels_, {0.f}),
+      y_old_(num_capture_channels_, {0.f}),
+      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
+      Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      E_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
+}
+
+EchoRemoverImpl::~EchoRemoverImpl() = default;
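+// Illustrative sketch only (our own helper, not upstream code): GetMetrics()
+// below converts the linear ERL value into dB of attenuation, mirroring the
+// -10 * log10(erl) expression used for the reported metric.
+static float SketchLinearErlToDb(float erl_linear) {
+  return -10.f * std::log10(erl_linear);
+}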
+void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
+  // Echo return loss (ERL) is inverted to go from gain to attenuation.
+  metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
+  metrics->echo_return_loss_enhancement =
+      Log2TodB(aec_state_.FullBandErleLog2());
+}
+
+void EchoRemoverImpl::ProcessCapture(
+    EchoPathVariability echo_path_variability,
+    bool capture_signal_saturation,
+    const absl::optional<DelayEstimate>& external_delay,
+    RenderBuffer* render_buffer,
+    Block* linear_output,
+    Block* capture) {
+  ++block_counter_;
+  const Block& x = render_buffer->GetBlock(0);
+  Block* y = capture;
+  RTC_DCHECK(render_buffer);
+  RTC_DCHECK(y);
+  RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_));
+  RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_));
+  RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_);
+  RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_);
+
+  // Stack allocated data to use when the number of channels is low.
+  std::array<std::array<float, kBlockSize>, kMaxNumChannelsOnStack> e_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      Y2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      E2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      R2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      R2_unbounded_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      S2_linear_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> E_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
+  std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;
+
+  rtc::ArrayView<std::array<float, kBlockSize>> e(e_stack.data(),
+                                                  num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
+      Y2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
+      E2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
+      R2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+      R2_unbounded_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
+      S2_linear_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
+                                        num_capture_channels_);
+  rtc::ArrayView<FftData> high_band_comfort_noise(
+      high_band_comfort_noise_stack.data(), num_capture_channels_);
+  rtc::ArrayView<SubtractorOutput> subtractor_output(
+      subtractor_output_stack.data(), num_capture_channels_);
+  if (NumChannelsOnHeap(num_capture_channels_) > 0) {
+    // If the stack-allocated space is too small, use the heap for storing the
+    // microphone data.
+ e = rtc::ArrayView>(e_heap_.data(), + num_capture_channels_); + Y2 = rtc::ArrayView>( + Y2_heap_.data(), num_capture_channels_); + E2 = rtc::ArrayView>( + E2_heap_.data(), num_capture_channels_); + R2 = rtc::ArrayView>( + R2_heap_.data(), num_capture_channels_); + R2_unbounded = rtc::ArrayView>( + R2_unbounded_heap_.data(), num_capture_channels_); + S2_linear = rtc::ArrayView>( + S2_linear_heap_.data(), num_capture_channels_); + Y = rtc::ArrayView(Y_heap_.data(), num_capture_channels_); + E = rtc::ArrayView(E_heap_.data(), num_capture_channels_); + comfort_noise = rtc::ArrayView(comfort_noise_heap_.data(), + num_capture_channels_); + high_band_comfort_noise = rtc::ArrayView( + high_band_comfort_noise_heap_.data(), num_capture_channels_); + subtractor_output = rtc::ArrayView( + subtractor_output_heap_.data(), num_capture_channels_); + } + + data_dumper_->DumpWav("aec3_echo_remover_capture_input", + y->View(/*band=*/0, /*channel=*/0), 16000, 1); + data_dumper_->DumpWav("aec3_echo_remover_render_input", + x.View(/*band=*/0, /*channel=*/0), 16000, 1); + data_dumper_->DumpRaw("aec3_echo_remover_capture_input", + y->View(/*band=*/0, /*channel=*/0)); + data_dumper_->DumpRaw("aec3_echo_remover_render_input", + x.View(/*band=*/0, /*channel=*/0)); + + aec_state_.UpdateCaptureSaturation(capture_signal_saturation); + + if (echo_path_variability.AudioPathChanged()) { + // Ensure that the gain change is only acted on once per frame. + if (echo_path_variability.gain_change) { + if (gain_change_hangover_ == 0) { + constexpr int kMaxBlocksPerFrame = 3; + gain_change_hangover_ = kMaxBlocksPerFrame; + rtc::LoggingSeverity log_level = + config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING + : rtc::LS_VERBOSE; + RTC_LOG_V(log_level) + << "Gain change detected at block " << block_counter_; + } else { + echo_path_variability.gain_change = false; + } + } + + subtractor_.HandleEchoPathChange(echo_path_variability); + aec_state_.HandleEchoPathChange(echo_path_variability); + + if (echo_path_variability.delay_change != + EchoPathVariability::DelayAdjustment::kNone) { + suppression_gain_.SetInitialState(true); + } + } + if (gain_change_hangover_ > 0) { + --gain_change_hangover_; + } + + // Analyze the render signal. + render_signal_analyzer_.Update(*render_buffer, + aec_state_.MinDirectPathFilterDelay()); + + // State transition. + if (aec_state_.TransitionTriggered()) { + subtractor_.ExitInitialState(); + suppression_gain_.SetInitialState(false); + } + + // Perform linear echo cancellation. + subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_, + subtractor_output); + + // Compute spectra. + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + FormLinearFilterOutput(subtractor_output[ch], e[ch]); + WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]); + WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]); + LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]); + Y[ch].Spectrum(optimization_, Y2[ch]); + E[ch].Spectrum(optimization_, E2[ch]); + } + + // Optionally return the linear filter output. + if (linear_output) { + RTC_DCHECK_GE(1, linear_output->NumBands()); + RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels()); + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::copy(e[ch].begin(), e[ch].end(), + linear_output->begin(/*band=*/0, ch)); + } + } + + // Update the AEC state information. 
+ aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(), + subtractor_.FilterImpulseResponses(), *render_buffer, E2, + Y2, subtractor_output); + + // Choose the linear output. + const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; + + data_dumper_->DumpWav("aec3_output_linear", + y->View(/*band=*/0, /*channel=*/0), 16000, 1); + data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1); + + // Estimate the comfort noise. + cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise, + high_band_comfort_noise); + + // Only do the below processing if the output of the audio processing module + // is used. + std::array G; + if (capture_output_used_) { + // Estimate the residual echo power. + residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, + suppression_gain_.IsDominantNearend(), R2, + R2_unbounded); + + // Suppressor nearend estimate. + if (aec_state_.UsableLinearEstimate()) { + // E2 is bound by Y2. + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(), + E2[ch].begin(), + [](float a, float b) { return std::min(a, b); }); + } + } + const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2; + + // Suppressor echo estimate. + const auto& echo_spectrum = + aec_state_.UsableLinearEstimate() ? S2_linear : R2; + + // Determine if the suppressor should assume clock drift. + const bool clock_drift = config_.echo_removal_control.has_clock_drift || + echo_path_variability.clock_drift; + + // Compute preferred gains. + float high_bands_gain; + suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded, + cng_.NoiseSpectrum(), render_signal_analyzer_, + aec_state_, x, clock_drift, &high_bands_gain, &G); + + suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, + high_bands_gain, Y_fft, y); + + } else { + G.fill(0.f); + } + + // Update the metrics. + metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G); + + // Debug outputs for the purpose of development and analysis. + data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize, + &subtractor_output[0].s_refined[0], 16000, 1); + data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0)); + data_dumper_->DumpRaw("aec3_narrow_render", + render_signal_analyzer_.NarrowPeakBand() ? 1 : 0); + data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]); + data_dumper_->DumpRaw("aec3_suppressor_gain", G); + data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0), + 16000, 1); + data_dumper_->DumpRaw("aec3_using_subtractor_output[0]", + aec_state_.UseLinearFilterOutput() ? 1 : 0); + data_dumper_->DumpRaw("aec3_E2", E2[0]); + data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]); + data_dumper_->DumpRaw("aec3_Y2", Y2[0]); + data_dumper_->DumpRaw( + "aec3_X2", render_buffer->Spectrum( + aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]); + data_dumper_->DumpRaw("aec3_R2", R2[0]); + data_dumper_->DumpRaw("aec3_filter_delay", + aec_state_.MinDirectPathFilterDelay()); + data_dumper_->DumpRaw("aec3_capture_saturation", + aec_state_.SaturatedCapture() ? 
1 : 0); +} + +void EchoRemoverImpl::FormLinearFilterOutput( + const SubtractorOutput& subtractor_output, + rtc::ArrayView output) { + RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size()); + RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size()); + bool use_refined_output = true; + if (use_coarse_filter_output_) { + // As the output of the refined adaptive filter generally should be better + // than the coarse filter output, add a margin and threshold for when + // choosing the coarse filter output. + if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined && + subtractor_output.y2 > 30.f * 30.f * kBlockSize && + (subtractor_output.s2_refined > 60.f * 60.f * kBlockSize || + subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) { + use_refined_output = false; + } else { + // If the refined filter is diverged, choose the filter output that has + // the lowest power. + if (subtractor_output.e2_coarse < subtractor_output.e2_refined && + subtractor_output.y2 < subtractor_output.e2_refined) { + use_refined_output = false; + } + } + } + + SignalTransition(refined_filter_output_last_selected_ + ? subtractor_output.e_refined + : subtractor_output.e_coarse, + use_refined_output ? subtractor_output.e_refined + : subtractor_output.e_coarse, + output); + refined_filter_output_last_selected_ = use_refined_output; +} + +} // namespace + +EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels) { + return new EchoRemoverImpl(config, sample_rate_hz, num_render_channels, + num_capture_channels); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h new file mode 100644 index 0000000000..f2f4f5e64d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ + +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "api/audio/echo_control.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/delay_estimate.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/render_buffer.h" + +namespace webrtc { + +// Class for removing the echo from the capture signal. +class EchoRemover { + public: + static EchoRemover* Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels); + virtual ~EchoRemover() = default; + + // Get current metrics. + virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0; + + // Removes the echo from a block of samples from the capture signal. The + // supplied render signal is assumed to be pre-aligned with the capture + // signal. 
+ virtual void ProcessCapture( + EchoPathVariability echo_path_variability, + bool capture_signal_saturation, + const absl::optional& external_delay, + RenderBuffer* render_buffer, + Block* linear_output, + Block* capture) = 0; + + // Updates the status on whether echo leakage is detected in the output of the + // echo remover. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; + + // Specifies whether the capture output will be used. The purpose of this is + // to allow the echo remover to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + virtual void SetCaptureOutputUsage(bool capture_output_used) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc new file mode 100644 index 0000000000..c3fc80773a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include +#include + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {} +EchoRemoverMetrics::DbMetric::DbMetric(float sum_value, + float floor_value, + float ceil_value) + : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {} + +void EchoRemoverMetrics::DbMetric::Update(float value) { + sum_value += value; + floor_value = std::min(floor_value, value); + ceil_value = std::max(ceil_value, value); +} + +void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) { + sum_value = value; + floor_value = std::min(floor_value, value); + ceil_value = std::max(ceil_value, value); +} + +EchoRemoverMetrics::EchoRemoverMetrics() { + ResetMetrics(); +} + +void EchoRemoverMetrics::ResetMetrics() { + erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f); + erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f); + saturated_capture_ = false; +} + +void EchoRemoverMetrics::Update( + const AecState& aec_state, + const std::array& comfort_noise_spectrum, + const std::array& suppressor_gain) { + metrics_reported_ = false; + if (++block_counter_ <= kMetricsCollectionBlocks) { + erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain()); + erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2()); + saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture(); + } else { + // Report the metrics over several frames in order to lower the impact of + // the logarithms involved on the computational complexity. + switch (block_counter_) { + case kMetricsCollectionBlocks + 1: + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.UsableLinearEstimate", + static_cast(aec_state.UsableLinearEstimate() ? 
1 : 0)); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay", + aec_state.MinDirectPathFilterDelay(), 0, 30, + 31); + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation", + static_cast(saturated_capture_ ? 1 : 0)); + break; + case kMetricsCollectionBlocks + 2: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Value", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 3: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Value", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.sum_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Max", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.ceil_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Min", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.floor_value), + 0, 19, 20); + metrics_reported_ = true; + RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_); + block_counter_ = 0; + ResetMetrics(); + break; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + } +} + +namespace aec3 { + +void UpdateDbMetric(const std::array& value, + std::array* statistic) { + RTC_DCHECK(statistic); + // Truncation is intended in the band width computation. + constexpr int kNumBands = 2; + constexpr int kBandWidth = 65 / kNumBands; + constexpr float kOneByBandWidth = 1.f / kBandWidth; + RTC_DCHECK_EQ(kNumBands, statistic->size()); + RTC_DCHECK_EQ(65, value.size()); + for (size_t k = 0; k < statistic->size(); ++k) { + float average_band = + std::accumulate(value.begin() + kBandWidth * k, + value.begin() + kBandWidth * (k + 1), 0.f) * + kOneByBandWidth; + (*statistic)[k].Update(average_band); + } +} + +int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value) { + float new_value = 10.f * std::log10(value * scaling + 1e-10f) + offset; + if (negate) { + new_value = -new_value; + } + return static_cast(rtc::SafeClamp(new_value, min_value, max_value)); +} + +} // namespace aec3 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h new file mode 100644 index 0000000000..aec8084d78 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" + +namespace webrtc { + +// Handles the reporting of metrics for the echo remover. +class EchoRemoverMetrics { + public: + struct DbMetric { + DbMetric(); + DbMetric(float sum_value, float floor_value, float ceil_value); + void Update(float value); + void UpdateInstant(float value); + float sum_value; + float floor_value; + float ceil_value; + }; + + EchoRemoverMetrics(); + + EchoRemoverMetrics(const EchoRemoverMetrics&) = delete; + EchoRemoverMetrics& operator=(const EchoRemoverMetrics&) = delete; + + // Updates the metric with new data. + void Update( + const AecState& aec_state, + const std::array& comfort_noise_spectrum, + const std::array& suppressor_gain); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int block_counter_ = 0; + DbMetric erl_time_domain_; + DbMetric erle_time_domain_; + bool saturated_capture_ = false; + bool metrics_reported_ = false; +}; + +namespace aec3 { + +// Updates a banded metric of type DbMetric with the values in the supplied +// array. +void UpdateDbMetric(const std::array& value, + std::array* statistic); + +// Transforms a DbMetric from the linear domain into the logarithmic domain. +int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value); + +} // namespace aec3 + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc new file mode 100644 index 0000000000..45b30a9c74 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include + +#include + +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for non-null input. +TEST(UpdateDbMetricDeathTest, NullValue) { + std::array value; + value.fill(0.f); + EXPECT_DEATH(aec3::UpdateDbMetric(value, nullptr), ""); +} + +#endif + +// Verifies the updating functionality of UpdateDbMetric. 
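Before the tests, a worked example of how `TransformDbMetricForReporting` (defined in `echo_remover_metrics.cc` above) buckets a linear-domain metric, using the same parameters as the ERLE histograms; the function name here is illustrative:

```cpp
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
#include "rtc_base/checks.h"

void TransformExampleSketch() {
  // 10 * log10(8 * 1.f + 1e-10f) + 0.f is roughly 9.03; with no negation,
  // clamping to [0, 19] and truncating to int lands in histogram bucket 9.
  const int bucket = webrtc::aec3::TransformDbMetricForReporting(
      /*negate=*/false, /*min_value=*/0.f, /*max_value=*/19.f,
      /*offset=*/0.f, /*scaling=*/1.f, /*value=*/8.f);
  RTC_DCHECK_EQ(9, bucket);
}
```

The test below then exercises `UpdateDbMetric` itself.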
+TEST(UpdateDbMetric, Updating) { + std::array value; + std::array statistic; + statistic.fill(EchoRemoverMetrics::DbMetric(0.f, 100.f, -100.f)); + constexpr float kValue0 = 10.f; + constexpr float kValue1 = 20.f; + std::fill(value.begin(), value.begin() + 32, kValue0); + std::fill(value.begin() + 32, value.begin() + 64, kValue1); + + aec3::UpdateDbMetric(value, &statistic); + EXPECT_FLOAT_EQ(kValue0, statistic[0].sum_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].sum_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value); + + aec3::UpdateDbMetric(value, &statistic); + EXPECT_FLOAT_EQ(2.f * kValue0, statistic[0].sum_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value); + EXPECT_FLOAT_EQ(2.f * kValue1, statistic[1].sum_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value); +} + +// Verifies that the TransformDbMetricForReporting method produces the desired +// output for values for dBFS. +TEST(TransformDbMetricForReporting, DbFsScaling) { + std::array x; + FftData X; + std::array X2; + Aec3Fft fft; + x.fill(1000.f); + fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X); + X.Spectrum(Aec3Optimization::kNone, X2); + + float offset = -10.f * std::log10(32768.f * 32768.f); + EXPECT_NEAR(offset, -90.3f, 0.1f); + EXPECT_EQ( + static_cast(30.3f), + aec3::TransformDbMetricForReporting( + true, 0.f, 90.f, offset, 1.f / (kBlockSize * kBlockSize), X2[0])); +} + +// Verifies that the TransformDbMetricForReporting method is able to properly +// limit the output. +TEST(TransformDbMetricForReporting, Limits) { + EXPECT_EQ(0, aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f, + 0.001f)); + EXPECT_EQ(10, aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f, + 100.f)); +} + +// Verifies that the TransformDbMetricForReporting method is able to properly +// negate output. +TEST(TransformDbMetricForReporting, Negate) { + EXPECT_EQ(10, aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f, 1.f, + 0.1f)); + EXPECT_EQ(-10, aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f, + 1.f, 10.f)); +} + +// Verify the Update functionality of DbMetric. +TEST(DbMetric, Update) { + EchoRemoverMetrics::DbMetric metric(0.f, 20.f, -20.f); + constexpr int kNumValues = 100; + constexpr float kValue = 10.f; + for (int k = 0; k < kNumValues; ++k) { + metric.Update(kValue); + } + EXPECT_FLOAT_EQ(kValue * kNumValues, metric.sum_value); + EXPECT_FLOAT_EQ(kValue, metric.ceil_value); + EXPECT_FLOAT_EQ(kValue, metric.floor_value); +} + +// Verify the Update functionality of DbMetric. +TEST(DbMetric, UpdateInstant) { + EchoRemoverMetrics::DbMetric metric(0.f, 20.f, -20.f); + constexpr float kMinValue = -77.f; + constexpr float kMaxValue = 33.f; + constexpr float kLastValue = (kMinValue + kMaxValue) / 2.0f; + for (float value = kMinValue; value <= kMaxValue; value++) + metric.UpdateInstant(value); + metric.UpdateInstant(kLastValue); + EXPECT_FLOAT_EQ(kLastValue, metric.sum_value); + EXPECT_FLOAT_EQ(kMaxValue, metric.ceil_value); + EXPECT_FLOAT_EQ(kMinValue, metric.floor_value); +} + +// Verify the constructor functionality of DbMetric. 
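For reference while reading these tests, the two `DbMetric` update modes differ only in how `sum_value` is treated; a short trace, with values following directly from the definitions in `echo_remover_metrics.cc` above (the function name is illustrative):

```cpp
#include "modules/audio_processing/aec3/echo_remover_metrics.h"

void DbMetricTraceSketch() {
  webrtc::EchoRemoverMetrics::DbMetric m(0.f, 20.f, -20.f);
  m.Update(10.f);        // sum_value = 10, floor_value = 10, ceil_value = 10
  m.Update(10.f);        // sum_value = 20 (accumulates); floor/ceil unchanged
  m.UpdateInstant(5.f);  // sum_value = 5 (overwritten); floor = 5, ceil = 10
}
```

The constructor test below then pins down the member order (sum, floor, ceil).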
+TEST(DbMetric, Constructor) { + EchoRemoverMetrics::DbMetric metric; + EXPECT_FLOAT_EQ(0.f, metric.sum_value); + EXPECT_FLOAT_EQ(0.f, metric.ceil_value); + EXPECT_FLOAT_EQ(0.f, metric.floor_value); + + metric = EchoRemoverMetrics::DbMetric(1.f, 2.f, 3.f); + EXPECT_FLOAT_EQ(1.f, metric.sum_value); + EXPECT_FLOAT_EQ(2.f, metric.floor_value); + EXPECT_FLOAT_EQ(3.f, metric.ceil_value); +} + +// Verify the general functionality of EchoRemoverMetrics. +TEST(EchoRemoverMetrics, NormalUsage) { + EchoRemoverMetrics metrics; + AecState aec_state(EchoCanceller3Config{}, 1); + std::array comfort_noise_spectrum; + std::array suppressor_gain; + comfort_noise_spectrum.fill(10.f); + suppressor_gain.fill(1.f); + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc new file mode 100644 index 0000000000..66168ab08d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/echo_remover.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +std::string ProduceDebugText(int sample_rate_hz, int delay) { + rtc::StringBuilder ss(ProduceDebugText(sample_rate_hz)); + ss << ", Delay: " << delay; + return ss.Release(); +} + +} // namespace + +class EchoRemoverMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + EchoRemoverMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(1, 2, 8))); + +// Verifies the basic API call sequence +TEST_P(EchoRemoverMultiChannel, BasicApiCalls) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + absl::optional delay_estimate; + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr remover( + EchoRemover::Create(EchoCanceller3Config(), rate, num_render_channels, + num_capture_channels)); + std::unique_ptr render_buffer(RenderDelayBuffer::Create( + EchoCanceller3Config(), rate, num_render_channels)); + + Block render(NumBandsForRate(rate), num_render_channels); + Block capture(NumBandsForRate(rate), num_capture_channels); + for (size_t k = 0; k < 100; ++k) { + EchoPathVariability echo_path_variability( + k % 3 == 0 ? true : false, + k % 5 == 0 ? EchoPathVariability::DelayAdjustment::kNewDetectedDelay + : EchoPathVariability::DelayAdjustment::kNone, + false); + render_buffer->Insert(render); + render_buffer->PrepareCaptureProcessing(); + + remover->ProcessCapture(echo_path_variability, k % 2 == 0 ? true : false, + delay_estimate, render_buffer->GetRenderBuffer(), + nullptr, &capture); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for the samplerate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(EchoRemoverDeathTest, DISABLED_WrongSampleRate) { + EXPECT_DEATH(std::unique_ptr( + EchoRemover::Create(EchoCanceller3Config(), 8001, 1, 1)), + ""); +} + +// Verifies the check for the number of capture bands. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed.c +TEST(EchoRemoverDeathTest, DISABLED_WrongCaptureNumBands) { + absl::optional delay_estimate; + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr remover( + EchoRemover::Create(EchoCanceller3Config(), rate, 1, 1)); + std::unique_ptr render_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), rate, 1)); + Block capture(NumBandsForRate(rate == 48000 ? 
16000 : rate + 16000), 1); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + EXPECT_DEATH(remover->ProcessCapture( + echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, &capture), + ""); + } +} + +// Verifies the check for non-null capture block. +TEST(EchoRemoverDeathTest, NullCapture) { + absl::optional delay_estimate; + std::unique_ptr remover( + EchoRemover::Create(EchoCanceller3Config(), 16000, 1, 1)); + std::unique_ptr render_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 16000, 1)); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + EXPECT_DEATH(remover->ProcessCapture( + echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, nullptr), + ""); +} + +#endif + +// Performs a sanity check that the echo_remover is able to properly +// remove echoes. +TEST(EchoRemover, BasicEchoRemoval) { + constexpr int kNumBlocksToProcess = 500; + Random random_generator(42U); + absl::optional delay_estimate; + for (size_t num_channels : {1, 2, 4}) { + for (auto rate : {16000, 32000, 48000}) { + Block x(NumBandsForRate(rate), num_channels); + Block y(NumBandsForRate(rate), num_channels); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(rate, delay_samples)); + EchoCanceller3Config config; + std::unique_ptr remover( + EchoRemover::Create(config, rate, num_channels, num_channels)); + std::unique_ptr render_buffer( + RenderDelayBuffer::Create(config, rate, num_channels)); + render_buffer->AlignFromDelay(delay_samples / kBlockSize); + + std::vector>>> + delay_buffers(x.NumBands()); + for (size_t band = 0; band < delay_buffers.size(); ++band) { + delay_buffers[band].resize(x.NumChannels()); + } + + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + delay_buffers[band][channel].reset( + new DelayBuffer(delay_samples)); + } + } + + float input_energy = 0.f; + float output_energy = 0.f; + for (int k = 0; k < kNumBlocksToProcess; ++k) { + const bool silence = k < 100 || (k % 100 >= 10); + + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + if (silence) { + std::fill(x.begin(band, channel), x.end(band, channel), 0.f); + } else { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + delay_buffers[band][channel]->Delay(x.View(band, channel), + y.View(band, channel)); + } + } + + if (k > kNumBlocksToProcess / 2) { + input_energy = std::inner_product( + y.begin(/*band=*/0, /*channel=*/0), + y.end(/*band=*/0, /*channel=*/0), + y.begin(/*band=*/0, /*channel=*/0), input_energy); + } + + render_buffer->Insert(x); + render_buffer->PrepareCaptureProcessing(); + + remover->ProcessCapture(echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, + &y); + + if (k > kNumBlocksToProcess / 2) { + output_energy = std::inner_product( + y.begin(/*band=*/0, /*channel=*/0), + y.end(/*band=*/0, /*channel=*/0), + y.begin(/*band=*/0, /*channel=*/0), output_energy); + } + } + EXPECT_GT(input_energy, 10.f * output_energy); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc 
new file mode 100644 index 0000000000..01cc33cb80 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/erl_estimator.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +constexpr float kMinErl = 0.01f; +constexpr float kMaxErl = 1000.f; + +} // namespace + +ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks_) + : startup_phase_length_blocks__(startup_phase_length_blocks_) { + erl_.fill(kMaxErl); + hold_counters_.fill(0); + erl_time_domain_ = kMaxErl; + hold_counter_time_domain_ = 0; +} + +ErlEstimator::~ErlEstimator() = default; + +void ErlEstimator::Reset() { + blocks_since_reset_ = 0; +} + +void ErlEstimator::Update( + const std::vector& converged_filters, + rtc::ArrayView> render_spectra, + rtc::ArrayView> + capture_spectra) { + const size_t num_capture_channels = converged_filters.size(); + RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels); + + // Corresponds to WGN of power -46 dBFS. + constexpr float kX2Min = 44015068.0f; + + const auto first_converged_iter = + std::find(converged_filters.begin(), converged_filters.end(), true); + const bool any_filter_converged = + first_converged_iter != converged_filters.end(); + + if (++blocks_since_reset_ < startup_phase_length_blocks__ || + !any_filter_converged) { + return; + } + + // Use the maximum spectrum across capture and the maximum across render. + std::array max_capture_spectrum_data; + std::array max_capture_spectrum = + capture_spectra[/*channel=*/0]; + if (num_capture_channels > 1) { + // Initialize using the first channel with a converged filter. + const size_t first_converged = + std::distance(converged_filters.begin(), first_converged_iter); + RTC_DCHECK_GE(first_converged, 0); + RTC_DCHECK_LT(first_converged, num_capture_channels); + max_capture_spectrum_data = capture_spectra[first_converged]; + + for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) { + if (!converged_filters[ch]) { + continue; + } + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + max_capture_spectrum_data[k] = + std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]); + } + } + max_capture_spectrum = max_capture_spectrum_data; + } + + const size_t num_render_channels = render_spectra.size(); + std::array max_render_spectrum_data; + rtc::ArrayView max_render_spectrum = + render_spectra[/*channel=*/0]; + if (num_render_channels > 1) { + std::copy(render_spectra[0].begin(), render_spectra[0].end(), + max_render_spectrum_data.begin()); + for (size_t ch = 1; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + max_render_spectrum_data[k] = + std::max(max_render_spectrum_data[k], render_spectra[ch][k]); + } + } + max_render_spectrum = max_render_spectrum_data; + } + + const auto& X2 = max_render_spectrum; + const auto& Y2 = max_capture_spectrum; + + // Update the estimates in a maximum statistics manner. 
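In outline, the "maximum statistics" rule referred to above treats downward moves and upward drift asymmetrically. A sketch of the per-bin logic that the loop below implements, using the constants declared at the top of this file (the helper signature is illustrative):

```cpp
#include <algorithm>

// Sketch of the per-bin ERL update for render power X2_k and capture power
// Y2_k; erl_k and hold_k correspond to erl_[k] and hold_counters_[k - 1].
void ErlBinUpdateSketch(float X2_k, float Y2_k, float& erl_k, int& hold_k) {
  constexpr float kX2Min = 44015068.0f;  // gate out low-level render signals
  constexpr float kMinErl = 0.01f;
  if (X2_k > kX2Min) {
    const float new_erl = Y2_k / X2_k;
    if (new_erl < erl_k) {
      hold_k = 1000;                      // re-arm the upward-drift hold
      erl_k += 0.1f * (new_erl - erl_k);  // leaky minimum tracking
      erl_k = std::max(erl_k, kMinErl);
    }
  }
  // Separately, every block decrements hold_k; once it reaches zero, erl_k is
  // doubled (+3 dB) per block, capped at kMaxErl = 1000.
}
```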
+ for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (X2[k] > kX2Min) { + const float new_erl = Y2[k] / X2[k]; + if (new_erl < erl_[k]) { + hold_counters_[k - 1] = 1000; + erl_[k] += 0.1f * (new_erl - erl_[k]); + erl_[k] = std::max(erl_[k], kMinErl); + } + } + } + + std::for_each(hold_counters_.begin(), hold_counters_.end(), + [](int& a) { --a; }); + std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1, + erl_.begin() + 1, [](int a, float b) { + return a > 0 ? b : std::min(kMaxErl, 2.f * b); + }); + + erl_[0] = erl_[1]; + erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1]; + + // Compute ERL over all frequency bins. + const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + + if (X2_sum > kX2Min * X2.size()) { + const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); + const float new_erl = Y2_sum / X2_sum; + if (new_erl < erl_time_domain_) { + hold_counter_time_domain_ = 1000; + erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_); + erl_time_domain_ = std::max(erl_time_domain_, kMinErl); + } + } + + --hold_counter_time_domain_; + erl_time_domain_ = (hold_counter_time_domain_ > 0) + ? erl_time_domain_ + : std::min(kMaxErl, 2.f * erl_time_domain_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h new file mode 100644 index 0000000000..639a52c561 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Estimates the echo return loss based on the signal spectra. +class ErlEstimator { + public: + explicit ErlEstimator(size_t startup_phase_length_blocks_); + ~ErlEstimator(); + + ErlEstimator(const ErlEstimator&) = delete; + ErlEstimator& operator=(const ErlEstimator&) = delete; + + // Resets the ERL estimation. + void Reset(); + + // Updates the ERL estimate. + void Update(const std::vector& converged_filters, + rtc::ArrayView> + render_spectra, + rtc::ArrayView> + capture_spectra); + + // Returns the most recent ERL estimate. + const std::array& Erl() const { return erl_; } + float ErlTimeDomain() const { return erl_time_domain_; } + + private: + const size_t startup_phase_length_blocks__; + std::array erl_; + std::array hold_counters_; + float erl_time_domain_; + int hold_counter_time_domain_; + size_t blocks_since_reset_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc new file mode 100644 index 0000000000..79e5465e3c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. 
All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erl_estimator.h"
+
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+std::string ProduceDebugText(size_t num_render_channels,
+                             size_t num_capture_channels) {
+  rtc::StringBuilder ss;
+  ss << "Render channels: " << num_render_channels;
+  ss << ", Capture channels: " << num_capture_channels;
+  return ss.Release();
+}
+
+void VerifyErl(const std::array<float, kFftLengthBy2Plus1>& erl,
+               float erl_time_domain,
+               float reference) {
+  std::for_each(erl.begin(), erl.end(),
+                [reference](float a) { EXPECT_NEAR(reference, a, 0.001); });
+  EXPECT_NEAR(reference, erl_time_domain, 0.001);
+}
+
+}  // namespace
+
+class ErlEstimatorMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         ErlEstimatorMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 2, 8),
+                                            ::testing::Values(1, 2, 8)));
+
+// Verifies that the correct ERL estimates are achieved.
+TEST_P(ErlEstimatorMultiChannel, Estimates) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+  SCOPED_TRACE(ProduceDebugText(num_render_channels, num_capture_channels));
+  std::vector<std::array<float, kFftLengthBy2Plus1>> X2(num_render_channels);
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(0.f);
+  }
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(0.f);
+  }
+  std::vector<bool> converged_filters(num_capture_channels, false);
+  const size_t converged_idx = num_capture_channels - 1;
+  converged_filters[converged_idx] = true;
+
+  ErlEstimator estimator(0);
+
+  // Verifies that the ERL estimate is properly reduced to lower values.
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(500 * 1000.f * 1000.f);
+  }
+  Y2[converged_idx].fill(10 * X2[0][0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the ERL is not immediately increased when the ERL in the
+  // data increases.
+  Y2[converged_idx].fill(10000 * X2[0][0]);
+  for (size_t k = 0; k < 998; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the rate of increase is 3 dB.
+  estimator.Update(converged_filters, X2, Y2);
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 20.f);
+
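The expectations in this test follow numerically from the update rule in `erl_estimator.cc`; a standalone trace of a single bin (a sketch, not the real class):

```cpp
#include <cstdio>

int main() {
  float erl = 1000.f;          // starts at kMaxErl
  const float new_erl = 10.f;  // Y2/X2 during the first phase of the test
  for (int n = 0; n < 200; ++n) erl += 0.1f * (new_erl - erl);
  std::printf("after 200 updates: %f\n", erl);  // ~10, matching VerifyErl(10.f)
  // The 1000-block hold then keeps it at ~10 through the next 998 high-echo
  // updates; the first post-hold update doubles it: 2 * 10 = 20 (the 3 dB
  // step), after which repeated doubling saturates at kMaxErl = 1000.
  return 0;
}
```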
+  // Verifies that the maximum ERL is achieved when there are no low ERL
+  // estimates.
+  for (size_t k = 0; k < 1000; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+
+  // Verifies that the ERL estimate is not updated for low-level signals.
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(1000.f * 1000.f);
+  }
+  Y2[converged_idx].fill(10 * X2[0][0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc
new file mode 100644
index 0000000000..0e3d715c59
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erle_estimator.h"
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
+                             const EchoCanceller3Config& config,
+                             size_t num_capture_channels)
+    : startup_phase_length_blocks_(startup_phase_length_blocks),
+      fullband_erle_estimator_(config.erle, num_capture_channels),
+      subband_erle_estimator_(config, num_capture_channels) {
+  if (config.erle.num_sections > 1) {
+    signal_dependent_erle_estimator_ =
+        std::make_unique<SignalDependentErleEstimator>(config,
+                                                       num_capture_channels);
+  }
+  Reset(true);
+}
+
+ErleEstimator::~ErleEstimator() = default;
+
+void ErleEstimator::Reset(bool delay_change) {
+  fullband_erle_estimator_.Reset();
+  subband_erle_estimator_.Reset();
+  if (signal_dependent_erle_estimator_) {
+    signal_dependent_erle_estimator_->Reset();
+  }
+  if (delay_change) {
+    blocks_since_reset_ = 0;
+  }
+}
+
+void ErleEstimator::Update(
+    const RenderBuffer& render_buffer,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        filter_frequency_responses,
+    rtc::ArrayView<const float, kFftLengthBy2Plus1>
+        avg_render_spectrum_with_reverb,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        capture_spectra,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        subtractor_spectra,
+    const std::vector<bool>& converged_filters) {
+  RTC_DCHECK_EQ(
+      subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
+      capture_spectra.size());
+  RTC_DCHECK_EQ(
+      subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
+      subtractor_spectra.size());
+  const auto& X2_reverb = avg_render_spectrum_with_reverb;
+  const auto& Y2 = capture_spectra;
+  const auto& E2 = subtractor_spectra;
+
+  if (++blocks_since_reset_ < startup_phase_length_blocks_) {
+    return;
+  }
+
+  subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
+
+  if (signal_dependent_erle_estimator_) {
+    signal_dependent_erle_estimator_->Update(
+        render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
+        subband_erle_estimator_.Erle(/*onset_compensated=*/false),
+        subband_erle_estimator_.Erle(/*onset_compensated=*/true),
+        converged_filters);
+  }
+
+  fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
+}
+
+void ErleEstimator::Dump(
+    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
+  fullband_erle_estimator_.Dump(data_dumper);
+  subband_erle_estimator_.Dump(data_dumper);
+  if
(signal_dependent_erle_estimator_) { + signal_dependent_erle_estimator_->Dump(data_dumper); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h new file mode 100644 index 0000000000..55797592a9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ + +#include + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" +#include "modules/audio_processing/aec3/subband_erle_estimator.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement. One estimate is done per subband +// and another one is done using the aggreation of energy over all the subbands. +class ErleEstimator { + public: + ErleEstimator(size_t startup_phase_length_blocks, + const EchoCanceller3Config& config, + size_t num_capture_channels); + ~ErleEstimator(); + + // Resets the fullband ERLE estimator and the subbands ERLE estimators. + void Reset(bool delay_change); + + // Updates the ERLE estimates. + void Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses, + rtc::ArrayView + avg_render_spectrum_with_reverb, + rtc::ArrayView> + capture_spectra, + rtc::ArrayView> + subtractor_spectra, + const std::vector& converged_filters); + + // Returns the most recent subband ERLE estimates. + rtc::ArrayView> Erle( + bool onset_compensated) const { + return signal_dependent_erle_estimator_ + ? signal_dependent_erle_estimator_->Erle(onset_compensated) + : subband_erle_estimator_.Erle(onset_compensated); + } + + // Returns the non-capped subband ERLE. + rtc::ArrayView> ErleUnbounded() + const { + // Unbounded ERLE is only used with the subband erle estimator where the + // ERLE is often capped at low values. When the signal dependent ERLE + // estimator is used the capped ERLE is returned. + return !signal_dependent_erle_estimator_ + ? subband_erle_estimator_.ErleUnbounded() + : signal_dependent_erle_estimator_->Erle( + /*onset_compensated=*/false); + } + + // Returns the subband ERLE that are estimated during onsets (only used for + // testing). + rtc::ArrayView> ErleDuringOnsets() + const { + return subband_erle_estimator_.ErleDuringOnsets(); + } + + // Returns the fullband ERLE estimate. + float FullbandErleLog2() const { + return fullband_erle_estimator_.FullbandErleLog2(); + } + + // Returns an estimation of the current linear filter quality based on the + // current and past fullband ERLE estimates. 
The returned value is a float + // vector with content between 0 and 1 where 1 indicates that, at this current + // time instant, the linear filter is reaching its maximum subtraction + // performance. + rtc::ArrayView> GetInstLinearQualityEstimates() + const { + return fullband_erle_estimator_.GetInstLinearQualityEstimates(); + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + const size_t startup_phase_length_blocks_; + FullBandErleEstimator fullband_erle_estimator_; + SubbandErleEstimator subband_erle_estimator_; + std::unique_ptr + signal_dependent_erle_estimator_; + size_t blocks_since_reset_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc new file mode 100644 index 0000000000..42be7d9c7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/erle_estimator.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2; +constexpr float kTrueErle = 10.f; +constexpr float kTrueErleOnsets = 1.0f; +constexpr float kEchoPathGain = 3.f; + +void VerifyErleBands( + rtc::ArrayView> erle, + float reference_lf, + float reference_hf) { + for (size_t ch = 0; ch < erle.size(); ++ch) { + std::for_each( + erle[ch].begin(), erle[ch].begin() + kLowFrequencyLimit, + [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); }); + std::for_each( + erle[ch].begin() + kLowFrequencyLimit, erle[ch].end(), + [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); }); + } +} + +void VerifyErle( + rtc::ArrayView> erle, + float erle_time_domain, + float reference_lf, + float reference_hf) { + VerifyErleBands(erle, reference_lf, reference_hf); + EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5); +} + +void VerifyErleGreaterOrEqual( + rtc::ArrayView> erle1, + rtc::ArrayView> erle2) { + for (size_t ch = 0; ch < erle1.size(); ++ch) { + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + EXPECT_GE(erle1[ch][i], erle2[ch][i]); + } + } +} + +void FormFarendTimeFrame(Block* x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 
8394.19}; + for (int band = 0; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + RTC_DCHECK_GE(kBlockSize, frame.size()); + std::copy(frame.begin(), frame.end(), x->begin(band, channel)); + } + } +} + +void FormFarendFrame(const RenderBuffer& render_buffer, + float erle, + std::array* X2, + rtc::ArrayView> E2, + rtc::ArrayView> Y2) { + const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + const int num_render_channels = spectrum_buffer.buffer[0].size(); + const int num_capture_channels = Y2.size(); + + X2->fill(0.f); + for (int ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + (*X2)[k] += spectrum_buffer.buffer[spectrum_buffer.write][ch][k] / + num_render_channels; + } + } + + for (int ch = 0; ch < num_capture_channels; ++ch) { + std::transform(X2->begin(), X2->end(), Y2[ch].begin(), + [](float a) { return a * kEchoPathGain * kEchoPathGain; }); + std::transform(Y2[ch].begin(), Y2[ch].end(), E2[ch].begin(), + [erle](float a) { return a / erle; }); + } +} + +void FormNearendFrame( + Block* x, + std::array* X2, + rtc::ArrayView> E2, + rtc::ArrayView> Y2) { + for (int band = 0; band < x->NumBands(); ++band) { + for (int ch = 0; ch < x->NumChannels(); ++ch) { + std::fill(x->begin(band, ch), x->end(band, ch), 0.f); + } + } + + X2->fill(0.f); + for (size_t ch = 0; ch < Y2.size(); ++ch) { + Y2[ch].fill(500.f * 1000.f * 1000.f); + E2[ch].fill(Y2[ch][0]); + } +} + +void GetFilterFreq( + size_t delay_headroom_samples, + rtc::ArrayView>> + filter_frequency_response) { + const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize; + for (size_t ch = 0; ch < filter_frequency_response[0].size(); ++ch) { + for (auto& block_freq_resp : filter_frequency_response) { + block_freq_resp[ch].fill(0.f); + } + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + filter_frequency_response[delay_headroom_blocks][ch][k] = kEchoPathGain; + } + } +} + +} // namespace + +class ErleEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + ErleEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4, 8), + ::testing::Values(1, 2, 8))); + +TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + std::array X2; + std::vector> E2(num_capture_channels); + std::vector> Y2(num_capture_channels); + std::vector converged_filters(num_capture_channels, true); + + EchoCanceller3Config config; + config.erle.onset_detection = true; + + Block x(kNumBands, num_render_channels); + std::vector>> + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector>( + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response); + + ErleEstimator estimator(0, config, num_capture_channels); + + FormFarendTimeFrame(&x); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + // Verifies that the ERLE estimate is properly increased to higher values. 
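The far-end frames used for this verification come from `FormFarendFrame` above, which encodes a fixed synthetic echo path, so the expected steady state follows directly (a sketch using the constants at the top of this file):

```cpp
// Sketch: the synthetic echo model behind FormFarendFrame, per frequency bin.
float ExpectedSteadyStateErle(float X2) {
  const float Y2 = kEchoPathGain * kEchoPathGain * X2;  // echo in the capture
  const float E2 = Y2 / kTrueErle;                      // residual after AEC
  return Y2 / E2;  // == kTrueErle (10x, i.e. roughly 10 dB of enhancement)
}
```

The loop below then drives 1000 update blocks with these frames.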
+ FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2, + Y2); + for (size_t k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, converged_filters); + } + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); + + FormNearendFrame(&x, &X2, E2, Y2); + // Verifies that the ERLE is not immediately decreased during nearend + // activity. + for (size_t k = 0; k < 50; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, converged_filters); + } + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); +} + +TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + std::array X2; + std::vector> E2(num_capture_channels); + std::vector> Y2(num_capture_channels); + std::vector converged_filters(num_capture_channels, true); + EchoCanceller3Config config; + config.erle.onset_detection = true; + Block x(kNumBands, num_render_channels); + std::vector>> + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector>( + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response); + + ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config, + num_capture_channels); + + FormFarendTimeFrame(&x); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + + for (size_t burst = 0; burst < 20; ++burst) { + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErleOnsets, + &X2, E2, Y2); + for (size_t k = 0; k < 10; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2, + Y2); + for (size_t k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + FormNearendFrame(&x, &X2, E2, Y2); + for (size_t k = 0; k < 300; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + 
}
+  VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min,
+                  config.erle.min);
+  FormNearendFrame(&x, &X2, E2, Y2);
+  for (size_t k = 0; k < 1000; k++) {
+    estimator.Update(*render_delay_buffer->GetRenderBuffer(),
+                     filter_frequency_response, X2, Y2, E2, converged_filters);
+  }
+  // Verifies that during nearend activity, Erle converges to the Erle for
+  // onsets.
+  VerifyErle(estimator.Erle(/*onset_compensated=*/true),
+             std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min,
+             config.erle.min);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc
new file mode 100644
index 0000000000..1ce2d31d8f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/fft_buffer.h"
+
+namespace webrtc {
+
+FftBuffer::FftBuffer(size_t size, size_t num_channels)
+    : size(static_cast<int>(size)),
+      buffer(size, std::vector<FftData>(num_channels)) {
+  for (auto& block : buffer) {
+    for (auto& channel_fft_data : block) {
+      channel_fft_data.Clear();
+    }
+  }
+}
+
+FftBuffer::~FftBuffer() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h
new file mode 100644
index 0000000000..4187315863
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Struct for bundling a circular buffer of FftData objects together with the
+// read and write indices.
+struct FftBuffer {
+  FftBuffer(size_t size, size_t num_channels);
+  ~FftBuffer();
+
+  int IncIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index < size - 1 ? index + 1 : 0;
+  }
+
+  int DecIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index > 0 ? index - 1 : size - 1;
+  }
+
+  int OffsetIndex(int index, int offset) const {
+    RTC_DCHECK_GE(buffer.size(), offset);
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return (size + index + offset) % size;
+  }
+
+  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
+  void IncWriteIndex() { write = IncIndex(write); }
+  void DecWriteIndex() { write = DecIndex(write); }
+  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
+  void IncReadIndex() { read = IncIndex(read); }
+  void DecReadIndex() { read = DecIndex(read); }
+
+  const int size;
+  std::vector<std::vector<FftData>> buffer;
+  int write = 0;
+  int read = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h
new file mode 100644
index 0000000000..9c25e784aa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+
+// Struct holding the complex data produced from 128 point real-valued FFTs.
+struct FftData {
+  // Copies the data in src.
+  void Assign(const FftData& src) {
+    std::copy(src.re.begin(), src.re.end(), re.begin());
+    std::copy(src.im.begin(), src.im.end(), im.begin());
+    im[0] = im[kFftLengthBy2] = 0;
+  }
+
+  // Clears all the data.
+  void Clear() {
+    re.fill(0.f);
+    im.fill(0.f);
+  }
+
+  // Computes the power spectrum of the data, AVX2 variant.
+  void SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const;
+
+  // Computes the power spectrum of the data.
+  void Spectrum(Aec3Optimization optimization,
+                rtc::ArrayView<float> power_spectrum) const {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
+    switch (optimization) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
+        constexpr int kLimit = kNumFourBinBands * 4;
+        for (size_t k = 0; k < kLimit; k += 4) {
+          const __m128 r = _mm_loadu_ps(&re[k]);
+          const __m128 i = _mm_loadu_ps(&im[k]);
+          const __m128 ii = _mm_mul_ps(i, i);
+          const __m128 rr = _mm_mul_ps(r, r);
+          const __m128 rrii = _mm_add_ps(rr, ii);
+          _mm_storeu_ps(&power_spectrum[k], rrii);
+        }
+        power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
+                                        im[kFftLengthBy2] * im[kFftLengthBy2];
+      } break;
+      case Aec3Optimization::kAvx2:
+        SpectrumAVX2(power_spectrum);
+        break;
+#endif
+      default:
+        std::transform(re.begin(), re.end(), im.begin(),
+                       power_spectrum.begin(),
+                       [](float a, float b) { return a * a + b * b; });
+    }
+  }
+
+  // Copies the data from an interleaved array.
+  void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
+    re[0] = v[0];
+    re[kFftLengthBy2] = v[1];
+    im[0] = im[kFftLengthBy2] = 0;
+    for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
+      re[k] = v[j++];
+      im[k] = v[j++];
+    }
+  }
+
+  // Copies the data into an interleaved array.
+  void CopyToPackedArray(std::array<float, kFftLength>* v) const {
+    RTC_DCHECK(v);
+    (*v)[0] = re[0];
+    (*v)[1] = re[kFftLengthBy2];
+    for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
+      (*v)[j++] = re[k];
+      (*v)[j++] = im[k];
+    }
+  }
+
+  std::array<float, kFftLengthBy2Plus1> re;
+  std::array<float, kFftLengthBy2Plus1> im;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc
new file mode 100644
index 0000000000..1fe4bd69c6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/fft_data.h"
+
+#include <immintrin.h>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Computes the power spectrum of the data.
+void FftData::SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const {
+  RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
+  for (size_t k = 0; k < kFftLengthBy2; k += 8) {
+    __m256 r = _mm256_loadu_ps(&re[k]);
+    __m256 i = _mm256_loadu_ps(&im[k]);
+    __m256 ii = _mm256_mul_ps(i, i);
+    ii = _mm256_fmadd_ps(r, r, ii);
+    _mm256_storeu_ps(&power_spectrum[k], ii);
+  }
+  power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
+                                  im[kFftLengthBy2] * im[kFftLengthBy2];
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build
new file mode 100644
index 0000000000..d77163999b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build
@@ -0,0 +1,205 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("fft_data_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc new file mode 100644 index 0000000000..d76fabdbd6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fft_data.h" + +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods are bitexact to their reference +// counterparts. +TEST(FftData, TestSse2Optimizations) { + if (GetCPUInfo(kSSE2) != 0) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + std::array spectrum_sse2; + x.Spectrum(Aec3Optimization::kNone, spectrum); + x.Spectrum(Aec3Optimization::kSse2, spectrum_sse2); + EXPECT_EQ(spectrum, spectrum_sse2); + } +} + +// Verifies that the optimized methods are bitexact to their reference +// counterparts. 
+TEST(FftData, TestAvx2Optimizations) { + if (GetCPUInfo(kAVX2) != 0) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + std::array spectrum_avx2; + x.Spectrum(Aec3Optimization::kNone, spectrum); + x.Spectrum(Aec3Optimization::kAvx2, spectrum_avx2); + EXPECT_EQ(spectrum, spectrum_avx2); + } +} +#endif + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for null output in CopyToPackedArray. +TEST(FftDataDeathTest, NonNullCopyToPackedArrayOutput) { + EXPECT_DEATH(FftData().CopyToPackedArray(nullptr), ""); +} + +// Verifies the check for null output in Spectrum. +TEST(FftDataDeathTest, NonNullSpectrumOutput) { + EXPECT_DEATH(FftData().Spectrum(Aec3Optimization::kNone, nullptr), ""); +} + +#endif + +// Verifies that the Assign method properly copies the data from the source and +// ensures that the imaginary components for the DC and Nyquist bins are 0. +TEST(FftData, Assign) { + FftData x; + FftData y; + + x.re.fill(1.f); + x.im.fill(2.f); + y.Assign(x); + EXPECT_EQ(x.re, y.re); + EXPECT_EQ(0.f, y.im[0]); + EXPECT_EQ(0.f, y.im[x.im.size() - 1]); + for (size_t k = 1; k < x.im.size() - 1; ++k) { + EXPECT_EQ(x.im[k], y.im[k]); + } +} + +// Verifies that the Clear method properly clears all the data. +TEST(FftData, Clear) { + FftData x_ref; + FftData x; + + x_ref.re.fill(0.f); + x_ref.im.fill(0.f); + + x.re.fill(1.f); + x.im.fill(2.f); + x.Clear(); + + EXPECT_EQ(x_ref.re, x.re); + EXPECT_EQ(x_ref.im, x.im); +} + +// Verifies that the spectrum is correctly computed. +TEST(FftData, Spectrum) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + x.Spectrum(Aec3Optimization::kNone, spectrum); + + EXPECT_EQ(x.re[0] * x.re[0], spectrum[0]); + EXPECT_EQ(x.re[spectrum.size() - 1] * x.re[spectrum.size() - 1], + spectrum[spectrum.size() - 1]); + for (size_t k = 1; k < spectrum.size() - 1; ++k) { + EXPECT_EQ(x.re[k] * x.re[k] + x.im[k] * x.im[k], spectrum[k]); + } +} + +// Verifies that the functionality in CopyToPackedArray works as intended. +TEST(FftData, CopyToPackedArray) { + FftData x; + std::array x_packed; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + x.CopyToPackedArray(&x_packed); + + EXPECT_EQ(x.re[0], x_packed[0]); + EXPECT_EQ(x.re[x.re.size() - 1], x_packed[1]); + for (size_t k = 1; k < x_packed.size() / 2; ++k) { + EXPECT_EQ(x.re[k], x_packed[2 * k]); + EXPECT_EQ(x.im[k], x_packed[2 * k + 1]); + } +} + +// Verifies that the functionality in CopyFromPackedArray works as intended +// (relies on that the functionality in CopyToPackedArray has been verified in +// the test above). 
+TEST(FftData, CopyFromPackedArray) { + FftData x_ref; + FftData x; + std::array x_packed; + + for (size_t k = 0; k < x_ref.re.size(); ++k) { + x_ref.re[k] = k + 1; + } + + x_ref.im[0] = x_ref.im[x_ref.im.size() - 1] = 0.f; + for (size_t k = 1; k < x_ref.im.size() - 1; ++k) { + x_ref.im[k] = 2.f * (k + 1); + } + + x_ref.CopyToPackedArray(&x_packed); + x.CopyFromPackedArray(x_packed); + + EXPECT_EQ(x_ref.re, x.re); + EXPECT_EQ(x_ref.im, x.im); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc new file mode 100644 index 0000000000..d8fd3aa275 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/filter_analyzer.h" + +#include + +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +size_t FindPeakIndex(rtc::ArrayView filter_time_domain, + size_t peak_index_in, + size_t start_sample, + size_t end_sample) { + size_t peak_index_out = peak_index_in; + float max_h2 = + filter_time_domain[peak_index_out] * filter_time_domain[peak_index_out]; + for (size_t k = start_sample; k <= end_sample; ++k) { + float tmp = filter_time_domain[k] * filter_time_domain[k]; + if (tmp > max_h2) { + peak_index_out = k; + max_h2 = tmp; + } + } + + return peak_index_out; +} + +} // namespace + +std::atomic FilterAnalyzer::instance_count_(0); + +FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + bounded_erl_(config.ep_strength.bounded_erl), + default_gain_(config.ep_strength.default_gain), + h_highpass_(num_capture_channels, + std::vector( + GetTimeDomainLength(config.filter.refined.length_blocks), + 0.f)), + filter_analysis_states_(num_capture_channels, + FilterAnalysisState(config)), + filter_delays_blocks_(num_capture_channels, 0) { + Reset(); +} + +FilterAnalyzer::~FilterAnalyzer() = default; + +void FilterAnalyzer::Reset() { + blocks_since_reset_ = 0; + ResetRegion(); + for (auto& state : filter_analysis_states_) { + state.Reset(default_gain_); + } + std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 0); +} + +void FilterAnalyzer::Update( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer, + bool* any_filter_consistent, + float* max_echo_path_gain) { + RTC_DCHECK(any_filter_consistent); + RTC_DCHECK(max_echo_path_gain); + RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size()); + RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size()); + + ++blocks_since_reset_; + SetRegionToAnalyze(filters_time_domain[0].size()); + AnalyzeRegion(filters_time_domain, render_buffer); + + // Aggregate the results for all capture channels. 
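+  // A single consistent filter suffices for reporting consistency, the
+  // reported echo path gain is the maximum gain over the capture channels,
+  // and the reported delay is the minimum delay over the capture channels.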
+ auto& st_ch0 = filter_analysis_states_[0]; + *any_filter_consistent = st_ch0.consistent_estimate; + *max_echo_path_gain = st_ch0.gain; + min_filter_delay_blocks_ = filter_delays_blocks_[0]; + for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) { + auto& st_ch = filter_analysis_states_[ch]; + *any_filter_consistent = + *any_filter_consistent || st_ch.consistent_estimate; + *max_echo_path_gain = std::max(*max_echo_path_gain, st_ch.gain); + min_filter_delay_blocks_ = + std::min(min_filter_delay_blocks_, filter_delays_blocks_[ch]); + } +} + +void FilterAnalyzer::AnalyzeRegion( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer) { + // Preprocess the filter to avoid issues with low-frequency components in the + // filter. + PreProcessFilters(filters_time_domain); + data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]); + + constexpr float kOneByBlockSize = 1.f / kBlockSize; + for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) { + RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size()); + RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size()); + + auto& st_ch = filter_analysis_states_[ch]; + RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size()); + RTC_DCHECK_GT(h_highpass_[ch].size(), 0); + st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1); + + st_ch.peak_index = + FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_, + region_.end_sample_); + filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2; + UpdateFilterGain(h_highpass_[ch], &st_ch); + st_ch.filter_length_blocks = + filters_time_domain[ch].size() * kOneByBlockSize; + + st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect( + h_highpass_[ch], region_, + render_buffer.GetBlock(-filter_delays_blocks_[ch]), st_ch.peak_index, + filter_delays_blocks_[ch]); + } +} + +void FilterAnalyzer::UpdateFilterGain( + rtc::ArrayView filter_time_domain, + FilterAnalysisState* st) { + bool sufficient_time_to_converge = + blocks_since_reset_ > 5 * kNumBlocksPerSecond; + + if (sufficient_time_to_converge && st->consistent_estimate) { + st->gain = fabsf(filter_time_domain[st->peak_index]); + } else { + // TODO(peah): Verify whether this check against a float is ok. + if (st->gain) { + st->gain = std::max(st->gain, fabsf(filter_time_domain[st->peak_index])); + } + } + + if (bounded_erl_ && st->gain) { + st->gain = std::max(st->gain, 0.01f); + } +} + +void FilterAnalyzer::PreProcessFilters( + rtc::ArrayView> filters_time_domain) { + for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) { + RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size()); + RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size()); + + RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size()); + h_highpass_[ch].resize(filters_time_domain[ch].size()); + // Minimum phase high-pass filter with cutoff frequency at about 600 Hz. 
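+    // The filter h is applied below as a direct-form FIR convolution that is
+    // restricted to the region currently being analyzed, so each call only
+    // recomputes a slice of h_highpass_.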
+ constexpr std::array h = { + {0.7929742f, -0.36072128f, -0.47047766f}}; + + std::fill(h_highpass_[ch].begin() + region_.start_sample_, + h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f); + float* h_highpass_ch = h_highpass_[ch].data(); + const float* filters_time_domain_ch = filters_time_domain[ch].data(); + const size_t region_end = region_.end_sample_; + for (size_t k = std::max(h.size() - 1, region_.start_sample_); + k <= region_end; ++k) { + float tmp = h_highpass_ch[k]; + for (size_t j = 0; j < h.size(); ++j) { + tmp += filters_time_domain_ch[k - j] * h[j]; + } + h_highpass_ch[k] = tmp; + } + } +} + +void FilterAnalyzer::ResetRegion() { + region_.start_sample_ = 0; + region_.end_sample_ = 0; +} + +void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) { + constexpr size_t kNumberBlocksToUpdate = 1; + auto& r = region_; + r.start_sample_ = r.end_sample_ >= filter_size - 1 ? 0 : r.end_sample_ + 1; + r.end_sample_ = + std::min(r.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1, + filter_size - 1); + + // Check range. + RTC_DCHECK_LT(r.start_sample_, filter_size); + RTC_DCHECK_LT(r.end_sample_, filter_size); + RTC_DCHECK_LE(r.start_sample_, r.end_sample_); +} + +FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector( + const EchoCanceller3Config& config) + : active_render_threshold_(config.render_levels.active_render_limit * + config.render_levels.active_render_limit * + kFftLengthBy2) { + Reset(); +} + +void FilterAnalyzer::ConsistentFilterDetector::Reset() { + significant_peak_ = false; + filter_floor_accum_ = 0.f; + filter_secondary_peak_ = 0.f; + filter_floor_low_limit_ = 0; + filter_floor_high_limit_ = 0; + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = -10; +} + +bool FilterAnalyzer::ConsistentFilterDetector::Detect( + rtc::ArrayView filter_to_analyze, + const FilterRegion& region, + const Block& x_block, + size_t peak_index, + int delay_blocks) { + if (region.start_sample_ == 0) { + filter_floor_accum_ = 0.f; + filter_secondary_peak_ = 0.f; + filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64; + filter_floor_high_limit_ = + peak_index > filter_to_analyze.size() - 129 ? 
0 : peak_index + 128; + } + + float filter_floor_accum = filter_floor_accum_; + float filter_secondary_peak = filter_secondary_peak_; + for (size_t k = region.start_sample_; + k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) { + float abs_h = fabsf(filter_to_analyze[k]); + filter_floor_accum += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + + for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_); + k <= region.end_sample_; ++k) { + float abs_h = fabsf(filter_to_analyze[k]); + filter_floor_accum += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + filter_floor_accum_ = filter_floor_accum; + filter_secondary_peak_ = filter_secondary_peak; + + if (region.end_sample_ == filter_to_analyze.size() - 1) { + float filter_floor = filter_floor_accum_ / + (filter_floor_low_limit_ + filter_to_analyze.size() - + filter_floor_high_limit_); + + float abs_peak = fabsf(filter_to_analyze[peak_index]); + significant_peak_ = abs_peak > 10.f * filter_floor && + abs_peak > 2.f * filter_secondary_peak_; + } + + if (significant_peak_) { + bool active_render_block = false; + for (int ch = 0; ch < x_block.NumChannels(); ++ch) { + rtc::ArrayView x_channel = + x_block.View(/*band=*/0, ch); + const float x_energy = std::inner_product( + x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f); + if (x_energy > active_render_threshold_) { + active_render_block = true; + break; + } + } + + if (consistent_delay_reference_ == delay_blocks) { + if (active_render_block) { + ++consistent_estimate_counter_; + } + } else { + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = delay_blocks; + } + } + return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h new file mode 100644 index 0000000000..9aec8b14d7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ + +#include + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block.h" + +namespace webrtc { + +class ApmDataDumper; +class RenderBuffer; + +// Class for analyzing the properties of an adaptive filter. +class FilterAnalyzer { + public: + FilterAnalyzer(const EchoCanceller3Config& config, + size_t num_capture_channels); + ~FilterAnalyzer(); + + FilterAnalyzer(const FilterAnalyzer&) = delete; + FilterAnalyzer& operator=(const FilterAnalyzer&) = delete; + + // Resets the analysis. + void Reset(); + + // Updates the estimates with new input data. 
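+  // `filters_time_domain` is expected to hold one time-domain filter per
+  // capture channel; `any_filter_consistent` and `max_echo_path_gain` are
+  // output parameters aggregated over all capture channels.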
+ void Update(rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer, + bool* any_filter_consistent, + float* max_echo_path_gain); + + // Returns the delay in blocks for each filter. + rtc::ArrayView FilterDelaysBlocks() const { + return filter_delays_blocks_; + } + + // Returns the minimum delay of all filters in terms of blocks. + int MinFilterDelayBlocks() const { return min_filter_delay_blocks_; } + + // Returns the number of blocks for the current used filter. + int FilterLengthBlocks() const { + return filter_analysis_states_[0].filter_length_blocks; + } + + // Returns the preprocessed filter. + rtc::ArrayView> GetAdjustedFilters() const { + return h_highpass_; + } + + // Public for testing purposes only. + void SetRegionToAnalyze(size_t filter_size); + + private: + struct FilterAnalysisState; + + void AnalyzeRegion( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer); + + void UpdateFilterGain(rtc::ArrayView filters_time_domain, + FilterAnalysisState* st); + void PreProcessFilters( + rtc::ArrayView> filters_time_domain); + + void ResetRegion(); + + struct FilterRegion { + size_t start_sample_; + size_t end_sample_; + }; + + // This class checks whether the shape of the impulse response has been + // consistent over time. + class ConsistentFilterDetector { + public: + explicit ConsistentFilterDetector(const EchoCanceller3Config& config); + void Reset(); + bool Detect(rtc::ArrayView filter_to_analyze, + const FilterRegion& region, + const Block& x_block, + size_t peak_index, + int delay_blocks); + + private: + bool significant_peak_; + float filter_floor_accum_; + float filter_secondary_peak_; + size_t filter_floor_low_limit_; + size_t filter_floor_high_limit_; + const float active_render_threshold_; + size_t consistent_estimate_counter_ = 0; + int consistent_delay_reference_ = -10; + }; + + struct FilterAnalysisState { + explicit FilterAnalysisState(const EchoCanceller3Config& config) + : filter_length_blocks(config.filter.refined_initial.length_blocks), + consistent_filter_detector(config) { + Reset(config.ep_strength.default_gain); + } + + void Reset(float default_gain) { + peak_index = 0; + gain = default_gain; + consistent_filter_detector.Reset(); + } + + float gain; + size_t peak_index; + int filter_length_blocks; + bool consistent_estimate = false; + ConsistentFilterDetector consistent_filter_detector; + }; + + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const bool bounded_erl_; + const float default_gain_; + std::vector> h_highpass_; + + size_t blocks_since_reset_ = 0; + FilterRegion region_; + + std::vector filter_analysis_states_; + std::vector filter_delays_blocks_; + + int min_filter_delay_blocks_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc new file mode 100644 index 0000000000..f1e2e4c188 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/filter_analyzer.h" + +#include + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verifies that the filter analyzer handles filter resizes properly. +TEST(FilterAnalyzer, FilterResize) { + EchoCanceller3Config c; + std::vector filter(65, 0.f); + for (size_t num_capture_channels : {1, 2, 4}) { + FilterAnalyzer fa(c, num_capture_channels); + fa.SetRegionToAnalyze(filter.size()); + fa.SetRegionToAnalyze(filter.size()); + filter.resize(32); + fa.SetRegionToAnalyze(filter.size()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc new file mode 100644 index 0000000000..3039dcf7f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +FrameBlocker::FrameBlocker(size_t num_bands, size_t num_channels) + : num_bands_(num_bands), + num_channels_(num_channels), + buffer_(num_bands_, std::vector>(num_channels)) { + RTC_DCHECK_LT(0, num_bands); + RTC_DCHECK_LT(0, num_channels); + for (auto& band : buffer_) { + for (auto& channel : band) { + channel.reserve(kBlockSize); + RTC_DCHECK(channel.empty()); + } + } +} + +FrameBlocker::~FrameBlocker() = default; + +void FrameBlocker::InsertSubFrameAndExtractBlock( + const std::vector>>& sub_frame, + Block* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->NumBands()); + RTC_DCHECK_EQ(num_bands_, sub_frame.size()); + for (size_t band = 0; band < num_bands_; ++band) { + RTC_DCHECK_EQ(num_channels_, block->NumChannels()); + RTC_DCHECK_EQ(num_channels_, sub_frame[band].size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + RTC_DCHECK_GE(kBlockSize - 16, buffer_[band][channel].size()); + RTC_DCHECK_EQ(kSubFrameLength, sub_frame[band][channel].size()); + const int samples_to_block = kBlockSize - buffer_[band][channel].size(); + std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(), + block->begin(band, channel)); + std::copy(sub_frame[band][channel].begin(), + sub_frame[band][channel].begin() + samples_to_block, + block->begin(band, channel) + kBlockSize - samples_to_block); + buffer_[band][channel].clear(); + buffer_[band][channel].insert( + buffer_[band][channel].begin(), + sub_frame[band][channel].begin() + samples_to_block, + sub_frame[band][channel].end()); + } + } +} + +bool FrameBlocker::IsBlockAvailable() const { + return kBlockSize == buffer_[0][0].size(); +} + +void FrameBlocker::ExtractBlock(Block* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->NumBands()); + RTC_DCHECK_EQ(num_channels_, block->NumChannels()); + RTC_DCHECK(IsBlockAvailable()); + for (size_t band = 0; band < num_bands_; ++band) { + for (size_t channel = 0; channel < num_channels_; 
++channel) { + RTC_DCHECK_EQ(kBlockSize, buffer_[band][channel].size()); + std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(), + block->begin(band, channel)); + buffer_[band][channel].clear(); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h new file mode 100644 index 0000000000..623c812157 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ + +#include + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block.h" + +namespace webrtc { + +// Class for producing 64 sample multiband blocks from frames consisting of 2 +// subframes of 80 samples. +class FrameBlocker { + public: + FrameBlocker(size_t num_bands, size_t num_channels); + ~FrameBlocker(); + FrameBlocker(const FrameBlocker&) = delete; + FrameBlocker& operator=(const FrameBlocker&) = delete; + + // Inserts one 80 sample multiband subframe from the multiband frame and + // extracts one 64 sample multiband block. + void InsertSubFrameAndExtractBlock( + const std::vector>>& sub_frame, + Block* block); + // Reports whether a multiband block of 64 samples is available for + // extraction. + bool IsBlockAvailable() const; + // Extracts a multiband block of 64 samples. + void ExtractBlock(Block* block); + + private: + const size_t num_bands_; + const size_t num_channels_; + std::vector>> buffer_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc new file mode 100644 index 0000000000..92e393023a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_framer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t channel, + size_t sample_index, + int offset) { + float value = + static_cast(chunk_counter * chunk_size + sample_index + channel) + + offset; + return value > 0 ? 
5000 * band + value : 0; +} + +void FillSubFrame(size_t sub_frame_counter, + int offset, + std::vector>>* sub_frame) { + for (size_t band = 0; band < sub_frame->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame)[band].size(); ++channel) { + for (size_t sample = 0; sample < (*sub_frame)[band][channel].size(); + ++sample) { + (*sub_frame)[band][channel][sample] = ComputeSampleValue( + sub_frame_counter, kSubFrameLength, band, channel, sample, offset); + } + } + } +} + +void FillSubFrameView( + size_t sub_frame_counter, + int offset, + std::vector>>* sub_frame, + std::vector>>* sub_frame_view) { + FillSubFrame(sub_frame_counter, offset, sub_frame); + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[band].size(); + ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*sub_frame)[band][channel][0], (*sub_frame)[band][channel].size()); + } + } +} + +bool VerifySubFrame( + size_t sub_frame_counter, + int offset, + const std::vector>>& sub_frame_view) { + std::vector>> reference_sub_frame( + sub_frame_view.size(), + std::vector>( + sub_frame_view[0].size(), + std::vector(sub_frame_view[0][0].size(), 0.f))); + FillSubFrame(sub_frame_counter, offset, &reference_sub_frame); + for (size_t band = 0; band < sub_frame_view.size(); ++band) { + for (size_t channel = 0; channel < sub_frame_view[band].size(); ++channel) { + for (size_t sample = 0; sample < sub_frame_view[band][channel].size(); + ++sample) { + if (reference_sub_frame[band][channel][sample] != + sub_frame_view[band][channel][sample]) { + return false; + } + } + } + } + return true; +} + +bool VerifyBlock(size_t block_counter, int offset, const Block& block) { + for (int band = 0; band < block.NumBands(); ++band) { + for (int channel = 0; channel < block.NumChannels(); ++channel) { + for (size_t sample = 0; sample < kBlockSize; ++sample) { + auto it = block.begin(band, channel) + sample; + const float reference_value = ComputeSampleValue( + block_counter, kBlockSize, band, channel, sample, offset); + if (reference_value != *it) { + return false; + } + } + } + } + return true; +} + +// Verifies that the FrameBlocker properly forms blocks out of the frames. +void RunBlockerTest(int sample_rate_hz, size_t num_channels) { + constexpr size_t kNumSubFramesToProcess = 20; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + num_bands, std::vector>(num_channels)); + FrameBlocker blocker(num_bands, num_channels); + + size_t block_counter = 0; + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillSubFrameView(sub_frame_index, 0, &input_sub_frame, + &input_sub_frame_view); + + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + VerifyBlock(block_counter++, 0, block); + + if ((sub_frame_index + 1) % 4 == 0) { + EXPECT_TRUE(blocker.IsBlockAvailable()); + } else { + EXPECT_FALSE(blocker.IsBlockAvailable()); + } + if (blocker.IsBlockAvailable()) { + blocker.ExtractBlock(&block); + VerifyBlock(block_counter++, 0, block); + } + } +} + +// Verifies that the FrameBlocker and BlockFramer work well together and produce +// the expected output. 
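+// Since each sub-frame carries 80 samples and each block carries 64, four
+// sub-frames (320 samples) yield five blocks; this is why a surplus block
+// becomes available for extraction after every fourth sub-frame below.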
+void RunBlockerAndFramerTest(int sample_rate_hz, size_t num_channels) { + const size_t kNumSubFramesToProcess = 20; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + num_bands, std::vector>(num_channels)); + std::vector>> input_sub_frame_view( + num_bands, std::vector>(num_channels)); + FrameBlocker blocker(num_bands, num_channels); + BlockFramer framer(num_bands, num_channels); + + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillSubFrameView(sub_frame_index, 0, &input_sub_frame, + &input_sub_frame_view); + FillSubFrameView(sub_frame_index, 0, &output_sub_frame, + &output_sub_frame_view); + + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + + if ((sub_frame_index + 1) % 4 == 0) { + EXPECT_TRUE(blocker.IsBlockAvailable()); + } else { + EXPECT_FALSE(blocker.IsBlockAvailable()); + } + if (blocker.IsBlockAvailable()) { + blocker.ExtractBlock(&block); + framer.InsertBlock(block); + } + if (sub_frame_index > 1) { + EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view)); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the FrameBlocker crashes if the InsertSubFrameAndExtractBlock +// method is called for inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertAndExtractParametersTest( + int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels, + size_t num_sub_frame_bands, + size_t num_sub_frame_channels, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_block_bands, num_block_channels); + std::vector>> input_sub_frame( + num_sub_frame_bands, + std::vector>( + num_sub_frame_channels, std::vector(sub_frame_length, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), + std::vector>(num_sub_frame_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands, correct_num_channels); + EXPECT_DEATH( + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// for inputs with the wrong number of bands or band lengths. 
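+// Each InsertSubFrameAndExtractBlock call buffers 80 - 64 = 16 surplus
+// samples, so the four calls below leave a full 64-sample block buffered
+// before ExtractBlock is attempted with the wrongly sized block.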
+void RunWronglySizedExtractParameterTest(int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block correct_block(correct_num_bands, correct_num_channels); + Block wrong_block(num_block_bands, num_block_channels); + std::vector>> input_sub_frame( + correct_num_bands, + std::vector>( + correct_num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), + std::vector>(correct_num_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands, correct_num_channels); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + + EXPECT_DEATH(blocker.ExtractBlock(&wrong_block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// after a wrong number of previous InsertSubFrameAndExtractBlock method calls +// have been made. +void RunWrongExtractOrderTest(int sample_rate_hz, + size_t num_channels, + size_t num_preceeding_api_calls) { + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), std::vector>(num_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(num_bands, num_channels); + for (size_t k = 0; k < num_preceeding_api_calls; ++k) { + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + } + + EXPECT_DEATH(blocker.ExtractBlock(&block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz, size_t num_channels) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + ss << ", number of channels: " << num_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(FrameBlockerDeathTest, + WrongNumberOfBandsInBlockForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfChannelsInBlockForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfBandsInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : 
{16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + wrong_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfChannelsInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, wrong_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfSamplesInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength - 1); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfBandsInBlockForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedExtractParameterTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfChannelsInBlockForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedExtractParameterTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfPreceedingApiCallsForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + for (size_t num_calls = 0; num_calls < 4; ++num_calls) { + rtc::StringBuilder ss; + ss << "Sample rate: " << rate; + ss << "Num channels: " << num_channels; + ss << ", Num preceeding InsertSubFrameAndExtractBlock calls: " + << num_calls; + + SCOPED_TRACE(ss.str()); + RunWrongExtractOrderTest(rate, num_channels, num_calls); + } + } + } +} + +// Verifies that the verification for 0 number of channels works. +TEST(FrameBlockerDeathTest, ZeroNumberOfChannelsParameter) { + EXPECT_DEATH(FrameBlocker(16000, 0), ""); +} + +// Verifies that the verification for 0 number of bands works. +TEST(FrameBlockerDeathTest, ZeroNumberOfBandsParameter) { + EXPECT_DEATH(FrameBlocker(0, 1), ""); +} + +// Verifiers that the verification for null sub_frame pointer works. 
+TEST(FrameBlockerDeathTest, NullBlockParameter) { + std::vector>> sub_frame( + 1, std::vector>( + 1, std::vector(kSubFrameLength, 0.f))); + std::vector>> sub_frame_view( + sub_frame.size()); + FillSubFrameView(0, 0, &sub_frame, &sub_frame_view); + EXPECT_DEATH( + FrameBlocker(1, 1).InsertSubFrameAndExtractBlock(sub_frame_view, nullptr), + ""); +} + +#endif + +TEST(FrameBlocker, BlockBitexactness) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunBlockerTest(rate, num_channels); + } + } +} + +TEST(FrameBlocker, BlockerAndFramer) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunBlockerAndFramerTest(rate, num_channels); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc new file mode 100644 index 0000000000..e56674e4c9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { +constexpr float kEpsilon = 1e-3f; +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kBlocksToHoldErle = 100; +constexpr int kPointsToAccumulate = 6; +} // namespace + +FullBandErleEstimator::FullBandErleEstimator( + const EchoCanceller3Config::Erle& config, + size_t num_capture_channels) + : min_erle_log2_(FastApproxLog2f(config.min + kEpsilon)), + max_erle_lf_log2_(FastApproxLog2f(config.max_l + kEpsilon)), + hold_counters_instantaneous_erle_(num_capture_channels, 0), + erle_time_domain_log2_(num_capture_channels, min_erle_log2_), + instantaneous_erle_(num_capture_channels, ErleInstantaneous(config)), + linear_filters_qualities_(num_capture_channels) { + Reset(); +} + +FullBandErleEstimator::~FullBandErleEstimator() = default; + +void FullBandErleEstimator::Reset() { + for (auto& instantaneous_erle_ch : instantaneous_erle_) { + instantaneous_erle_ch.Reset(); + } + + UpdateQualityEstimates(); + std::fill(erle_time_domain_log2_.begin(), erle_time_domain_log2_.end(), + min_erle_log2_); + std::fill(hold_counters_instantaneous_erle_.begin(), + hold_counters_instantaneous_erle_.end(), 0); +} + +void FullBandErleEstimator::Update( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + for (size_t ch = 0; ch < Y2.size(); ++ch) { + if (converged_filters[ch]) { + // Computes the fullband ERLE. 
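+      // The ERLE is formed as the ratio between the near-end energy Y2 and
+      // the post-filter residual energy E2, accumulated over
+      // kPointsToAccumulate blocks and only while the render signal carries
+      // enough energy (see kX2BandEnergyThreshold).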
+ const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + if (X2_sum > kX2BandEnergyThreshold * X2.size()) { + const float Y2_sum = + std::accumulate(Y2[ch].begin(), Y2[ch].end(), 0.0f); + const float E2_sum = + std::accumulate(E2[ch].begin(), E2[ch].end(), 0.0f); + if (instantaneous_erle_[ch].Update(Y2_sum, E2_sum)) { + hold_counters_instantaneous_erle_[ch] = kBlocksToHoldErle; + erle_time_domain_log2_[ch] += + 0.05f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) - + erle_time_domain_log2_[ch]); + erle_time_domain_log2_[ch] = + std::max(erle_time_domain_log2_[ch], min_erle_log2_); + } + } + } + --hold_counters_instantaneous_erle_[ch]; + if (hold_counters_instantaneous_erle_[ch] == 0) { + instantaneous_erle_[ch].ResetAccumulators(); + } + } + + UpdateQualityEstimates(); +} + +void FullBandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_log2", FullbandErleLog2()); + instantaneous_erle_[0].Dump(data_dumper); +} + +void FullBandErleEstimator::UpdateQualityEstimates() { + for (size_t ch = 0; ch < instantaneous_erle_.size(); ++ch) { + linear_filters_qualities_[ch] = + instantaneous_erle_[ch].GetQualityEstimate(); + } +} + +FullBandErleEstimator::ErleInstantaneous::ErleInstantaneous( + const EchoCanceller3Config::Erle& config) + : clamp_inst_quality_to_zero_(config.clamp_quality_estimate_to_zero), + clamp_inst_quality_to_one_(config.clamp_quality_estimate_to_one) { + Reset(); +} + +FullBandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; + +bool FullBandErleEstimator::ErleInstantaneous::Update(const float Y2_sum, + const float E2_sum) { + bool update_estimates = false; + E2_acum_ += E2_sum; + Y2_acum_ += Y2_sum; + num_points_++; + if (num_points_ == kPointsToAccumulate) { + if (E2_acum_ > 0.f) { + update_estimates = true; + erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon); + } + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; + } + + if (update_estimates) { + UpdateMaxMin(); + UpdateQualityEstimate(); + } + return update_estimates; +} + +void FullBandErleEstimator::ErleInstantaneous::Reset() { + ResetAccumulators(); + max_erle_log2_ = -10.f; // -30 dB. + min_erle_log2_ = 33.f; // 100 dB. + inst_quality_estimate_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::ResetAccumulators() { + erle_log2_ = absl::nullopt; + inst_quality_estimate_ = 0.f; + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_inst_log2", + erle_log2_ ? *erle_log2_ : -10.f); + data_dumper->DumpRaw( + "aec3_erle_instantaneous_quality", + GetQualityEstimate() ? GetQualityEstimate().value() : 0.f); + data_dumper->DumpRaw("aec3_fullband_erle_max_log2", max_erle_log2_); + data_dumper->DumpRaw("aec3_fullband_erle_min_log2", min_erle_log2_); +} + +void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() { + RTC_DCHECK(erle_log2_); + // Adding the forgetting factors for the maximum and minimum and capping the + // result to the incoming value. + max_erle_log2_ -= 0.0004f; // Forget factor, approx 1dB every 3 sec. + max_erle_log2_ = std::max(max_erle_log2_, erle_log2_.value()); + min_erle_log2_ += 0.0004f; // Forget factor, approx 1dB every 3 sec. 
min_erle_log2_ = std::min(min_erle_log2_, erle_log2_.value());
+}
+
+void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() {
+  const float alpha = 0.07f;
+  float quality_estimate = 0.f;
+  RTC_DCHECK(erle_log2_);
+  // TODO(peah): Currently, the estimate can become less than 0; this should
+  // be corrected.
+  if (max_erle_log2_ > min_erle_log2_) {
+    quality_estimate = (erle_log2_.value() - min_erle_log2_) /
+                       (max_erle_log2_ - min_erle_log2_);
+  }
+  if (quality_estimate > inst_quality_estimate_) {
+    inst_quality_estimate_ = quality_estimate;
+  } else {
+    inst_quality_estimate_ +=
+        alpha * (quality_estimate - inst_quality_estimate_);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h
new file mode 100644
index 0000000000..7a082176d6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+// Estimates the echo return loss enhancement using the energy of all the
+// frequency bands.
+class FullBandErleEstimator {
+ public:
+  FullBandErleEstimator(const EchoCanceller3Config::Erle& config,
+                        size_t num_capture_channels);
+  ~FullBandErleEstimator();
+  // Resets the ERLE estimator.
+  void Reset();
+
+  // Updates the ERLE estimator.
+  void Update(rtc::ArrayView<const float> X2,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
+              const std::vector<bool>& converged_filters);
+
+  // Returns the fullband ERLE estimate in log2 units.
+  float FullbandErleLog2() const {
+    float min_erle = erle_time_domain_log2_[0];
+    for (size_t ch = 1; ch < erle_time_domain_log2_.size(); ++ch) {
+      min_erle = std::min(min_erle, erle_time_domain_log2_[ch]);
+    }
+    return min_erle;
+  }
+
+  // Returns an estimation of the current linear filter quality. It returns a
+  // float number between 0 and 1 mapping 1 to the highest possible quality.
+  rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
+      const {
+    return linear_filters_qualities_;
+  }
+
+  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
+
+ private:
+  void UpdateQualityEstimates();
+
+  class ErleInstantaneous {
+   public:
+    explicit ErleInstantaneous(const EchoCanceller3Config::Erle& config);
+    ~ErleInstantaneous();
+
+    // Updates the estimator with a new point, returns true
+    // if the instantaneous ERLE was updated due to having enough
+    // points for performing the estimate.
+    bool Update(float Y2_sum, float E2_sum);
+    // Resets the instantaneous ERLE estimator to its initial state.
+    void Reset();
+    // Resets the members related with an instantaneous estimate.
+    void ResetAccumulators();
+    // Returns the instantaneous ERLE in log2 units.
+    // Returns the instantaneous ERLE in log2 units.
+    absl::optional<float> GetInstErleLog2() const { return erle_log2_; }
+    // Gets an indication between 0 and 1 of the performance of the linear
+    // filter for the current time instant.
+    absl::optional<float> GetQualityEstimate() const {
+      if (erle_log2_) {
+        float value = inst_quality_estimate_;
+        if (clamp_inst_quality_to_zero_) {
+          value = std::max(0.f, value);
+        }
+        if (clamp_inst_quality_to_one_) {
+          value = std::min(1.f, value);
+        }
+        return absl::optional<float>(value);
+      }
+      return absl::nullopt;
+    }
+    void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
+
+   private:
+    void UpdateMaxMin();
+    void UpdateQualityEstimate();
+    const bool clamp_inst_quality_to_zero_;
+    const bool clamp_inst_quality_to_one_;
+    absl::optional<float> erle_log2_;
+    float inst_quality_estimate_;
+    float max_erle_log2_;
+    float min_erle_log2_;
+    float Y2_acum_;
+    float E2_acum_;
+    int num_points_;
+  };
+
+  const float min_erle_log2_;
+  const float max_erle_lf_log2_;
+  std::vector<int> hold_counters_instantaneous_erle_;
+  std::vector<float> erle_time_domain_log2_;
+  std::vector<ErleInstantaneous> instantaneous_erle_;
+  std::vector<absl::optional<float>> linear_filters_qualities_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc
new file mode 100644
index 0000000000..af30ff1b9f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc
@@ -0,0 +1,900 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/matched_filter.h"
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <cstddef>
+#include <initializer_list>
+#include <iterator>
+#include <numeric>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace {
+
+// Subsample rate used for computing the accumulated error.
+// The implementation of some core functions depends on this constant being
+// equal to 4.
+constexpr int kAccumulatedErrorSubSampleRate = 4;
+
+void UpdateAccumulatedError(
+    const rtc::ArrayView<const float> instantaneous_accumulated_error,
+    const rtc::ArrayView<float> accumulated_error,
+    float one_over_error_sum_anchor,
+    float smooth_constant_increases) {
+  for (size_t k = 0; k < instantaneous_accumulated_error.size(); ++k) {
+    float error_norm =
+        instantaneous_accumulated_error[k] * one_over_error_sum_anchor;
+    if (error_norm < accumulated_error[k]) {
+      accumulated_error[k] = error_norm;
+    } else {
+      accumulated_error[k] +=
+          smooth_constant_increases * (error_norm - accumulated_error[k]);
+    }
+  }
+}
+
+size_t ComputePreEchoLag(
+    const webrtc::MatchedFilter::PreEchoConfiguration& pre_echo_configuration,
+    const rtc::ArrayView<const float> accumulated_error,
+    size_t lag,
+    size_t alignment_shift_winner) {
+  RTC_DCHECK_GE(lag, alignment_shift_winner);
+  size_t pre_echo_lag_estimate = lag - alignment_shift_winner;
+  size_t maximum_pre_echo_lag =
+      std::min(pre_echo_lag_estimate / kAccumulatedErrorSubSampleRate,
+               accumulated_error.size());
+  switch (pre_echo_configuration.mode) {
+    case 0:
+      // Mode 0: Pre echo lag is defined as the first coefficient with an error
+      // lower than a threshold with a certain decrease slope.
+      for (size_t k = 1; k < maximum_pre_echo_lag; ++k) {
+        if (accumulated_error[k] <
+                pre_echo_configuration.threshold * accumulated_error[k - 1] &&
+            accumulated_error[k] < pre_echo_configuration.threshold) {
+          pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+          break;
+        }
+      }
+      break;
+    case 1:
+      // Mode 1: Pre echo lag is defined as the first coefficient with an error
+      // lower than a certain threshold.
+      for (size_t k = 0; k < maximum_pre_echo_lag; ++k) {
+        if (accumulated_error[k] < pre_echo_configuration.threshold) {
+          pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+          break;
+        }
+      }
+      break;
+    case 2:
+    case 3:
+      // Mode 2,3: Pre echo lag is defined as the closest coefficient to the
+      // lag with an error lower than a certain threshold.
+      for (int k = static_cast<int>(maximum_pre_echo_lag) - 1; k >= 0; --k) {
+        if (accumulated_error[k] > pre_echo_configuration.threshold) {
+          break;
+        }
+        pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+      }
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      break;
+  }
+  return pre_echo_lag_estimate + alignment_shift_winner;
+}
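[Editorial sketch, not part of the patch.] ComputePreEchoLag scans the accumulated-error curve, which is subsampled by kAccumulatedErrorSubSampleRate (4) relative to the filter taps: an early coefficient whose normalized error is already low indicates an echo path component ahead of the main peak. A self-contained illustration of the mode 1 scan, with hypothetical error values and threshold:

#include <cstddef>
#include <cstdio>

int main() {
  // Normalized accumulated error per subsampled coefficient (hypothetical).
  const float accumulated_error[] = {0.9f, 0.8f, 0.4f, 0.2f, 0.1f};
  const float kThreshold = 0.5f;
  const int kSubSampleRate = 4;  // Mirrors kAccumulatedErrorSubSampleRate.
  size_t pre_echo_lag = 5 * kSubSampleRate;  // Fallback: the main lag itself.
  // Mode 1: first coefficient below the threshold defines the pre-echo lag.
  for (size_t k = 0; k < 5; ++k) {
    if (accumulated_error[k] < kThreshold) {
      pre_echo_lag = (k + 1) * kSubSampleRate - 1;
      break;
    }
  }
  std::printf("pre-echo lag estimate: %zu taps\n", pre_echo_lag);  // 11.
}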
+
+webrtc::MatchedFilter::PreEchoConfiguration FetchPreEchoConfiguration() {
+  float threshold = 0.5f;
+  int mode = 0;
+  const std::string pre_echo_configuration_field_trial =
+      webrtc::field_trial::FindFullName("WebRTC-Aec3PreEchoConfiguration");
+  webrtc::FieldTrialParameter<double> threshold_field_trial_parameter(
+      /*key=*/"threshold", /*default_value=*/threshold);
+  webrtc::FieldTrialParameter<int> mode_field_trial_parameter(
+      /*key=*/"mode", /*default_value=*/mode);
+  webrtc::ParseFieldTrial(
+      {&threshold_field_trial_parameter, &mode_field_trial_parameter},
+      pre_echo_configuration_field_trial);
+  float threshold_read =
+      static_cast<float>(threshold_field_trial_parameter.Get());
+  int mode_read = mode_field_trial_parameter.Get();
+  if (threshold_read < 1.0f && threshold_read > 0.0f) {
+    threshold = threshold_read;
+  } else {
+    RTC_LOG(LS_ERROR)
+        << "AEC3: Pre echo configuration: wrong input, threshold = "
+        << threshold_read << ".";
+  }
+  if (mode_read >= 0 && mode_read <= 3) {
+    mode = mode_read;
+  } else {
+    RTC_LOG(LS_ERROR) << "AEC3: Pre echo configuration: wrong input, mode = "
+                      << mode_read << ".";
+  }
+  RTC_LOG(LS_INFO) << "AEC3: Pre echo configuration: threshold = " << threshold
+                   << ", mode = " << mode << ".";
+  return {.threshold = threshold, .mode = mode};
+}
+
+}  // namespace
+
+namespace webrtc {
+namespace aec3 {
+
+#if defined(WEBRTC_HAS_NEON)
+
+inline float SumAllElements(float32x4_t elements) {
+  float32x2_t sum = vpadd_f32(vget_low_f32(elements), vget_high_f32(elements));
+  sum = vpadd_f32(sum, sum);
+  return vget_lane_f32(sum, 0);
+}
+
+void MatchedFilterCoreWithAccumulatedError_NEON(
+    size_t x_start_index,
+    float x2_sum_threshold,
+    float smoothing,
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<const float> y,
+    rtc::ArrayView<float> h,
+    bool* filters_updated,
+    float* error_sum,
+    rtc::ArrayView<float> accumulated_error,
+    rtc::ArrayView<float> scratch_memory) {
+  const int h_size = static_cast<int>(h.size());
+  const int x_size = static_cast<int>(x.size());
+  RTC_DCHECK_EQ(0, h_size % 4);
+  std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);
+  // Process for all samples in the sub-block.
+  for (size_t i = 0; i < y.size(); ++i) {
+    // Apply the matched filter as filter * x, and compute x * x.
+    RTC_DCHECK_GT(x_size, x_start_index);
+    // Compute loop chunk sizes until, and after, the wraparound of the circular
+    // buffer for x.
+    const int chunk1 =
+        std::min(h_size, static_cast<int>(x_size - x_start_index));
+    if (chunk1 != h_size) {
+      const int chunk2 = h_size - chunk1;
+      std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin());
+      std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1);
+    }
+    const float* x_p =
+        chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
+    const float* h_p = &h[0];
+    float* accumulated_error_p = &accumulated_error[0];
+    // Initialize values for the accumulation.
+    float32x4_t x2_sum_128 = vdupq_n_f32(0);
+    float x2_sum = 0.f;
+    float s = 0;
+    // Perform 128 bit vector operations.
+    const int limit_by_4 = h_size >> 2;
+    for (int k = limit_by_4; k > 0;
+         --k, h_p += 4, x_p += 4, accumulated_error_p++) {
+      // Load the data into 128 bit vectors.
+      const float32x4_t x_k = vld1q_f32(x_p);
+      const float32x4_t h_k = vld1q_f32(h_p);
+      // Compute and accumulate x * x.
+      x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k);
+      // Compute x * h.
+      float32x4_t hk_xk_128 = vmulq_f32(h_k, x_k);
+      s += SumAllElements(hk_xk_128);
+      const float e = s - y[i];
+      accumulated_error_p[0] += e * e;
+    }
+    // Combine the accumulated vector and scalar values.
+    x2_sum += SumAllElements(x2_sum_128);
+    // Compute the matched filter error.
+    float e = y[i] - s;
+    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
+    (*error_sum) += e * e;
+    // Update the matched filter estimate in an NLMS manner.
+    if (x2_sum > x2_sum_threshold && !saturation) {
+      RTC_DCHECK_LT(0.f, x2_sum);
+      const float alpha = smoothing * e / x2_sum;
+      const float32x4_t alpha_128 = vmovq_n_f32(alpha);
+      // filter = filter + smoothing * (y - filter * x) * x / x * x.
+      float* h_p = &h[0];
+      x_p = chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
+      // Perform 128 bit vector operations.
+      const int limit_by_4 = h_size >> 2;
+      for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
+        // Load the data into 128 bit vectors.
+        float32x4_t h_k = vld1q_f32(h_p);
+        const float32x4_t x_k = vld1q_f32(x_p);
+        // Compute h = h + alpha * x.
+        h_k = vmlaq_f32(h_k, alpha_128, x_k);
+        // Store the result.
+        vst1q_f32(h_p, h_k);
+      }
+      *filters_updated = true;
+    }
+    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
+  }
+}
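[Editorial note, not part of the patch.] The comment "filter = filter + smoothing * (y - filter * x) * x / x * x", which recurs in every core variant in this file, is shorthand for a normalized LMS (NLMS) step. In conventional notation (mine, not the patch's), with $x_n$ the render history window seen by the filter and $\hat{h}$ the filter estimate:

\[
e_n = y_n - \hat{h}^{\top} x_n, \qquad
\hat{h} \leftarrow \hat{h} + \mu \, \frac{e_n}{\lVert x_n \rVert^2} \, x_n
\]

where $\mu$ is the smoothing constant. The update is applied only when $\lVert x_n \rVert^2$ exceeds x2_sum_threshold and the capture sample is not saturated, which is exactly the gating visible in the code: on weak or clipped excitation the normalized step size would otherwise amplify noise.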
+
+void MatchedFilterCore_NEON(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory) {
+  const int h_size = static_cast<int>(h.size());
+  const int x_size = static_cast<int>(x.size());
+  RTC_DCHECK_EQ(0, h_size % 4);
+
+  if (compute_accumulated_error) {
+    return MatchedFilterCoreWithAccumulatedError_NEON(
+        x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated,
+        error_sum, accumulated_error, scratch_memory);
+  }
+
+  // Process for all samples in the sub-block.
+  for (size_t i = 0; i < y.size(); ++i) {
+    // Apply the matched filter as filter * x, and compute x * x.
+
+    RTC_DCHECK_GT(x_size, x_start_index);
+    const float* x_p = &x[x_start_index];
+    const float* h_p = &h[0];
+
+    // Initialize values for the accumulation.
+    float32x4_t s_128 = vdupq_n_f32(0);
+    float32x4_t x2_sum_128 = vdupq_n_f32(0);
+    float x2_sum = 0.f;
+    float s = 0;
+
+    // Compute loop chunk sizes until, and after, the wraparound of the circular
+    // buffer for x.
+    const int chunk1 =
+        std::min(h_size, static_cast<int>(x_size - x_start_index));
+
+    // Perform the loop in two chunks.
+    const int chunk2 = h_size - chunk1;
+    for (int limit : {chunk1, chunk2}) {
+      // Perform 128 bit vector operations.
+      const int limit_by_4 = limit >> 2;
+      for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
+        // Load the data into 128 bit vectors.
+        const float32x4_t x_k = vld1q_f32(x_p);
+        const float32x4_t h_k = vld1q_f32(h_p);
+        // Compute and accumulate x * x and h * x.
+        x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k);
+        s_128 = vmlaq_f32(s_128, h_k, x_k);
+      }
+
+      // Perform non-vector operations for any remaining items.
+      for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
+        const float x_k = *x_p;
+        x2_sum += x_k * x_k;
+        s += *h_p * x_k;
+      }
+
+      x_p = &x[0];
+    }
+
+    // Combine the accumulated vector and scalar values.
+ s += SumAllElements(s_128); + x2_sum += SumAllElements(x2_sum_128); + + // Compute the matched filter error. + float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const float32x4_t alpha_128 = vmovq_n_f32(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + float32x4_t h_k = vld1q_f32(h_p); + const float32x4_t x_k = vld1q_f32(x_p); + // Compute h = h + alpha * x. + h_k = vmlaq_f32(h_k, alpha_128, x_k); + + // Store the result. + vst1q_f32(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void MatchedFilterCore_AccumulatedError_SSE2( + size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 8); + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + if (chunk1 != h_size) { + const int chunk2 = h_size - chunk1; + std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); + std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); + } + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + const float* h_p = &h[0]; + float* a_p = &accumulated_error[0]; + __m128 s_inst_128; + __m128 s_inst_128_4; + __m128 x2_sum_128 = _mm_set1_ps(0); + __m128 x2_sum_128_4 = _mm_set1_ps(0); + __m128 e_128; + float* const s_p = reinterpret_cast(&s_inst_128); + float* const s_4_p = reinterpret_cast(&s_inst_128_4); + float* const e_p = reinterpret_cast(&e_128); + float x2_sum = 0.0f; + float s_acum = 0; + // Perform 128 bit vector operations. + const int limit_by_8 = h_size >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8, a_p += 2) { + // Load the data into 128 bit vectors. + const __m128 x_k = _mm_loadu_ps(x_p); + const __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k_4 = _mm_loadu_ps(x_p + 4); + const __m128 h_k_4 = _mm_loadu_ps(h_p + 4); + const __m128 xx = _mm_mul_ps(x_k, x_k); + const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4); + // Compute and accumulate x * x and h * x. 
+ x2_sum_128 = _mm_add_ps(x2_sum_128, xx); + x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4); + s_inst_128 = _mm_mul_ps(h_k, x_k); + s_inst_128_4 = _mm_mul_ps(h_k_4, x_k_4); + s_acum += s_p[0] + s_p[1] + s_p[2] + s_p[3]; + e_p[0] = s_acum - y[i]; + s_acum += s_4_p[0] + s_4_p[1] + s_4_p[2] + s_4_p[3]; + e_p[1] = s_acum - y[i]; + a_p[0] += e_p[0] * e_p[0]; + a_p[1] += e_p[1] * e_p[1]; + } + // Combine the accumulated vector and scalar values. + x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4); + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + // Compute the matched filter error. + float e = y[i] - s_acum; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m128 alpha_128 = _mm_set1_ps(alpha); + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + // Perform 128 bit vector operations. + const int limit_by_4 = h_size >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k = _mm_loadu_ps(x_p); + // Compute h = h + alpha * x. + const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k); + h_k = _mm_add_ps(h_k, alpha_x); + // Store the result. + _mm_storeu_ps(h_p, h_k); + } + *filters_updated = true; + } + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +void MatchedFilterCore_SSE2(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + if (compute_accumulated_error) { + return MatchedFilterCore_AccumulatedError_SSE2( + x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated, + error_sum, accumulated_error, scratch_memory); + } + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 4); + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + // Initialize values for the accumulation. + __m128 s_128 = _mm_set1_ps(0); + __m128 s_128_4 = _mm_set1_ps(0); + __m128 x2_sum_128 = _mm_set1_ps(0); + __m128 x2_sum_128_4 = _mm_set1_ps(0); + float x2_sum = 0.f; + float s = 0; + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_8 = limit >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 128 bit vectors. 
+ const __m128 x_k = _mm_loadu_ps(x_p); + const __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k_4 = _mm_loadu_ps(x_p + 4); + const __m128 h_k_4 = _mm_loadu_ps(h_p + 4); + const __m128 xx = _mm_mul_ps(x_k, x_k); + const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4); + // Compute and accumulate x * x and h * x. + x2_sum_128 = _mm_add_ps(x2_sum_128, xx); + x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4); + const __m128 hx = _mm_mul_ps(h_k, x_k); + const __m128 hx_4 = _mm_mul_ps(h_k_4, x_k_4); + s_128 = _mm_add_ps(s_128, hx); + s_128_4 = _mm_add_ps(s_128_4, hx_4); + } + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + x_p = &x[0]; + } + // Combine the accumulated vector and scalar values. + x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4); + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + s_128 = _mm_add_ps(s_128, s_128_4); + v = reinterpret_cast(&s_128); + s += v[0] + v[1] + v[2] + v[3]; + // Compute the matched filter error. + float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m128 alpha_128 = _mm_set1_ps(alpha); + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k = _mm_loadu_ps(x_p); + + // Compute h = h + alpha * x. + const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k); + h_k = _mm_add_ps(h_k, alpha_x); + // Store the result. + _mm_storeu_ps(h_p, h_k); + } + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + x_p = &x[0]; + } + *filters_updated = true; + } + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} +#endif + +void MatchedFilterCore(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error) { + if (compute_accumulated_error) { + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + } + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + float x2_sum = 0.f; + float s = 0; + size_t x_index = x_start_index; + if (compute_accumulated_error) { + for (size_t k = 0; k < h.size(); ++k) { + x2_sum += x[x_index] * x[x_index]; + s += h[k] * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + if ((k + 1 & 0b11) == 0) { + int idx = k >> 2; + accumulated_error[idx] += (y[i] - s) * (y[i] - s); + } + } + } else { + for (size_t k = 0; k < h.size(); ++k) { + x2_sum += x[x_index] * x[x_index]; + s += h[k] * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + } + + // Compute the matched filter error. 
+ float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + size_t x_index = x_start_index; + for (size_t k = 0; k < h.size(); ++k) { + h[k] += alpha * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x.size() - 1; + } +} + +size_t MaxSquarePeakIndex(rtc::ArrayView h) { + if (h.size() < 2) { + return 0; + } + float max_element1 = h[0] * h[0]; + float max_element2 = h[1] * h[1]; + size_t lag_estimate1 = 0; + size_t lag_estimate2 = 1; + const size_t last_index = h.size() - 1; + // Keeping track of even & odd max elements separately typically allows the + // compiler to produce more efficient code. + for (size_t k = 2; k < last_index; k += 2) { + float element1 = h[k] * h[k]; + float element2 = h[k + 1] * h[k + 1]; + if (element1 > max_element1) { + max_element1 = element1; + lag_estimate1 = k; + } + if (element2 > max_element2) { + max_element2 = element2; + lag_estimate2 = k + 1; + } + } + if (max_element2 > max_element1) { + max_element1 = max_element2; + lag_estimate1 = lag_estimate2; + } + // In case of odd h size, we have not yet checked the last element. + float last_element = h[last_index] * h[last_index]; + if (last_element > max_element1) { + return last_index; + } + return lag_estimate1; +} + +} // namespace aec3 + +MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper, + Aec3Optimization optimization, + size_t sub_block_size, + size_t window_size_sub_blocks, + int num_matched_filters, + size_t alignment_shift_sub_blocks, + float excitation_limit, + float smoothing_fast, + float smoothing_slow, + float matching_filter_threshold, + bool detect_pre_echo) + : data_dumper_(data_dumper), + optimization_(optimization), + sub_block_size_(sub_block_size), + filter_intra_lag_shift_(alignment_shift_sub_blocks * sub_block_size_), + filters_( + num_matched_filters, + std::vector(window_size_sub_blocks * sub_block_size_, 0.f)), + filters_offsets_(num_matched_filters, 0), + excitation_limit_(excitation_limit), + smoothing_fast_(smoothing_fast), + smoothing_slow_(smoothing_slow), + matching_filter_threshold_(matching_filter_threshold), + detect_pre_echo_(detect_pre_echo), + pre_echo_config_(FetchPreEchoConfiguration()) { + RTC_DCHECK(data_dumper); + RTC_DCHECK_LT(0, window_size_sub_blocks); + RTC_DCHECK((kBlockSize % sub_block_size) == 0); + RTC_DCHECK((sub_block_size % 4) == 0); + static_assert(kAccumulatedErrorSubSampleRate == 4); + if (detect_pre_echo_) { + accumulated_error_ = std::vector>( + num_matched_filters, + std::vector(window_size_sub_blocks * sub_block_size_ / + kAccumulatedErrorSubSampleRate, + 1.0f)); + + instantaneous_accumulated_error_ = + std::vector(window_size_sub_blocks * sub_block_size_ / + kAccumulatedErrorSubSampleRate, + 0.0f); + scratch_memory_ = + std::vector(window_size_sub_blocks * sub_block_size_); + } +} + +MatchedFilter::~MatchedFilter() = default; + +void MatchedFilter::Reset(bool full_reset) { + for (auto& f : filters_) { + std::fill(f.begin(), f.end(), 0.f); + } + + winner_lag_ = absl::nullopt; + reported_lag_estimate_ = absl::nullopt; + if (pre_echo_config_.mode != 3 || full_reset) { + for (auto& e : accumulated_error_) { + std::fill(e.begin(), 
e.end(), 1.0f); + } + number_pre_echo_updates_ = 0; + } +} + +void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView capture, + bool use_slow_smoothing) { + RTC_DCHECK_EQ(sub_block_size_, capture.size()); + auto& y = capture; + + const float smoothing = + use_slow_smoothing ? smoothing_slow_ : smoothing_fast_; + + const float x2_sum_threshold = + filters_[0].size() * excitation_limit_ * excitation_limit_; + + // Compute anchor for the matched filter error. + float error_sum_anchor = 0.0f; + for (size_t k = 0; k < y.size(); ++k) { + error_sum_anchor += y[k] * y[k]; + } + + // Apply all matched filters. + float winner_error_sum = error_sum_anchor; + winner_lag_ = absl::nullopt; + reported_lag_estimate_ = absl::nullopt; + size_t alignment_shift = 0; + absl::optional previous_lag_estimate; + const int num_filters = static_cast(filters_.size()); + int winner_index = -1; + for (int n = 0; n < num_filters; ++n) { + float error_sum = 0.f; + bool filters_updated = false; + const bool compute_pre_echo = + detect_pre_echo_ && n == last_detected_best_lag_filter_; + + size_t x_start_index = + (render_buffer.read + alignment_shift + sub_block_size_ - 1) % + render_buffer.buffer.size(); + + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: + aec3::MatchedFilterCore_SSE2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; + case Aec3Optimization::kAvx2: + aec3::MatchedFilterCore_AVX2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: + aec3::MatchedFilterCore_NEON( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; +#endif + default: + aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_); + } + + // Estimate the lag in the matched filter as the distance to the portion in + // the filter that contributes the most to the matched filter output. This + // is detected as the peak of the matched filter. + const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]); + const bool reliable = + lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) && + error_sum < matching_filter_threshold_ * error_sum_anchor; + + // Find the best estimate + const size_t lag = lag_estimate + alignment_shift; + if (filters_updated && reliable && error_sum < winner_error_sum) { + winner_error_sum = error_sum; + winner_index = n; + // In case that 2 matched filters return the same winner candidate + // (overlap region), the one with the smaller index is chosen in order + // to search for pre-echoes. 
+ if (previous_lag_estimate && previous_lag_estimate == lag) { + winner_lag_ = previous_lag_estimate; + winner_index = n - 1; + } else { + winner_lag_ = lag; + } + } + previous_lag_estimate = lag; + alignment_shift += filter_intra_lag_shift_; + } + + if (winner_index != -1) { + RTC_DCHECK(winner_lag_.has_value()); + reported_lag_estimate_ = + LagEstimate(winner_lag_.value(), /*pre_echo_lag=*/winner_lag_.value()); + if (detect_pre_echo_ && last_detected_best_lag_filter_ == winner_index) { + const float energy_threshold = + pre_echo_config_.mode == 3 ? 1.0f : 30.0f * 30.0f * y.size(); + + if (error_sum_anchor > energy_threshold) { + const float smooth_constant_increases = + pre_echo_config_.mode != 3 ? 0.01f : 0.015f; + + UpdateAccumulatedError( + instantaneous_accumulated_error_, accumulated_error_[winner_index], + 1.0f / error_sum_anchor, smooth_constant_increases); + number_pre_echo_updates_++; + } + if (pre_echo_config_.mode != 3 || number_pre_echo_updates_ >= 50) { + reported_lag_estimate_->pre_echo_lag = ComputePreEchoLag( + pre_echo_config_, accumulated_error_[winner_index], + winner_lag_.value(), + winner_index * filter_intra_lag_shift_ /*alignment_shift_winner*/); + } else { + reported_lag_estimate_->pre_echo_lag = winner_lag_.value(); + } + } + last_detected_best_lag_filter_ = winner_index; + } + if (ApmDataDumper::IsAvailable()) { + Dump(); + data_dumper_->DumpRaw("error_sum_anchor", error_sum_anchor / y.size()); + data_dumper_->DumpRaw("number_pre_echo_updates", number_pre_echo_updates_); + data_dumper_->DumpRaw("filter_smoothing", smoothing); + } +} + +void MatchedFilter::LogFilterProperties(int sample_rate_hz, + size_t shift, + size_t downsampling_factor) const { + size_t alignment_shift = 0; + constexpr int kFsBy1000 = 16; + for (size_t k = 0; k < filters_.size(); ++k) { + int start = static_cast(alignment_shift * downsampling_factor); + int end = static_cast((alignment_shift + filters_[k].size()) * + downsampling_factor); + RTC_LOG(LS_VERBOSE) << "Filter " << k << ": start: " + << (start - static_cast(shift)) / kFsBy1000 + << " ms, end: " + << (end - static_cast(shift)) / kFsBy1000 + << " ms."; + alignment_shift += filter_intra_lag_shift_; + } +} + +void MatchedFilter::Dump() { + for (size_t n = 0; n < filters_.size(); ++n) { + const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]); + std::string dumper_filter = "aec3_correlator_" + std::to_string(n) + "_h"; + data_dumper_->DumpRaw(dumper_filter.c_str(), filters_[n]); + std::string dumper_lag = "aec3_correlator_lag_" + std::to_string(n); + data_dumper_->DumpRaw(dumper_lag.c_str(), + lag_estimate + n * filter_intra_lag_shift_); + if (detect_pre_echo_) { + std::string dumper_error = + "aec3_correlator_error_" + std::to_string(n) + "_h"; + data_dumper_->DumpRaw(dumper_error.c_str(), accumulated_error_[n]); + + size_t pre_echo_lag = + ComputePreEchoLag(pre_echo_config_, accumulated_error_[n], + lag_estimate + n * filter_intra_lag_shift_, + n * filter_intra_lag_shift_); + std::string dumper_pre_lag = + "aec3_correlator_pre_echo_lag_" + std::to_string(n); + data_dumper_->DumpRaw(dumper_pre_lag.c_str(), pre_echo_lag); + if (static_cast(n) == last_detected_best_lag_filter_) { + data_dumper_->DumpRaw("aec3_pre_echo_delay_winner_inst", pre_echo_lag); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h new file mode 100644 index 0000000000..bb54fba2b4 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+struct DownsampledRenderBuffer;
+
+namespace aec3 {
+
+#if defined(WEBRTC_HAS_NEON)
+
+// Filter core for the matched filter that is optimized for NEON.
+void MatchedFilterCore_NEON(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+// Filter core for the matched filter that is optimized for SSE2.
+void MatchedFilterCore_SSE2(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+// Filter core for the matched filter that is optimized for AVX2.
+void MatchedFilterCore_AVX2(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+#endif
+
+// Filter core for the matched filter.
+void MatchedFilterCore(size_t x_start_index,
+                       float x2_sum_threshold,
+                       float smoothing,
+                       rtc::ArrayView<const float> x,
+                       rtc::ArrayView<const float> y,
+                       rtc::ArrayView<float> h,
+                       bool* filters_updated,
+                       float* error_sum,
+                       bool compute_accumulated_error,
+                       rtc::ArrayView<float> accumulated_error);
+
+// Find largest peak of squared values in array.
+size_t MaxSquarePeakIndex(rtc::ArrayView<const float> h);
+
+}  // namespace aec3
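[Editorial sketch, not part of the patch.] MaxSquarePeakIndex, declared just above, reduces a converged filter to a single lag: the delay estimate is the index of the largest squared coefficient, i.e. the tap where the echo path concentrates its energy. A standalone stand-in with the same semantics (this is not the WebRTC implementation, which unrolls the loop for speed):

#include <cstddef>
#include <cstdio>
#include <vector>

// Minimal equivalent of aec3::MaxSquarePeakIndex() semantics.
size_t MaxSquarePeakIndex(const std::vector<float>& h) {
  size_t peak = 0;
  for (size_t k = 1; k < h.size(); ++k) {
    if (h[k] * h[k] > h[peak] * h[peak]) {
      peak = k;
    }
  }
  return peak;
}

int main() {
  // A filter that has (noisily) converged onto an echo path delayed by
  // 7 taps; the values are hypothetical.
  std::vector<float> h = {0.01f, -0.02f, 0.0f,  0.01f, 0.03f,
                          -0.01f, 0.02f, -0.9f, 0.1f,  0.05f};
  std::printf("estimated lag: %zu taps\n", MaxSquarePeakIndex(h));  // 7.
}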
+
+// Produces recursively updated cross-correlation estimates for several signal
+// shifts where the intra-shift spacing is uniform.
+class MatchedFilter {
+ public:
+  // Stores properties for the lag estimate corresponding to a particular
+  // signal shift.
+  struct LagEstimate {
+    LagEstimate() = default;
+    LagEstimate(size_t lag, size_t pre_echo_lag)
+        : lag(lag), pre_echo_lag(pre_echo_lag) {}
+    size_t lag = 0;
+    size_t pre_echo_lag = 0;
+  };
+
+  struct PreEchoConfiguration {
+    const float threshold;
+    const int mode;
+  };
+
+  MatchedFilter(ApmDataDumper* data_dumper,
+                Aec3Optimization optimization,
+                size_t sub_block_size,
+                size_t window_size_sub_blocks,
+                int num_matched_filters,
+                size_t alignment_shift_sub_blocks,
+                float excitation_limit,
+                float smoothing_fast,
+                float smoothing_slow,
+                float matching_filter_threshold,
+                bool detect_pre_echo);
+
+  MatchedFilter() = delete;
+  MatchedFilter(const MatchedFilter&) = delete;
+  MatchedFilter& operator=(const MatchedFilter&) = delete;
+
+  ~MatchedFilter();
+
+  // Updates the correlation with the values in the capture buffer.
+  void Update(const DownsampledRenderBuffer& render_buffer,
+              rtc::ArrayView<const float> capture,
+              bool use_slow_smoothing);
+
+  // Resets the matched filter.
+  void Reset(bool full_reset);
+
+  // Returns the current lag estimates.
+  absl::optional<LagEstimate> GetBestLagEstimate() const {
+    return reported_lag_estimate_;
+  }
+
+  // Returns the maximum filter lag.
+  size_t GetMaxFilterLag() const {
+    return filters_.size() * filter_intra_lag_shift_ + filters_[0].size();
+  }
+
+  // Log matched filter properties.
+  void LogFilterProperties(int sample_rate_hz,
+                           size_t shift,
+                           size_t downsampling_factor) const;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
+                           PreEchoConfigurationTest);
+  FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
+                           WrongPreEchoConfigurationTest);
+
+  // Only for testing. Gets the pre echo detection configuration.
+  const PreEchoConfiguration& GetPreEchoConfiguration() const {
+    return pre_echo_config_;
+  }
+  void Dump();
+
+  ApmDataDumper* const data_dumper_;
+  const Aec3Optimization optimization_;
+  const size_t sub_block_size_;
+  const size_t filter_intra_lag_shift_;
+  std::vector<std::vector<float>> filters_;
+  std::vector<std::vector<float>> accumulated_error_;
+  std::vector<float> instantaneous_accumulated_error_;
+  std::vector<float> scratch_memory_;
+  absl::optional<LagEstimate> reported_lag_estimate_;
+  absl::optional<size_t> winner_lag_;
+  int last_detected_best_lag_filter_ = -1;
+  std::vector<size_t> filters_offsets_;
+  int number_pre_echo_updates_ = 0;
+  const float excitation_limit_;
+  const float smoothing_fast_;
+  const float smoothing_slow_;
+  const float matching_filter_threshold_;
+  const bool detect_pre_echo_;
+  const PreEchoConfiguration pre_echo_config_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
new file mode 100644
index 0000000000..8c2ffcbd1e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
@@ -0,0 +1,261 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include + +#include "modules/audio_processing/aec3/matched_filter.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +// Let ha denote the horizontal of a, and hb the horizontal sum of b +// returns [ha, hb, ha, hb] +inline __m128 hsum_ab(__m256 a, __m256 b) { + __m256 s_256 = _mm256_hadd_ps(a, b); + const __m256i mask = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); + s_256 = _mm256_permutevar8x32_ps(s_256, mask); + __m128 s = _mm_hadd_ps(_mm256_extractf128_ps(s_256, 0), + _mm256_extractf128_ps(s_256, 1)); + s = _mm_hadd_ps(s, s); + return s; +} + +void MatchedFilterCore_AccumulatedError_AVX2( + size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 16); + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + if (chunk1 != h_size) { + const int chunk2 = h_size - chunk1; + std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); + std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); + } + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + const float* h_p = &h[0]; + float* a_p = &accumulated_error[0]; + __m256 s_inst_hadd_256; + __m256 s_inst_256; + __m256 s_inst_256_8; + __m256 x2_sum_256 = _mm256_set1_ps(0); + __m256 x2_sum_256_8 = _mm256_set1_ps(0); + __m128 e_128; + float x2_sum = 0.0f; + float s_acum = 0; + const int limit_by_16 = h_size >> 4; + for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16, a_p += 4) { + // Load the data into 256 bit vectors. + __m256 x_k = _mm256_loadu_ps(x_p); + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k_8 = _mm256_loadu_ps(x_p + 8); + __m256 h_k_8 = _mm256_loadu_ps(h_p + 8); + // Compute and accumulate x * x and h * x. + x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256); + x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8); + s_inst_256 = _mm256_mul_ps(h_k, x_k); + s_inst_256_8 = _mm256_mul_ps(h_k_8, x_k_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_256, s_inst_256_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_hadd_256, s_inst_hadd_256); + s_acum += s_inst_hadd_256[0]; + e_128[0] = s_acum - y[i]; + s_acum += s_inst_hadd_256[4]; + e_128[1] = s_acum - y[i]; + s_acum += s_inst_hadd_256[1]; + e_128[2] = s_acum - y[i]; + s_acum += s_inst_hadd_256[5]; + e_128[3] = s_acum - y[i]; + + __m128 accumulated_error = _mm_load_ps(a_p); + accumulated_error = _mm_fmadd_ps(e_128, e_128, accumulated_error); + _mm_storeu_ps(a_p, accumulated_error); + } + // Sum components together. + x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8); + __m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0), + _mm256_extractf128_ps(x2_sum_256, 1)); + // Combine the accumulated vector and scalar values. + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + + // Compute the matched filter error. 
+ float e = y[i] - s_acum; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m256 alpha_256 = _mm256_set1_ps(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + // Perform 256 bit vector operations. + const int limit_by_8 = h_size >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 256 bit vectors. + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k = _mm256_loadu_ps(x_p); + // Compute h = h + alpha * x. + h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k); + + // Store the result. + _mm256_storeu_ps(h_p, h_k); + } + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +void MatchedFilterCore_AVX2(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + if (compute_accumulated_error) { + return MatchedFilterCore_AccumulatedError_AVX2( + x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated, + error_sum, accumulated_error, scratch_memory); + } + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 8); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + + // Initialize values for the accumulation. + __m256 s_256 = _mm256_set1_ps(0); + __m256 s_256_8 = _mm256_set1_ps(0); + __m256 x2_sum_256 = _mm256_set1_ps(0); + __m256 x2_sum_256_8 = _mm256_set1_ps(0); + float x2_sum = 0.f; + float s = 0; + + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 256 bit vector operations. + const int limit_by_16 = limit >> 4; + for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16) { + // Load the data into 256 bit vectors. + __m256 x_k = _mm256_loadu_ps(x_p); + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k_8 = _mm256_loadu_ps(x_p + 8); + __m256 h_k_8 = _mm256_loadu_ps(h_p + 8); + // Compute and accumulate x * x and h * x. + x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256); + x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8); + s_256 = _mm256_fmadd_ps(h_k, x_k, s_256); + s_256_8 = _mm256_fmadd_ps(h_k_8, x_k_8, s_256_8); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_16 * 16; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + + x_p = &x[0]; + } + + // Sum components together. + x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8); + s_256 = _mm256_add_ps(s_256, s_256_8); + __m128 sum = hsum_ab(x2_sum_256, s_256); + x2_sum += sum[0]; + s += sum[1]; + + // Compute the matched filter error. 
+ float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m256 alpha_256 = _mm256_set1_ps(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 256 bit vector operations. + const int limit_by_8 = limit >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 256 bit vectors. + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k = _mm256_loadu_ps(x_p); + // Compute h = h + alpha * x. + h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k); + + // Store the result. + _mm256_storeu_ps(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build new file mode 100644 index 0000000000..bae4fa2972 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("matched_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc new file mode 100644 index 0000000000..bea7868a91 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h" + +#include +#include + +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { +constexpr int kPreEchoHistogramDataNotUpdated = -1; + +int GetDownSamplingBlockSizeLog2(int down_sampling_factor) { + int down_sampling_factor_log2 = 0; + down_sampling_factor >>= 1; + while (down_sampling_factor > 0) { + down_sampling_factor_log2++; + down_sampling_factor >>= 1; + } + return static_cast(kBlockSizeLog2) > down_sampling_factor_log2 + ? 
static_cast(kBlockSizeLog2) - down_sampling_factor_log2 + : 0; +} +} // namespace + +MatchedFilterLagAggregator::MatchedFilterLagAggregator( + ApmDataDumper* data_dumper, + size_t max_filter_lag, + const EchoCanceller3Config::Delay& delay_config) + : data_dumper_(data_dumper), + thresholds_(delay_config.delay_selection_thresholds), + headroom_(static_cast(delay_config.delay_headroom_samples / + delay_config.down_sampling_factor)), + highest_peak_aggregator_(max_filter_lag) { + if (delay_config.detect_pre_echo) { + pre_echo_lag_aggregator_ = std::make_unique( + max_filter_lag, delay_config.down_sampling_factor); + } + RTC_DCHECK(data_dumper); + RTC_DCHECK_LE(thresholds_.initial, thresholds_.converged); +} + +MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default; + +void MatchedFilterLagAggregator::Reset(bool hard_reset) { + highest_peak_aggregator_.Reset(); + if (pre_echo_lag_aggregator_ != nullptr) { + pre_echo_lag_aggregator_->Reset(); + } + if (hard_reset) { + significant_candidate_found_ = false; + } +} + +absl::optional MatchedFilterLagAggregator::Aggregate( + const absl::optional& lag_estimate) { + if (lag_estimate && pre_echo_lag_aggregator_) { + pre_echo_lag_aggregator_->Dump(data_dumper_); + pre_echo_lag_aggregator_->Aggregate( + std::max(0, static_cast(lag_estimate->pre_echo_lag) - headroom_)); + } + + if (lag_estimate) { + highest_peak_aggregator_.Aggregate( + std::max(0, static_cast(lag_estimate->lag) - headroom_)); + rtc::ArrayView histogram = highest_peak_aggregator_.histogram(); + int candidate = highest_peak_aggregator_.candidate(); + significant_candidate_found_ = significant_candidate_found_ || + histogram[candidate] > thresholds_.converged; + if (histogram[candidate] > thresholds_.converged || + (histogram[candidate] > thresholds_.initial && + !significant_candidate_found_)) { + DelayEstimate::Quality quality = significant_candidate_found_ + ? DelayEstimate::Quality::kRefined + : DelayEstimate::Quality::kCoarse; + int reported_delay = pre_echo_lag_aggregator_ != nullptr + ? 
pre_echo_lag_aggregator_->pre_echo_candidate() + : candidate; + return DelayEstimate(quality, reported_delay); + } + } + + return absl::nullopt; +} + +MatchedFilterLagAggregator::HighestPeakAggregator::HighestPeakAggregator( + size_t max_filter_lag) + : histogram_(max_filter_lag + 1, 0) { + histogram_data_.fill(0); +} + +void MatchedFilterLagAggregator::HighestPeakAggregator::Reset() { + std::fill(histogram_.begin(), histogram_.end(), 0); + histogram_data_.fill(0); + histogram_data_index_ = 0; +} + +void MatchedFilterLagAggregator::HighestPeakAggregator::Aggregate(int lag) { + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + --histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_[histogram_data_index_] = lag; + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + ++histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size(); + candidate_ = + std::distance(histogram_.begin(), + std::max_element(histogram_.begin(), histogram_.end())); +} + +MatchedFilterLagAggregator::PreEchoLagAggregator::PreEchoLagAggregator( + size_t max_filter_lag, + size_t down_sampling_factor) + : block_size_log2_(GetDownSamplingBlockSizeLog2(down_sampling_factor)), + histogram_( + ((max_filter_lag + 1) * down_sampling_factor) >> kBlockSizeLog2, + 0) { + Reset(); +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Reset() { + std::fill(histogram_.begin(), histogram_.end(), 0); + histogram_data_.fill(kPreEchoHistogramDataNotUpdated); + histogram_data_index_ = 0; + pre_echo_candidate_ = 0; +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Aggregate( + int pre_echo_lag) { + int pre_echo_block_size = pre_echo_lag >> block_size_log2_; + RTC_DCHECK(pre_echo_block_size >= 0 && + pre_echo_block_size < static_cast(histogram_.size())); + pre_echo_block_size = + rtc::SafeClamp(pre_echo_block_size, 0, histogram_.size() - 1); + // Remove the oldest point from the `histogram_`, it ignores the initial + // points where no updates have been done to the `histogram_data_` array. + if (histogram_data_[histogram_data_index_] != + kPreEchoHistogramDataNotUpdated) { + --histogram_[histogram_data_[histogram_data_index_]]; + } + histogram_data_[histogram_data_index_] = pre_echo_block_size; + ++histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size(); + int pre_echo_candidate_block_size = + std::distance(histogram_.begin(), + std::max_element(histogram_.begin(), histogram_.end())); + pre_echo_candidate_ = (pre_echo_candidate_block_size << block_size_log2_); +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Dump( + ApmDataDumper* const data_dumper) { + data_dumper->DumpRaw("aec3_pre_echo_delay_candidate", pre_echo_candidate_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h new file mode 100644 index 0000000000..c0598bf226 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h
new file mode 100644
index 0000000000..c0598bf226
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/matched_filter.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Aggregates lag estimates produced by the MatchedFilter class into a single
+// reliable combined lag estimate.
+class MatchedFilterLagAggregator {
+ public:
+  MatchedFilterLagAggregator(ApmDataDumper* data_dumper,
+                             size_t max_filter_lag,
+                             const EchoCanceller3Config::Delay& delay_config);
+
+  MatchedFilterLagAggregator() = delete;
+  MatchedFilterLagAggregator(const MatchedFilterLagAggregator&) = delete;
+  MatchedFilterLagAggregator& operator=(const MatchedFilterLagAggregator&) =
+      delete;
+
+  ~MatchedFilterLagAggregator();
+
+  // Resets the aggregator.
+  void Reset(bool hard_reset);
+
+  // Aggregates the provided lag estimates.
+  absl::optional<DelayEstimate> Aggregate(
+      const absl::optional<const MatchedFilter::LagEstimate>& lag_estimate);
+
+  // Returns whether a reliable delay estimate has been found.
+  bool ReliableDelayFound() const { return significant_candidate_found_; }
+
+  // Returns the delay candidate that is computed by looking at the highest
+  // peak on the matched filters.
+  int GetDelayAtHighestPeak() const {
+    return highest_peak_aggregator_.candidate();
+  }
+
+ private:
+  class PreEchoLagAggregator {
+   public:
+    PreEchoLagAggregator(size_t max_filter_lag, size_t down_sampling_factor);
+    void Reset();
+    void Aggregate(int pre_echo_lag);
+    int pre_echo_candidate() const { return pre_echo_candidate_; }
+    void Dump(ApmDataDumper* const data_dumper);
+
+   private:
+    const int block_size_log2_;
+    std::array<int, 250> histogram_data_;
+    std::vector<int> histogram_;
+    int histogram_data_index_ = 0;
+    int pre_echo_candidate_ = 0;
+  };
+
+  class HighestPeakAggregator {
+   public:
+    explicit HighestPeakAggregator(size_t max_filter_lag);
+    void Reset();
+    void Aggregate(int lag);
+    int candidate() const { return candidate_; }
+    rtc::ArrayView<const int> histogram() const { return histogram_; }
+
+   private:
+    std::vector<int> histogram_;
+    std::array<int, 250> histogram_data_;
+    int histogram_data_index_ = 0;
+    int candidate_ = -1;
+  };
+
+  ApmDataDumper* const data_dumper_;
+  bool significant_candidate_found_ = false;
+  const EchoCanceller3Config::Delay::DelaySelectionThresholds thresholds_;
+  const int headroom_;
+  HighestPeakAggregator highest_peak_aggregator_;
+  std::unique_ptr<PreEchoLagAggregator> pre_echo_lag_aggregator_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
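The pre-echo aggregator works in a block-downsampled domain: incoming lags are quantized by a right shift of block_size_log2_, and the winning histogram bin is mapped back with the inverse left shift, so reported pre-echo candidates are always multiples of the block size. A minimal standalone illustration of this quantization (the shift value of 4 is assumed for the example, not taken from the patch):

#include <cassert>

// Lags are binned by right-shifting; the winning bin is mapped back to a lag
// by the inverse left shift.
int QuantizeLag(int lag, int block_size_log2) { return lag >> block_size_log2; }
int DequantizeBin(int bin, int block_size_log2) { return bin << block_size_log2; }

int main() {
  constexpr int kBlockSizeLog2 = 4;  // 16-sample blocks, chosen for the example.
  assert(QuantizeLag(37, kBlockSizeLog2) == 2);    // floor(37 / 16) == 2.
  assert(DequantizeBin(2, kBlockSizeLog2) == 32);  // Reported lag: 32.
  return 0;
}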
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc
new file mode 100644
index 0000000000..6804102584
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr size_t kNumLagsBeforeDetection = 26;
+
+}  // namespace
+
+// Verifies that varying lag estimates cause the lag estimate not to be deemed
+// reliable.
+TEST(MatchedFilterLagAggregator,
+     LagEstimateInvarianceRequiredForAggregatedLag) {
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  MatchedFilterLagAggregator aggregator(&data_dumper, /*max_filter_lag=*/100,
+                                        config.delay);
+
+  absl::optional<DelayEstimate> aggregated_lag;
+  for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) {
+    aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/10, /*pre_echo_lag=*/10));
+  }
+  EXPECT_TRUE(aggregated_lag);
+
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 100; ++k) {
+    aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/k % 100, /*pre_echo_lag=*/k % 100));
+  }
+  EXPECT_FALSE(aggregated_lag);
+
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 100; ++k) {
+    aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/k % 100, /*pre_echo_lag=*/k % 100));
+    EXPECT_FALSE(aggregated_lag);
+  }
+}
+
+// Verifies that lag estimate updates are required to produce an updated lag
+// aggregate.
+TEST(MatchedFilterLagAggregator,
+     DISABLED_LagEstimateUpdatesRequiredForAggregatedLag) {
+  constexpr size_t kLag = 5;
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  MatchedFilterLagAggregator aggregator(&data_dumper, /*max_filter_lag=*/kLag,
+                                        config.delay);
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 10; ++k) {
+    absl::optional<DelayEstimate> aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/kLag, /*pre_echo_lag=*/kLag));
+    EXPECT_FALSE(aggregated_lag);
+    EXPECT_EQ(kLag, aggregated_lag->delay);
+  }
+}
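The first test above drives the aggregator the way a delay estimator would. Distilled into a minimal usage sketch (the loop bound of 30 frames is illustrative, chosen only to exceed the empirically observed kNumLagsBeforeDetection; the function name is hypothetical):

#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"

namespace webrtc {
absl::optional<DelayEstimate> EstimateStableDelay() {
  ApmDataDumper data_dumper(0);
  EchoCanceller3Config config;
  MatchedFilterLagAggregator aggregator(&data_dumper, /*max_filter_lag=*/100,
                                        config.delay);
  absl::optional<DelayEstimate> delay;
  // A constant lag must recur for enough frames before the histogram count
  // crosses the detection thresholds and a DelayEstimate is returned.
  for (int frame = 0; frame < 30 && !delay; ++frame) {
    delay = aggregator.Aggregate(
        MatchedFilter::LagEstimate(/*lag=*/10, /*pre_echo_lag=*/10));
  }
  return delay;
}
}  // namespace webrtc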
+
+// Verifies that an aggregated lag is persistent if the lag estimates do not
+// change and that an aggregated lag is not produced without gaining lag
+// estimate confidence.
+TEST(MatchedFilterLagAggregator, DISABLED_PersistentAggregatedLag) {
+  constexpr size_t kLag1 = 5;
+  constexpr size_t kLag2 = 10;
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  std::vector<MatchedFilter::LagEstimate> lag_estimates(1);
+  MatchedFilterLagAggregator aggregator(&data_dumper, std::max(kLag1, kLag2),
+                                        config.delay);
+  absl::optional<DelayEstimate> aggregated_lag;
+  for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) {
+    aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/kLag1, /*pre_echo_lag=*/kLag1));
+  }
+  EXPECT_TRUE(aggregated_lag);
+  EXPECT_EQ(kLag1, aggregated_lag->delay);
+
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 40; ++k) {
+    aggregated_lag = aggregator.Aggregate(
+        MatchedFilter::LagEstimate(/*lag=*/kLag2, /*pre_echo_lag=*/kLag2));
+    EXPECT_TRUE(aggregated_lag);
+    EXPECT_EQ(kLag1, aggregated_lag->delay);
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for non-null data dumper.
+TEST(MatchedFilterLagAggregatorDeathTest, NullDataDumper) {
+  EchoCanceller3Config config;
+  EXPECT_DEATH(MatchedFilterLagAggregator(nullptr, 10, config.delay), "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc
new file mode 100644
index 0000000000..0a04c7809c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc
@@ -0,0 +1,612 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/matched_filter.h"
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <string>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+std::string ProduceDebugText(size_t delay, size_t down_sampling_factor) {
+  rtc::StringBuilder ss;
+  ss << "Delay: " << delay;
+  ss << ", Down sampling factor: " << down_sampling_factor;
+  return ss.Release();
+}
+
+constexpr size_t kNumMatchedFilters = 10;
+constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+constexpr size_t kWindowSizeSubBlocks = 32;
+constexpr size_t kAlignmentShiftSubBlocks = kWindowSizeSubBlocks * 3 / 4;
+
+}  // namespace
+
+class MatchedFilterTest : public ::testing::TestWithParam<bool> {};
+
+#if defined(WEBRTC_HAS_NEON)
+// Verifies that the optimized methods for NEON are similar to their reference
+// counterparts.
+TEST_P(MatchedFilterTest, TestNeonOptimizations) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + const bool kComputeAccumulatederror = GetParam(); + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_NEON(512); + std::vector h(512); + std::vector accumulated_error(512); + std::vector accumulated_error_NEON(512); + std::vector scratch_memory(512); + + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_NEON = false; + float error_sum_NEON = 0.f; + + MatchedFilterCore_NEON(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_NEON, &filters_updated_NEON, &error_sum_NEON, + kComputeAccumulatederror, accumulated_error_NEON, + scratch_memory); + + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, h, + &filters_updated, &error_sum, kComputeAccumulatederror, + accumulated_error); + + EXPECT_EQ(filters_updated, filters_updated_NEON); + EXPECT_NEAR(error_sum, error_sum_NEON, error_sum / 100000.f); + + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_NEON[j], 0.00001f); + } + + if (kComputeAccumulatederror) { + for (size_t j = 0; j < accumulated_error.size(); ++j) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_NEON[j]); + float relative_difference = accumulated_error[j] > 0 + ? difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.02f); + } + } + + x_index = (x_index + sub_block_size) % x.size(); + } + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods for SSE2 are bitexact to their reference +// counterparts. +TEST_P(MatchedFilterTest, TestSse2Optimizations) { + const bool kComputeAccumulatederror = GetParam(); + bool use_sse2 = (GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_SSE2(512); + std::vector h(512); + std::vector accumulated_error(512 / 4); + std::vector accumulated_error_SSE2(512 / 4); + std::vector scratch_memory(512); + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_SSE2 = false; + float error_sum_SSE2 = 0.f; + + MatchedFilterCore_SSE2(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_SSE2, &filters_updated_SSE2, + &error_sum_SSE2, kComputeAccumulatederror, + accumulated_error_SSE2, scratch_memory); + + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, + h, &filters_updated, &error_sum, + kComputeAccumulatederror, accumulated_error); + + EXPECT_EQ(filters_updated, filters_updated_SSE2); + EXPECT_NEAR(error_sum, error_sum_SSE2, error_sum / 100000.f); + + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_SSE2[j], 0.00001f); + } + + for (size_t j = 0; j < accumulated_error.size(); ++j) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_SSE2[j]); + float relative_difference = accumulated_error[j] > 0 + ? 
difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.00001f); + } + + x_index = (x_index + sub_block_size) % x.size(); + } + } + } +} + +TEST_P(MatchedFilterTest, TestAvx2Optimizations) { + bool use_avx2 = (GetCPUInfo(kAVX2) != 0); + const bool kComputeAccumulatederror = GetParam(); + if (use_avx2) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_AVX2(512); + std::vector h(512); + std::vector accumulated_error(512 / 4); + std::vector accumulated_error_AVX2(512 / 4); + std::vector scratch_memory(512); + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_AVX2 = false; + float error_sum_AVX2 = 0.f; + MatchedFilterCore_AVX2(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_AVX2, &filters_updated_AVX2, + &error_sum_AVX2, kComputeAccumulatederror, + accumulated_error_AVX2, scratch_memory); + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, + h, &filters_updated, &error_sum, + kComputeAccumulatederror, accumulated_error); + EXPECT_EQ(filters_updated, filters_updated_AVX2); + EXPECT_NEAR(error_sum, error_sum_AVX2, error_sum / 100000.f); + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_AVX2[j], 0.00001f); + } + for (size_t j = 0; j < accumulated_error.size(); j += 4) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_AVX2[j]); + float relative_difference = accumulated_error[j] > 0 + ? difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.00001f); + } + x_index = (x_index + sub_block_size) % x.size(); + } + } + } +} + +#endif + +// Verifies that the (optimized) function MaxSquarePeakIndex() produces output +// equal to the corresponding std-functions. +TEST(MatchedFilter, MaxSquarePeakIndex) { + Random random_generator(42U); + constexpr int kMaxLength = 128; + constexpr int kNumIterationsPerLength = 256; + for (int length = 1; length < kMaxLength; ++length) { + std::vector y(length); + for (int i = 0; i < kNumIterationsPerLength; ++i) { + RandomizeSampleVector(&random_generator, y); + + size_t lag_from_function = MaxSquarePeakIndex(y); + size_t lag_from_std = std::distance( + y.begin(), + std::max_element(y.begin(), y.end(), [](float a, float b) -> bool { + return a * a < b * b; + })); + EXPECT_EQ(lag_from_function, lag_from_std); + } + } +} + +// Verifies that the matched filter produces proper lag estimates for +// artificially delayed signals. 
+TEST_P(MatchedFilterTest, LagEstimation) { + const bool kDetectPreEcho = GetParam(); + Random random_generator(42U); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); + ApmDataDumper data_dumper(0); + for (size_t delay_samples : {5, 64, 150, 200, 800, 1000}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor)); + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = kNumMatchedFilters; + Decimator capture_decimator(down_sampling_factor); + DelayBuffer signal_delay_buffer(down_sampling_factor * + delay_samples); + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < (600 + delay_samples / sub_block_size); ++k) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + RandomizeSampleVector(&random_generator, + render.View(band, channel)); + } + } + signal_delay_buffer.Delay(render.View(/*band=*/0, /*channel=*/0), + capture[0]); + render_delay_buffer->Insert(render); + + if (k == 0) { + render_delay_buffer->Reset(); + } + + render_delay_buffer->PrepareCaptureProcessing(); + std::array downsampled_capture_data; + rtc::ArrayView downsampled_capture( + downsampled_capture_data.data(), sub_block_size); + capture_decimator.Decimate(capture[0], downsampled_capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), + downsampled_capture, /*use_slow_smoothing=*/false); + } + + // Obtain the lag estimates. + auto lag_estimate = filter.GetBestLagEstimate(); + EXPECT_TRUE(lag_estimate.has_value()); + + // Verify that the expected most accurate lag estimate is correct. + if (lag_estimate.has_value()) { + EXPECT_EQ(delay_samples, lag_estimate->lag); + EXPECT_EQ(delay_samples, lag_estimate->pre_echo_lag); + } + } + } +} + +// Test the pre echo estimation. 
+TEST_P(MatchedFilterTest, PreEchoEstimation) { + const bool kDetectPreEcho = GetParam(); + Random random_generator(42U); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); + std::vector capture_with_pre_echo(kBlockSize, 0.f); + ApmDataDumper data_dumper(0); + // data_dumper.SetActivated(true); + size_t pre_echo_delay_samples = 20e-3 * 16000 / down_sampling_factor; + size_t echo_delay_samples = 50e-3 * 16000 / down_sampling_factor; + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = kNumMatchedFilters; + Decimator capture_decimator(down_sampling_factor); + DelayBuffer signal_echo_delay_buffer(down_sampling_factor * + echo_delay_samples); + DelayBuffer signal_pre_echo_delay_buffer(down_sampling_factor * + pre_echo_delay_samples); + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + // Analyze the correlation between render and capture. + for (size_t k = 0; k < (600 + echo_delay_samples / sub_block_size); ++k) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + RandomizeSampleVector(&random_generator, render.View(band, channel)); + } + } + signal_echo_delay_buffer.Delay(render.View(0, 0), capture[0]); + signal_pre_echo_delay_buffer.Delay(render.View(0, 0), + capture_with_pre_echo); + for (size_t k = 0; k < capture[0].size(); ++k) { + constexpr float gain_pre_echo = 0.8f; + capture[0][k] += gain_pre_echo * capture_with_pre_echo[k]; + } + render_delay_buffer->Insert(render); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + std::array downsampled_capture_data; + rtc::ArrayView downsampled_capture(downsampled_capture_data.data(), + sub_block_size); + capture_decimator.Decimate(capture[0], downsampled_capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), + downsampled_capture, /*use_slow_smoothing=*/false); + } + // Obtain the lag estimates. + auto lag_estimate = filter.GetBestLagEstimate(); + EXPECT_TRUE(lag_estimate.has_value()); + // Verify that the expected most accurate lag estimate is correct. + if (lag_estimate.has_value()) { + EXPECT_EQ(echo_delay_samples, lag_estimate->lag); + if (kDetectPreEcho) { + // The pre echo delay is estimated in a subsampled domain and a larger + // error is allowed. + EXPECT_NEAR(pre_echo_delay_samples, lag_estimate->pre_echo_lag, 4); + } else { + // The pre echo delay fallback to the highest mached filter peak when + // its detection is disabled. + EXPECT_EQ(echo_delay_samples, lag_estimate->pre_echo_lag); + } + } + } +} + +// Verifies that the matched filter does not produce reliable and accurate +// estimates for uncorrelated render and capture signals. 
+TEST_P(MatchedFilterTest, LagNotReliableForUncorrelatedRenderAndCapture) { + const bool kDetectPreEcho = GetParam(); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + Random random_generator(42U); + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = kNumMatchedFilters; + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::array capture_data; + rtc::ArrayView capture(capture_data.data(), sub_block_size); + std::fill(capture.begin(), capture.end(), 0.f); + ApmDataDumper data_dumper(0); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, + render.View(/*band=*/0, /*channel=*/0)); + RandomizeSampleVector(&random_generator, capture); + render_delay_buffer->Insert(render); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture, + false); + } + + // Obtain the best lag estimate and Verify that no lag estimates are + // reliable. + auto best_lag_estimates = filter.GetBestLagEstimate(); + EXPECT_FALSE(best_lag_estimates.has_value()); + } +} + +// Verifies that the matched filter does not produce updated lag estimates for +// render signals of low level. +TEST_P(MatchedFilterTest, LagNotUpdatedForLowLevelRender) { + const bool kDetectPreEcho = GetParam(); + Random random_generator(42U); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, + kNumChannels)); + Decimator capture_decimator(down_sampling_factor); + + // Analyze the correlation between render and capture. 
+ for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, render.View(0, 0)); + for (auto& render_k : render.View(0, 0)) { + render_k *= 149.f / 32767.f; + } + std::copy(render.begin(0, 0), render.end(0, 0), capture[0].begin()); + std::array downsampled_capture_data; + rtc::ArrayView downsampled_capture(downsampled_capture_data.data(), + sub_block_size); + capture_decimator.Decimate(capture[0], downsampled_capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), + downsampled_capture, false); + } + + // Verify that no lag estimate has been produced. + auto lag_estimate = filter.GetBestLagEstimate(); + EXPECT_FALSE(lag_estimate.has_value()); + } +} + +INSTANTIATE_TEST_SUITE_P(_, MatchedFilterTest, testing::Values(true, false)); + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +class MatchedFilterDeathTest : public ::testing::TestWithParam {}; + +// Verifies the check for non-zero windows size. +TEST_P(MatchedFilterDeathTest, ZeroWindowSize) { + const bool kDetectPreEcho = GetParam(); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 16, 0, 1, 1, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + kDetectPreEcho), + ""); +} + +// Verifies the check for non-null data dumper. +TEST_P(MatchedFilterDeathTest, NullDataDumper) { + const bool kDetectPreEcho = GetParam(); + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilter(nullptr, DetectOptimization(), 16, 1, 1, 1, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + kDetectPreEcho), + ""); +} + +// Verifies the check for that the sub block size is a multiple of 4. +// TODO(peah): Activate the unittest once the required code has been landed. +TEST_P(MatchedFilterDeathTest, DISABLED_BlockSizeMultipleOf4) { + const bool kDetectPreEcho = GetParam(); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 15, 1, 1, 1, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + kDetectPreEcho), + ""); +} + +// Verifies the check for that there is an integer number of sub blocks that add +// up to a block size. +// TODO(peah): Activate the unittest once the required code has been landed. 
+TEST_P(MatchedFilterDeathTest, DISABLED_SubBlockSizeAddsUpToBlockSize) { + const bool kDetectPreEcho = GetParam(); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 12, 1, 1, 1, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + kDetectPreEcho), + ""); +} + +INSTANTIATE_TEST_SUITE_P(_, + MatchedFilterDeathTest, + testing::Values(true, false)); + +#endif + +} // namespace aec3 + +TEST(MatchedFilterFieldTrialTest, PreEchoConfigurationTest) { + float threshold_in = 0.1f; + int mode_in = 2; + rtc::StringBuilder field_trial_name; + field_trial_name << "WebRTC-Aec3PreEchoConfiguration/threshold:" + << threshold_in << ",mode:" << mode_in << "/"; + webrtc::test::ScopedFieldTrials field_trials(field_trial_name.str()); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter matched_filter( + &data_dumper, DetectOptimization(), + kBlockSize / config.delay.down_sampling_factor, + aec3::kWindowSizeSubBlocks, aec3::kNumMatchedFilters, + aec3::kAlignmentShiftSubBlocks, + config.render_levels.poor_excitation_render_limit, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + config.delay.detect_pre_echo); + + auto& pre_echo_config = matched_filter.GetPreEchoConfiguration(); + EXPECT_EQ(pre_echo_config.threshold, threshold_in); + EXPECT_EQ(pre_echo_config.mode, mode_in); +} + +TEST(MatchedFilterFieldTrialTest, WrongPreEchoConfigurationTest) { + constexpr float kDefaultThreshold = 0.5f; + constexpr int kDefaultMode = 0; + float threshold_in = -0.1f; + int mode_in = 5; + rtc::StringBuilder field_trial_name; + field_trial_name << "WebRTC-Aec3PreEchoConfiguration/threshold:" + << threshold_in << ",mode:" << mode_in << "/"; + webrtc::test::ScopedFieldTrials field_trials(field_trial_name.str()); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter matched_filter( + &data_dumper, DetectOptimization(), + kBlockSize / config.delay.down_sampling_factor, + aec3::kWindowSizeSubBlocks, aec3::kNumMatchedFilters, + aec3::kAlignmentShiftSubBlocks, + config.render_levels.poor_excitation_render_limit, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + config.delay.detect_pre_echo); + + auto& pre_echo_config = matched_filter.GetPreEchoConfiguration(); + EXPECT_EQ(pre_echo_config.threshold, kDefaultThreshold); + EXPECT_EQ(pre_echo_config.mode, kDefaultMode); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc new file mode 100644 index 0000000000..c5c33dbd68 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/mock/mock_block_processor.h" + +namespace webrtc { +namespace test { + +MockBlockProcessor::MockBlockProcessor() = default; +MockBlockProcessor::~MockBlockProcessor() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h new file mode 100644 index 0000000000..c9ae38c4aa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ + +#include + +#include "modules/audio_processing/aec3/block_processor.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockBlockProcessor : public BlockProcessor { + public: + MockBlockProcessor(); + virtual ~MockBlockProcessor(); + + MOCK_METHOD(void, + ProcessCapture, + (bool level_change, + bool saturated_microphone_signal, + Block* linear_output, + Block* capture_block), + (override)); + MOCK_METHOD(void, BufferRender, (const Block& block), (override)); + MOCK_METHOD(void, + UpdateEchoLeakageStatus, + (bool leakage_detected), + (override)); + MOCK_METHOD(void, + GetMetrics, + (EchoControl::Metrics * metrics), + (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(void, + SetCaptureOutputUsage, + (bool capture_output_used), + (override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc new file mode 100644 index 0000000000..b903bf0785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/mock/mock_echo_remover.h" + +namespace webrtc { +namespace test { + +MockEchoRemover::MockEchoRemover() = default; +MockEchoRemover::~MockEchoRemover() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h new file mode 100644 index 0000000000..31f075ef0a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ + +#include + +#include "absl/types/optional.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockEchoRemover : public EchoRemover { + public: + MockEchoRemover(); + virtual ~MockEchoRemover(); + + MOCK_METHOD(void, + ProcessCapture, + (EchoPathVariability echo_path_variability, + bool capture_signal_saturation, + const absl::optional& delay_estimate, + RenderBuffer* render_buffer, + Block* linear_output, + Block* capture), + (override)); + MOCK_METHOD(void, + UpdateEchoLeakageStatus, + (bool leakage_detected), + (override)); + MOCK_METHOD(void, + GetMetrics, + (EchoControl::Metrics * metrics), + (const, override)); + MOCK_METHOD(void, + SetCaptureOutputUsage, + (bool capture_output_used), + (override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc new file mode 100644 index 0000000000..d4ad09b4bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/mock/mock_render_delay_buffer.h" + +namespace webrtc { +namespace test { + +MockRenderDelayBuffer::MockRenderDelayBuffer(int sample_rate_hz, + size_t num_channels) + : block_buffer_(GetRenderDelayBufferSize(4, 4, 12), + NumBandsForRate(sample_rate_hz), + num_channels), + spectrum_buffer_(block_buffer_.buffer.size(), num_channels), + fft_buffer_(block_buffer_.buffer.size(), num_channels), + render_buffer_(&block_buffer_, &spectrum_buffer_, &fft_buffer_), + downsampled_render_buffer_(GetDownSampledBufferSize(4, 4)) { + ON_CALL(*this, GetRenderBuffer()) + .WillByDefault( + ::testing::Invoke(this, &MockRenderDelayBuffer::FakeGetRenderBuffer)); + ON_CALL(*this, GetDownsampledRenderBuffer()) + .WillByDefault(::testing::Invoke( + this, &MockRenderDelayBuffer::FakeGetDownsampledRenderBuffer)); +} + +MockRenderDelayBuffer::~MockRenderDelayBuffer() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h new file mode 100644 index 0000000000..c17fd62caa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayBuffer : public RenderDelayBuffer { + public: + MockRenderDelayBuffer(int sample_rate_hz, size_t num_channels); + virtual ~MockRenderDelayBuffer(); + + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(RenderDelayBuffer::BufferingEvent, + Insert, + (const Block& block), + (override)); + MOCK_METHOD(void, HandleSkippedCaptureProcessing, (), (override)); + MOCK_METHOD(RenderDelayBuffer::BufferingEvent, + PrepareCaptureProcessing, + (), + (override)); + MOCK_METHOD(bool, AlignFromDelay, (size_t delay), (override)); + MOCK_METHOD(void, AlignFromExternalDelay, (), (override)); + MOCK_METHOD(size_t, Delay, (), (const, override)); + MOCK_METHOD(size_t, MaxDelay, (), (const, override)); + MOCK_METHOD(RenderBuffer*, GetRenderBuffer, (), (override)); + MOCK_METHOD(const DownsampledRenderBuffer&, + GetDownsampledRenderBuffer, + (), + (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, HasReceivedBufferDelay, (), (override)); + + private: + RenderBuffer* FakeGetRenderBuffer() { return &render_buffer_; } + const DownsampledRenderBuffer& FakeGetDownsampledRenderBuffer() const { + return downsampled_render_buffer_; + } + BlockBuffer block_buffer_; + SpectrumBuffer spectrum_buffer_; + FftBuffer fft_buffer_; + RenderBuffer render_buffer_; + DownsampledRenderBuffer 
downsampled_render_buffer_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc new file mode 100644 index 0000000000..4ae2af96bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/mock/mock_render_delay_controller.h" + +namespace webrtc { +namespace test { + +MockRenderDelayController::MockRenderDelayController() = default; +MockRenderDelayController::~MockRenderDelayController() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h new file mode 100644 index 0000000000..14d499dd28 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayController : public RenderDelayController { + public: + MockRenderDelayController(); + virtual ~MockRenderDelayController(); + + MOCK_METHOD(void, Reset, (bool reset_delay_statistics), (override)); + MOCK_METHOD(void, LogRenderCall, (), (override)); + MOCK_METHOD(absl::optional, + GetDelay, + (const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const Block& capture), + (override)); + MOCK_METHOD(bool, HasClockdrift, (), (const, override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc new file mode 100644 index 0000000000..7a81ee89ea --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc @@ -0,0 +1,60 @@ + +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/moving_average.h"
+
+#include <algorithm>
+#include <functional>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace aec3 {
+
+MovingAverage::MovingAverage(size_t num_elem, size_t mem_len)
+    : num_elem_(num_elem),
+      mem_len_(mem_len - 1),
+      scaling_(1.0f / static_cast<float>(mem_len)),
+      memory_(num_elem * mem_len_, 0.f),
+      mem_index_(0) {
+  RTC_DCHECK(num_elem_ > 0);
+  RTC_DCHECK(mem_len > 0);
+}
+
+MovingAverage::~MovingAverage() = default;
+
+void MovingAverage::Average(rtc::ArrayView<const float> input,
+                            rtc::ArrayView<float> output) {
+  RTC_DCHECK(input.size() == num_elem_);
+  RTC_DCHECK(output.size() == num_elem_);
+
+  // Sum all contributions.
+  std::copy(input.begin(), input.end(), output.begin());
+  for (auto i = memory_.begin(); i < memory_.end(); i += num_elem_) {
+    std::transform(i, i + num_elem_, output.begin(), output.begin(),
+                   std::plus<float>());
+  }
+
+  // Scale by 1 / mem_len.
+  for (float& o : output) {
+    o *= scaling_;
+  }
+
+  // Update memory.
+  if (mem_len_ > 0) {
+    std::copy(input.begin(), input.end(),
+              memory_.begin() + mem_index_ * num_elem_);
+    mem_index_ = (mem_index_ + 1) % mem_len_;
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h
new file mode 100644
index 0000000000..913d78519c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace aec3 {
+
+class MovingAverage {
+ public:
+  // Creates an instance of MovingAverage that accepts inputs of length
+  // num_elem and averages over mem_len inputs.
+  MovingAverage(size_t num_elem, size_t mem_len);
+  ~MovingAverage();
+
+  // Computes the average of input and mem_len-1 previous inputs and stores the
+  // result in output.
+  void Average(rtc::ArrayView<const float> input,
+               rtc::ArrayView<float> output);
+
+ private:
+  const size_t num_elem_;
+  const size_t mem_len_;
+  const float scaling_;
+  std::vector<float> memory_;
+  size_t mem_index_;
+};
+
+}  // namespace aec3
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
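As a concrete check of the contract documented in the header: with mem_len = 3, each output element equals the mean of the current input and the two preceding ones. A minimal sketch with illustrative values (single-element inputs; the printed value follows from the arithmetic):

#include <array>
#include <cstdio>

#include "modules/audio_processing/aec3/moving_average.h"

int main() {
  webrtc::aec3::MovingAverage average(/*num_elem=*/1, /*mem_len=*/3);
  std::array<float, 1> in_buf;
  std::array<float, 1> out;
  for (float in : {3.f, 6.f, 9.f}) {
    in_buf[0] = in;
    average.Average(in_buf, out);
  }
  std::printf("%f\n", out[0]);  // (3 + 6 + 9) / 3 = 6.
  return 0;
}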
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc
new file mode 100644
index 0000000000..84ba9cbc5b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/moving_average.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(MovingAverage, Average) {
+  constexpr size_t num_elem = 4;
+  constexpr size_t mem_len = 3;
+  constexpr float e = 1e-6f;
+  aec3::MovingAverage ma(num_elem, mem_len);
+  std::array<float, num_elem> data1 = {1, 2, 3, 4};
+  std::array<float, num_elem> data2 = {5, 1, 9, 7};
+  std::array<float, num_elem> data3 = {3, 3, 5, 6};
+  std::array<float, num_elem> data4 = {8, 4, 2, 1};
+  std::array<float, num_elem> output;
+
+  ma.Average(data1, output);
+  EXPECT_NEAR(output[0], data1[0] / 3.0f, e);
+  EXPECT_NEAR(output[1], data1[1] / 3.0f, e);
+  EXPECT_NEAR(output[2], data1[2] / 3.0f, e);
+  EXPECT_NEAR(output[3], data1[3] / 3.0f, e);
+
+  ma.Average(data2, output);
+  EXPECT_NEAR(output[0], (data1[0] + data2[0]) / 3.0f, e);
+  EXPECT_NEAR(output[1], (data1[1] + data2[1]) / 3.0f, e);
+  EXPECT_NEAR(output[2], (data1[2] + data2[2]) / 3.0f, e);
+  EXPECT_NEAR(output[3], (data1[3] + data2[3]) / 3.0f, e);
+
+  ma.Average(data3, output);
+  EXPECT_NEAR(output[0], (data1[0] + data2[0] + data3[0]) / 3.0f, e);
+  EXPECT_NEAR(output[1], (data1[1] + data2[1] + data3[1]) / 3.0f, e);
+  EXPECT_NEAR(output[2], (data1[2] + data2[2] + data3[2]) / 3.0f, e);
+  EXPECT_NEAR(output[3], (data1[3] + data2[3] + data3[3]) / 3.0f, e);
+
+  ma.Average(data4, output);
+  EXPECT_NEAR(output[0], (data2[0] + data3[0] + data4[0]) / 3.0f, e);
+  EXPECT_NEAR(output[1], (data2[1] + data3[1] + data4[1]) / 3.0f, e);
+  EXPECT_NEAR(output[2], (data2[2] + data3[2] + data4[2]) / 3.0f, e);
+  EXPECT_NEAR(output[3], (data2[3] + data3[3] + data4[3]) / 3.0f, e);
+}
+
+TEST(MovingAverage, PassThrough) {
+  constexpr size_t num_elem = 4;
+  constexpr size_t mem_len = 1;
+  constexpr float e = 1e-6f;
+  aec3::MovingAverage ma(num_elem, mem_len);
+  std::array<float, num_elem> data1 = {1, 2, 3, 4};
+  std::array<float, num_elem> data2 = {5, 1, 9, 7};
+  std::array<float, num_elem> data3 = {3, 3, 5, 6};
+  std::array<float, num_elem> data4 = {8, 4, 2, 1};
+  std::array<float, num_elem> output;
+
+  ma.Average(data1, output);
+  EXPECT_NEAR(output[0], data1[0], e);
+  EXPECT_NEAR(output[1], data1[1], e);
+  EXPECT_NEAR(output[2], data1[2], e);
+  EXPECT_NEAR(output[3], data1[3], e);
+
+  ma.Average(data2, output);
+  EXPECT_NEAR(output[0], data2[0], e);
+  EXPECT_NEAR(output[1], data2[1], e);
+  EXPECT_NEAR(output[2], data2[2], e);
+  EXPECT_NEAR(output[3], data2[3], e);
+
+  ma.Average(data3, output);
+  EXPECT_NEAR(output[0], data3[0], e);
+  EXPECT_NEAR(output[1], data3[1], e);
+  EXPECT_NEAR(output[2], data3[2], e);
+  EXPECT_NEAR(output[3], data3[3], e);
+
+  ma.Average(data4, output);
+  EXPECT_NEAR(output[0], data4[0], e);
+  EXPECT_NEAR(output[1], data4[1], e);
+  EXPECT_NEAR(output[2], data4[2], e);
+  EXPECT_NEAR(output[3], data4[3], e);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc
new file mode 100644
index 0000000000..98068964d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc
@@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kNumFramesPerSecond = 100;
+
+// Compares the left and right channels in the render `frame` to determine
+// whether the signal is a proper stereo signal. To allow for differences
+// introduced by hardware drivers, a threshold `detection_threshold` is used
+// for the detection.
+bool HasStereoContent(
+    const std::vector<std::vector<std::vector<float>>>& frame,
+    float detection_threshold) {
+  if (frame[0].size() < 2) {
+    return false;
+  }
+
+  for (size_t band = 0; band < frame.size(); ++band) {
+    for (size_t k = 0; k < frame[band][0].size(); ++k) {
+      if (std::fabs(frame[band][0][k] - frame[band][1][k]) >
+          detection_threshold) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// In order to avoid logging metrics for very short lifetimes that are unlikely
+// to reflect real calls and that may dilute the "real" data, logging is
+// limited to lifetimes of at least 5 seconds.
+constexpr int kMinNumberOfFramesRequiredToLogMetrics = 500;
+
+// Continuous metrics are logged every 10 seconds.
+constexpr int kFramesPer10Seconds = 1000;
+
+}  // namespace
+
+MultiChannelContentDetector::MetricsLogger::MetricsLogger() {}
+
+MultiChannelContentDetector::MetricsLogger::~MetricsLogger() {
+  if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics)
+    return;
+
+  RTC_HISTOGRAM_BOOLEAN(
+      "WebRTC.Audio.EchoCanceller.PersistentMultichannelContentEverDetected",
+      any_multichannel_content_detected_ ? 1 : 0);
+}
+
+void MultiChannelContentDetector::MetricsLogger::Update(
+    bool persistent_multichannel_content_detected) {
+  ++frame_counter_;
+  if (persistent_multichannel_content_detected) {
+    any_multichannel_content_detected_ = true;
+    ++persistent_multichannel_frame_counter_;
+  }
+
+  if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics)
+    return;
+  if (frame_counter_ % kFramesPer10Seconds != 0)
+    return;
+  const bool mostly_multichannel_last_10_seconds =
+      (persistent_multichannel_frame_counter_ >= kFramesPer10Seconds / 2);
+  RTC_HISTOGRAM_BOOLEAN(
+      "WebRTC.Audio.EchoCanceller.ProcessingPersistentMultichannelContent",
+      mostly_multichannel_last_10_seconds ? 1 : 0);
+
+  persistent_multichannel_frame_counter_ = 0;
+}
+
+MultiChannelContentDetector::MultiChannelContentDetector(
+    bool detect_stereo_content,
+    int num_render_input_channels,
+    float detection_threshold,
+    int stereo_detection_timeout_threshold_seconds,
+    float stereo_detection_hysteresis_seconds)
+    : detect_stereo_content_(detect_stereo_content),
+      detection_threshold_(detection_threshold),
+      detection_timeout_threshold_frames_(
+          stereo_detection_timeout_threshold_seconds > 0
+              ? absl::make_optional(
+                    stereo_detection_timeout_threshold_seconds *
+                    kNumFramesPerSecond)
+              : absl::nullopt),
+      stereo_detection_hysteresis_frames_(static_cast<int>(
+          stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
+      metrics_logger_((detect_stereo_content && num_render_input_channels > 1)
+                          ? std::make_unique<MetricsLogger>()
+                          : nullptr),
+      persistent_multichannel_content_detected_(
+          !detect_stereo_content && num_render_input_channels > 1) {}
+
+bool MultiChannelContentDetector::UpdateDetection(
+    const std::vector<std::vector<std::vector<float>>>& frame) {
+  if (!detect_stereo_content_) {
+    RTC_DCHECK_EQ(frame[0].size() > 1,
+                  persistent_multichannel_content_detected_);
+    return false;
+  }
+
+  const bool previous_persistent_multichannel_content_detected =
+      persistent_multichannel_content_detected_;
+  const bool stereo_detected_in_frame =
+      HasStereoContent(frame, detection_threshold_);
+
+  consecutive_frames_with_stereo_ =
+      stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
+  frames_since_stereo_detected_last_ =
+      stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
+
+  // Detect persistent multichannel content.
+  if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
+    persistent_multichannel_content_detected_ = true;
+  }
+  if (detection_timeout_threshold_frames_.has_value() &&
+      frames_since_stereo_detected_last_ >=
+          *detection_timeout_threshold_frames_) {
+    persistent_multichannel_content_detected_ = false;
+  }
+
+  // Detect temporary multichannel content.
+  temporary_multichannel_content_detected_ =
+      persistent_multichannel_content_detected_ ? false
+                                                : stereo_detected_in_frame;
+
+  if (metrics_logger_)
+    metrics_logger_->Update(persistent_multichannel_content_detected_);
+
+  return previous_persistent_multichannel_content_detected !=
+         persistent_multichannel_content_detected_;
+}
+
+}  // namespace webrtc
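Stripped of metrics and DCHECKs, the persistence logic in UpdateDetection() reduces to two frame counters compared against a hysteresis threshold and an optional timeout. A hypothetical, condensed restatement (struct and names invented for illustration):

// A hysteresis counter must exceed a threshold before multichannel content is
// declared persistent, and a timeout counter can clear the state again.
struct PersistenceSketch {
  int consecutive_stereo_frames = 0;
  int frames_since_stereo = 0;
  bool persistent = false;

  void Update(bool stereo_in_frame, int hysteresis_frames,
              int timeout_frames /* <= 0 disables the timeout */) {
    consecutive_stereo_frames =
        stereo_in_frame ? consecutive_stereo_frames + 1 : 0;
    frames_since_stereo = stereo_in_frame ? 0 : frames_since_stereo + 1;
    if (consecutive_stereo_frames > hysteresis_frames) persistent = true;
    if (timeout_frames > 0 && frames_since_stereo >= timeout_frames)
      persistent = false;
  }
};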
+  // indicating whether a change in the proper multichannel content was
+  // detected.
+  bool UpdateDetection(
+      const std::vector<std::vector<std::vector<float>>>& frame);
+
+  bool IsProperMultiChannelContentDetected() const {
+    return persistent_multichannel_content_detected_;
+  }
+
+  bool IsTemporaryMultiChannelContentDetected() const {
+    return temporary_multichannel_content_detected_;
+  }
+
+ private:
+  // Tracks and logs metrics for the amount of multichannel content detected.
+  class MetricsLogger {
+   public:
+    MetricsLogger();
+
+    // The destructor logs call summary statistics.
+    ~MetricsLogger();
+
+    // Updates and logs metrics.
+    void Update(bool persistent_multichannel_content_detected);
+
+   private:
+    int frame_counter_ = 0;
+
+    // Counts the number of frames of persistent multichannel audio observed
+    // during the current metrics collection interval.
+    int persistent_multichannel_frame_counter_ = 0;
+
+    // Indicates whether persistent multichannel content has ever been
+    // detected.
+    bool any_multichannel_content_detected_ = false;
+  };
+
+  const bool detect_stereo_content_;
+  const float detection_threshold_;
+  const absl::optional<int> detection_timeout_threshold_frames_;
+  const int stereo_detection_hysteresis_frames_;
+
+  // Collects and reports metrics on the amount of multichannel content
+  // detected. Only created if |num_render_input_channels| > 1 and
+  // |detect_stereo_content_| is true.
+  const std::unique_ptr<MetricsLogger> metrics_logger_;
+
+  bool persistent_multichannel_content_detected_;
+  bool temporary_multichannel_content_detected_ = false;
+  int64_t frames_since_stereo_detected_last_ = 0;
+  int64_t consecutive_frames_with_stereo_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
new file mode 100644
index 0000000000..8d38dd0991
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+
+#include "system_wrappers/include/metrics.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(MultiChannelContentDetector, HandlingOfMono) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/1,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+}
+
+TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/false,
+      /*num_render_input_channels=*/1,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+}
+
+TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/false,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+}
+
+TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+TEST(MultiChannelContentDetector, DetectionWhenStereo) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
+  EXPECT_TRUE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+
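+  // Note: the render frames in these tests use the nested layout
+  // frame[band][channel][sample]; e.g. frame[0][1][k] is sample k of the
+  // second channel in the lowest band. The 160-sample sub-frame length used
+  // here is a test-local choice, not a requirement of the detector.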
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
+
+  EXPECT_TRUE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
+  constexpr float kThreshold = 1.0f;
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/kThreshold,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold);
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+  EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
+  constexpr float kThreshold = 1.0f;
+  MultiChannelContentDetector mc(
+      /*detect_stereo_content=*/true,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/kThreshold,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      /*stereo_detection_hysteresis_seconds=*/0.0f);
+  std::vector<std::vector<std::vector<float>>> frame(
+      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
+  std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
+  std::fill(frame[0][1].begin(), frame[0][1].end(),
+            100.0f + kThreshold + 0.1f);
+
+  EXPECT_TRUE(mc.UpdateDetection(frame));
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  EXPECT_FALSE(mc.UpdateDetection(frame));
+}
+
+class MultiChannelContentDetectorTimeoutBehavior
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<bool, int>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector,
+                         MultiChannelContentDetectorTimeoutBehavior,
+                         ::testing::Combine(::testing::Values(false, true),
+                                            ::testing::Values(0, 1, 10)));
+
+TEST_P(MultiChannelContentDetectorTimeoutBehavior,
+       TimeOutBehaviorForNonTrueStereo) {
+  constexpr int kNumFramesPerSecond = 100;
+  const bool detect_stereo_content = std::get<0>(GetParam());
+  const int stereo_detection_timeout_threshold_seconds =
+      std::get<1>(GetParam());
+  const int stereo_detection_timeout_threshold_frames =
+      stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
+
+  MultiChannelContentDetector mc(detect_stereo_content,
+                                 /*num_render_input_channels=*/2,
+                                 /*detection_threshold=*/0.0f,
+                                 stereo_detection_timeout_threshold_seconds,
+                                 /*stereo_detection_hysteresis_seconds=*/0.0f);
+  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+  // Pass fake stereo frames and verify the content detection.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+  }
+
+  // Pass a true stereo frame and verify that it is properly detected.
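+  // (With detection enabled this is the frame that flips the persistent
+  // state, so UpdateDetection() reports a change; with detection disabled,
+  // the state has been multichannel since construction and no change is
+  // reported.)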
+  if (detect_stereo_content) {
+    EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+  }
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  // Pass fake stereo frames until any timeouts are about to occur.
+  for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+  }
+
+  // Pass a fake stereo frame and verify that any timeouts properly occur.
+  if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
+    EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+  }
+
+  // Pass fake stereo frames and verify the behavior after any timeout.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content &&
+        stereo_detection_timeout_threshold_frames > 0) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+  }
+}
+
+class MultiChannelContentDetectorHysteresisBehavior
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<bool, float>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    MultiChannelContentDetector,
+    MultiChannelContentDetectorHysteresisBehavior,
+    ::testing::Combine(::testing::Values(false, true),
+                       ::testing::Values(0.0f, 0.1f, 0.2f)));
+
+TEST_P(MultiChannelContentDetectorHysteresisBehavior,
+       PeriodBeforeStereoDetectionIsTriggered) {
+  constexpr int kNumFramesPerSecond = 100;
+  const bool detect_stereo_content = std::get<0>(GetParam());
+  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
+  const int stereo_detection_hysteresis_frames =
+      stereo_detection_hysteresis_seconds * kNumFramesPerSecond;
+
+  MultiChannelContentDetector mc(
+      detect_stereo_content,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      stereo_detection_hysteresis_seconds);
+  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+  // Pass fake stereo frames and verify the content detection.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
+  }
+
+  // Pass two true stereo frames and verify that they are properly detected.
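+  // (The detector requires strictly more than stereo_detection_hysteresis_
+  // frames of consecutive stereo before latching, so with a nonzero
+  // hysteresis these two frames only raise the temporary-detection flag.)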
+ ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 || + stereo_detection_hysteresis_frames == 0); + for (int k = 0; k < 2; ++k) { + if (detect_stereo_content) { + if (stereo_detection_hysteresis_seconds == 0.0f) { + if (k == 0) { + EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame)); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + } + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetected()); + } + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + } + + if (stereo_detection_hysteresis_seconds == 0.0f) { + return; + } + + // Pass true stereo frames until any timeouts are about to occur. + for (int k = 0; k < stereo_detection_hysteresis_frames - 3; ++k) { + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + } + + // Pass a true stereo frame and verify that it is properly detected. + if (detect_stereo_content) { + EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + + // Pass an additional true stereo frame and verify that it is properly + // detected. + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + + // Pass a fake stereo frame and verify that it is properly detected. + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } +} + +class MultiChannelContentDetectorMetricsDisabled + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P( + /*no prefix*/, + MultiChannelContentDetectorMetricsDisabled, + ::testing::Values(std::tuple(false, 2), + std::tuple(true, 1))); + +// Test that no metrics are logged when they are clearly uninteresting and would +// dilute relevant data: when the reference audio is single channel, or when +// dynamic detection is disabled. 
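+// (The two parameter tuples above map to exactly those cases: (false, 2) is
+// stereo input with dynamic detection disabled, and (true, 1) is mono input
+// with detection enabled.)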
+TEST_P(MultiChannelContentDetectorMetricsDisabled, ReportsNoMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + const bool detect_stereo_content = std::get<0>(GetParam()); + const int channel_count = std::get<1>(GetParam()); + std::vector>> audio_frame = { + std::vector>(channel_count, + std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/detect_stereo_content, + /*num_render_input_channels=*/channel_count, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 20 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 3 seconds, no metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsNoMetricsForShortLifetime) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + constexpr int kTooFewFramesToLogMetrics = 3 * kNumFramesPerSecond; + std::vector>> audio_frame = { + std::vector>(2, std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < kTooFewFramesToLogMetrics; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 25 seconds, metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + std::vector>> true_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 101.0f)}}; + std::vector>> fake_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 100.0f)}}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 10 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(true_stereo_frame); + } + for (int k = 0; k < 15 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(fake_stereo_frame); + } + } + // After 10 seconds of true stereo and the remainder fake stereo, we expect + // one lifetime metric sample (multichannel detected) and two periodic samples + // (one multichannel, one mono). + + // Check lifetime metric. + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected", 1)); + + // Check periodic metric. + EXPECT_METRIC_EQ( + 2, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent", 0)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." 
+ "ProcessingPersistentMultichannelContent", 1)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h new file mode 100644 index 0000000000..0d8a06b2cd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { +// Class for selecting whether the suppressor is in the nearend or echo state. +class NearendDetector { + public: + virtual ~NearendDetector() {} + + // Returns whether the current state is the nearend state. + virtual bool IsNearendState() const = 0; + + // Updates the state selection based on latest spectral estimates. + virtual void Update( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc new file mode 100644 index 0000000000..8e391d6fa6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
+
+#include <algorithm>
+#include <functional>
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kHErrorInitial = 10000.f;
+constexpr int kPoorExcitationCounterInitial = 1000;
+
+}  // namespace
+
+std::atomic<int> RefinedFilterUpdateGain::instance_count_(0);
+
+RefinedFilterUpdateGain::RefinedFilterUpdateGain(
+    const EchoCanceller3Config::Filter::RefinedConfiguration& config,
+    size_t config_change_duration_blocks)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_change_duration_blocks_(
+          static_cast<int>(config_change_duration_blocks)),
+      poor_excitation_counter_(kPoorExcitationCounterInitial) {
+  SetConfig(config, true);
+  H_error_.fill(kHErrorInitial);
+  RTC_DCHECK_LT(0, config_change_duration_blocks_);
+  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
+}
+
+RefinedFilterUpdateGain::~RefinedFilterUpdateGain() {}
+
+void RefinedFilterUpdateGain::HandleEchoPathChange(
+    const EchoPathVariability& echo_path_variability) {
+  if (echo_path_variability.gain_change) {
+    // TODO(bugs.webrtc.org/9526) Handle gain changes.
+  }
+
+  if (echo_path_variability.delay_change !=
+      EchoPathVariability::DelayAdjustment::kNone) {
+    H_error_.fill(kHErrorInitial);
+  }
+
+  if (!echo_path_variability.gain_change) {
+    poor_excitation_counter_ = kPoorExcitationCounterInitial;
+    call_counter_ = 0;
+  }
+}
+
+void RefinedFilterUpdateGain::Compute(
+    const std::array<float, kFftLengthBy2Plus1>& render_power,
+    const RenderSignalAnalyzer& render_signal_analyzer,
+    const SubtractorOutput& subtractor_output,
+    rtc::ArrayView<const float> erl,
+    size_t size_partitions,
+    bool saturated_capture_signal,
+    bool disallow_leakage_diverged,
+    FftData* gain_fft) {
+  RTC_DCHECK(gain_fft);
+  // Introducing shorter notation to improve readability.
+  const FftData& E_refined = subtractor_output.E_refined;
+  const auto& E2_refined = subtractor_output.E2_refined;
+  const auto& E2_coarse = subtractor_output.E2_coarse;
+  FftData* G = gain_fft;
+  const auto& X2 = render_power;
+
+  ++call_counter_;
+
+  UpdateCurrentConfig();
+
+  if (render_signal_analyzer.PoorSignalExcitation()) {
+    poor_excitation_counter_ = 0;
+  }
+
+  // Do not update the filter if the render is not sufficiently excited.
+  if (++poor_excitation_counter_ < size_partitions ||
+      saturated_capture_signal || call_counter_ <= size_partitions) {
+    G->re.fill(0.f);
+    G->im.fill(0.f);
+  } else {
+    // Corresponds to WGN of power -39 dBFS.
+    std::array<float, kFftLengthBy2Plus1> mu;
+    // mu = H_error / (0.5* H_error* X2 + n * E2).
+    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+      if (X2[k] >= current_config_.noise_gate) {
+        mu[k] = H_error_[k] /
+                (0.5f * H_error_[k] * X2[k] + size_partitions * E2_refined[k]);
+      } else {
+        mu[k] = 0.f;
+      }
+    }
+
+    // Avoid updating the filter close to narrow bands in the render signals.
+    render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
+
+    // H_error = H_error - 0.5 * mu * X2 * H_error.
+    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+      H_error_[k] -= 0.5f * mu[k] * X2[k] * H_error_[k];
+    }
+
+    // G = mu * E.
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + G->re[k] = mu[k] * E_refined.re[k]; + G->im[k] = mu[k] * E_refined.im[k]; + } + } + + // H_error = H_error + factor * erl. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (E2_refined[k] <= E2_coarse[k] || disallow_leakage_diverged) { + H_error_[k] += current_config_.leakage_converged * erl[k]; + } else { + H_error_[k] += current_config_.leakage_diverged * erl[k]; + } + + H_error_[k] = std::max(H_error_[k], current_config_.error_floor); + H_error_[k] = std::min(H_error_[k], current_config_.error_ceil); + } + + data_dumper_->DumpRaw("aec3_refined_gain_H_error", H_error_); +} + +void RefinedFilterUpdateGain::UpdateCurrentConfig() { + RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_); + if (config_change_counter_ > 0) { + if (--config_change_counter_ > 0) { + auto average = [](float from, float to, float from_weight) { + return from * from_weight + to * (1.f - from_weight); + }; + + float change_factor = + config_change_counter_ * one_by_config_change_duration_blocks_; + + current_config_.leakage_converged = + average(old_target_config_.leakage_converged, + target_config_.leakage_converged, change_factor); + current_config_.leakage_diverged = + average(old_target_config_.leakage_diverged, + target_config_.leakage_diverged, change_factor); + current_config_.error_floor = + average(old_target_config_.error_floor, target_config_.error_floor, + change_factor); + current_config_.error_ceil = + average(old_target_config_.error_ceil, target_config_.error_ceil, + change_factor); + current_config_.noise_gate = + average(old_target_config_.noise_gate, target_config_.noise_gate, + change_factor); + } else { + current_config_ = old_target_config_ = target_config_; + } + } + RTC_DCHECK_LE(0, config_change_counter_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h new file mode 100644 index 0000000000..1a68ebc296 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +class AdaptiveFirFilter; +class ApmDataDumper; +struct EchoPathVariability; +struct FftData; +class RenderSignalAnalyzer; +struct SubtractorOutput; + +// Provides functionality for computing the adaptive gain for the refined +// filter. 
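+// The update follows a frequency-domain NLMS-style rule: per frequency bin k,
+//   mu[k] = H_error[k] / (0.5 * H_error[k] * X2[k] + N * E2[k])
+// and G[k] = mu[k] * E[k], where H_error tracks the filter mismatch, X2 is
+// the render power, E the refined subtractor error and N the number of filter
+// partitions. Illustrative use (a sketch; `config`, `analyzer`,
+// `subtractor_output`, `erl` and `num_partitions` are assumed to come from
+// the surrounding setup, see the unit tests for complete wiring):
+//   RefinedFilterUpdateGain gain(config.filter.refined,
+//                                config.filter.config_change_duration_blocks);
+//   FftData G;
+//   gain.Compute(render_power, analyzer, subtractor_output, erl,
+//                num_partitions, /*saturated_capture_signal=*/false,
+//                /*disallow_leakage_diverged=*/false, &G);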
+class RefinedFilterUpdateGain { + public: + RefinedFilterUpdateGain( + const EchoCanceller3Config::Filter::RefinedConfiguration& config, + size_t config_change_duration_blocks); + ~RefinedFilterUpdateGain(); + + RefinedFilterUpdateGain(const RefinedFilterUpdateGain&) = delete; + RefinedFilterUpdateGain& operator=(const RefinedFilterUpdateGain&) = delete; + + // Takes action in the case of a known echo path change. + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + + // Computes the gain. + void Compute(const std::array& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const SubtractorOutput& subtractor_output, + rtc::ArrayView erl, + size_t size_partitions, + bool saturated_capture_signal, + bool disallow_leakage_diverged, + FftData* gain_fft); + + // Sets a new config. + void SetConfig( + const EchoCanceller3Config::Filter::RefinedConfiguration& config, + bool immediate_effect) { + if (immediate_effect) { + old_target_config_ = current_config_ = target_config_ = config; + config_change_counter_ = 0; + } else { + old_target_config_ = current_config_; + target_config_ = config; + config_change_counter_ = config_change_duration_blocks_; + } + } + + private: + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const int config_change_duration_blocks_; + float one_by_config_change_duration_blocks_; + EchoCanceller3Config::Filter::RefinedConfiguration current_config_; + EchoCanceller3Config::Filter::RefinedConfiguration target_config_; + EchoCanceller3Config::Filter::RefinedConfiguration old_target_config_; + std::array H_error_; + size_t poor_excitation_counter_; + size_t call_counter_ = 0; + int config_change_counter_ = 0; + + // Updates the current config towards the target config. + void UpdateCurrentConfig(); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc new file mode 100644 index 0000000000..c77c5b53d5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
+
+#include <algorithm>
+#include <memory>
+#include <numeric>
+#include <string>
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// Method for performing the simulations needed to test the refined filter
+// update gain functionality.
+void RunFilterUpdateTest(int num_blocks_to_process,
+                         size_t delay_samples,
+                         int filter_length_blocks,
+                         const std::vector<int>& blocks_with_echo_path_changes,
+                         const std::vector<int>& blocks_with_saturation,
+                         bool use_silent_render_in_second_half,
+                         std::array<float, kBlockSize>* e_last_block,
+                         std::array<float, kBlockSize>* y_last_block,
+                         FftData* G_last_block) {
+  ApmDataDumper data_dumper(42);
+  Aec3Optimization optimization = DetectOptimization();
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+  EchoCanceller3Config config;
+  config.filter.refined.length_blocks = filter_length_blocks;
+  config.filter.coarse.length_blocks = filter_length_blocks;
+  AdaptiveFirFilter refined_filter(
+      config.filter.refined.length_blocks, config.filter.refined.length_blocks,
+      config.filter.config_change_duration_blocks, kNumRenderChannels,
+      optimization, &data_dumper);
+  AdaptiveFirFilter coarse_filter(
+      config.filter.coarse.length_blocks, config.filter.coarse.length_blocks,
+      config.filter.config_change_duration_blocks, kNumRenderChannels,
+      optimization, &data_dumper);
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> H2(
+      kNumCaptureChannels, std::vector<std::array<float, kFftLengthBy2Plus1>>(
+                               refined_filter.max_filter_size_partitions(),
+                               std::array<float, kFftLengthBy2Plus1>()));
+  for (auto& H2_ch : H2) {
+    for (auto& H2_k : H2_ch) {
+      H2_k.fill(0.f);
+    }
+  }
+  std::vector<std::vector<float>> h(
+      kNumCaptureChannels,
+      std::vector<float>(
+          GetTimeDomainLength(refined_filter.max_filter_size_partitions()),
+          0.f));
+
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x_old;
+  x_old.fill(0.f);
+  CoarseFilterUpdateGain coarse_gain(
+      config.filter.coarse, config.filter.config_change_duration_blocks);
+  RefinedFilterUpdateGain refined_gain(
+      config.filter.refined, config.filter.config_change_duration_blocks);
+  Random random_generator(42U);
+  Block x(kNumBands, kNumRenderChannels);
+  std::vector<float> y(kBlockSize, 0.f);
+  config.delay.default_delay = 1;
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
+  AecState aec_state(config, kNumCaptureChannels);
+  RenderSignalAnalyzer render_signal_analyzer(config);
+  absl::optional<DelayEstimate> delay_estimate;
+  std::array<float, kFftLength> s_scratch;
+  std::array<float, kBlockSize> s;
+  FftData S;
+  FftData G;
+  std::vector<SubtractorOutput> output(kNumCaptureChannels);
+  for (auto& subtractor_output : output) {
+    subtractor_output.Reset();
+  }
+  FftData& E_refined = output[0].E_refined;
+  FftData E_coarse;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_refined(
+      kNumCaptureChannels);
+  std::array<float, kBlockSize>& e_refined = output[0].e_refined;
+  std::array<float, kBlockSize>& e_coarse = output[0].e_coarse;
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(0.f);
+  }
+
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+
+  DelayBuffer<float> delay_buffer(delay_samples);
+  for (int k = 0; k < num_blocks_to_process; ++k) {
+    // Handle echo path changes.
+    if (std::find(blocks_with_echo_path_changes.begin(),
+                  blocks_with_echo_path_changes.end(),
+                  k) != blocks_with_echo_path_changes.end()) {
+      refined_filter.HandleEchoPathChange();
+    }
+
+    // Handle saturation.
+    const bool saturation =
+        std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(),
+                  k) != blocks_with_saturation.end();
+
+    // Create the render signal.
+    if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) {
+      for (int band = 0; band < x.NumBands(); ++band) {
+        for (int channel = 0; channel < x.NumChannels(); ++channel) {
+          std::fill(x.begin(band, channel), x.end(band, channel), 0.f);
+        }
+      }
+    } else {
+      for (int band = 0; band < x.NumBands(); ++band) {
+        for (int channel = 0; channel < x.NumChannels(); ++channel) {
+          RandomizeSampleVector(&random_generator, x.View(band, channel));
+        }
+      }
+    }
+    delay_buffer.Delay(x.View(/*band=*/0, /*channel=*/0), y);
+
+    render_delay_buffer->Insert(x);
+    if (k == 0) {
+      render_delay_buffer->Reset();
+    }
+    render_delay_buffer->PrepareCaptureProcessing();
+
+    render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(),
+                                  aec_state.MinDirectPathFilterDelay());
+
+    // Apply the refined filter.
+    refined_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S);
+    fft.Ifft(S, &s_scratch);
+    std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                   e_refined.begin(),
+                   [&](float a, float b) { return a - b * kScale; });
+    std::for_each(e_refined.begin(), e_refined.end(),
+                  [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
+    fft.ZeroPaddedFft(e_refined, Aec3Fft::Window::kRectangular, &E_refined);
+    for (size_t k = 0; k < kBlockSize; ++k) {
+      s[k] = kScale * s_scratch[k + kFftLengthBy2];
+    }
+
+    // Apply the coarse filter.
+    coarse_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S);
+    fft.Ifft(S, &s_scratch);
+    std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                   e_coarse.begin(),
+                   [&](float a, float b) { return a - b * kScale; });
+    std::for_each(e_coarse.begin(), e_coarse.end(),
+                  [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
+    fft.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kRectangular, &E_coarse);
+
+    // Compute spectra for future use.
+    E_refined.Spectrum(Aec3Optimization::kNone, output[0].E2_refined);
+    E_coarse.Spectrum(Aec3Optimization::kNone, output[0].E2_coarse);
+
+    // Adapt the coarse filter.
+    std::array<float, kFftLengthBy2Plus1> render_power;
+    render_delay_buffer->GetRenderBuffer()->SpectralSum(
+        coarse_filter.SizePartitions(), &render_power);
+    coarse_gain.Compute(render_power, render_signal_analyzer, E_coarse,
+                        coarse_filter.SizePartitions(), saturation, &G);
+    coarse_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G);
+
+    // Adapt the refined filter.
+    render_delay_buffer->GetRenderBuffer()->SpectralSum(
+        refined_filter.SizePartitions(), &render_power);
+
+    std::array<float, kFftLengthBy2Plus1> erl;
+    ComputeErl(optimization, H2[0], erl);
+    refined_gain.Compute(render_power, render_signal_analyzer, output[0], erl,
+                         refined_filter.SizePartitions(), saturation, false,
+                         &G);
+    refined_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G, &h[0]);
+
+    // Update the delay.
+ aec_state.HandleEchoPathChange(EchoPathVariability( + false, EchoPathVariability::DelayAdjustment::kNone, false)); + refined_filter.ComputeFrequencyResponse(&H2[0]); + std::copy(output[0].E2_refined.begin(), output[0].E2_refined.end(), + E2_refined[0].begin()); + aec_state.Update(delay_estimate, H2, h, + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + output); + } + + std::copy(e_refined.begin(), e_refined.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Length: " << filter_length_blocks; + return ss.Release(); +} + +std::string ProduceDebugText(size_t delay, int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Delay: " << delay << ", "; + ss << ProduceDebugText(filter_length_blocks); + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(RefinedFilterUpdateGainDeathTest, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + EchoCanceller3Config config; + RenderSignalAnalyzer analyzer(config); + SubtractorOutput output; + RefinedFilterUpdateGain gain(config.filter.refined, + config.filter.config_change_duration_blocks); + std::array render_power; + render_power.fill(0.f); + std::array erl; + erl.fill(0.f); + EXPECT_DEATH( + gain.Compute(render_power, analyzer, output, erl, + config.filter.refined.length_blocks, false, false, nullptr), + ""); +} + +#endif + +// Verifies that the gain formed causes the filter using it to converge. +TEST(RefinedFilterUpdateGain, GainCausesFilterToConverge) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks)); + + std::array e; + std::array y; + FftData G; + + RunFilterUpdateTest(600, delay_samples, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G); + + // Verify that the refined filter is able to perform well. + // Use different criteria to take overmodelling into account. + if (filter_length_blocks == 12) { + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } else { + EXPECT_LT(std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } + } + } +} + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. 
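+// (Intuition: with persistent excitation, H_error_ decays multiplicatively on
+// every update, so mu and hence the gain power shrink as the filter
+// converges; the three runs below sample that trajectory at 250, 500 and 750
+// blocks.)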
+TEST(RefinedFilterUpdateGain, DecreasingGain) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + + std::array e; + std::array y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array G_a_power; + std::array G_b_power; + std::array G_c_power; + + RunFilterUpdateTest(250, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + RunFilterUpdateTest(500, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_b); + RunFilterUpdateTest(750, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); +} + +// Verifies that the gain is zero when there is saturation and that the internal +// error estimates cause the gain to increase after a period of saturation. +TEST(RefinedFilterUpdateGain, SaturationBehavior) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + std::array e; + std::array y; + FftData G_a; + FftData G_b; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + std::array G_a_power; + std::array G_b_power; + + RunFilterUpdateTest(100, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); + + RunFilterUpdateTest(99, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_a); + RunFilterUpdateTest(201, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_b); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + + EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + } +} + +// Verifies that the gain increases after an echo path change. +// TODO(peah): Correct and reactivate this test. 
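+// (As written, the test asserts that the total gain power is larger in the
+// block right after the echo path change at block 99 than right before it.)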
+TEST(RefinedFilterUpdateGain, DISABLED_EchoPathChangeBehavior) {
+  for (size_t filter_length_blocks : {12, 20, 30}) {
+    SCOPED_TRACE(ProduceDebugText(filter_length_blocks));
+    std::vector<int> blocks_with_echo_path_changes;
+    std::vector<int> blocks_with_saturation;
+    blocks_with_echo_path_changes.push_back(99);
+
+    std::array<float, kBlockSize> e;
+    std::array<float, kBlockSize> y;
+    FftData G_a;
+    FftData G_b;
+    std::array<float, kFftLengthBy2Plus1> G_a_power;
+    std::array<float, kFftLengthBy2Plus1> G_b_power;
+
+    RunFilterUpdateTest(100, 65, filter_length_blocks,
+                        blocks_with_echo_path_changes, blocks_with_saturation,
+                        false, &e, &y, &G_a);
+    RunFilterUpdateTest(101, 65, filter_length_blocks,
+                        blocks_with_echo_path_changes, blocks_with_saturation,
+                        false, &e, &y, &G_b);
+
+    G_a.Spectrum(Aec3Optimization::kNone, G_a_power);
+    G_b.Spectrum(Aec3Optimization::kNone, G_b_power);
+
+    EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.),
+              std::accumulate(G_b_power.begin(), G_b_power.end(), 0.));
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc
new file mode 100644
index 0000000000..aa511e2b6b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+#include <algorithm>
+#include <functional>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+RenderBuffer::RenderBuffer(BlockBuffer* block_buffer,
+                           SpectrumBuffer* spectrum_buffer,
+                           FftBuffer* fft_buffer)
+    : block_buffer_(block_buffer),
+      spectrum_buffer_(spectrum_buffer),
+      fft_buffer_(fft_buffer) {
+  RTC_DCHECK(block_buffer_);
+  RTC_DCHECK(spectrum_buffer_);
+  RTC_DCHECK(fft_buffer_);
+  RTC_DCHECK_EQ(block_buffer_->buffer.size(), fft_buffer_->buffer.size());
+  RTC_DCHECK_EQ(spectrum_buffer_->buffer.size(), fft_buffer_->buffer.size());
+  RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
+  RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
+}
+
+RenderBuffer::~RenderBuffer() = default;
+
+void RenderBuffer::SpectralSum(
+    size_t num_spectra,
+    std::array<float, kFftLengthBy2Plus1>* X2) const {
+  X2->fill(0.f);
+  int position = spectrum_buffer_->read;
+  for (size_t j = 0; j < num_spectra; ++j) {
+    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
+      for (size_t k = 0; k < X2->size(); ++k) {
+        (*X2)[k] += channel_spectrum[k];
+      }
+    }
+    position = spectrum_buffer_->IncIndex(position);
+  }
+}
+
+void RenderBuffer::SpectralSums(
+    size_t num_spectra_shorter,
+    size_t num_spectra_longer,
+    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
+    std::array<float, kFftLengthBy2Plus1>* X2_longer) const {
+  RTC_DCHECK_LE(num_spectra_shorter, num_spectra_longer);
+  X2_shorter->fill(0.f);
+  int position = spectrum_buffer_->read;
+  size_t j = 0;
+  for (; j < num_spectra_shorter; ++j) {
+    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
+      for (size_t k = 0; k < X2_shorter->size(); ++k) {
+        (*X2_shorter)[k] += channel_spectrum[k];
+      }
+    }
+    position = spectrum_buffer_->IncIndex(position);
+  }
+  std::copy(X2_shorter->begin(), X2_shorter->end(), X2_longer->begin());
+  for (; j < num_spectra_longer; ++j) {
+    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
+      for (size_t k = 0; k < X2_longer->size(); ++k) {
+        (*X2_longer)[k] += channel_spectrum[k];
+      }
+    }
+    position = spectrum_buffer_->IncIndex(position);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h
new file mode 100644
index 0000000000..8adc996087
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_buffer.h"
+#include "modules/audio_processing/aec3/fft_buffer.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/spectrum_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Provides a buffer of the render data for the echo remover.
+class RenderBuffer {
+ public:
+  RenderBuffer(BlockBuffer* block_buffer,
+               SpectrumBuffer* spectrum_buffer,
+               FftBuffer* fft_buffer);
+
+  RenderBuffer() = delete;
+  RenderBuffer(const RenderBuffer&) = delete;
+  RenderBuffer& operator=(const RenderBuffer&) = delete;
+
+  ~RenderBuffer();
+
+  // Get a block.
+  const Block& GetBlock(int buffer_offset_blocks) const {
+    int position =
+        block_buffer_->OffsetIndex(block_buffer_->read, buffer_offset_blocks);
+    return block_buffer_->buffer[position];
+  }
+
+  // Get the spectrum from one of the FFTs in the buffer.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Spectrum(
+      int buffer_offset_ffts) const {
+    int position = spectrum_buffer_->OffsetIndex(spectrum_buffer_->read,
+                                                 buffer_offset_ffts);
+    return spectrum_buffer_->buffer[position];
+  }
+
+  // Returns the circular fft buffer.
+  rtc::ArrayView<const std::vector<FftData>> GetFftBuffer() const {
+    return fft_buffer_->buffer;
+  }
+
+  // Returns the current position in the circular buffer.
+  size_t Position() const {
+    RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
+    RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
+    return fft_buffer_->read;
+  }
+
+  // Returns the sum of the spectrums for a certain number of FFTs.
+  void SpectralSum(size_t num_spectra,
+                   std::array<float, kFftLengthBy2Plus1>* X2) const;
+
+  // Returns the sums of the spectrums for two numbers of FFTs.
+  void SpectralSums(size_t num_spectra_shorter,
+                    size_t num_spectra_longer,
+                    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
+                    std::array<float, kFftLengthBy2Plus1>* X2_longer) const;
+
+  // Gets the recent activity seen in the render signal.
+  bool GetRenderActivity() const { return render_activity_; }
+
+  // Specifies the recent activity seen in the render signal.
+  void SetRenderActivity(bool activity) { render_activity_ = activity; }
+
+  // Returns the headroom between the write and the read positions in the
+  // buffer.
+  int Headroom() const {
+    // The write and read indices are decreased over time.
+    int headroom =
+        fft_buffer_->write < fft_buffer_->read
+            ?
fft_buffer_->read - fft_buffer_->write + : fft_buffer_->size - fft_buffer_->write + fft_buffer_->read; + + RTC_DCHECK_LE(0, headroom); + RTC_DCHECK_GE(fft_buffer_->size, headroom); + + return headroom; + } + + // Returns a reference to the spectrum buffer. + const SpectrumBuffer& GetSpectrumBuffer() const { return *spectrum_buffer_; } + + // Returns a reference to the block buffer. + const BlockBuffer& GetBlockBuffer() const { return *block_buffer_; } + + private: + const BlockBuffer* const block_buffer_; + const SpectrumBuffer* const spectrum_buffer_; + const FftBuffer* const fft_buffer_; + bool render_activity_ = false; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build new file mode 100644 index 0000000000..b7a10f5d7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + 
DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("render_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc new file mode 100644 index 0000000000..5d9d646e76 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_buffer.h" + +#include +#include +#include + +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for non-null fft buffer. +TEST(RenderBufferDeathTest, NullExternalFftBuffer) { + BlockBuffer block_buffer(10, 3, 1); + SpectrumBuffer spectrum_buffer(10, 1); + EXPECT_DEATH(RenderBuffer(&block_buffer, &spectrum_buffer, nullptr), ""); +} + +// Verifies the check for non-null spectrum buffer. +TEST(RenderBufferDeathTest, NullExternalSpectrumBuffer) { + FftBuffer fft_buffer(10, 1); + BlockBuffer block_buffer(10, 3, 1); + EXPECT_DEATH(RenderBuffer(&block_buffer, nullptr, &fft_buffer), ""); +} + +// Verifies the check for non-null block buffer. +TEST(RenderBufferDeathTest, NullExternalBlockBuffer) { + FftBuffer fft_buffer(10, 1); + SpectrumBuffer spectrum_buffer(10, 1); + EXPECT_DEATH(RenderBuffer(nullptr, &spectrum_buffer, &fft_buffer), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc new file mode 100644 index 0000000000..ec5d35507e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc @@ -0,0 +1,519 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/alignment_mixer.h"
+#include "modules/audio_processing/aec3/block_buffer.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/fft_buffer.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/spectrum_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+bool UpdateCaptureCallCounterOnSkippedBlocks() {
+  return !field_trial::IsEnabled(
+      "WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch");
+}
+
+class RenderDelayBufferImpl final : public RenderDelayBuffer {
+ public:
+  RenderDelayBufferImpl(const EchoCanceller3Config& config,
+                        int sample_rate_hz,
+                        size_t num_render_channels);
+  RenderDelayBufferImpl() = delete;
+  ~RenderDelayBufferImpl() override;
+
+  void Reset() override;
+  BufferingEvent Insert(const Block& block) override;
+  BufferingEvent PrepareCaptureProcessing() override;
+  void HandleSkippedCaptureProcessing() override;
+  bool AlignFromDelay(size_t delay) override;
+  void AlignFromExternalDelay() override;
+  size_t Delay() const override { return ComputeDelay(); }
+  size_t MaxDelay() const override {
+    return blocks_.buffer.size() - 1 - buffer_headroom_;
+  }
+  RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; }
+
+  const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override {
+    return low_rate_;
+  }
+
+  int BufferLatency() const;
+  void SetAudioBufferDelay(int delay_ms) override;
+  bool HasReceivedBufferDelay() override;
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const Aec3Optimization optimization_;
+  const EchoCanceller3Config config_;
+  const bool update_capture_call_counter_on_skipped_blocks_;
+  const float render_linear_amplitude_gain_;
+  const rtc::LoggingSeverity delay_log_level_;
+  size_t down_sampling_factor_;
+  const int sub_block_size_;
+  BlockBuffer blocks_;
+  SpectrumBuffer spectra_;
+  FftBuffer ffts_;
+  absl::optional<size_t> delay_;
+  RenderBuffer echo_remover_buffer_;
+  DownsampledRenderBuffer low_rate_;
+  AlignmentMixer render_mixer_;
+  Decimator render_decimator_;
+  const Aec3Fft fft_;
+  std::vector<float> render_ds_;
+  const int buffer_headroom_;
+  bool last_call_was_render_ = false;
+  int num_api_calls_in_a_row_ = 0;
+  int max_observed_jitter_ = 1;
+  int64_t capture_call_counter_ = 0;
+  int64_t render_call_counter_ = 0;
+  bool render_activity_ = false;
+  size_t render_activity_counter_ = 0;
+  absl::optional<int> external_audio_buffer_delay_;
+  bool external_audio_buffer_delay_verified_after_reset_ = false;
+  size_t min_latency_blocks_ = 0;
+  size_t excess_render_detection_counter_ = 0;
+
+  int MapDelayToTotalDelay(size_t delay) const;
+  int ComputeDelay() const;
+  void ApplyTotalDelay(int delay);
+  void InsertBlock(const Block& block, int previous_write);
+  bool DetectActiveRender(rtc::ArrayView<const float> x) const;
+  bool DetectExcessRenderBlocks();
+  void IncrementWriteIndices();
+  void IncrementLowRateReadIndices();
+  void IncrementReadIndices();
+  bool RenderOverrun();
+  bool RenderUnderrun();
+};
+
+std::atomic<int> RenderDelayBufferImpl::instance_count_ = 0;
+
+RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
+                                             int sample_rate_hz,
+                                             size_t num_render_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      optimization_(DetectOptimization()),
+      config_(config),
+      update_capture_call_counter_on_skipped_blocks_(
+          UpdateCaptureCallCounterOnSkippedBlocks()),
+      render_linear_amplitude_gain_(
+          std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
+      delay_log_level_(config_.delay.log_warning_on_delay_changes
+                           ? rtc::LS_WARNING
+                           : rtc::LS_VERBOSE),
+      down_sampling_factor_(config.delay.down_sampling_factor),
+      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
+                                           ? kBlockSize / down_sampling_factor_
+                                           : kBlockSize)),
+      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
+                                       config.delay.num_filters,
+                                       config.filter.refined.length_blocks),
+              NumBandsForRate(sample_rate_hz),
+              num_render_channels),
+      spectra_(blocks_.buffer.size(), num_render_channels),
+      ffts_(blocks_.buffer.size(), num_render_channels),
+      delay_(config_.delay.default_delay),
+      echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
+      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
+                                         config.delay.num_filters)),
+      render_mixer_(num_render_channels, config.delay.render_alignment_mixing),
+      render_decimator_(down_sampling_factor_),
+      fft_(),
+      render_ds_(sub_block_size_, 0.f),
+      buffer_headroom_(config.filter.refined.length_blocks) {
+  RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
+  RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
+  for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
+    RTC_DCHECK_EQ(blocks_.buffer[i].NumChannels(), ffts_.buffer[i].size());
+    RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
+  }
+
+  Reset();
+}
+
+RenderDelayBufferImpl::~RenderDelayBufferImpl() = default;
+
+// Resets the buffer delays and clears the reported delays.
+void RenderDelayBufferImpl::Reset() {
+  last_call_was_render_ = false;
+  num_api_calls_in_a_row_ = 1;
+  min_latency_blocks_ = 0;
+  excess_render_detection_counter_ = 0;
+
+  // Initialize the read index to one sub-block before the write index.
+  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, sub_block_size_);
+
+  // Check for any external audio buffer delay and whether it is feasible.
+  if (external_audio_buffer_delay_) {
+    const int headroom = 2;
+    size_t audio_buffer_delay_to_set;
+    // The minimum delay is 1 (like the low-rate render buffer).
+    if (*external_audio_buffer_delay_ <= headroom) {
+      audio_buffer_delay_to_set = 1;
+    } else {
+      audio_buffer_delay_to_set = *external_audio_buffer_delay_ - headroom;
+    }
+
+    audio_buffer_delay_to_set = std::min(audio_buffer_delay_to_set, MaxDelay());
+
+    // When an external delay estimate is available, use that delay as the
+    // initial render buffer delay.
+    ApplyTotalDelay(audio_buffer_delay_to_set);
+    delay_ = ComputeDelay();
+
+    external_audio_buffer_delay_verified_after_reset_ = false;
+  } else {
+    // When no external delay estimate is available, set the render buffer
+    // delays to the default delay.
+    ApplyTotalDelay(config_.delay.default_delay);
+
+    // Unset the delay which is otherwise set by AlignFromDelay.
+    delay_ = absl::nullopt;
+  }
+}
+
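A standalone sketch (not part of the upstream patch) of the headroom logic in Reset() above: an externally reported buffer delay is shortened by a two-block safety headroom, floored at one block, and capped at what the buffer can hold. The constant kMaxDelay below is a hypothetical stand-in for MaxDelay().

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

size_t InitialDelayFromExternalReport(size_t external_delay_blocks,
                                      size_t max_delay_blocks) {
  constexpr size_t kHeadroom = 2;  // Safety margin, as in Reset() above.
  // Keep at least one block of delay, like the low-rate render buffer.
  size_t delay = external_delay_blocks <= kHeadroom
                     ? 1
                     : external_delay_blocks - kHeadroom;
  // Never exceed what the buffer can hold.
  return std::min(delay, max_delay_blocks);
}

int main() {
  constexpr size_t kMaxDelay = 29;  // Hypothetical MaxDelay() value.
  std::cout << InitialDelayFromExternalReport(1, kMaxDelay) << "\n";   // 1
  std::cout << InitialDelayFromExternalReport(10, kMaxDelay) << "\n";  // 8
  std::cout << InitialDelayFromExternalReport(40, kMaxDelay) << "\n";  // 29
}
```

+// Inserts a new block into the render buffers.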
+RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
+    const Block& block) {
+  ++render_call_counter_;
+  if (delay_) {
+    if (!last_call_was_render_) {
+      last_call_was_render_ = true;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG_V(delay_log_level_)
+            << "New max number api jitter observed at render block "
+            << render_call_counter_ << ": " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  // Increase the write indices to where the new blocks should be written.
+  const int previous_write = blocks_.write;
+  IncrementWriteIndices();
+
+  // Allow the overrun and do a reset when a render overrun occurs, i.e., when
+  // more render data has been inserted than capture data has been received.
+  BufferingEvent event =
+      RenderOverrun() ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone;
+
+  // Detect and update render activity.
+  if (!render_activity_) {
+    render_activity_counter_ +=
+        DetectActiveRender(block.View(/*band=*/0, /*channel=*/0)) ? 1 : 0;
+    render_activity_ = render_activity_counter_ >= 20;
+  }
+
+  // Insert the new render block into the specified position.
+  InsertBlock(block, previous_write);
+
+  if (event != BufferingEvent::kNone) {
+    Reset();
+  }
+
+  return event;
+}
+
+void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
+  if (update_capture_call_counter_on_skipped_blocks_) {
+    ++capture_call_counter_;
+  }
+}
+
+// Prepares the render buffers for processing another capture block.
+RenderDelayBuffer::BufferingEvent
+RenderDelayBufferImpl::PrepareCaptureProcessing() {
+  RenderDelayBuffer::BufferingEvent event = BufferingEvent::kNone;
+  ++capture_call_counter_;
+
+  if (delay_) {
+    if (last_call_was_render_) {
+      last_call_was_render_ = false;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG_V(delay_log_level_)
+            << "New max number api jitter observed at capture block "
+            << capture_call_counter_ << ": " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  if (DetectExcessRenderBlocks()) {
+    // Too many render blocks compared to capture blocks. Risk of the delay
+    // ending up before the filter used by the delay estimator.
+    RTC_LOG_V(delay_log_level_)
+        << "Excess render blocks detected at block " << capture_call_counter_;
+    Reset();
+    event = BufferingEvent::kRenderOverrun;
+  } else if (RenderUnderrun()) {
+    // Don't increment the read indices of the low rate buffer if there is a
+    // render underrun.
+    RTC_LOG_V(delay_log_level_)
+        << "Render buffer underrun detected at block " << capture_call_counter_;
+    IncrementReadIndices();
+    // Incrementing the buffer index without increasing the low rate buffer
+    // index means that the delay is reduced by one.
+    if (delay_ && *delay_ > 0)
+      delay_ = *delay_ - 1;
+    event = BufferingEvent::kRenderUnderrun;
+  } else {
+    // Increment the read indices in the render buffers to point to the most
+    // recent block to use in the capture processing.
+    IncrementLowRateReadIndices();
+    IncrementReadIndices();
+  }
+
+  echo_remover_buffer_.SetRenderActivity(render_activity_);
+  if (render_activity_) {
+    render_activity_counter_ = 0;
+    render_activity_ = false;
+  }
+
+  return event;
+}
+
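For context, a minimal usage sketch (not part of the upstream patch, and assuming the WebRTC build environment) of how a caller drives the buffer and reacts to the two buffering events. The comments reflect the behavior implemented above: an overrun has already triggered an internal Reset(), and an underrun implicitly shortens the delay by one block.

```cpp
#include "modules/audio_processing/aec3/render_delay_buffer.h"

namespace webrtc {

void OneRenderCaptureCycle(RenderDelayBuffer& buffer,
                           const Block& render_block) {
  using Event = RenderDelayBuffer::BufferingEvent;

  Event render_event = buffer.Insert(render_block);
  if (render_event == Event::kRenderOverrun) {
    // More render than capture blocks arrived; the buffer has already reset
    // itself, so the caller only needs to note the glitch.
  }

  Event capture_event = buffer.PrepareCaptureProcessing();
  if (capture_event == Event::kRenderUnderrun) {
    // A capture block arrived without matching render data; the buffered
    // delay was implicitly reduced by one block.
  }
}

}  // namespace webrtc
```

+// Sets the delay and returns a bool indicating whether the delay was changed.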
+bool RenderDelayBufferImpl::AlignFromDelay(size_t delay) { + RTC_DCHECK(!config_.delay.use_external_delay_estimator); + if (!external_audio_buffer_delay_verified_after_reset_ && + external_audio_buffer_delay_ && delay_) { + int difference = static_cast(delay) - static_cast(*delay_); + RTC_LOG_V(delay_log_level_) + << "Mismatch between first estimated delay after reset " + "and externally reported audio buffer delay: " + << difference << " blocks"; + external_audio_buffer_delay_verified_after_reset_ = true; + } + if (delay_ && *delay_ == delay) { + return false; + } + delay_ = delay; + + // Compute the total delay and limit the delay to the allowed range. + int total_delay = MapDelayToTotalDelay(*delay_); + total_delay = + std::min(MaxDelay(), static_cast(std::max(total_delay, 0))); + + // Apply the delay to the buffers. + ApplyTotalDelay(total_delay); + return true; +} + +void RenderDelayBufferImpl::SetAudioBufferDelay(int delay_ms) { + if (!external_audio_buffer_delay_) { + RTC_LOG_V(delay_log_level_) + << "Receiving a first externally reported audio buffer delay of " + << delay_ms << " ms."; + } + + // Convert delay from milliseconds to blocks (rounded down). + external_audio_buffer_delay_ = delay_ms / 4; +} + +bool RenderDelayBufferImpl::HasReceivedBufferDelay() { + return external_audio_buffer_delay_.has_value(); +} + +// Maps the externally computed delay to the delay used internally. +int RenderDelayBufferImpl::MapDelayToTotalDelay( + size_t external_delay_blocks) const { + const int latency_blocks = BufferLatency(); + return latency_blocks + static_cast(external_delay_blocks); +} + +// Returns the delay (not including call jitter). +int RenderDelayBufferImpl::ComputeDelay() const { + const int latency_blocks = BufferLatency(); + int internal_delay = spectra_.read >= spectra_.write + ? spectra_.read - spectra_.write + : spectra_.size + spectra_.read - spectra_.write; + + return internal_delay - latency_blocks; +} + +// Set the read indices according to the delay. +void RenderDelayBufferImpl::ApplyTotalDelay(int delay) { + RTC_LOG_V(delay_log_level_) + << "Applying total delay of " << delay << " blocks."; + blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay); + spectra_.read = spectra_.OffsetIndex(spectra_.write, delay); + ffts_.read = ffts_.OffsetIndex(ffts_.write, delay); +} + +void RenderDelayBufferImpl::AlignFromExternalDelay() { + RTC_DCHECK(config_.delay.use_external_delay_estimator); + if (external_audio_buffer_delay_) { + const int64_t delay = render_call_counter_ - capture_call_counter_ + + *external_audio_buffer_delay_; + const int64_t delay_with_headroom = + delay - config_.delay.delay_headroom_samples / kBlockSize; + ApplyTotalDelay(delay_with_headroom); + } +} + +// Inserts a block into the render buffers. 
+void RenderDelayBufferImpl::InsertBlock(const Block& block, + int previous_write) { + auto& b = blocks_; + auto& lr = low_rate_; + auto& ds = render_ds_; + auto& f = ffts_; + auto& s = spectra_; + const size_t num_bands = b.buffer[b.write].NumBands(); + const size_t num_render_channels = b.buffer[b.write].NumChannels(); + RTC_DCHECK_EQ(block.NumBands(), num_bands); + RTC_DCHECK_EQ(block.NumChannels(), num_render_channels); + for (size_t band = 0; band < num_bands; ++band) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + std::copy(block.begin(band, ch), block.end(band, ch), + b.buffer[b.write].begin(band, ch)); + } + } + + if (render_linear_amplitude_gain_ != 1.f) { + for (size_t band = 0; band < num_bands; ++band) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + rtc::ArrayView b_view = + b.buffer[b.write].View(band, ch); + for (float& sample : b_view) { + sample *= render_linear_amplitude_gain_; + } + } + } + } + + std::array downmixed_render; + render_mixer_.ProduceOutput(b.buffer[b.write], downmixed_render); + render_decimator_.Decimate(downmixed_render, ds); + data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(), + 16000 / down_sampling_factor_, 1); + std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write); + for (int channel = 0; channel < b.buffer[b.write].NumChannels(); ++channel) { + fft_.PaddedFft(b.buffer[b.write].View(/*band=*/0, channel), + b.buffer[previous_write].View(/*band=*/0, channel), + &f.buffer[f.write][channel]); + f.buffer[f.write][channel].Spectrum(optimization_, + s.buffer[s.write][channel]); + } +} + +bool RenderDelayBufferImpl::DetectActiveRender( + rtc::ArrayView x) const { + const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); + return x_energy > (config_.render_levels.active_render_limit * + config_.render_levels.active_render_limit) * + kFftLengthBy2; +} + +bool RenderDelayBufferImpl::DetectExcessRenderBlocks() { + bool excess_render_detected = false; + const size_t latency_blocks = static_cast(BufferLatency()); + // The recently seen minimum latency in blocks. Should be close to 0. + min_latency_blocks_ = std::min(min_latency_blocks_, latency_blocks); + // After processing a configurable number of blocks the minimum latency is + // checked. + if (++excess_render_detection_counter_ >= + config_.buffering.excess_render_detection_interval_blocks) { + // If the minimum latency is not lower than the threshold there have been + // more render than capture frames. + excess_render_detected = min_latency_blocks_ > + config_.buffering.max_allowed_excess_render_blocks; + // Reset the counter and let the minimum latency be the current latency. + min_latency_blocks_ = latency_blocks; + excess_render_detection_counter_ = 0; + } + + data_dumper_->DumpRaw("aec3_latency_blocks", latency_blocks); + data_dumper_->DumpRaw("aec3_min_latency_blocks", min_latency_blocks_); + data_dumper_->DumpRaw("aec3_excess_render_detected", excess_render_detected); + return excess_render_detected; +} + +// Computes the latency in the buffer (the number of unread sub-blocks). +int RenderDelayBufferImpl::BufferLatency() const { + const DownsampledRenderBuffer& l = low_rate_; + int latency_samples = (l.buffer.size() + l.read - l.write) % l.buffer.size(); + int latency_blocks = latency_samples / sub_block_size_; + return latency_blocks; +} + +// Increments the write indices for the render buffers. 
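The modular arithmetic in BufferLatency() above is easiest to see in isolation. A standalone sketch (not part of the upstream patch) with hypothetical buffer dimensions:

```cpp
#include <iostream>

// Number of unread blocks in a circular buffer: the distance from the read
// index back to the write index, modulo the buffer size, divided by the
// sub-block size.
int LatencyBlocks(int buffer_size, int read, int write, int sub_block_size) {
  const int latency_samples = (buffer_size + read - write) % buffer_size;
  return latency_samples / sub_block_size;
}

int main() {
  // Hypothetical values: a 160-sample buffer with a sub-block size of 16
  // (64-sample blocks decimated by a down-sampling factor of 4).
  std::cout << LatencyBlocks(160, 48, 16, 16) << "\n";  // 2 blocks
  std::cout << LatencyBlocks(160, 0, 144, 16) << "\n";  // 1 block (wrapped)
}
```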
+void RenderDelayBufferImpl::IncrementWriteIndices() { + low_rate_.UpdateWriteIndex(-sub_block_size_); + blocks_.IncWriteIndex(); + spectra_.DecWriteIndex(); + ffts_.DecWriteIndex(); +} + +// Increments the read indices of the low rate render buffers. +void RenderDelayBufferImpl::IncrementLowRateReadIndices() { + low_rate_.UpdateReadIndex(-sub_block_size_); +} + +// Increments the read indices for the render buffers. +void RenderDelayBufferImpl::IncrementReadIndices() { + if (blocks_.read != blocks_.write) { + blocks_.IncReadIndex(); + spectra_.DecReadIndex(); + ffts_.DecReadIndex(); + } +} + +// Checks for a render buffer overrun. +bool RenderDelayBufferImpl::RenderOverrun() { + return low_rate_.read == low_rate_.write || blocks_.read == blocks_.write; +} + +// Checks for a render buffer underrun. +bool RenderDelayBufferImpl::RenderUnderrun() { + return low_rate_.read == low_rate_.write; +} + +} // namespace + +RenderDelayBuffer* RenderDelayBuffer::Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels) { + return new RenderDelayBufferImpl(config, sample_rate_hz, num_render_channels); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h new file mode 100644 index 0000000000..6dc1aefb85 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_ + +#include + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" + +namespace webrtc { + +// Class for buffering the incoming render blocks such that these may be +// extracted with a specified delay. +class RenderDelayBuffer { + public: + enum class BufferingEvent { + kNone, + kRenderUnderrun, + kRenderOverrun, + kApiCallSkew + }; + + static RenderDelayBuffer* Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels); + virtual ~RenderDelayBuffer() = default; + + // Resets the buffer alignment. + virtual void Reset() = 0; + + // Inserts a block into the buffer. + virtual BufferingEvent Insert(const Block& block) = 0; + + // Updates the buffers one step based on the specified buffer delay. Returns + // an enum indicating whether there was a special event that occurred. + virtual BufferingEvent PrepareCaptureProcessing() = 0; + + // Called on capture blocks where PrepareCaptureProcessing is not called. + virtual void HandleSkippedCaptureProcessing() = 0; + + // Sets the buffer delay and returns a bool indicating whether the delay + // changed. + virtual bool AlignFromDelay(size_t delay) = 0; + + // Sets the buffer delay from the most recently reported external delay. + virtual void AlignFromExternalDelay() = 0; + + // Gets the buffer delay. 
+  virtual size_t Delay() const = 0;
+
+  // Gets the maximum possible buffer delay.
+  virtual size_t MaxDelay() const = 0;
+
+  // Returns the render buffer for the echo remover.
+  virtual RenderBuffer* GetRenderBuffer() = 0;
+
+  // Returns the downsampled render buffer.
+  virtual const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const = 0;
+
+  // Returns the maximum non-causal offset that can occur in the delay buffer.
+  static int DelayEstimatorOffset(const EchoCanceller3Config& config);
+
+  // Provides an optional external estimate of the audio buffer delay.
+  virtual void SetAudioBufferDelay(int delay_ms) = 0;
+
+  // Returns whether an external delay estimate has been reported via
+  // SetAudioBufferDelay.
+  virtual bool HasReceivedBufferDelay() = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
new file mode 100644
index 0000000000..d51e06a1ac
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
@@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+}  // namespace
+
+// Verifies that the buffer overflow is correctly reported.
+TEST(RenderDelayBuffer, BufferOverflow) {
+  const EchoCanceller3Config config;
+  for (auto num_channels : {1, 2, 8}) {
+    for (auto rate : {16000, 32000, 48000}) {
+      SCOPED_TRACE(ProduceDebugText(rate));
+      std::unique_ptr<RenderDelayBuffer> delay_buffer(
+          RenderDelayBuffer::Create(config, rate, num_channels));
+      Block block_to_insert(NumBandsForRate(rate), num_channels);
+      for (size_t k = 0; k < 10; ++k) {
+        EXPECT_EQ(RenderDelayBuffer::BufferingEvent::kNone,
+                  delay_buffer->Insert(block_to_insert));
+      }
+      bool overrun_occurred = false;
+      for (size_t k = 0; k < 1000; ++k) {
+        RenderDelayBuffer::BufferingEvent event =
+            delay_buffer->Insert(block_to_insert);
+        overrun_occurred =
+            overrun_occurred ||
+            RenderDelayBuffer::BufferingEvent::kRenderOverrun == event;
+      }
+
+      EXPECT_TRUE(overrun_occurred);
+    }
+  }
+}
+
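The overflow test above never calls PrepareCaptureProcessing(), so only the write index advances. The standalone sketch below (not part of the upstream patch, with a hypothetical buffer size) shows why the overrun must trigger after roughly one buffer's worth of inserts.

```cpp
#include <iostream>

// Counts Insert() calls until the write index catches up with the read index
// in a ring buffer, i.e., until an overrun is reported.
int InsertsUntilOverrun(int buffer_size_blocks, int read, int write) {
  int inserts = 0;
  do {
    write = (write + 1) % buffer_size_blocks;  // Insert() without a matching
    ++inserts;                                 // PrepareCaptureProcessing().
  } while (write != read);
  return inserts;  // Write caught up with read: an overrun is reported.
}

int main() {
  // With a hypothetical 30-block buffer and read == write initially, the
  // full capacity is consumed before the overrun triggers.
  std::cout << InsertsUntilOverrun(30, 0, 0) << "\n";  // 30
}
```

+// Verifies that the check for available block works.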
+TEST(RenderDelayBuffer, AvailableBlock) { + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + std::unique_ptr delay_buffer(RenderDelayBuffer::Create( + EchoCanceller3Config(), kSampleRateHz, kNumChannels)); + Block input_block(kNumBands, kNumChannels, 1.0f); + EXPECT_EQ(RenderDelayBuffer::BufferingEvent::kNone, + delay_buffer->Insert(input_block)); + delay_buffer->PrepareCaptureProcessing(); +} + +// Verifies the AlignFromDelay method. +TEST(RenderDelayBuffer, AlignFromDelay) { + EchoCanceller3Config config; + std::unique_ptr delay_buffer( + RenderDelayBuffer::Create(config, 16000, 1)); + ASSERT_TRUE(delay_buffer->Delay()); + delay_buffer->Reset(); + size_t initial_internal_delay = 0; + for (size_t delay = initial_internal_delay; + delay < initial_internal_delay + 20; ++delay) { + ASSERT_TRUE(delay_buffer->AlignFromDelay(delay)); + EXPECT_EQ(delay, delay_buffer->Delay()); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for feasible delay. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(RenderDelayBufferDeathTest, DISABLED_WrongDelay) { + std::unique_ptr delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1)); + EXPECT_DEATH(delay_buffer->AlignFromDelay(21), ""); +} + +// Verifies the check for the number of bands in the inserted blocks. +TEST(RenderDelayBufferDeathTest, WrongNumberOfBands) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr delay_buffer(RenderDelayBuffer::Create( + EchoCanceller3Config(), rate, num_channels)); + Block block_to_insert( + NumBandsForRate(rate < 48000 ? rate + 16000 : 16000), num_channels); + EXPECT_DEATH(delay_buffer->Insert(block_to_insert), ""); + } + } +} + +// Verifies the check for the number of channels in the inserted blocks. +TEST(RenderDelayBufferDeathTest, WrongNumberOfChannels) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr delay_buffer(RenderDelayBuffer::Create( + EchoCanceller3Config(), rate, num_channels)); + Block block_to_insert(NumBandsForRate(rate), num_channels + 1); + EXPECT_DEATH(delay_buffer->Insert(block_to_insert), ""); + } + } +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc new file mode 100644 index 0000000000..465e77fb7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/render_delay_controller.h" + +#include + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/delay_estimate.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/echo_path_delay_estimator.h" +#include "modules/audio_processing/aec3/render_delay_controller_metrics.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +class RenderDelayControllerImpl final : public RenderDelayController { + public: + RenderDelayControllerImpl(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_capture_channels); + + RenderDelayControllerImpl() = delete; + RenderDelayControllerImpl(const RenderDelayControllerImpl&) = delete; + RenderDelayControllerImpl& operator=(const RenderDelayControllerImpl&) = + delete; + + ~RenderDelayControllerImpl() override; + void Reset(bool reset_delay_confidence) override; + void LogRenderCall() override; + absl::optional GetDelay( + const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const Block& capture) override; + bool HasClockdrift() const override; + + private: + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const int hysteresis_limit_blocks_; + absl::optional delay_; + EchoPathDelayEstimator delay_estimator_; + RenderDelayControllerMetrics metrics_; + absl::optional delay_samples_; + size_t capture_call_counter_ = 0; + int delay_change_counter_ = 0; + DelayEstimate::Quality last_delay_estimate_quality_; +}; + +DelayEstimate ComputeBufferDelay( + const absl::optional& current_delay, + int hysteresis_limit_blocks, + DelayEstimate estimated_delay) { + // Compute the buffer delay increase required to achieve the desired latency. + size_t new_delay_blocks = estimated_delay.delay >> kBlockSizeLog2; + // Add hysteresis. 
+ if (current_delay) { + size_t current_delay_blocks = current_delay->delay; + if (new_delay_blocks > current_delay_blocks && + new_delay_blocks <= current_delay_blocks + hysteresis_limit_blocks) { + new_delay_blocks = current_delay_blocks; + } + } + DelayEstimate new_delay = estimated_delay; + new_delay.delay = new_delay_blocks; + return new_delay; +} + +std::atomic RenderDelayControllerImpl::instance_count_(0); + +RenderDelayControllerImpl::RenderDelayControllerImpl( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + hysteresis_limit_blocks_( + static_cast(config.delay.hysteresis_limit_blocks)), + delay_estimator_(data_dumper_.get(), config, num_capture_channels), + last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); + delay_estimator_.LogDelayEstimationProperties(sample_rate_hz, 0); +} + +RenderDelayControllerImpl::~RenderDelayControllerImpl() = default; + +void RenderDelayControllerImpl::Reset(bool reset_delay_confidence) { + delay_ = absl::nullopt; + delay_samples_ = absl::nullopt; + delay_estimator_.Reset(reset_delay_confidence); + delay_change_counter_ = 0; + if (reset_delay_confidence) { + last_delay_estimate_quality_ = DelayEstimate::Quality::kCoarse; + } +} + +void RenderDelayControllerImpl::LogRenderCall() {} + +absl::optional RenderDelayControllerImpl::GetDelay( + const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const Block& capture) { + ++capture_call_counter_; + + auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture); + + if (delay_samples) { + if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) { + delay_change_counter_ = 0; + } + if (delay_samples_) { + delay_samples_->blocks_since_last_change = + delay_samples_->delay == delay_samples->delay + ? delay_samples_->blocks_since_last_change + 1 + : 0; + delay_samples_->blocks_since_last_update = 0; + delay_samples_->delay = delay_samples->delay; + delay_samples_->quality = delay_samples->quality; + } else { + delay_samples_ = delay_samples; + } + } else { + if (delay_samples_) { + ++delay_samples_->blocks_since_last_change; + ++delay_samples_->blocks_since_last_update; + } + } + + if (delay_change_counter_ < 2 * kNumBlocksPerSecond) { + ++delay_change_counter_; + } + + if (delay_samples_) { + // Compute the render delay buffer delay. + const bool use_hysteresis = + last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined && + delay_samples_->quality == DelayEstimate::Quality::kRefined; + delay_ = ComputeBufferDelay( + delay_, use_hysteresis ? hysteresis_limit_blocks_ : 0, *delay_samples_); + last_delay_estimate_quality_ = delay_samples_->quality; + } + + metrics_.Update( + delay_samples_ ? absl::optional(delay_samples_->delay) + : absl::nullopt, + delay_ ? absl::optional(delay_->delay) : absl::nullopt, + delay_estimator_.Clockdrift()); + + data_dumper_->DumpRaw("aec3_render_delay_controller_delay", + delay_samples ? delay_samples->delay : 0); + data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay", + delay_ ? 
delay_->delay : 0); + + return delay_; +} + +bool RenderDelayControllerImpl::HasClockdrift() const { + return delay_estimator_.Clockdrift() != ClockdriftDetector::Level::kNone; +} + +} // namespace + +RenderDelayController* RenderDelayController::Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_capture_channels) { + return new RenderDelayControllerImpl(config, sample_rate_hz, + num_capture_channels); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h new file mode 100644 index 0000000000..4a18a11e36 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/delay_estimate.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Class for aligning the render and capture signal using a RenderDelayBuffer. +class RenderDelayController { + public: + static RenderDelayController* Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_capture_channels); + virtual ~RenderDelayController() = default; + + // Resets the delay controller. If the delay confidence is reset, the reset + // behavior is as if the call is restarted. + virtual void Reset(bool reset_delay_confidence) = 0; + + // Logs a render call. + virtual void LogRenderCall() = 0; + + // Aligns the render buffer content with the capture signal. + virtual absl::optional GetDelay( + const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const Block& capture) = 0; + + // Returns true if clockdrift has been detected. + virtual bool HasClockdrift() const = 0; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc new file mode 100644 index 0000000000..1e0a0f443e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+enum class DelayReliabilityCategory {
+  kNone,
+  kPoor,
+  kMedium,
+  kGood,
+  kExcellent,
+  kNumCategories
+};
+enum class DelayChangesCategory {
+  kNone,
+  kFew,
+  kSeveral,
+  kMany,
+  kConstant,
+  kNumCategories
+};
+
+}  // namespace
+
+RenderDelayControllerMetrics::RenderDelayControllerMetrics() = default;
+
+void RenderDelayControllerMetrics::Update(
+    absl::optional<size_t> delay_samples,
+    absl::optional<size_t> buffer_delay_blocks,
+    ClockdriftDetector::Level clockdrift) {
+  ++call_counter_;
+
+  if (!initial_update) {
+    size_t delay_blocks;
+    if (delay_samples) {
+      ++reliable_delay_estimate_counter_;
+      // Add an offset of 2 (which becomes 1 after the value is halved before
+      // reporting) to reserve 0 for an absent delay.
+      delay_blocks = (*delay_samples) / kBlockSize + 2;
+    } else {
+      delay_blocks = 0;
+    }
+
+    if (delay_blocks != delay_blocks_) {
+      ++delay_change_counter_;
+      delay_blocks_ = delay_blocks;
+    }
+
+  } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
+    initial_update = false;
+  }
+
+  if (call_counter_ == kMetricsReportingIntervalBlocks) {
+    int value_to_report = static_cast<int>(delay_blocks_);
+    // Divide by 2 to compress the metric range.
+    value_to_report = std::min(124, value_to_report >> 1);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.EchoPathDelay",
+                                value_to_report, 0, 124, 125);
+
+    // Divide by 2 to compress the metric range and add an offset of 2 (1
+    // after halving) to reserve 0 for an absent delay.
+    value_to_report = buffer_delay_blocks ? (*buffer_delay_blocks + 2) >> 1 : 0;
+    value_to_report = std::min(124, value_to_report);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.BufferDelay",
+                                value_to_report, 0, 124, 125);
+
+    DelayReliabilityCategory delay_reliability;
+    if (reliable_delay_estimate_counter_ == 0) {
+      delay_reliability = DelayReliabilityCategory::kNone;
+    } else if (reliable_delay_estimate_counter_ > (call_counter_ >> 1)) {
+      delay_reliability = DelayReliabilityCategory::kExcellent;
+    } else if (reliable_delay_estimate_counter_ > 100) {
+      delay_reliability = DelayReliabilityCategory::kGood;
+    } else if (reliable_delay_estimate_counter_ > 10) {
+      delay_reliability = DelayReliabilityCategory::kMedium;
+    } else {
+      delay_reliability = DelayReliabilityCategory::kPoor;
+    }
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates",
+        static_cast<int>(delay_reliability),
+        static_cast<int>(DelayReliabilityCategory::kNumCategories));
+
+    DelayChangesCategory delay_changes;
+    if (delay_change_counter_ == 0) {
+      delay_changes = DelayChangesCategory::kNone;
+    } else if (delay_change_counter_ > 10) {
+      delay_changes = DelayChangesCategory::kConstant;
+    } else if (delay_change_counter_ > 5) {
+      delay_changes = DelayChangesCategory::kMany;
+    } else if (delay_change_counter_ > 2) {
+      delay_changes = DelayChangesCategory::kSeveral;
+    } else {
+      delay_changes = DelayChangesCategory::kFew;
+    }
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.DelayChanges",
+        static_cast<int>(delay_changes),
+        static_cast<int>(DelayChangesCategory::kNumCategories));
+
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.Clockdrift", static_cast<int>(clockdrift),
+        static_cast<int>(ClockdriftDetector::Level::kNumCategories));
+
+    call_counter_ = 0;
+    ResetMetrics();
+  }
+}
+
+void RenderDelayControllerMetrics::ResetMetrics() {
+  delay_change_counter_ = 0;
+  reliable_delay_estimate_counter_ = 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h
new file mode 100644
index 0000000000..b81833b43f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/aec3/clockdrift_detector.h"
+
+namespace webrtc {
+
+// Handles the reporting of metrics for the render delay controller.
+class RenderDelayControllerMetrics {
+ public:
+  RenderDelayControllerMetrics();
+
+  RenderDelayControllerMetrics(const RenderDelayControllerMetrics&) = delete;
+  RenderDelayControllerMetrics& operator=(
+      const RenderDelayControllerMetrics&) = delete;
+
+  // Updates the metric with new data.
+  void Update(absl::optional<size_t> delay_samples,
+              absl::optional<size_t> buffer_delay_blocks,
+              ClockdriftDetector::Level clockdrift);
+
+ private:
+  // Resets the metrics.
+  void ResetMetrics();
+
+  size_t delay_blocks_ = 0;
+  int reliable_delay_estimate_counter_ = 0;
+  int delay_change_counter_ = 0;
+  int call_counter_ = 0;
+  int initial_call_counter_ = 0;
+  bool initial_update = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc
new file mode 100644
index 0000000000..cf9df6b297
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "system_wrappers/include/metrics.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
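A standalone sketch (not part of the upstream patch) of the quantization that Update() applies before reporting a delay to the histograms: an offset of 2 reserves the value 0 for an absent estimate, and halving compresses the range into the 0-124 linear buckets.

```cpp
#include <algorithm>
#include <iostream>

// Maps a delay in blocks to the histogram value, mirroring the offset,
// halving, and clamping in RenderDelayControllerMetrics::Update() above.
int ReportedEchoPathDelay(bool has_estimate, int delay_blocks) {
  const int offset_value = has_estimate ? delay_blocks + 2 : 0;
  return std::min(124, offset_value >> 1);
}

int main() {
  std::cout << ReportedEchoPathDelay(false, 0) << "\n";   // 0 (absent)
  std::cout << ReportedEchoPathDelay(true, 0) << "\n";    // 1
  std::cout << ReportedEchoPathDelay(true, 40) << "\n";   // 21
  std::cout << ReportedEchoPathDelay(true, 500) << "\n";  // 124 (clamped)
}
```

+// Verify the general functionality of RenderDelayControllerMetrics.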
+TEST(RenderDelayControllerMetrics, NormalUsage) { + metrics::Reset(); + + RenderDelayControllerMetrics metrics; + + int expected_num_metric_reports = 0; + + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.Update(absl::nullopt, absl::nullopt, + ClockdriftDetector::Level::kNone); + } + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.EchoPathDelay"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.BufferDelay"), + expected_num_metric_reports); + EXPECT_METRIC_EQ(metrics::NumSamples( + "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.DelayChanges"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.Clockdrift"), + expected_num_metric_reports); + + // We expect metric reports every kMetricsReportingIntervalBlocks blocks. + ++expected_num_metric_reports; + + metrics.Update(absl::nullopt, absl::nullopt, + ClockdriftDetector::Level::kNone); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.EchoPathDelay"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.BufferDelay"), + expected_num_metric_reports); + EXPECT_METRIC_EQ(metrics::NumSamples( + "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.DelayChanges"), + expected_num_metric_reports); + EXPECT_METRIC_EQ( + metrics::NumSamples("WebRTC.Audio.EchoCanceller.Clockdrift"), + expected_num_metric_reports); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc new file mode 100644 index 0000000000..e1a54fca9e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/render_delay_controller.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor.h" +#include "modules/audio_processing/aec3/decimator.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +std::string ProduceDebugText(int sample_rate_hz, + size_t delay, + size_t num_render_channels, + size_t num_capture_channels) { + rtc::StringBuilder ss; + ss << ProduceDebugText(sample_rate_hz) << ", Delay: " << delay + << ", Num render channels: " << num_render_channels + << ", Num capture channels: " << num_capture_channels; + return ss.Release(); +} + +constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; + +} // namespace + +// Verifies the output of GetDelay when there are no AnalyzeRender calls. +// TODO(bugs.webrtc.org/11161): Re-enable tests. +TEST(RenderDelayController, DISABLED_NoRenderSignal) { + for (size_t num_render_channels : {1, 2, 8}) { + Block block(/*num_bands=1*/ 1, /*num_channels=*/1); + EchoCanceller3Config config; + for (size_t num_matched_filters = 4; num_matched_filters <= 10; + num_matched_filters++) { + for (auto down_sampling_factor : kDownSamplingFactors) { + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = num_matched_filters; + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr delay_buffer( + RenderDelayBuffer::Create(config, rate, num_render_channels)); + std::unique_ptr delay_controller( + RenderDelayController::Create(config, rate, + /*num_capture_channels*/ 1)); + for (size_t k = 0; k < 100; ++k) { + auto delay = delay_controller->GetDelay( + delay_buffer->GetDownsampledRenderBuffer(), + delay_buffer->Delay(), block); + EXPECT_FALSE(delay->delay); + } + } + } + } + } +} + +// Verifies the basic API call sequence. +// TODO(bugs.webrtc.org/11161): Re-enable tests. 
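The tests size their blocks with NumBandsForRate(). For reference, AEC3 splits fullband audio into 16 kHz bands processed in 64-sample blocks, so the mapping is simply rate / 16000; the one-liner below reproduces it for illustration (the function itself lives in aec3_common.h). The disabled test that follows relies on this when constructing render blocks.

```cpp
#include <cstddef>
#include <iostream>

// Mirrors webrtc::NumBandsForRate(): one band per 16 kHz of sample rate.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
  return static_cast<size_t>(sample_rate_hz / 16000);
}

int main() {
  for (int rate : {16000, 32000, 48000}) {
    std::cout << rate << " Hz -> " << NumBandsForRate(rate) << " band(s)\n";
  }
}
```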
+TEST(RenderDelayController, DISABLED_BasicApiCalls) { + for (size_t num_capture_channels : {1, 2, 4}) { + for (size_t num_render_channels : {1, 2, 8}) { + Block capture_block(/*num_bands=*/1, num_capture_channels); + absl::optional delay_blocks; + for (size_t num_matched_filters = 4; num_matched_filters <= 10; + num_matched_filters++) { + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = num_matched_filters; + config.delay.capture_alignment_mixing.downmix = false; + config.delay.capture_alignment_mixing.adaptive_selection = false; + + for (auto rate : {16000, 32000, 48000}) { + Block render_block(NumBandsForRate(rate), num_render_channels); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, rate, num_render_channels)); + std::unique_ptr delay_controller( + RenderDelayController::Create(EchoCanceller3Config(), rate, + num_capture_channels)); + for (size_t k = 0; k < 10; ++k) { + render_delay_buffer->Insert(render_block); + render_delay_buffer->PrepareCaptureProcessing(); + + delay_blocks = delay_controller->GetDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), capture_block); + } + EXPECT_TRUE(delay_blocks); + EXPECT_FALSE(delay_blocks->delay); + } + } + } + } + } +} + +// Verifies that the RenderDelayController is able to align the signals for +// simple timeshifts between the signals. +// TODO(bugs.webrtc.org/11161): Re-enable tests. +TEST(RenderDelayController, DISABLED_Alignment) { + Random random_generator(42U); + for (size_t num_capture_channels : {1, 2, 4}) { + Block capture_block(/*num_bands=*/1, num_capture_channels); + for (size_t num_matched_filters = 4; num_matched_filters <= 10; + num_matched_filters++) { + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = num_matched_filters; + config.delay.capture_alignment_mixing.downmix = false; + config.delay.capture_alignment_mixing.adaptive_selection = false; + + for (size_t num_render_channels : {1, 2, 8}) { + for (auto rate : {16000, 32000, 48000}) { + Block render_block(NumBandsForRate(rate), num_render_channels); + + for (size_t delay_samples : {15, 50, 150, 200, 800, 4000}) { + absl::optional delay_blocks; + SCOPED_TRACE(ProduceDebugText(rate, delay_samples, + num_render_channels, + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, rate, num_render_channels)); + std::unique_ptr delay_controller( + RenderDelayController::Create(config, rate, + num_capture_channels)); + DelayBuffer signal_delay_buffer(delay_samples); + for (size_t k = 0; k < (400 + delay_samples / kBlockSize); ++k) { + for (int band = 0; band < render_block.NumBands(); ++band) { + for (int channel = 0; channel < render_block.NumChannels(); + ++channel) { + RandomizeSampleVector(&random_generator, + render_block.View(band, channel)); + } + } + signal_delay_buffer.Delay( + render_block.View(/*band=*/0, /*channel=*/0), + capture_block.View(/*band=*/0, /*channel=*/0)); + render_delay_buffer->Insert(render_block); + render_delay_buffer->PrepareCaptureProcessing(); + delay_blocks = delay_controller->GetDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), capture_block); + } + ASSERT_TRUE(!!delay_blocks); + + constexpr int kDelayHeadroomBlocks = 1; + size_t 
expected_delay_blocks = + std::max(0, static_cast(delay_samples / kBlockSize) - + kDelayHeadroomBlocks); + + EXPECT_EQ(expected_delay_blocks, delay_blocks->delay); + } + } + } + } + } + } +} + +// Verifies that the RenderDelayController is able to properly handle noncausal +// delays. +// TODO(bugs.webrtc.org/11161): Re-enable tests. +TEST(RenderDelayController, DISABLED_NonCausalAlignment) { + Random random_generator(42U); + for (size_t num_capture_channels : {1, 2, 4}) { + for (size_t num_render_channels : {1, 2, 8}) { + for (size_t num_matched_filters = 4; num_matched_filters <= 10; + num_matched_filters++) { + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = num_matched_filters; + config.delay.capture_alignment_mixing.downmix = false; + config.delay.capture_alignment_mixing.adaptive_selection = false; + for (auto rate : {16000, 32000, 48000}) { + Block render_block(NumBandsForRate(rate), num_render_channels); + Block capture_block(NumBandsForRate(rate), num_capture_channels); + + for (int delay_samples : {-15, -50, -150, -200}) { + absl::optional delay_blocks; + SCOPED_TRACE(ProduceDebugText(rate, -delay_samples, + num_render_channels, + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, rate, num_render_channels)); + std::unique_ptr delay_controller( + RenderDelayController::Create(EchoCanceller3Config(), rate, + num_capture_channels)); + DelayBuffer signal_delay_buffer(-delay_samples); + for (int k = 0; + k < (400 - delay_samples / static_cast(kBlockSize)); + ++k) { + RandomizeSampleVector( + &random_generator, + capture_block.View(/*band=*/0, /*channel=*/0)); + signal_delay_buffer.Delay( + capture_block.View(/*band=*/0, /*channel=*/0), + render_block.View(/*band=*/0, /*channel=*/0)); + render_delay_buffer->Insert(render_block); + render_delay_buffer->PrepareCaptureProcessing(); + delay_blocks = delay_controller->GetDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), capture_block); + } + + ASSERT_FALSE(delay_blocks); + } + } + } + } + } + } +} + +// Verifies that the RenderDelayController is able to align the signals for +// simple timeshifts between the signals when there is jitter in the API calls. +// TODO(bugs.webrtc.org/11161): Re-enable tests. 
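A standalone sketch (not part of the upstream patch) of the expectation computed in the alignment tests above: the delay in samples is converted to 64-sample blocks, reduced by one block of delay headroom, and floored at zero.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

constexpr size_t kBlockSize = 64;
constexpr int kDelayHeadroomBlocks = 1;

// Expected aligned delay in blocks for a given delay in samples.
size_t ExpectedDelayBlocks(size_t delay_samples) {
  return std::max(
      0, static_cast<int>(delay_samples / kBlockSize) - kDelayHeadroomBlocks);
}

int main() {
  std::cout << ExpectedDelayBlocks(15) << "\n";    // 0 (below one block)
  std::cout << ExpectedDelayBlocks(200) << "\n";   // 2
  std::cout << ExpectedDelayBlocks(4000) << "\n";  // 61
}
```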
+TEST(RenderDelayController, DISABLED_AlignmentWithJitter) { + Random random_generator(42U); + for (size_t num_capture_channels : {1, 2, 4}) { + for (size_t num_render_channels : {1, 2, 8}) { + Block capture_block( + /*num_bands=*/1, num_capture_channels); + for (size_t num_matched_filters = 4; num_matched_filters <= 10; + num_matched_filters++) { + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = num_matched_filters; + config.delay.capture_alignment_mixing.downmix = false; + config.delay.capture_alignment_mixing.adaptive_selection = false; + + for (auto rate : {16000, 32000, 48000}) { + Block render_block(NumBandsForRate(rate), num_render_channels); + for (size_t delay_samples : {15, 50, 300, 800}) { + absl::optional delay_blocks; + SCOPED_TRACE(ProduceDebugText(rate, delay_samples, + num_render_channels, + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, rate, num_render_channels)); + std::unique_ptr delay_controller( + RenderDelayController::Create(config, rate, + num_capture_channels)); + DelayBuffer signal_delay_buffer(delay_samples); + constexpr size_t kMaxTestJitterBlocks = 26; + for (size_t j = 0; j < (1000 + delay_samples / kBlockSize) / + kMaxTestJitterBlocks + + 1; + ++j) { + std::vector capture_block_buffer; + for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) { + RandomizeSampleVector( + &random_generator, + render_block.View(/*band=*/0, /*channel=*/0)); + signal_delay_buffer.Delay( + render_block.View(/*band=*/0, /*channel=*/0), + capture_block.View(/*band=*/0, /*channel=*/0)); + capture_block_buffer.push_back(capture_block); + render_delay_buffer->Insert(render_block); + } + for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) { + render_delay_buffer->PrepareCaptureProcessing(); + delay_blocks = delay_controller->GetDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), capture_block_buffer[k]); + } + } + + constexpr int kDelayHeadroomBlocks = 1; + size_t expected_delay_blocks = + std::max(0, static_cast(delay_samples / kBlockSize) - + kDelayHeadroomBlocks); + if (expected_delay_blocks < 2) { + expected_delay_blocks = 0; + } + + ASSERT_TRUE(delay_blocks); + EXPECT_EQ(expected_delay_blocks, delay_blocks->delay); + } + } + } + } + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for correct sample rate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(RenderDelayControllerDeathTest, DISABLED_WrongSampleRate) { + for (auto rate : {-1, 0, 8001, 16001}) { + SCOPED_TRACE(ProduceDebugText(rate)); + EchoCanceller3Config config; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, rate, 1)); + EXPECT_DEATH( + std::unique_ptr( + RenderDelayController::Create(EchoCanceller3Config(), rate, 1)), + ""); + } +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc new file mode 100644 index 0000000000..bfbeb0ec2e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+constexpr size_t kCounterThreshold = 5;
+
+// Identifies local bands with narrow characteristics.
+void IdentifySmallNarrowBandRegions(
+    const RenderBuffer& render_buffer,
+    const absl::optional<size_t>& delay_partitions,
+    std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) {
+  RTC_DCHECK(narrow_band_counters);
+
+  if (!delay_partitions) {
+    narrow_band_counters->fill(0);
+    return;
+  }
+
+  std::array<size_t, kFftLengthBy2 - 1> channel_counters;
+  channel_counters.fill(0);
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
+      render_buffer.Spectrum(*delay_partitions);
+  for (size_t ch = 0; ch < X2.size(); ++ch) {
+    for (size_t k = 1; k < kFftLengthBy2; ++k) {
+      if (X2[ch][k] > 3 * std::max(X2[ch][k - 1], X2[ch][k + 1])) {
+        ++channel_counters[k - 1];
+      }
+    }
+  }
+  for (size_t k = 1; k < kFftLengthBy2; ++k) {
+    (*narrow_band_counters)[k - 1] =
+        channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0;
+  }
+}
+
+// Identifies whether the signal has a single strong narrow-band component.
+void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
+                                       int strong_peak_freeze_duration,
+                                       absl::optional<int>* narrow_peak_band,
+                                       size_t* narrow_peak_counter) {
+  RTC_DCHECK(narrow_peak_band);
+  RTC_DCHECK(narrow_peak_counter);
+  if (*narrow_peak_band &&
+      ++(*narrow_peak_counter) >
+          static_cast<size_t>(strong_peak_freeze_duration)) {
+    *narrow_peak_band = absl::nullopt;
+  }
+
+  const Block& x_latest = render_buffer.GetBlock(0);
+  float max_peak_level = 0.f;
+  for (int channel = 0; channel < x_latest.NumChannels(); ++channel) {
+    rtc::ArrayView<const float> X2_latest =
+        render_buffer.Spectrum(0)[channel];
+
+    // Identify the spectral peak.
+    const int peak_bin =
+        static_cast<int>(std::max_element(X2_latest.begin(), X2_latest.end()) -
+                         X2_latest.begin());
+
+    // Compute the level around the peak.
+    float non_peak_power = 0.f;
+    for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
+      non_peak_power = std::max(X2_latest[k], non_peak_power);
+    }
+    for (int k = peak_bin + 5;
+         k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1));
+         ++k) {
+      non_peak_power = std::max(X2_latest[k], non_peak_power);
+    }
+
+    // Assess the render signal strength.
+    auto result0 = std::minmax_element(x_latest.begin(/*band=*/0, channel),
+                                       x_latest.end(/*band=*/0, channel));
+    float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
+
+    if (x_latest.NumBands() > 1) {
+      const auto result1 =
+          std::minmax_element(x_latest.begin(/*band=*/1, channel),
+                              x_latest.end(/*band=*/1, channel));
+      max_abs =
+          std::max(max_abs, static_cast<float>(std::max(
+                                fabs(*result1.first), fabs(*result1.second))));
+    }
+
+    // Detect whether the spectral peak has a strong narrowband nature.
+    const float peak_level = X2_latest[peak_bin];
+    if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) {
+      // Store the strongest peak across channels.
+ if (peak_level > max_peak_level) { + max_peak_level = peak_level; + *narrow_peak_band = peak_bin; + *narrow_peak_counter = 0; + } + } + } +} + +} // namespace + +RenderSignalAnalyzer::RenderSignalAnalyzer(const EchoCanceller3Config& config) + : strong_peak_freeze_duration_(config.filter.refined.length_blocks) { + narrow_band_counters_.fill(0); +} +RenderSignalAnalyzer::~RenderSignalAnalyzer() = default; + +void RenderSignalAnalyzer::Update( + const RenderBuffer& render_buffer, + const absl::optional& delay_partitions) { + // Identify bands of narrow nature. + IdentifySmallNarrowBandRegions(render_buffer, delay_partitions, + &narrow_band_counters_); + + // Identify the presence of a strong narrow band. + IdentifyStrongNarrowBandComponent(render_buffer, strong_peak_freeze_duration_, + &narrow_peak_band_, &narrow_peak_counter_); +} + +void RenderSignalAnalyzer::MaskRegionsAroundNarrowBands( + std::array* v) const { + RTC_DCHECK(v); + + // Set v to zero around narrow band signal regions. + if (narrow_band_counters_[0] > kCounterThreshold) { + (*v)[1] = (*v)[0] = 0.f; + } + for (size_t k = 2; k < kFftLengthBy2 - 1; ++k) { + if (narrow_band_counters_[k - 1] > kCounterThreshold) { + (*v)[k - 2] = (*v)[k - 1] = (*v)[k] = (*v)[k + 1] = (*v)[k + 2] = 0.f; + } + } + if (narrow_band_counters_[kFftLengthBy2 - 2] > kCounterThreshold) { + (*v)[kFftLengthBy2] = (*v)[kFftLengthBy2 - 1] = 0.f; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h new file mode 100644 index 0000000000..2e4aaa4ba7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_ + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Provides functionality for analyzing the properties of the render signal. +class RenderSignalAnalyzer { + public: + explicit RenderSignalAnalyzer(const EchoCanceller3Config& config); + ~RenderSignalAnalyzer(); + + RenderSignalAnalyzer(const RenderSignalAnalyzer&) = delete; + RenderSignalAnalyzer& operator=(const RenderSignalAnalyzer&) = delete; + + // Updates the render signal analysis with the most recent render signal. + void Update(const RenderBuffer& render_buffer, + const absl::optional& delay_partitions); + + // Returns true if the render signal is poorly exciting. + bool PoorSignalExcitation() const { + RTC_DCHECK_LT(2, narrow_band_counters_.size()); + return std::any_of(narrow_band_counters_.begin(), + narrow_band_counters_.end(), + [](size_t a) { return a > 10; }); + } + + // Zeros the array around regions with narrow bands signal characteristics. 
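// For a bin k whose counter exceeds the detection threshold, the bins
// k - 2 ... k + 2 of the passed array are zeroed (one-sided at the array
// edges). Minimal usage sketch, assuming `analyzer` is an updated
// RenderSignalAnalyzer:
//
//   std::array<float, kFftLengthBy2Plus1> gain;
//   gain.fill(1.f);
//   analyzer.MaskRegionsAroundNarrowBands(&gain);
//   // gain[k] is now 0.f within +/-2 bins of any detected narrow band.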
+ void MaskRegionsAroundNarrowBands( + std::array* v) const; + + absl::optional NarrowPeakBand() const { return narrow_peak_band_; } + + private: + const int strong_peak_freeze_duration_; + std::array narrow_band_counters_; + absl::optional narrow_peak_band_; + size_t narrow_peak_counter_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc new file mode 100644 index 0000000000..16f6280cb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_signal_analyzer.h" + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr float kPi = 3.141592f; + +void ProduceSinusoidInNoise(int sample_rate_hz, + size_t sinusoid_channel, + float sinusoidal_frequency_hz, + Random* random_generator, + size_t* sample_counter, + Block* x) { + // Fill x with low-amplitude noise. + for (int band = 0; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + RandomizeSampleVector(random_generator, x->View(band, channel), + /*amplitude=*/500.f); + } + } + // Produce a sinusoid of the specified frequency in the specified channel. 
+ for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize); + ++k, ++j) { + x->View(/*band=*/0, sinusoid_channel)[j] += + 32000.f * + std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); + } + *sample_counter = *sample_counter + kBlockSize; +} + +void RunNarrowBandDetectionTest(size_t num_channels) { + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + Random random_generator(42U); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + Block x(kNumBands, num_channels); + std::array x_old; + Aec3Fft fft; + EchoCanceller3Config config; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_channels)); + + std::array mask; + x_old.fill(0.f); + constexpr int kSinusFrequencyBin = 32; + + auto generate_sinusoid_test = [&](bool known_delay) { + size_t sample_counter = 0; + for (size_t k = 0; k < 100; ++k) { + ProduceSinusoidInNoise(16000, num_channels - 1, + 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2, + &random_generator, &sample_counter, &x); + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + known_delay ? absl::optional(0) : absl::nullopt); + } + }; + + generate_sinusoid_test(true); + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + for (int k = 0; k < static_cast(mask.size()); ++k) { + EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]); + } + EXPECT_TRUE(analyzer.PoorSignalExcitation()); + EXPECT_TRUE(static_cast(analyzer.NarrowPeakBand())); + EXPECT_EQ(*analyzer.NarrowPeakBand(), 32); + + // Verify that no bands are detected as narrow when the delay is unknown. + generate_sinusoid_test(false); + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + std::for_each(mask.begin(), mask.end(), [](float a) { EXPECT_EQ(1.f, a); }); + EXPECT_FALSE(analyzer.PoorSignalExcitation()); +} + +std::string ProduceDebugText(size_t num_channels) { + rtc::StringBuilder ss; + ss << "number of channels: " << num_channels; + return ss.Release(); +} +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the check for non-null output parameter works. +TEST(RenderSignalAnalyzerDeathTest, NullMaskOutput) { + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), ""); +} + +#endif + +// Verify that no narrow bands are detected in a Gaussian noise signal. 
+TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + Random random_generator(42U); + Block x(3, num_channels); + std::array x_old; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, num_channels)); + std::array mask; + x_old.fill(0.f); + + for (int k = 0; k < 100; ++k) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + } + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + absl::optional(0)); + } + + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + EXPECT_TRUE(std::all_of(mask.begin(), mask.end(), + [](float a) { return a == 1.f; })); + EXPECT_FALSE(analyzer.PoorSignalExcitation()); + EXPECT_FALSE(static_cast(analyzer.NarrowPeakBand())); + } +} + +// Verify that a sinusoid signal is detected as narrow bands. +TEST(RenderSignalAnalyzer, NarrowBandDetection) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RunNarrowBandDetectionTest(num_channels); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc new file mode 100644 index 0000000000..640a3e3cb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/reverb_model.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +constexpr float kDefaultTransparentModeGain = 0.01f; + +float GetTransparentModeGain() { + return kDefaultTransparentModeGain; +} + +float GetEarlyReflectionsDefaultModeGain( + const EchoCanceller3Config::EpStrength& config) { + if (field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain")) { + return 0.1f; + } + return config.default_gain; +} + +float GetLateReflectionsDefaultModeGain( + const EchoCanceller3Config::EpStrength& config) { + if (field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain")) { + return 0.1f; + } + return config.default_gain; +} + +bool UseErleOnsetCompensationInDominantNearend( + const EchoCanceller3Config::EpStrength& config) { + return config.erle_onset_compensation_in_dominant_nearend || + field_trial::IsEnabled( + "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend"); +} + +// Computes the indexes that will be used for computing spectral power over +// the blocks surrounding the delay. 
+void GetRenderIndexesToAnalyze( + const SpectrumBuffer& spectrum_buffer, + const EchoCanceller3Config::EchoModel& echo_model, + int filter_delay_blocks, + int* idx_start, + int* idx_stop) { + RTC_DCHECK(idx_start); + RTC_DCHECK(idx_stop); + size_t window_start; + size_t window_end; + window_start = + std::max(0, filter_delay_blocks - + static_cast(echo_model.render_pre_window_size)); + window_end = filter_delay_blocks + + static_cast(echo_model.render_post_window_size); + *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start); + *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1); +} + +// Estimates the residual echo power based on the echo return loss enhancement +// (ERLE) and the linear power estimate. +void LinearEstimate( + rtc::ArrayView> S2_linear, + rtc::ArrayView> erle, + rtc::ArrayView> R2) { + RTC_DCHECK_EQ(S2_linear.size(), erle.size()); + RTC_DCHECK_EQ(S2_linear.size(), R2.size()); + + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + RTC_DCHECK_LT(0.f, erle[ch][k]); + R2[ch][k] = S2_linear[ch][k] / erle[ch][k]; + } + } +} + +// Estimates the residual echo power based on the estimate of the echo path +// gain. +void NonLinearEstimate( + float echo_path_gain, + const std::array& X2, + rtc::ArrayView> R2) { + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] = X2[k] * echo_path_gain; + } + } +} + +// Applies a soft noise gate to the echo generating power. +void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config, + rtc::ArrayView X2) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (config.noise_gate_power > X2[k]) { + X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope * + (config.noise_gate_power - X2[k])); + } + } +} + +// Estimates the echo generating signal power as gated maximal power over a +// time window. 
+void EchoGeneratingPower(size_t num_render_channels, + const SpectrumBuffer& spectrum_buffer, + const EchoCanceller3Config::EchoModel& echo_model, + int filter_delay_blocks, + rtc::ArrayView X2) { + int idx_stop; + int idx_start; + GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks, + &idx_start, &idx_stop); + + std::fill(X2.begin(), X2.end(), 0.f); + if (num_render_channels == 1) { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]); + } + } + } else { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + std::array render_power; + render_power.fill(0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + const auto& channel_power = spectrum_buffer.buffer[k][ch]; + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + render_power[j] += channel_power[j]; + } + } + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], render_power[j]); + } + } + } +} + +} // namespace + +ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels) + : config_(config), + num_render_channels_(num_render_channels), + early_reflections_transparent_mode_gain_(GetTransparentModeGain()), + late_reflections_transparent_mode_gain_(GetTransparentModeGain()), + early_reflections_general_gain_( + GetEarlyReflectionsDefaultModeGain(config_.ep_strength)), + late_reflections_general_gain_( + GetLateReflectionsDefaultModeGain(config_.ep_strength)), + erle_onset_compensation_in_dominant_nearend_( + UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) { + Reset(); +} + +ResidualEchoEstimator::~ResidualEchoEstimator() = default; + +void ResidualEchoEstimator::Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + bool dominant_nearend, + rtc::ArrayView> R2, + rtc::ArrayView> R2_unbounded) { + RTC_DCHECK_EQ(R2.size(), Y2.size()); + RTC_DCHECK_EQ(R2.size(), S2_linear.size()); + + const size_t num_capture_channels = R2.size(); + + // Estimate the power of the stationary noise in the render signal. + UpdateRenderNoisePower(render_buffer); + + // Estimate the residual echo power. + if (aec_state.UsableLinearEstimate()) { + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. + if (aec_state.SaturatedEcho()) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin()); + } + } else { + const bool onset_compensated = + erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend; + LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2); + LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded); + } + + UpdateReverb(ReverbType::kLinear, aec_state, render_buffer, + dominant_nearend); + AddReverb(R2); + AddReverb(R2_unbounded); + } else { + const float echo_path_gain = + GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true); + + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. 
+ if (aec_state.SaturatedEcho()) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin()); + } + } else { + // Estimate the echo generating signal power. + std::array X2; + EchoGeneratingPower(num_render_channels_, + render_buffer.GetSpectrumBuffer(), config_.echo_model, + aec_state.MinDirectPathFilterDelay(), X2); + if (!aec_state.UseStationarityProperties()) { + ApplyNoiseGate(config_.echo_model, X2); + } + + // Subtract the stationary noise power to avoid stationary noise causing + // excessive echo suppression. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k]; + X2[k] = std::max(0.f, X2[k]); + } + + NonLinearEstimate(echo_path_gain, X2, R2); + NonLinearEstimate(echo_path_gain, X2, R2_unbounded); + } + + if (config_.echo_model.model_reverb_in_nonlinear_mode && + !aec_state.TransparentModeActive()) { + UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer, + dominant_nearend); + AddReverb(R2); + AddReverb(R2_unbounded); + } + } + + if (aec_state.UseStationarityProperties()) { + // Scale the echo according to echo audibility. + std::array residual_scaling; + aec_state.GetResidualEchoScaling(residual_scaling); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] *= residual_scaling[k]; + R2_unbounded[ch][k] *= residual_scaling[k]; + } + } + } +} + +void ResidualEchoEstimator::Reset() { + echo_reverb_.Reset(); + X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold); + X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power); +} + +void ResidualEchoEstimator::UpdateRenderNoisePower( + const RenderBuffer& render_buffer) { + std::array render_power_data; + rtc::ArrayView> X2 = + render_buffer.Spectrum(0); + rtc::ArrayView render_power = + X2[/*channel=*/0]; + if (num_render_channels_ > 1) { + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = X2[ch]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; + } + } + render_power = render_power_data; + } + + // Estimate the stationary noise power in a minimum statistics manner. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + // Decrease rapidly. + if (render_power[k] < X2_noise_floor_[k]) { + X2_noise_floor_[k] = render_power[k]; + X2_noise_floor_counter_[k] = 0; + } else { + // Increase in a delayed, leaky manner. + if (X2_noise_floor_counter_[k] >= + static_cast(config_.echo_model.noise_floor_hold)) { + X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f, + config_.echo_model.min_noise_floor_power); + } else { + ++X2_noise_floor_counter_[k]; + } + } + } +} + +// Updates the reverb estimation. +void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + bool dominant_nearend) { + // Choose reverb partition based on what type of echo power model is used. + const size_t first_reverb_partition = + reverb_type == ReverbType::kLinear + ? aec_state.FilterLengthBlocks() + 1 + : aec_state.MinDirectPathFilterDelay() + 1; + + // Compute render power for the reverb. 
+ std::array render_power_data; + rtc::ArrayView> X2 = + render_buffer.Spectrum(first_reverb_partition); + rtc::ArrayView render_power = + X2[/*channel=*/0]; + if (num_render_channels_ > 1) { + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = X2[ch]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; + } + } + render_power = render_power_data; + } + + // Update the reverb estimate. + float reverb_decay = aec_state.ReverbDecay(/*mild=*/dominant_nearend); + if (reverb_type == ReverbType::kLinear) { + echo_reverb_.UpdateReverb( + render_power, aec_state.GetReverbFrequencyResponse(), reverb_decay); + } else { + const float echo_path_gain = + GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false); + echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain, + reverb_decay); + } +} +// Adds the estimated power of the reverb to the residual echo power. +void ResidualEchoEstimator::AddReverb( + rtc::ArrayView> R2) const { + const size_t num_capture_channels = R2.size(); + + // Add the reverb power. + rtc::ArrayView reverb_power = + echo_reverb_.reverb(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] += reverb_power[k]; + } + } +} + +// Chooses the echo path gain to use. +float ResidualEchoEstimator::GetEchoPathGain( + const AecState& aec_state, + bool gain_for_early_reflections) const { + float gain_amplitude; + if (aec_state.TransparentModeActive()) { + gain_amplitude = gain_for_early_reflections + ? early_reflections_transparent_mode_gain_ + : late_reflections_transparent_mode_gain_; + } else { + gain_amplitude = gain_for_early_reflections + ? early_reflections_general_gain_ + : late_reflections_general_gain_; + } + return gain_amplitude * gain_amplitude; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h new file mode 100644 index 0000000000..c468764002 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/reverb_model.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class ResidualEchoEstimator { + public: + ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels); + ~ResidualEchoEstimator(); + + ResidualEchoEstimator(const ResidualEchoEstimator&) = delete; + ResidualEchoEstimator& operator=(const ResidualEchoEstimator&) = delete; + + void Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + bool dominant_nearend, + rtc::ArrayView> R2, + rtc::ArrayView> R2_unbounded); + + private: + enum class ReverbType { kLinear, kNonLinear }; + + // Resets the state. + void Reset(); + + // Updates estimate for the power of the stationary noise component in the + // render signal. + void UpdateRenderNoisePower(const RenderBuffer& render_buffer); + + // Updates the reverb estimation. + void UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + bool dominant_nearend); + + // Adds the estimated unmodelled echo power to the residual echo power + // estimate. + void AddReverb( + rtc::ArrayView> R2) const; + + // Gets the echo path gain to apply. + float GetEchoPathGain(const AecState& aec_state, + bool gain_for_early_reflections) const; + + const EchoCanceller3Config config_; + const size_t num_render_channels_; + const float early_reflections_transparent_mode_gain_; + const float late_reflections_transparent_mode_gain_; + const float early_reflections_general_gain_; + const float late_reflections_general_gain_; + const bool erle_onset_compensation_in_dominant_nearend_; + std::array X2_noise_floor_; + std::array X2_noise_floor_counter_; + ReverbModel echo_reverb_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc new file mode 100644 index 0000000000..9a7bf0a89c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kSampleRateHz = 48000; +constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); +constexpr float kEpsilon = 1e-4f; +} // namespace + +class ResidualEchoEstimatorTest { + public: + ResidualEchoEstimatorTest(size_t num_render_channels, + size_t num_capture_channels, + const EchoCanceller3Config& config) + : num_render_channels_(num_render_channels), + num_capture_channels_(num_capture_channels), + config_(config), + estimator_(config_, num_render_channels_), + aec_state_(config_, num_capture_channels_), + render_delay_buffer_(RenderDelayBuffer::Create(config_, + kSampleRateHz, + num_render_channels_)), + E2_refined_(num_capture_channels_), + S2_linear_(num_capture_channels_), + Y2_(num_capture_channels_), + R2_(num_capture_channels_), + R2_unbounded_(num_capture_channels_), + x_(kNumBands, num_render_channels_), + H2_(num_capture_channels_, + std::vector>(10)), + h_(num_capture_channels_, + std::vector( + GetTimeDomainLength(config_.filter.refined.length_blocks), + 0.0f)), + random_generator_(42U), + output_(num_capture_channels_) { + for (auto& H2_ch : H2_) { + for (auto& H2_k : H2_ch) { + H2_k.fill(0.01f); + } + H2_ch[2].fill(10.f); + H2_ch[2][0] = 0.1f; + } + + for (auto& subtractor_output : output_) { + subtractor_output.Reset(); + subtractor_output.s_refined.fill(100.f); + } + y_.fill(0.f); + + constexpr float kLevel = 10.f; + for (auto& E2_refined_ch : E2_refined_) { + E2_refined_ch.fill(kLevel); + } + S2_linear_[0].fill(kLevel); + for (auto& Y2_ch : Y2_) { + Y2_ch.fill(kLevel); + } + } + + void RunOneFrame(bool dominant_nearend) { + RandomizeSampleVector(&random_generator_, + x_.View(/*band=*/0, /*channel=*/0)); + render_delay_buffer_->Insert(x_); + if (first_frame_) { + render_delay_buffer_->Reset(); + first_frame_ = false; + } + render_delay_buffer_->PrepareCaptureProcessing(); + + aec_state_.Update(delay_estimate_, H2_, h_, + *render_delay_buffer_->GetRenderBuffer(), E2_refined_, + Y2_, output_); + + estimator_.Estimate(aec_state_, *render_delay_buffer_->GetRenderBuffer(), + S2_linear_, Y2_, dominant_nearend, R2_, R2_unbounded_); + } + + rtc::ArrayView> R2() const { + return R2_; + } + + private: + const size_t num_render_channels_; + const size_t num_capture_channels_; + const EchoCanceller3Config& config_; + ResidualEchoEstimator estimator_; + AecState aec_state_; + std::unique_ptr render_delay_buffer_; + std::vector> E2_refined_; + std::vector> S2_linear_; + std::vector> Y2_; + std::vector> R2_; + std::vector> R2_unbounded_; + Block x_; + std::vector>> H2_; + std::vector> h_; + Random random_generator_; + std::vector output_; + std::array y_; + absl::optional delay_estimate_; + bool first_frame_ = true; +}; + +class ResidualEchoEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + ResidualEchoEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); + +TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) { + const size_t 
num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + EchoCanceller3Config config; + ResidualEchoEstimatorTest residual_echo_estimator_test( + num_render_channels, num_capture_channels, config); + for (int k = 0; k < 1993; ++k) { + residual_echo_estimator_test.RunOneFrame(/*dominant_nearend=*/false); + } +} + +TEST(ResidualEchoEstimatorMultiChannel, ReverbTest) { + const size_t num_render_channels = 1; + const size_t num_capture_channels = 1; + const size_t nFrames = 100; + + EchoCanceller3Config reference_config; + reference_config.ep_strength.default_len = 0.95f; + reference_config.ep_strength.nearend_len = 0.95f; + EchoCanceller3Config config_use_nearend_len = reference_config; + config_use_nearend_len.ep_strength.default_len = 0.95f; + config_use_nearend_len.ep_strength.nearend_len = 0.83f; + + ResidualEchoEstimatorTest reference_residual_echo_estimator_test( + num_render_channels, num_capture_channels, reference_config); + ResidualEchoEstimatorTest use_nearend_len_residual_echo_estimator_test( + num_render_channels, num_capture_channels, config_use_nearend_len); + + std::vector acum_energy_reference_R2(num_capture_channels, 0.0f); + std::vector acum_energy_R2(num_capture_channels, 0.0f); + for (size_t frame = 0; frame < nFrames; ++frame) { + bool dominant_nearend = frame <= nFrames / 2 ? false : true; + reference_residual_echo_estimator_test.RunOneFrame(dominant_nearend); + use_nearend_len_residual_echo_estimator_test.RunOneFrame(dominant_nearend); + const auto& reference_R2 = reference_residual_echo_estimator_test.R2(); + const auto& R2 = use_nearend_len_residual_echo_estimator_test.R2(); + ASSERT_EQ(reference_R2.size(), R2.size()); + for (size_t ch = 0; ch < reference_R2.size(); ++ch) { + float energy_reference_R2 = std::accumulate( + reference_R2[ch].cbegin(), reference_R2[ch].cend(), 0.0f); + float energy_R2 = std::accumulate(R2[ch].cbegin(), R2[ch].cend(), 0.0f); + if (dominant_nearend) { + EXPECT_GE(energy_reference_R2, energy_R2); + } else { + EXPECT_NEAR(energy_reference_R2, energy_R2, kEpsilon); + } + acum_energy_reference_R2[ch] += energy_reference_R2; + acum_energy_R2[ch] += energy_R2; + } + if (frame == nFrames / 2 || frame == nFrames - 1) { + for (size_t ch = 0; ch < acum_energy_reference_R2.size(); ch++) { + if (dominant_nearend) { + EXPECT_GT(acum_energy_reference_R2[ch], acum_energy_R2[ch]); + } else { + EXPECT_NEAR(acum_energy_reference_R2[ch], acum_energy_R2[ch], + kEpsilon); + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc new file mode 100644 index 0000000000..2daf376911 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/reverb_decay_estimator.h" + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +constexpr int kEarlyReverbMinSizeBlocks = 3; +constexpr int kBlocksPerSection = 6; +// Linear regression approach assumes symmetric index around 0. +constexpr float kEarlyReverbFirstPointAtLinearRegressors = + -0.5f * kBlocksPerSection * kFftLengthBy2 + 0.5f; + +// Averages the values in a block of size kFftLengthBy2; +float BlockAverage(rtc::ArrayView v, size_t block_index) { + constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2; + const int i = block_index * kFftLengthBy2; + RTC_DCHECK_GE(v.size(), i + kFftLengthBy2); + const float sum = + std::accumulate(v.begin() + i, v.begin() + i + kFftLengthBy2, 0.f); + return sum * kOneByFftLengthBy2; +} + +// Analyzes the gain in a block. +void AnalyzeBlockGain(const std::array& h2, + float floor_gain, + float* previous_gain, + bool* block_adapting, + bool* decaying_gain) { + float gain = std::max(BlockAverage(h2, 0), 1e-32f); + *block_adapting = + *previous_gain > 1.1f * gain || *previous_gain < 0.9f * gain; + *decaying_gain = gain > floor_gain; + *previous_gain = gain; +} + +// Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly. +constexpr float SymmetricArithmetricSum(int N) { + return N * (N * N - 1.0f) * (1.f / 12.f); +} + +// Returns the peak energy of an impulse response. +float BlockEnergyPeak(rtc::ArrayView h, int peak_block) { + RTC_DCHECK_LE((peak_block + 1) * kFftLengthBy2, h.size()); + RTC_DCHECK_GE(peak_block, 0); + float peak_value = + *std::max_element(h.begin() + peak_block * kFftLengthBy2, + h.begin() + (peak_block + 1) * kFftLengthBy2, + [](float a, float b) { return a * a < b * b; }); + return peak_value * peak_value; +} + +// Returns the average energy of an impulse response block. 
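// That is, (1 / kFftLengthBy2) * sum_i h[i]^2 over the coefficients of the
// selected block. Equivalent direct computation (hedged sketch):
//
//   float energy = 0.f;
//   for (size_t i = block_index * kFftLengthBy2;
//        i < (block_index + 1) * kFftLengthBy2; ++i) {
//     energy += h[i] * h[i];
//   }
//   energy /= kFftLengthBy2;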
+float BlockEnergyAverage(rtc::ArrayView h, int block_index) { + RTC_DCHECK_LE((block_index + 1) * kFftLengthBy2, h.size()); + RTC_DCHECK_GE(block_index, 0); + constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2; + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + return std::accumulate(h.begin() + block_index * kFftLengthBy2, + h.begin() + (block_index + 1) * kFftLengthBy2, 0.f, + sum_of_squares) * + kOneByFftLengthBy2; +} + +} // namespace + +ReverbDecayEstimator::ReverbDecayEstimator(const EchoCanceller3Config& config) + : filter_length_blocks_(config.filter.refined.length_blocks), + filter_length_coefficients_(GetTimeDomainLength(filter_length_blocks_)), + use_adaptive_echo_decay_(config.ep_strength.default_len < 0.f), + early_reverb_estimator_(config.filter.refined.length_blocks - + kEarlyReverbMinSizeBlocks), + late_reverb_start_(kEarlyReverbMinSizeBlocks), + late_reverb_end_(kEarlyReverbMinSizeBlocks), + previous_gains_(config.filter.refined.length_blocks, 0.f), + decay_(std::fabs(config.ep_strength.default_len)), + mild_decay_(std::fabs(config.ep_strength.nearend_len)) { + RTC_DCHECK_GT(config.filter.refined.length_blocks, + static_cast(kEarlyReverbMinSizeBlocks)); +} + +ReverbDecayEstimator::~ReverbDecayEstimator() = default; + +void ReverbDecayEstimator::Update(rtc::ArrayView filter, + const absl::optional& filter_quality, + int filter_delay_blocks, + bool usable_linear_filter, + bool stationary_signal) { + const int filter_size = static_cast(filter.size()); + + if (stationary_signal) { + return; + } + + bool estimation_feasible = + filter_delay_blocks <= + filter_length_blocks_ - kEarlyReverbMinSizeBlocks - 1; + estimation_feasible = + estimation_feasible && filter_size == filter_length_coefficients_; + estimation_feasible = estimation_feasible && filter_delay_blocks > 0; + estimation_feasible = estimation_feasible && usable_linear_filter; + + if (!estimation_feasible) { + ResetDecayEstimation(); + return; + } + + if (!use_adaptive_echo_decay_) { + return; + } + + const float new_smoothing = filter_quality ? *filter_quality * 0.2f : 0.f; + smoothing_constant_ = std::max(new_smoothing, smoothing_constant_); + if (smoothing_constant_ == 0.f) { + return; + } + + if (block_to_analyze_ < filter_length_blocks_) { + // Analyze the filter and accumulate data for reverb estimation. + AnalyzeFilter(filter); + ++block_to_analyze_; + } else { + // When the filter is fully analyzed, estimate the reverb decay and reset + // the block_to_analyze_ counter. + EstimateDecay(filter, filter_delay_blocks); + } +} + +void ReverbDecayEstimator::ResetDecayEstimation() { + early_reverb_estimator_.Reset(); + late_reverb_decay_estimator_.Reset(0); + block_to_analyze_ = 0; + estimation_region_candidate_size_ = 0; + estimation_region_identified_ = false; + smoothing_constant_ = 0.f; + late_reverb_start_ = 0; + late_reverb_end_ = 0; +} + +void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView filter, + int peak_block) { + auto& h = filter; + RTC_DCHECK_EQ(0, h.size() % kFftLengthBy2); + + // Reset the block analysis counter. + block_to_analyze_ = + std::min(peak_block + kEarlyReverbMinSizeBlocks, filter_length_blocks_); + + // To estimate the reverb decay, the energy of the first filter section must + // be substantially larger than the last. Also, the first filter section + // energy must not deviate too much from the max peak. 
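// Concretely, in EstimateDecay() below the decay is deemed sufficient only
// when the first analyzed section carries more than 4x the tail energy, and
// the filter is considered valid only when it carries more than 2x the tail
// energy while the peak energy stays below 100. Example: a first-section
// gain of 0.8 with tail_gain_ = 0.1 and peak_energy = 50 passes both checks.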
+  const float first_reverb_gain = BlockEnergyAverage(h, block_to_analyze_);
+  const size_t h_size_blocks = h.size() >> kFftLengthBy2Log2;
+  tail_gain_ = BlockEnergyAverage(h, h_size_blocks - 1);
+  float peak_energy = BlockEnergyPeak(h, peak_block);
+  const bool sufficient_reverb_decay = first_reverb_gain > 4.f * tail_gain_;
+  const bool valid_filter =
+      first_reverb_gain > 2.f * tail_gain_ && peak_energy < 100.f;
+
+  // Estimate the size of the regions with early and late reflections.
+  const int size_early_reverb = early_reverb_estimator_.Estimate();
+  const int size_late_reverb =
+      std::max(estimation_region_candidate_size_ - size_early_reverb, 0);
+
+  // Only update the reverb decay estimate if the size of the identified late
+  // reverb is sufficiently large.
+  if (size_late_reverb >= 5) {
+    if (valid_filter && late_reverb_decay_estimator_.EstimateAvailable()) {
+      float decay = std::pow(
+          2.0f, late_reverb_decay_estimator_.Estimate() * kFftLengthBy2);
+      constexpr float kMaxDecay = 0.95f;  // ~1 sec min RT60.
+      constexpr float kMinDecay = 0.02f;  // ~15 ms max RT60.
+      decay = std::max(.97f * decay_, decay);
+      decay = std::min(decay, kMaxDecay);
+      decay = std::max(decay, kMinDecay);
+      decay_ += smoothing_constant_ * (decay - decay_);
+    }
+
+    // Update length of decay. Must have enough data (number of sections) in
+    // order to estimate decay rate.
+    late_reverb_decay_estimator_.Reset(size_late_reverb * kFftLengthBy2);
+    late_reverb_start_ =
+        peak_block + kEarlyReverbMinSizeBlocks + size_early_reverb;
+    late_reverb_end_ =
+        block_to_analyze_ + estimation_region_candidate_size_ - 1;
+  } else {
+    late_reverb_decay_estimator_.Reset(0);
+    late_reverb_start_ = 0;
+    late_reverb_end_ = 0;
+  }
+
+  // Reset variables for the identification of the region for reverb decay
+  // estimation.
+  estimation_region_identified_ = !(valid_filter && sufficient_reverb_decay);
+  estimation_region_candidate_size_ = 0;
+
+  // Stop estimation of the decay until another good filter is received.
+  smoothing_constant_ = 0.f;
+
+  // Reset early reflections detector.
+  early_reverb_estimator_.Reset();
+}
+
+void ReverbDecayEstimator::AnalyzeFilter(rtc::ArrayView<const float> filter) {
+  auto h = rtc::ArrayView<const float>(
+      filter.begin() + block_to_analyze_ * kFftLengthBy2, kFftLengthBy2);
+
+  // Compute the squared filter coefficients for the block to analyze.
+  std::array<float, kFftLengthBy2> h2;
+  std::transform(h.begin(), h.end(), h2.begin(),
+                 [](float a) { return a * a; });
+
+  // Map out the region for estimating the reverb decay.
+  bool adapting;
+  bool above_noise_floor;
+  AnalyzeBlockGain(h2, tail_gain_, &previous_gains_[block_to_analyze_],
+                   &adapting, &above_noise_floor);
+
+  // Count consecutive number of "good" filter sections, where "good" means:
+  // 1) energy is above noise floor.
+  // 2) energy of current section has not changed too much from last check.
+  estimation_region_identified_ =
+      estimation_region_identified_ || adapting || !above_noise_floor;
+  if (!estimation_region_identified_) {
+    ++estimation_region_candidate_size_;
+  }
+
+  // Accumulate data for reverb decay estimation and for the estimation of early
+  // reflections.
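// The regressors work on log2 energies: an exponential tail h2[n] ~ d^n
// turns into a straight line, log2(h2[n]) ~ n * log2(d), so the fitted slope
// directly yields the decay, recovered in EstimateDecay() as
// decay = 2^(slope * kFftLengthBy2). The 1e-10 addend below only guards
// against taking the logarithm of zero.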
+ if (block_to_analyze_ <= late_reverb_end_) { + if (block_to_analyze_ >= late_reverb_start_) { + for (float h2_k : h2) { + float h2_log2 = FastApproxLog2f(h2_k + 1e-10); + late_reverb_decay_estimator_.Accumulate(h2_log2); + early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_); + } + } else { + for (float h2_k : h2) { + float h2_log2 = FastApproxLog2f(h2_k + 1e-10); + early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_); + } + } + } +} + +void ReverbDecayEstimator::Dump(ApmDataDumper* data_dumper) const { + data_dumper->DumpRaw("aec3_reverb_decay", decay_); + data_dumper->DumpRaw("aec3_reverb_tail_energy", tail_gain_); + data_dumper->DumpRaw("aec3_reverb_alpha", smoothing_constant_); + data_dumper->DumpRaw("aec3_num_reverb_decay_blocks", + late_reverb_end_ - late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_start", late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_end", late_reverb_end_); + early_reverb_estimator_.Dump(data_dumper); +} + +void ReverbDecayEstimator::LateReverbLinearRegressor::Reset( + int num_data_points) { + RTC_DCHECK_LE(0, num_data_points); + RTC_DCHECK_EQ(0, num_data_points % 2); + const int N = num_data_points; + nz_ = 0.f; + // Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly. + nn_ = SymmetricArithmetricSum(N); + // The linear regression approach assumes symmetric index around 0. + count_ = N > 0 ? -N * 0.5f + 0.5f : 0.f; + N_ = N; + n_ = 0; +} + +void ReverbDecayEstimator::LateReverbLinearRegressor::Accumulate(float z) { + nz_ += count_ * z; + ++count_; + ++n_; +} + +float ReverbDecayEstimator::LateReverbLinearRegressor::Estimate() { + RTC_DCHECK(EstimateAvailable()); + if (nn_ == 0.f) { + RTC_DCHECK_NOTREACHED(); + return 0.f; + } + return nz_ / nn_; +} + +ReverbDecayEstimator::EarlyReverbLengthEstimator::EarlyReverbLengthEstimator( + int max_blocks) + : numerators_smooth_(max_blocks - kBlocksPerSection, 0.f), + numerators_(numerators_smooth_.size(), 0.f), + coefficients_counter_(0) { + RTC_DCHECK_LE(0, max_blocks); +} + +ReverbDecayEstimator::EarlyReverbLengthEstimator:: + ~EarlyReverbLengthEstimator() = default; + +void ReverbDecayEstimator::EarlyReverbLengthEstimator::Reset() { + coefficients_counter_ = 0; + std::fill(numerators_.begin(), numerators_.end(), 0.f); + block_counter_ = 0; +} + +void ReverbDecayEstimator::EarlyReverbLengthEstimator::Accumulate( + float value, + float smoothing) { + // Each section is composed by kBlocksPerSection blocks and each section + // overlaps with the next one in (kBlocksPerSection - 1) blocks. For example, + // the first section covers the blocks [0:5], the second covers the blocks + // [1:6] and so on. As a result, for each value, kBlocksPerSection sections + // need to be updated. + int first_section_index = std::max(block_counter_ - kBlocksPerSection + 1, 0); + int last_section_index = + std::min(block_counter_, static_cast(numerators_.size() - 1)); + float x_value = static_cast(coefficients_counter_) + + kEarlyReverbFirstPointAtLinearRegressors; + const float value_to_inc = kFftLengthBy2 * value; + float value_to_add = + x_value * value + (block_counter_ - last_section_index) * value_to_inc; + for (int section = last_section_index; section >= first_section_index; + --section, value_to_add += value_to_inc) { + numerators_[section] += value_to_add; + } + + // Check if this update was the last coefficient of the current block. 
In that
+  // case, check if we are at the end of one of the sections and update the
+  // numerator of the linear regressor that is computed in such section.
+  if (++coefficients_counter_ == kFftLengthBy2) {
+    if (block_counter_ >= (kBlocksPerSection - 1)) {
+      size_t section = block_counter_ - (kBlocksPerSection - 1);
+      RTC_DCHECK_GT(numerators_.size(), section);
+      RTC_DCHECK_GT(numerators_smooth_.size(), section);
+      numerators_smooth_[section] +=
+          smoothing * (numerators_[section] - numerators_smooth_[section]);
+      n_sections_ = section + 1;
+    }
+    ++block_counter_;
+    coefficients_counter_ = 0;
+  }
+}
+
+// Estimates the size in blocks of the early reverb. The estimation is done by
+// comparing the tilt that is estimated in each section. As an optimization
+// detail and due to the fact that all the linear regressors that are computed
+// share the same denominator, the comparison of the tilts is done by a
+// comparison of the numerators of the linear regressors.
+int ReverbDecayEstimator::EarlyReverbLengthEstimator::Estimate() {
+  constexpr float N = kBlocksPerSection * kFftLengthBy2;
+  constexpr float nn = SymmetricArithmetricSum(N);
+  // numerator_11 refers to the quantity that the linear regressor needs in the
+  // numerator for getting a decay equal to 1.1 (which is not a decay).
+  // log2(1.1) * nn / kFftLengthBy2.
+  constexpr float numerator_11 = 0.13750352374993502f * nn / kFftLengthBy2;
+  // log2(0.8) * nn / kFftLengthBy2.
+  constexpr float numerator_08 = -0.32192809488736229f * nn / kFftLengthBy2;
+  constexpr int kNumSectionsToAnalyze = 9;
+
+  if (n_sections_ < kNumSectionsToAnalyze) {
+    return 0;
+  }
+
+  // Estimation of the blocks that correspond to early reverberations. The
+  // estimation is done by analyzing the impulse response. The portions of the
+  // impulse response whose energy is not decreasing over its coefficients are
+  // considered to be part of the early reverberations. Furthermore, the blocks
+  // where the energy is decreasing faster than what it does at the end of the
+  // impulse response are also considered to be part of the early
+  // reverberations. The estimation is limited to the first
+  // kNumSectionsToAnalyze sections.
+
+  RTC_DCHECK_LE(n_sections_, numerators_smooth_.size());
+  const float min_numerator_tail =
+      *std::min_element(numerators_smooth_.begin() + kNumSectionsToAnalyze,
+                        numerators_smooth_.begin() + n_sections_);
+  int early_reverb_size_minus_1 = 0;
+  for (int k = 0; k < kNumSectionsToAnalyze; ++k) {
+    if ((numerators_smooth_[k] > numerator_11) ||
+        (numerators_smooth_[k] < numerator_08 &&
+         numerators_smooth_[k] < 0.9f * min_numerator_tail)) {
+      early_reverb_size_minus_1 = k;
+    }
+  }
+
+  return early_reverb_size_minus_1 == 0 ? 0 : early_reverb_size_minus_1 + 1;
+}
+
+void ReverbDecayEstimator::EarlyReverbLengthEstimator::Dump(
+    ApmDataDumper* data_dumper) const {
+  data_dumper->DumpRaw("aec3_er_acum_numerator", numerators_smooth_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h
new file mode 100644
index 0000000000..fee54210e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" // kMaxAdaptiveFilter... + +namespace webrtc { + +class ApmDataDumper; +struct EchoCanceller3Config; + +// Class for estimating the decay of the late reverb. +class ReverbDecayEstimator { + public: + explicit ReverbDecayEstimator(const EchoCanceller3Config& config); + ~ReverbDecayEstimator(); + // Updates the decay estimate. + void Update(rtc::ArrayView filter, + const absl::optional& filter_quality, + int filter_delay_blocks, + bool usable_linear_filter, + bool stationary_signal); + // Returns the decay for the exponential model. The parameter `mild` indicates + // which exponential decay to return, the default one or a milder one. + float Decay(bool mild) const { + if (use_adaptive_echo_decay_) { + return decay_; + } else { + return mild ? mild_decay_ : decay_; + } + } + // Dumps debug data. + void Dump(ApmDataDumper* data_dumper) const; + + private: + void EstimateDecay(rtc::ArrayView filter, int peak_block); + void AnalyzeFilter(rtc::ArrayView filter); + + void ResetDecayEstimation(); + + // Class for estimating the decay of the late reverb from the linear filter. + class LateReverbLinearRegressor { + public: + // Resets the estimator to receive a specified number of data points. + void Reset(int num_data_points); + // Accumulates estimation data. + void Accumulate(float z); + // Estimates the decay. + float Estimate(); + // Returns whether an estimate is available. + bool EstimateAvailable() const { return n_ == N_ && N_ != 0; } + + public: + float nz_ = 0.f; + float nn_ = 0.f; + float count_ = 0.f; + int N_ = 0; + int n_ = 0; + }; + + // Class for identifying the length of the early reverb from the linear + // filter. For identifying the early reverberations, the impulse response is + // divided in sections and the tilt of each section is computed by a linear + // regressor. + class EarlyReverbLengthEstimator { + public: + explicit EarlyReverbLengthEstimator(int max_blocks); + ~EarlyReverbLengthEstimator(); + + // Resets the estimator. + void Reset(); + // Accumulates estimation data. + void Accumulate(float value, float smoothing); + // Estimates the size in blocks of the early reverb. + int Estimate(); + // Dumps debug data. 
+ void Dump(ApmDataDumper* data_dumper) const; + + private: + std::vector numerators_smooth_; + std::vector numerators_; + int coefficients_counter_; + int block_counter_ = 0; + int n_sections_ = 0; + }; + + const int filter_length_blocks_; + const int filter_length_coefficients_; + const bool use_adaptive_echo_decay_; + LateReverbLinearRegressor late_reverb_decay_estimator_; + EarlyReverbLengthEstimator early_reverb_estimator_; + int late_reverb_start_; + int late_reverb_end_; + int block_to_analyze_ = 0; + int estimation_region_candidate_size_ = 0; + bool estimation_region_identified_ = false; + std::vector previous_gains_; + float decay_; + float mild_decay_; + float tail_gain_ = 0.f; + float smoothing_constant_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc new file mode 100644 index 0000000000..6e7282a1fc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/reverb_frequency_response.h" + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Computes the ratio of the energies between the direct path and the tail. The +// energy is computed in the power spectrum domain discarding the DC +// contributions. 
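// Worked example (hedged): a direct-path energy of 10.0 and a tail energy of
// 0.5, both summed over bins 1..kFftLengthBy2, yield an average decay of
// 0.5 / 10.0 = 0.05; a zero direct-path energy short-circuits to 0.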
+float AverageDecayWithinFilter( + rtc::ArrayView freq_resp_direct_path, + rtc::ArrayView freq_resp_tail) { + // Skipping the DC for the ratio computation + constexpr size_t kSkipBins = 1; + RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size()); + + float direct_path_energy = + std::accumulate(freq_resp_direct_path.begin() + kSkipBins, + freq_resp_direct_path.end(), 0.f); + + if (direct_path_energy == 0.f) { + return 0.f; + } + + float tail_energy = std::accumulate(freq_resp_tail.begin() + kSkipBins, + freq_resp_tail.end(), 0.f); + return tail_energy / direct_path_energy; +} + +} // namespace + +ReverbFrequencyResponse::ReverbFrequencyResponse( + bool use_conservative_tail_frequency_response) + : use_conservative_tail_frequency_response_( + use_conservative_tail_frequency_response) { + tail_response_.fill(0.0f); +} + +ReverbFrequencyResponse::~ReverbFrequencyResponse() = default; + +void ReverbFrequencyResponse::Update( + const std::vector>& + frequency_response, + int filter_delay_blocks, + const absl::optional& linear_filter_quality, + bool stationary_block) { + if (stationary_block || !linear_filter_quality) { + return; + } + + Update(frequency_response, filter_delay_blocks, *linear_filter_quality); +} + +void ReverbFrequencyResponse::Update( + const std::vector>& + frequency_response, + int filter_delay_blocks, + float linear_filter_quality) { + rtc::ArrayView freq_resp_tail( + frequency_response[frequency_response.size() - 1]); + + rtc::ArrayView freq_resp_direct_path( + frequency_response[filter_delay_blocks]); + + float average_decay = + AverageDecayWithinFilter(freq_resp_direct_path, freq_resp_tail); + + const float smoothing = 0.2f * linear_filter_quality; + average_decay_ += smoothing * (average_decay - average_decay_); + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + tail_response_[k] = freq_resp_direct_path[k] * average_decay_; + } + + if (use_conservative_tail_frequency_response_) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + tail_response_[k] = std::max(freq_resp_tail[k], tail_response_[k]); + } + } + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + const float avg_neighbour = + 0.5f * (tail_response_[k - 1] + tail_response_[k + 1]); + tail_response_[k] = std::max(tail_response_[k], avg_neighbour); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h new file mode 100644 index 0000000000..69b16b54d0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Class for updating the frequency response for the reverb. 
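// Hedged usage sketch; the argument names are the caller's own, and the real
// call site lives in reverb_model_estimator.cc:
//
//   ReverbFrequencyResponse response(
//       /*use_conservative_tail_frequency_response=*/true);
//   response.Update(filter_frequency_response, filter_delay_blocks,
//                   linear_filter_quality, /*stationary_block=*/false);
//   rtc::ArrayView<const float> tail = response.FrequencyResponse();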
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h
new file mode 100644
index 0000000000..69b16b54d0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
+
+#include <array>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+
+// Class for updating the frequency response for the reverb.
+class ReverbFrequencyResponse {
+ public:
+  explicit ReverbFrequencyResponse(
+      bool use_conservative_tail_frequency_response);
+  ~ReverbFrequencyResponse();
+
+  // Updates the frequency response estimate of the reverb.
+  void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+                  frequency_response,
+              int filter_delay_blocks,
+              const absl::optional<float>& linear_filter_quality,
+              bool stationary_block);
+
+  // Returns the estimated frequency response for the reverb.
+  rtc::ArrayView<const float> FrequencyResponse() const {
+    return tail_response_;
+  }
+
+ private:
+  void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+                  frequency_response,
+              int filter_delay_blocks,
+              float linear_filter_quality);
+
+  const bool use_conservative_tail_frequency_response_;
+  float average_decay_ = 0.f;
+  std::array<float, kFftLengthBy2Plus1> tail_response_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc
new file mode 100644
index 0000000000..e4f3507d31
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/reverb_model.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <functional>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+ReverbModel::ReverbModel() {
+  Reset();
+}
+
+ReverbModel::~ReverbModel() = default;
+
+void ReverbModel::Reset() {
+  reverb_.fill(0.);
+}
+
+void ReverbModel::UpdateReverbNoFreqShaping(
+    rtc::ArrayView<const float> power_spectrum,
+    float power_spectrum_scaling,
+    float reverb_decay) {
+  if (reverb_decay > 0) {
+    // Update the estimate of the reverberant power.
+    for (size_t k = 0; k < power_spectrum.size(); ++k) {
+      reverb_[k] = (reverb_[k] + power_spectrum[k] * power_spectrum_scaling) *
+                   reverb_decay;
+    }
+  }
+}
+
+void ReverbModel::UpdateReverb(
+    rtc::ArrayView<const float> power_spectrum,
+    rtc::ArrayView<const float> power_spectrum_scaling,
+    float reverb_decay) {
+  if (reverb_decay > 0) {
+    // Update the estimate of the reverberant power.
+    for (size_t k = 0; k < power_spectrum.size(); ++k) {
+      reverb_[k] =
+          (reverb_[k] + power_spectrum[k] * power_spectrum_scaling[k]) *
+          reverb_decay;
+    }
+  }
+}
+
+}  // namespace webrtc
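
Editorial aside (not part of the upstream patch): both update methods above
apply the first-order recursion R <- (R + P) * d per band, where P is the
pre-scaled input power and d the reverb decay. For a constant P the recursion
converges to the fixed point R* = P * d / (1 - d), which makes the steady-state
tail level easy to sanity-check. A minimal numeric sketch of that property:

#include <cstdio>

int main() {
  const float P = 1.0f;  // Constant pre-scaled power in one band.
  const float d = 0.8f;  // Reverb decay per block.
  float R = 0.0f;        // Modeled reverberant power.
  for (int block = 0; block < 100; ++block) {
    R = (R + P) * d;  // Same recursion as in ReverbModel::UpdateReverb().
  }
  // Both values are ~4: with d = 0.8 the tail settles at 4x the input power.
  std::printf("R = %f, fixed point = %f\n", R, P * d / (1.f - d));
  return 0;
}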
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h
new file mode 100644
index 0000000000..5ba54853da
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+
+// The ReverbModel class describes an exponential reverberant model
+// that can be applied over power spectrums.
+class ReverbModel {
+ public:
+  ReverbModel();
+  ~ReverbModel();
+
+  // Resets the state.
+  void Reset();
+
+  // Returns the reverb.
+  rtc::ArrayView<const float> reverb() const {
+    return reverb_;
+  }
+
+  // The methods UpdateReverbNoFreqShaping and UpdateReverb update the
+  // estimate of the reverberation contribution to an input/output power
+  // spectrum. Before applying the exponential reverberant model, the input
+  // power spectrum is pre-scaled. Use the method UpdateReverb when a
+  // different scaling should be applied per frequency and
+  // UpdateReverbNoFreqShaping when the same scaling should be used for all
+  // the frequencies.
+  void UpdateReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
+                                 float power_spectrum_scaling,
+                                 float reverb_decay);
+
+  // Updates the reverb estimate using a per-frequency scaling.
+  void UpdateReverb(rtc::ArrayView<const float> power_spectrum,
+                    rtc::ArrayView<const float> power_spectrum_scaling,
+                    float reverb_decay);
+
+ private:
+  std::array<float, kFftLengthBy2Plus1> reverb_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc
new file mode 100644
index 0000000000..5cd7a7870d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/reverb_model_estimator.h"
+
+namespace webrtc {
+
+ReverbModelEstimator::ReverbModelEstimator(const EchoCanceller3Config& config,
+                                           size_t num_capture_channels)
+    : reverb_decay_estimators_(num_capture_channels),
+      reverb_frequency_responses_(
+          num_capture_channels,
+          ReverbFrequencyResponse(
+              config.ep_strength.use_conservative_tail_frequency_response)) {
+  for (size_t ch = 0; ch < reverb_decay_estimators_.size(); ++ch) {
+    reverb_decay_estimators_[ch] =
+        std::make_unique<ReverbDecayEstimator>(config);
+  }
+}
+
+ReverbModelEstimator::~ReverbModelEstimator() = default;
+
+void ReverbModelEstimator::Update(
+    rtc::ArrayView<const std::vector<float>> impulse_responses,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        frequency_responses,
+    rtc::ArrayView<const absl::optional<float>> linear_filter_qualities,
+    rtc::ArrayView<const int> filter_delays_blocks,
+    const std::vector<bool>& usable_linear_estimates,
+    bool stationary_block) {
+  const size_t num_capture_channels = reverb_decay_estimators_.size();
+  RTC_DCHECK_EQ(num_capture_channels, impulse_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels, frequency_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels, usable_linear_estimates.size());
+
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    // Estimate the frequency response for the reverb.
+    reverb_frequency_responses_[ch].Update(
+        frequency_responses[ch], filter_delays_blocks[ch],
+        linear_filter_qualities[ch], stationary_block);
+
+    // Estimate the reverb decay.
+    reverb_decay_estimators_[ch]->Update(
+        impulse_responses[ch], linear_filter_qualities[ch],
+        filter_delays_blocks[ch], usable_linear_estimates[ch],
+        stationary_block);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h
new file mode 100644
index 0000000000..63bade977f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
+
+#include <stddef.h>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"  // kFftLengthBy2Plus1
+#include "modules/audio_processing/aec3/reverb_decay_estimator.h"
+#include "modules/audio_processing/aec3/reverb_frequency_response.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Class for estimating the model parameters for the reverberant echo.
+class ReverbModelEstimator {
+ public:
+  ReverbModelEstimator(const EchoCanceller3Config& config,
+                       size_t num_capture_channels);
+  ~ReverbModelEstimator();
+
+  // Updates the estimates based on new data.
+  void Update(
+      rtc::ArrayView<const std::vector<float>> impulse_responses,
+      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+          frequency_responses,
+      rtc::ArrayView<const absl::optional<float>> linear_filter_qualities,
+      rtc::ArrayView<const int> filter_delays_blocks,
+      const std::vector<bool>& usable_linear_estimates,
+      bool stationary_block);
+
+  // Returns the exponential decay of the reverberant echo. The parameter
+  // `mild` indicates which exponential decay to return, the default one or a
+  // milder one.
+  // TODO(peah): Correct to properly support multiple channels.
+  float ReverbDecay(bool mild) const {
+    return reverb_decay_estimators_[0]->Decay(mild);
+  }
+
+  // Returns the frequency response of the reverberant echo.
+  // TODO(peah): Correct to properly support multiple channels.
+  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
+    return reverb_frequency_responses_[0].FrequencyResponse();
+  }
+
+  // Dumps debug data.
+  void Dump(ApmDataDumper* data_dumper) const {
+    reverb_decay_estimators_[0]->Dump(data_dumper);
+  }
+
+ private:
+  std::vector<std::unique_ptr<ReverbDecayEstimator>> reverb_decay_estimators_;
+  std::vector<ReverbFrequencyResponse> reverb_frequency_responses_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc
new file mode 100644
index 0000000000..fb7dcef37f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/reverb_model_estimator.h" + +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +EchoCanceller3Config CreateConfigForTest(float default_decay) { + EchoCanceller3Config cfg; + cfg.ep_strength.default_len = default_decay; + cfg.filter.refined.length_blocks = 40; + return cfg; +} + +constexpr int kFilterDelayBlocks = 2; + +} // namespace + +class ReverbModelEstimatorTest { + public: + ReverbModelEstimatorTest(float default_decay, size_t num_capture_channels) + : aec3_config_(CreateConfigForTest(default_decay)), + estimated_decay_(default_decay), + h_(num_capture_channels, + std::vector( + aec3_config_.filter.refined.length_blocks * kBlockSize, + 0.f)), + H2_(num_capture_channels, + std::vector>( + aec3_config_.filter.refined.length_blocks)), + quality_linear_(num_capture_channels, 1.0f) { + CreateImpulseResponseWithDecay(); + } + void RunEstimator(); + float GetDecay(bool mild) { + return mild ? mild_estimated_decay_ : estimated_decay_; + } + float GetTrueDecay() { return kTruePowerDecay; } + float GetPowerTailDb() { return 10.f * std::log10(estimated_power_tail_); } + float GetTruePowerTailDb() { return 10.f * std::log10(true_power_tail_); } + + private: + void CreateImpulseResponseWithDecay(); + static constexpr bool kStationaryBlock = false; + static constexpr float kTruePowerDecay = 0.5f; + const EchoCanceller3Config aec3_config_; + float estimated_decay_; + float mild_estimated_decay_; + float estimated_power_tail_ = 0.f; + float true_power_tail_ = 0.f; + std::vector> h_; + std::vector>> H2_; + std::vector> quality_linear_; +}; + +void ReverbModelEstimatorTest::CreateImpulseResponseWithDecay() { + const Aec3Fft fft; + for (const auto& h_k : h_) { + RTC_DCHECK_EQ(h_k.size(), + aec3_config_.filter.refined.length_blocks * kBlockSize); + } + for (const auto& H2_k : H2_) { + RTC_DCHECK_EQ(H2_k.size(), aec3_config_.filter.refined.length_blocks); + } + RTC_DCHECK_EQ(kFilterDelayBlocks, 2); + + float decay_sample = std::sqrt(powf(kTruePowerDecay, 1.f / kBlockSize)); + const size_t filter_delay_coefficients = kFilterDelayBlocks * kBlockSize; + for (auto& h_i : h_) { + std::fill(h_i.begin(), h_i.end(), 0.f); + h_i[filter_delay_coefficients] = 1.f; + for (size_t k = filter_delay_coefficients + 1; k < h_i.size(); ++k) { + h_i[k] = h_i[k - 1] * decay_sample; + } + } + + for (size_t ch = 0; ch < H2_.size(); ++ch) { + for (size_t j = 0, k = 0; j < H2_[ch].size(); ++j, k += kBlockSize) { + std::array fft_data; + fft_data.fill(0.f); + std::copy(h_[ch].begin() + k, h_[ch].begin() + k + kBlockSize, + fft_data.begin()); + FftData H_j; + fft.Fft(&fft_data, &H_j); + H_j.Spectrum(Aec3Optimization::kNone, H2_[ch][j]); + } + } + rtc::ArrayView H2_tail(H2_[0][H2_[0].size() - 1]); + true_power_tail_ = std::accumulate(H2_tail.begin(), H2_tail.end(), 0.f); +} +void 
ReverbModelEstimatorTest::RunEstimator() { + const size_t num_capture_channels = H2_.size(); + constexpr bool kUsableLinearEstimate = true; + ReverbModelEstimator estimator(aec3_config_, num_capture_channels); + std::vector usable_linear_estimates(num_capture_channels, + kUsableLinearEstimate); + std::vector filter_delay_blocks(num_capture_channels, + kFilterDelayBlocks); + for (size_t k = 0; k < 3000; ++k) { + estimator.Update(h_, H2_, quality_linear_, filter_delay_blocks, + usable_linear_estimates, kStationaryBlock); + } + estimated_decay_ = estimator.ReverbDecay(/*mild=*/false); + mild_estimated_decay_ = estimator.ReverbDecay(/*mild=*/true); + auto freq_resp_tail = estimator.GetReverbFrequencyResponse(); + estimated_power_tail_ = + std::accumulate(freq_resp_tail.begin(), freq_resp_tail.end(), 0.f); +} + +TEST(ReverbModelEstimatorTests, NotChangingDecay) { + constexpr float kDefaultDecay = 0.9f; + for (size_t num_capture_channels : {1, 2, 4, 8}) { + ReverbModelEstimatorTest test(kDefaultDecay, num_capture_channels); + test.RunEstimator(); + EXPECT_EQ(test.GetDecay(/*mild=*/false), kDefaultDecay); + EXPECT_EQ(test.GetDecay(/*mild=*/true), + EchoCanceller3Config().ep_strength.nearend_len); + EXPECT_NEAR(test.GetPowerTailDb(), test.GetTruePowerTailDb(), 5.f); + } +} + +TEST(ReverbModelEstimatorTests, ChangingDecay) { + constexpr float kDefaultDecay = -0.9f; + for (size_t num_capture_channels : {1, 2, 4, 8}) { + ReverbModelEstimatorTest test(kDefaultDecay, num_capture_channels); + test.RunEstimator(); + EXPECT_NEAR(test.GetDecay(/*mild=*/false), test.GetTrueDecay(), 0.1f); + EXPECT_NEAR(test.GetDecay(/*mild=*/true), test.GetTrueDecay(), 0.1f); + EXPECT_NEAR(test.GetPowerTailDb(), test.GetTruePowerTailDb(), 5.f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc new file mode 100644 index 0000000000..a5e77092a6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +constexpr std::array + kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1}; + +std::array FormSubbandMap() { + std::array map_band_to_subband; + size_t subband = 1; + for (size_t k = 0; k < map_band_to_subband.size(); ++k) { + RTC_DCHECK_LT(subband, kBandBoundaries.size()); + if (k >= kBandBoundaries[subband]) { + subband++; + RTC_DCHECK_LT(k, kBandBoundaries[subband]); + } + map_band_to_subband[k] = subband - 1; + } + return map_band_to_subband; +} + +// Defines the size in blocks of the sections that are used for dividing the +// linear filter. 
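+// (Editorial worked example, not in the original source: with
+// delay_headroom_blocks = 2, num_blocks = 40 and num_sections = 5, the filter
+// covers 38 blocks past the headroom; the doubling loop below assigns section
+// sizes 2, 4 and 8, and the remaining 24 blocks are split evenly over the
+// last two sections, giving the sizes {2, 4, 8, 12, 12}.)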
The sections are split in a non-linear manner so that lower +// sections that typically represent the direct path have a larger resolution +// than the higher sections which typically represent more reverberant acoustic +// paths. +std::vector DefineFilterSectionSizes(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + size_t filter_length_blocks = num_blocks - delay_headroom_blocks; + std::vector section_sizes(num_sections); + size_t remaining_blocks = filter_length_blocks; + size_t remaining_sections = num_sections; + size_t estimator_size = 2; + size_t idx = 0; + while (remaining_sections > 1 && + remaining_blocks > estimator_size * remaining_sections) { + RTC_DCHECK_LT(idx, section_sizes.size()); + section_sizes[idx] = estimator_size; + remaining_blocks -= estimator_size; + remaining_sections--; + estimator_size *= 2; + idx++; + } + + size_t last_groups_size = remaining_blocks / remaining_sections; + for (; idx < num_sections; idx++) { + section_sizes[idx] = last_groups_size; + } + section_sizes[num_sections - 1] += + remaining_blocks - last_groups_size * remaining_sections; + return section_sizes; +} + +// Forms the limits in blocks for each filter section. Those sections +// are used for analyzing the echo estimates and investigating which +// linear filter sections contribute most to the echo estimate energy. +std::vector SetSectionsBoundaries(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + std::vector estimator_boundaries_blocks(num_sections + 1); + if (estimator_boundaries_blocks.size() == 2) { + estimator_boundaries_blocks[0] = 0; + estimator_boundaries_blocks[1] = num_blocks; + return estimator_boundaries_blocks; + } + RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2); + const std::vector section_sizes = + DefineFilterSectionSizes(delay_headroom_blocks, num_blocks, + estimator_boundaries_blocks.size() - 1); + + size_t idx = 0; + size_t current_size_block = 0; + RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size()); + estimator_boundaries_blocks[0] = delay_headroom_blocks; + for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) { + current_size_block++; + if (current_size_block >= section_sizes[idx]) { + idx = idx + 1; + if (idx == section_sizes.size()) { + break; + } + estimator_boundaries_blocks[idx] = k + 1; + current_size_block = 0; + } + } + estimator_boundaries_blocks[section_sizes.size()] = num_blocks; + return estimator_boundaries_blocks; +} + +std::array +SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l); + std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h); + return max_erle; +} + +} // namespace + +SignalDependentErleEstimator::SignalDependentErleEstimator( + const EchoCanceller3Config& config, + size_t num_capture_channels) + : min_erle_(config.erle.min), + num_sections_(config.erle.num_sections), + num_blocks_(config.filter.refined.length_blocks), + delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize), + band_to_subband_(FormSubbandMap()), + max_erle_(SetMaxErleSubbands(config.erle.max_l, + config.erle.max_h, + band_to_subband_[kFftLengthBy2 / 2])), + section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, + num_blocks_, + num_sections_)), + use_onset_detection_(config.erle.onset_detection), + erle_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), + S2_section_accum_( + 
num_capture_channels, + std::vector>(num_sections_)), + erle_estimators_( + num_capture_channels, + std::vector>(num_sections_)), + erle_ref_(num_capture_channels), + correction_factors_( + num_capture_channels, + std::vector>(num_sections_)), + num_updates_(num_capture_channels), + n_active_sections_(num_capture_channels) { + RTC_DCHECK_LE(num_sections_, num_blocks_); + RTC_DCHECK_GE(num_sections_, 1); + Reset(); +} + +SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; + +void SignalDependentErleEstimator::Reset() { + for (size_t ch = 0; ch < erle_.size(); ++ch) { + erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); + for (auto& erle_estimator : erle_estimators_[ch]) { + erle_estimator.fill(min_erle_); + } + erle_ref_[ch].fill(min_erle_); + for (auto& factor : correction_factors_[ch]) { + factor.fill(1.0f); + } + num_updates_[ch].fill(0); + n_active_sections_[ch].fill(0); + } +} + +// Updates the Erle estimate by analyzing the current input signals. It takes +// the render buffer and the filter frequency response in order to do an +// estimation of the number of sections of the linear filter that are needed +// for getting the majority of the energy in the echo estimate. Based on that +// number of sections, it updates the erle estimation by introducing a +// correction factor to the erle that is given as an input to this method. +void SignalDependentErleEstimator::Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses, + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, + const std::vector& converged_filters) { + RTC_DCHECK_GT(num_sections_, 1); + + // Gets the number of filter sections that are needed for achieving 90 % + // of the power spectrum energy of the echo estimate. + ComputeNumberOfActiveFilterSections(render_buffer, + filter_frequency_responses); + + // Updates the correction factors that is used for correcting the erle and + // adapt it to the particular characteristics of the input signal. + UpdateCorrectionFactors(X2, Y2, E2, converged_filters); + + // Applies the correction factor to the input erle for getting a more refined + // erle estimation for the current input signal. + for (size_t ch = 0; ch < erle_.size(); ++ch) { + for (size_t k = 0; k < kFftLengthBy2; ++k) { + RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]); + float correction_factor = + correction_factors_[ch][n_active_sections_[ch][k]] + [band_to_subband_[k]]; + erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); + if (use_onset_detection_) { + erle_onset_compensated_[ch][k] = rtc::SafeClamp( + average_erle_onset_compensated[ch][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); + } + } + } +} + +void SignalDependentErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + for (auto& erle : erle_estimators_[0]) { + data_dumper->DumpRaw("aec3_all_erle", erle); + } + data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]); + for (auto& factor : correction_factors_[0]) { + data_dumper->DumpRaw("aec3_erle_correction_factor", factor); + } +} + +// Estimates for each band the smallest number of sections in the filter that +// together constitute 90% of the estimated echo energy. 
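+// (Editorial worked example, not in the original source: if the accumulated
+// per-section echo power estimates for one band are {6, 8, 9, 10}, the target
+// is 0.9 * 10 = 9; the backward search in ComputeActiveFilterSections() walks
+// down from the last section and stops at index 2, the smallest prefix of
+// sections whose accumulated power still reaches the target.)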
+void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses) { + RTC_DCHECK_GT(num_sections_, 1); + // Computes an approximation of the power spectrum if the filter would have + // been limited to a certain number of filter sections. + ComputeEchoEstimatePerFilterSection(render_buffer, + filter_frequency_responses); + // For each band, computes the number of filter sections that are needed for + // achieving the 90 % energy in the echo estimate. + ComputeActiveFilterSections(); +} + +void SignalDependentErleEstimator::UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + for (size_t ch = 0; ch < converged_filters.size(); ++ch) { + if (converged_filters[ch]) { + constexpr float kX2BandEnergyThreshold = 44015068.0f; + constexpr float kSmthConstantDecreases = 0.1f; + constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f; + auto subband_powers = [](rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_subbands) { + for (size_t subband = 0; subband < kSubbands; ++subband) { + RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size()); + power_spectrum_subbands[subband] = std::accumulate( + power_spectrum.begin() + kBandBoundaries[subband], + power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f); + } + }; + + std::array X2_subbands, E2_subbands, Y2_subbands; + subband_powers(X2, X2_subbands); + subband_powers(E2[ch], E2_subbands); + subband_powers(Y2[ch], Y2_subbands); + std::array idx_subbands; + for (size_t subband = 0; subband < kSubbands; ++subband) { + // When aggregating the number of active sections in the filter for + // different bands we choose to take the minimum of all of them. As an + // example, if for one of the bands it is the direct path its refined + // contributor to the final echo estimate, we consider the direct path + // is as well the refined contributor for the subband that contains that + // particular band. That aggregate number of sections will be later used + // as the identifier of the erle estimator that needs to be updated. + RTC_DCHECK_LE(kBandBoundaries[subband + 1], + n_active_sections_[ch].size()); + idx_subbands[subband] = *std::min_element( + n_active_sections_[ch].begin() + kBandBoundaries[subband], + n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]); + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + new_erle.fill(0.f); + for (size_t subband = 0; subband < kSubbands; ++subband) { + if (X2_subbands[subband] > kX2BandEnergyThreshold && + E2_subbands[subband] > 0) { + new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband]; + RTC_DCHECK_GT(new_erle[subband], 0); + is_erle_updated[subband] = true; + ++num_updates_[ch][subband]; + } + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_LT(idx, erle_estimators_[ch].size()); + float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband] + ? 
kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_estimators_[ch][idx][subband] += + alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]); + erle_estimators_[ch][idx][subband] = rtc::SafeClamp( + erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + float alpha = new_erle[subband] > erle_ref_[ch][subband] + ? kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_ref_[ch][subband] += + alpha * (new_erle[subband] - erle_ref_[ch][subband]); + erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband], + min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + constexpr int kNumUpdateThr = 50; + if (is_erle_updated[subband] && + num_updates_[ch][subband] > kNumUpdateThr) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f); + // Computes the ratio between the erle that is updated using all the + // points and the erle that is updated only on signals that share the + // same number of active filter sections. + float new_correction_factor = + erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband]; + + correction_factors_[ch][idx][subband] += + 0.1f * + (new_correction_factor - correction_factors_[ch][idx][subband]); + } + } + } + } +} + +void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses) { + const SpectrumBuffer& spectrum_render_buffer = + render_buffer.GetSpectrumBuffer(); + const size_t num_render_channels = spectrum_render_buffer.buffer[0].size(); + const size_t num_capture_channels = S2_section_accum_.size(); + const float one_by_num_render_channels = 1.f / num_render_channels; + + RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size()); + + for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) { + RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1, + section_boundaries_blocks_.size()); + size_t idx_render = render_buffer.Position(); + idx_render = spectrum_render_buffer.OffsetIndex( + idx_render, section_boundaries_blocks_[0]); + + for (size_t section = 0; section < num_sections_; ++section) { + std::array X2_section; + std::array H2_section; + X2_section.fill(0.f); + H2_section.fill(0.f); + const size_t block_limit = + std::min(section_boundaries_blocks_[section + 1], + filter_frequency_responses[capture_ch].size()); + for (size_t block = section_boundaries_blocks_[section]; + block < block_limit; ++block) { + for (size_t render_ch = 0; + render_ch < spectrum_render_buffer.buffer[idx_render].size(); + ++render_ch) { + for (size_t k = 0; k < X2_section.size(); ++k) { + X2_section[k] += + spectrum_render_buffer.buffer[idx_render][render_ch][k] * + one_by_num_render_channels; + } + } + std::transform(H2_section.begin(), H2_section.end(), + filter_frequency_responses[capture_ch][block].begin(), + H2_section.begin(), std::plus()); + idx_render = spectrum_render_buffer.IncIndex(idx_render); + } + + std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(), + S2_section_accum_[capture_ch][section].begin(), + std::multiplies()); + } + + for (size_t section = 1; section < num_sections_; ++section) { + std::transform(S2_section_accum_[capture_ch][section - 1].begin(), + S2_section_accum_[capture_ch][section - 1].end(), + 
S2_section_accum_[capture_ch][section].begin(), + S2_section_accum_[capture_ch][section].begin(), + std::plus()); + } + } +} + +void SignalDependentErleEstimator::ComputeActiveFilterSections() { + for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) { + std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + size_t section = num_sections_; + float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k]; + while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) { + n_active_sections_[ch][k] = --section; + } + } + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h new file mode 100644 index 0000000000..6847c1ab13 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// This class estimates the dependency of the Erle to the input signal. By +// looking at the input signal, an estimation on whether the current echo +// estimate is due to the direct path or to a more reverberant one is performed. +// Once that estimation is done, it is possible to refine the average Erle that +// this class receive as an input. +class SignalDependentErleEstimator { + public: + SignalDependentErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); + + ~SignalDependentErleEstimator(); + + void Reset(); + + // Returns the Erle per frequency subband. + rtc::ArrayView> Erle( + bool onset_compensated) const { + return onset_compensated && use_onset_detection_ ? erle_onset_compensated_ + : erle_; + } + + // Updates the Erle estimate. The Erle that is passed as an input is required + // to be an estimation of the average Erle achieved by the linear filter. 
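+  // (Editorial note, not in the original source: in the code below the Erle
+  // is formed as the ratio Y2 / E2 between the power of the echo-dominated
+  // capture spectrum and the residual power after linear filtering. For
+  // example, a band power of 1000 with a residual of 50 gives an Erle of 20,
+  // i.e. 13 dB; this class scales such averages per subband and per number of
+  // active filter sections.)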
+ void Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, + const std::vector& converged_filters); + + void Dump(const std::unique_ptr& data_dumper) const; + + static constexpr size_t kSubbands = 6; + + private: + void ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses); + + void UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters); + + void ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses); + + void ComputeActiveFilterSections(); + + const float min_erle_; + const size_t num_sections_; + const size_t num_blocks_; + const size_t delay_headroom_blocks_; + const std::array band_to_subband_; + const std::array max_erle_; + const std::vector section_boundaries_blocks_; + const bool use_onset_detection_; + std::vector> erle_; + std::vector> erle_onset_compensated_; + std::vector>> + S2_section_accum_; + std::vector>> erle_estimators_; + std::vector> erle_ref_; + std::vector>> correction_factors_; + std::vector> num_updates_; + std::vector> n_active_sections_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc new file mode 100644 index 0000000000..67927a6c68 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +void GetActiveFrame(Block* x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + for (int band = 0; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + RTC_DCHECK_GE(kBlockSize, frame.size()); + std::copy(frame.begin(), frame.end(), x->begin(band, channel)); + } + } +} + +class TestInputs { + public: + TestInputs(const EchoCanceller3Config& cfg, + size_t num_render_channels, + size_t num_capture_channels); + ~TestInputs(); + const RenderBuffer& GetRenderBuffer() { return *render_buffer_; } + rtc::ArrayView GetX2() { return X2_; } + rtc::ArrayView> GetY2() const { + return Y2_; + } + rtc::ArrayView> GetE2() const { + return E2_; + } + rtc::ArrayView>> + GetH2() const { + return H2_; + } + const std::vector& GetConvergedFilters() const { + return converged_filters_; + } + void Update(); + + private: + void UpdateCurrentPowerSpectra(); + int n_ = 0; + std::unique_ptr render_delay_buffer_; + RenderBuffer* render_buffer_; + std::array X2_; + std::vector> Y2_; + std::vector> E2_; + std::vector>> H2_; + Block x_; + std::vector converged_filters_; +}; + +TestInputs::TestInputs(const EchoCanceller3Config& cfg, + size_t num_render_channels, + size_t num_capture_channels) + : render_delay_buffer_( + RenderDelayBuffer::Create(cfg, 16000, num_render_channels)), + Y2_(num_capture_channels), + E2_(num_capture_channels), + H2_(num_capture_channels, + std::vector>( + cfg.filter.refined.length_blocks)), + x_(1, num_render_channels), + converged_filters_(num_capture_channels, true) { + render_delay_buffer_->AlignFromDelay(4); + render_buffer_ = render_delay_buffer_->GetRenderBuffer(); + for (auto& H2_ch : H2_) { + for (auto& H2_p : H2_ch) { + H2_p.fill(0.f); + } + } + for (auto& H2_p : H2_[0]) { + H2_p.fill(1.f); + } +} + +TestInputs::~TestInputs() = default; + +void TestInputs::Update() { + if (n_ % 2 == 0) { + std::fill(x_.begin(/*band=*/0, /*channel=*/0), + x_.end(/*band=*/0, /*channel=*/0), 0.f); + } else { + GetActiveFrame(&x_); + } + + render_delay_buffer_->Insert(x_); + render_delay_buffer_->PrepareCaptureProcessing(); + UpdateCurrentPowerSpectra(); + ++n_; +} + +void TestInputs::UpdateCurrentPowerSpectra() { + const SpectrumBuffer& spectrum_render_buffer = + render_buffer_->GetSpectrumBuffer(); + size_t idx = render_buffer_->Position(); + size_t prev_idx = spectrum_render_buffer.OffsetIndex(idx, 1); + auto& X2 = spectrum_render_buffer.buffer[idx][/*channel=*/0]; + auto& X2_prev = spectrum_render_buffer.buffer[prev_idx][/*channel=*/0]; + std::copy(X2.begin(), X2.end(), X2_.begin()); + for (size_t ch = 0; ch < 
Y2_.size(); ++ch) { + RTC_DCHECK_EQ(X2.size(), Y2_[ch].size()); + for (size_t k = 0; k < X2.size(); ++k) { + E2_[ch][k] = 0.01f * X2_prev[k]; + Y2_[ch][k] = X2[k] + E2_[ch][k]; + } + } +} + +} // namespace + +class SignalDependentErleEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + SignalDependentErleEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); + +TEST_P(SignalDependentErleEstimatorMultiChannel, SweepSettings) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + EchoCanceller3Config cfg; + size_t max_length_blocks = 50; + for (size_t blocks = 1; blocks < max_length_blocks; blocks = blocks + 10) { + for (size_t delay_headroom = 0; delay_headroom < 5; ++delay_headroom) { + for (size_t num_sections = 2; num_sections < max_length_blocks; + ++num_sections) { + cfg.filter.refined.length_blocks = blocks; + cfg.filter.refined_initial.length_blocks = + std::min(cfg.filter.refined_initial.length_blocks, blocks); + cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize; + cfg.erle.num_sections = num_sections; + if (EchoCanceller3Config::Validate(&cfg)) { + SignalDependentErleEstimator s(cfg, num_capture_channels); + std::vector> average_erle( + num_capture_channels); + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } + TestInputs inputs(cfg, num_render_channels, num_capture_channels); + for (size_t n = 0; n < 10; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, + inputs.GetConvergedFilters()); + } + } + } + } + } +} + +TEST_P(SignalDependentErleEstimatorMultiChannel, LongerRun) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + EchoCanceller3Config cfg; + cfg.filter.refined.length_blocks = 2; + cfg.filter.refined_initial.length_blocks = 1; + cfg.delay.delay_headroom_samples = 0; + cfg.delay.hysteresis_limit_blocks = 0; + cfg.erle.num_sections = 2; + EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); + std::vector> average_erle( + num_capture_channels); + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } + SignalDependentErleEstimator s(cfg, num_capture_channels); + TestInputs inputs(cfg, num_render_channels, num_capture_channels); + for (size_t n = 0; n < 200; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, + inputs.GetConvergedFilters()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc new file mode 100644 index 0000000000..fe32ece09c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/spectrum_buffer.h"
+
+#include <algorithm>
+
+namespace webrtc {
+
+SpectrumBuffer::SpectrumBuffer(size_t size, size_t num_channels)
+    : size(static_cast<int>(size)),
+      buffer(size,
+             std::vector<std::array<float, kFftLengthBy2Plus1>>(
+                 num_channels)) {
+  for (auto& channel : buffer) {
+    for (auto& c : channel) {
+      std::fill(c.begin(), c.end(), 0.f);
+    }
+  }
+}
+
+SpectrumBuffer::~SpectrumBuffer() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h
new file mode 100644
index 0000000000..51e1317f55
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Struct for bundling a circular buffer of one dimensional vector objects
+// together with the read and write indices.
+struct SpectrumBuffer {
+  SpectrumBuffer(size_t size, size_t num_channels);
+  ~SpectrumBuffer();
+
+  int IncIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index < size - 1 ? index + 1 : 0;
+  }
+
+  int DecIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index > 0 ? index - 1 : size - 1;
+  }
+
+  int OffsetIndex(int index, int offset) const {
+    RTC_DCHECK_GE(size, offset);
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    RTC_DCHECK_GE(size + index + offset, 0);
+    return (size + index + offset) % size;
+  }
+
+  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
+  void IncWriteIndex() { write = IncIndex(write); }
+  void DecWriteIndex() { write = DecIndex(write); }
+  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
+  void IncReadIndex() { read = IncIndex(read); }
+  void DecReadIndex() { read = DecIndex(read); }
+
+  const int size;
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> buffer;
+  int write = 0;
+  int read = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc
new file mode 100644
index 0000000000..4d364041b3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc
@@ -0,0 +1,241 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/stationarity_estimator.h" + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +namespace { +constexpr float kMinNoisePower = 10.f; +constexpr int kHangoverBlocks = kNumBlocksPerSecond / 20; +constexpr int kNBlocksAverageInitPhase = 20; +constexpr int kNBlocksInitialPhase = kNumBlocksPerSecond * 2.; +} // namespace + +StationarityEstimator::StationarityEstimator() + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)) { + Reset(); +} + +StationarityEstimator::~StationarityEstimator() = default; + +void StationarityEstimator::Reset() { + noise_.Reset(); + hangovers_.fill(0); + stationarity_flags_.fill(false); +} + +// Update just the noise estimator. Usefull until the delay is known +void StationarityEstimator::UpdateNoiseEstimator( + rtc::ArrayView> spectrum) { + noise_.Update(spectrum); + data_dumper_->DumpRaw("aec3_stationarity_noise_spectrum", noise_.Spectrum()); + data_dumper_->DumpRaw("aec3_stationarity_is_block_stationary", + IsBlockStationary()); +} + +void StationarityEstimator::UpdateStationarityFlags( + const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView render_reverb_contribution_spectrum, + int idx_current, + int num_lookahead) { + std::array indexes; + int num_lookahead_bounded = std::min(num_lookahead, kWindowLength - 1); + int idx = idx_current; + + if (num_lookahead_bounded < kWindowLength - 1) { + int num_lookback = (kWindowLength - 1) - num_lookahead_bounded; + idx = spectrum_buffer.OffsetIndex(idx_current, num_lookback); + } + // For estimating the stationarity properties of the current frame, the + // power for each band is accumulated for several consecutive spectra in the + // method EstimateBandStationarity. + // In order to avoid getting the indexes of the spectra for every band with + // its associated overhead, those indexes are stored in an array and then use + // when the estimation is done. 
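+  // (Editorial note, not in the original source: with kWindowLength = 13 and
+  // num_lookahead = 5, the analysis window spans 13 consecutive spectra: the
+  // current one, the 5 available lookahead spectra and 7 past spectra.
+  // indexes[0] marks one end of that window and each subsequent entry steps
+  // one spectrum back through the circular buffer via DecIndex().)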
+ indexes[0] = idx; + for (size_t k = 1; k < indexes.size(); ++k) { + indexes[k] = spectrum_buffer.DecIndex(indexes[k - 1]); + } + RTC_DCHECK_EQ( + spectrum_buffer.DecIndex(indexes[kWindowLength - 1]), + spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1))); + + for (size_t k = 0; k < stationarity_flags_.size(); ++k) { + stationarity_flags_[k] = EstimateBandStationarity( + spectrum_buffer, render_reverb_contribution_spectrum, indexes, k); + } + UpdateHangover(); + SmoothStationaryPerFreq(); +} + +bool StationarityEstimator::IsBlockStationary() const { + float acum_stationarity = 0.f; + RTC_DCHECK_EQ(stationarity_flags_.size(), kFftLengthBy2Plus1); + for (size_t band = 0; band < stationarity_flags_.size(); ++band) { + bool st = IsBandStationary(band); + acum_stationarity += static_cast(st); + } + return ((acum_stationarity * (1.f / kFftLengthBy2Plus1)) > 0.75f); +} + +bool StationarityEstimator::EstimateBandStationarity( + const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView average_reverb, + const std::array& indexes, + size_t band) const { + constexpr float kThrStationarity = 10.f; + float acum_power = 0.f; + const int num_render_channels = + static_cast(spectrum_buffer.buffer[0].size()); + const float one_by_num_channels = 1.f / num_render_channels; + for (auto idx : indexes) { + for (int ch = 0; ch < num_render_channels; ++ch) { + acum_power += spectrum_buffer.buffer[idx][ch][band] * one_by_num_channels; + } + } + acum_power += average_reverb[band]; + float noise = kWindowLength * GetStationarityPowerBand(band); + RTC_CHECK_LT(0.f, noise); + bool stationary = acum_power < kThrStationarity * noise; + data_dumper_->DumpRaw("aec3_stationarity_long_ratio", acum_power / noise); + return stationary; +} + +bool StationarityEstimator::AreAllBandsStationary() { + for (auto b : stationarity_flags_) { + if (!b) + return false; + } + return true; +} + +void StationarityEstimator::UpdateHangover() { + bool reduce_hangover = AreAllBandsStationary(); + for (size_t k = 0; k < stationarity_flags_.size(); ++k) { + if (!stationarity_flags_[k]) { + hangovers_[k] = kHangoverBlocks; + } else if (reduce_hangover) { + hangovers_[k] = std::max(hangovers_[k] - 1, 0); + } + } +} + +void StationarityEstimator::SmoothStationaryPerFreq() { + std::array all_ahead_stationary_smooth; + for (size_t k = 1; k < kFftLengthBy2Plus1 - 1; ++k) { + all_ahead_stationary_smooth[k] = stationarity_flags_[k - 1] && + stationarity_flags_[k] && + stationarity_flags_[k + 1]; + } + + all_ahead_stationary_smooth[0] = all_ahead_stationary_smooth[1]; + all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 1] = + all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 2]; + + stationarity_flags_ = all_ahead_stationary_smooth; +} + +std::atomic StationarityEstimator::instance_count_(0); + +StationarityEstimator::NoiseSpectrum::NoiseSpectrum() { + Reset(); +} + +StationarityEstimator::NoiseSpectrum::~NoiseSpectrum() = default; + +void StationarityEstimator::NoiseSpectrum::Reset() { + block_counter_ = 0; + noise_spectrum_.fill(kMinNoisePower); +} + +void StationarityEstimator::NoiseSpectrum::Update( + rtc::ArrayView> spectrum) { + RTC_DCHECK_LE(1, spectrum[0].size()); + const int num_render_channels = static_cast(spectrum.size()); + + std::array avg_spectrum_data; + rtc::ArrayView avg_spectrum; + if (num_render_channels == 1) { + avg_spectrum = spectrum[0]; + } else { + // For multiple channels, average the channel spectra before passing to the + // noise spectrum estimator. 
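+    // (Editorial note, not in the original source: e.g. with two render
+    // channels whose powers in one band are 4 and 2, the tracked value is
+    // their mean, 3; tracking the mono average keeps the noise floor estimate
+    // independent of the channel count.)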
+ avg_spectrum = avg_spectrum_data; + std::copy(spectrum[0].begin(), spectrum[0].end(), + avg_spectrum_data.begin()); + for (int ch = 1; ch < num_render_channels; ++ch) { + for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) { + avg_spectrum_data[k] += spectrum[ch][k]; + } + } + + const float one_by_num_channels = 1.f / num_render_channels; + for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) { + avg_spectrum_data[k] *= one_by_num_channels; + } + } + + ++block_counter_; + float alpha = GetAlpha(); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (block_counter_ <= kNBlocksAverageInitPhase) { + noise_spectrum_[k] += (1.f / kNBlocksAverageInitPhase) * avg_spectrum[k]; + } else { + noise_spectrum_[k] = + UpdateBandBySmoothing(avg_spectrum[k], noise_spectrum_[k], alpha); + } + } +} + +float StationarityEstimator::NoiseSpectrum::GetAlpha() const { + constexpr float kAlpha = 0.004f; + constexpr float kAlphaInit = 0.04f; + constexpr float kTiltAlpha = (kAlphaInit - kAlpha) / kNBlocksInitialPhase; + + if (block_counter_ > (kNBlocksInitialPhase + kNBlocksAverageInitPhase)) { + return kAlpha; + } else { + return kAlphaInit - + kTiltAlpha * (block_counter_ - kNBlocksAverageInitPhase); + } +} + +float StationarityEstimator::NoiseSpectrum::UpdateBandBySmoothing( + float power_band, + float power_band_noise, + float alpha) const { + float power_band_noise_updated = power_band_noise; + if (power_band_noise < power_band) { + RTC_DCHECK_GT(power_band, 0.f); + float alpha_inc = alpha * (power_band_noise / power_band); + if (block_counter_ > kNBlocksInitialPhase) { + if (10.f * power_band_noise < power_band) { + alpha_inc *= 0.1f; + } + } + power_band_noise_updated += alpha_inc * (power_band - power_band_noise); + } else { + power_band_noise_updated += alpha * (power_band - power_band_noise); + power_band_noise_updated = + std::max(power_band_noise_updated, kMinNoisePower); + } + return power_band_noise_updated; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h new file mode 100644 index 0000000000..8bcd3b789e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" // kFftLengthBy2Plus1... +#include "modules/audio_processing/aec3/reverb_model.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class ApmDataDumper; +struct SpectrumBuffer; + +class StationarityEstimator { + public: + StationarityEstimator(); + ~StationarityEstimator(); + + // Reset the stationarity estimator. + void Reset(); + + // Update just the noise estimator. Usefull until the delay is known + void UpdateNoiseEstimator( + rtc::ArrayView> spectrum); + + // Update the flag indicating whether this current frame is stationary. 
For + // getting a more robust estimation, it looks at future and/or past frames. + void UpdateStationarityFlags( + const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView render_reverb_contribution_spectrum, + int idx_current, + int num_lookahead); + + // Returns true if the current band is stationary. + bool IsBandStationary(size_t band) const { + return stationarity_flags_[band] && (hangovers_[band] == 0); + } + + // Returns true if the current block is estimated as stationary. + bool IsBlockStationary() const; + + private: + static constexpr int kWindowLength = 13; + // Returns the power of the stationary noise spectrum at a band. + float GetStationarityPowerBand(size_t k) const { return noise_.Power(k); } + + // Get an estimation of the stationarity for the current band by looking + // at the past/present/future available data. + bool EstimateBandStationarity(const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView average_reverb, + const std::array& indexes, + size_t band) const; + + // True if all bands at the current point are stationary. + bool AreAllBandsStationary(); + + // Update the hangover depending on the stationary status of the current + // frame. + void UpdateHangover(); + + // Smooth the stationarity detection by looking at neighbouring frequency + // bands. + void SmoothStationaryPerFreq(); + + class NoiseSpectrum { + public: + NoiseSpectrum(); + ~NoiseSpectrum(); + + // Reset the noise power spectrum estimate state. + void Reset(); + + // Update the noise power spectrum with a new frame. + void Update( + rtc::ArrayView> spectrum); + + // Get the noise estimation power spectrum. + rtc::ArrayView Spectrum() const { return noise_spectrum_; } + + // Get the noise power spectrum at a certain band. + float Power(size_t band) const { + RTC_DCHECK_LT(band, noise_spectrum_.size()); + return noise_spectrum_[band]; + } + + private: + // Get the update coefficient to be used for the current frame. + float GetAlpha() const; + + // Update the noise power spectrum at a certain band with a new frame. + float UpdateBandBySmoothing(float power_band, + float power_band_noise, + float alpha) const; + std::array noise_spectrum_; + size_t block_counter_; + }; + + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + NoiseSpectrum noise_; + std::array hangovers_; + std::array stationarity_flags_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc new file mode 100644 index 0000000000..dc7f92fd99 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subband_erle_estimator.h" + +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kBlocksToHoldErle = 100; +constexpr int kBlocksForOnsetDetection = kBlocksToHoldErle + 150; +constexpr int kPointsToAccumulate = 6; + +std::array SetMaxErleBands(float max_erle_l, + float max_erle_h) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + kFftLengthBy2 / 2, max_erle_l); + std::fill(max_erle.begin() + kFftLengthBy2 / 2, max_erle.end(), max_erle_h); + return max_erle; +} + +bool EnableMinErleDuringOnsets() { + return !field_trial::IsEnabled("WebRTC-Aec3MinErleDuringOnsetsKillSwitch"); +} + +} // namespace + +SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels) + : use_onset_detection_(config.erle.onset_detection), + min_erle_(config.erle.min), + max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)), + use_min_erle_during_onsets_(EnableMinErleDuringOnsets()), + accum_spectra_(num_capture_channels), + erle_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), + erle_unbounded_(num_capture_channels), + erle_during_onsets_(num_capture_channels), + coming_onset_(num_capture_channels), + hold_counters_(num_capture_channels) { + Reset(); +} + +SubbandErleEstimator::~SubbandErleEstimator() = default; + +void SubbandErleEstimator::Reset() { + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); + erle_unbounded_[ch].fill(min_erle_); + erle_during_onsets_[ch].fill(min_erle_); + coming_onset_[ch].fill(true); + hold_counters_[ch].fill(0); + } + ResetAccumulatedSpectra(); +} + +void SubbandErleEstimator::Update( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + UpdateAccumulatedSpectra(X2, Y2, E2, converged_filters); + UpdateBands(converged_filters); + + if (use_onset_detection_) { + DecreaseErlePerBandForLowRenderSignals(); + } + + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + auto& erle = erle_[ch]; + erle[0] = erle[1]; + erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1]; + + auto& erle_oc = erle_onset_compensated_[ch]; + erle_oc[0] = erle_oc[1]; + erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1]; + + auto& erle_u = erle_unbounded_[ch]; + erle_u[0] = erle_u[1]; + erle_u[kFftLengthBy2] = erle_u[kFftLengthBy2 - 1]; + } +} + +void SubbandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_erle_onset", ErleDuringOnsets()[0]); +} + +void SubbandErleEstimator::UpdateBands( + const std::vector& converged_filters) { + const int num_capture_channels = static_cast(accum_spectra_.Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
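+    // Editor's note (comment only, not part of the upstream file): once
+    // kPointsToAccumulate (6) blocks have been accumulated, the code below
+    // forms the instantaneous per-band estimate
+    //   new_erle[k] = accumulated Y2[ch][k] / accumulated E2[ch][k],
+    // i.e. capture power over residual-echo power, and smooths it into
+    // erle_[ch][k] with a small step size (0.05 upwards; 0.1, or 0 for
+    // low-render-energy bands, downwards).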
+ if (!converged_filters[ch]) { + continue; + } + + if (accum_spectra_.num_points[ch] != kPointsToAccumulate) { + continue; + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (accum_spectra_.E2[ch][k] > 0.f) { + new_erle[k] = accum_spectra_.Y2[ch][k] / accum_spectra_.E2[ch][k]; + is_erle_updated[k] = true; + } + } + + if (use_onset_detection_) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k] && !accum_spectra_.low_render_energy[ch][k]) { + if (coming_onset_[ch][k]) { + coming_onset_[ch][k] = false; + if (!use_min_erle_during_onsets_) { + float alpha = + new_erle[k] < erle_during_onsets_[ch][k] ? 0.3f : 0.15f; + erle_during_onsets_[ch][k] = rtc::SafeClamp( + erle_during_onsets_[ch][k] + + alpha * (new_erle[k] - erle_during_onsets_[ch][k]), + min_erle_, max_erle_[k]); + } + } + hold_counters_[ch][k] = kBlocksForOnsetDetection; + } + } + } + + auto update_erle_band = [](float& erle, float new_erle, + bool low_render_energy, float min_erle, + float max_erle) { + float alpha = 0.05f; + if (new_erle < erle) { + alpha = low_render_energy ? 0.f : 0.1f; + } + erle = + rtc::SafeClamp(erle + alpha * (new_erle - erle), min_erle, max_erle); + }; + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k]) { + const bool low_render_energy = accum_spectra_.low_render_energy[ch][k]; + update_erle_band(erle_[ch][k], new_erle[k], low_render_energy, + min_erle_, max_erle_[k]); + if (use_onset_detection_) { + update_erle_band(erle_onset_compensated_[ch][k], new_erle[k], + low_render_energy, min_erle_, max_erle_[k]); + } + + // Virtually unbounded ERLE. + constexpr float kUnboundedErleMax = 100000.0f; + update_erle_band(erle_unbounded_[ch][k], new_erle[k], low_render_energy, + min_erle_, kUnboundedErleMax); + } + } + } +} + +void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { + const int num_capture_channels = static_cast(accum_spectra_.Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + --hold_counters_[ch][k]; + if (hold_counters_[ch][k] <= + (kBlocksForOnsetDetection - kBlocksToHoldErle)) { + if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) { + erle_onset_compensated_[ch][k] = + std::max(erle_during_onsets_[ch][k], + 0.97f * erle_onset_compensated_[ch][k]); + RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]); + } + if (hold_counters_[ch][k] <= 0) { + coming_onset_[ch][k] = true; + hold_counters_[ch][k] = 0; + } + } + } + } +} + +void SubbandErleEstimator::ResetAccumulatedSpectra() { + for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) { + accum_spectra_.Y2[ch].fill(0.f); + accum_spectra_.E2[ch].fill(0.f); + accum_spectra_.num_points[ch] = 0; + accum_spectra_.low_render_energy[ch].fill(false); + } +} + +void SubbandErleEstimator::UpdateAccumulatedSpectra( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + auto& st = accum_spectra_; + RTC_DCHECK_EQ(st.E2.size(), E2.size()); + RTC_DCHECK_EQ(st.E2.size(), E2.size()); + const int num_capture_channels = static_cast(Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
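+  // Editor's note (comment only, not part of the upstream file): the loop
+  // below sums Y2 and E2 over kPointsToAccumulate blocks and flags a band as
+  // having low render energy if X2[k] < kX2BandEnergyThreshold in any
+  // accumulated block; UpdateBands() skips onset bookkeeping for such bands
+  // and freezes their downward ERLE adaptation.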
+ if (!converged_filters[ch]) { + continue; + } + + if (st.num_points[ch] == kPointsToAccumulate) { + st.num_points[ch] = 0; + st.Y2[ch].fill(0.f); + st.E2[ch].fill(0.f); + st.low_render_energy[ch].fill(false); + } + + std::transform(Y2[ch].begin(), Y2[ch].end(), st.Y2[ch].begin(), + st.Y2[ch].begin(), std::plus()); + std::transform(E2[ch].begin(), E2[ch].end(), st.E2[ch].begin(), + st.E2[ch].begin(), std::plus()); + + for (size_t k = 0; k < X2.size(); ++k) { + st.low_render_energy[ch][k] = + st.low_render_energy[ch][k] || X2[k] < kX2BandEnergyThreshold; + } + + ++st.num_points[ch]; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h new file mode 100644 index 0000000000..8bf9c4d645 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_ + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement for each frequency subband. +class SubbandErleEstimator { + public: + SubbandErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); + ~SubbandErleEstimator(); + + // Resets the ERLE estimator. + void Reset(); + + // Updates the ERLE estimate. + void Update(rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters); + + // Returns the ERLE estimate. + rtc::ArrayView> Erle( + bool onset_compensated) const { + return onset_compensated && use_onset_detection_ ? erle_onset_compensated_ + : erle_; + } + + // Returns the non-capped ERLE estimate. + rtc::ArrayView> ErleUnbounded() + const { + return erle_unbounded_; + } + + // Returns the ERLE estimate at onsets (only used for testing). 
+ rtc::ArrayView> ErleDuringOnsets() + const { + return erle_during_onsets_; + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + struct AccumulatedSpectra { + explicit AccumulatedSpectra(size_t num_capture_channels) + : Y2(num_capture_channels), + E2(num_capture_channels), + low_render_energy(num_capture_channels), + num_points(num_capture_channels) {} + std::vector> Y2; + std::vector> E2; + std::vector> low_render_energy; + std::vector num_points; + }; + + void UpdateAccumulatedSpectra( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters); + + void ResetAccumulatedSpectra(); + + void UpdateBands(const std::vector& converged_filters); + void DecreaseErlePerBandForLowRenderSignals(); + + const bool use_onset_detection_; + const float min_erle_; + const std::array max_erle_; + const bool use_min_erle_during_onsets_; + AccumulatedSpectra accum_spectra_; + // ERLE without special handling of render onsets. + std::vector> erle_; + // ERLE lowered during render onsets. + std::vector> erle_onset_compensated_; + std::vector> erle_unbounded_; + // Estimation of ERLE during render onsets. + std::vector> erle_during_onsets_; + std::vector> coming_onset_; + std::vector> hold_counters_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc new file mode 100644 index 0000000000..2aa400c3af --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/subband_nearend_detector.h" + +#include + +namespace webrtc { +SubbandNearendDetector::SubbandNearendDetector( + const EchoCanceller3Config::Suppressor::SubbandNearendDetection& config, + size_t num_capture_channels) + : config_(config), + num_capture_channels_(num_capture_channels), + nearend_smoothers_(num_capture_channels_, + aec3::MovingAverage(kFftLengthBy2Plus1, + config_.nearend_average_blocks)), + one_over_subband_length1_( + 1.f / (config_.subband1.high - config_.subband1.low + 1)), + one_over_subband_length2_( + 1.f / (config_.subband2.high - config_.subband2.low + 1)) {} + +void SubbandNearendDetector::Update( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) { + nearend_state_ = false; + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + const std::array& noise = + comfort_noise_spectrum[ch]; + std::array nearend; + nearend_smoothers_[ch].Average(nearend_spectrum[ch], nearend); + + // Noise power of the first region. + float noise_power = + std::accumulate(noise.begin() + config_.subband1.low, + noise.begin() + config_.subband1.high + 1, 0.f) * + one_over_subband_length1_; + + // Nearend power of the first region. 
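+    // Editor's note (comment only, not part of the upstream file): as for the
+    // noise power above, each region power is a plain band average,
+    //   P = (1 / (high - low + 1)) * sum_{k=low..high} S[k],
+    // computed with std::accumulate and the precomputed
+    // one_over_subband_length factors.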
+ float nearend_power_subband1 = + std::accumulate(nearend.begin() + config_.subband1.low, + nearend.begin() + config_.subband1.high + 1, 0.f) * + one_over_subband_length1_; + + // Nearend power of the second region. + float nearend_power_subband2 = + std::accumulate(nearend.begin() + config_.subband2.low, + nearend.begin() + config_.subband2.high + 1, 0.f) * + one_over_subband_length2_; + + // One channel is sufficient to trigger nearend state. + nearend_state_ = + nearend_state_ || + (nearend_power_subband1 < + config_.nearend_threshold * nearend_power_subband2 && + (nearend_power_subband1 > config_.snr_threshold * noise_power)); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h new file mode 100644 index 0000000000..8357edb65f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/moving_average.h" +#include "modules/audio_processing/aec3/nearend_detector.h" + +namespace webrtc { +// Class for selecting whether the suppressor is in the nearend or echo state. +class SubbandNearendDetector : public NearendDetector { + public: + SubbandNearendDetector( + const EchoCanceller3Config::Suppressor::SubbandNearendDetection& config, + size_t num_capture_channels); + + // Returns whether the current state is the nearend state. + bool IsNearendState() const override { return nearend_state_; } + + // Updates the state selection based on latest spectral estimates. + void Update(rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) override; + + private: + const EchoCanceller3Config::Suppressor::SubbandNearendDetection config_; + const size_t num_capture_channels_; + std::vector nearend_smoothers_; + const float one_over_subband_length1_; + const float one_over_subband_length2_; + bool nearend_state_ = false; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc new file mode 100644 index 0000000000..aa36bb272a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subtractor.h" + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +bool UseCoarseFilterResetHangover() { + return !field_trial::IsEnabled( + "WebRTC-Aec3CoarseFilterResetHangoverKillSwitch"); +} + +void PredictionError(const Aec3Fft& fft, + const FftData& S, + rtc::ArrayView y, + std::array* e, + std::array* s) { + std::array tmp; + fft.Ifft(S, &tmp); + constexpr float kScale = 1.0f / kFftLengthBy2; + std::transform(y.begin(), y.end(), tmp.begin() + kFftLengthBy2, e->begin(), + [&](float a, float b) { return a - b * kScale; }); + + if (s) { + for (size_t k = 0; k < s->size(); ++k) { + (*s)[k] = kScale * tmp[k + kFftLengthBy2]; + } + } +} + +void ScaleFilterOutput(rtc::ArrayView y, + float factor, + rtc::ArrayView e, + rtc::ArrayView s) { + RTC_DCHECK_EQ(y.size(), e.size()); + RTC_DCHECK_EQ(y.size(), s.size()); + for (size_t k = 0; k < y.size(); ++k) { + s[k] *= factor; + e[k] = y[k] - s[k]; + } +} + +} // namespace + +Subtractor::Subtractor(const EchoCanceller3Config& config, + size_t num_render_channels, + size_t num_capture_channels, + ApmDataDumper* data_dumper, + Aec3Optimization optimization) + : fft_(), + data_dumper_(data_dumper), + optimization_(optimization), + config_(config), + num_capture_channels_(num_capture_channels), + use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()), + refined_filters_(num_capture_channels_), + coarse_filter_(num_capture_channels_), + refined_gains_(num_capture_channels_), + coarse_gains_(num_capture_channels_), + filter_misadjustment_estimators_(num_capture_channels_), + poor_coarse_filter_counters_(num_capture_channels_, 0), + coarse_filter_reset_hangover_(num_capture_channels_, 0), + refined_frequency_responses_( + num_capture_channels_, + std::vector>( + std::max(config_.filter.refined_initial.length_blocks, + config_.filter.refined.length_blocks), + std::array())), + refined_impulse_responses_( + num_capture_channels_, + std::vector(GetTimeDomainLength(std::max( + config_.filter.refined_initial.length_blocks, + config_.filter.refined.length_blocks)), + 0.f)), + coarse_impulse_responses_(0) { + // Set up the storing of coarse impulse responses if data dumping is + // available. 
+ if (ApmDataDumper::IsAvailable()) { + coarse_impulse_responses_.resize(num_capture_channels_); + const size_t filter_size = GetTimeDomainLength( + std::max(config_.filter.coarse_initial.length_blocks, + config_.filter.coarse.length_blocks)); + for (std::vector& impulse_response : coarse_impulse_responses_) { + impulse_response.resize(filter_size, 0.f); + } + } + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_filters_[ch] = std::make_unique( + config_.filter.refined.length_blocks, + config_.filter.refined_initial.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + optimization, data_dumper_); + + coarse_filter_[ch] = std::make_unique( + config_.filter.coarse.length_blocks, + config_.filter.coarse_initial.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + optimization, data_dumper_); + refined_gains_[ch] = std::make_unique( + config_.filter.refined_initial, + config_.filter.config_change_duration_blocks); + coarse_gains_[ch] = std::make_unique( + config_.filter.coarse_initial, + config.filter.config_change_duration_blocks); + } + + RTC_DCHECK(data_dumper_); + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + for (auto& H2_k : refined_frequency_responses_[ch]) { + H2_k.fill(0.f); + } + } +} + +Subtractor::~Subtractor() = default; + +void Subtractor::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + const auto full_reset = [&]() { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_filters_[ch]->HandleEchoPathChange(); + coarse_filter_[ch]->HandleEchoPathChange(); + refined_gains_[ch]->HandleEchoPathChange(echo_path_variability); + coarse_gains_[ch]->HandleEchoPathChange(); + refined_gains_[ch]->SetConfig(config_.filter.refined_initial, true); + coarse_gains_[ch]->SetConfig(config_.filter.coarse_initial, true); + refined_filters_[ch]->SetSizePartitions( + config_.filter.refined_initial.length_blocks, true); + coarse_filter_[ch]->SetSizePartitions( + config_.filter.coarse_initial.length_blocks, true); + } + }; + + if (echo_path_variability.delay_change != + EchoPathVariability::DelayAdjustment::kNone) { + full_reset(); + } + + if (echo_path_variability.gain_change) { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_gains_[ch]->HandleEchoPathChange(echo_path_variability); + } + } +} + +void Subtractor::ExitInitialState() { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_gains_[ch]->SetConfig(config_.filter.refined, false); + coarse_gains_[ch]->SetConfig(config_.filter.coarse, false); + refined_filters_[ch]->SetSizePartitions( + config_.filter.refined.length_blocks, false); + coarse_filter_[ch]->SetSizePartitions(config_.filter.coarse.length_blocks, + false); + } +} + +void Subtractor::Process(const RenderBuffer& render_buffer, + const Block& capture, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + rtc::ArrayView outputs) { + RTC_DCHECK_EQ(num_capture_channels_, capture.NumChannels()); + + // Compute the render powers. + const bool same_filter_sizes = refined_filters_[0]->SizePartitions() == + coarse_filter_[0]->SizePartitions(); + std::array X2_refined; + std::array X2_coarse_data; + auto& X2_coarse = same_filter_sizes ? 
X2_refined : X2_coarse_data; + if (same_filter_sizes) { + render_buffer.SpectralSum(refined_filters_[0]->SizePartitions(), + &X2_refined); + } else if (refined_filters_[0]->SizePartitions() > + coarse_filter_[0]->SizePartitions()) { + render_buffer.SpectralSums(coarse_filter_[0]->SizePartitions(), + refined_filters_[0]->SizePartitions(), + &X2_coarse, &X2_refined); + } else { + render_buffer.SpectralSums(refined_filters_[0]->SizePartitions(), + coarse_filter_[0]->SizePartitions(), &X2_refined, + &X2_coarse); + } + + // Process all capture channels + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + SubtractorOutput& output = outputs[ch]; + rtc::ArrayView y = capture.View(/*band=*/0, ch); + FftData& E_refined = output.E_refined; + FftData E_coarse; + std::array& e_refined = output.e_refined; + std::array& e_coarse = output.e_coarse; + + FftData S; + FftData& G = S; + + // Form the outputs of the refined and coarse filters. + refined_filters_[ch]->Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_refined, &output.s_refined); + + coarse_filter_[ch]->Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_coarse, &output.s_coarse); + + // Compute the signal powers in the subtractor output. + output.ComputeMetrics(y); + + // Adjust the filter if needed. + bool refined_filters_adjusted = false; + filter_misadjustment_estimators_[ch].Update(output); + if (filter_misadjustment_estimators_[ch].IsAdjustmentNeeded()) { + float scale = filter_misadjustment_estimators_[ch].GetMisadjustment(); + refined_filters_[ch]->ScaleFilter(scale); + for (auto& h_k : refined_impulse_responses_[ch]) { + h_k *= scale; + } + ScaleFilterOutput(y, scale, e_refined, output.s_refined); + filter_misadjustment_estimators_[ch].Reset(); + refined_filters_adjusted = true; + } + + // Compute the FFts of the refined and coarse filter outputs. + fft_.ZeroPaddedFft(e_refined, Aec3Fft::Window::kHanning, &E_refined); + fft_.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kHanning, &E_coarse); + + // Compute spectra for future use. + E_coarse.Spectrum(optimization_, output.E2_coarse); + E_refined.Spectrum(optimization_, output.E2_refined); + + // Update the refined filter. + if (!refined_filters_adjusted) { + // Do not allow the performance of the coarse filter to affect the + // adaptation speed of the refined filter just after the coarse filter has + // been reset. + const bool disallow_leakage_diverged = + coarse_filter_reset_hangover_[ch] > 0 && + use_coarse_filter_reset_hangover_; + + std::array erl; + ComputeErl(optimization_, refined_frequency_responses_[ch], erl); + refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output, + erl, refined_filters_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), + disallow_leakage_diverged, &G); + } else { + G.re.fill(0.f); + G.im.fill(0.f); + } + refined_filters_[ch]->Adapt(render_buffer, G, + &refined_impulse_responses_[ch]); + refined_filters_[ch]->ComputeFrequencyResponse( + &refined_frequency_responses_[ch]); + + if (ch == 0) { + data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.re); + data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.im); + } + + // Update the coarse filter. + poor_coarse_filter_counters_[ch] = + output.e2_refined < output.e2_coarse + ? 
poor_coarse_filter_counters_[ch] + 1 + : 0; + if (poor_coarse_filter_counters_[ch] < 5) { + coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse, + coarse_filter_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), &G); + coarse_filter_reset_hangover_[ch] = + std::max(coarse_filter_reset_hangover_[ch] - 1, 0); + } else { + poor_coarse_filter_counters_[ch] = 0; + coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(), + refined_filters_[ch]->GetFilter()); + coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined, + coarse_filter_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), &G); + coarse_filter_reset_hangover_[ch] = + config_.filter.coarse_reset_hangover_blocks; + } + + if (ApmDataDumper::IsAvailable()) { + RTC_DCHECK_LT(ch, coarse_impulse_responses_.size()); + coarse_filter_[ch]->Adapt(render_buffer, G, + &coarse_impulse_responses_[ch]); + } else { + coarse_filter_[ch]->Adapt(render_buffer, G); + } + + if (ch == 0) { + data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re); + data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im); + filter_misadjustment_estimators_[ch].Dump(data_dumper_); + DumpFilters(); + } + + std::for_each(e_refined.begin(), e_refined.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + + if (ch == 0) { + data_dumper_->DumpWav("aec3_refined_filters_output", kBlockSize, + &e_refined[0], 16000, 1); + data_dumper_->DumpWav("aec3_coarse_filter_output", kBlockSize, + &e_coarse[0], 16000, 1); + } + } +} + +void Subtractor::FilterMisadjustmentEstimator::Update( + const SubtractorOutput& output) { + e2_acum_ += output.e2_refined; + y2_acum_ += output.y2; + if (++n_blocks_acum_ == n_blocks_) { + if (y2_acum_ > n_blocks_ * 200.f * 200.f * kBlockSize) { + float update = (e2_acum_ / y2_acum_); + if (e2_acum_ > n_blocks_ * 7500.f * 7500.f * kBlockSize) { + // Duration equal to blockSizeMs * n_blocks_ * 4. + overhang_ = 4; + } else { + overhang_ = std::max(overhang_ - 1, 0); + } + + if ((update < inv_misadjustment_) || (overhang_ > 0)) { + inv_misadjustment_ += 0.1f * (update - inv_misadjustment_); + } + } + e2_acum_ = 0.f; + y2_acum_ = 0.f; + n_blocks_acum_ = 0; + } +} + +void Subtractor::FilterMisadjustmentEstimator::Reset() { + e2_acum_ = 0.f; + y2_acum_ = 0.f; + n_blocks_acum_ = 0; + inv_misadjustment_ = 0.f; + overhang_ = 0.f; +} + +void Subtractor::FilterMisadjustmentEstimator::Dump( + ApmDataDumper* data_dumper) const { + data_dumper->DumpRaw("aec3_inv_misadjustment_factor", inv_misadjustment_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h new file mode 100644 index 0000000000..86159a3442 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_
+
+#include <math.h>
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Provides the linear echo cancellation functionality.
+class Subtractor {
+ public:
+  Subtractor(const EchoCanceller3Config& config,
+             size_t num_render_channels,
+             size_t num_capture_channels,
+             ApmDataDumper* data_dumper,
+             Aec3Optimization optimization);
+  ~Subtractor();
+  Subtractor(const Subtractor&) = delete;
+  Subtractor& operator=(const Subtractor&) = delete;
+
+  // Performs the echo subtraction.
+  void Process(const RenderBuffer& render_buffer,
+               const Block& capture,
+               const RenderSignalAnalyzer& render_signal_analyzer,
+               const AecState& aec_state,
+               rtc::ArrayView<SubtractorOutput> outputs);
+
+  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
+
+  // Exits the initial state.
+  void ExitInitialState();
+
+  // Returns the block-wise frequency responses for the refined adaptive
+  // filters.
+  const std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>&
+  FilterFrequencyResponses() const {
+    return refined_frequency_responses_;
+  }
+
+  // Returns the estimates of the impulse responses for the refined adaptive
+  // filters.
+  const std::vector<std::vector<float>>& FilterImpulseResponses() const {
+    return refined_impulse_responses_;
+  }
+
+  void DumpFilters() {
+    data_dumper_->DumpRaw(
+        "aec3_subtractor_h_refined",
+        rtc::ArrayView<const float>(
+            refined_impulse_responses_[0].data(),
+            GetTimeDomainLength(
+                refined_filters_[0]->max_filter_size_partitions())));
+    if (ApmDataDumper::IsAvailable()) {
+      RTC_DCHECK_GT(coarse_impulse_responses_.size(), 0);
+      data_dumper_->DumpRaw(
+          "aec3_subtractor_h_coarse",
+          rtc::ArrayView<const float>(
+              coarse_impulse_responses_[0].data(),
+              GetTimeDomainLength(
+                  coarse_filter_[0]->max_filter_size_partitions())));
+    }
+
+    refined_filters_[0]->DumpFilter("aec3_subtractor_H_refined");
+    coarse_filter_[0]->DumpFilter("aec3_subtractor_H_coarse");
+  }
+
+ private:
+  class FilterMisadjustmentEstimator {
+   public:
+    FilterMisadjustmentEstimator() = default;
+    ~FilterMisadjustmentEstimator() = default;
+    // Update the misadjustment estimator.
+    void Update(const SubtractorOutput& output);
+    // GetMisadjustment() returns a recommended scale for the filter so that
+    // the prediction error energy gets closer to the energy that is seen at
+    // the microphone input.
+    float GetMisadjustment() const {
+      RTC_DCHECK_GT(inv_misadjustment_, 0.0f);
+      // It is not aiming to adjust the entire estimated mismatch. Instead,
+      // it adjusts half of that estimated mismatch.
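+      // Editor's note (comment only, not part of the upstream file):
+      // inv_misadjustment_ tracks e2/y2 over the accumulation window, so a
+      // full correction would scale the filter by sqrt(y2/e2), i.e.
+      // 1.f / sqrtf(inv_misadjustment_). Returning twice that value removes
+      // only part of the mismatch; e.g. for e2/y2 == 16 the returned scale
+      // is 2/4 = 0.5 rather than the full 0.25.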
+      return 2.f / sqrtf(inv_misadjustment_);
+    }
+    // Returns true if the prediction error energy is significantly larger
+    // than the microphone signal energy and, therefore, an adjustment is
+    // recommended.
+    bool IsAdjustmentNeeded() const { return inv_misadjustment_ > 10.f; }
+    void Reset();
+    void Dump(ApmDataDumper* data_dumper) const;
+
+   private:
+    const int n_blocks_ = 4;
+    int n_blocks_acum_ = 0;
+    float e2_acum_ = 0.f;
+    float y2_acum_ = 0.f;
+    float inv_misadjustment_ = 0.f;
+    int overhang_ = 0;
+  };
+
+  const Aec3Fft fft_;
+  ApmDataDumper* data_dumper_;
+  const Aec3Optimization optimization_;
+  const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
+  const bool use_coarse_filter_reset_hangover_;
+
+  std::vector<std::unique_ptr<AdaptiveFirFilter>> refined_filters_;
+  std::vector<std::unique_ptr<AdaptiveFirFilter>> coarse_filter_;
+  std::vector<std::unique_ptr<RefinedFilterUpdateGain>> refined_gains_;
+  std::vector<std::unique_ptr<CoarseFilterUpdateGain>> coarse_gains_;
+  std::vector<FilterMisadjustmentEstimator> filter_misadjustment_estimators_;
+  std::vector<size_t> poor_coarse_filter_counters_;
+  std::vector<int> coarse_filter_reset_hangover_;
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
+      refined_frequency_responses_;
+  std::vector<std::vector<float>> refined_impulse_responses_;
+  std::vector<std::vector<float>> coarse_impulse_responses_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc
new file mode 100644
index 0000000000..ed80101f06
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/subtractor_output.h" + +#include + +namespace webrtc { + +SubtractorOutput::SubtractorOutput() = default; +SubtractorOutput::~SubtractorOutput() = default; + +void SubtractorOutput::Reset() { + s_refined.fill(0.f); + s_coarse.fill(0.f); + e_refined.fill(0.f); + e_coarse.fill(0.f); + E_refined.re.fill(0.f); + E_refined.im.fill(0.f); + E2_refined.fill(0.f); + E2_coarse.fill(0.f); + e2_refined = 0.f; + e2_coarse = 0.f; + s2_refined = 0.f; + s2_coarse = 0.f; + y2 = 0.f; +} + +void SubtractorOutput::ComputeMetrics(rtc::ArrayView y) { + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + e2_refined = + std::accumulate(e_refined.begin(), e_refined.end(), 0.f, sum_of_squares); + e2_coarse = + std::accumulate(e_coarse.begin(), e_coarse.end(), 0.f, sum_of_squares); + s2_refined = + std::accumulate(s_refined.begin(), s_refined.end(), 0.f, sum_of_squares); + s2_coarse = + std::accumulate(s_coarse.begin(), s_coarse.end(), 0.f, sum_of_squares); + + s_refined_max_abs = *std::max_element(s_refined.begin(), s_refined.end()); + s_refined_max_abs = + std::max(s_refined_max_abs, + -(*std::min_element(s_refined.begin(), s_refined.end()))); + + s_coarse_max_abs = *std::max_element(s_coarse.begin(), s_coarse.end()); + s_coarse_max_abs = std::max( + s_coarse_max_abs, -(*std::min_element(s_coarse.begin(), s_coarse.end()))); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h new file mode 100644 index 0000000000..d2d12082c6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" + +namespace webrtc { + +// Stores the values being returned from the echo subtractor for a single +// capture channel. +struct SubtractorOutput { + SubtractorOutput(); + ~SubtractorOutput(); + + std::array s_refined; + std::array s_coarse; + std::array e_refined; + std::array e_coarse; + FftData E_refined; + std::array E2_refined; + std::array E2_coarse; + float s2_refined = 0.f; + float s2_coarse = 0.f; + float e2_refined = 0.f; + float e2_coarse = 0.f; + float y2 = 0.f; + float s_refined_max_abs = 0.f; + float s_coarse_max_abs = 0.f; + + // Reset the struct content. + void Reset(); + + // Updates the powers of the signals. 
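+  // (Editor's note, not part of the upstream file: ComputeMetrics() fills
+  // the energies above as plain sums of squares over the block, e.g.
+  //   y2 = sum_k y[k] * y[k],
+  // and sets s_refined_max_abs / s_coarse_max_abs to the largest absolute
+  // sample of the corresponding echo estimate.)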
+ void ComputeMetrics(rtc::ArrayView y); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc new file mode 100644 index 0000000000..baf0600161 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/subtractor_output_analyzer.h" + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +SubtractorOutputAnalyzer::SubtractorOutputAnalyzer(size_t num_capture_channels) + : filters_converged_(num_capture_channels, false) {} + +void SubtractorOutputAnalyzer::Update( + rtc::ArrayView subtractor_output, + bool* any_filter_converged, + bool* any_coarse_filter_converged, + bool* all_filters_diverged) { + RTC_DCHECK(any_filter_converged); + RTC_DCHECK(all_filters_diverged); + RTC_DCHECK_EQ(subtractor_output.size(), filters_converged_.size()); + + *any_filter_converged = false; + *any_coarse_filter_converged = false; + *all_filters_diverged = true; + + for (size_t ch = 0; ch < subtractor_output.size(); ++ch) { + const float y2 = subtractor_output[ch].y2; + const float e2_refined = subtractor_output[ch].e2_refined; + const float e2_coarse = subtractor_output[ch].e2_coarse; + + constexpr float kConvergenceThreshold = 50 * 50 * kBlockSize; + constexpr float kConvergenceThresholdLowLevel = 20 * 20 * kBlockSize; + bool refined_filter_converged = + e2_refined < 0.5f * y2 && y2 > kConvergenceThreshold; + bool coarse_filter_converged_strict = + e2_coarse < 0.05f * y2 && y2 > kConvergenceThreshold; + bool coarse_filter_converged_relaxed = + e2_coarse < 0.2f * y2 && y2 > kConvergenceThresholdLowLevel; + float min_e2 = std::min(e2_refined, e2_coarse); + bool filter_diverged = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; + filters_converged_[ch] = + refined_filter_converged || coarse_filter_converged_strict; + + *any_filter_converged = *any_filter_converged || filters_converged_[ch]; + *any_coarse_filter_converged = + *any_coarse_filter_converged || coarse_filter_converged_relaxed; + *all_filters_diverged = *all_filters_diverged && filter_diverged; + } +} + +void SubtractorOutputAnalyzer::HandleEchoPathChange() { + std::fill(filters_converged_.begin(), filters_converged_.end(), false); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h new file mode 100644 index 0000000000..32707dbb19 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_
+
+#include <vector>
+
+#include "modules/audio_processing/aec3/subtractor_output.h"
+
+namespace webrtc {
+
+// Class for analyzing the properties of the subtractor output.
+class SubtractorOutputAnalyzer {
+ public:
+  explicit SubtractorOutputAnalyzer(size_t num_capture_channels);
+  ~SubtractorOutputAnalyzer() = default;
+
+  // Analyses the subtractor output.
+  void Update(rtc::ArrayView<const SubtractorOutput> subtractor_output,
+              bool* any_filter_converged,
+              bool* any_coarse_filter_converged,
+              bool* all_filters_diverged);
+
+  const std::vector<bool>& ConvergedFilters() const {
+    return filters_converged_;
+  }
+
+  // Handle echo path change.
+  void HandleEchoPathChange();
+
+ private:
+  std::vector<bool> filters_converged_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc
new file mode 100644
index 0000000000..56b9cec9f1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/subtractor.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::vector RunSubtractorTest( + size_t num_render_channels, + size_t num_capture_channels, + int num_blocks_to_process, + int delay_samples, + int refined_filter_length_blocks, + int coarse_filter_length_blocks, + bool uncorrelated_inputs, + const std::vector& blocks_with_echo_path_changes) { + ApmDataDumper data_dumper(42); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + EchoCanceller3Config config; + config.filter.refined.length_blocks = refined_filter_length_blocks; + config.filter.coarse.length_blocks = coarse_filter_length_blocks; + + Subtractor subtractor(config, num_render_channels, num_capture_channels, + &data_dumper, DetectOptimization()); + absl::optional delay_estimate; + Block x(kNumBands, num_render_channels); + Block y(/*num_bands=*/1, num_capture_channels); + std::array x_old; + std::vector output(num_capture_channels); + config.delay.default_delay = 1; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + RenderSignalAnalyzer render_signal_analyzer(config); + Random random_generator(42U); + Aec3Fft fft; + std::vector> Y2(num_capture_channels); + std::vector> E2_refined( + num_capture_channels); + std::array E2_coarse; + AecState aec_state(config, num_capture_channels); + x_old.fill(0.f); + for (auto& Y2_ch : Y2) { + Y2_ch.fill(0.f); + } + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(0.f); + } + E2_coarse.fill(0.f); + + std::vector>>> delay_buffer( + num_capture_channels); + for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) { + delay_buffer[capture_ch].resize(num_render_channels); + for (size_t render_ch = 0; render_ch < num_render_channels; ++render_ch) { + delay_buffer[capture_ch][render_ch] = + std::make_unique>(delay_samples); + } + } + + // [B,A] = butter(2,100/8000,'high') + constexpr CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; + std::vector> x_hp_filter( + num_render_channels); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch] = + std::make_unique(kHighPassFilterCoefficients, 1); + } + std::vector> y_hp_filter( + num_capture_channels); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + y_hp_filter[ch] = + std::make_unique(kHighPassFilterCoefficients, 1); + } + + for (int k = 0; k < num_blocks_to_process; ++k) { + for (size_t render_ch = 0; render_ch < num_render_channels; ++render_ch) { + RandomizeSampleVector(&random_generator, x.View(/*band=*/0, render_ch)); + } + if (uncorrelated_inputs) { + for (size_t capture_ch = 0; capture_ch < num_capture_channels; + ++capture_ch) { + RandomizeSampleVector(&random_generator, + y.View(/*band=*/0, capture_ch)); + } + } else { + for (size_t capture_ch = 0; capture_ch < num_capture_channels; + ++capture_ch) { + rtc::ArrayView y_view = y.View(/*band=*/0, capture_ch); + for (size_t render_ch = 0; render_ch < num_render_channels; + ++render_ch) { + std::array 
y_channel; + delay_buffer[capture_ch][render_ch]->Delay( + x.View(/*band=*/0, render_ch), y_channel); + for (size_t k = 0; k < kBlockSize; ++k) { + y_view[k] += y_channel[k] / num_render_channels; + } + } + } + } + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch]->Process(x.View(/*band=*/0, ch)); + } + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + y_hp_filter[ch]->Process(y.View(/*band=*/0, ch)); + } + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + aec_state.MinDirectPathFilterDelay()); + + // Handle echo path changes. + if (std::find(blocks_with_echo_path_changes.begin(), + blocks_with_echo_path_changes.end(), + k) != blocks_with_echo_path_changes.end()) { + subtractor.HandleEchoPathChange(EchoPathVariability( + true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, + false)); + } + subtractor.Process(*render_delay_buffer->GetRenderBuffer(), y, + render_signal_analyzer, aec_state, output); + + aec_state.HandleEchoPathChange(EchoPathVariability( + false, EchoPathVariability::DelayAdjustment::kNone, false)); + aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), + subtractor.FilterImpulseResponses(), + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + output); + } + + std::vector results(num_capture_channels); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + const float output_power = std::inner_product( + output[ch].e_refined.begin(), output[ch].e_refined.end(), + output[ch].e_refined.begin(), 0.f); + const float y_power = + std::inner_product(y.begin(/*band=*/0, ch), y.end(/*band=*/0, ch), + y.begin(/*band=*/0, ch), 0.f); + if (y_power == 0.f) { + ADD_FAILURE(); + results[ch] = -1.f; + } + results[ch] = output_power / y_power; + } + return results; +} + +std::string ProduceDebugText(size_t num_render_channels, + size_t num_capture_channels, + size_t delay, + int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "delay: " << delay << ", "; + ss << "filter_length_blocks:" << filter_length_blocks << ", "; + ss << "num_render_channels:" << num_render_channels << ", "; + ss << "num_capture_channels:" << num_capture_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non data dumper works. +TEST(SubtractorDeathTest, NullDataDumper) { + EXPECT_DEATH( + Subtractor(EchoCanceller3Config(), 1, 1, nullptr, DetectOptimization()), + ""); +} + +#endif + +// Verifies that the subtractor is able to converge on correlated data. +TEST(Subtractor, Convergence) { + std::vector blocks_with_echo_path_changes; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(1, 1, delay_samples, filter_length_blocks)); + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 2500, delay_samples, filter_length_blocks, filter_length_blocks, + false, blocks_with_echo_path_changes); + + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.1f, echo_to_nearend_power); + } + } + } +} + +// Verifies that the subtractor is able to handle the case when the refined +// filter is longer than the coarse filter. 
+TEST(Subtractor, RefinedFilterLongerThanCoarseFilter) { + std::vector blocks_with_echo_path_changes; + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 400, 64, 20, 15, false, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.5f, echo_to_nearend_power); + } +} + +// Verifies that the subtractor is able to handle the case when the coarse +// filter is longer than the refined filter. +TEST(Subtractor, CoarseFilterLongerThanRefinedFilter) { + std::vector blocks_with_echo_path_changes; + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 400, 64, 15, 20, false, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.5f, echo_to_nearend_power); + } +} + +// Verifies that the subtractor does not converge on uncorrelated signals. +TEST(Subtractor, NonConvergenceOnUncorrelatedSignals) { + std::vector blocks_with_echo_path_changes; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(1, 1, delay_samples, filter_length_blocks)); + + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 3000, delay_samples, filter_length_blocks, filter_length_blocks, + true, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.1); + } + } + } +} + +class SubtractorMultiChannelUpToEightRender + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +#if defined(NDEBUG) +INSTANTIATE_TEST_SUITE_P(NonDebugMultiChannel, + SubtractorMultiChannelUpToEightRender, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(1, 2, 4))); +#else +INSTANTIATE_TEST_SUITE_P(DebugMultiChannel, + SubtractorMultiChannelUpToEightRender, + ::testing::Combine(::testing::Values(1, 2), + ::testing::Values(1, 2))); +#endif + +// Verifies that the subtractor is able to converge on correlated data. +TEST_P(SubtractorMultiChannelUpToEightRender, Convergence) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + std::vector blocks_with_echo_path_changes; + size_t num_blocks_to_process = 2500 * num_render_channels; + std::vector echo_to_nearend_powers = RunSubtractorTest( + num_render_channels, num_capture_channels, num_blocks_to_process, 64, 20, + 20, false, blocks_with_echo_path_changes); + + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.1f, echo_to_nearend_power); + } +} + +class SubtractorMultiChannelUpToFourRender + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +#if defined(NDEBUG) +INSTANTIATE_TEST_SUITE_P(NonDebugMultiChannel, + SubtractorMultiChannelUpToFourRender, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); +#else +INSTANTIATE_TEST_SUITE_P(DebugMultiChannel, + SubtractorMultiChannelUpToFourRender, + ::testing::Combine(::testing::Values(1, 2), + ::testing::Values(1, 2))); +#endif + +// Verifies that the subtractor does not converge on uncorrelated signals. 
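+// (Editor's note, not part of the upstream file: with independently drawn
+// render and capture signals there is nothing for the filters to model, so
+// the echo-to-nearend power ratio e2/y2 should stay close to 1; the
+// EXPECT_NEAR(1.f, ..., 0.25f) bound below encodes that expectation.)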
+TEST_P(SubtractorMultiChannelUpToFourRender, + NonConvergenceOnUncorrelatedSignals) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + std::vector blocks_with_echo_path_changes; + size_t num_blocks_to_process = 5000 * num_render_channels; + std::vector echo_to_nearend_powers = RunSubtractorTest( + num_render_channels, num_capture_channels, num_blocks_to_process, 64, 20, + 20, true, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_LT(.8f, echo_to_nearend_power); + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.25f); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc new file mode 100644 index 0000000000..83ded425d5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_filter.h" + +#include +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/vector_math.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +// Hanning window from Matlab command win = sqrt(hanning(128)). +const float kSqrtHanning[kFftLength] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 
0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f, + 0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f, + 0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f}; + +} // namespace + +SuppressionFilter::SuppressionFilter(Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels) + : optimization_(optimization), + sample_rate_hz_(sample_rate_hz), + num_capture_channels_(num_capture_channels), + fft_(), + e_output_old_(NumBandsForRate(sample_rate_hz_), + std::vector>( + num_capture_channels_)) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); + for (size_t b = 0; b < e_output_old_.size(); ++b) { + for (size_t ch = 0; ch < e_output_old_[b].size(); ++ch) { + e_output_old_[b][ch].fill(0.f); + } + } +} + +SuppressionFilter::~SuppressionFilter() = default; + +void SuppressionFilter::ApplyGain( + rtc::ArrayView comfort_noise, + rtc::ArrayView comfort_noise_high_band, + const std::array& suppression_gain, + float high_bands_gain, + rtc::ArrayView E_lowest_band, + Block* e) { + RTC_DCHECK(e); + RTC_DCHECK_EQ(e->NumBands(), NumBandsForRate(sample_rate_hz_)); + + // Comfort noise gain is sqrt(1-g^2), where g is the suppression gain. + std::array noise_gain; + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + noise_gain[i] = 1.f - suppression_gain[i] * suppression_gain[i]; + } + aec3::VectorMath(optimization_).Sqrt(noise_gain); + + const float high_bands_noise_scaling = + 0.4f * std::sqrt(1.f - high_bands_gain * high_bands_gain); + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + FftData E; + + // Analysis filterbank. + E.Assign(E_lowest_band[ch]); + + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + // Apply suppression gains. + float E_real = E.re[i] * suppression_gain[i]; + float E_imag = E.im[i] * suppression_gain[i]; + + // Scale and add the comfort noise. + E.re[i] = E_real + noise_gain[i] * comfort_noise[ch].re[i]; + E.im[i] = E_imag + noise_gain[i] * comfort_noise[ch].im[i]; + } + + // Synthesis filterbank. + std::array e_extended; + constexpr float kIfftNormalization = 2.f / kFftLength; + fft_.Ifft(E, &e_extended); + + auto e0 = e->View(/*band=*/0, ch); + float* e0_old = e_output_old_[0][ch].data(); + + // Window and add the first half of e_extended with the second half of + // e_extended from the previous block. + for (size_t i = 0; i < kFftLengthBy2; ++i) { + float e0_i = e0_old[i] * kSqrtHanning[kFftLengthBy2 + i]; + e0_i += e_extended[i] * kSqrtHanning[i]; + e0[i] = e0_i * kIfftNormalization; + } + + // The second half of e_extended is stored for the succeeding frame. + std::copy(e_extended.begin() + kFftLengthBy2, + e_extended.begin() + kFftLength, + std::begin(e_output_old_[0][ch])); + + // Apply suppression gain to upper bands. + for (int b = 1; b < e->NumBands(); ++b) { + auto e_band = e->View(b, ch); + for (size_t i = 0; i < kFftLengthBy2; ++i) { + e_band[i] *= high_bands_gain; + } + } + + // Add comfort noise to band 1. 
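+    // Editor's note (comment only, not part of the upstream file): the
+    // band-1 noise floor uses the same power-complementary shaping as the
+    // lowest band, sqrt(1 - g^2) with g = high_bands_gain, further attenuated
+    // by the empirical 0.4 factor in high_bands_noise_scaling, while
+    // kIfftNormalization compensates the IFFT scaling.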
+    if (e->NumBands() > 1) {
+      E.Assign(comfort_noise_high_band[ch]);
+      std::array<float, kFftLength> time_domain_high_band_noise;
+      fft_.Ifft(E, &time_domain_high_band_noise);
+
+      auto e1 = e->View(/*band=*/1, ch);
+      const float gain = high_bands_noise_scaling * kIfftNormalization;
+      for (size_t i = 0; i < kFftLengthBy2; ++i) {
+        e1[i] += time_domain_high_band_noise[i] * gain;
+      }
+    }
+
+    // Delay upper bands to match the delay of the filter bank.
+    for (int b = 1; b < e->NumBands(); ++b) {
+      auto e_band = e->View(b, ch);
+      float* e_band_old = e_output_old_[b][ch].data();
+      for (size_t i = 0; i < kFftLengthBy2; ++i) {
+        std::swap(e_band[i], e_band_old[i]);
+      }
+    }
+
+    // Clamp output of all bands.
+    for (int b = 0; b < e->NumBands(); ++b) {
+      auto e_band = e->View(b, ch);
+      for (size_t i = 0; i < kFftLengthBy2; ++i) {
+        e_band[i] = rtc::SafeClamp(e_band[i], -32768.f, 32767.f);
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h
new file mode 100644
index 0000000000..c18b2334bf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_
+
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+
+namespace webrtc {
+
+class SuppressionFilter {
+ public:
+  SuppressionFilter(Aec3Optimization optimization,
+                    int sample_rate_hz,
+                    size_t num_capture_channels_);
+  ~SuppressionFilter();
+
+  SuppressionFilter(const SuppressionFilter&) = delete;
+  SuppressionFilter& operator=(const SuppressionFilter&) = delete;
+
+  void ApplyGain(rtc::ArrayView<const FftData> comfort_noise,
+                 rtc::ArrayView<const FftData> comfort_noise_high_bands,
+                 const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
+                 float high_bands_gain,
+                 rtc::ArrayView<const FftData> E_lowest_band,
+                 Block* e);
+
+ private:
+  const Aec3Optimization optimization_;
+  const int sample_rate_hz_;
+  const size_t num_capture_channels_;
+  const Aec3Fft fft_;
+  std::vector<std::vector<std::array<float, kFftLengthBy2>>> e_output_old_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc
new file mode 100644
index 0000000000..464f5cfed2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/suppression_filter.h"
+
+#include
+
+#include
+#include
+#include
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kPi = 3.141592f;
+
+void ProduceSinusoid(int sample_rate_hz,
+                     float sinusoidal_frequency_hz,
+                     size_t* sample_counter,
+                     Block* x) {
+  // Produce a sinusoid of the specified frequency.
+  for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize);
+       ++k, ++j) {
+    for (int channel = 0; channel < x->NumChannels(); ++channel) {
+      x->View(/*band=*/0, channel)[j] =
+          32767.f *
+          std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
+    }
+  }
+  *sample_counter = *sample_counter + kBlockSize;
+
+  for (int band = 1; band < x->NumBands(); ++band) {
+    for (int channel = 0; channel < x->NumChannels(); ++channel) {
+      std::fill(x->begin(band, channel), x->end(band, channel), 0.f);
+    }
+  }
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for null suppressor output.
+TEST(SuppressionFilterDeathTest, NullOutput) {
+  std::vector<FftData> cn(1);
+  std::vector<FftData> cn_high_bands(1);
+  std::vector<FftData> E(1);
+  std::array<float, kFftLengthBy2Plus1> gain;
+
+  EXPECT_DEATH(SuppressionFilter(Aec3Optimization::kNone, 16000, 1)
+                   .ApplyGain(cn, cn_high_bands, gain, 1.0f, E, nullptr),
+               "");
+}
+
+// Verifies the check for allowed sample rate.
+TEST(SuppressionFilterDeathTest, ProperSampleRate) {
+  EXPECT_DEATH(SuppressionFilter(Aec3Optimization::kNone, 16001, 1), "");
+}
+
+#endif
+
+// Verifies that no comfort noise is added when the gain is 1.
+TEST(SuppressionFilter, ComfortNoiseInUnityGain) {
+  SuppressionFilter filter(Aec3Optimization::kNone, 48000, 1);
+  std::vector<FftData> cn(1);
+  std::vector<FftData> cn_high_bands(1);
+  std::array<float, kFftLengthBy2Plus1> gain;
+  std::array<float, kFftLengthBy2> e_old_;
+  Aec3Fft fft;
+
+  e_old_.fill(0.f);
+  gain.fill(1.f);
+  cn[0].re.fill(1.f);
+  cn[0].im.fill(1.f);
+  cn_high_bands[0].re.fill(1.f);
+  cn_high_bands[0].im.fill(1.f);
+
+  Block e(3, kBlockSize);
+  Block e_ref = e;
+
+  std::vector<FftData> E(1);
+  fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_,
+                Aec3Fft::Window::kSqrtHanning, &E[0]);
+  std::copy(e.begin(/*band=*/0, /*channel=*/0),
+            e.end(/*band=*/0, /*channel=*/0), e_old_.begin());
+
+  filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e);
+
+  for (int band = 0; band < e.NumBands(); ++band) {
+    for (int channel = 0; channel < e.NumChannels(); ++channel) {
+      const auto e_view = e.View(band, channel);
+      const auto e_ref_view = e_ref.View(band, channel);
+      for (size_t sample = 0; sample < e_view.size(); ++sample) {
+        EXPECT_EQ(e_ref_view[sample], e_view[sample]);
+      }
+    }
+  }
+}
+
+// Verifies that the suppressor is able to suppress a signal.
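+// The test tone at 16000 * 40 / kFftLengthBy2 / 2 = 5000 Hz falls in band 40
+// (125 Hz per band for the lowest band), well above band 10 where the gains
+// are zeroed, so its energy should be strongly attenuated.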
+TEST(SuppressionFilter, SignalSuppression) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  constexpr size_t kNumChannels = 1;
+
+  SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1);
+  std::vector<FftData> cn(1);
+  std::vector<FftData> cn_high_bands(1);
+  std::array<float, kFftLengthBy2> e_old_;
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2Plus1> gain;
+  Block e(kNumBands, kNumChannels);
+  e_old_.fill(0.f);
+
+  gain.fill(1.f);
+  std::for_each(gain.begin() + 10, gain.end(), [](float& a) { a = 0.f; });
+
+  cn[0].re.fill(0.f);
+  cn[0].im.fill(0.f);
+  cn_high_bands[0].re.fill(0.f);
+  cn_high_bands[0].im.fill(0.f);
+
+  size_t sample_counter = 0;
+
+  float e0_input = 0.f;
+  float e0_output = 0.f;
+  for (size_t k = 0; k < 100; ++k) {
+    ProduceSinusoid(16000, 16000 * 40 / kFftLengthBy2 / 2, &sample_counter, &e);
+    e0_input = std::inner_product(e.begin(/*band=*/0, /*channel=*/0),
+                                  e.end(/*band=*/0, /*channel=*/0),
+                                  e.begin(/*band=*/0, /*channel=*/0), e0_input);
+
+    std::vector<FftData> E(1);
+    fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_,
+                  Aec3Fft::Window::kSqrtHanning, &E[0]);
+    std::copy(e.begin(/*band=*/0, /*channel=*/0),
+              e.end(/*band=*/0, /*channel=*/0), e_old_.begin());
+
+    filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e);
+    e0_output = std::inner_product(
+        e.begin(/*band=*/0, /*channel=*/0), e.end(/*band=*/0, /*channel=*/0),
+        e.begin(/*band=*/0, /*channel=*/0), e0_output);
+  }
+
+  EXPECT_LT(e0_output, e0_input / 1000.f);
+}
+
+// Verifies that the suppressor is able to pass through a desired signal while
+// applying suppression for some frequencies.
+TEST(SuppressionFilter, SignalTransparency) {
+  constexpr size_t kNumChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+  SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1);
+  std::vector<FftData> cn(1);
+  std::array<float, kFftLengthBy2> e_old_;
+  Aec3Fft fft;
+  std::vector<FftData> cn_high_bands(1);
+  std::array<float, kFftLengthBy2Plus1> gain;
+  Block e(kNumBands, kNumChannels);
+  e_old_.fill(0.f);
+  gain.fill(1.f);
+  std::for_each(gain.begin() + 30, gain.end(), [](float& a) { a = 0.f; });
+
+  cn[0].re.fill(0.f);
+  cn[0].im.fill(0.f);
+  cn_high_bands[0].re.fill(0.f);
+  cn_high_bands[0].im.fill(0.f);
+
+  size_t sample_counter = 0;
+
+  float e0_input = 0.f;
+  float e0_output = 0.f;
+  for (size_t k = 0; k < 100; ++k) {
+    ProduceSinusoid(16000, 16000 * 10 / kFftLengthBy2 / 2, &sample_counter, &e);
+    e0_input = std::inner_product(e.begin(/*band=*/0, /*channel=*/0),
+                                  e.end(/*band=*/0, /*channel=*/0),
+                                  e.begin(/*band=*/0, /*channel=*/0), e0_input);
+
+    std::vector<FftData> E(1);
+    fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_,
+                  Aec3Fft::Window::kSqrtHanning, &E[0]);
+    std::copy(e.begin(/*band=*/0, /*channel=*/0),
+              e.end(/*band=*/0, /*channel=*/0), e_old_.begin());
+
+    filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e);
+    e0_output = std::inner_product(
+        e.begin(/*band=*/0, /*channel=*/0), e.end(/*band=*/0, /*channel=*/0),
+        e.begin(/*band=*/0, /*channel=*/0), e0_output);
+  }
+
+  EXPECT_LT(0.9f * e0_input, e0_output);
+}
+
+// Verifies the delay that the suppressor introduces.
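+// The analysis/synthesis filterbank introduces a delay of one block: the test
+// feeds a ramp and expects the output to equal the input shifted by
+// kBlockSize samples once the pipeline has filled (k > 2).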
+TEST(SuppressionFilter, Delay) { + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1); + std::vector cn(1); + std::vector cn_high_bands(1); + std::array e_old_; + Aec3Fft fft; + std::array gain; + Block e(kNumBands, kNumChannels); + + gain.fill(1.f); + + cn[0].re.fill(0.f); + cn[0].im.fill(0.f); + cn_high_bands[0].re.fill(0.f); + cn_high_bands[0].im.fill(0.f); + + for (size_t k = 0; k < 100; ++k) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + auto e_view = e.View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + e_view[sample] = k * kBlockSize + sample + channel; + } + } + } + + std::vector E(1); + fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_, + Aec3Fft::Window::kSqrtHanning, &E[0]); + std::copy(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), e_old_.begin()); + + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e); + if (k > 2) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + const auto e_view = e.View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + EXPECT_NEAR(k * kBlockSize + sample - kBlockSize + channel, + e_view[sample], 0.01); + } + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc new file mode 100644 index 0000000000..037dabaabe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_gain.h" + +#include +#include + +#include +#include + +#include "modules/audio_processing/aec3/dominant_nearend_detector.h" +#include "modules/audio_processing/aec3/moving_average.h" +#include "modules/audio_processing/aec3/subband_nearend_detector.h" +#include "modules/audio_processing/aec3/vector_math.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +void LimitLowFrequencyGains(std::array* gain) { + // Limit the low frequency gains to avoid the impact of the high-pass filter + // on the lower-frequency gain influencing the overall achieved gain. + (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]); +} + +void LimitHighFrequencyGains(bool conservative_hf_suppression, + std::array* gain) { + // Limit the high frequency gains to avoid echo leakage due to an imperfect + // filter. 
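+  // With 64 bands covering 0-8 kHz (125 Hz per band), kFirstBandToLimit =
+  // (64 * 2000) / 8000 = 16 corresponds to 2 kHz; all bands above it are
+  // capped at the gain of that band.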
+ constexpr size_t kFirstBandToLimit = (64 * 2000) / 8000; + const float min_upper_gain = (*gain)[kFirstBandToLimit]; + std::for_each( + gain->begin() + kFirstBandToLimit + 1, gain->end(), + [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); }); + (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1]; + + if (conservative_hf_suppression) { + // Limits the gain in the frequencies for which the adaptive filter has not + // converged. + // TODO(peah): Make adaptive to take the actual filter error into account. + constexpr size_t kUpperAccurateBandPlus1 = 29; + + constexpr float oneByBandsInSum = + 1 / static_cast(kUpperAccurateBandPlus1 - 20); + const float hf_gain_bound = + std::accumulate(gain->begin() + 20, + gain->begin() + kUpperAccurateBandPlus1, 0.f) * + oneByBandsInSum; + + std::for_each( + gain->begin() + kUpperAccurateBandPlus1, gain->end(), + [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); }); + } +} + +// Scales the echo according to assessed audibility at the other end. +void WeightEchoForAudibility(const EchoCanceller3Config& config, + rtc::ArrayView echo, + rtc::ArrayView weighted_echo) { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, echo.size()); + RTC_DCHECK_EQ(kFftLengthBy2Plus1, weighted_echo.size()); + + auto weigh = [](float threshold, float normalizer, size_t begin, size_t end, + rtc::ArrayView echo, + rtc::ArrayView weighted_echo) { + for (size_t k = begin; k < end; ++k) { + if (echo[k] < threshold) { + float tmp = (threshold - echo[k]) * normalizer; + weighted_echo[k] = echo[k] * std::max(0.f, 1.f - tmp * tmp); + } else { + weighted_echo[k] = echo[k]; + } + } + }; + + float threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_lf; + float normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 0, 3, echo, weighted_echo); + + threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_mf; + normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 3, 7, echo, weighted_echo); + + threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_hf; + normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo); +} + +} // namespace + +std::atomic SuppressionGain::instance_count_(0); + +float SuppressionGain::UpperBandsGain( + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + const absl::optional& narrow_peak_band, + bool saturated_echo, + const Block& render, + const std::array& low_band_gain) const { + RTC_DCHECK_LT(0, render.NumBands()); + if (render.NumBands() == 1) { + return 1.f; + } + const int num_render_channels = render.NumChannels(); + + if (narrow_peak_band && + (*narrow_peak_band > static_cast(kFftLengthBy2Plus1 - 10))) { + return 0.001f; + } + + constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; + const float gain_below_8_khz = *std::min_element( + low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); + + // Always attenuate the upper bands when there is saturated echo. + if (saturated_echo) { + return std::min(0.001f, gain_below_8_khz); + } + + // Compute the upper and lower band energies. 
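+  // Energies are sums of squared time-domain samples over the block; the max
+  // across render channels is used so that the most energetic channel drives
+  // the anti-howling decision below.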
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + float low_band_energy = 0.f; + for (int ch = 0; ch < num_render_channels; ++ch) { + const float channel_energy = + std::accumulate(render.begin(/*band=*/0, ch), + render.end(/*band=*/0, ch), 0.0f, sum_of_squares); + low_band_energy = std::max(low_band_energy, channel_energy); + } + float high_band_energy = 0.f; + for (int k = 1; k < render.NumBands(); ++k) { + for (int ch = 0; ch < num_render_channels; ++ch) { + const float energy = std::accumulate( + render.begin(k, ch), render.end(k, ch), 0.f, sum_of_squares); + high_band_energy = std::max(high_band_energy, energy); + } + } + + // If there is more power in the lower frequencies than the upper frequencies, + // or if the power in upper frequencies is low, do not bound the gain in the + // upper bands. + float anti_howling_gain; + const float activation_threshold = + kBlockSize * config_.suppressor.high_bands_suppression + .anti_howling_activation_threshold; + if (high_band_energy < std::max(low_band_energy, activation_threshold)) { + anti_howling_gain = 1.f; + } else { + // In all other cases, bound the gain for upper frequencies. + RTC_DCHECK_LE(low_band_energy, high_band_energy); + RTC_DCHECK_NE(0.f, high_band_energy); + anti_howling_gain = + config_.suppressor.high_bands_suppression.anti_howling_gain * + sqrtf(low_band_energy / high_band_energy); + } + + float gain_bound = 1.f; + if (!dominant_nearend_detector_->IsNearendState()) { + // Bound the upper gain during significant echo activity. + const auto& cfg = config_.suppressor.high_bands_suppression; + auto low_frequency_energy = [](rtc::ArrayView spectrum) { + RTC_DCHECK_LE(16, spectrum.size()); + return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); + }; + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + const float echo_sum = low_frequency_energy(echo_spectrum[ch]); + const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]); + if (echo_sum > cfg.enr_threshold * noise_sum) { + gain_bound = cfg.max_gain_during_echo; + break; + } + } + } + + // Choose the gain as the minimum of the lower and upper gains. + return std::min(std::min(gain_below_8_khz, anti_howling_gain), gain_bound); +} + +// Computes the gain to reduce the echo to a non audible level. +void SuppressionGain::GainToNoAudibleEcho( + const std::array& nearend, + const std::array& echo, + const std::array& masker, + std::array* gain) const { + const auto& p = dominant_nearend_detector_->IsNearendState() ? nearend_params_ + : normal_params_; + for (size_t k = 0; k < gain->size(); ++k) { + float enr = echo[k] / (nearend[k] + 1.f); // Echo-to-nearend ratio. + float emr = echo[k] / (masker[k] + 1.f); // Echo-to-masker (noise) ratio. + float g = 1.0f; + if (enr > p.enr_transparent_[k] && emr > p.emr_transparent_[k]) { + g = (p.enr_suppress_[k] - enr) / + (p.enr_suppress_[k] - p.enr_transparent_[k]); + g = std::max(g, p.emr_transparent_[k] / emr); + } + (*gain)[k] = g; + } +} + +// Compute the minimum gain as the attenuating gain to put the signal just +// above the zero sample values. +void SuppressionGain::GetMinGain( + rtc::ArrayView weighted_residual_echo, + rtc::ArrayView last_nearend, + rtc::ArrayView last_echo, + bool low_noise_render, + bool saturated_echo, + rtc::ArrayView min_gain) const { + if (!saturated_echo) { + const float min_echo_power = + low_noise_render ? 
config_.echo_audibility.low_render_limit + : config_.echo_audibility.normal_render_limit; + + for (size_t k = 0; k < min_gain.size(); ++k) { + min_gain[k] = weighted_residual_echo[k] > 0.f + ? min_echo_power / weighted_residual_echo[k] + : 1.f; + min_gain[k] = std::min(min_gain[k], 1.f); + } + + if (!initial_state_ || + config_.suppressor.lf_smoothing_during_initial_phase) { + const float& dec = dominant_nearend_detector_->IsNearendState() + ? nearend_params_.max_dec_factor_lf + : normal_params_.max_dec_factor_lf; + + for (int k = 0; k <= config_.suppressor.last_lf_smoothing_band; ++k) { + // Make sure the gains of the low frequencies do not decrease too + // quickly after strong nearend. + if (last_nearend[k] > last_echo[k] || + k <= config_.suppressor.last_permanent_lf_smoothing_band) { + min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec); + min_gain[k] = std::min(min_gain[k], 1.f); + } + } + } + } else { + std::fill(min_gain.begin(), min_gain.end(), 0.f); + } +} + +// Compute the maximum gain by limiting the gain increase from the previous +// gain. +void SuppressionGain::GetMaxGain(rtc::ArrayView max_gain) const { + const auto& inc = dominant_nearend_detector_->IsNearendState() + ? nearend_params_.max_inc_factor + : normal_params_.max_inc_factor; + const auto& floor = config_.suppressor.floor_first_increase; + for (size_t k = 0; k < max_gain.size(); ++k) { + max_gain[k] = std::min(std::max(last_gain_[k] * inc, floor), 1.f); + } +} + +void SuppressionGain::LowerBandGain( + bool low_noise_render, + const AecState& aec_state, + rtc::ArrayView> + suppressor_input, + rtc::ArrayView> residual_echo, + rtc::ArrayView> comfort_noise, + bool clock_drift, + std::array* gain) { + gain->fill(1.f); + const bool saturated_echo = aec_state.SaturatedEcho(); + std::array max_gain; + GetMaxGain(max_gain); + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::array G; + std::array nearend; + nearend_smoothers_[ch].Average(suppressor_input[ch], nearend); + + // Weight echo power in terms of audibility. + std::array weighted_residual_echo; + WeightEchoForAudibility(config_, residual_echo[ch], weighted_residual_echo); + + std::array min_gain; + GetMinGain(weighted_residual_echo, last_nearend_[ch], last_echo_[ch], + low_noise_render, saturated_echo, min_gain); + + GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise[0], &G); + + // Clamp gains. + for (size_t k = 0; k < gain->size(); ++k) { + G[k] = std::max(std::min(G[k], max_gain[k]), min_gain[k]); + (*gain)[k] = std::min((*gain)[k], G[k]); + } + + // Store data required for the gain computation of the next block. + std::copy(nearend.begin(), nearend.end(), last_nearend_[ch].begin()); + std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(), + last_echo_[ch].begin()); + } + + LimitLowFrequencyGains(gain); + // Use conservative high-frequency gains during clock-drift or when not in + // dominant nearend. + if (!dominant_nearend_detector_->IsNearendState() || clock_drift || + config_.suppressor.conservative_hf_suppression) { + LimitHighFrequencyGains(config_.suppressor.conservative_hf_suppression, + gain); + } + + // Store computed gains. + std::copy(gain->begin(), gain->end(), last_gain_.begin()); + + // Transform gains to amplitude domain. 
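+  // The gains computed above act on power spectra; the element-wise square
+  // root converts them to amplitude-domain gains before application.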
+ aec3::VectorMath(optimization_).Sqrt(*gain); +} + +SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, + Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + optimization_(optimization), + config_(config), + num_capture_channels_(num_capture_channels), + state_change_duration_blocks_( + static_cast(config_.filter.config_change_duration_blocks)), + last_nearend_(num_capture_channels_, {0}), + last_echo_(num_capture_channels_, {0}), + nearend_smoothers_( + num_capture_channels_, + aec3::MovingAverage(kFftLengthBy2Plus1, + config.suppressor.nearend_average_blocks)), + nearend_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.nearend_tuning), + normal_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.normal_tuning), + use_unbounded_echo_spectrum_(config.suppressor.dominant_nearend_detection + .use_unbounded_echo_spectrum) { + RTC_DCHECK_LT(0, state_change_duration_blocks_); + last_gain_.fill(1.f); + if (config_.suppressor.use_subband_nearend_detection) { + dominant_nearend_detector_ = std::make_unique( + config_.suppressor.subband_nearend_detection, num_capture_channels_); + } else { + dominant_nearend_detector_ = std::make_unique( + config_.suppressor.dominant_nearend_detection, num_capture_channels_); + } + RTC_DCHECK(dominant_nearend_detector_); +} + +SuppressionGain::~SuppressionGain() = default; + +void SuppressionGain::GetGain( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum_unbounded, + rtc::ArrayView> + comfort_noise_spectrum, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + const Block& render, + bool clock_drift, + float* high_bands_gain, + std::array* low_band_gain) { + RTC_DCHECK(high_bands_gain); + RTC_DCHECK(low_band_gain); + + // Choose residual echo spectrum for dominant nearend detection. + const auto echo = use_unbounded_echo_spectrum_ + ? residual_echo_spectrum_unbounded + : residual_echo_spectrum; + + // Update the nearend state selection. + dominant_nearend_detector_->Update(nearend_spectrum, echo, + comfort_noise_spectrum, initial_state_); + + // Compute gain for the lower band. + bool low_noise_render = low_render_detector_.Detect(render); + LowerBandGain(low_noise_render, aec_state, nearend_spectrum, + residual_echo_spectrum, comfort_noise_spectrum, clock_drift, + low_band_gain); + + // Compute the gain for the upper bands. + const absl::optional narrow_peak_band = + render_signal_analyzer.NarrowPeakBand(); + + *high_bands_gain = + UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band, + aec_state.SaturatedEcho(), render, *low_band_gain); + + data_dumper_->DumpRaw("aec3_dominant_nearend", + dominant_nearend_detector_->IsNearendState()); +} + +void SuppressionGain::SetInitialState(bool state) { + initial_state_ = state; + if (state) { + initial_state_change_counter_ = state_change_duration_blocks_; + } else { + initial_state_change_counter_ = 0; + } +} + +// Detects when the render signal can be considered to have low power and +// consist of stationary noise. 
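+// The render is considered low-noise while the smoothed block power stays
+// below kThreshold = 50 * 50 * 64 (an RMS amplitude of roughly 50 over a
+// 64-sample block) and no single squared sample exceeds three times the
+// average power, which rules out transients.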
+bool SuppressionGain::LowNoiseRenderDetector::Detect(const Block& render) { + float x2_sum = 0.f; + float x2_max = 0.f; + for (int ch = 0; ch < render.NumChannels(); ++ch) { + for (float x_k : render.View(/*band=*/0, ch)) { + const float x2 = x_k * x_k; + x2_sum += x2; + x2_max = std::max(x2_max, x2); + } + } + x2_sum = x2_sum / render.NumChannels(); + + constexpr float kThreshold = 50.f * 50.f * 64.f; + const bool low_noise_render = + average_power_ < kThreshold && x2_max < 3 * average_power_; + average_power_ = average_power_ * 0.9f + x2_sum * 0.1f; + return low_noise_render; +} + +SuppressionGain::GainParameters::GainParameters( + int last_lf_band, + int first_hf_band, + const EchoCanceller3Config::Suppressor::Tuning& tuning) + : max_inc_factor(tuning.max_inc_factor), + max_dec_factor_lf(tuning.max_dec_factor_lf) { + // Compute per-band masking thresholds. + RTC_DCHECK_LT(last_lf_band, first_hf_band); + auto& lf = tuning.mask_lf; + auto& hf = tuning.mask_hf; + RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress); + RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress); + for (int k = 0; k < static_cast(kFftLengthBy2Plus1); k++) { + float a; + if (k <= last_lf_band) { + a = 0.f; + } else if (k < first_hf_band) { + a = (k - last_lf_band) / static_cast(first_hf_band - last_lf_band); + } else { + a = 1.f; + } + enr_transparent_[k] = (1 - a) * lf.enr_transparent + a * hf.enr_transparent; + enr_suppress_[k] = (1 - a) * lf.enr_suppress + a * hf.enr_suppress; + emr_transparent_[k] = (1 - a) * lf.emr_transparent + a * hf.emr_transparent; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h new file mode 100644 index 0000000000..c19ddd7e30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
+
+#include <array>
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/moving_average.h"
+#include "modules/audio_processing/aec3/nearend_detector.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+class SuppressionGain {
+ public:
+  SuppressionGain(const EchoCanceller3Config& config,
+                  Aec3Optimization optimization,
+                  int sample_rate_hz,
+                  size_t num_capture_channels);
+  ~SuppressionGain();
+
+  SuppressionGain(const SuppressionGain&) = delete;
+  SuppressionGain& operator=(const SuppressionGain&) = delete;
+
+  void GetGain(
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          nearend_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          residual_echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          residual_echo_spectrum_unbounded,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
+      const RenderSignalAnalyzer& render_signal_analyzer,
+      const AecState& aec_state,
+      const Block& render,
+      bool clock_drift,
+      float* high_bands_gain,
+      std::array<float, kFftLengthBy2Plus1>* low_band_gain);
+
+  bool IsDominantNearend() {
+    return dominant_nearend_detector_->IsNearendState();
+  }
+
+  // Toggles the usage of the initial state.
+  void SetInitialState(bool state);
+
+ private:
+  // Computes the gain to apply for the bands beyond the first band.
+  float UpperBandsGain(
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
+      const absl::optional<int>& narrow_peak_band,
+      bool saturated_echo,
+      const Block& render,
+      const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const;
+
+  void GainToNoAudibleEcho(const std::array<float, kFftLengthBy2Plus1>& nearend,
+                           const std::array<float, kFftLengthBy2Plus1>& echo,
+                           const std::array<float, kFftLengthBy2Plus1>& masker,
+                           std::array<float, kFftLengthBy2Plus1>* gain) const;
+
+  void LowerBandGain(
+      bool stationary_with_low_power,
+      const AecState& aec_state,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          suppressor_input,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
+      bool clock_drift,
+      std::array<float, kFftLengthBy2Plus1>* gain);
+
+  void GetMinGain(rtc::ArrayView<const float> weighted_residual_echo,
+                  rtc::ArrayView<const float> last_nearend,
+                  rtc::ArrayView<const float> last_echo,
+                  bool low_noise_render,
+                  bool saturated_echo,
+                  rtc::ArrayView<float> min_gain) const;
+
+  void GetMaxGain(rtc::ArrayView<float> max_gain) const;
+
+  class LowNoiseRenderDetector {
+   public:
+    bool Detect(const Block& render);
+
+   private:
+    float average_power_ = 32768.f * 32768.f;
+  };
+
+  struct GainParameters {
+    explicit GainParameters(
+        int last_lf_band,
+        int first_hf_band,
+        const EchoCanceller3Config::Suppressor::Tuning& tuning);
+    const float max_inc_factor;
+    const float max_dec_factor_lf;
+    std::array<float, kFftLengthBy2Plus1> enr_transparent_;
+    std::array<float, kFftLengthBy2Plus1> enr_suppress_;
+    std::array<float, kFftLengthBy2Plus1> emr_transparent_;
+  };
+
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const Aec3Optimization optimization_;
+  const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
+  const int state_change_duration_blocks_;
+  std::array<float, kFftLengthBy2Plus1> last_gain_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_nearend_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_echo_;
+  LowNoiseRenderDetector low_render_detector_;
+  bool initial_state_ = true;
+  int initial_state_change_counter_ = 0;
+  std::vector<aec3::MovingAverage> nearend_smoothers_;
+  const GainParameters nearend_params_;
+  const GainParameters normal_params_;
+  // Determines if the dominant nearend detector uses the unbounded residual
+  // echo spectrum.
+  const bool use_unbounded_echo_spectrum_;
+  std::unique_ptr<NearendDetector> dominant_nearend_detector_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc
new file mode 100644
index 0000000000..02de706c77
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/suppression_gain.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/subtractor.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies that the check for non-null output gains works.
+TEST(SuppressionGainDeathTest, NullOutputGains) {
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.0f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.0f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(1, {0.0f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(1);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.0f});
+  for (auto& S2_k : S2) {
+    S2_k.fill(0.1f);
+  }
+  FftData E;
+  FftData Y;
+  E.re.fill(0.0f);
+  E.im.fill(0.0f);
+  Y.re.fill(0.0f);
+  Y.im.fill(0.0f);
+
+  float high_bands_gain;
+  AecState aec_state(EchoCanceller3Config{}, 1);
+  EXPECT_DEATH(
+      SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1)
+          .GetGain(E2, S2, R2, R2_unbounded, N2,
+                   RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
+                   Block(3, 1), false, &high_bands_gain, nullptr),
+      "");
+}
+
+#endif
+
+// Does a sanity check that the gains are correctly computed.
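+// Three scenarios are exercised: strong background noise masking the echo and
+// strong nearend masking the echo (both should leave the gains near 1), and a
+// strong residual echo on one channel, which should drive the gains to 0 for
+// all channels since the per-band minimum across channels is used.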
+TEST(SuppressionGain, BasicGainComputation) {
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 2;
+  constexpr int kSampleRateHz = 16000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  SuppressionGain suppression_gain(EchoCanceller3Config(), DetectOptimization(),
+                                   kSampleRateHz, kNumCaptureChannels);
+  RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
+  float high_bands_gain;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(kNumCaptureChannels,
+                                                        {0.0f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+      kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumCaptureChannels);
+  std::array<float, kFftLengthBy2Plus1> g;
+  std::vector<SubtractorOutput> output(kNumCaptureChannels);
+  Block x(kNumBands, kNumRenderChannels);
+  EchoCanceller3Config config;
+  AecState aec_state(config, kNumCaptureChannels);
+  ApmDataDumper data_dumper(42);
+  Subtractor subtractor(config, kNumRenderChannels, kNumCaptureChannels,
+                        &data_dumper, DetectOptimization());
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
+  absl::optional<DelayEstimate> delay_estimate;
+
+  // Ensure that a strong noise is detected to mask any echoes.
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(10.f);
+    Y2[ch].fill(10.f);
+    R2[ch].fill(0.1f);
+    R2_unbounded[ch].fill(0.1f);
+    N2[ch].fill(100.0f);
+  }
+  for (auto& subtractor_output : output) {
+    subtractor_output.Reset();
+  }
+
+  // Ensure that the gain is no longer forced to zero.
+  for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
+    aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
+                     subtractor.FilterImpulseResponses(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
+  }
+
+  for (int k = 0; k < 100; ++k) {
+    aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
+                     subtractor.FilterImpulseResponses(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
+    suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state,
+                             x, false, &high_bands_gain, &g);
+  }
+  std::for_each(g.begin(), g.end(),
+                [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); });
+
+  // Ensure that a strong nearend is detected to mask any echoes.
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(100.f);
+    Y2[ch].fill(100.f);
+    R2[ch].fill(0.1f);
+    R2_unbounded[ch].fill(0.1f);
+    S2[ch].fill(0.1f);
+    N2[ch].fill(0.f);
+  }
+
+  for (int k = 0; k < 100; ++k) {
+    aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
+                     subtractor.FilterImpulseResponses(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
+    suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state,
+                             x, false, &high_bands_gain, &g);
+  }
+  std::for_each(g.begin(), g.end(),
+                [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); });
+
+  // Add a strong echo to one of the channels and ensure that it is suppressed.
+ E2[1].fill(1000000000.0f); + R2[1].fill(10000000000000.0f); + R2_unbounded[1].fill(10000000000000.0f); + + for (int k = 0; k < 10; ++k) { + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(0.0f, a, 0.001f); }); +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc new file mode 100644 index 0000000000..489f53f4f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/transparent_mode.h" + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; +constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; + +bool DeactivateTransparentMode() { + return field_trial::IsEnabled("WebRTC-Aec3TransparentModeKillSwitch"); +} + +bool ActivateTransparentModeHmm() { + return field_trial::IsEnabled("WebRTC-Aec3TransparentModeHmm"); +} + +} // namespace + +// Classifier that toggles transparent mode which reduces echo suppression when +// headsets are used. +class TransparentModeImpl : public TransparentMode { + public: + bool Active() const override { return transparency_activated_; } + + void Reset() override { + // Determines if transparent mode is used. + transparency_activated_ = false; + + // The estimated probability of being transparent mode. + prob_transparent_state_ = 0.f; + } + + void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) override { + // The classifier is implemented as a Hidden Markov Model (HMM) with two + // hidden states: "normal" and "transparent". The estimated probabilities of + // the two states are updated by observing filter convergence during active + // render. The filters are less likely to be reported as converged when + // there is no echo present in the microphone signal. + + // The constants have been obtained by observing active_render and + // any_coarse_filter_converged under varying call scenarios. They + // have further been hand tuned to prefer normal state during uncertain + // regions (to avoid echo leaks). + + // The model is only updated during active render. + if (!active_render) + return; + + // Probability of switching from one state to the other. + constexpr float kSwitch = 0.000001f; + + // Probability of observing converged filters in states "normal" and + // "transparent" during active render. + constexpr float kConvergedNormal = 0.01f; + constexpr float kConvergedTransparent = 0.001f; + + // Probability of transitioning to transparent state from normal state and + // transparent state respectively. 
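+    //
+    // Numeric example: starting from prob_transparent_state_ = 0.5, a
+    // converged-filter observation weighs the states by kConvergedNormal =
+    // 0.01 vs kConvergedTransparent = 0.001 while the kSwitch transition
+    // term is negligible, giving a posterior of 0.0005 / (0.005 + 0.0005),
+    // roughly 0.09, for the transparent state: observed convergence is
+    // strong evidence for the normal (echo-present) state.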
+ constexpr float kA[2] = {kSwitch, 1.f - kSwitch}; + + // Probability of the two observations (converged filter or not converged + // filter) in normal state and transparent state respectively. + constexpr float kB[2][2] = { + {1.f - kConvergedNormal, kConvergedNormal}, + {1.f - kConvergedTransparent, kConvergedTransparent}}; + + // Probability of the two states before the update. + const float prob_transparent = prob_transparent_state_; + const float prob_normal = 1.f - prob_transparent; + + // Probability of transitioning to transparent state. + const float prob_transition_transparent = + prob_normal * kA[0] + prob_transparent * kA[1]; + const float prob_transition_normal = 1.f - prob_transition_transparent; + + // Observed output. + const int out = static_cast(any_coarse_filter_converged); + + // Joint probabilites of the observed output and respective states. + const float prob_joint_normal = prob_transition_normal * kB[0][out]; + const float prob_joint_transparent = + prob_transition_transparent * kB[1][out]; + + // Conditional probability of transparent state and the observed output. + RTC_DCHECK_GT(prob_joint_normal + prob_joint_transparent, 0.f); + prob_transparent_state_ = + prob_joint_transparent / (prob_joint_normal + prob_joint_transparent); + + // Transparent mode is only activated when its state probability is high. + // Dead zone between activation/deactivation thresholds to avoid switching + // back and forth. + if (prob_transparent_state_ > 0.95f) { + transparency_activated_ = true; + } else if (prob_transparent_state_ < 0.5f) { + transparency_activated_ = false; + } + } + + private: + bool transparency_activated_ = false; + float prob_transparent_state_ = 0.f; +}; + +// Legacy classifier for toggling transparent mode. +class LegacyTransparentModeImpl : public TransparentMode { + public: + explicit LegacyTransparentModeImpl(const EchoCanceller3Config& config) + : linear_and_stable_echo_path_( + config.echo_removal_control.linear_and_stable_echo_path), + active_blocks_since_sane_filter_(kBlocksSinceConsistentEstimateInit), + non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} + + bool Active() const override { return transparency_activated_; } + + void Reset() override { + non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; + diverged_sequence_size_ = 0; + strong_not_saturated_render_blocks_ = 0; + if (linear_and_stable_echo_path_) { + recent_convergence_during_activity_ = false; + } + } + + void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) override { + ++capture_block_counter_; + strong_not_saturated_render_blocks_ += + active_render && !saturated_capture ? 
1 : 0; + + if (any_filter_consistent && filter_delay_blocks < 5) { + sane_filter_observed_ = true; + active_blocks_since_sane_filter_ = 0; + } else if (active_render) { + ++active_blocks_since_sane_filter_; + } + + bool sane_filter_recently_seen; + if (!sane_filter_observed_) { + sane_filter_recently_seen = + capture_block_counter_ <= 5 * kNumBlocksPerSecond; + } else { + sane_filter_recently_seen = + active_blocks_since_sane_filter_ <= 30 * kNumBlocksPerSecond; + } + + if (any_filter_converged) { + recent_convergence_during_activity_ = true; + active_non_converged_sequence_size_ = 0; + non_converged_sequence_size_ = 0; + ++num_converged_blocks_; + } else { + if (++non_converged_sequence_size_ > 20 * kNumBlocksPerSecond) { + num_converged_blocks_ = 0; + } + + if (active_render && + ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { + recent_convergence_during_activity_ = false; + } + } + + if (!all_filters_diverged) { + diverged_sequence_size_ = 0; + } else if (++diverged_sequence_size_ >= 60) { + // TODO(peah): Change these lines to ensure proper triggering of usable + // filter. + non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; + } + + if (active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { + finite_erl_recently_detected_ = false; + } + if (num_converged_blocks_ > 50) { + finite_erl_recently_detected_ = true; + } + + if (finite_erl_recently_detected_) { + transparency_activated_ = false; + } else if (sane_filter_recently_seen && + recent_convergence_during_activity_) { + transparency_activated_ = false; + } else { + const bool filter_should_have_converged = + strong_not_saturated_render_blocks_ > 6 * kNumBlocksPerSecond; + transparency_activated_ = filter_should_have_converged; + } + } + + private: + const bool linear_and_stable_echo_path_; + size_t capture_block_counter_ = 0; + bool transparency_activated_ = false; + size_t active_blocks_since_sane_filter_; + bool sane_filter_observed_ = false; + bool finite_erl_recently_detected_ = false; + size_t non_converged_sequence_size_; + size_t diverged_sequence_size_ = 0; + size_t active_non_converged_sequence_size_ = 0; + size_t num_converged_blocks_ = 0; + bool recent_convergence_during_activity_ = false; + size_t strong_not_saturated_render_blocks_ = 0; +}; + +std::unique_ptr TransparentMode::Create( + const EchoCanceller3Config& config) { + if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled"; + return nullptr; + } + if (ActivateTransparentModeHmm()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM"; + return std::make_unique(); + } + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy"; + return std::make_unique(config); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h new file mode 100644 index 0000000000..bc5dd0391b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Class for detecting and toggling the transparent mode which causes the +// suppressor to apply less suppression. +class TransparentMode { + public: + static std::unique_ptr Create( + const EchoCanceller3Config& config); + + virtual ~TransparentMode() {} + + // Returns whether the transparent mode should be active. + virtual bool Active() const = 0; + + // Resets the state of the detector. + virtual void Reset() = 0; + + // Updates the detection decision based on new data. + virtual void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) = 0; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h new file mode 100644 index 0000000000..e4d1381ae1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_HAS_NEON) +#include +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +// Provides optimizations for mathematical operations based on vectors. +class VectorMath { + public: + explicit VectorMath(Aec3Optimization optimization) + : optimization_(optimization) {} + + // Elementwise square root. + void SqrtAVX2(rtc::ArrayView x); + void Sqrt(rtc::ArrayView x) { + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + __m128 g = _mm_loadu_ps(&x[j]); + g = _mm_sqrt_ps(g); + _mm_storeu_ps(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } + } break; + case Aec3Optimization::kAvx2: + SqrtAVX2(x); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + float32x4_t g = vld1q_f32(&x[j]); +#if !defined(WEBRTC_ARCH_ARM64) + float32x4_t y = vrsqrteq_f32(g); + + // Code to handle sqrt(0). + // If the input to sqrtf() is zero, a zero will be returned. + // If the input to vrsqrteq_f32() is zero, positive infinity is + // returned. 
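+          // 0x7F800000 is the IEEE 754 single-precision bit pattern for
+          // +infinity (all-ones exponent, zero mantissa); lanes comparing
+          // equal to it are zeroed out below.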
+ const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000); + // check for divide by zero + const uint32x4_t div_by_zero = + vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(y)); + // zero out the positive infinity results + y = vreinterpretq_f32_u32( + vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(y))); + // from arm documentation + // The Newton-Raphson iteration: + // y[n+1] = y[n] * (3 - d * (y[n] * y[n])) / 2) + // converges to (1/√d) if y0 is the result of VRSQRTE applied to d. + // + // Note: The precision did not improve after 2 iterations. + for (int i = 0; i < 2; i++) { + y = vmulq_f32(vrsqrtsq_f32(vmulq_f32(y, y), g), y); + } + // sqrt(g) = g * 1/sqrt(g) + g = vmulq_f32(g, y); +#else + g = vsqrtq_f32(g); +#endif + vst1q_f32(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } + } +#endif + break; + default: + std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); }); + } + } + + // Elementwise vector multiplication z = x * y. + void MultiplyAVX2(rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView z); + void Multiply(rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + RTC_DCHECK_EQ(z.size(), y.size()); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const __m128 x_j = _mm_loadu_ps(&x[j]); + const __m128 y_j = _mm_loadu_ps(&y[j]); + const __m128 z_j = _mm_mul_ps(x_j, y_j); + _mm_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; + case Aec3Optimization::kAvx2: + MultiplyAVX2(x, y, z); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const float32x4_t x_j = vld1q_f32(&x[j]); + const float32x4_t y_j = vld1q_f32(&y[j]); + const float32x4_t z_j = vmulq_f32(x_j, y_j); + vst1q_f32(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; +#endif + default: + std::transform(x.begin(), x.end(), y.begin(), z.begin(), + std::multiplies()); + } + } + + // Elementwise vector accumulation z += x. 
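+  // As for Multiply, the vectorized paths consume four floats per iteration
+  // (SSE2/NEON) or eight (AVX2), with a scalar loop handling the remainder.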
+  void AccumulateAVX2(rtc::ArrayView<const float> x, rtc::ArrayView<float> z);
+  void Accumulate(rtc::ArrayView<const float> x, rtc::ArrayView<float> z) {
+    RTC_DCHECK_EQ(z.size(), x.size());
+    switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        const int x_size = static_cast<int>(x.size());
+        const int vector_limit = x_size >> 2;
+
+        int j = 0;
+        for (; j < vector_limit * 4; j += 4) {
+          const __m128 x_j = _mm_loadu_ps(&x[j]);
+          __m128 z_j = _mm_loadu_ps(&z[j]);
+          z_j = _mm_add_ps(x_j, z_j);
+          _mm_storeu_ps(&z[j], z_j);
+        }
+
+        for (; j < x_size; ++j) {
+          z[j] += x[j];
+        }
+      } break;
+      case Aec3Optimization::kAvx2:
+        AccumulateAVX2(x, z);
+        break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+      case Aec3Optimization::kNeon: {
+        const int x_size = static_cast<int>(x.size());
+        const int vector_limit = x_size >> 2;
+
+        int j = 0;
+        for (; j < vector_limit * 4; j += 4) {
+          const float32x4_t x_j = vld1q_f32(&x[j]);
+          float32x4_t z_j = vld1q_f32(&z[j]);
+          z_j = vaddq_f32(z_j, x_j);
+          vst1q_f32(&z[j], z_j);
+        }
+
+        for (; j < x_size; ++j) {
+          z[j] += x[j];
+        }
+      } break;
+#endif
+      default:
+        std::transform(x.begin(), x.end(), z.begin(), z.begin(),
+                       std::plus<float>());
+    }
+  }
+
+ private:
+  Aec3Optimization optimization_;
+};
+
+}  // namespace aec3
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc
new file mode 100644
index 0000000000..0b5f3c142e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/vector_math.h"
+
+#include <immintrin.h>
+#include <math.h>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Elementwise square root.
+void VectorMath::SqrtAVX2(rtc::ArrayView<float> x) {
+  const int x_size = static_cast<int>(x.size());
+  const int vector_limit = x_size >> 3;
+
+  int j = 0;
+  for (; j < vector_limit * 8; j += 8) {
+    __m256 g = _mm256_loadu_ps(&x[j]);
+    g = _mm256_sqrt_ps(g);
+    _mm256_storeu_ps(&x[j], g);
+  }
+
+  for (; j < x_size; ++j) {
+    x[j] = sqrtf(x[j]);
+  }
+}
+
+// Elementwise vector multiplication z = x * y.
+void VectorMath::MultiplyAVX2(rtc::ArrayView<const float> x,
+                              rtc::ArrayView<const float> y,
+                              rtc::ArrayView<float> z) {
+  RTC_DCHECK_EQ(z.size(), x.size());
+  RTC_DCHECK_EQ(z.size(), y.size());
+  const int x_size = static_cast<int>(x.size());
+  const int vector_limit = x_size >> 3;
+
+  int j = 0;
+  for (; j < vector_limit * 8; j += 8) {
+    const __m256 x_j = _mm256_loadu_ps(&x[j]);
+    const __m256 y_j = _mm256_loadu_ps(&y[j]);
+    const __m256 z_j = _mm256_mul_ps(x_j, y_j);
+    _mm256_storeu_ps(&z[j], z_j);
+  }
+
+  for (; j < x_size; ++j) {
+    z[j] = x[j] * y[j];
+  }
+}
+
+// Elementwise vector accumulation z += x.
+void VectorMath::AccumulateAVX2(rtc::ArrayView x, + rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 3; + + int j = 0; + for (; j < vector_limit * 8; j += 8) { + const __m256 x_j = _mm256_loadu_ps(&x[j]); + __m256 z_j = _mm256_loadu_ps(&z[j]); + z_j = _mm256_add_ps(x_j, z_j); + _mm256_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build new file mode 100644 index 0000000000..89ee0b6a81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + 
DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vector_math_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc new file mode 100644 index 0000000000..a9c37e33cf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/vector_math.h"
+
+#include <math.h>
+
+#include "rtc_base/system/arch.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+#if defined(WEBRTC_HAS_NEON)
+
+TEST(VectorMath, Sqrt) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = (2.f / 3.f) * k;
+  }
+
+  std::copy(x.begin(), x.end(), z.begin());
+  aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+  std::copy(x.begin(), x.end(), z_neon.begin());
+  aec3::VectorMath(Aec3Optimization::kNeon).Sqrt(z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_NEAR(z[k], z_neon[k], 0.0001f);
+    EXPECT_NEAR(sqrtf(x[k]), z_neon[k], 0.0001f);
+  }
+}
+
+TEST(VectorMath, Multiply) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> y;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = k;
+    y[k] = (2.f / 3.f) * k;
+  }
+
+  aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+  aec3::VectorMath(Aec3Optimization::kNeon).Multiply(x, y, z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_FLOAT_EQ(z[k], z_neon[k]);
+    EXPECT_FLOAT_EQ(x[k] * y[k], z_neon[k]);
+  }
+}
+
+TEST(VectorMath, Accumulate) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = k;
+    z[k] = z_neon[k] = 2.f * k;
+  }
+
+  aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+  aec3::VectorMath(Aec3Optimization::kNeon).Accumulate(x, z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_FLOAT_EQ(z[k], z_neon[k]);
+    EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_neon[k]);
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+TEST(VectorMath, Sse2Sqrt) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = (2.f / 3.f) * k;
+    }
+
+    std::copy(x.begin(), x.end(), z.begin());
+    aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+    std::copy(x.begin(), x.end(), z_sse2.begin());
+    aec3::VectorMath(Aec3Optimization::kSse2).Sqrt(z_sse2);
+    EXPECT_EQ(z, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(sqrtf(x[k]), z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Sqrt) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = (2.f / 3.f) * k;
+    }
+
+    std::copy(x.begin(), x.end(), z.begin());
+    aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+    std::copy(x.begin(), x.end(), z_avx2.begin());
+    aec3::VectorMath(Aec3Optimization::kAvx2).Sqrt(z_avx2);
+    EXPECT_EQ(z, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(sqrtf(x[k]), z_avx2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Sse2Multiply) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> y;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      y[k] = (2.f / 3.f) * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+    aec3::VectorMath(Aec3Optimization::kSse2).Multiply(x, y, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(x[k] * y[k], z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Multiply) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> y;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      y[k] = (2.f / 3.f) * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+    aec3::VectorMath(Aec3Optimization::kAvx2).Multiply(x, y, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(x[k] * y[k], z_avx2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Sse2Accumulate) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      z[k] = z_sse2[k] = 2.f * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+    aec3::VectorMath(Aec3Optimization::kSse2).Accumulate(x, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Accumulate) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      z[k] = z_avx2[k] = 2.f * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+    aec3::VectorMath(Aec3Optimization::kAvx2).Accumulate(x, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_avx2[k]);
+    }
+  }
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn
new file mode 100644
index 0000000000..38d8776258
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn
@@ -0,0 +1,112 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
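+# The targets below provide the public AecDump factory interface, a
+# protobuf-backed implementation (built only when rtc_enable_protobuf is
+# set), a null factory fallback, and mocks used by the integration tests.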
+ +import("../../../webrtc.gni") # This contains def of 'rtc_enable_protobuf' + +rtc_source_set("aec_dump") { + visibility = [ "*" ] + sources = [ "aec_dump_factory.h" ] + + deps = [ + "..:aec_dump_interface", + "../../../rtc_base/system:file_wrapper", + "../../../rtc_base/system:rtc_export", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +if (rtc_include_tests) { + rtc_library("mock_aec_dump") { + testonly = true + sources = [ + "mock_aec_dump.cc", + "mock_aec_dump.h", + ] + + deps = [ + "..:aec_dump_interface", + "..:audioproc_test_utils", + "../", + "../../../test:test_support", + ] + } + + rtc_library("mock_aec_dump_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + sources = [ "aec_dump_integration_test.cc" ] + + deps = [ + ":mock_aec_dump", + "..:api", + "..:audioproc_test_utils", + "../", + "//testing/gtest", + ] + } +} + +if (rtc_enable_protobuf) { + rtc_library("aec_dump_impl") { + sources = [ + "aec_dump_impl.cc", + "aec_dump_impl.h", + "capture_stream_info.cc", + "capture_stream_info.h", + ] + + deps = [ + ":aec_dump", + "..:aec_dump_interface", + "../../../api/audio:audio_frame_api", + "../../../api/task_queue", + "../../../rtc_base:checks", + "../../../rtc_base:ignore_wundef", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:protobuf_utils", + "../../../rtc_base:race_checker", + "../../../rtc_base:rtc_event", + "../../../rtc_base:rtc_task_queue", + "../../../rtc_base/system:file_wrapper", + "../../../system_wrappers", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + + deps += [ "../:audioproc_debug_proto" ] + } + + if (rtc_include_tests) { + rtc_library("aec_dump_unittests") { + testonly = true + defines = [] + deps = [ + ":aec_dump", + ":aec_dump_impl", + "..:audioproc_debug_proto", + "../", + "../../../rtc_base:task_queue_for_test", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gtest", + ] + sources = [ "aec_dump_unittest.cc" ] + } + } +} + +rtc_library("null_aec_dump_factory") { + assert_no_deps = [ ":aec_dump_impl" ] + sources = [ "null_aec_dump_factory.cc" ] + + deps = [ + ":aec_dump", + "..:aec_dump_interface", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h new file mode 100644 index 0000000000..20718c3d7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "rtc_base/system/file_wrapper.h" +#include "rtc_base/system/rtc_export.h" + +namespace rtc { +class TaskQueue; +} // namespace rtc + +namespace webrtc { + +class RTC_EXPORT AecDumpFactory { + public: + // The `worker_queue` may not be null and must outlive the created + // AecDump instance. 
`max_log_size_bytes == -1` means the log size + // will be unlimited. `handle` may not be null. The AecDump takes + // responsibility for `handle` and closes it in the destructor. A + // non-null return value indicates that the file has been + // sucessfully opened. + static std::unique_ptr Create(webrtc::FileWrapper file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue); + static std::unique_ptr Create(absl::string_view file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue); + static std::unique_ptr Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build new file mode 100644 index 0000000000..201cd58360 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build @@ -0,0 +1,209 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] 
= "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec_dump_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc new file mode 100644 index 0000000000..94c24048e0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec_dump/aec_dump_impl.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "rtc_base/checks.h" +#include "rtc_base/event.h" +#include "rtc_base/task_queue.h" + +namespace webrtc { + +namespace { +void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config, + webrtc::audioproc::Config* pb_cfg) { + pb_cfg->set_aec_enabled(config.aec_enabled); + pb_cfg->set_aec_delay_agnostic_enabled(config.aec_delay_agnostic_enabled); + pb_cfg->set_aec_drift_compensation_enabled( + config.aec_drift_compensation_enabled); + pb_cfg->set_aec_extended_filter_enabled(config.aec_extended_filter_enabled); + pb_cfg->set_aec_suppression_level(config.aec_suppression_level); + + pb_cfg->set_aecm_enabled(config.aecm_enabled); + pb_cfg->set_aecm_comfort_noise_enabled(config.aecm_comfort_noise_enabled); + pb_cfg->set_aecm_routing_mode(config.aecm_routing_mode); + + pb_cfg->set_agc_enabled(config.agc_enabled); + pb_cfg->set_agc_mode(config.agc_mode); + pb_cfg->set_agc_limiter_enabled(config.agc_limiter_enabled); + pb_cfg->set_noise_robust_agc_enabled(config.noise_robust_agc_enabled); + + pb_cfg->set_hpf_enabled(config.hpf_enabled); + + pb_cfg->set_ns_enabled(config.ns_enabled); + pb_cfg->set_ns_level(config.ns_level); + + pb_cfg->set_transient_suppression_enabled( + config.transient_suppression_enabled); + + pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled); + pb_cfg->set_pre_amplifier_fixed_gain_factor( + config.pre_amplifier_fixed_gain_factor); + + pb_cfg->set_experiments_description(config.experiments_description); +} + +} // namespace + +AecDumpImpl::AecDumpImpl(FileWrapper debug_file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) + : debug_file_(std::move(debug_file)), + num_bytes_left_for_log_(max_log_size_bytes), + worker_queue_(worker_queue) {} + +AecDumpImpl::~AecDumpImpl() { + // Block until all tasks have finished running. + rtc::Event thread_sync_event; + worker_queue_->PostTask([&thread_sync_event] { thread_sync_event.Set(); }); + // Wait until the event has been signaled with .Set(). By then all + // pending tasks will have finished. 
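+  // This matters because `debug_file_` is closed when this object is
+  // destroyed; the barrier guarantees no queued write task can still be
+  // touching the file at that point.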
+  thread_sync_event.Wait(rtc::Event::kForever);
+}
+
+void AecDumpImpl::WriteInitMessage(const ProcessingConfig& api_format,
+                                   int64_t time_now_ms) {
+  auto event = std::make_unique<audioproc::Event>();
+  event->set_type(audioproc::Event::INIT);
+  audioproc::Init* msg = event->mutable_init();
+
+  msg->set_sample_rate(api_format.input_stream().sample_rate_hz());
+  msg->set_output_sample_rate(api_format.output_stream().sample_rate_hz());
+  msg->set_reverse_sample_rate(
+      api_format.reverse_input_stream().sample_rate_hz());
+  msg->set_reverse_output_sample_rate(
+      api_format.reverse_output_stream().sample_rate_hz());
+
+  msg->set_num_input_channels(
+      static_cast<int32_t>(api_format.input_stream().num_channels()));
+  msg->set_num_output_channels(
+      static_cast<int32_t>(api_format.output_stream().num_channels()));
+  msg->set_num_reverse_channels(
+      static_cast<int32_t>(api_format.reverse_input_stream().num_channels()));
+  msg->set_num_reverse_output_channels(
+      api_format.reverse_output_stream().num_channels());
+  msg->set_timestamp_ms(time_now_ms);
+
+  PostWriteToFileTask(std::move(event));
+}
+
+void AecDumpImpl::AddCaptureStreamInput(
+    const AudioFrameView<const float>& src) {
+  capture_stream_info_.AddInput(src);
+}
+
+void AecDumpImpl::AddCaptureStreamOutput(
+    const AudioFrameView<const float>& src) {
+  capture_stream_info_.AddOutput(src);
+}
+
+void AecDumpImpl::AddCaptureStreamInput(const int16_t* const data,
+                                        int num_channels,
+                                        int samples_per_channel) {
+  capture_stream_info_.AddInput(data, num_channels, samples_per_channel);
+}
+
+void AecDumpImpl::AddCaptureStreamOutput(const int16_t* const data,
+                                         int num_channels,
+                                         int samples_per_channel) {
+  capture_stream_info_.AddOutput(data, num_channels, samples_per_channel);
+}
+
+void AecDumpImpl::AddAudioProcessingState(const AudioProcessingState& state) {
+  capture_stream_info_.AddAudioProcessingState(state);
+}
+
+void AecDumpImpl::WriteCaptureStreamMessage() {
+  PostWriteToFileTask(capture_stream_info_.FetchEvent());
+}
+
+void AecDumpImpl::WriteRenderStreamMessage(const int16_t* const data,
+                                           int num_channels,
+                                           int samples_per_channel) {
+  auto event = std::make_unique<audioproc::Event>();
+  event->set_type(audioproc::Event::REVERSE_STREAM);
+  audioproc::ReverseStream* msg = event->mutable_reverse_stream();
+  const size_t data_size =
+      sizeof(int16_t) * samples_per_channel * num_channels;
+  msg->set_data(data, data_size);
+
+  PostWriteToFileTask(std::move(event));
+}
+
+void AecDumpImpl::WriteRenderStreamMessage(
+    const AudioFrameView<const float>& src) {
+  auto event = std::make_unique<audioproc::Event>();
+  event->set_type(audioproc::Event::REVERSE_STREAM);
+
+  audioproc::ReverseStream* msg = event->mutable_reverse_stream();
+
+  for (int i = 0; i < src.num_channels(); ++i) {
+    const auto& channel_view = src.channel(i);
+    msg->add_channel(channel_view.begin(), sizeof(float) * channel_view.size());
+  }
+
+  PostWriteToFileTask(std::move(event));
+}
+
+void AecDumpImpl::WriteConfig(const InternalAPMConfig& config) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+  auto event = std::make_unique<audioproc::Event>();
+  event->set_type(audioproc::Event::CONFIG);
+  CopyFromConfigToEvent(config, event->mutable_config());
+  PostWriteToFileTask(std::move(event));
+}
+
+void AecDumpImpl::WriteRuntimeSetting(
+    const AudioProcessing::RuntimeSetting& runtime_setting) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+  auto event = std::make_unique<audioproc::Event>();
+  event->set_type(audioproc::Event::RUNTIME_SETTING);
+  audioproc::RuntimeSetting* setting = event->mutable_runtime_setting();
+  switch (runtime_setting.type()) {
+    case AudioProcessing::RuntimeSetting::Type::kCapturePreGain: {
+
float x; + runtime_setting.GetFloat(&x); + setting->set_capture_pre_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCapturePostGain: { + float x; + runtime_setting.GetFloat(&x); + setting->set_capture_post_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type:: + kCustomRenderProcessingRuntimeSetting: { + float x; + runtime_setting.GetFloat(&x); + setting->set_custom_render_processing_setting(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCaptureCompressionGain: + // Runtime AGC1 compression gain is ignored. + // TODO(http://bugs.webrtc.org/10432): Store compression gain in aecdumps. + break; + case AudioProcessing::RuntimeSetting::Type::kCaptureFixedPostGain: { + float x; + runtime_setting.GetFloat(&x); + setting->set_capture_fixed_post_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCaptureOutputUsed: { + bool x; + runtime_setting.GetBool(&x); + setting->set_capture_output_used(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kPlayoutVolumeChange: { + int x; + runtime_setting.GetInt(&x); + setting->set_playout_volume_change(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kPlayoutAudioDeviceChange: { + AudioProcessing::RuntimeSetting::PlayoutAudioDeviceInfo src; + runtime_setting.GetPlayoutAudioDeviceInfo(&src); + auto* dst = setting->mutable_playout_audio_device_change(); + dst->set_id(src.id); + dst->set_max_volume(src.max_volume); + break; + } + case AudioProcessing::RuntimeSetting::Type::kNotSpecified: + RTC_DCHECK_NOTREACHED(); + break; + } + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::PostWriteToFileTask(std::unique_ptr event) { + RTC_DCHECK(event); + worker_queue_->PostTask([event = std::move(event), this] { + std::string event_string = event->SerializeAsString(); + const size_t event_byte_size = event_string.size(); + + if (num_bytes_left_for_log_ >= 0) { + const int64_t next_message_size = sizeof(int32_t) + event_byte_size; + if (num_bytes_left_for_log_ < next_message_size) { + // Ensure that no further events are written, even if they're smaller + // than the current event. + num_bytes_left_for_log_ = 0; + return; + } + num_bytes_left_for_log_ -= next_message_size; + } + + // Write message preceded by its size. 
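+    // On-disk framing: each record is the message length written as a 32-bit
+    // integer, followed by that many bytes of serialized audioproc::Event.
+    // Note that only the first sizeof(int32_t) bytes of the size_t
+    // `event_byte_size` are written, which yields the low-order bytes only on
+    // little-endian targets.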
+    if (!debug_file_.Write(&event_byte_size, sizeof(int32_t))) {
+      RTC_DCHECK_NOTREACHED();
+    }
+    if (!debug_file_.Write(event_string.data(), event_string.size())) {
+      RTC_DCHECK_NOTREACHED();
+    }
+  });
+}
+
+std::unique_ptr<AecDump> AecDumpFactory::Create(webrtc::FileWrapper file,
+                                                int64_t max_log_size_bytes,
+                                                rtc::TaskQueue* worker_queue) {
+  RTC_DCHECK(worker_queue);
+  if (!file.is_open())
+    return nullptr;
+
+  return std::make_unique<AecDumpImpl>(std::move(file), max_log_size_bytes,
+                                       worker_queue);
+}
+
+std::unique_ptr<AecDump> AecDumpFactory::Create(absl::string_view file_name,
+                                                int64_t max_log_size_bytes,
+                                                rtc::TaskQueue* worker_queue) {
+  return Create(FileWrapper::OpenWriteOnly(file_name), max_log_size_bytes,
+                worker_queue);
+}
+
+std::unique_ptr<AecDump> AecDumpFactory::Create(FILE* handle,
+                                                int64_t max_log_size_bytes,
+                                                rtc::TaskQueue* worker_queue) {
+  return Create(FileWrapper(handle), max_log_size_bytes, worker_queue);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h
new file mode 100644
index 0000000000..fac3712b7a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/aec_dump/capture_stream_info.h"
+#include "modules/audio_processing/include/aec_dump.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "rtc_base/task_queue.h"
+#include "rtc_base/thread_annotations.h"
+
+// Files generated at build-time by the protobuf compiler.
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
+#else
+#include "modules/audio_processing/debug.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+
+// Task-queue based implementation of AecDump. It is thread-safe by
+// relying on locks in TaskQueue.
+class AecDumpImpl : public AecDump {
+ public:
+  // `max_log_size_bytes` - maximum number of bytes to write to the debug
+  // file; `max_log_size_bytes == -1` means the log size will be unlimited.
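+  //
+  // A minimal usage sketch (mirroring aec_dump_unittest.cc; the file name
+  // is a placeholder, `apm` is an initialized AudioProcessing instance, and
+  // the task queue must outlive the returned AecDump):
+  //
+  //   webrtc::TaskQueueForTest worker_queue("aec_dump");
+  //   std::unique_ptr<webrtc::AecDump> dump =
+  //       webrtc::AecDumpFactory::Create("audio.aecdump", -1, &worker_queue);
+  //   if (dump) {
+  //     apm->AttachAecDump(std::move(dump));
+  //   }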
+  AecDumpImpl(FileWrapper debug_file,
+              int64_t max_log_size_bytes,
+              rtc::TaskQueue* worker_queue);
+  AecDumpImpl(const AecDumpImpl&) = delete;
+  AecDumpImpl& operator=(const AecDumpImpl&) = delete;
+  ~AecDumpImpl() override;
+
+  void WriteInitMessage(const ProcessingConfig& api_format,
+                        int64_t time_now_ms) override;
+  void AddCaptureStreamInput(const AudioFrameView<const float>& src) override;
+  void AddCaptureStreamOutput(const AudioFrameView<const float>& src) override;
+  void AddCaptureStreamInput(const int16_t* const data,
+                             int num_channels,
+                             int samples_per_channel) override;
+  void AddCaptureStreamOutput(const int16_t* const data,
+                              int num_channels,
+                              int samples_per_channel) override;
+  void AddAudioProcessingState(const AudioProcessingState& state) override;
+  void WriteCaptureStreamMessage() override;
+
+  void WriteRenderStreamMessage(const int16_t* const data,
+                                int num_channels,
+                                int samples_per_channel) override;
+  void WriteRenderStreamMessage(
+      const AudioFrameView<const float>& src) override;
+
+  void WriteConfig(const InternalAPMConfig& config) override;
+
+  void WriteRuntimeSetting(
+      const AudioProcessing::RuntimeSetting& runtime_setting) override;
+
+ private:
+  void PostWriteToFileTask(std::unique_ptr<audioproc::Event> event);
+
+  FileWrapper debug_file_;
+  int64_t num_bytes_left_for_log_ = 0;
+  rtc::RaceChecker race_checker_;
+  rtc::TaskQueue* worker_queue_;
+  CaptureStreamInfo capture_stream_info_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
new file mode 100644
index 0000000000..503135d87f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include +#include +#include + +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::Exactly; +using ::testing::StrictMock; + +namespace { +rtc::scoped_refptr CreateAudioProcessing() { + rtc::scoped_refptr apm( + webrtc::AudioProcessingBuilderForTesting().Create()); + RTC_DCHECK(apm); + return apm; +} + +std::unique_ptr CreateMockAecDump() { + auto mock_aec_dump = + std::make_unique>(); + EXPECT_CALL(*mock_aec_dump.get(), WriteConfig(_)).Times(AtLeast(1)); + EXPECT_CALL(*mock_aec_dump.get(), WriteInitMessage(_, _)).Times(AtLeast(1)); + return std::unique_ptr(std::move(mock_aec_dump)); +} + +} // namespace + +TEST(AecDumpIntegration, ConfigurationAndInitShouldBeLogged) { + auto apm = CreateAudioProcessing(); + + apm->AttachAecDump(CreateMockAecDump()); +} + +TEST(AecDumpIntegration, + RenderStreamShouldBeLoggedOnceEveryProcessReverseStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + constexpr int kNumChannels = 1; + constexpr int kNumSampleRateHz = 16000; + constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100; + std::array frame; + frame.fill(0.f); + webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels); + + EXPECT_CALL(*mock_aec_dump.get(), WriteRenderStreamMessage(_, _, _)) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessReverseStream(frame.data(), stream_config, stream_config, + frame.data()); +} + +TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + constexpr int kNumChannels = 1; + constexpr int kNumSampleRateHz = 16000; + constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100; + std::array frame; + frame.fill(0.f); + + webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels); + + EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamInput(_, _, _)) + .Times(AtLeast(1)); + + EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamOutput(_, _, _)) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), AddAudioProcessingState(_)) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), WriteCaptureStreamMessage()) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data()); +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc new file mode 100644 index 0000000000..62f896fe14 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(AecDumper, APICallsDoNotCrash) { + // Note order of initialization: Task queue has to be initialized + // before AecDump. + webrtc::TaskQueueForTest file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + + constexpr int kNumChannels = 1; + constexpr int kNumSamplesPerChannel = 160; + std::array frame; + frame.fill(0.f); + aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels, + kNumSamplesPerChannel); + + aec_dump->AddCaptureStreamInput(frame.data(), kNumChannels, + kNumSamplesPerChannel); + aec_dump->AddCaptureStreamOutput(frame.data(), kNumChannels, + kNumSamplesPerChannel); + + aec_dump->WriteCaptureStreamMessage(); + + webrtc::InternalAPMConfig apm_config; + aec_dump->WriteConfig(apm_config); + + webrtc::ProcessingConfig api_format; + constexpr int64_t kTimeNowMs = 123456789ll; + aec_dump->WriteInitMessage(api_format, kTimeNowMs); + } + // Remove file after the AecDump d-tor has finished. + ASSERT_EQ(0, remove(filename.c_str())); +} + +TEST(AecDumper, WriteToFile) { + webrtc::TaskQueueForTest file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + + constexpr int kNumChannels = 1; + constexpr int kNumSamplesPerChannel = 160; + std::array frame; + frame.fill(0.f); + + aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels, + kNumSamplesPerChannel); + } + + // Verify the file has been written after the AecDump d-tor has + // finished. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc new file mode 100644 index 0000000000..7d82a39729 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec_dump/capture_stream_info.h" + +namespace webrtc { + +void CaptureStreamInfo::AddInput(const AudioFrameView& src) { + auto* stream = event_->mutable_stream(); + + for (int i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_input_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddOutput(const AudioFrameView& src) { + auto* stream = event_->mutable_stream(); + + for (int i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_output_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddInput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + auto* stream = event_->mutable_stream(); + const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels; + stream->set_input_data(data, data_size); +} + +void CaptureStreamInfo::AddOutput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + auto* stream = event_->mutable_stream(); + const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels; + stream->set_output_data(data, data_size); +} + +void CaptureStreamInfo::AddAudioProcessingState( + const AecDump::AudioProcessingState& state) { + auto* stream = event_->mutable_stream(); + stream->set_delay(state.delay); + stream->set_drift(state.drift); + if (state.applied_input_volume.has_value()) { + stream->set_applied_input_volume(*state.applied_input_volume); + } + stream->set_keypress(state.keypress); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h new file mode 100644 index 0000000000..0819bbcb23 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ + +#include +#include + +#include "modules/audio_processing/include/aec_dump.h" +#include "rtc_base/ignore_wundef.h" + +// Files generated at build-time by the protobuf compiler. 
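+// (The push/pop pair suppresses -Wundef warnings that the generated
+// protobuf headers can otherwise trigger.)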
+RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +class CaptureStreamInfo { + public: + CaptureStreamInfo() { CreateNewEvent(); } + CaptureStreamInfo(const CaptureStreamInfo&) = delete; + CaptureStreamInfo& operator=(const CaptureStreamInfo&) = delete; + ~CaptureStreamInfo() = default; + + void AddInput(const AudioFrameView& src); + void AddOutput(const AudioFrameView& src); + + void AddInput(const int16_t* const data, + int num_channels, + int samples_per_channel); + void AddOutput(const int16_t* const data, + int num_channels, + int samples_per_channel); + + void AddAudioProcessingState(const AecDump::AudioProcessingState& state); + + std::unique_ptr FetchEvent() { + std::unique_ptr result = std::move(event_); + CreateNewEvent(); + return result; + } + + private: + void CreateNewEvent() { + event_ = std::make_unique(); + event_->set_type(audioproc::Event::STREAM); + } + std::unique_ptr event_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc new file mode 100644 index 0000000000..fe35d81db9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" + +namespace webrtc { + +namespace test { + +MockAecDump::MockAecDump() = default; +MockAecDump::~MockAecDump() = default; +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h new file mode 100644 index 0000000000..b396739de4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ + +#include + +#include "modules/audio_processing/include/aec_dump.h" +#include "test/gmock.h" + +namespace webrtc { + +namespace test { + +class MockAecDump : public AecDump { + public: + MockAecDump(); + virtual ~MockAecDump(); + + MOCK_METHOD(void, + WriteInitMessage, + (const ProcessingConfig& api_format, int64_t time_now_ms), + (override)); + + MOCK_METHOD(void, + AddCaptureStreamInput, + (const AudioFrameView& src), + (override)); + MOCK_METHOD(void, + AddCaptureStreamOutput, + (const AudioFrameView& src), + (override)); + MOCK_METHOD(void, + AddCaptureStreamInput, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + AddCaptureStreamOutput, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + AddAudioProcessingState, + (const AudioProcessingState& state), + (override)); + MOCK_METHOD(void, WriteCaptureStreamMessage, (), (override)); + + MOCK_METHOD(void, + WriteRenderStreamMessage, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + WriteRenderStreamMessage, + (const AudioFrameView& src), + (override)); + + MOCK_METHOD(void, WriteConfig, (const InternalAPMConfig& config), (override)); + + MOCK_METHOD(void, + WriteRuntimeSetting, + (const AudioProcessing::RuntimeSetting& config), + (override)); +}; + +} // namespace test + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc new file mode 100644 index 0000000000..9bd9745069 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/include/aec_dump.h" + +namespace webrtc { + +std::unique_ptr AecDumpFactory::Create(webrtc::FileWrapper file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr AecDumpFactory::Create(absl::string_view file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr AecDumpFactory::Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build new file mode 100644 index 0000000000..974b70b087 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("null_aec_dump_factory_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build new file mode 100644 index 0000000000..1c47bbd5cc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec_dump_interface_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn new file mode 100644 index 0000000000..a77f04aba5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn @@ -0,0 +1,44 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_library("aecm_core") { + sources = [ + "aecm_core.cc", + "aecm_core.h", + "aecm_defines.h", + "echo_control_mobile.cc", + "echo_control_mobile.h", + ] + deps = [ + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:sanitizer", + "../../../system_wrappers", + "../utility:legacy_delay_estimator", + ] + cflags = [] + + if (rtc_build_with_neon) { + sources += [ "aecm_core_neon.cc" ] + + if (target_cpu != "arm64") { + # Enable compilation for the NEON instruction set. 
+ suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags += [ "-mfpu=neon" ] + } + } + + if (target_cpu == "mipsel") { + sources += [ "aecm_core_mips.cc" ] + } else { + sources += [ "aecm_core_c.cc" ] + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc new file mode 100644 index 0000000000..fbc3239732 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc @@ -0,0 +1,1125 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include +#include +#include + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/real_fft.h" +} +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +#ifdef AEC_DEBUG +FILE* dfile; +FILE* testfile; +#endif + +// Initialization table for echo channel in 8 kHz +static const int16_t kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, 1451, 1506, 1562, + 1644, 1726, 1804, 1882, 1918, 1953, 1982, 2010, 2025, 2040, 2034, + 2027, 2021, 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, 1635, + 1604, 1572, 1545, 1517, 1481, 1444, 1405, 1367, 1331, 1294, 1270, + 1245, 1239, 1233, 1247, 1260, 1282, 1303, 1338, 1373, 1407, 1441, + 1470, 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, 1676}; + +// Initialization table for echo channel in 16 kHz +static const int16_t kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, 1953, 2010, 2040, + 2027, 2014, 1980, 1869, 1732, 1635, 1572, 1517, 1444, 1367, 1294, + 1245, 1233, 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, 1676, + 1741, 1802, 1861, 1921, 1983, 2040, 2102, 2170, 2265, 2375, 2515, + 2651, 2781, 2922, 3075, 3253, 3471, 3738, 3976, 4151, 4258, 4308, + 4288, 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, 3153}; + +} // namespace + +const int16_t WebRtcAecm_kCosTable[] = { + 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, 8091, 8067, + 8041, 8012, 7982, 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, + 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, 7094, 7021, 6947, + 6870, 6791, 6710, 6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, + 5892, 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, 4815, + 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, 3719, 3591, 3462, + 3331, 3200, 3068, 2935, 2801, 2667, 2531, 2395, 2258, 2120, 1981, + 1842, 1703, 1563, 1422, 1281, 1140, 998, 856, 713, 571, 428, + 285, 142, 0, -142, -285, -428, -571, -713, -856, -998, -1140, + -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667, + -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, -3845, -3971, -4095, + -4219, -4341, -4461, -4580, -4698, -4815, -4930, -5043, -5155, -5265, -5374, + -5481, -5586, -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, + 
-6542, -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233, -7299, + -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, -7791, -7834, -7874, + -7912, -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, -8147, -8160, + -8172, -8180, -8187, -8190, -8191, -8190, -8187, -8180, -8172, -8160, -8147, + -8130, -8112, -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, -7299, -7233, + -7164, -7094, -7021, -6947, -6870, -6791, -6710, -6627, -6542, -6455, -6366, + -6275, -6182, -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374, -5265, + -5155, -5043, -4930, -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, + -3845, -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667, -2531, + -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, -1140, -998, + -856, -713, -571, -428, -285, -142, 0, 142, 285, 428, 571, + 713, 856, 998, 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, + 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591, + 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698, 4815, 4930, + 5043, 5155, 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991, 6087, + 6182, 6275, 6366, 6455, 6542, 6627, 6710, 6791, 6870, 6947, 7021, + 7094, 7164, 7233, 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, + 7745, 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067, 8091, + 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190}; + +const int16_t WebRtcAecm_kSinTable[] = { + 0, 142, 285, 428, 571, 713, 856, 998, 1140, 1281, 1422, + 1563, 1703, 1842, 1981, 2120, 2258, 2395, 2531, 2667, 2801, 2935, + 3068, 3200, 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219, 4341, + 4461, 4580, 4698, 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, + 5690, 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542, 6627, + 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299, 7362, 7424, + 7483, 7540, 7595, 7647, 7697, 7745, 7791, 7834, 7874, 7912, 7948, + 7982, 8012, 8041, 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180, + 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, + 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834, 7791, 7745, + 7697, 7647, 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, 7094, + 7021, 6947, 6870, 6791, 6710, 6627, 6542, 6455, 6366, 6275, 6182, + 6087, 5991, 5892, 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, + 4930, 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, 3719, + 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667, 2531, 2395, 2258, + 2120, 1981, 1842, 1703, 1563, 1422, 1281, 1140, 998, 856, 713, + 571, 428, 285, 142, 0, -142, -285, -428, -571, -713, -856, + -998, -1140, -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, + -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, -3845, + -3971, -4095, -4219, -4341, -4461, -4580, -4698, -4815, -4930, -5043, -5155, + -5265, -5374, -5481, -5586, -5690, -5792, -5892, -5991, -6087, -6182, -6275, + -6366, -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, + -7233, -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, -7791, + -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, + -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190, -8187, -8180, -8172, + -8160, -8147, -8130, -8112, -8091, -8067, -8041, -8012, -7982, -7948, -7912, + -7874, -7834, -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, + -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710, -6627, -6542, + -6455, -6366, -6275, -6182, -6087, -5991, -5892, 
-5792, -5690, -5586, -5481, + -5374, -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461, -4341, -4219, + -4096, -3971, -3845, -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, + -2667, -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, + -1140, -998, -856, -713, -571, -428, -285, -142}; + + +// Moves the pointer to the next entry and inserts `far_spectrum` and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= MAX_DELAY) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]), far_spectrum, + sizeof(uint16_t) * PART_LEN1); +} + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, + int* far_q, + int delay) { + int buffer_position = 0; + RTC_DCHECK(self); + buffer_position = self->far_history_pos - delay; + + // Check buffer position + if (buffer_position < 0) { + buffer_position += MAX_DELAY; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return &(self->far_history[buffer_position * PART_LEN1]); +} + +// Declare function pointers. +CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; +StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; +ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +AecmCore* WebRtcAecm_CreateCore() { + // Allocate zero-filled memory. 
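+  // Because calloc() zero-fills the instance, every pointer member starts
+  // out NULL, so the error paths below can safely hand a partially
+  // constructed instance to WebRtcAecm_FreeCore(), which is expected to
+  // tolerate NULL members.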
+  AecmCore* aecm = static_cast<AecmCore*>(calloc(1, sizeof(AecmCore)));
+
+  aecm->farFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->farFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->nearNoisyFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->nearNoisyFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->nearCleanFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->nearCleanFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->outFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->outFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->delay_estimator_farend =
+      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, MAX_DELAY);
+  if (aecm->delay_estimator_farend == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+  aecm->delay_estimator =
+      WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0);
+  if (aecm->delay_estimator == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+  // TODO(bjornv): Explicitly disable robust delay validation until no
+  // performance regression has been established. Then remove the line.
+  WebRtc_enable_robust_validation(aecm->delay_estimator, 0);
+
+  aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT);
+  if (aecm->real_fft == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  // Init some aecm pointers. 16 and 32 byte alignment is only necessary
+  // for Neon code currently.
+  aecm->xBuf = (int16_t*)(((uintptr_t)aecm->xBuf_buf + 31) & ~31);
+  aecm->dBufClean = (int16_t*)(((uintptr_t)aecm->dBufClean_buf + 31) & ~31);
+  aecm->dBufNoisy = (int16_t*)(((uintptr_t)aecm->dBufNoisy_buf + 31) & ~31);
+  aecm->outBuf = (int16_t*)(((uintptr_t)aecm->outBuf_buf + 15) & ~15);
+  aecm->channelStored =
+      (int16_t*)(((uintptr_t)aecm->channelStored_buf + 15) & ~15);
+  aecm->channelAdapt16 =
+      (int16_t*)(((uintptr_t)aecm->channelAdapt16_buf + 15) & ~15);
+  aecm->channelAdapt32 =
+      (int32_t*)(((uintptr_t)aecm->channelAdapt32_buf + 31) & ~31);
+
+  return aecm;
+}
+
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) {
+  int i = 0;
+
+  // Reset the stored channel
+  memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1);
+  // Reset the adapted channels
+  memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1);
+  for (i = 0; i < PART_LEN1; i++) {
+    aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16;
+  }
+
+  // Reset channel storing variables
+  aecm->mseAdaptOld = 1000;
+  aecm->mseStoredOld = 1000;
+  aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
+  aecm->mseChannelCount = 0;
+}
+
+static void CalcLinearEnergiesC(AecmCore* aecm,
+                                const uint16_t* far_spectrum,
+                                int32_t* echo_est,
+                                uint32_t* far_energy,
+                                uint32_t* echo_energy_adapt,
+                                uint32_t* echo_energy_stored) {
+  int i;
+
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  for (i = 0; i < PART_LEN1; i++) {
+    echo_est[i] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+    (*far_energy) += (uint32_t)(far_spectrum[i]);
+    *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+    (*echo_energy_stored) += (uint32_t)echo_est[i];
+  }
+}
+
+static void StoreAdaptiveChannelC(AecmCore* aecm,
+                                  const uint16_t* far_spectrum,
+                                  int32_t* echo_est) {
+  int i;
+
+  // During startup we store the channel every block.
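+  // Mirroring the quickly converging adaptive channel into the stored
+  // channel on every startup block keeps the two estimates in sync until
+  // the regular MSE-based storing decision takes over.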
+  memcpy(aecm->channelStored, aecm->channelAdapt16,
+         sizeof(int16_t) * PART_LEN1);
+  // Recalculate echo estimate
+  for (i = 0; i < PART_LEN; i += 4) {
+    echo_est[i] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+    echo_est[i + 1] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], far_spectrum[i + 1]);
+    echo_est[i + 2] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], far_spectrum[i + 2]);
+    echo_est[i + 3] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], far_spectrum[i + 3]);
+  }
+  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+}
+
+static void ResetAdaptiveChannelC(AecmCore* aecm) {
+  int i;
+
+  // The stored channel has a significantly lower MSE than the adaptive one for
+  // two consecutive calculations. Reset the adaptive channel.
+  memcpy(aecm->channelAdapt16, aecm->channelStored,
+         sizeof(int16_t) * PART_LEN1);
+  // Restore the W32 channel
+  for (i = 0; i < PART_LEN; i += 4) {
+    aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+    aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16;
+    aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16;
+    aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16;
+  }
+  aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+}
+
+// Initialize function pointers for ARM Neon platform.
+#if defined(WEBRTC_HAS_NEON)
+static void WebRtcAecm_InitNeon(void) {
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
+}
+#endif
+
+// Initialize function pointers for MIPS platform.
+#if defined(MIPS32_LE)
+static void WebRtcAecm_InitMips(void) {
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips;
+#endif
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips;
+}
+#endif
+
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore(...)
Input: +// - aecm : Pointer to the Echo Suppression instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) { + int i = 0; + int32_t tmp32 = PART_LEN1 * PART_LEN1; + int16_t tmp16 = PART_LEN1; + + if (samplingFreq != 8000 && samplingFreq != 16000) { + samplingFreq = 8000; + return -1; + } + // sanity check of sampling frequency + aecm->mult = (int16_t)samplingFreq / 8000; + + aecm->farBufWritePos = 0; + aecm->farBufReadPos = 0; + aecm->knownDelay = 0; + aecm->lastKnownDelay = 0; + + WebRtc_InitBuffer(aecm->farFrameBuf); + WebRtc_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtc_InitBuffer(aecm->nearCleanFrameBuf); + WebRtc_InitBuffer(aecm->outFrameBuf); + + memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf)); + memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf)); + memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf)); + memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf)); + + aecm->seed = 666; + aecm->totCount = 0; + + if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { + return -1; + } + // Set far end histories to zero + memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY); + memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY); + aecm->far_history_pos = MAX_DELAY; + + aecm->nlpFlag = 1; + aecm->fixedDelay = -1; + + aecm->dfaCleanQDomain = 0; + aecm->dfaCleanQDomainOld = 0; + aecm->dfaNoisyQDomain = 0; + aecm->dfaNoisyQDomainOld = 0; + + memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy)); + aecm->farLogEnergy = 0; + memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy)); + memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy)); + + // Initialize the echo channels with a stored shape. + if (samplingFreq == 8000) { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz); + } else { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz); + } + + memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt)); + memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt)); + aecm->noiseEstCtr = 0; + + aecm->cngMode = AecmTrue; + + memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr)); + memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr)); + // Shape the initial noise level to an approximate pink noise. + for (i = 0; i < (PART_LEN1 >> 1) - 1; i++) { + aecm->noiseEst[i] = (tmp32 << 8); + tmp16--; + tmp32 -= (int32_t)((tmp16 << 1) + 1); + } + for (; i < PART_LEN1; i++) { + aecm->noiseEst[i] = (tmp32 << 8); + } + + aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX; + aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN; + aecm->farEnergyMaxMin = 0; + aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection + // at the beginning. + aecm->farEnergyMSE = 0; + aecm->currentVADValue = 0; + aecm->vadUpdateCount = 0; + aecm->firstVAD = 1; + + aecm->startupState = 0; + aecm->supGain = SUPGAIN_DEFAULT; + aecm->supGainOld = SUPGAIN_DEFAULT; + + aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + + // Assert a preprocessor definition at compile-time. It's an assumption + // used in assembly code, so check the assembly files before any change. 
+ static_assert(PART_LEN % 16 == 0, "PART_LEN is not a multiple of 16"); + + // Initialize function pointers. + WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC; + WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC; + WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC; + +#if defined(WEBRTC_HAS_NEON) + WebRtcAecm_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif + return 0; +} + +// TODO(bjornv): This function is currently not used. Add support for these +// parameters from a higher level +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) { + aecm->nlpFlag = nlpFlag; + aecm->fixedDelay = delay; + + return 0; +} + +void WebRtcAecm_FreeCore(AecmCore* aecm) { + if (aecm == NULL) { + return; + } + + WebRtc_FreeBuffer(aecm->farFrameBuf); + WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtc_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtc_FreeBuffer(aecm->outFrameBuf); + + WebRtc_FreeDelayEstimator(aecm->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend); + WebRtcSpl_FreeRealFFT(aecm->real_fft); + + free(aecm); +} + +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out) { + int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. + int16_t* outBlock = (int16_t*)(((uintptr_t)outBlock_buf + 15) & ~15); + + int16_t farFrame[FRAME_LEN]; + const int16_t* out_ptr = NULL; + int size = 0; + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN); + WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + if (nearendClean != NULL) { + WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + } + + // Process as many blocks as possible. + while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) { + int16_t far_block[PART_LEN]; + const int16_t* far_block_ptr = NULL; + int16_t near_noisy_block[PART_LEN]; + const int16_t* near_noisy_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->farFrameBuf, (void**)&far_block_ptr, far_block, + PART_LEN); + WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf, (void**)&near_noisy_block_ptr, + near_noisy_block, PART_LEN); + if (nearendClean != NULL) { + int16_t near_clean_block[PART_LEN]; + const int16_t* near_clean_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->nearCleanFrameBuf, (void**)&near_clean_block_ptr, + near_clean_block, PART_LEN); + if (WebRtcAecm_ProcessBlock(aecm, far_block_ptr, near_noisy_block_ptr, + near_clean_block_ptr, outBlock) == -1) { + return -1; + } + } else { + if (WebRtcAecm_ProcessBlock(aecm, far_block_ptr, near_noisy_block_ptr, + NULL, outBlock) == -1) { + return -1; + } + } + + WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = (int)WebRtc_available_read(aecm->outFrameBuf); + if (size < FRAME_LEN) { + WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN); + } + + // Obtain an output frame. + WebRtc_ReadBuffer(aecm->outFrameBuf, (void**)&out_ptr, out, FRAME_LEN); + if (out_ptr != out) { + // ReadBuffer() hasn't copied to `out` in this case. 
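+    // WebRtc_ReadBuffer() may hand back a pointer into its internal storage
+    // instead of copying into the caller's array; the data therefore only
+    // needs to be copied when the two addresses differ.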
+ memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t)); + } + + return 0; +} + +// WebRtcAecm_AsymFilt(...) +// +// Performs asymmetric filtering. +// +// Inputs: +// - filtOld : Previous filtered value. +// - inVal : New input value. +// - stepSizePos : Step size when we have a positive contribution. +// - stepSizeNeg : Step size when we have a negative contribution. +// +// Output: +// +// Return: - Filtered value. +// +int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, + const int16_t inVal, + const int16_t stepSizePos, + const int16_t stepSizeNeg) { + int16_t retVal; + + if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN)) { + return inVal; + } + retVal = filtOld; + if (filtOld > inVal) { + retVal -= (filtOld - inVal) >> stepSizeNeg; + } else { + retVal += (inVal - filtOld) >> stepSizePos; + } + + return retVal; +} + +// ExtractFractionPart(a, zeros) +// +// returns the fraction part of `a`, with `zeros` number of leading zeros, as an +// int16_t scaled to Q8. There is no sanity check of `a` in the sense that the +// number of zeros match. +static int16_t ExtractFractionPart(uint32_t a, int zeros) { + return (int16_t)(((a << zeros) & 0x7FFFFFFF) >> 23); +} + +// Calculates and returns the log of `energy` in Q8. The input `energy` is +// supposed to be in Q(`q_domain`). +static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) { + static const int16_t kLogLowValue = PART_LEN_SHIFT << 7; + int16_t log_energy_q8 = kLogLowValue; + if (energy > 0) { + int zeros = WebRtcSpl_NormU32(energy); + int16_t frac = ExtractFractionPart(energy, zeros); + // log2 of `energy` in Q8. + log_energy_q8 += ((31 - zeros) << 8) + frac - (q_domain << 8); + } + return log_energy_q8; +} + +// WebRtcAecm_CalcEnergies(...) +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, i.e. +// internal VAD. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Pointer to farend spectrum. +// @param far_q [in] Q-domain of farend spectrum. +// @param nearEner [in] Near end energy for current block in +// Q(aecm->dfaQDomain). +// @param echoEst [out] Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). 
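+//
+// All energies are handled as log2 values in Q8 (see LogOfEnergyInQ8()), so
+// ratios between near end, far end and echo energies reduce to differences;
+// e.g. a doubling of energy adds exactly 1 << 8 to the stored value.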
+// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst) { + // Local variables + uint32_t tmpAdapt = 0; + uint32_t tmpStored = 0; + uint32_t tmpFar = 0; + + int i; + + int16_t tmp16; + int16_t increase_max_shifts = 4; + int16_t decrease_max_shifts = 11; + int16_t increase_min_shifts = 11; + int16_t decrease_min_shifts = 3; + + // Get log of near end energy and store in buffer + + // Shift buffer + memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of integrated magnitude spectrum (nearEner) + aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain); + + WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, + &tmpStored); + + // Shift buffers + memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of delayed far end energy + aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q); + + // Logarithm of estimated echo energy through adapted channel + aecm->echoAdaptLogEnergy[0] = + LogOfEnergyInQ8(tmpAdapt, RESOLUTION_CHANNEL16 + far_q); + + // Logarithm of estimated echo energy through stored channel + aecm->echoStoredLogEnergy[0] = + LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q); + + // Update farend energy levels (min, max, vad, mse) + if (aecm->farLogEnergy > FAR_ENERGY_MIN) { + if (aecm->startupState == 0) { + increase_max_shifts = 2; + decrease_min_shifts = 2; + increase_min_shifts = 8; + } + + aecm->farEnergyMin = + WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy, + increase_min_shifts, decrease_min_shifts); + aecm->farEnergyMax = + WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy, + increase_max_shifts, decrease_max_shifts); + aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin); + + // Dynamic VAD region size + tmp16 = 2560 - aecm->farEnergyMin; + if (tmp16 > 0) { + tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9); + } else { + tmp16 = 0; + } + tmp16 += FAR_ENERGY_VAD_REGION; + + if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024)) { + // In startup phase or VAD update halted + aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; + } else { + if (aecm->farEnergyVAD > aecm->farLogEnergy) { + aecm->farEnergyVAD += + (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6; + aecm->vadUpdateCount = 0; + } else { + aecm->vadUpdateCount++; + } + } + // Put MSE threshold higher than VAD + aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8); + } + + // Update VAD variables + if (aecm->farLogEnergy > aecm->farEnergyVAD) { + if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) { + // We are in startup or have significant dynamics in input speech level + aecm->currentVADValue = 1; + } + } else { + aecm->currentVADValue = 0; + } + if ((aecm->currentVADValue) && (aecm->firstVAD)) { + aecm->firstVAD = 0; + if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) { + // The estimated echo has higher energy than the near end signal. + // This means that the initialization was too aggressive. Scale + // down by a factor 8 + for (i = 0; i < PART_LEN1; i++) { + aecm->channelAdapt16[i] >>= 3; + } + // Compensate the adapted echo energy level accordingly. + aecm->echoAdaptLogEnergy[0] -= (3 << 8); + aecm->firstVAD = 1; + } + } +} + +// WebRtcAecm_CalcStepSize(...) 
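+//
+// Since the returned value is used as a right-shift count, the effective
+// NLMS step size is 2^-mu. An illustrative reading of the logic below (not a
+// normative table):
+//
+//   VAD inactive                    -> mu = 0, channel update skipped
+//   farLogEnergy near farEnergyMax  -> mu close to MU_MAX (largest step)
+//   farLogEnergy near farEnergyMin  -> mu close to MU_MIN (smallest step)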
+// +// This function calculates the step size used in channel estimation +// +// +// @param aecm [in] Handle of the AECM instance. +// @param mu [out] (Return value) Stepsize in log2(), i.e. number of +// shifts. +// +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) { + int32_t tmp32; + int16_t tmp16; + int16_t mu = MU_MAX; + + // Here we calculate the step size mu used in the + // following NLMS based Channel estimation algorithm + if (!aecm->currentVADValue) { + // Far end energy level too low, no channel update + mu = 0; + } else if (aecm->startupState > 0) { + if (aecm->farEnergyMin >= aecm->farEnergyMax) { + mu = MU_MIN; + } else { + tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin); + tmp32 = tmp16 * MU_DIFF; + tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin); + mu = MU_MIN - 1 - (int16_t)(tmp32); + // The -1 is an alternative to rounding. This way we get a larger + // stepsize, so we in some sense compensate for truncation in NLMS + } + if (mu < MU_MAX) { + mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX + } + } + + return mu; +} + +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. NLMS and decision on channel +// storage. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Absolute value of the farend signal in Q(far_q) +// @param far_q [in] Q-domain of the farend signal +// @param dfa [in] Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// @param mu [in] NLMS step size. +// @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t* const dfa, + const int16_t mu, + int32_t* echoEst) { + uint32_t tmpU32no1, tmpU32no2; + int32_t tmp32no1, tmp32no2; + int32_t mseStored; + int32_t mseAdapt; + + int i; + + int16_t zerosFar, zerosNum, zerosCh, zerosDfa; + int16_t shiftChFar, shiftNum, shift2ResChan; + int16_t tmp16no1; + int16_t xfaQ, dfaQ; + + // This is the channel estimation algorithm. It is base on NLMS but has a + // variable step length, which was calculated above. + if (mu) { + for (i = 0; i < PART_LEN1; i++) { + // Determine norm of channel and farend to make sure we don't get overflow + // in multiplication + zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]); + zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]); + if (zerosCh + zerosFar > 31) { + // Multiplication is safe + tmpU32no1 = + WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i], far_spectrum[i]); + shiftChFar = 0; + } else { + // We need to shift down before multiplication + shiftChFar = 32 - zerosCh - zerosFar; + // If zerosCh == zerosFar == 0, shiftChFar is 32. A + // right shift of 32 is undefined. To avoid that, we + // do this check. + tmpU32no1 = + rtc::dchecked_cast( + shiftChFar >= 32 ? 
0 : aecm->channelAdapt32[i] >> shiftChFar) * + far_spectrum[i]; + } + // Determine Q-domain of numerator + zerosNum = WebRtcSpl_NormU32(tmpU32no1); + if (dfa[i]) { + zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]); + } else { + zerosDfa = 32; + } + tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - RESOLUTION_CHANNEL32 - + far_q + shiftChFar; + if (zerosNum > tmp16no1 + 1) { + xfaQ = tmp16no1; + dfaQ = zerosDfa - 2; + } else { + xfaQ = zerosNum - 2; + dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain - + shiftChFar + xfaQ; + } + // Add in the same Q-domain + tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); + tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ); + tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1; + zerosNum = WebRtcSpl_NormW32(tmp32no1); + if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q))) { + // + // Update is needed + // + // This is what we would like to compute + // + // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i]) + // tmp32norm = (i + 1) + // aecm->channelAdapt[i] += (2^mu) * tmp32no1 + // / (tmp32norm * far_spectrum[i]) + // + + // Make sure we don't get overflow in multiplication. + if (zerosNum + zerosFar > 31) { + if (tmp32no1 > 0) { + tmp32no2 = + (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1, far_spectrum[i]); + } else { + tmp32no2 = + -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1, far_spectrum[i]); + } + shiftNum = 0; + } else { + shiftNum = 32 - (zerosNum + zerosFar); + if (tmp32no1 > 0) { + tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i]; + } else { + tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]); + } + } + // Normalize with respect to frequency bin + tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1); + // Make sure we are in the right Q-domain + shift2ResChan = + shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1); + if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan) { + tmp32no2 = WEBRTC_SPL_WORD32_MAX; + } else { + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan); + } + aecm->channelAdapt32[i] = + WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2); + if (aecm->channelAdapt32[i] < 0) { + // We can never have negative channel gain + aecm->channelAdapt32[i] = 0; + } + aecm->channelAdapt16[i] = (int16_t)(aecm->channelAdapt32[i] >> 16); + } + } + } + // END: Adaptive channel update + + // Determine if we should store or restore the channel + if ((aecm->startupState == 0) & (aecm->currentVADValue)) { + // During startup we store the channel every block, + // and we recalculate echo estimate + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + } else { + if (aecm->farLogEnergy < aecm->farEnergyMSE) { + aecm->mseChannelCount = 0; + } else { + aecm->mseChannelCount++; + } + // Enough data for validation. Store channel if we can. + if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10)) { + // We have enough data. + // Calculate MSE of "Adapt" and "Stored" versions. + // It is actually not MSE, but average absolute error. 
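+      // Since the log energies are in Q8, this absolute-error sum compares
+      // the two estimates on a relative (log-domain) scale rather than in
+      // raw energy units.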
+ mseStored = 0; + mseAdapt = 0; + for (i = 0; i < MIN_MSE_COUNT; i++) { + tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i] - + (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseStored += tmp32no2; + + tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i] - + (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseAdapt += tmp32no2; + } + if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt)) & + ((aecm->mseStoredOld << MSE_RESOLUTION) < + (MIN_MSE_DIFF * aecm->mseAdaptOld))) { + // The stored channel has a significantly lower MSE than the adaptive + // one for two consecutive calculations. Reset the adaptive channel. + WebRtcAecm_ResetAdaptiveChannel(aecm); + } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & + (mseAdapt < aecm->mseThreshold) & + (aecm->mseAdaptOld < aecm->mseThreshold)) { + // The adaptive channel has a significantly lower MSE than the stored + // one. The MSE for the adaptive channel has also been low for two + // consecutive calculations. Store the adaptive channel. + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + + // Update threshold + if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX) { + aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld); + } else { + int scaled_threshold = aecm->mseThreshold * 5 / 8; + aecm->mseThreshold += ((mseAdapt - scaled_threshold) * 205) >> 8; + } + } + + // Reset counter + aecm->mseChannelCount = 0; + + // Store the MSE values. + aecm->mseStoredOld = mseStored; + aecm->mseAdaptOld = mseAdapt; + } + } + // END: Determine if we should store or reset channel estimate. +} + +// CalcSuppressionGain(...) +// +// This function calculates the suppression gain that is used in the Wiener +// filter. +// +// +// @param aecm [i/n] Handle of the AECM instance. +// @param supGain [out] (Return value) Suppression gain with which to scale +// the noise +// level (Q14). +// +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) { + int32_t tmp32no1; + + int16_t supGain = SUPGAIN_DEFAULT; + int16_t tmp16no1; + int16_t dE = 0; + + // Determine suppression gain used in the Wiener filter. The gain is based on + // a mix of far end energy and echo estimation error. Adjust for the far end + // signal level. A low signal level indicates no far end signal, hence we set + // the suppression gain to 0 + if (!aecm->currentVADValue) { + supGain = 0; + } else { + // Adjust for possible double talk. If we have large variations in + // estimation error we likely have double talk (or poor channel). + tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - + ENERGY_DEV_OFFSET); + dE = WEBRTC_SPL_ABS_W16(tmp16no1); + + if (dE < ENERGY_DEV_TOL) { + // Likely no double talk. The better estimation, the more we can suppress + // signal. Update counters + if (dE < SUPGAIN_EPC_DT) { + tmp32no1 = aecm->supGainErrParamDiffAB * dE; + tmp32no1 += (SUPGAIN_EPC_DT >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + supGain = aecm->supGainErrParamA - tmp16no1; + } else { + tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE); + tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16( + tmp32no1, (ENERGY_DEV_TOL - SUPGAIN_EPC_DT)); + supGain = aecm->supGainErrParamD + tmp16no1; + } + } else { + // Likely in double talk. 
Use default value + supGain = aecm->supGainErrParamD; + } + } + + if (supGain > aecm->supGainOld) { + tmp16no1 = supGain; + } else { + tmp16no1 = aecm->supGainOld; + } + aecm->supGainOld = supGain; + if (tmp16no1 < aecm->supGain) { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } else { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } + + // END: Update suppression gain + + return aecm->supGain; +} + +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen) { + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped + while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN) { + // Write to remaining buffer space before wrapping + writeLen = FAR_BUF_LEN - aecm->farBufWritePos; + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos += writeLen; +} + +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay) { + int readLen = farLen; + int readPos = 0; + int delayChange = knownDelay - aecm->lastKnownDelay; + + aecm->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap + while (aecm->farBufReadPos < 0) { + aecm->farBufReadPos += FAR_BUF_LEN; + } + while (aecm->farBufReadPos > FAR_BUF_LEN - 1) { + aecm->farBufReadPos -= FAR_BUF_LEN; + } + + aecm->lastKnownDelay = knownDelay; + + // Check if read position must be wrapped + while (aecm->farBufReadPos + readLen > FAR_BUF_LEN) { + // Read from remaining buffer space before wrapping + readLen = FAR_BUF_LEN - aecm->farBufReadPos; + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos = 0; + readPos = readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos += readLen; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h new file mode 100644 index 0000000000..3de49315c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs echo control (suppression) with fft routines in fixed-point. 
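+//
+// Informal processing outline: input frames are buffered and processed in
+// PART_LEN-sample blocks; each block is windowed and transformed with a real
+// FFT, an NLMS-adapted echo channel estimate is updated from the far end
+// spectrum, and a Wiener-style suppression gain is derived per frequency bin.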
+ +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aecm/aecm_defines.h" + +struct RealFFT; + +namespace webrtc { + +#ifdef _MSC_VER // visual c++ +#define ALIGN8_BEG __declspec(align(8)) +#define ALIGN8_END +#else // gcc or icc +#define ALIGN8_BEG +#define ALIGN8_END __attribute__((aligned(8))) +#endif + +typedef struct { + int16_t real; + int16_t imag; +} ComplexInt16; + +typedef struct { + int farBufWritePos; + int farBufReadPos; + int knownDelay; + int lastKnownDelay; + int firstVAD; // Parameter to control poorly initialized channels + + RingBuffer* farFrameBuf; + RingBuffer* nearNoisyFrameBuf; + RingBuffer* nearCleanFrameBuf; + RingBuffer* outFrameBuf; + + int16_t farBuf[FAR_BUF_LEN]; + + int16_t mult; + uint32_t seed; + + // Delay estimation variables + void* delay_estimator_farend; + void* delay_estimator; + uint16_t currentDelay; + // Far end history variables + // TODO(bjornv): Replace `far_history` with ring_buffer. + uint16_t far_history[PART_LEN1 * MAX_DELAY]; + int far_history_pos; + int far_q_domains[MAX_DELAY]; + + int16_t nlpFlag; + int16_t fixedDelay; + + uint32_t totCount; + + int16_t dfaCleanQDomain; + int16_t dfaCleanQDomainOld; + int16_t dfaNoisyQDomain; + int16_t dfaNoisyQDomainOld; + + int16_t nearLogEnergy[MAX_BUF_LEN]; + int16_t farLogEnergy; + int16_t echoAdaptLogEnergy[MAX_BUF_LEN]; + int16_t echoStoredLogEnergy[MAX_BUF_LEN]; + + // The extra 16 or 32 bytes in the following buffers are for alignment based + // Neon code. + // It's designed this way since the current GCC compiler can't align a + // buffer in 16 or 32 byte boundaries properly. + int16_t channelStored_buf[PART_LEN1 + 8]; + int16_t channelAdapt16_buf[PART_LEN1 + 8]; + int32_t channelAdapt32_buf[PART_LEN1 + 8]; + int16_t xBuf_buf[PART_LEN2 + 16]; // farend + int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend + int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend + int16_t outBuf_buf[PART_LEN + 8]; + + // Pointers to the above buffers + int16_t* channelStored; + int16_t* channelAdapt16; + int32_t* channelAdapt32; + int16_t* xBuf; + int16_t* dBufClean; + int16_t* dBufNoisy; + int16_t* outBuf; + + int32_t echoFilt[PART_LEN1]; + int16_t nearFilt[PART_LEN1]; + int32_t noiseEst[PART_LEN1]; + int noiseEstTooLowCtr[PART_LEN1]; + int noiseEstTooHighCtr[PART_LEN1]; + int16_t noiseEstCtr; + int16_t cngMode; + + int32_t mseAdaptOld; + int32_t mseStoredOld; + int32_t mseThreshold; + + int16_t farEnergyMin; + int16_t farEnergyMax; + int16_t farEnergyMaxMin; + int16_t farEnergyVAD; + int16_t farEnergyMSE; + int currentVADValue; + int16_t vadUpdateCount; + + int16_t startupState; + int16_t mseChannelCount; + int16_t supGain; + int16_t supGainOld; + + int16_t supGainErrParamA; + int16_t supGainErrParamD; + int16_t supGainErrParamDiffAB; + int16_t supGainErrParamDiffBD; + + struct RealFFT* real_fft; + +#ifdef AEC_DEBUG + FILE* farFile; + FILE* nearFile; + FILE* outFile; +#endif +} AecmCore; + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore() +// +// Allocates the memory needed by the AECM. The memory needs to be +// initialized separately using the WebRtcAecm_InitCore() function. +// Returns a pointer to the instance and a nullptr at failure. 
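+//
+// Minimal usage sketch (illustrative only; `farend` and `nearend` are
+// caller-provided buffers of FRAME_LEN samples):
+//
+//   AecmCore* aecm = WebRtcAecm_CreateCore();
+//   if (aecm != nullptr && WebRtcAecm_InitCore(aecm, 16000) == 0) {
+//     int16_t out[FRAME_LEN];
+//     WebRtcAecm_ProcessFrame(aecm, farend, nearend, nullptr, out);
+//   }
+//   WebRtcAecm_FreeCore(aecm);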
+AecmCore* WebRtcAecm_CreateCore();
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - samplingFreq  : Sampling Frequency
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FreeCore(...)
+//
+// This function releases the memory allocated by WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//
+void WebRtcAecm_FreeCore(AecmCore* aecm);
+
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitEchoPathCore(...)
+//
+// This function resets the echo channel adaptation with the specified channel.
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - echo_path     : Pointer to the data that should initialize the echo
+//                        path
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessFrame(...)
+//
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one frame of nearend signal
+//
+//
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessBlock(...)
+//
+// This function is called for every block within one frame.
+// It is called by WebRtcAecm_ProcessFrame(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one block of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one block of nearend signal
+//
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* noisyClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_BufferFarFrame()
+//
+// Inserts a frame of data into farend buffer.
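+// The write position wraps at FAR_BUF_LEN, so `farBuf` behaves as a ring
+// buffer; WebRtcAecm_FetchFarFrame() below compensates for changes in the
+// known delay by moving its read position before copying out a frame.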
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + int farLen); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FetchFarFrame() +// +// Read the farend buffer to account for known delay +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// - knownDelay : known delay +// +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + int farLen, + int knownDelay); + +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() +// +// Moves the pointer to the next entry and inserts `far_spectrum` and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + int16_t far_q, + uint32_t nearEner, + int32_t* echoEst); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. 
+// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + int16_t far_q, + const uint16_t* const dfa, + int16_t mu, + int32_t* echoEst); + +extern const int16_t WebRtcAecm_kCosTable[]; +extern const int16_t WebRtcAecm_kSinTable[]; + +/////////////////////////////////////////////////////////////////////////////// +// Some function pointers, for internal functions shared by ARM NEON and +// generic C code. +// +typedef void (*CalcLinearEnergies)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echoEst, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; + +typedef void (*StoreAdaptiveChannel)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); +extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; + +typedef void (*ResetAdaptiveChannel)(AecmCore* aecm); +extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file aecm_core.c, while those for ARM Neon platforms +// are declared below and defined in file aecm_core_neon.c. +#if defined(WEBRTC_HAS_NEON) +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm); +#endif + +#if defined(MIPS32_LE) +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm); +#endif +#endif + +} // namespace webrtc + +#endif diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc new file mode 100644 index 0000000000..d363dd2cfd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc @@ -0,0 +1,671 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "modules/audio_processing/aecm/aecm_core.h"
+
+extern "C" {
+#include "common_audio/ring_buffer.h"
+#include "common_audio/signal_processing/include/real_fft.h"
+}
+#include "modules/audio_processing/aecm/echo_control_mobile.h"
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+extern "C" {
+#include "system_wrappers/include/cpu_features_wrapper.h"
+}
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/sanitizer.h"
+
+namespace webrtc {
+
+namespace {
+
+// Square root of Hanning window in Q14.
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+    0,     399,   798,   1196,  1594,  1990,  2386,  2780,  3172,  3562,  3951,
+    4337,  4720,  5101,  5478,  5853,  6224,  6591,  6954,  7313,  7668,  8019,
+    8364,  8705,  9040,  9370,  9695,  10013, 10326, 10633, 10933, 11227, 11514,
+    11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189,
+    14384, 14571, 14749, 14918, 15079, 15231, 15373, 15506, 15631, 15746, 15851,
+    15947, 16034, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384};
+
+#ifdef AECM_WITH_ABS_APPROX
+// Q15 alpha = 0.99439986968132 const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+// Q15 beta = 0.12967166976970 const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+// Q15 alpha = 0.94234827210087 const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+// Q15 beta = 0.33787806009150 const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+// Q15 alpha = 0.82247698684306 const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+// Q15 beta = 0.57762063060713 const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda) {
+  int16_t i;
+  int16_t tmp16;
+  int32_t tmp32;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+  int16_t noiseRShift16[PART_LEN1];
+
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift;
+
+  RTC_DCHECK_GE(shiftFromNearToNoise, 0);
+  RTC_DCHECK_LT(shiftFromNearToNoise, 16);
+
+  if (aecm->noiseEstCtr < 100) {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  } else {
+    minTrackShift = 9;
+  }
+
+  // Estimate noise power.
+  for (i = 0; i < PART_LEN1; i++) {
+    // Shift to the noise domain.
+    tmp32 = (int32_t)dfa[i];
+    outLShift32 = tmp32 << shiftFromNearToNoise;
+
+    if (outLShift32 < aecm->noiseEst[i]) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (aecm->noiseEst[i] < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i] every
+        // `kNoiseEstIncCount` block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
+          aecm->noiseEst[i]--;
+          aecm->noiseEstTooHighCtr[i] = 0;  // Reset the counter
+        }
+      } else {
+        aecm->noiseEst[i] -=
+            ((aecm->noiseEst[i] - outLShift32) >> minTrackShift);
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((aecm->noiseEst[i] >> 19) > 0) {
+        // Avoid overflow.
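+        // (For scale: 2049 / 2^11 ~= 1.00049, so the ramp below raises the
+        //  estimate by roughly 0.05% per block.)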
+ // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } else if ((aecm->noiseEst[i] >> 11) > 0) { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) { + tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise; + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15); + + // Tables are in Q13. + uReal[i] = + (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> 13); + uImag[i] = + (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) { + out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]); + out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]); + } +} + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array `fft` + int16_t scaled_time_signal = time_signal[i] * (1 << time_signal_scaling); + fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14); + scaled_time_signal = time_signal[i + PART_LEN] * (1 << time_signal_scaling); + fft[PART_LEN + i] = (int16_t)( + (scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse `efw` for the inverse FFT output after transferring + // the contents to `fft`. + int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. 
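+  // (outCFFT below records the scaling applied inside the fixed-point IFFT;
+  //  the WEBRTC_SPL_SHIFT_W32 by outCFFT - dfaCleanQDomain then compensates
+  //  for it, and for the analysis Q-domain, before the overlap-add.)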
+ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = + (ifft_out[PART_LEN + i] * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14; + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. 
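+  // (The 16 extra int16_t elements are 32 bytes of slack: rounding the
+  //  address up via "+ 31" and "& ~31" can move the pointer forward by at
+  //  most 31 bytes, so PART_LEN4 aligned elements always stay in bounds.)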
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = + (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } else if (freq_signal[i].imag == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } else { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(`imag`,`real`) + beta * min(`imag`,`real`) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if (tmp16no1 > tmp16no2) { + max_value = tmp16no1; + min_value = tmp16no2; + } else { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) { + alpha = kAlpha2; + beta = kBeta2; + } else { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)((max_value * alpha) >> 15); + tmp16no2 = (int16_t)((min_value * beta) >> 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + : [tmp32no1] "+&r"(tmp32no1), [tmp32no2] "=r"(tmp32no2) + : [real] "r"(freq_signal[i].real), [imag] "r"(freq_signal[i].imag)); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +} // namespace + +int RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/8200 + WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO(kma): define fft with ComplexInt16. 
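+  // (dfw_buf and efw_buf are declared as int32_t purely for size and
+  //  alignment; they are viewed below as ComplexInt16 bins, one bin per
+  //  int32_t, the two types having the same 4-byte size.)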
+ int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~31); + ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = + (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = + TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy, + PART_LEN1, zerosDBufNoisy); + if (delay == -1) { + return -1; + } else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t)far_q; + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. 
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] +=
+        rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained =
+          WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff =
+          14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    RTC_DCHECK_GE(zeros16, 0);  // `zeros16` is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] * (1 << zeros16);
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+                     ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+                     : aecm->nearFilt[i] * (1 << dfa_clean_q_domain_diff);
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 * (1 << -qDomainDiff)
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 =
+          WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0) {
+          hnl[i] = 0;
+        }
+      }
+    }
+    if (hnl[i]) {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    // speech distortion in double-talk.
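+    // (Below, hnl is squared in Q14 to sharpen the suppression, and every
+    //  bin above kMaxPrefBand is then capped at the average gain of the
+    //  preferred band [kMinPrefBand, kMaxPrefBand].)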
+ for (i = 0; i < PART_LEN1; i++) { + hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + for (i = 0; i < PART_LEN1; i++) { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) { + nlpGain = 0; + } else { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) { + hnl[i] = ONE_Q14; + } else { + hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build new file mode 100644 index 0000000000..f0e41cd6bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build @@ -0,0 +1,293 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc", + "/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc" + ] + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "ppc64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "riscv64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +Library("aecm_core_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc new file mode 100644 index 0000000000..828aa6d2fb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc @@ -0,0 +1,1656 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, 3562, 3951, + 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, + 8364, 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, + 14384, 14571, 14749, 14918, 15079, 15231, 15373, 15506, 15631, 15746, 15851, + 15947, 16034, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384}; + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static int16_t coefTable[] = { + 0, 4, 256, 260, 128, 132, 384, 388, 64, 68, 320, 324, 192, 196, 448, + 452, 32, 36, 288, 292, 160, 164, 416, 420, 96, 100, 352, 356, 224, 228, + 480, 484, 16, 20, 272, 276, 144, 148, 400, 404, 80, 84, 336, 340, 208, + 212, 464, 468, 48, 52, 304, 308, 176, 180, 432, 436, 112, 116, 368, 372, + 240, 244, 496, 500, 8, 12, 264, 268, 136, 140, 392, 396, 72, 76, 328, + 332, 200, 204, 456, 460, 40, 44, 296, 300, 168, 172, 424, 428, 104, 108, + 360, 364, 232, 236, 488, 492, 24, 28, 280, 284, 152, 156, 408, 412, 88, + 92, 344, 348, 216, 220, 472, 476, 56, 60, 312, 316, 184, 188, 440, 444, + 120, 124, 376, 380, 248, 252, 504, 508}; + +static int16_t coefTable_ifft[] = { + 0, 512, 256, 508, 128, 252, 384, 380, 64, 124, 320, 444, 192, 188, 448, + 316, 32, 60, 288, 476, 160, 220, 416, 348, 96, 92, 352, 412, 224, 156, + 480, 284, 16, 28, 272, 492, 144, 236, 400, 364, 80, 108, 336, 428, 208, + 172, 464, 300, 48, 44, 304, 460, 176, 204, 432, 332, 112, 76, 368, 396, + 240, 140, 496, 268, 8, 12, 264, 500, 136, 244, 392, 372, 72, 116, 328, + 436, 200, 180, 456, 308, 40, 52, 296, 468, 168, 212, 424, 340, 104, 84, + 360, 404, 232, 148, 488, 276, 24, 20, 280, 484, 152, 228, 408, 356, 88, + 100, 344, 420, 216, 164, 472, 292, 56, 36, 312, 452, 184, 196, 440, 324, + 120, 68, 376, 388, 248, 132, 504, 260}; + +} // namespace + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i, j; + int32_t tmp1, tmp2, tmp3, tmp4; + int16_t* pfrfi; + ComplexInt16* pfreq_signal; + int16_t f_coef, s_coef; + int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; + int32_t hann, hann1, coefs; + + memset(fft, 0, sizeof(int16_t) * PART_LEN4); + + // FFT of signal + __asm __volatile( + ".set 
push \n\t" + ".set noreorder \n\t" + "addiu %[shift], %[time_signal_scaling], -14 \n\t" + "addiu %[i], $zero, 64 \n\t" + "addiu %[load_ptr], %[time_signal], 0 \n\t" + "addiu %[hann], %[hanning], 0 \n\t" + "addiu %[hann1], %[hanning], 128 \n\t" + "addiu %[coefs], %[coefTable], 0 \n\t" + "bltz %[shift], 2f \n\t" + " negu %[shift1], %[shift] \n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: " + "\n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: " + "\n\t" + ".set pop \n\t" + : [load_ptr] "=&r"(load_ptr), [shift] "=&r"(shift), [hann] "=&r"(hann), + [hann1] "=&r"(hann1), [shift1] "=&r"(shift1), [coefs] "=&r"(coefs), + [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [i] "=&r"(i), [f_coef] "=&r"(f_coef), + [s_coef] "=&r"(s_coef), [store_ptr1] "=&r"(store_ptr1), + [store_ptr2] "=&r"(store_ptr2) + : [time_signal] "r"(time_signal), [coefTable] "r"(coefTable), + [time_signal_scaling] "r"(time_signal_scaling), + [hanning] "r"(WebRtcAecm_kSqrtHanning), [fft] "r"(fft) + : "memory", "hi", "lo"); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[j], $zero, 128 " + "\n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[pfrfi]) " + "\n\t" + "lh %[tmp2], 2(%[pfrfi]) " + "\n\t" + "lh %[tmp3], 4(%[pfrfi]) " + "\n\t" + "lh %[tmp4], 6(%[pfrfi]) " + "\n\t" + "subu %[tmp2], $zero, %[tmp2] " + "\n\t" + "sh %[tmp1], 0(%[pfreq_signal]) " + "\n\t" + "sh %[tmp2], 2(%[pfreq_signal]) " + "\n\t" + "subu %[tmp4], $zero, %[tmp4] " + "\n\t" + "sh %[tmp3], 4(%[pfreq_signal]) " + "\n\t" + "sh %[tmp4], 6(%[pfreq_signal]) " + "\n\t" + "lh %[tmp1], 8(%[pfrfi]) " + "\n\t" + "lh %[tmp2], 10(%[pfrfi]) " + "\n\t" + "lh %[tmp3], 12(%[pfrfi]) " + "\n\t" + "lh %[tmp4], 14(%[pfrfi]) " + "\n\t" + "addiu %[j], %[j], -8 " + "\n\t" + "subu %[tmp2], $zero, %[tmp2] " + "\n\t" + "sh %[tmp1], 8(%[pfreq_signal]) " + "\n\t" + "sh %[tmp2], 10(%[pfreq_signal]) " + "\n\t" + "subu %[tmp4], $zero, %[tmp4] " + "\n\t" + "sh %[tmp3], 12(%[pfreq_signal]) " + "\n\t" + "sh 
%[tmp4], 14(%[pfreq_signal]) " + "\n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 " + "\n\t" + "bgtz %[j], 1b " + "\n\t" + " addiu %[pfrfi], %[pfrfi], 16 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [j] "=&r"(j), [pfrfi] "+r"(pfrfi), [pfreq_signal] "+r"(pfreq_signal), + [tmp4] "=&r"(tmp4) + : + : "memory"); +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + ComplexInt16* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; + int16_t* output1 = output; + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[i], $zero, 64 " + "\n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 0(%[pefw]) " + "\n\t" + "lh %[tmp_im], 2(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 4(%[pefw]) " + "\n\t" + "lh %[tmp_im], 6(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 8(%[pefw]) " + "\n\t" + "lh %[tmp_im], 10(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 12(%[pefw]) " + "\n\t" + "lh %[tmp_im], 14(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 " + "\n\t" + "addiu %[i], %[i], -4 " + "\n\t" + "bgtz %[i], 1b " + "\n\t" + " addiu %[pefw], %[pefw], 16 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i), + [tmp_re] "=&r"(tmp_re), [tmp_im] "=&r"(tmp_im), [pefw] "+r"(pefw), + [pcoefTable_ifft] "+r"(pcoefTable_ifft), [fft] "+r"(fft) + : + : "memory"); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 
\n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i), + [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), [ppfft] "+r"(ppfft) + : + : "memory"); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[i], $zero, 64 " + "\n\t" + "11: " + "\n\t" + "lh %[tmp1], 0(%[pfft]) " + "\n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) " + "\n\t" + "addiu %[i], %[i], -2 " + "\n\t" + "mul %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "lh %[tmp3], 2(%[pfft]) " + "\n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) " + "\n\t" + "mul %[tmp3], %[tmp3], %[tmp4] " + "\n\t" + "addiu %[tmp1], %[tmp1], 8192 " + "\n\t" + "sra %[tmp1], %[tmp1], 14 " + "\n\t" + "addiu %[tmp3], %[tmp3], 8192 " + "\n\t" + "sra %[tmp3], %[tmp3], 14 " + "\n\t" + "bgez %[out_aecm], 1f " + "\n\t" + " negu %[tmp2], %[out_aecm] " + "\n\t" + "srav %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "b 2f " + "\n\t" + " srav %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "1: " + "\n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] " + "\n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] " + "\n\t" + "2: " + "\n\t" + "lh %[tmp4], 0(%[paecm_buf]) " + "\n\t" + "lh %[tmp2], 2(%[paecm_buf]) " + "\n\t" + "addu %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "addu %[tmp1], %[tmp1], %[tmp4] " + "\n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 " + "\n\t" + "sra %[tmp1], %[tmp1], 16 " + "\n\t" + "shll_s.w %[tmp3], %[tmp3], 16 " + "\n\t" + "sra %[tmp3], %[tmp3], 16 " + "\n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 " + "\n\t" + "sra %[tmp2], %[tmp1], 15 " + "\n\t" + "beq %[tmp4], %[tmp2], 3f " + "\n\t" + " ori %[tmp2], $zero, 0x7fff " + "\n\t" + "xor %[tmp1], %[tmp2], %[tmp4] " + "\n\t" + "3: " + "\n\t" + "sra %[tmp2], %[tmp3], 31 " + "\n\t" + "sra %[tmp4], %[tmp3], 15 " + "\n\t" + "beq %[tmp2], %[tmp4], 4f " + "\n\t" + " ori %[tmp4], $zero, 0x7fff " + "\n\t" + "xor %[tmp3], %[tmp4], %[tmp2] " + "\n\t" + "4: " + "\n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) " + "\n\t" + "sh %[tmp1], 0(%[output1]) " + "\n\t" + "sh %[tmp3], 2(%[pfft]) " + "\n\t" + "sh %[tmp3], 2(%[output1]) " + "\n\t" + "lh %[tmp1], 128(%[pfft]) " + "\n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) " + "\n\t" + "mul %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "lh %[tmp3], 130(%[pfft]) " + "\n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) " + "\n\t" + "mul %[tmp3], %[tmp3], %[tmp4] " + "\n\t" + "sra %[tmp1], %[tmp1], 14 " + "\n\t" + "sra %[tmp3], %[tmp3], 14 " + "\n\t" + "bgez %[out_aecm], 5f " + "\n\t" + " negu %[tmp2], %[out_aecm] " + "\n\t" + "srav %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "b 6f " + "\n\t" + " srav %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "5: " + "\n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] " + "\n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] " + "\n\t" + "6: " + "\n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 " + "\n\t" + "sra %[tmp1], %[tmp1], 16 " + "\n\t" + "shll_s.w %[tmp3], %[tmp3], 16 " + "\n\t" + "sra %[tmp3], %[tmp3], 16 " + "\n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 " + "\n\t" + "sra %[tmp2], %[tmp1], 
15 " + "\n\t" + "beq %[tmp4], %[tmp2], 7f " + "\n\t" + " ori %[tmp2], $zero, 0x7fff " + "\n\t" + "xor %[tmp1], %[tmp2], %[tmp4] " + "\n\t" + "7: " + "\n\t" + "sra %[tmp2], %[tmp3], 31 " + "\n\t" + "sra %[tmp4], %[tmp3], 15 " + "\n\t" + "beq %[tmp2], %[tmp4], 8f " + "\n\t" + " ori %[tmp4], $zero, 0x7fff " + "\n\t" + "xor %[tmp3], %[tmp4], %[tmp2] " + "\n\t" + "8: " + "\n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) " + "\n\t" + "sh %[tmp3], 2(%[paecm_buf]) " + "\n\t" + "addiu %[output1], %[output1], 4 " + "\n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 " + "\n\t" + "addiu %[pfft], %[pfft], 4 " + "\n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 " + "\n\t" + "bgtz %[i], 11b " + "\n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), + [output1] "+r"(output1), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), + [paecm_buf] "+r"(paecm_buf), [i] "=&r"(i), + [pp_kSqrtHanning] "+r"(pp_kSqrtHanning), + [p_kSqrtHanning] "+r"(p_kSqrtHanning) + : [out_aecm] "r"(out_aecm), + [WebRtcAecm_kSqrtHanning] "r"(WebRtcAecm_kSqrtHanning) + : "hi", "lo", "memory"); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
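+  // (In C terms, each pass of the assembly below handles four consecutive
+  //  bins i and accumulates
+  //    echo_est[i]         = channelStored[i] * far_spectrum[i];
+  //    far_energy         += far_spectrum[i];
+  //    echo_energy_adapt  += channelAdapt16[i] * far_spectrum[i];
+  //    echo_energy_stored += echo_est[i];
+  //  mirroring the generic C implementation.)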
+ for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r"(temp0), [stored0] "=&r"(stored0), + [adept0] "=&r"(adept0), [spectrum0] "=&r"(spectrum0), + [echo0] "=&r"(echo0), [echo_p] "+r"(echo_p), [par3] "+r"(par3), + [par1] "+r"(par1), [par2] "+r"(par2), [stored1] "=&r"(stored1), + [adept1] "=&r"(adept1), [echo1] "=&r"(echo1), + [spectrum1] "=&r"(spectrum1), [temp1] "=&r"(temp1), + [ch_stored_p] "+r"(ch_stored_p), [ch_adapt_p] "+r"(ch_adapt_p), + [spectrum_p] "+r"(spectrum_p) + : + : "hi", "lo", "memory"); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = (uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. 
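+  // (The commit is the memcpy of channelAdapt16 over channelStored below; the
+  //  DSP loop then refreshes echo_est as channelStored[i] * far_spectrum[i].
+  //  muleq_s.w.phl/phr produce saturated 32-bit products doubled by one bit,
+  //  which the subsequent "sra ... 1" undoes.)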
+ memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r"(temp0), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), + [temp1] "+r"(temp1), [temp8] "+r"(temp8), [temp7] "+r"(temp7) + : + : "hi", "lo", "memory"); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp4] "=&r"(temp4), + [temp5] "=&r"(temp5), [temp3] "+r"(temp3), [temp0] "+r"(temp0) + : + : "memory"); + } + + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. 
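+  // (On MIPS_DSP_R2 builds, the magnitude loop further down packs each
+  //  (real, imag) pair into one word and uses dpaq_s.w.ph accumulators to
+  //  form real^2 + imag^2 several bins per pass before the square root.)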
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = + (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } else if (freq_signal[i].imag == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } else { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(`imag`,`real`) + beta * min(`imag`,`real`) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1), [freqt2] "=&r"(freqt2), + [freqp] "+r"(freqp), [tmp32no20] "=r"(tmp32no20), + [tmp32no21] "=r"(tmp32no21), [tmp32no22] "=r"(tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 
12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu %[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1), + [freqt2] "=&r"(freqt2), [freqt3] "=&r"(freqt3), + [tmp32no20] "=r"(tmp32no20), [tmp32no21] "=r"(tmp32no21), + [tmp32no22] "=r"(tmp32no22), [tmp32no23] "=r"(tmp32no23), + [freqabsp] "+r"(freqabsp), [freqp] "+r"(freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", + "$ac3hi", "$ac3lo"); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r"(freqs) + : [tmp32no10] "r"(tmp32no10), [tmp32no11] "r"(tmp32no11), + [tmp32no12] "r"(tmp32no12), [tmp32no13] "r"(tmp32no13), + [freqabsp] "r"(freqabsp) + : "memory"); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31); + ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. 
There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = + (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = + TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy, + PART_LEN1, zerosDBufNoisy); + if (delay == -1) { + return -1; + } else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t)far_q; + + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, + echoEst32); + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. 
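+  // A rough sketch of the update below (illustrative only, not the exact
+  // fixed-point code): with X the aligned far-end magnitude spectrum, D the
+  // near-end magnitude spectrum and H the channel estimate, each bin is
+  // updated roughly as
+  //   H[i] += 2^(-mu) * (D[i] - H[i] * X[i]) / X[i]
+  // with mu the variable step size computed by WebRtcAecm_CalcStepSize().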
+  WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
+                           echoEst32);
+
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] +=
+        rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+      echoEst32Gained =
+          WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff =
+          14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    RTC_DCHECK_GE(zeros16, 0);  // `zeros16` is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+                     ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+                     : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] =
+          qDomainDiff < 0 ? tmp16no2 << -qDomainDiff : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+      numPosCoef++;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 =
+          WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
+      //    - max(0, 17 - zeros16 - zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+        numPosCoef++;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] <= 0) {
+          hnl[i] = 0;
+        } else {
+          numPosCoef++;
+        }
+      }
+    }
+  }
+
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    // speech distortion in double-talk.
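+    // The two loops below square each gain in place: hnl[] is in Q14, so
+    // (hnl[i] * hnl[i]) >> 14 stays in Q14, and because each gain is at
+    // most one in Q14, squaring can only attenuate further. The
+    // average-based clamp that follows then caps the upper band.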
+ for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), + [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [ptr1] "+r"(ptr1) + : + : "memory", "hi", "lo"); + } + for (i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r"(temp1), [ptr1] "+r"(ptr1) + : + : "memory", "hi", "lo"); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr), + [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr) + : + : "memory", "hi", "lo"); 
+#else + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr), + [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr) + : + : "memory", "hi", "lo"); +#endif + } + } + } else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + RTC_DCHECK_GE(shiftFromNearToNoise, 0); + RTC_DCHECK_LT(shiftFromNearToNoise, 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined(MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i += 2) { + // Shift to the noise domain. 
+ __asm __volatile( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r"(tmp32), [outLShift32] "=r"(outLShift32), + [tnoise] "=&r"(tnoise) + : [tmp1] "r"(tmp1), [dfap] "r"(dfap), + [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // `kNoiseEstIncCount` block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r"(tmp32), [tnoise] "+r"(tnoise) + : + [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift)); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r"(tnoise) + : [c2049] "r"(c2049) + : "hi", "lo"); + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r"(tnoise), [tmp32] "=&r"(tmp32) + :); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r"(tnoise) + : [c2049] "r"(c2049) + : "hi", "lo"); + } + } + + // Shift to the noise domain. + __asm __volatile( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r"(tmp32), [dfap] "+r"(dfap), + [outLShift32] "=r"(outLShift32), [tnoise1] "=&r"(tnoise1) + : [tmp1] "r"(tmp1), [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // `kNoiseEstIncCount` block. The regular approach below can not + // go further down due to truncation. 
+ aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r"(tmp32), [tnoise1] "+r"(tnoise1) + : + [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift)); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r"(tnoise1) + : [c2049] "r"(c2049) + : "hi", "lo"); + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r"(tnoise1), [tmp32] "=&r"(tmp32) + :); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r"(tnoise1) + : [c2049] "r"(c2049) + : "hi", "lo"); + } + } + + __asm __volatile( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r"(tmp16), [tmp161] "=&r"(tmp161), [tmp1] "+r"(tmp1), + [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [lambdap] "+r"(lambdap) + : [tnoise] "r"(tnoise), [tnoise1] "r"(tnoise1), [c114] "r"(c114), + [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i + 1] = tmp321 << shiftFromNearToNoise; + } + + __asm __volatile( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=&r"(nrsh1), [nrsh2] "=r"(nrsh2) + : [tmp16] "r"(tmp16), [tmp161] "r"(tmp161), [tmp32] "r"(tmp32), + [tmp321] "r"(tmp321) + : "memory", "hi", "lo"); + + __asm __volatile( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r"(randW16p), [tmp32] "=&r"(tmp32), [tmp16] "=r"(tmp16), + [tmp161] "=r"(tmp161), [tmp321] "=&r"(tmp321) + : [c359] "r"(c359) + : "memory", "hi", "lo"); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 
= WebRtcAecm_kSinTable[tmp161];
+#else
+    __asm __volatile(
+        "sll %[tmp16], %[tmp16], 1 \n\t"
+        "sll %[tmp161], %[tmp161], 1 \n\t"
+        "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"
+        "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"
+        "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"
+        "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"
+        : [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [tmp322] "=&r"(tmp322),
+          [tmp323] "=&r"(tmp323)
+        : [kCosTablep] "r"(kCosTablep), [tmp16] "r"(tmp16),
+          [tmp161] "r"(tmp161), [kSinTablep] "r"(kSinTablep)
+        : "memory");
+#endif
+    __asm __volatile(
+        "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"
+        "negu %[tmp162], %[nrsh1] \n\t"
+        "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"
+        "negu %[tmp163], %[nrsh2] \n\t"
+        "sra %[tmp32], %[tmp32], 13 \n\t"
+        "mul %[tmp321], %[tmp321], %[tmp162] \n\t"
+        "sra %[tmp322], %[tmp322], 13 \n\t"
+        "mul %[tmp323], %[tmp323], %[tmp163] \n\t"
+        "sra %[tmp321], %[tmp321], 13 \n\t"
+        "sra %[tmp323], %[tmp323], 13 \n\t"
+        : [tmp32] "+r"(tmp32), [tmp321] "+r"(tmp321), [tmp162] "=&r"(tmp162),
+          [tmp322] "+r"(tmp322), [tmp323] "+r"(tmp323), [tmp163] "=&r"(tmp163)
+        : [nrsh1] "r"(nrsh1), [nrsh2] "r"(nrsh2)
+        : "hi", "lo");
+    // Tables are in Q13.
+    uReal[i] = (int16_t)tmp32;
+    uImag[i] = (int16_t)tmp321;
+    uReal[i + 1] = (int16_t)tmp322;
+    uImag[i + 1] = (int16_t)tmp323;
+  }
+
+  int32_t tt, sgn;
+  tt = out[0].real;
+  sgn = ((int)tt) >> 31;
+  out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[0].imag;
+  sgn = ((int)tt) >> 31;
+  out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  for (i = 1; i < PART_LEN; i++) {
+    tt = out[i].real + uReal[i];
+    sgn = ((int)tt) >> 31;
+    out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+    tt = out[i].imag + uImag[i];
+    sgn = ((int)tt) >> 31;
+    out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  }
+  tt = out[PART_LEN].real + uReal[PART_LEN];
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[PART_LEN].imag;
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc
new file mode 100644
index 0000000000..584110d3af
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "common_audio/signal_processing/include/real_fft.h"
+#include "modules/audio_processing/aecm/aecm_core.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
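+// AddLanes() folds the four 32-bit lanes of a uint32x4_t into one scalar
+// sum: a single across-vector add (vaddvq_u32) on AArch64, and two pairwise
+// adds on 32-bit ARM, which has no across-lane add instruction.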
+ +static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { +#if defined(WEBRTC_ARCH_ARM64) + *(ptr) = vaddvq_u32(v); +#else + uint32x2_t tmp_v; + tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + tmp_v = vpadd_u32(tmp_v, tmp_v); + *(ptr) = vget_lane_u32(tmp_v, 0); +#endif +} + +} // namespace + +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt_p = aecm->channelAdapt16; + int32_t* echo_est_p = echo_est; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + const uint16_t* far_spectrum_p = far_spectrum; + int16x8_t store_v, adapt_v; + uint16x8_t spectrum_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v; + + far_energy_v = vdupq_n_u32(0); + echo_adapt_v = vdupq_n_u32(0); + echo_stored_v = vdupq_n_u32(0); + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + // The C code: + // for (i = 0; i < PART_LEN1; i++) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // (*far_energy) += (uint32_t)(far_spectrum[i]); + // *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + // (*echo_energy_stored) += (uint32_t)echo_est[i]; + // } + while (start_stored_p < end_stored_p) { + spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + store_v = vld1q_s16(start_stored_p); + + far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v)); + far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v)); + + echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)), + vget_low_u16(spectrum_v)); + echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)), + vget_high_u16(spectrum_v)); + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v); + echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v); + + echo_adapt_v = + vmlal_u16(echo_adapt_v, vreinterpret_u16_s16(vget_low_s16(adapt_v)), + vget_low_u16(spectrum_v)); + echo_adapt_v = + vmlal_u16(echo_adapt_v, vreinterpret_u16_s16(vget_high_s16(adapt_v)), + vget_high_u16(spectrum_v)); + + start_stored_p += 8; + start_adapt_p += 8; + far_spectrum_p += 8; + echo_est_p += 8; + } + + AddLanes(far_energy, far_energy_v); + AddLanes(echo_energy_stored, echo_stored_v); + AddLanes(echo_energy_adapt, echo_adapt_v); + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; + *far_energy += (uint32_t)far_spectrum[PART_LEN]; + *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; +} + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + RTC_DCHECK_EQ(0, (uintptr_t)echo_est % 32); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16); + + // This is C code of following optimized code. + // During startup we store the channel every block. 
+ // memcpy(aecm->channelStored, + // aecm->channelAdapt16, + // sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + // for (i = 0; i < PART_LEN; i += 4) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + // far_spectrum[i + 1]); + // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + // far_spectrum[i + 2]); + // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + // far_spectrum[i + 3]); + // } + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + const uint16_t* far_spectrum_p = far_spectrum; + int16_t* start_adapt_p = aecm->channelAdapt16; + int16_t* start_stored_p = aecm->channelStored; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + int32_t* echo_est_p = echo_est; + + uint16x8_t far_spectrum_v; + int16x8_t adapt_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + + while (start_stored_p < end_stored_p) { + far_spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + + vst1q_s16(start_stored_p, adapt_v); + + echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v), + vget_low_u16(vreinterpretq_u16_s16(adapt_v))); + echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v), + vget_high_u16(vreinterpretq_u16_s16(adapt_v))); + + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + far_spectrum_p += 8; + start_adapt_p += 8; + start_stored_p += 8; + echo_est_p += 8; + } + aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); +} + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt32 % 32); + + // The C code of following optimized code. + // for (i = 0; i < PART_LEN1; i++) { + // aecm->channelAdapt16[i] = aecm->channelStored[i]; + // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + // (int32_t)aecm->channelStored[i], 16); + // } + + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt16_p = aecm->channelAdapt16; + int32_t* start_adapt32_p = aecm->channelAdapt32; + const int16_t* end_stored_p = start_stored_p + PART_LEN; + + int16x8_t stored_v; + int32x4_t adapt32_v_low, adapt32_v_high; + + while (start_stored_p < end_stored_p) { + stored_v = vld1q_s16(start_stored_p); + vst1q_s16(start_adapt16_p, stored_v); + + adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16); + adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16); + + vst1q_s32(start_adapt32_p, adapt32_v_low); + vst1q_s32(start_adapt32_p + 4, adapt32_v_high); + + start_stored_p += 8; + start_adapt16_p += 8; + start_adapt32_p += 8; + } + aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; + aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h new file mode 100644 index 0000000000..5805549e2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ + +#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ + +/* Algorithm parameters */ +#define FRAME_LEN 80 /* Total frame length, 10 ms. */ + +#define PART_LEN 64 /* Length of partition. */ +#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ + +#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ +#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ +#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */ +#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */ +#define MAX_DELAY 100 + +/* Counter parameters */ +#define CONV_LEN 512 /* Convergence length used at startup. */ +#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ + +/* Energy parameters */ +#define MAX_BUF_LEN 64 /* History length of energy signals. */ +#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */ + /* in energy. */ +#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */ + /* and min. */ +#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */ +#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */ +#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */ + +/* Stepsize parameters */ +#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */ + /* dependent). */ +#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */ + /* dependent). */ +#define MU_DIFF 9 /* MU_MIN - MU_MAX */ + +/* Channel parameters */ +#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */ + /* far end energy to compare channel estimates. */ +#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */ + /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */ +#define MSE_RESOLUTION 5 /* MSE parameter resolution. */ +#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */ +#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */ +#define CHANNEL_VAD 16 /* Minimum energy in frequency band */ + /* to update channel. */ + +/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */ +#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */ +#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */ +#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */ + /* (Maximum gain) (8 in Q8). */ +#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */ + /* (Gain before going down). */ +#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */ +/* (Should be the same as Default) (1 in Q8). */ +#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */ + +/* Defines for "check delay estimation" */ +#define CORR_WIDTH 31 /* Number of samples to correlate over. */ +#define CORR_MAX 16 /* Maximum correlation offset. */ +#define CORR_MAX_BUF 63 +#define CORR_DEV 4 +#define CORR_MAX_LEVEL 20 +#define CORR_MAX_LOW 4 +#define CORR_BUF_LEN (CORR_MAX << 1) + 1 +/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. 
*/
+
+#define ONE_Q14 (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW 3277     /* 0.2 in Q14 */
+#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc
new file mode 100644
index 0000000000..14522c0f1d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc
@@ -0,0 +1,599 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aecm/echo_control_mobile.h"
+
+#ifdef AEC_DEBUG
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+extern "C" {
+#include "common_audio/ring_buffer.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/aecm/aecm_defines.h"
+}
+#include "modules/audio_processing/aecm/aecm_core.h"
+
+namespace webrtc {
+
+namespace {
+
+#define BUF_SIZE_FRAMES 50  // buffer size (frames)
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const size_t kBufSizeSamp =
+    BUF_SIZE_FRAMES * FRAME_LEN;  // buffer size (samples)
+static const int kSampMsNb = 8;   // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+static const int kInitCheck = 42;
+
+typedef struct {
+  int sampFreq;
+  int scSampFreq;
+  short bufSizeStart;
+  int knownDelay;
+
+  // Stores the last frame added to the farend buffer
+  short farendOld[2][FRAME_LEN];
+  short initFlag;  // indicates if AEC has been initialized
+
+  // Variables used for averaging far end buffer size
+  short counter;
+  short sum;
+  short firstVal;
+  short checkBufSizeCtr;
+
+  // Variables used for delay shifts
+  short msInSndCardBuf;
+  short filtDelay;
+  int timeForDelayChange;
+  int ECstartup;
+  int checkBuffSize;
+  int delayChange;
+  short lastDelayDiff;
+
+  int16_t echoMode;
+
+#ifdef AEC_DEBUG
+  FILE* bufFile;
+  FILE* delayFile;
+  FILE* preCompFile;
+  FILE* postCompFile;
+#endif  // AEC_DEBUG
+  // Structures
+  RingBuffer* farendBuf;
+
+  AecmCore* aecmCore;
+} AecMobile;
+
+}  // namespace
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int WebRtcAecm_DelayComp(AecMobile* aecm);
+
+void* WebRtcAecm_Create() {
+  // Allocate zero-filled memory.
+  AecMobile* aecm = static_cast<AecMobile*>(calloc(1, sizeof(AecMobile)));
+
+  aecm->aecmCore = WebRtcAecm_CreateCore();
+  if (!aecm->aecmCore) {
+    WebRtcAecm_Free(aecm);
+    return NULL;
+  }
+
+  aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp, sizeof(int16_t));
+  if (!aecm->farendBuf) {
+    WebRtcAecm_Free(aecm);
+    return NULL;
+  }
+
+#ifdef AEC_DEBUG
+  aecm->aecmCore->farFile = fopen("aecFar.pcm", "wb");
+  aecm->aecmCore->nearFile = fopen("aecNear.pcm", "wb");
+  aecm->aecmCore->outFile = fopen("aecOut.pcm", "wb");
+  // aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
+
+  aecm->bufFile = fopen("aecBuf.dat", "wb");
+  aecm->delayFile = fopen("aecDelay.dat", "wb");
+  aecm->preCompFile = fopen("preComp.pcm", "wb");
+  aecm->postCompFile = fopen("postComp.pcm", "wb");
+#endif  // AEC_DEBUG
+  return aecm;
+}
+
+void WebRtcAecm_Free(void* aecmInst) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  if (aecm == NULL) {
+    return;
+  }
+
+#ifdef AEC_DEBUG
+  fclose(aecm->aecmCore->farFile);
+  fclose(aecm->aecmCore->nearFile);
+  fclose(aecm->aecmCore->outFile);
+  // fclose(aecm->aecmCore->outLpFile);
+
+  fclose(aecm->bufFile);
+  fclose(aecm->delayFile);
+  fclose(aecm->preCompFile);
+  fclose(aecm->postCompFile);
+#endif  // AEC_DEBUG
+  WebRtcAecm_FreeCore(aecm->aecmCore);
+  WebRtc_FreeBuffer(aecm->farendBuf);
+  free(aecm);
+}
+
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  AecmConfig aecConfig;
+
+  if (aecm == NULL) {
+    return -1;
+  }
+
+  if (sampFreq != 8000 && sampFreq != 16000) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  aecm->sampFreq = sampFreq;
+
+  // Initialize AECM core
+  if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) {
+    return AECM_UNSPECIFIED_ERROR;
+  }
+
+  // Initialize farend buffer
+  WebRtc_InitBuffer(aecm->farendBuf);
+
+  aecm->initFlag = kInitCheck;  // indicates that initialization has been done
+
+  aecm->delayChange = 1;
+
+  aecm->sum = 0;
+  aecm->counter = 0;
+  aecm->checkBuffSize = 1;
+  aecm->firstVal = 0;
+
+  aecm->ECstartup = 1;
+  aecm->bufSizeStart = 0;
+  aecm->checkBufSizeCtr = 0;
+  aecm->filtDelay = 0;
+  aecm->timeForDelayChange = 0;
+  aecm->knownDelay = 0;
+  aecm->lastDelayDiff = 0;
+
+  memset(&aecm->farendOld, 0, sizeof(aecm->farendOld));
+
+  // Default settings.
+  aecConfig.cngMode = AecmTrue;
+  aecConfig.echoMode = 3;
+
+  if (WebRtcAecm_set_config(aecm, aecConfig) == -1) {
+    return AECM_UNSPECIFIED_ERROR;
+  }
+
+  return 0;
+}
+
+// Returns any error that is caused when buffering the
+// farend signal.
+int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst,
+                                        const int16_t* farend,
+                                        size_t nrOfSamples) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  if (aecm == NULL)
+    return -1;
+
+  if (farend == NULL)
+    return AECM_NULL_POINTER_ERROR;
+
+  if (aecm->initFlag != kInitCheck)
+    return AECM_UNINITIALIZED_ERROR;
+
+  if (nrOfSamples != 80 && nrOfSamples != 160)
+    return AECM_BAD_PARAMETER_ERROR;
+
+  return 0;
+}
+
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  const int32_t err =
+      WebRtcAecm_GetBufferFarendError(aecmInst, farend, nrOfSamples);
+
+  if (err != 0)
+    return err;
+
+  // TODO(unknown): Is this really a good idea?
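+  // (WebRtcAecm_DelayComp(), defined at the end of this file, stuffs the
+  // far-end ring buffer when the reported sound card delay grows beyond
+  // what the buffer can absorb.)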
+  if (!aecm->ECstartup) {
+    WebRtcAecm_DelayComp(aecm);
+  }
+
+  WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
+
+  return 0;
+}
+
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  int32_t retVal = 0;
+  size_t i;
+  short nmbrOfFilledBuffers;
+  size_t nBlocks10ms;
+  size_t nFrames;
+#ifdef AEC_DEBUG
+  short msInAECBuf;
+#endif
+
+  if (aecm == NULL) {
+    return -1;
+  }
+
+  if (nearendNoisy == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+
+  if (out == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  if (nrOfSamples != 80 && nrOfSamples != 160) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+
+  if (msInSndCardBuf < 0) {
+    msInSndCardBuf = 0;
+    retVal = AECM_BAD_PARAMETER_WARNING;
+  } else if (msInSndCardBuf > 500) {
+    msInSndCardBuf = 500;
+    retVal = AECM_BAD_PARAMETER_WARNING;
+  }
+  msInSndCardBuf += 10;
+  aecm->msInSndCardBuf = msInSndCardBuf;
+
+  nFrames = nrOfSamples / FRAME_LEN;
+  nBlocks10ms = nFrames / aecm->aecmCore->mult;
+
+  if (aecm->ECstartup) {
+    if (nearendClean == NULL) {
+      if (out != nearendNoisy) {
+        memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
+      }
+    } else if (out != nearendClean) {
+      memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
+    }
+
+    nmbrOfFilledBuffers =
+        (short)WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+    // The AECM is in the start up mode
+    // AECM is disabled until the soundcard buffer and farend buffers are OK
+
+    // Mechanism to ensure that the soundcard buffer is reasonably stable.
+    if (aecm->checkBuffSize) {
+      aecm->checkBufSizeCtr++;
+      // Before we fill up the far end buffer we require the amount of data on
+      // the sound card to be stable (+/-8 ms) compared to the first value. This
+      // comparison is made during the following 4 consecutive frames. If it
+      // seems to be stable then we start to fill up the far end buffer.
+
+      if (aecm->counter == 0) {
+        aecm->firstVal = aecm->msInSndCardBuf;
+        aecm->sum = 0;
+      }
+
+      if (abs(aecm->firstVal - aecm->msInSndCardBuf) <
+          WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb)) {
+        aecm->sum += aecm->msInSndCardBuf;
+        aecm->counter++;
+      } else {
+        aecm->counter = 0;
+      }
+
+      if (aecm->counter * nBlocks10ms >= 6) {
+        // The farend buffer size is determined in blocks of 80 samples
+        // Use 75% of the average value of the soundcard buffer
+        aecm->bufSizeStart = WEBRTC_SPL_MIN(
+            (3 * aecm->sum * aecm->aecmCore->mult) / (aecm->counter * 40),
+            BUF_SIZE_FRAMES);
+        // buffersize has now been determined
+        aecm->checkBuffSize = 0;
+      }
+
+      if (aecm->checkBufSizeCtr * nBlocks10ms > 50) {
+        // for really bad sound cards, don't disable echocanceller for more than
+        // 0.5 sec
+        aecm->bufSizeStart = WEBRTC_SPL_MIN(
+            (3 * aecm->msInSndCardBuf * aecm->aecmCore->mult) / 40,
+            BUF_SIZE_FRAMES);
+        aecm->checkBuffSize = 0;
+      }
+    }
+
+    // if checkBuffSize changed in the if-statement above
+    if (!aecm->checkBuffSize) {
+      // soundcard buffer is now reasonably stable
+      // When the far end buffer is filled with approximately the same amount of
+      // data as the amount on the sound card we end the start up phase and
+      // start to cancel echoes.
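+      // Sketch of the exit condition below: once at least bufSizeStart
+      // frames are buffered, startup ends; any surplus frames are skipped
+      // by moving the read pointer so near- and far-end stay aligned.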
+
+      if (nmbrOfFilledBuffers == aecm->bufSizeStart) {
+        aecm->ECstartup = 0;  // Enable the AECM
+      } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) {
+        WebRtc_MoveReadPtr(aecm->farendBuf,
+                           (int)WebRtc_available_read(aecm->farendBuf) -
+                               (int)aecm->bufSizeStart * FRAME_LEN);
+        aecm->ECstartup = 0;
+      }
+    }
+
+  } else {
+    // AECM is enabled
+
+    // Note only 1 block supported for nb and 2 blocks for wb
+    for (i = 0; i < nFrames; i++) {
+      int16_t farend[FRAME_LEN];
+      const int16_t* farend_ptr = NULL;
+
+      nmbrOfFilledBuffers =
+          (short)WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+
+      // Check that there is data in the far end buffer
+      if (nmbrOfFilledBuffers > 0) {
+        // Get the next 80 samples from the farend buffer
+        WebRtc_ReadBuffer(aecm->farendBuf, (void**)&farend_ptr, farend,
+                          FRAME_LEN);
+
+        // Always store the last frame for use when we run out of data
+        memcpy(&(aecm->farendOld[i][0]), farend_ptr, FRAME_LEN * sizeof(short));
+      } else {
+        // We have no data so we use the last played frame
+        memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
+        farend_ptr = farend;
+      }
+
+      // Call buffer delay estimator when all data is extracted,
+      // i.e. i = 0 for NB and i = 1 for WB
+      if ((i == 0 && aecm->sampFreq == 8000) ||
+          (i == 1 && aecm->sampFreq == 16000)) {
+        WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
+      }
+
+      // Call the AECM
+      /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
+       &out[FRAME_LEN * i], aecm->knownDelay);*/
+      if (WebRtcAecm_ProcessFrame(
+              aecm->aecmCore, farend_ptr, &nearendNoisy[FRAME_LEN * i],
+              (nearendClean ? &nearendClean[FRAME_LEN * i] : NULL),
+              &out[FRAME_LEN * i]) == -1)
+        return -1;
+    }
+  }
+
+#ifdef AEC_DEBUG
+  msInAECBuf = (short)WebRtc_available_read(aecm->farendBuf) /
+               (kSampMsNb * aecm->aecmCore->mult);
+  fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
+  fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
+#endif
+
+  return retVal;
+}
+
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  if (aecm == NULL) {
+    return -1;
+  }
+
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  aecm->aecmCore->cngMode = config.cngMode;
+
+  if (config.echoMode < 0 || config.echoMode > 4) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  aecm->echoMode = config.echoMode;
+
+  if (aecm->echoMode == 0) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A >> 3) - (SUPGAIN_ERROR_PARAM_B >> 3);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B >> 3) - (SUPGAIN_ERROR_PARAM_D >> 3);
+  } else if (aecm->echoMode == 1) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A >> 2) - (SUPGAIN_ERROR_PARAM_B >> 2);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B >> 2) - (SUPGAIN_ERROR_PARAM_D >> 2);
+  } else if (aecm->echoMode == 2) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 1;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 1;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A >> 1) - (SUPGAIN_ERROR_PARAM_B >> 1);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B >> 1) - (SUPGAIN_ERROR_PARAM_D >> 1);
+  } else if (aecm->echoMode == 3) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
+    aecm->aecmCore->supGainErrParamDiffBD =
+        SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
+  } else if (aecm->echoMode == 4) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A << 1) - (SUPGAIN_ERROR_PARAM_B << 1);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B << 1) - (SUPGAIN_ERROR_PARAM_D << 1);
+  }
+
+  return 0;
+}
+
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  const int16_t* echo_path_ptr = static_cast<const int16_t*>(echo_path);
+
+  if (aecmInst == NULL) {
+    return -1;
+  }
+  if (echo_path == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+  if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+    // Input channel size does not match the size of AECM
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr);
+
+  return 0;
+}
+
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  int16_t* echo_path_ptr = static_cast<int16_t*>(echo_path);
+
+  if (aecmInst == NULL) {
+    return -1;
+  }
+  if (echo_path == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+  if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+    // Input channel size does not match the size of AECM
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes);
+  return 0;
+}
+
+size_t WebRtcAecm_echo_path_size_bytes() {
+  return (PART_LEN1 * sizeof(int16_t));
+}
+
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+  short delayNew, nSampSndCard;
+  short nSampFar = (short)WebRtc_available_read(aecm->farendBuf);
+  short diff;
+
+  nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+  delayNew = nSampSndCard - nSampFar;
+
+  if (delayNew < FRAME_LEN) {
+    WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
+    delayNew += FRAME_LEN;
+  }
+
+  aecm->filtDelay =
+      WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10);
+
+  diff = aecm->filtDelay - aecm->knownDelay;
+  if (diff > 224) {
+    if (aecm->lastDelayDiff < 96) {
+      aecm->timeForDelayChange = 0;
+    } else {
+      aecm->timeForDelayChange++;
+    }
+  } else if (diff < 96 && aecm->knownDelay > 0) {
+    if (aecm->lastDelayDiff > 224) {
+      aecm->timeForDelayChange = 0;
+    } else {
+      aecm->timeForDelayChange++;
+    }
+  } else {
+    aecm->timeForDelayChange = 0;
+  }
+  aecm->lastDelayDiff = diff;
+
+  if (aecm->timeForDelayChange > 25) {
+    aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
+  }
+  return 0;
+}
+
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+  int nSampFar = (int)WebRtc_available_read(aecm->farendBuf);
+  int nSampSndCard, delayNew, nSampAdd;
+  const int maxStuffSamp = 10 * FRAME_LEN;
+
+  nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+  delayNew = nSampSndCard - nSampFar;
+
+  if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) {
+    // The difference of the buffer sizes is larger than the maximum
+    // allowed known delay. Compensate by stuffing the buffer.
+    nSampAdd =
+        (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar), FRAME_LEN));
+    nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
+
+    WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
+    aecm->delayChange = 1;  // the delay needs to be updated
+  }
+
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h
new file mode 100644
index 0000000000..ee780524de
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h
@@ -0,0 +1,209 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_
+#define MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+enum { AecmFalse = 0, AecmTrue };
+
+// Errors
+#define AECM_UNSPECIFIED_ERROR 12000
+#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AECM_UNINITIALIZED_ERROR 12002
+#define AECM_NULL_POINTER_ERROR 12003
+#define AECM_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AECM_BAD_PARAMETER_WARNING 12100
+
+typedef struct {
+  int16_t cngMode;   // AECM_FALSE, AECM_TRUE (default)
+  int16_t echoMode;  // 0, 1, 2, 3 (default), 4
+} AecmConfig;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AECM. The memory needs to be
+ * initialized separately using the WebRtcAecm_Init() function.
+ * Returns a pointer to the instance and a nullptr at failure.
+ */
+void* WebRtcAecm_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAecm_Create()
+ *
+ * Inputs              Description
+ * -------------------------------------------------------------------
+ * void* aecmInst      Pointer to the AECM instance
+ */
+void WebRtcAecm_Free(void* aecmInst);
+
+/*
+ * Initializes an AECM instance.
+ *
+ * Inputs              Description
+ * -------------------------------------------------------------------
+ * void* aecmInst      Pointer to the AECM instance
+ * int32_t sampFreq    Sampling frequency of data
+ *
+ * Outputs             Description
+ * -------------------------------------------------------------------
+ * int32_t return      0: OK
+ *                     1200-12004,12100: error/warning
+ */
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_BufferFarend(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* + * Reports any errors that would arise when buffering a farend buffer. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* + * Runs the AECM on an 80 or 160 sample blocks of data. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* nearendNoisy In buffer containing one frame of + * reference nearend+echo signal. If + * noise reduction is active, provide + * the noisy signal here. + * int16_t* nearendClean In buffer containing one frame of + * nearend+echo signal. If noise + * reduction is active, provide the + * clean signal here. Otherwise pass a + * NULL pointer. + * int16_t nrOfSamples Number of samples in nearend buffer + * int16_t msInSndCardBuf Delay estimate for sound card and + * system buffers + * + * Outputs Description + * ------------------------------------------------------------------- + * int16_t* out Out buffer, one frame of processed nearend + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_Process(void* aecmInst, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out, + size_t nrOfSamples, + int16_t msInSndCardBuf); + +/* + * This function enables the user to set certain parameters on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * AecmConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config); + +/* + * This function enables the user to set the echo path on-the-fly. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to the echo path to be set + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the currently used echo path + * on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to echo path + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the echo path size in bytes + * + * Outputs Description + * ------------------------------------------------------------------- + * size_t return Size in bytes + */ +size_t WebRtcAecm_echo_path_size_bytes(); + +#ifdef __cplusplus +} +#endif + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn new file mode 100644 index 0000000000..75bef1450f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn @@ -0,0 +1,126 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_source_set("gain_control_interface") { + sources = [ "gain_control.h" ] +} + +rtc_library("agc") { + sources = [ + "agc_manager_direct.cc", + "agc_manager_direct.h", + ] + configs += [ "..:apm_debug_dump" ] + deps = [ + ":gain_control_interface", + ":level_estimation", + "..:api", + "..:apm_logging", + "..:audio_buffer", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../agc2:clipping_predictor", + "../agc2:gain_map", + "../agc2:input_volume_stats_reporter", + "../vad", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("level_estimation") { + sources = [ + "agc.cc", + "agc.h", + "loudness_histogram.cc", + "loudness_histogram.h", + "utility.cc", + "utility.h", + ] + deps = [ + "../../../api:array_view", + "../../../rtc_base:checks", + "../vad", + ] +} + +rtc_library("legacy_agc") { + visibility = [ + ":*", + "..:*", + ] # Only targets in this file and in + # audio_processing can depend on + # this. 
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn
new file mode 100644
index 0000000000..75bef1450f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn
@@ -0,0 +1,126 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_source_set("gain_control_interface") {
+  sources = [ "gain_control.h" ]
+}
+
+rtc_library("agc") {
+  sources = [
+    "agc_manager_direct.cc",
+    "agc_manager_direct.h",
+  ]
+  configs += [ "..:apm_debug_dump" ]
+  deps = [
+    ":gain_control_interface",
+    ":level_estimation",
+    "..:api",
+    "..:apm_logging",
+    "..:audio_buffer",
+    "..:audio_frame_view",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:gtest_prod",
+    "../../../rtc_base:logging",
+    "../../../rtc_base:safe_minmax",
+    "../../../system_wrappers:field_trial",
+    "../../../system_wrappers:metrics",
+    "../agc2:clipping_predictor",
+    "../agc2:gain_map",
+    "../agc2:input_volume_stats_reporter",
+    "../vad",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+rtc_library("level_estimation") {
+  sources = [
+    "agc.cc",
+    "agc.h",
+    "loudness_histogram.cc",
+    "loudness_histogram.h",
+    "utility.cc",
+    "utility.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "../vad",
+  ]
+}
+
+rtc_library("legacy_agc") {
+  visibility = [
+    ":*",
+    "..:*",
+  ]  # Only targets in this file and in
+     # audio_processing can depend on
+     # this.
+
+  sources = [
+    "legacy/analog_agc.cc",
+    "legacy/analog_agc.h",
+    "legacy/digital_agc.cc",
+    "legacy/digital_agc.h",
+    "legacy/gain_control.h",
+  ]
+
+  deps = [
+    "../../../common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../common_audio/third_party/ooura:fft_size_256",
+    "../../../rtc_base:checks",
+    "../../../system_wrappers",
+  ]
+
+  if (rtc_build_with_neon) {
+    if (target_cpu != "arm64") {
+      # Enable compilation for the NEON instruction set.
+      suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
+      cflags = [ "-mfpu=neon" ]
+    }
+  }
+}
+
+if (rtc_include_tests) {
+  rtc_library("agc_unittests") {
+    testonly = true
+    sources = [
+      "agc_manager_direct_unittest.cc",
+      "loudness_histogram_unittest.cc",
+      "mock_agc.h",
+    ]
+    configs += [ "..:apm_debug_dump" ]
+
+    deps = [
+      ":agc",
+      ":gain_control_interface",
+      ":level_estimation",
+      "..:mocks",
+      "../../../api:array_view",
+      "../../../rtc_base:checks",
+      "../../../rtc_base:random",
+      "../../../rtc_base:safe_conversions",
+      "../../../rtc_base:safe_minmax",
+      "../../../rtc_base:stringutils",
+      "../../../system_wrappers:metrics",
+      "../../../test:field_trial",
+      "../../../test:fileutils",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+    absl_deps = [
+      "//third_party/abseil-cpp/absl/strings",
+      "//third_party/abseil-cpp/absl/types:optional",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc
new file mode 100644
index 0000000000..a018ff9f93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/agc.h"
+
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+#include "modules/audio_processing/agc/utility.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kDefaultLevelDbfs = -18;
+constexpr int kNumAnalysisFrames = 100;
+constexpr double kActivityThreshold = 0.3;
+constexpr int kNum10msFramesInOneSecond = 100;
+constexpr int kMaxSampleRateHz = 384000;
+
+}  // namespace
+
+Agc::Agc()
+    : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
+      target_level_dbfs_(kDefaultLevelDbfs),
+      histogram_(LoudnessHistogram::Create(kNumAnalysisFrames)),
+      inactive_histogram_(LoudnessHistogram::Create()) {}
+
+Agc::~Agc() = default;
+
+void Agc::Process(rtc::ArrayView<const int16_t> audio) {
+  const int sample_rate_hz = audio.size() * kNum10msFramesInOneSecond;
+  RTC_DCHECK_LE(sample_rate_hz, kMaxSampleRateHz);
+  vad_.ProcessChunk(audio.data(), audio.size(), sample_rate_hz);
+  const std::vector<double>& rms = vad_.chunkwise_rms();
+  const std::vector<double>& probabilities =
+      vad_.chunkwise_voice_probabilities();
+  RTC_DCHECK_EQ(rms.size(), probabilities.size());
+  for (size_t i = 0; i < rms.size(); ++i) {
+    histogram_->Update(rms[i], probabilities[i]);
+  }
+}
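+
+// [Editor's illustration, not part of the upstream source] Process() infers
+// the sample rate from the length of the 10 ms input frame, so a caller only
+// sizes the view accordingly:
+//
+//   int16_t frame[160] = {0};  // One 10 ms frame at 16 kHz.
+//   agc.Process(rtc::ArrayView<const int16_t>(frame));
+//   // Inside Process(): 160 * kNum10msFramesInOneSecond = 16000 Hz.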
+
+bool Agc::GetRmsErrorDb(int* error) {
+  if (!error) {
+    RTC_DCHECK_NOTREACHED();
+    return false;
+  }
+
+  if (histogram_->num_updates() < kNumAnalysisFrames) {
+    // We haven't yet received enough frames.
+    return false;
+  }
+
+  if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
+    // We are likely in an inactive segment.
+    return false;
+  }
+
+  double loudness = Linear2Loudness(histogram_->CurrentRms());
+  *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
+  histogram_->Reset();
+  return true;
+}
+
+void Agc::Reset() {
+  histogram_->Reset();
+}
+
+int Agc::set_target_level_dbfs(int level) {
+  // TODO(turajs): just some arbitrary sanity check. We can come up with
+  // better limits. The upper limit should be chosen such that the risk of
+  // clipping is low, and the lower limit should not make the signal too
+  // quiet.
+  if (level >= 0 || level <= -100)
+    return -1;
+  target_level_dbfs_ = level;
+  target_level_loudness_ = Dbfs2Loudness(level);
+  return 0;
+}
+
+int Agc::target_level_dbfs() const {
+  return target_level_dbfs_;
+}
+
+float Agc::voice_probability() const {
+  return vad_.last_voice_probability();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.h b/third_party/libwebrtc/modules/audio_processing/agc/agc.h
new file mode 100644
index 0000000000..da42808225
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+namespace webrtc {
+
+class LoudnessHistogram;
+
+class Agc {
+ public:
+  Agc();
+  virtual ~Agc();
+
+  // `audio` must be mono; in a multi-channel stream, provide the first
+  // (usually left) channel.
+  virtual void Process(rtc::ArrayView<const int16_t> audio);
+
+  // Retrieves the difference between the target RMS level and the current
+  // signal RMS level in dB. Returns true if an update is available and false
+  // otherwise, in which case `error` should be ignored and no action taken.
+  virtual bool GetRmsErrorDb(int* error);
+  virtual void Reset();
+
+  virtual int set_target_level_dbfs(int level);
+  virtual int target_level_dbfs() const;
+  virtual float voice_probability() const;
+
+ private:
+  double target_level_loudness_;
+  int target_level_dbfs_;
+  std::unique_ptr<LoudnessHistogram> histogram_;
+  std::unique_ptr<LoudnessHistogram> inactive_histogram_;
+  VoiceActivityDetector vad_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build
new file mode 100644
index 0000000000..45e6cad306
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ 
+ "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc new file mode 100644 index 0000000000..b8ad4a8bb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+// Amount of error we tolerate in the microphone level (presumably due to OS
+// quantization) before we assume the user has manually adjusted the
+// microphone.
+constexpr int kLevelQuantizationSlack = 25;
+
+constexpr int kDefaultCompressionGain = 7;
+constexpr int kMaxCompressionGain = 12;
+constexpr int kMinCompressionGain = 2;
+// Controls the rate of compression changes towards the target.
+constexpr float kCompressionGainStep = 0.05f;
+
+constexpr int kMaxMicLevel = 255;
+static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
+constexpr int kMinMicLevel = 12;
+
+// Prevent very large microphone level changes.
+constexpr int kMaxResidualGainChange = 15;
+
+// Maximum additional gain allowed to compensate for microphone level
+// restrictions from clipping events.
+constexpr int kSurplusCompressionGain = 6;
+
+// Target speech level (dBFS) and speech probability threshold used to compute
+// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used
+// for computing the error override and they are not passed to `agc_`.
+// TODO(webrtc:7494): Move these to a config and pass in the ctor.
+constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f;
+constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f;
+// The minimum number of frames between `UpdateGain()` calls.
+// TODO(webrtc:7494): Move this to a config and pass in the ctor with
+// kOverrideWaitFrames = 100. Default value zero needed for the unit tests.
+constexpr int kOverrideWaitFrames = 0;
+
+using AnalogAgcConfig =
+    AudioProcessing::Config::GainController1::AnalogGainController;
+
+// If the "WebRTC-Audio-2ndAgcMinMicLevelExperiment" field trial is specified,
+// parses it and returns a value between 0 and 255 depending on the
+// field-trial string. Returns an unspecified value if the field trial is not
+// specified, if disabled or if it cannot be parsed. Example:
+// 'WebRTC-Audio-2ndAgcMinMicLevelExperiment/Enabled-80' => returns 80.
+absl::optional<int> GetMinMicLevelOverride() {
+  constexpr char kMinMicLevelFieldTrial[] =
+      "WebRTC-Audio-2ndAgcMinMicLevelExperiment";
+  if (!webrtc::field_trial::IsEnabled(kMinMicLevelFieldTrial)) {
+    return absl::nullopt;
+  }
+  const auto field_trial_string =
+      webrtc::field_trial::FindFullName(kMinMicLevelFieldTrial);
+  int min_mic_level = -1;
+  sscanf(field_trial_string.c_str(), "Enabled-%d", &min_mic_level);
+  if (min_mic_level >= 0 && min_mic_level <= 255) {
+    return min_mic_level;
+  } else {
+    RTC_LOG(LS_WARNING) << "[agc] Invalid parameter for "
+                        << kMinMicLevelFieldTrial << ", ignored.";
+    return absl::nullopt;
+  }
+}
+
+int LevelFromGainError(int gain_error, int level, int min_mic_level) {
+  RTC_DCHECK_GE(level, 0);
+  RTC_DCHECK_LE(level, kMaxMicLevel);
+  if (gain_error == 0) {
+    return level;
+  }
+
+  int new_level = level;
+  if (gain_error > 0) {
+    while (kGainMap[new_level] - kGainMap[level] < gain_error &&
+           new_level < kMaxMicLevel) {
+      ++new_level;
+    }
+  } else {
+    while (kGainMap[new_level] - kGainMap[level] > gain_error &&
+           new_level > min_mic_level) {
+      --new_level;
+    }
+  }
+  return new_level;
+}
+
+// Returns the proportion of samples in the buffer which are at full-scale
+// (and presumably clipped); the most clipped channel determines the result.
+float ComputeClippedRatio(const float* const* audio,
+                          size_t num_channels,
+                          size_t samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  int num_clipped = 0;
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    int num_clipped_in_ch = 0;
+    for (size_t i = 0; i < samples_per_channel; ++i) {
+      RTC_DCHECK(audio[ch]);
+      if (audio[ch][i] >= 32767.0f || audio[ch][i] <= -32768.0f) {
+        ++num_clipped_in_ch;
+      }
+    }
+    num_clipped = std::max(num_clipped, num_clipped_in_ch);
+  }
+  return static_cast<float>(num_clipped) / (samples_per_channel);
+}
+
+void LogClippingMetrics(int clipping_rate) {
+  RTC_LOG(LS_INFO) << "Input clipping rate: " << clipping_rate << "%";
+  RTC_HISTOGRAM_COUNTS_LINEAR(/*name=*/"WebRTC.Audio.Agc.InputClippingRate",
+                              /*sample=*/clipping_rate, /*min=*/0, /*max=*/100,
+                              /*bucket_count=*/50);
+}
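+
+// [Editor's illustration, not part of the upstream source] For a stereo frame
+// with 480 samples per channel, 48 of them clipped in the left channel and 12
+// in the right, the most clipped channel decides the ratio:
+//
+//   const float ratio = ComputeClippedRatio(audio, /*num_channels=*/2,
+//                                           /*samples_per_channel=*/480);
+//   // ratio == max(48, 12) / 480.0f == 0.1f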
+
+// Computes the speech level error in dB. `speech_level_dbfs` is required to
+// be in the range [-90.0f, 30.0f] and `speech_probability` in the range
+// [0.0f, 1.0f].
+int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
+  constexpr float kMinSpeechLevelDbfs = -90.0f;
+  constexpr float kMaxSpeechLevelDbfs = 30.0f;
+  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
+  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
+  RTC_DCHECK_GE(speech_probability, 0.0f);
+  RTC_DCHECK_LE(speech_probability, 1.0f);
+
+  if (speech_probability < kOverrideSpeechProbabilitySilenceThreshold) {
+    return 0;
+  }
+
+  const float speech_level = rtc::SafeClamp(
+      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
+
+  return std::round(kOverrideTargetSpeechLevelDbfs - speech_level);
+}
+
+}  // namespace
+
+MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
+                 int clipped_level_min,
+                 bool disable_digital_adaptive,
+                 int min_mic_level)
+    : min_mic_level_(min_mic_level),
+      disable_digital_adaptive_(disable_digital_adaptive),
+      agc_(std::make_unique<Agc>()),
+      max_level_(kMaxMicLevel),
+      max_compression_gain_(kMaxCompressionGain),
+      target_compression_(kDefaultCompressionGain),
+      compression_(target_compression_),
+      compression_accumulator_(compression_),
+      clipped_level_min_(clipped_level_min) {}
+
+MonoAgc::~MonoAgc() = default;
+
+void MonoAgc::Initialize() {
+  max_level_ = kMaxMicLevel;
+  max_compression_gain_ = kMaxCompressionGain;
+  target_compression_ =
+      disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
+  compression_ = disable_digital_adaptive_ ? 0 : target_compression_;
+  compression_accumulator_ = compression_;
+  capture_output_used_ = true;
+  check_volume_on_next_process_ = true;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
+}
+
+void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
+                      absl::optional<int> rms_error_override) {
+  new_compression_to_set_ = absl::nullopt;
+
+  if (check_volume_on_next_process_) {
+    check_volume_on_next_process_ = false;
+    // We have to wait until the first process call to check the volume,
+    // because Chromium doesn't guarantee it to be valid any earlier.
+    CheckVolumeAndReset();
+  }
+
+  agc_->Process(audio);
+
+  // Always check if `agc_` has a new error available. If yes, `agc_` gets
+  // reset.
+  // TODO(webrtc:7494): Replace the `agc_` call to `GetRmsErrorDb()` with
+  // `Reset()` if an error override is used.
+  int rms_error = 0;
+  bool update_gain = agc_->GetRmsErrorDb(&rms_error);
+  if (rms_error_override.has_value()) {
+    if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
+      update_gain = false;
+    } else {
+      rms_error = *rms_error_override;
+      update_gain = true;
+    }
+  }
+
+  if (update_gain) {
+    UpdateGain(rms_error);
+  }
+
+  if (!disable_digital_adaptive_) {
+    UpdateCompressor();
+  }
+
+  is_first_frame_ = false;
+  if (frames_since_update_gain_ < kOverrideWaitFrames) {
+    ++frames_since_update_gain_;
+  }
+}
+
+void MonoAgc::HandleClipping(int clipped_level_step) {
+  RTC_DCHECK_GT(clipped_level_step, 0);
+  // Always decrease the maximum level, even if the current level is below
+  // threshold.
+  SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step));
+  if (log_to_histograms_) {
+    RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed",
+                          level_ - clipped_level_step >= clipped_level_min_);
+  }
+  if (level_ > clipped_level_min_) {
+    // Don't try to adjust the level if we're already below the limit. As
+    // a consequence, if the user has brought the level above the limit, we
+    // will still not react until the postproc updates the level.
+ SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); + // Reset the AGCs for all channels since the level has changed. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + } +} + +void MonoAgc::SetLevel(int new_level) { + int voe_level = recommended_input_volume_; + if (voe_level == 0) { + RTC_DLOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level < 0 || voe_level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" + << voe_level; + return; + } + + // Detect manual input volume adjustments by checking if the current level + // `voe_level` is outside of the `[level_ - kLevelQuantizationSlack, level_ + + // kLevelQuantizationSlack]` range where `level_` is the last input volume + // known by this gain controller. + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + RTC_DLOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + "stored level from " + << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + recommended_input_volume_ = new_level; + RTC_DLOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", level_=" << level_ + << ", new_level=" << new_level; + level_ = new_level; +} + +void MonoAgc::SetMaxLevel(int level) { + RTC_DCHECK_GE(level, clipped_level_min_); + max_level_ = level; + // Scale the `kSurplusCompressionGain` linearly across the restricted + // level range. + max_compression_gain_ = + kMaxCompressionGain + std::floor((1.f * kMaxMicLevel - max_level_) / + (kMaxMicLevel - clipped_level_min_) * + kSurplusCompressionGain + + 0.5f); + RTC_DLOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void MonoAgc::HandleCaptureOutputUsedChange(bool capture_output_used) { + if (capture_output_used_ == capture_output_used) { + return; + } + capture_output_used_ = capture_output_used; + + if (capture_output_used) { + // When we start using the output, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +int MonoAgc::CheckVolumeAndReset() { + int level = recommended_input_volume_; + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of `level` == 0 we should raise it so the + // AGC can do its job properly. 
+  if (level == 0 && !startup_) {
+    RTC_DLOG(LS_INFO)
+        << "[agc] VolumeCallbacks returned level=0, taking no action.";
+    return 0;
+  }
+  if (level < 0 || level > kMaxMicLevel) {
+    RTC_LOG(LS_ERROR) << "[agc] VolumeCallbacks returned an invalid level="
+                      << level;
+    return -1;
+  }
+  RTC_DLOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
+
+  if (level < min_mic_level_) {
+    level = min_mic_level_;
+    RTC_DLOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
+    recommended_input_volume_ = level;
+  }
+  agc_->Reset();
+  level_ = level;
+  startup_ = false;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
+  return 0;
+}
+
+// Distributes the required gain change between the digital compression stage
+// and volume slider. We use the compressor first, providing a slack region
+// around the current slider position to reduce movement.
+//
+// If the slider needs to be moved, we check first if the user has adjusted
+// it, in which case we take no action and cache the updated level.
+void MonoAgc::UpdateGain(int rms_error_db) {
+  int rms_error = rms_error_db;
+
+  // Always reset the counter regardless of whether the gain is changed
+  // or not. This matches the behavior of `agc_` where the histogram is
+  // reset every time an RMS error is successfully read.
+  frames_since_update_gain_ = 0;
+
+  // The compressor will always add at least kMinCompressionGain. In effect,
+  // this adjusts our target gain upward by the same amount and rms_error
+  // needs to reflect that.
+  rms_error += kMinCompressionGain;
+
+  // Handle as much error as possible with the compressor first.
+  int raw_compression =
+      rtc::SafeClamp(rms_error, kMinCompressionGain, max_compression_gain_);
+
+  // Deemphasize the compression gain error. Move halfway between the current
+  // target and the newly received target. This serves to soften perceptible
+  // intra-talkspurt adjustments, at the cost of some adaptation speed.
+  if ((raw_compression == max_compression_gain_ &&
+       target_compression_ == max_compression_gain_ - 1) ||
+      (raw_compression == kMinCompressionGain &&
+       target_compression_ == kMinCompressionGain + 1)) {
+    // Special case to allow the target to reach the endpoints of the
+    // compression range. The deemphasis would otherwise halt it at 1 dB shy.
+    target_compression_ = raw_compression;
+  } else {
+    target_compression_ =
+        (raw_compression - target_compression_) / 2 + target_compression_;
+  }
+
+  // Residual error will be handled by adjusting the volume slider. Use the
+  // raw rather than deemphasized compression here as we would otherwise
+  // shrink the amount of slack the compressor provides.
+  const int residual_gain =
+      rtc::SafeClamp(rms_error - raw_compression, -kMaxResidualGainChange,
+                     kMaxResidualGainChange);
+  RTC_DLOG(LS_INFO) << "[agc] rms_error=" << rms_error
+                    << ", target_compression=" << target_compression_
+                    << ", residual_gain=" << residual_gain;
+  if (residual_gain == 0)
+    return;
+
+  int old_level = level_;
+  SetLevel(LevelFromGainError(residual_gain, level_, min_mic_level_));
+  if (old_level != level_) {
+    // Reset the AGC since the level has changed.
+    agc_->Reset();
+  }
+}
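+
+// [Editor's illustration, not part of the upstream source] Worked example of
+// the distribution above, starting from target_compression_ == 7:
+//
+//   UpdateGain(/*rms_error_db=*/20);
+//   // rms_error       = 20 + kMinCompressionGain(2)           = 22
+//   // raw_compression = clamp(22, 2, kMaxCompressionGain(12)) = 12
+//   // target_compression_ moves halfway: (12 - 7) / 2 + 7     = 9
+//   // residual_gain   = clamp(22 - 12, -15, 15)               = 10
+//   // The remaining 10 dB is mapped onto the volume slider via
+//   // LevelFromGainError() and applied with SetLevel().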
+
+void MonoAgc::UpdateCompressor() {
+  if (compression_ == target_compression_) {
+    return;
+  }
+
+  // Adapt the compression gain slowly towards the target, in order to avoid
+  // highly perceptible changes.
+  if (target_compression_ > compression_) {
+    compression_accumulator_ += kCompressionGainStep;
+  } else {
+    compression_accumulator_ -= kCompressionGainStep;
+  }
+
+  // The compressor accepts integer gains in dB. Adjust the gain when
+  // we've come within half a stepsize of the nearest integer. (We don't
+  // check for equality due to potential floating point imprecision).
+  int new_compression = compression_;
+  int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
+  if (std::fabs(compression_accumulator_ - nearest_neighbor) <
+      kCompressionGainStep / 2) {
+    new_compression = nearest_neighbor;
+  }
+
+  // Set the new compression gain.
+  if (new_compression != compression_) {
+    compression_ = new_compression;
+    compression_accumulator_ = new_compression;
+    new_compression_to_set_ = compression_;
+  }
+}
+
+std::atomic<int> AgcManagerDirect::instance_counter_(0);
+
+AgcManagerDirect::AgcManagerDirect(
+    const AudioProcessing::Config::GainController1::AnalogGainController&
+        analog_config,
+    Agc* agc)
+    : AgcManagerDirect(/*num_capture_channels=*/1, analog_config) {
+  RTC_DCHECK(channel_agcs_[0]);
+  RTC_DCHECK(agc);
+  channel_agcs_[0]->set_agc(agc);
+}
+
+AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
+                                   const AnalogAgcConfig& analog_config)
+    : analog_controller_enabled_(analog_config.enabled),
+      min_mic_level_override_(GetMinMicLevelOverride()),
+      data_dumper_(new ApmDataDumper(instance_counter_.fetch_add(1) + 1)),
+      num_capture_channels_(num_capture_channels),
+      disable_digital_adaptive_(!analog_config.enable_digital_adaptive),
+      frames_since_clipped_(analog_config.clipped_wait_frames),
+      capture_output_used_(true),
+      clipped_level_step_(analog_config.clipped_level_step),
+      clipped_ratio_threshold_(analog_config.clipped_ratio_threshold),
+      clipped_wait_frames_(analog_config.clipped_wait_frames),
+      channel_agcs_(num_capture_channels),
+      new_compressions_to_set_(num_capture_channels),
+      clipping_predictor_(
+          CreateClippingPredictor(num_capture_channels,
+                                  analog_config.clipping_predictor)),
+      use_clipping_predictor_step_(
+          !!clipping_predictor_ &&
+          analog_config.clipping_predictor.use_predicted_step),
+      clipping_rate_log_(0.0f),
+      clipping_rate_log_counter_(0) {
+  RTC_LOG(LS_INFO) << "[agc] analog controller enabled: "
+                   << (analog_controller_enabled_ ? "yes" : "no");
+  const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel);
+  RTC_LOG(LS_INFO) << "[agc] Min mic level: " << min_mic_level
+                   << " (overridden: "
+                   << (min_mic_level_override_.has_value() ? "yes" : "no")
+                   << ")";
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr;
+
+    channel_agcs_[ch] = std::make_unique<MonoAgc>(
+        data_dumper_ch, analog_config.clipped_level_min,
+        disable_digital_adaptive_, min_mic_level);
+  }
+  RTC_DCHECK(!channel_agcs_.empty());
+  RTC_DCHECK_GT(clipped_level_step_, 0);
+  RTC_DCHECK_LE(clipped_level_step_, 255);
+  RTC_DCHECK_GT(clipped_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LT(clipped_ratio_threshold_, 1.0f);
+  RTC_DCHECK_GT(clipped_wait_frames_, 0);
+  channel_agcs_[0]->ActivateLogging();
+}
+
+AgcManagerDirect::~AgcManagerDirect() {}
+
+void AgcManagerDirect::Initialize() {
+  RTC_DLOG(LS_INFO) << "AgcManagerDirect::Initialize";
+  data_dumper_->InitiateNewSetOfRecordings();
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->Initialize();
+  }
+  capture_output_used_ = true;
+
+  AggregateChannelLevels();
+  clipping_rate_log_ = 0.0f;
+  clipping_rate_log_counter_ = 0;
+}
+
+void AgcManagerDirect::SetupDigitalGainControl(
+    GainControl& gain_control) const {
+  if (gain_control.set_mode(GainControl::kFixedDigital) != 0) {
+    RTC_LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed.";
+  }
+  const int target_level_dbfs = disable_digital_adaptive_ ? 0 : 2;
+  if (gain_control.set_target_level_dbfs(target_level_dbfs) != 0) {
+    RTC_LOG(LS_ERROR) << "set_target_level_dbfs() failed.";
+  }
+  const int compression_gain_db =
+      disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
+  if (gain_control.set_compression_gain_db(compression_gain_db) != 0) {
+    RTC_LOG(LS_ERROR) << "set_compression_gain_db() failed.";
+  }
+  const bool enable_limiter = !disable_digital_adaptive_;
+  if (gain_control.enable_limiter(enable_limiter) != 0) {
+    RTC_LOG(LS_ERROR) << "enable_limiter() failed.";
+  }
+}
+
+void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
+  const float* const* audio = audio_buffer.channels_const();
+  size_t samples_per_channel = audio_buffer.num_frames();
+  RTC_DCHECK(audio);
+
+  AggregateChannelLevels();
+  if (!capture_output_used_) {
+    return;
+  }
+
+  if (!!clipping_predictor_) {
+    AudioFrameView<const float> frame = AudioFrameView<const float>(
+        audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+    clipping_predictor_->Analyze(frame);
+  }
+
+  // Check for clipped samples, as the AGC has difficulty detecting pitch
+  // under clipping distortion. We do this in the preprocessing phase in order
+  // to catch clipped echo as well.
+  //
+  // If we find a sufficiently clipped frame, drop the current microphone
+  // level and enforce a new maximum level, lowered by the same amount from
+  // the current maximum. This harsh treatment is an effort to avoid repeated
+  // clipped echo events. As compensation for this restriction, the maximum
+  // compression gain is increased, through SetMaxLevel().
+  float clipped_ratio =
+      ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
+  clipping_rate_log_ = std::max(clipped_ratio, clipping_rate_log_);
+  clipping_rate_log_counter_++;
+  constexpr int kNumFramesIn30Seconds = 3000;
+  if (clipping_rate_log_counter_ == kNumFramesIn30Seconds) {
+    LogClippingMetrics(std::round(100.0f * clipping_rate_log_));
+    clipping_rate_log_ = 0.0f;
+    clipping_rate_log_counter_ = 0;
+  }
+
+  if (frames_since_clipped_ < clipped_wait_frames_) {
+    ++frames_since_clipped_;
+    return;
+  }
+
+  const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+  bool clipping_predicted = false;
+  int predicted_step = 0;
+  if (!!clipping_predictor_) {
+    for (int channel = 0; channel < num_capture_channels_; ++channel) {
+      const auto step = clipping_predictor_->EstimateClippedLevelStep(
+          channel, recommended_input_volume_, clipped_level_step_,
+          channel_agcs_[channel]->min_mic_level(), kMaxMicLevel);
+      if (step.has_value()) {
+        predicted_step = std::max(predicted_step, step.value());
+        clipping_predicted = true;
+      }
+    }
+  }
+  if (clipping_detected) {
+    RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
+                      << clipped_ratio;
+  }
+  int step = clipped_level_step_;
+  if (clipping_predicted) {
+    predicted_step = std::max(predicted_step, clipped_level_step_);
+    RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << predicted_step;
+    if (use_clipping_predictor_step_) {
+      step = predicted_step;
+    }
+  }
+  if (clipping_detected ||
+      (clipping_predicted && use_clipping_predictor_step_)) {
+    for (auto& state_ch : channel_agcs_) {
+      state_ch->HandleClipping(step);
+    }
+    frames_since_clipped_ = 0;
+    if (!!clipping_predictor_) {
+      clipping_predictor_->Reset();
+    }
+  }
+  AggregateChannelLevels();
+}
+
+void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
+  Process(audio_buffer, /*speech_probability=*/absl::nullopt,
+          /*speech_level_dbfs=*/absl::nullopt);
+}
+
+void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
+                               absl::optional<float> speech_probability,
+                               absl::optional<float> speech_level_dbfs) {
+  AggregateChannelLevels();
+  const int volume_after_clipping_handling = recommended_input_volume_;
+
+  if (!capture_output_used_) {
+    return;
+  }
+
+  const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
+  absl::optional<int> rms_error_override = absl::nullopt;
+  if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
+    rms_error_override =
+        GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
+  }
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> audio_data;
+    int16_t* audio_use = audio_data.data();
+    FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
+                  audio_use);
+    channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
+                               rms_error_override);
+    new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
+  }
+
+  AggregateChannelLevels();
+  if (volume_after_clipping_handling != recommended_input_volume_) {
+    // The recommended input volume was adjusted in order to match the target
+    // level.
+    UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
+        recommended_input_volume_);
+  }
+}
+
+absl::optional<int> AgcManagerDirect::GetDigitalComressionGain() {
+  return new_compressions_to_set_[channel_controlling_gain_];
+}
+
+void AgcManagerDirect::HandleCaptureOutputUsedChange(
+    bool capture_output_used) {
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->HandleCaptureOutputUsedChange(capture_output_used);
+  }
+  capture_output_used_ = capture_output_used;
+}
+
+float AgcManagerDirect::voice_probability() const {
+  float max_prob = 0.f;
+  for (const auto& state_ch : channel_agcs_) {
+    max_prob = std::max(max_prob, state_ch->voice_probability());
+  }
+
+  return max_prob;
+}
+
+void AgcManagerDirect::set_stream_analog_level(int level) {
+  if (!analog_controller_enabled_) {
+    recommended_input_volume_ = level;
+  }
+
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->set_stream_analog_level(level);
+  }
+
+  AggregateChannelLevels();
+}
+
+void AgcManagerDirect::AggregateChannelLevels() {
+  int new_recommended_input_volume =
+      channel_agcs_[0]->recommended_analog_level();
+  channel_controlling_gain_ = 0;
+  for (size_t ch = 1; ch < channel_agcs_.size(); ++ch) {
+    int level = channel_agcs_[ch]->recommended_analog_level();
+    if (level < new_recommended_input_volume) {
+      new_recommended_input_volume = level;
+      channel_controlling_gain_ = static_cast<int>(ch);
+    }
+  }
+
+  if (min_mic_level_override_.has_value() &&
+      new_recommended_input_volume > 0) {
+    new_recommended_input_volume =
+        std::max(new_recommended_input_volume, *min_mic_level_override_);
+  }
+
+  if (analog_controller_enabled_) {
+    recommended_input_volume_ = new_recommended_input_volume;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h
new file mode 100644
index 0000000000..adb2f5a63f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+#define MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+
+#include <atomic>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class MonoAgc;
+class GainControl;
+
+// Adaptive Gain Controller (AGC) that controls the input volume and a digital
+// gain. The input volume controller recommends what volume to use, handles
+// volume changes and clipping. In particular, it handles changes triggered by
+// the user (e.g., volume set to zero by a HW mute button). The digital
+// controller chooses and applies the digital compression gain.
+// This class is not thread-safe.
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class AgcManagerDirect final {
+ public:
+  // Ctor. `num_capture_channels` specifies the number of channels for the
+  // audio passed to `AnalyzePreProcess()` and `Process()`. Clamps
+  // `analog_config.startup_min_volume` in the [12, 255] range.
+  AgcManagerDirect(
+      int num_capture_channels,
+      const AudioProcessing::Config::GainController1::AnalogGainController&
+          analog_config);
+
+  ~AgcManagerDirect();
+  AgcManagerDirect(const AgcManagerDirect&) = delete;
+  AgcManagerDirect& operator=(const AgcManagerDirect&) = delete;
+
+  void Initialize();
+
+  // Configures `gain_control` to work as a fixed digital controller so that
+  // the adaptive part is only handled by this gain controller. Must be called
+  // if `gain_control` is also used to avoid the side-effects of running two
+  // AGCs.
+  void SetupDigitalGainControl(GainControl& gain_control) const;
+
+  // Sets the applied input volume.
+  void set_stream_analog_level(int level);
+
+  // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
+  // remove `set_stream_analog_level()`.
+  // Analyzes `audio` before `Process()` is called so that the analysis can be
+  // performed before external digital processing operations take place (e.g.,
+  // echo cancellation). The analysis consists of input clipping detection and
+  // prediction (if enabled). Must be called after `set_stream_analog_level()`.
+  void AnalyzePreProcess(const AudioBuffer& audio_buffer);
+
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`. If
+  // `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
+  // [-90.0f, 30.0f]) are given, uses them to override the estimated RMS error.
+  // TODO(webrtc:7494): This signature is needed for testing purposes, unify
+  // the signatures when the clean-up is done.
+  void Process(const AudioBuffer& audio_buffer,
+               absl::optional<float> speech_probability,
+               absl::optional<float> speech_level_dbfs);
+
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`.
+  void Process(const AudioBuffer& audio_buffer);
+
+  // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
+  // `recommended_analog_level()`.
+  // Returns the recommended input volume. If the input volume controller is
+  // disabled, returns the input volume set via the latest
+  // `set_stream_analog_level()` call. Must be called after
+  // `AnalyzePreProcess()` and `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  // Call when the capture stream output has been flagged to be used/not-used.
+  // If unused, the manager disregards all incoming audio.
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  float voice_probability() const;
+
+  int num_channels() const { return num_capture_channels_; }
+
+  // If available, returns the latest digital compression gain that has been
+  // chosen.
+  absl::optional<int> GetDigitalComressionGain();
+
+  // Returns true if clipping prediction is enabled.
+  bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
+
+  // Returns true if clipping prediction is used to adjust the input volume.
+  bool use_clipping_predictor_step() const {
+    return use_clipping_predictor_step_;
+  }
+
+ private:
+  friend class AgcManagerDirectTestHelper;
+
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           DisableDigitalDisablesDigital);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentDefault);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentDisabled);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentOutOfRangeAbove);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentOutOfRangeBelow);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentEnabled50);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentEnabledAboveStartupLevel);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           ClippingParametersVerified);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           DisableClippingPredictorDoesNotLowerVolume);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           UsedClippingPredictionsProduceLowerAnalogLevels);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           UnusedClippingPredictionsProduceEqualAnalogLevels);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           EmptyRmsErrorOverrideHasNoEffect);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           NonEmptyRmsErrorOverrideHasEffect);
+
+  // Ctor that creates a single-channel AGC by injecting `agc`, which will be
+  // owned by this class; hence, do not delete it.
+  AgcManagerDirect(
+      const AudioProcessing::Config::GainController1::AnalogGainController&
+          analog_config,
+      Agc* agc);
+
+  void AggregateChannelLevels();
+
+  const bool analog_controller_enabled_;
+
+  const absl::optional<int> min_mic_level_override_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  static std::atomic<int> instance_counter_;
+  const int num_capture_channels_;
+  const bool disable_digital_adaptive_;
+
+  int frames_since_clipped_;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
+  // input volume.
+  // TODO(bugs.webrtc.org/7494): Once
+  // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a
+  // trivial getter, leave uninitialized.
+  // Recommended input volume. After `set_stream_analog_level()` is called it
+  // holds the observed input volume. Possibly updated by
+  // `AnalyzePreProcess()` and `Process()`; after these calls, holds the
+  // recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  bool capture_output_used_;
+  int channel_controlling_gain_ = 0;
+
+  const int clipped_level_step_;
+  const float clipped_ratio_threshold_;
+  const int clipped_wait_frames_;
+
+  std::vector<std::unique_ptr<MonoAgc>> channel_agcs_;
+  std::vector<absl::optional<int>> new_compressions_to_set_;
+
+  const std::unique_ptr<ClippingPredictor> clipping_predictor_;
+  const bool use_clipping_predictor_step_;
+  float clipping_rate_log_;
+  int clipping_rate_log_counter_;
+};
+
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class MonoAgc {
+ public:
+  MonoAgc(ApmDataDumper* data_dumper,
+          int clipped_level_min,
+          bool disable_digital_adaptive,
+          int min_mic_level);
+  ~MonoAgc();
+  MonoAgc(const MonoAgc&) = delete;
+  MonoAgc& operator=(const MonoAgc&) = delete;
+
+  void Initialize();
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Sets the current input volume.
+  void set_stream_analog_level(int level) {
+    recommended_input_volume_ = level;
+  }
+
+  // Lowers the recommended input volume in response to clipping based on the
+  // suggested reduction `clipped_level_step`. Must be called after
+  // `set_stream_analog_level()`.
+  void HandleClipping(int clipped_level_step);
+
+  // Analyzes `audio`, requests the RMS error from AGC, updates the
+  // recommended input volume based on the estimated speech level and, if
+  // enabled, updates the (digital) compression gain to be applied by `agc_`.
+  // Must be called after `HandleClipping()`. If `rms_error_override` has a
+  // value, the RMS error from AGC is overridden by it.
+  void Process(rtc::ArrayView<const int16_t> audio,
+               absl::optional<int> rms_error_override);
+
+  // Returns the recommended input volume. Must be called after `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  float voice_probability() const { return agc_->voice_probability(); }
+  void ActivateLogging() { log_to_histograms_ = true; }
+  absl::optional<int> new_compression() const {
+    return new_compression_to_set_;
+  }
+
+  // Only used for testing.
+  void set_agc(Agc* agc) { agc_.reset(agc); }
+  int min_mic_level() const { return min_mic_level_; }
+
+ private:
+  // Sets a new input volume, after first checking that it hasn't been updated
+  // by the user, in which case no action is taken.
+  void SetLevel(int new_level);
+
+  // Set the maximum input volume the AGC is allowed to apply. Also updates
+  // the maximum compression gain to compensate. The volume must be at least
+  // `kClippedLevelMin`.
+  void SetMaxLevel(int level);
+
+  int CheckVolumeAndReset();
+  void UpdateGain(int rms_error_db);
+  void UpdateCompressor();
+
+  const int min_mic_level_;
+  const bool disable_digital_adaptive_;
+  std::unique_ptr<Agc> agc_;
+  int level_ = 0;
+  int max_level_;
+  int max_compression_gain_;
+  int target_compression_;
+  int compression_;
+  float compression_accumulator_;
+  bool capture_output_used_ = true;
+  bool check_volume_on_next_process_ = true;
+  bool startup_ = true;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
+  // input volume.
+  // Recommended input volume. After `set_stream_analog_level()` is
+  // called, it holds the observed applied input volume. Possibly updated by
+  // `HandleClipping()` and `Process()`; after these calls, holds the
+  // recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  absl::optional<int> new_compression_to_set_;
+  bool log_to_histograms_ = false;
+  const int clipped_level_min_;
+
+  // Frames since the last `UpdateGain()` call.
+  int frames_since_update_gain_ = 0;
+  // Set to true for the first frame after startup and reset, otherwise false.
+  bool is_first_frame_ = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
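[Editor's note] A minimal sketch of the per-frame call order required by the comments in the header above (not part of the patch; `audio_buffer` and the volume values are illustrative):

    // Illustrative only: one capture channel, default analog AGC config.
    AudioProcessing::Config::GainController1::AnalogGainController config;
    AgcManagerDirect manager(/*num_capture_channels=*/1, config);
    manager.Initialize();

    // Once per 10 ms capture frame:
    manager.set_stream_analog_level(applied_volume);  // Volume from device.
    manager.AnalyzePreProcess(audio_buffer);          // Clipping handling.
    manager.Process(audio_buffer);                    // Pick gain and volume.
    int volume_to_apply = manager.recommended_analog_level();
    absl::optional<int> gain_db = manager.GetDigitalComressionGain();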
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc
new file mode 100644
index 0000000000..70ac0b5b34
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc
@@ -0,0 +1,2184 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <algorithm>
+#include <fstream>
+#include <limits>
+#include <vector>
+
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc/mock_agc.h"
+#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoAll;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+namespace {
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kInitialInputVolume = 128;
+constexpr int kClippedMin = 165;  // Arbitrary, but different from the default.
+constexpr float kAboveClippedThreshold = 0.2f;
+constexpr int kMinMicLevel = 12;
+constexpr int kClippedLevelStep = 15;
+constexpr float kClippedRatioThreshold = 0.1f;
+constexpr int kClippedWaitFrames = 300;
+constexpr float kLowSpeechProbability = 0.1f;
+constexpr float kHighSpeechProbability = 0.7f;
+constexpr float kSpeechLevelDbfs = -25.0f;
+
+constexpr float kMinSample = std::numeric_limits<int16_t>::min();
+constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
+
+using AnalogAgcConfig =
+    AudioProcessing::Config::GainController1::AnalogGainController;
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+constexpr AnalogAgcConfig kDefaultAnalogConfig{};
+
+class MockGainControl : public GainControl {
+ public:
+  virtual ~MockGainControl() {}
+  MOCK_METHOD(int, set_stream_analog_level, (int level), (override));
+  MOCK_METHOD(int, stream_analog_level, (), (const, override));
+  MOCK_METHOD(int, set_mode, (Mode mode), (override));
+  MOCK_METHOD(Mode, mode, (), (const, override));
+  MOCK_METHOD(int, set_target_level_dbfs, (int level), (override));
+  MOCK_METHOD(int, target_level_dbfs, (), (const, override));
+  MOCK_METHOD(int, set_compression_gain_db, (int gain), (override));
+  MOCK_METHOD(int, compression_gain_db, (), (const, override));
+  MOCK_METHOD(int, enable_limiter, (bool enable), (override));
+  MOCK_METHOD(bool, is_limiter_enabled, (), (const, override));
+  MOCK_METHOD(int,
+              set_analog_level_limits,
+              (int minimum, int maximum),
+              (override));
+  MOCK_METHOD(int, analog_level_minimum, (), (const, override));
+  MOCK_METHOD(int, analog_level_maximum, (), (const, override));
+  MOCK_METHOD(bool, stream_is_saturated, (), (const, override));
+};
+
+// TODO(bugs.webrtc.org/12874): Remove and use designated initializers once
+// fixed.
+std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect(
+    int startup_min_volume,
+    int clipped_level_step,
+    float clipped_ratio_threshold,
+    int clipped_wait_frames,
+    const ClippingPredictorConfig& clipping_predictor_config =
+        kDefaultAnalogConfig.clipping_predictor) {
+  AnalogAgcConfig config;
+  config.startup_min_volume = startup_min_volume;
+  config.clipped_level_min = kClippedMin;
+  config.enable_digital_adaptive = false;
+  config.clipped_level_step = clipped_level_step;
+  config.clipped_ratio_threshold = clipped_ratio_threshold;
+  config.clipped_wait_frames = clipped_wait_frames;
+  config.clipping_predictor = clipping_predictor_config;
+  return std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1,
+                                            config);
+}
+
+// Deprecated.
+// TODO(bugs.webrtc.org/7494): Delete this helper, use
+// `AgcManagerDirectTestHelper::CallAgcSequence()` instead.
+// Calls `AnalyzePreProcess()` on `manager` `num_calls` times. `peak_ratio` is
+// a value in [0, 1] which determines the amplitude of the samples (1 maps to
+// full scale). The first half of the calls is made on frames which are half
+// filled with zeros in order to simulate a signal with different crest
+// factors.
+void CallPreProcessAudioBuffer(int num_calls,
+                               float peak_ratio,
+                               AgcManagerDirect& manager) {
+  RTC_DCHECK_LE(peak_ratio, 1.0f);
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+  const int num_channels = audio_buffer.num_channels();
+  const int num_frames = audio_buffer.num_frames();
+
+  // Make half of the calls with half zeroed frames.
+  for (int ch = 0; ch < num_channels; ++ch) {
+    // 50% of the samples in one frame are zero.
+    for (int i = 0; i < num_frames; i += 2) {
+      audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f;
+      audio_buffer.channels()[ch][i + 1] = 0.0f;
+    }
+  }
+  for (int n = 0; n < num_calls / 2; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+  }
+
+  // Make the remaining half of the calls with frames whose samples are all
+  // set.
+  for (int ch = 0; ch < num_channels; ++ch) {
+    for (int i = 0; i < num_frames; ++i) {
+      audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f;
+    }
+  }
+  for (int n = 0; n < num_calls - num_calls / 2; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+  }
+}
+
+constexpr char kMinMicLevelFieldTrial[] =
+    "WebRTC-Audio-2ndAgcMinMicLevelExperiment";
+
+std::string GetAgcMinMicLevelExperimentFieldTrial(const std::string& value) {
+  char field_trial_buffer[64];
+  rtc::SimpleStringBuilder builder(field_trial_buffer);
+  builder << kMinMicLevelFieldTrial << "/" << value << "/";
+  return builder.str();
+}
+
+std::string GetAgcMinMicLevelExperimentFieldTrialEnabled(
+    int enabled_value,
+    const std::string& suffix = "") {
+  RTC_DCHECK_GE(enabled_value, 0);
+  RTC_DCHECK_LE(enabled_value, 255);
+  char field_trial_buffer[64];
+  rtc::SimpleStringBuilder builder(field_trial_buffer);
+  builder << kMinMicLevelFieldTrial << "/Enabled-" << enabled_value << suffix
+          << "/";
+  return builder.str();
+}
+
+std::string GetAgcMinMicLevelExperimentFieldTrial(
+    absl::optional<int> min_mic_level) {
+  if (min_mic_level.has_value()) {
+    return GetAgcMinMicLevelExperimentFieldTrialEnabled(*min_mic_level);
+  }
+  return GetAgcMinMicLevelExperimentFieldTrial("Disabled");
+}
+
+// (Over)writes `samples_value` for the samples in `audio_buffer`.
+// When `clipped_ratio`, a value in [0, 1], is greater than 0, the
+// corresponding fraction of the frame is set to a full scale value to
+// simulate clipping.
+void WriteAudioBufferSamples(float samples_value,
+                             float clipped_ratio,
+                             AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  RTC_DCHECK_GE(clipped_ratio, 0.0f);
+  RTC_DCHECK_LE(clipped_ratio, 1.0f);
+  int num_channels = audio_buffer.num_channels();
+  int num_samples = audio_buffer.num_frames();
+  int num_clipping_samples = clipped_ratio * num_samples;
+  for (int ch = 0; ch < num_channels; ++ch) {
+    int i = 0;
+    for (; i < num_clipping_samples; ++i) {
+      audio_buffer.channels()[ch][i] = 32767.0f;
+    }
+    for (; i < num_samples; ++i) {
+      audio_buffer.channels()[ch][i] = samples_value;
+    }
+  }
+}
+
+// Deprecated.
+// TODO(bugs.webrtc.org/7494): Delete this helper, use
+// `AgcManagerDirectTestHelper::CallAgcSequence()` instead.
+
+// Deprecated.
+// TODO(bugs.webrtc.org/7494): Delete this helper, use
+// `AgcManagerDirectTestHelper::CallAgcSequence()` instead.
+void CallPreProcessAndProcess(int num_calls,
+                              const AudioBuffer& audio_buffer,
+                              absl::optional<float> speech_probability_override,
+                              absl::optional<float> speech_level_override,
+                              AgcManagerDirect& manager) {
+  for (int n = 0; n < num_calls; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+    manager.Process(audio_buffer, speech_probability_override,
+                    speech_level_override);
+  }
+}
+
+// Reads a given number of 10 ms chunks from a PCM file and feeds them to
+// `AgcManagerDirect`.
+class SpeechSamplesReader {
+ private:
+  // Recording properties.
+  static constexpr int kPcmSampleRateHz = 16000;
+  static constexpr int kPcmNumChannels = 1;
+  static constexpr int kPcmBytesPerSamples = sizeof(int16_t);
+
+ public:
+  SpeechSamplesReader()
+      : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"),
+            std::ios::binary | std::ios::ate),
+        audio_buffer_(kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels),
+        buffer_(audio_buffer_.num_frames()),
+        buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) {
+    RTC_CHECK(is_);
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file, applies
+  // `gain_db` and feeds the frames into `agc` by calling `AnalyzePreProcess()`
+  // and `Process()` for each frame. Reads the number of 10 ms frames available
+  // in the PCM file if `num_frames` is too large - i.e., does not loop.
+  void Feed(int num_frames, int gain_db, AgcManagerDirect& agc) {
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+
+      agc.AnalyzePreProcess(audio_buffer_);
+      agc.Process(audio_buffer_);
+    }
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file, applies
+  // `gain_db` and feeds the frames into `agc` by calling `AnalyzePreProcess()`
+  // and `Process()` for each frame. Reads the number of 10 ms frames available
+  // in the PCM file if `num_frames` is too large - i.e., does not loop.
+  // `speech_probability_override` and `speech_level_override` are passed to
+  // `Process()` where they are used to override the `agc` RMS error if they
+  // have a value.
+  void Feed(int num_frames,
+            int gain_db,
+            absl::optional<float> speech_probability_override,
+            absl::optional<float> speech_level_override,
+            AgcManagerDirect& agc) {
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
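+      // (The transform below scales each int16 sample by `gain` and clamps the
+      // result to the int16 range, mimicking saturation in a capture path.)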
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+
+      agc.AnalyzePreProcess(audio_buffer_);
+      agc.Process(audio_buffer_, speech_probability_override,
+                  speech_level_override);
+    }
+  }
+
+ private:
+  std::ifstream is_;
+  AudioBuffer audio_buffer_;
+  std::vector<int16_t> buffer_;
+  const std::streamsize buffer_num_bytes_;
+};
+
+}  // namespace
+
+// TODO(bugs.webrtc.org/12874): Use constexpr struct with designated
+// initializers once fixed.
+constexpr AnalogAgcConfig GetAnalogAgcTestConfig() {
+  AnalogAgcConfig config;
+  config.enabled = true;
+  config.startup_min_volume = kInitialInputVolume;
+  config.clipped_level_min = kClippedMin;
+  config.enable_digital_adaptive = true;
+  config.clipped_level_step = kClippedLevelStep;
+  config.clipped_ratio_threshold = kClippedRatioThreshold;
+  config.clipped_wait_frames = kClippedWaitFrames;
+  config.clipping_predictor = kDefaultAnalogConfig.clipping_predictor;
+  return config;
+}
+
+constexpr AnalogAgcConfig GetDisabledAnalogAgcConfig() {
+  AnalogAgcConfig config = GetAnalogAgcTestConfig();
+  config.enabled = false;
+  return config;
+}
+
+// Helper class that provides an `AgcManagerDirect` instance with an injected
+// `Agc` mock, an `AudioBuffer` instance and `CallAgcSequence()`, a helper
+// method that runs the `AgcManagerDirect` instance on the `AudioBuffer` while
+// sticking to the API contract.
+class AgcManagerDirectTestHelper {
+ public:
+  // Ctor. Initializes `audio_buffer` with zeros.
+  AgcManagerDirectTestHelper()
+      : audio_buffer(kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels),
+        mock_agc(new ::testing::NiceMock<MockAgc>()),
+        manager(GetAnalogAgcTestConfig(), mock_agc) {
+    manager.Initialize();
+    manager.SetupDigitalGainControl(mock_gain_control);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+  }
+
+  // Calls the sequence of `AgcManagerDirect` methods according to the API
+  // contract, namely:
+  // - Sets the applied input volume;
+  // - Uses `audio_buffer` to call `AnalyzePreProcess()` and `Process()`;
+  // - Sets the digital compression gain, if specified, on the injected
+  //   `mock_agc`. Returns the recommended input volume. The RMS error from
+  //   AGC is replaced by an override value if `speech_probability_override`
+  //   and `speech_level_override` have a value.
+  int CallAgcSequence(int applied_input_volume,
+                      absl::optional<float> speech_probability_override,
+                      absl::optional<float> speech_level_override) {
+    manager.set_stream_analog_level(applied_input_volume);
+    manager.AnalyzePreProcess(audio_buffer);
+    manager.Process(audio_buffer, speech_probability_override,
+                    speech_level_override);
+    absl::optional<int> digital_gain = manager.GetDigitalComressionGain();
+    if (digital_gain) {
+      mock_gain_control.set_compression_gain_db(*digital_gain);
+    }
+    return manager.recommended_analog_level();
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`. The RMS error from AGC is replaced by an override
+  // value if `speech_probability_override` and `speech_level_override` have
+  // a value.
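+  // A typical call in the tests below looks like, e.g.:
+  //   helper.CallProcess(/*num_calls=*/1,
+  //                      absl::optional<float>(kHighSpeechProbability),
+  //                      absl::optional<float>(-23.0f));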
+  void CallProcess(int num_calls,
+                   absl::optional<float> speech_probability_override,
+                   absl::optional<float> speech_level_override) {
+    for (int i = 0; i < num_calls; ++i) {
+      EXPECT_CALL(*mock_agc, Process(_)).WillOnce(Return());
+      manager.Process(audio_buffer, speech_probability_override,
+                      speech_level_override);
+      absl::optional<int> new_digital_gain =
+          manager.GetDigitalComressionGain();
+      if (new_digital_gain) {
+        mock_gain_control.set_compression_gain_db(*new_digital_gain);
+      }
+    }
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallPreProc(int num_calls, float clipped_ratio) {
+    RTC_DCHECK_GE(clipped_ratio, 0.0f);
+    RTC_DCHECK_LE(clipped_ratio, 1.0f);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio,
+                            audio_buffer);
+    for (int i = 0; i < num_calls; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallPreProcForChangingAudio(int num_calls, float peak_ratio) {
+    RTC_DCHECK_GE(peak_ratio, 0.0f);
+    RTC_DCHECK_LE(peak_ratio, 1.0f);
+    const float samples_value = peak_ratio * 32767.0f;
+
+    // Make half of the calls on a frame where the samples alternate
+    // `samples_value` and zeros.
+    WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      for (size_t k = 1; k < audio_buffer.num_frames(); k += 2) {
+        audio_buffer.channels()[ch][k] = 0.0f;
+      }
+    }
+    for (int i = 0; i < num_calls / 2; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+
+    // Make half of the calls on a frame where all the samples equal
+    // `samples_value`.
+    WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+    for (int i = 0; i < num_calls - num_calls / 2; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+  }
+
+  AudioBuffer audio_buffer;
+  MockAgc* mock_agc;
+  AgcManagerDirect manager;
+  MockGainControl mock_gain_control;
+};
+
+class AgcManagerDirectParametrizedTest
+    : public ::testing::TestWithParam<
+          std::tuple<absl::optional<int>, bool>> {
+ protected:
+  AgcManagerDirectParametrizedTest()
+      : field_trials_(
+            GetAgcMinMicLevelExperimentFieldTrial(std::get<0>(GetParam()))) {}
+
+  bool IsMinMicLevelOverridden() const {
+    return std::get<0>(GetParam()).has_value();
+  }
+  int GetMinMicLevel() const {
+    return std::get<0>(GetParam()).value_or(kMinMicLevel);
+  }
+
+  bool IsRmsErrorOverridden() const { return std::get<1>(GetParam()); }
+  absl::optional<float> GetOverrideOrEmpty(float value) const {
+    return IsRmsErrorOverridden() ? absl::optional<float>(value)
+                                  : absl::nullopt;
+  }
+
+ private:
+  test::ScopedFieldTrials field_trials_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    AgcManagerDirectParametrizedTest,
+    ::testing::Combine(testing::Values(absl::nullopt, 12, 20),
+                       testing::Bool()));
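+
+// Note: the suite above runs every TEST_P six times, i.e. the min mic level
+// override in {unset, 12, 20} crossed with the RMS error override being
+// disabled or enabled.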
+
+// Checks that when the analog controller is disabled, no downward adaptation
+// takes place.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisabledAnalogAgcDoesNotAdaptDownwards) {
+  AgcManagerDirect manager_no_analog_agc(kNumChannels,
+                                         GetDisabledAnalogAgcConfig());
+  manager_no_analog_agc.Initialize();
+  AgcManagerDirect manager_with_analog_agc(kNumChannels,
+                                           GetAnalogAgcTestConfig());
+  manager_with_analog_agc.Initialize();
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  constexpr int kAnalogLevel = 250;
+  static_assert(kAnalogLevel > kInitialInputVolume,
+                "Increase `kAnalogLevel`.");
+  manager_no_analog_agc.set_stream_analog_level(kAnalogLevel);
+  manager_with_analog_agc.set_stream_analog_level(kAnalogLevel);
+
+  // Make a first call with input that doesn't clip in order to let the
+  // controller read the input volume. That is needed because clipping input
+  // causes the controller to stay in idle state for
+  // `AnalogAgcConfig::clipped_wait_frames` frames.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                          audio_buffer);
+  manager_no_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_with_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_no_analog_agc.Process(audio_buffer,
+                                GetOverrideOrEmpty(kHighSpeechProbability),
+                                GetOverrideOrEmpty(-18.0f));
+  manager_with_analog_agc.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(-18.0f));
+
+  // Feed clipping input to trigger a downward adaptation of the analog level.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.2f,
+                          audio_buffer);
+  manager_no_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_with_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_no_analog_agc.Process(audio_buffer,
+                                GetOverrideOrEmpty(kHighSpeechProbability),
+                                GetOverrideOrEmpty(-10.0f));
+  manager_with_analog_agc.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(-10.0f));
+
+  // Check that no adaptation occurs when the analog controller is disabled
+  // and make sure that the test triggers a downward adaptation otherwise.
+  EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel);
+  ASSERT_LT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel);
+}
+
+// Checks that when the analog controller is disabled, no upward adaptation
+// takes place.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST_P(AgcManagerDirectParametrizedTest, DisabledAnalogAgcDoesNotAdaptUpwards) {
+  AgcManagerDirect manager_no_analog_agc(kNumChannels,
+                                         GetDisabledAnalogAgcConfig());
+  manager_no_analog_agc.Initialize();
+  AgcManagerDirect manager_with_analog_agc(kNumChannels,
+                                           GetAnalogAgcTestConfig());
+  manager_with_analog_agc.Initialize();
+
+  constexpr int kAnalogLevel = kInitialInputVolume;
+  manager_no_analog_agc.set_stream_analog_level(kAnalogLevel);
+  manager_with_analog_agc.set_stream_analog_level(kAnalogLevel);
+
+  // Feed speech with low energy to trigger an upward adaptation of the analog
+  // level.
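+  // (Below: 125 frames of 10 ms each, i.e. 1.25 s of speech, attenuated by
+  // 20 dB before being fed to the managers.)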
+ constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_no_analog_agc); + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_with_analog_agc); + + // Check that no adaptation occurs when the analog controller is disabled + // and make sure that the test triggers an upward adaptation otherwise. + EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel); + ASSERT_GT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel); +} + +TEST_P(AgcManagerDirectParametrizedTest, + StartupMinVolumeConfigurationIsRespected) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(kInitialInputVolume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeResponseToRmsError) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Compressor default; no residual error. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-28.0f)); + + // Above the compressor's window; volume should be increased. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(130, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-38.0f)); + EXPECT_EQ(168, helper.manager.recommended_analog_level()); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-18.0f)); + + // Below the compressor's window; volume should be decreased. 
+ EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(167, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(163, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-9), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-9.0f)); + EXPECT_EQ(129, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeIsLimited) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Maximum upwards change is limited. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(183, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(243, helper.manager.recommended_analog_level()); + + // Won't go higher than the maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(254, helper.manager.recommended_analog_level()); + + // Maximum downwards change is limited. 
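+  // (Starting from 254, the expected sequence below is 254 -> 194 -> 137 ->
+  // 88 -> 54 -> 33; each step is bounded by the manager's limit on the
+  // per-call volume change.)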
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(194, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(137, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(88, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(54, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(33, helper.manager.recommended_analog_level());
+
+  // Won't go lower than the minimum.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(std::max(18, GetMinMicLevel()),
+            helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(std::max(12, GetMinMicLevel()),
+            helper.manager.recommended_analog_level());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorStepsTowardsTarget) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  // Compressor default; no call to set_compression_gain_db.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(5), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-23.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride);
+
+  // Moves slowly upwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(9), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-27.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+
+  // Moves slowly downward, then reverses before reaching the original target.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(5), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-23.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(9), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-27.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorErrorIsDeemphasized) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-28.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-18.0f));
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(7))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMaximum) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/4, speech_probability_override,
+                     GetOverrideOrEmpty(-28.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMinimum) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/4, speech_probability_override,
+                     GetOverrideOrEmpty(-18.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(5))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(4))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(3))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(2))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, NoActionWhileMuted) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  helper.manager.HandleCaptureOutputUsedChange(false);
+  helper.manager.Process(helper.audio_buffer,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  absl::optional<int> new_digital_gain =
+      helper.manager.GetDigitalComressionGain();
+  if (new_digital_gain) {
+    helper.mock_gain_control.set_compression_gain_db(*new_digital_gain);
+  }
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, UnmutingChecksVolumeWithoutRaising) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  helper.manager.HandleCaptureOutputUsedChange(false);
+
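+  // Unmute: the manager is expected to re-check the input volume without
+  // raising it (hence the test name).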
helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 127; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + // SetMicVolume should not be called. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(127, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, UnmutingRaisesTooLowVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.manager.HandleCaptureOutputUsedChange(false); + helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 11; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ManualLevelChangeResultsInNoSetMicCall) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Change outside of compressor's range, which would normally trigger a call + // to `SetMicVolume()`. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + + // When the analog volume changes, the gain controller is reset. + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + ASSERT_NE(helper.manager.recommended_analog_level(), 154); + helper.manager.set_stream_analog_level(154); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(154, helper.manager.recommended_analog_level()); + + // Do the same thing, except downwards now. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(100); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(100, helper.manager.recommended_analog_level()); + + // And finally verify the AGC continues working without a manual change. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(99, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeFromMax) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. 
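+  // (Three +30 dB error updates below: 128 -> 183 -> 243 -> 255.)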
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(183, helper.manager.recommended_analog_level());
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(243, helper.manager.recommended_analog_level());
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+
+  // Manual change does not result in SetMicVolume call.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true)));
+  helper.manager.set_stream_analog_level(50);
+  EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-17.0f));
+  EXPECT_EQ(50, helper.manager.recommended_analog_level());
+
+  // Continues working as usual afterwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(20), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-38.0f));
+
+  EXPECT_EQ(69, helper.manager.recommended_analog_level());
+}
+
+// Checks that, when the min mic level override is not specified, AGC ramps up
+// towards the minimum mic level after the mic level is manually set below the
+// minimum gain to enforce.
+TEST_P(AgcManagerDirectParametrizedTest,
+       RecoveryAfterManualLevelChangeBelowMinWithoutMinMicLevelOverride) {
+  if (IsMinMicLevelOverridden()) {
+    GTEST_SKIP() << "Skipped. Min mic level overridden.";
+  }
+
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  // Manual change below min, but strictly positive, otherwise AGC won't take
+  // any action.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true)));
+  helper.manager.set_stream_analog_level(1);
+  EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-17.0f));
+  EXPECT_EQ(1, helper.manager.recommended_analog_level());
+
+  // Continues working as usual afterwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(11), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-29.0f));
+  EXPECT_EQ(2, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(30), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(11, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(20), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-38.0f));
+  EXPECT_EQ(18, helper.manager.recommended_analog_level());
+}
+
+// Checks that, when the min mic level override is specified, AGC immediately
+// applies the minimum mic level after the mic level is manually set below the
+// minimum gain to enforce.
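+// (With the override active, `GetMinMicLevel()` below returns the overridden
+// value, i.e. 12 or 20 in this suite, and the level jumps there directly.)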
+TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeBelowMin) { + if (!IsMinMicLevelOverridden()) { + GTEST_SKIP() << "Skipped. Min mic level not overridden."; + } + + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Manual change below min, but strictly positive, otherwise + // AGC won't take any action. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(1); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, NoClippingHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/100, /*clipped_ratio=*/0); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingUnderThresholdHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.099); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.2); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, WaitingPeriodBetweenClippingChecks) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(225, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLoweringIsLimited) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/180, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/1000, + 
/*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenEqualToLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenHigherThanLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/200, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(185, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + MaxCompressionIsIncreasedAfterClipping) { + constexpr absl::optional kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/210, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, kAboveClippedThreshold); + EXPECT_EQ(195, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/5, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. 
+ helper.CallProcess(/*num_calls=*/14, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(13)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + // Continue clipping until we hit the maximum surplus compression. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(180, helper.manager.recommended_analog_level()); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(1, kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + // Current level is now at the minimum, but the maximum allowed level still + // has more to decrease. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/4, speech_probability_override, + GetOverrideOrEmpty(-34.0f)); + helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(14)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(15)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(16)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(17)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(18)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); +} + 
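+// For reference: each handled clipping event above lowers the level by
+// `kClippedLevelStep` (15), so the sequence is 210 -> 195 -> 180 -> 165,
+// where 165 is `kClippedMin` and the lowering stops.
+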
+TEST_P(AgcManagerDirectParametrizedTest, UserCanRaiseVolumeAfterClipping) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/225, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + + // High enough error to trigger a volume check. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(14), Return(true))); + // User changed the volume. + helper.manager.set_stream_analog_level(250); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-32.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + + // Move down... + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-8.0f)); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + // And back up to the new max established by the user. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + // Will not move above new maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDoesNotPullLowVolumeBackUp) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/80, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + int initial_volume = helper.manager.recommended_analog_level(); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(initial_volume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, TakesNoActionOnZeroMicVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.manager.set_stream_analog_level(0); + helper.CallProcess(/*num_calls=*/10, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(0, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDetectionLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, 
/*peak_ratio=*/1.0f);
+  EXPECT_EQ(240, helper.manager.recommended_analog_level());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisabledClippingPredictorDoesNotLowerVolume) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(/*applied_input_volume=*/255,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_FALSE(helper.manager.clipping_predictor_enabled());
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+  helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+  helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, DisableDigitalDisablesDigital) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  auto agc = std::unique_ptr<Agc>(new ::testing::NiceMock<MockAgc>());
+  MockGainControl mock_gain_control;
+  EXPECT_CALL(mock_gain_control, set_mode(GainControl::kFixedDigital));
+  EXPECT_CALL(mock_gain_control, set_target_level_dbfs(0));
+  EXPECT_CALL(mock_gain_control, set_compression_gain_db(0));
+  EXPECT_CALL(mock_gain_control, enable_limiter(false));
+
+  AnalogAgcConfig config;
+  config.enable_digital_adaptive = false;
+  auto manager = std::make_unique<AgcManagerDirect>(kNumChannels, config);
+  manager->Initialize();
+  manager->SetupDigitalGainControl(mock_gain_control);
+}
+
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDefault) {
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
+
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDisabled) {
+  for (const std::string& field_trial_suffix : {"", "_20220210"}) {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrial("Disabled" +
+                                              field_trial_suffix));
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+  }
+}
+
+// Checks that a field-trial parameter outside of the valid range [0,255] is
+// ignored.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeAbove) {
+  test::ScopedFieldTrials field_trial(
+      GetAgcMinMicLevelExperimentFieldTrial("Enabled-256"));
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
+
+// Checks that a field-trial parameter outside of the valid range [0,255] is
+// ignored.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeBelow) {
+  test::ScopedFieldTrials field_trial(
+      GetAgcMinMicLevelExperimentFieldTrial("Enabled--1"));
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
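+
+// In both out-of-range cases above the parsed value ("256" and "-1") lies
+// outside [0, 255], so the field trial is ignored and the default
+// `kMinMicLevel` (12) is kept.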
+
+// Verifies that a valid experiment changes the minimum microphone level. The
+// start volume is larger than the min level and should therefore not be
+// changed.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentEnabled50) {
+  constexpr int kMinMicLevelOverride = 50;
+  for (const std::string& field_trial_suffix : {"", "_20220210"}) {
+    SCOPED_TRACE(field_trial_suffix);
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride,
+                                                     field_trial_suffix));
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(),
+              kMinMicLevelOverride);
+  }
+}
+
+// Checks that, when the "WebRTC-Audio-2ndAgcMinMicLevelExperiment" field trial
+// is specified with a valid value, the mic level never gets lowered beyond the
+// override value in the presence of clipping.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentCheckMinLevelWithClipping) {
+  constexpr int kMinMicLevelOverride = 250;
+
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    manager->Initialize();
+    manager->set_stream_analog_level(kInitialInputVolume);
+    return manager;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% of clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt, *manager);
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt,
+                           *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the test signal triggers a larger downward adaptation for
+  // `manager`, which is allowed to reach a lower gain.
+  EXPECT_GT(manager_with_override->recommended_analog_level(),
+            manager->recommended_analog_level());
+  // Check that the gain selected by `manager_with_override` equals the minimum
+  // value overridden via field trial.
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kMinMicLevelOverride);
+}
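+
+// Note: without the override the level may drop as far as `clipped_level_min`
+// (165 in this configuration), while the override clamps the floor at 250,
+// which is what the two checks above verify.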
+  const auto factory = []() {
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    manager->Initialize();
+    manager->set_stream_analog_level(kInitialInputVolume);
+    return manager;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/0.7f,
+      /*speech_level_override=*/-18.0f, *manager);
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f),
+      *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the test signal triggers a larger downward adaptation for
+  // `manager`, which is allowed to reach a lower gain.
+  EXPECT_GT(manager_with_override->recommended_analog_level(),
+            manager->recommended_analog_level());
+  // Check that the gain selected by `manager_with_override` equals the minimum
+  // value overridden via field trial.
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kMinMicLevelOverride);
+}
+
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a value lower than the `clipped_level_min`, the behavior of
+// the analog gain controller is the same as that obtained when the field trial
+// is not specified.
+TEST(AgcManagerDirectTest,
+     AgcMinMicLevelExperimentCompareMicLevelWithClipping) {
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    // Use a large clipped level step to more quickly decrease the analog gain
+    // with clipping.
+    AnalogAgcConfig config = kDefaultAnalogConfig;
+    config.startup_min_volume = kInitialInputVolume;
+    config.enable_digital_adaptive = false;
+    config.clipped_level_step = 64;
+    config.clipped_ratio_threshold = kClippedRatioThreshold;
+    config.clipped_wait_frames = kClippedWaitFrames;
+    auto controller =
+        std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config);
+    controller->Initialize();
+    controller->set_stream_analog_level(kInitialInputVolume);
+    return controller;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    constexpr int kMinMicLevelOverride = 20;
+    static_assert(
+        kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride,
+        "Use a lower override value.");
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt, *manager);
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt,
+                           *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the selected analog gain is the same for both controllers and
+  // that it equals the minimum level reached when clipping is handled. That is
+  // expected because the minimum microphone level override is less than the
+  // minimum level used when clipping is detected.
+  EXPECT_EQ(manager->recommended_analog_level(),
+            manager_with_override->recommended_analog_level());
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kDefaultAnalogConfig.clipped_level_min);
+}
+
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a value lower than the `clipped_level_min`, the behavior of
+// the analog gain controller is the same as that obtained when the field trial
+// is not specified.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST(AgcManagerDirectTest,
+     AgcMinMicLevelExperimentCompareMicLevelWithClippingWithRmsErrorOverride) {
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    // Use a large clipped level step to more quickly decrease the analog gain
+    // with clipping.
+    AnalogAgcConfig config = kDefaultAnalogConfig;
+    config.startup_min_volume = kInitialInputVolume;
+    config.enable_digital_adaptive = false;
+    config.clipped_level_step = 64;
+    config.clipped_ratio_threshold = kClippedRatioThreshold;
+    config.clipped_wait_frames = kClippedWaitFrames;
+    auto controller =
+        std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config);
+    controller->Initialize();
+    controller->set_stream_analog_level(kInitialInputVolume);
+    return controller;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    constexpr int kMinMicLevelOverride = 20;
+    static_assert(
+        kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride,
+        "Use a lower override value.");
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f), *manager);
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f),
+      *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the selected analog gain is the same for both controllers and
+  // that it equals the minimum level reached when clipping is handled. That is
+  // expected because the minimum microphone level override is less than the
+  // minimum level used when clipping is detected.
+  EXPECT_EQ(manager->recommended_analog_level(),
+            manager_with_override->recommended_analog_level());
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kDefaultAnalogConfig.clipped_level_min);
+}
+
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`.
+// Verifies that configurable clipping parameters are initialized as intended.
+TEST_P(AgcManagerDirectParametrizedTest, ClippingParametersVerified) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  manager->Initialize();
+  EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep);
+  EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold);
+  EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames);
+  std::unique_ptr<AgcManagerDirect> manager_custom =
+      CreateAgcManagerDirect(kInitialInputVolume,
+                             /*clipped_level_step=*/10,
+                             /*clipped_ratio_threshold=*/0.2f,
+                             /*clipped_wait_frames=*/50);
+  manager_custom->Initialize();
+  EXPECT_EQ(manager_custom->clipped_level_step_, 10);
+  EXPECT_EQ(manager_custom->clipped_ratio_threshold_, 0.2f);
+  EXPECT_EQ(manager_custom->clipped_wait_frames_, 50);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisableClippingPredictorDisablesClippingPredictor) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  ClippingPredictorConfig config;
+  config.enabled = false;
+
+  std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect(
+      kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold,
+      kClippedWaitFrames, config);
+  manager->Initialize();
+  EXPECT_FALSE(manager->clipping_predictor_enabled());
+  EXPECT_FALSE(manager->use_clipping_predictor_step());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, ClippingPredictorDisabledByDefault) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  constexpr ClippingPredictorConfig kDefaultConfig;
+  EXPECT_FALSE(kDefaultConfig.enabled);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       EnableClippingPredictorEnablesClippingPredictor) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  ClippingPredictorConfig config;
+  config.enabled = true;
+  config.use_predicted_step = true;
+
+  std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect(
+      kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold,
+      kClippedWaitFrames, config);
+  manager->Initialize();
+  EXPECT_TRUE(manager->clipping_predictor_enabled());
+  EXPECT_TRUE(manager->use_clipping_predictor_step());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisableClippingPredictorDoesNotLowerVolume) {
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  AnalogAgcConfig config = GetAnalogAgcTestConfig();
+  config.clipping_predictor.enabled = false;
+  AgcManagerDirect manager(config, new ::testing::NiceMock<MockAgc>());
+  manager.Initialize();
+  manager.set_stream_analog_level(/*level=*/255);
+  EXPECT_FALSE(manager.clipping_predictor_enabled());
+  EXPECT_FALSE(manager.use_clipping_predictor_step());
+  EXPECT_EQ(manager.recommended_analog_level(), 255);
+  manager.Process(audio_buffer, GetOverrideOrEmpty(kHighSpeechProbability),
+                  GetOverrideOrEmpty(kSpeechLevelDbfs));
+  CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+  EXPECT_EQ(manager.recommended_analog_level(), 255);
+  CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager);
+  EXPECT_EQ(manager.recommended_analog_level(), 255);
+  CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager);
+  EXPECT_EQ(manager.recommended_analog_level(), 255);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       UsedClippingPredictionsProduceLowerAnalogLevels) {
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig();
+  config_with_prediction.clipping_predictor.enabled = true;
+  config_with_prediction.clipping_predictor.use_predicted_step = true;
+  AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig();
+  config_without_prediction.clipping_predictor.enabled = false;
+  AgcManagerDirect manager_with_prediction(config_with_prediction,
+                                           new ::testing::NiceMock<MockAgc>());
+  AgcManagerDirect manager_without_prediction(
+      config_without_prediction, new ::testing::NiceMock<MockAgc>());
+
+  manager_with_prediction.Initialize();
+  manager_without_prediction.Initialize();
+
+  constexpr int kInitialLevel = 255;
+  constexpr float kClippingPeakRatio = 1.0f;
+  constexpr float kCloseToClippingPeakRatio = 0.99f;
+  constexpr float kZeroPeakRatio = 0.0f;
+  manager_with_prediction.set_stream_analog_level(kInitialLevel);
+  manager_without_prediction.set_stream_analog_level(kInitialLevel);
+  manager_with_prediction.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(kSpeechLevelDbfs));
+  manager_without_prediction.Process(audio_buffer,
+                                     GetOverrideOrEmpty(kHighSpeechProbability),
+                                     GetOverrideOrEmpty(kSpeechLevelDbfs));
+  EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled());
+  EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled());
+  EXPECT_TRUE(manager_with_prediction.use_clipping_predictor_step());
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel);
+  EXPECT_EQ(manager_without_prediction.recommended_analog_level(),
+            kInitialLevel);
+
+  // Expect a change in the analog level when the prediction step is used.
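+  // (Here and below, each detected or predicted clipping event is expected to
+  // lower the recommended level by kClippedLevelStep from the starting level
+  // of 255, so the checks step through 255 - kClippedLevelStep,
+  // 255 - 2 * kClippedLevelStep, and so on.)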
+ CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change when the prediction step is used. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change when clipping is not detected or predicted. + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect a change for clipping frames. 
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            kInitialLevel - 4 * kClippedLevelStep);
+  EXPECT_EQ(manager_without_prediction.recommended_analog_level(),
+            kInitialLevel - 2 * kClippedLevelStep);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       UnusedClippingPredictionsProduceEqualAnalogLevels) {
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig();
+  config_with_prediction.clipping_predictor.enabled = true;
+  config_with_prediction.clipping_predictor.use_predicted_step = false;
+  AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig();
+  config_without_prediction.clipping_predictor.enabled = false;
+  AgcManagerDirect manager_with_prediction(config_with_prediction,
+                                           new ::testing::NiceMock<MockAgc>());
+  AgcManagerDirect manager_without_prediction(
+      config_without_prediction, new ::testing::NiceMock<MockAgc>());
+
+  constexpr int kInitialLevel = 255;
+  constexpr float kClippingPeakRatio = 1.0f;
+  constexpr float kCloseToClippingPeakRatio = 0.99f;
+  constexpr float kZeroPeakRatio = 0.0f;
+  manager_with_prediction.Initialize();
+  manager_without_prediction.Initialize();
+  manager_with_prediction.set_stream_analog_level(kInitialLevel);
+  manager_without_prediction.set_stream_analog_level(kInitialLevel);
+  manager_with_prediction.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(kSpeechLevelDbfs));
+  manager_without_prediction.Process(audio_buffer,
+                                     GetOverrideOrEmpty(kHighSpeechProbability),
+                                     GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled());
+  EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled());
+  EXPECT_FALSE(manager_with_prediction.use_clipping_predictor_step());
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel);
+  EXPECT_EQ(manager_without_prediction.recommended_analog_level(),
+            kInitialLevel);
+
+  // Expect no change in the analog level for non-clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change for non-clipping frames.
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change for non-clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change when clipping is not detected or predicted.
+  CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect a change for clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change during waiting.
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect a change for clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+}
+
+// Checks that passing empty speech level and probability overrides to
+// `Process()` has the same effect as passing no overrides.
+TEST_P(AgcManagerDirectParametrizedTest, EmptyRmsErrorOverrideHasNoEffect) {
+  AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig());
+  AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig());
+  manager_1.Initialize();
+  manager_2.Initialize();
+
+  constexpr int kAnalogLevel = 50;
+  manager_1.set_stream_analog_level(kAnalogLevel);
+  manager_2.set_stream_analog_level(kAnalogLevel);
+
+  // Feed speech with low energy to trigger an upward adaptation of the analog
+  // level.
+  constexpr int kNumFrames = 125;
+  constexpr int kGainDb = -20;
+  SpeechSamplesReader reader;
+
+  // Check the initial input volume.
+  ASSERT_EQ(manager_1.recommended_analog_level(), kAnalogLevel);
+  ASSERT_EQ(manager_2.recommended_analog_level(), kAnalogLevel);
+
+  reader.Feed(kNumFrames, kGainDb, absl::nullopt, absl::nullopt, manager_1);
+  reader.Feed(kNumFrames, kGainDb, manager_2);
+
+  // Check that the states are the same and adaptation occurs.
+  EXPECT_EQ(manager_1.recommended_analog_level(),
+            manager_2.recommended_analog_level());
+  ASSERT_GT(manager_1.recommended_analog_level(), kAnalogLevel);
+  EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability());
+  EXPECT_EQ(manager_1.frames_since_clipped_, manager_2.frames_since_clipped_);
+
+  // Check that the states of the channel AGCs are the same.
+  EXPECT_EQ(manager_1.num_channels(), manager_2.num_channels());
+  for (int i = 0; i < manager_1.num_channels(); ++i) {
+    EXPECT_EQ(manager_1.channel_agcs_[i]->recommended_analog_level(),
+              manager_2.channel_agcs_[i]->recommended_analog_level());
+    EXPECT_EQ(manager_1.channel_agcs_[i]->voice_probability(),
+              manager_2.channel_agcs_[i]->voice_probability());
+  }
+}
+
+// Checks that passing non-empty speech level and probability overrides to
+// `Process()` has an effect.
+TEST_P(AgcManagerDirectParametrizedTest, NonEmptyRmsErrorOverrideHasEffect) {
+  AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig());
+  AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig());
+  manager_1.Initialize();
+  manager_2.Initialize();
+
+  constexpr int kInputVolume = 128;
+  manager_1.set_stream_analog_level(kInputVolume);
+  manager_2.set_stream_analog_level(kInputVolume);
+
+  // Feed speech with low energy to trigger an upward adaptation of the input
+  // volume.
+  constexpr int kNumFrames = 125;
+  constexpr int kGainDb = -20;
+  SpeechSamplesReader reader;
+
+  // Make sure that the feeding samples triggers an adaptation when no override
+  // is specified.
+  reader.Feed(kNumFrames, kGainDb, manager_1);
+  ASSERT_GT(manager_1.recommended_analog_level(), kInputVolume);
+
+  // Expect that feeding samples triggers an adaptation when the speech
+  // probability and speech level overrides are specified.
+  reader.Feed(kNumFrames, kGainDb,
+              /*speech_probability_override=*/kHighSpeechProbability,
+              /*speech_level_override=*/-45.0f, manager_2);
+  EXPECT_GT(manager_2.recommended_analog_level(), kInputVolume);
+
+  // The voice probability override does not affect the `voice_probability()`
+  // getter.
+  EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability());
+}
+
+class AgcManagerDirectChannelSampleRateTest
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int GetNumChannels() const { return std::get<0>(GetParam()); }
+  int GetSampleRateHz() const { return std::get<1>(GetParam()); }
+};
+
+TEST_P(AgcManagerDirectChannelSampleRateTest, CheckIsAlive) {
+  const int num_channels = GetNumChannels();
+  const int sample_rate_hz = GetSampleRateHz();
+
+  constexpr AnalogAgcConfig kConfig{.enabled = true,
+                                    .clipping_predictor{.enabled = true}};
+  AgcManagerDirect manager(num_channels, kConfig);
+  manager.Initialize();
+  AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz,
+                     num_channels, sample_rate_hz, num_channels);
+
+  constexpr int kStartupVolume = 100;
+  int applied_initial_volume = kStartupVolume;
+
+  // Trigger a downward adaptation with clipping.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.5f,
+                          buffer);
+  const int initial_volume1 = applied_initial_volume;
+  for (int i = 0; i < 400; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kLowSpeechProbability,
+                    /*speech_level_dbfs=*/-20.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  ASSERT_LT(manager.recommended_analog_level(), initial_volume1);
+
+  // Fill in audio that does not clip.
+  WriteAudioBufferSamples(/*samples_value=*/1234.5f, /*clipped_ratio=*/0.0f,
+                          buffer);
+
+  // Trigger an upward adaptation.
+  const int initial_volume2 = manager.recommended_analog_level();
+  for (int i = 0; i < kConfig.clipped_wait_frames; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-65.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  EXPECT_GT(manager.recommended_analog_level(), initial_volume2);
+
+  // Trigger a downward adaptation.
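+  // (Same per-frame pattern as the loops above: feed the applied level via
+  // set_stream_analog_level(), run AnalyzePreProcess() and Process(), then
+  // read back recommended_analog_level(); loud speech at -5 dBFS should now
+  // drive the recommended level down.)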
+  const int initial_volume = manager.recommended_analog_level();
+  for (int i = 0; i < 100; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-5.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  EXPECT_LT(manager.recommended_analog_level(), initial_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    AgcManagerDirectChannelSampleRateTest,
+    ::testing::Combine(::testing::Values(1, 2, 3, 6),
+                       ::testing::Values(8000, 16000, 32000, 48000)));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h
new file mode 100644
index 0000000000..389b2114af
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
+#define MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
+
+namespace webrtc {
+
+// The automatic gain control (AGC) component brings the signal to an
+// appropriate range. This is done by applying a digital gain directly and, in
+// the analog mode, prescribing an analog gain to be applied at the audio HAL.
+//
+// Recommended to be enabled on the client-side.
+class GainControl {
+ public:
+  // When an analog mode is set, this must be called prior to `ProcessStream()`
+  // to pass the current analog level from the audio HAL. Must be within the
+  // range provided to `set_analog_level_limits()`.
+  virtual int set_stream_analog_level(int level) = 0;
+
+  // When an analog mode is set, this should be called after `ProcessStream()`
+  // to obtain the recommended new analog level for the audio HAL. It is the
+  // user's responsibility to apply this level.
+  virtual int stream_analog_level() const = 0;
+
+  enum Mode {
+    // Adaptive mode intended for use if an analog volume control is available
+    // on the capture device. It will require the user to provide coupling
+    // between the OS mixer controls and AGC through the
+    // `stream_analog_level()` functions.
+    //
+    // It consists of an analog gain prescription for the audio device and a
+    // digital compression stage.
+    kAdaptiveAnalog,
+
+    // Adaptive mode intended for situations in which an analog volume control
+    // is unavailable. It operates in a similar fashion to the adaptive analog
+    // mode, but with scaling instead applied in the digital domain. As with
+    // the analog mode, it additionally uses a digital compression stage.
+    kAdaptiveDigital,
+
+    // Fixed mode which enables only the digital compression stage also used by
+    // the two adaptive modes.
+    //
+    // It is distinguished from the adaptive modes by considering only a
+    // short time-window of the input signal. It applies a fixed gain through
+    // most of the input level range, and compresses (gradually reduces gain
+    // with increasing level) the input signal at higher levels. This mode is
+    // preferred on embedded devices where the capture signal level is
+    // predictable, so that a known gain can be applied.
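+    //
+    // A minimal usage sketch (hypothetical caller; `agc` stands for any
+    // concrete GainControl implementation, and the 3 dB / 9 dB values mirror
+    // the legacy AGC defaults):
+    //   agc->set_mode(GainControl::kFixedDigital);
+    //   agc->set_target_level_dbfs(3);    // Positive convention: -3 dBFS.
+    //   agc->set_compression_gain_db(9);  // Up to 9 dB of digital gain.
+    //   agc->enable_limiter(true);        // Hard-limit peaks at the target.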
+ kFixedDigital + }; + + virtual int set_mode(Mode mode) = 0; + virtual Mode mode() const = 0; + + // Sets the target peak `level` (or envelope) of the AGC in dBFs (decibels + // from digital full-scale). The convention is to use positive values. For + // instance, passing in a value of 3 corresponds to -3 dBFs, or a target + // level 3 dB below full-scale. Limited to [0, 31]. + // + // TODO(ajm): use a negative value here instead, if/when VoE will similarly + // update its interface. + virtual int set_target_level_dbfs(int level) = 0; + virtual int target_level_dbfs() const = 0; + + // Sets the maximum `gain` the digital compression stage may apply, in dB. A + // higher number corresponds to greater compression, while a value of 0 will + // leave the signal uncompressed. Limited to [0, 90]. + virtual int set_compression_gain_db(int gain) = 0; + virtual int compression_gain_db() const = 0; + + // When enabled, the compression stage will hard limit the signal to the + // target level. Otherwise, the signal will be compressed but not limited + // above the target level. + virtual int enable_limiter(bool enable) = 0; + virtual bool is_limiter_enabled() const = 0; + + // Sets the `minimum` and `maximum` analog levels of the audio capture device. + // Must be set if and only if an analog mode is used. Limited to [0, 65535]. + virtual int set_analog_level_limits(int minimum, int maximum) = 0; + virtual int analog_level_minimum() const = 0; + virtual int analog_level_maximum() const = 0; + + // Returns true if the AGC has detected a saturation event (period where the + // signal reaches digital full-scale) in the current frame and the analog + // level cannot be reduced. + // + // This could be used as an indicator to reduce or disable analog mic gain at + // the audio HAL. + virtual bool stream_is_saturated() const = 0; + + protected: + virtual ~GainControl() {} +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build new file mode 100644 index 0000000000..c6ab9b3160 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] 
= True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_control_interface_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc new file mode 100644 index 0000000000..e40a3f1629 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc @@ -0,0 +1,1238 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * + * Using a feedback system, determines an appropriate analog volume level + * given an input signal and current volume level. Targets a conservative + * signal level and is intended for use with a digital AGC to apply + * additional gain. 
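+ *
+ * In rough terms: subframe envelopes and energies (env, Rxx16w32) feed a
+ * slow long-term speech energy estimate (Rxx160_LPw32); whenever that
+ * estimate leaves a window around the analog target level, micVol is
+ * nudged down or up, with separate safeguards for saturation, all-zero
+ * input and muted or inactive speakers.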
+ * + */ + +#include "modules/audio_processing/agc/legacy/analog_agc.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Errors +#define AGC_UNSPECIFIED_ERROR 18000 +#define AGC_UNINITIALIZED_ERROR 18002 +#define AGC_NULL_POINTER_ERROR 18003 +#define AGC_BAD_PARAMETER_ERROR 18004 + +/* The slope of in Q13*/ +static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129, + 2372, 1362, 472, 78}; + +/* The offset in Q14 */ +static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737, + 19612, 18805, 17951, 17367}; + +/* The slope of in Q13*/ +static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337}; + +/* The offset in Q14 */ +static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177, + 18052, 17920, 17670, 17286}; + +static const int16_t kMuteGuardTimeMs = 8000; +static const int16_t kInitCheck = 42; +static const size_t kNumSubframes = 10; + +/* Default settings if config is not used */ +#define AGC_DEFAULT_TARGET_LEVEL 3 +#define AGC_DEFAULT_COMP_GAIN 9 +/* This is the target level for the analog part in ENV scale. To convert to RMS + * scale you + * have to add OFFSET_ENV_TO_RMS. + */ +#define ANALOG_TARGET_LEVEL 11 +#define ANALOG_TARGET_LEVEL_2 5 // ANALOG_TARGET_LEVEL / 2 +/* Offset between RMS scale (analog part) and ENV scale (digital part). This + * value actually + * varies with the FIXED_ANALOG_TARGET_LEVEL, hence we should in the future + * replace it with + * a table. + */ +#define OFFSET_ENV_TO_RMS 9 +/* The reference input level at which the digital part gives an output of + * targetLevelDbfs + * (desired level) if we have no compression gain. This level should be set high + * enough not + * to compress the peaks due to the dynamics. + */ +#define DIGITAL_REF_AT_0_COMP_GAIN 4 +/* Speed of reference level decrease. 
+ */
+#define DIFF_REF_TO_ANALOG 5
+
+/* Size of analog gain table */
+#define GAIN_TBL_LEN 32
+/* Matlab code:
+ * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12));
+ */
+/* Q12 */
+static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = {
+    4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938,
+    6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934,
+    9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953};
+
+/* Gain/Suppression tables for virtual Mic (in Q10) */
+static const uint16_t kGainTableVirtualMic[128] = {
+    1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378,
+    1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854,
+    1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495,
+    2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357,
+    3449, 3543, 3640, 3739, 3842, 3947, 4055, 4166, 4280, 4397, 4517,
+    4640, 4767, 4898, 5032, 5169, 5311, 5456, 5605, 5758, 5916, 6078,
+    6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, 8178,
+    8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004,
+    11305, 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807,
+    15212, 15628, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923,
+    20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808,
+    27541, 28295, 29069, 29864, 30681, 31520, 32382};
+static const uint16_t kSuppressionTableVirtualMic[128] = {
+    1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794,
+    780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605,
+    594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461,
+    453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, 371, 364, 358, 351,
+    345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, 273, 268,
+    263, 258, 254, 249, 244, 240, 236, 232, 227, 223, 219, 215, 211, 208, 204,
+    200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155,
+    153, 150, 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118,
+    116, 114, 112, 110, 108, 106, 104, 102};
+
+/* Table for target energy levels. Values in Q(-7)
+ * Matlab code
+ * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n',
+ * round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */
+
+static const int32_t kTargetLevelTable[64] = {
+    134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911,
+    26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411,
+    5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095,
+    1066064, 846805, 672641, 534298, 424408, 337119, 267783,
+    212708, 168960, 134210, 106606, 84680, 67264, 53430,
+    42441, 33712, 26778, 21271, 16896, 13421, 10661,
+    8468, 6726, 5343, 4244, 3371, 2678, 2127,
+    1690, 1342, 1066, 847, 673, 534, 424,
+    337, 268, 213, 169, 134, 107, 85,
+    67};
+
+}  // namespace
+
+int WebRtcAgc_AddMic(void* state,
+                     int16_t* const* in_mic,
+                     size_t num_bands,
+                     size_t samples) {
+  int32_t nrg, max_nrg, sample, tmp32;
+  int32_t* ptr;
+  uint16_t targetGainIdx, gain;
+  size_t i;
+  int16_t n, L, tmp16, tmp_speech[16];
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(state);
+
+  if (stt->fs == 8000) {
+    L = 8;
+    if (samples != 80) {
+      return -1;
+    }
+  } else {
+    L = 16;
+    if (samples != 160) {
+      return -1;
+    }
+  }
+
+  /* apply slowly varying digital gain */
+  if (stt->micVol > stt->maxAnalog) {
+    /* `maxLevel` is strictly >= `micVol`, so this condition should be
+     * satisfied here, ensuring there is no divide-by-zero. */
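+    /* Illustrative numbers only: with micVol 10 steps above maxAnalog and
+     * maxLevel 20 steps above it, targetGainIdx = 31 * 10 / 20 = 15, which
+     * in the 32-entry 0..10 dB gain table is roughly +4.8 dB. */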
+    RTC_DCHECK_GT(stt->maxLevel, stt->maxAnalog);
+
+    /* Q1 */
+    tmp16 = (int16_t)(stt->micVol - stt->maxAnalog);
+    tmp32 = (GAIN_TBL_LEN - 1) * tmp16;
+    tmp16 = (int16_t)(stt->maxLevel - stt->maxAnalog);
+    targetGainIdx = tmp32 / tmp16;
+    RTC_DCHECK_LT(targetGainIdx, GAIN_TBL_LEN);
+
+    /* Increment through the table towards the target gain.
+     * If micVol drops below maxAnalog, we allow the gain
+     * to be dropped immediately. */
+    if (stt->gainTableIdx < targetGainIdx) {
+      stt->gainTableIdx++;
+    } else if (stt->gainTableIdx > targetGainIdx) {
+      stt->gainTableIdx--;
+    }
+
+    /* Q12 */
+    gain = kGainTableAnalog[stt->gainTableIdx];
+
+    for (i = 0; i < samples; i++) {
+      size_t j;
+      for (j = 0; j < num_bands; ++j) {
+        sample = (in_mic[j][i] * gain) >> 12;
+        if (sample > 32767) {
+          in_mic[j][i] = 32767;
+        } else if (sample < -32768) {
+          in_mic[j][i] = -32768;
+        } else {
+          in_mic[j][i] = (int16_t)sample;
+        }
+      }
+    }
+  } else {
+    stt->gainTableIdx = 0;
+  }
+
+  /* compute envelope */
+  if (stt->inQueue > 0) {
+    ptr = stt->env[1];
+  } else {
+    ptr = stt->env[0];
+  }
+
+  for (i = 0; i < kNumSubframes; i++) {
+    /* iterate over samples */
+    max_nrg = 0;
+    for (n = 0; n < L; n++) {
+      nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n];
+      if (nrg > max_nrg) {
+        max_nrg = nrg;
+      }
+    }
+    ptr[i] = max_nrg;
+  }
+
+  /* compute energy */
+  if (stt->inQueue > 0) {
+    ptr = stt->Rxx16w32_array[1];
+  } else {
+    ptr = stt->Rxx16w32_array[0];
+  }
+
+  for (i = 0; i < kNumSubframes / 2; i++) {
+    if (stt->fs == 16000) {
+      WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32], 32, tmp_speech,
+                              stt->filterState);
+    } else {
+      memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(int16_t));
+    }
+    /* Compute energy in blocks of 16 samples */
+    ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4);
+  }
+
+  /* update queue information */
+  if (stt->inQueue == 0) {
+    stt->inQueue = 1;
+  } else {
+    stt->inQueue = 2;
+  }
+
+  /* call VAD (use low band only) */
+  WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples);
+
+  return 0;
+}
+
+int WebRtcAgc_AddFarend(void* state, const int16_t* in_far, size_t samples) {
+  LegacyAgc* stt = reinterpret_cast<LegacyAgc*>(state);
+
+  int err = WebRtcAgc_GetAddFarendError(state, samples);
+
+  if (err != 0)
+    return err;
+
+  return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples);
+}
+
+int WebRtcAgc_GetAddFarendError(void* state, size_t samples) {
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(state);
+
+  if (stt == NULL)
+    return -1;
+
+  if (stt->fs == 8000) {
+    if (samples != 80)
+      return -1;
+  } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) {
+    if (samples != 160)
+      return -1;
+  } else {
+    return -1;
+  }
+
+  return 0;
+}
+
+int WebRtcAgc_VirtualMic(void* agcInst,
+                         int16_t* const* in_near,
+                         size_t num_bands,
+                         size_t samples,
+                         int32_t micLevelIn,
+                         int32_t* micLevelOut) {
+  int32_t tmpFlt, micLevelTmp, gainIdx;
+  uint16_t gain;
+  size_t ii, j;
+  LegacyAgc* stt;
+
+  uint32_t nrg;
+  size_t sampleCntr;
+  uint32_t frameNrg = 0;
+  uint32_t frameNrgLimit = 5500;
+  int16_t numZeroCrossing = 0;
+  const int16_t kZeroCrossingLowLim = 15;
+  const int16_t kZeroCrossingHighLim = 20;
+
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  /*
+   * Before applying gain decide if this is a low-level signal.
+   * The idea is that digital AGC will not adapt to low-level
+   * signals.
+ */ + if (stt->fs != 8000) { + frameNrgLimit = frameNrgLimit << 1; + } + + frameNrg = (uint32_t)(in_near[0][0] * in_near[0][0]); + for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) { + // increment frame energy if it is less than the limit + // the correct value of the energy is not important + if (frameNrg < frameNrgLimit) { + nrg = (uint32_t)(in_near[0][sampleCntr] * in_near[0][sampleCntr]); + frameNrg += nrg; + } + + // Count the zero crossings + numZeroCrossing += + ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0); + } + + if ((frameNrg < 500) || (numZeroCrossing <= 5)) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing <= kZeroCrossingLowLim) { + stt->lowLevelSignal = 0; + } else if (frameNrg <= frameNrgLimit) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing >= kZeroCrossingHighLim) { + stt->lowLevelSignal = 1; + } else { + stt->lowLevelSignal = 0; + } + + micLevelTmp = micLevelIn << stt->scale; + /* Set desired level */ + gainIdx = stt->micVol; + if (stt->micVol > stt->maxAnalog) { + gainIdx = stt->maxAnalog; + } + if (micLevelTmp != stt->micRef) { + /* Something has happened with the physical level, restart. */ + stt->micRef = micLevelTmp; + stt->micVol = 127; + *micLevelOut = 127; + stt->micGainIdx = 127; + gainIdx = 127; + } + /* Pre-process the signal to emulate the microphone level. */ + /* Take one step at a time in the gain table. */ + if (gainIdx > 127) { + gain = kGainTableVirtualMic[gainIdx - 128]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + for (ii = 0; ii < samples; ii++) { + tmpFlt = (in_near[0][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + in_near[0][ii] = (int16_t)tmpFlt; + for (j = 1; j < num_bands; ++j) { + tmpFlt = (in_near[j][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + } + in_near[j][ii] = (int16_t)tmpFlt; + } + } + /* Set the level we (finally) used */ + stt->micGainIdx = gainIdx; + // *micLevelOut = stt->micGainIdx; + *micLevelOut = stt->micGainIdx >> stt->scale; + /* Add to Mic as if it was the output from a true microphone */ + if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) { + return -1; + } + return 0; +} + +void WebRtcAgc_UpdateAgcThresholds(LegacyAgc* stt) { + int16_t tmp16; + + /* Set analog target level in envelope dBOv scale */ + tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; + tmp16 = WebRtcSpl_DivW32W16ResW16((int32_t)tmp16, ANALOG_TARGET_LEVEL); + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; + if (stt->analogTarget < DIGITAL_REF_AT_0_COMP_GAIN) { + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN; + } + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->analogTarget = stt->compressionGaindB; + } + /* Since the offset between RMS and ENV is not constant, we should make this + * into a + * table, but for now, we'll stick with a constant, tuned for the chosen + * analog + * target level. 
+ */ + stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; + /* Analog adaptation limits */ + /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */ + stt->analogTargetLevel = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx]; /* ex. -20 dBov */ + stt->startUpperLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 1]; /* -19 dBov */ + stt->startLowerLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 1]; /* -21 dBov */ + stt->upperPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 2]; /* -18 dBov */ + stt->lowerPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 2]; /* -22 dBov */ + stt->upperSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 5]; /* -15 dBov */ + stt->lowerSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 5]; /* -25 dBov */ + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +} + +void WebRtcAgc_SaturationCtrl(LegacyAgc* stt, + uint8_t* saturated, + int32_t* env) { + int16_t i, tmpW16; + + /* Check if the signal is saturated */ + for (i = 0; i < 10; i++) { + tmpW16 = (int16_t)(env[i] >> 20); + if (tmpW16 > 875) { + stt->envSum += tmpW16; + } + } + + if (stt->envSum > 25000) { + *saturated = 1; + stt->envSum = 0; + } + + /* stt->envSum *= 0.99; */ + stt->envSum = (int16_t)((stt->envSum * 32440) >> 15); +} + +void WebRtcAgc_ZeroCtrl(LegacyAgc* stt, int32_t* inMicLevel, int32_t* env) { + int16_t i; + int64_t tmp = 0; + int32_t midVal; + + /* Is the input signal zero? */ + for (i = 0; i < 10; i++) { + tmp += env[i]; + } + + /* Each block is allowed to have a few non-zero + * samples. + */ + if (tmp < 500) { + stt->msZero += 10; + } else { + stt->msZero = 0; + } + + if (stt->muteGuardMs > 0) { + stt->muteGuardMs -= 10; + } + + if (stt->msZero > 500) { + stt->msZero = 0; + + /* Increase microphone level only if it's less than 50% */ + midVal = (stt->maxAnalog + stt->minLevel + 1) / 2; + if (*inMicLevel < midVal) { + /* *inMicLevel *= 1.1; */ + *inMicLevel = (1126 * *inMicLevel) >> 10; + /* Reduces risk of a muted mic repeatedly triggering excessive levels due + * to zero signal detection. */ + *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); + stt->micVol = *inMicLevel; + } + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* The AGC has a tendency (due to problems with the VAD parameters), to + * vastly increase the volume after a muting event. This timer prevents + * upwards adaptation for a short period. */ + stt->muteGuardMs = kMuteGuardTimeMs; + } +} + +void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc* stt) { + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. 
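+   * As a worked example of the thresholds below: for stdLongTerm in
+   * [2500, 4500) the threshold interpolates linearly, e.g. stdLongTerm =
+   * 3500 gives vadThresh = kNormalVadThreshold + (4500 - 3500) / 2, and
+   * the running threshold is then smoothed as
+   * (31 * vadThreshold + vadThresh) / 32.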
+   */
+
+  int32_t tmp32;
+  int16_t vadThresh;
+
+  if (stt->vadMic.stdLongTerm < 2500) {
+    stt->vadThreshold = 1500;
+  } else {
+    vadThresh = kNormalVadThreshold;
+    if (stt->vadMic.stdLongTerm < 4500) {
+      /* Scale between min and max threshold */
+      vadThresh += (4500 - stt->vadMic.stdLongTerm) / 2;
+    }
+
+    /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */
+    tmp32 = vadThresh + 31 * stt->vadThreshold;
+    stt->vadThreshold = (int16_t)(tmp32 >> 5);
+  }
+}
+
+void WebRtcAgc_ExpCurve(int16_t volume, int16_t* index) {
+  // volume in Q14
+  // index in [0-7]
+  /* 8 different curves */
+  if (volume > 5243) {
+    if (volume > 7864) {
+      if (volume > 12124) {
+        *index = 7;
+      } else {
+        *index = 6;
+      }
+    } else {
+      if (volume > 6554) {
+        *index = 5;
+      } else {
+        *index = 4;
+      }
+    }
+  } else {
+    if (volume > 2621) {
+      if (volume > 3932) {
+        *index = 3;
+      } else {
+        *index = 2;
+      }
+    } else {
+      if (volume > 1311) {
+        *index = 1;
+      } else {
+        *index = 0;
+      }
+    }
+  }
+}
+
+int32_t WebRtcAgc_ProcessAnalog(void* state,
+                                int32_t inMicLevel,
+                                int32_t* outMicLevel,
+                                int16_t vadLogRatio,
+                                int16_t echo,
+                                uint8_t* saturationWarning) {
+  uint32_t tmpU32;
+  int32_t Rxx16w32, tmp32;
+  int32_t inMicLevelTmp, lastMicVol;
+  int16_t i;
+  uint8_t saturated = 0;
+  LegacyAgc* stt;
+
+  stt = reinterpret_cast<LegacyAgc*>(state);
+  inMicLevelTmp = inMicLevel << stt->scale;
+
+  if (inMicLevelTmp > stt->maxAnalog) {
+    return -1;
+  } else if (inMicLevelTmp < stt->minLevel) {
+    return -1;
+  }
+
+  if (stt->firstCall == 0) {
+    int32_t tmpVol;
+    stt->firstCall = 1;
+    tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9;
+    tmpVol = (stt->minLevel + tmp32);
+
+    /* If the mic level is very low at start, increase it! */
+    if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) {
+      inMicLevelTmp = tmpVol;
+    }
+    stt->micVol = inMicLevelTmp;
+  }
+
+  /* Set the mic level to the previous output value if there is digital input
+   * gain */
+  if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) {
+    inMicLevelTmp = stt->micVol;
+  }
+
+  /* If the mic level was manually changed to a very low value raise it! */
+  if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) {
+    tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9;
+    inMicLevelTmp = (stt->minLevel + tmp32);
+    stt->micVol = inMicLevelTmp;
+  }
+
+  if (inMicLevelTmp != stt->micVol) {
+    if (inMicLevel == stt->lastInMicLevel) {
+      // We requested a volume adjustment, but it didn't occur. This is
+      // probably due to a coarse quantization of the volume slider.
+      // Restore the requested value to prevent getting stuck.
+      inMicLevelTmp = stt->micVol;
+    } else {
+      // As long as the value changed, update to match.
+      stt->micVol = inMicLevelTmp;
+    }
+  }
+
+  if (inMicLevelTmp > stt->maxLevel) {
+    // Always allow the user to raise the volume above the maxLevel.
+    stt->maxLevel = inMicLevelTmp;
+  }
+
+  // Store last value here, after we've taken care of manual updates etc.
+  stt->lastInMicLevel = inMicLevel;
+  lastMicVol = stt->micVol;
+
+  /* Checks if the signal is saturated. Also a check if individual samples
+   * are larger than 12000 is done. If they are the counter for increasing
+   * the volume level is set to -100ms
+   */
+  WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]);
+
+  /* The AGC is always allowed to lower the level if the signal is saturated */
+  if (saturated == 1) {
+    /* Lower the recording level
+     * Rxx160_LP is adjusted down because it is so slow it could
+     * cause the AGC to make wrong decisions. */
+    /* stt->Rxx160_LPw32 *= 0.875; */
+    stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7;
+
+    stt->zeroCtrlMax = stt->micVol;
+
+    /* stt->micVol *= 0.903; */
+    tmp32 = inMicLevelTmp - stt->minLevel;
+    tmpU32 = WEBRTC_SPL_UMUL(29591, (uint32_t)(tmp32));
+    stt->micVol = (tmpU32 >> 15) + stt->minLevel;
+    if (stt->micVol > lastMicVol - 2) {
+      stt->micVol = lastMicVol - 2;
+    }
+    inMicLevelTmp = stt->micVol;
+
+    if (stt->micVol < stt->minOutput) {
+      *saturationWarning = 1;
+    }
+
+    /* Reset counter for decrease of volume level to avoid
+     * decreasing too much. The saturation control can still
+     * lower the level if needed. */
+    stt->msTooHigh = -100;
+
+    /* Enable the control mechanism to ensure that our measure,
+     * Rxx160_LP, is in the correct range. This must be done since
+     * the measure is very slow. */
+    stt->activeSpeech = 0;
+    stt->Rxx16_LPw32Max = 0;
+
+    /* Reset to initial values */
+    stt->msecSpeechInnerChange = kMsecSpeechInner;
+    stt->msecSpeechOuterChange = kMsecSpeechOuter;
+    stt->changeToSlowMode = 0;
+
+    stt->muteGuardMs = 0;
+
+    stt->upperLimit = stt->startUpperLimit;
+    stt->lowerLimit = stt->startLowerLimit;
+  }
+
+  /* Check if the input speech is zero. If so, the mic volume is increased.
+   * On some computers the input can be zero at mic levels as high as 17%. */
+  WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]);
+
+  /* Check if the near end speaker is inactive.
+   * If that is the case the VAD threshold is
+   * increased since the VAD speech model gets
+   * more sensitive to any sound after a long
+   * silence.
+   */
+  WebRtcAgc_SpeakerInactiveCtrl(stt);
+
+  for (i = 0; i < 5; i++) {
+    /* Computed on blocks of 16 samples */
+
+    Rxx16w32 = stt->Rxx16w32_array[0][i];
+
+    /* Rxx160w32 in Q(-7) */
+    tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3;
+    stt->Rxx160w32 = stt->Rxx160w32 + tmp32;
+    stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32;
+
+    /* Circular buffer */
+    stt->Rxx16pos++;
+    if (stt->Rxx16pos == kRxxBufferLen) {
+      stt->Rxx16pos = 0;
+    }
+
+    /* Rxx16_LPw32 in Q(-4) */
+    tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm;
+    stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32;
+
+    if (vadLogRatio > stt->vadThreshold) {
+      /* Speech detected! */
+
+      /* Check if Rxx160_LP is in the correct range. If
+       * it is too high/low then we set it to the maximum of
+       * Rxx16_LPw32 during the first 200ms of speech.
+       */
+      if (stt->activeSpeech < 250) {
+        stt->activeSpeech += 2;
+
+        if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) {
+          stt->Rxx16_LPw32Max = stt->Rxx16_LPw32;
+        }
+      } else if (stt->activeSpeech == 250) {
+        stt->activeSpeech += 2;
+        tmp32 = stt->Rxx16_LPw32Max >> 3;
+        stt->Rxx160_LPw32 = tmp32 * kRxxBufferLen;
+      }
+
+      tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm;
+      stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32;
+
+      if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) {
+        stt->msTooHigh += 2;
+        stt->msTooLow = 0;
+        stt->changeToSlowMode = 0;
+
+        if (stt->msTooHigh > stt->msecSpeechOuterChange) {
+          stt->msTooHigh = 0;
+
+          /* Lower the recording level */
+          /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */
+          tmp32 = stt->Rxx160_LPw32 >> 6;
+          stt->Rxx160_LPw32 = tmp32 * 53;
+
+          /* Reduce the max gain to avoid excessive oscillation
+           * (but never drop below the maximum analog level).
+ */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.95 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. + */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + } + } else if (stt->Rxx160_LPw32 > stt->upperLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechInnerChange) { + /* Lower the recording level */ + stt->msTooHigh = 0; + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.965 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + WEBRTC_SPL_UMUL(31621, (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechOuterChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 + */ + weightFIX = + kOffset1[index] - (int16_t)((kSlope1[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 2) { + stt->micVol = lastMicVol + 2; + } + + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechInnerChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. 
+
+          stt->msTooLow = 0;
+
+          /* Normalize the volume level */
+          tmp32 = (inMicLevelTmp - stt->minLevel) << 14;
+          if (stt->maxInit != stt->minLevel) {
+            volNormFIX = tmp32 / (stt->maxInit - stt->minLevel);
+          }
+
+          /* Find correct curve */
+          WebRtcAgc_ExpCurve(volNormFIX, &index);
+
+          /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1
+           */
+          weightFIX =
+              kOffset2[index] - (int16_t)((kSlope2[index] * volNormFIX) >> 13);
+
+          /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */
+          stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67;
+
+          tmp32 = inMicLevelTmp - stt->minLevel;
+          tmpU32 =
+              ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel));
+          stt->micVol = (tmpU32 >> 14) + stt->minLevel;
+          if (stt->micVol < lastMicVol + 1) {
+            stt->micVol = lastMicVol + 1;
+          }
+
+          inMicLevelTmp = stt->micVol;
+        }
+      } else {
+        /* The signal is inside the desired range which is:
+         * lowerLimit < Rxx160_LP/640 < upperLimit
+         */
+        if (stt->changeToSlowMode > 4000) {
+          stt->msecSpeechInnerChange = 1000;
+          stt->msecSpeechOuterChange = 500;
+          stt->upperLimit = stt->upperPrimaryLimit;
+          stt->lowerLimit = stt->lowerPrimaryLimit;
+        } else {
+          stt->changeToSlowMode += 2;  // in milliseconds
+        }
+        stt->msTooLow = 0;
+        stt->msTooHigh = 0;
+
+        stt->micVol = inMicLevelTmp;
+      }
+    }
+  }
+
+  /* Ensure gain is not increased in presence of echo or after a mute event
+   * (but allow the zeroCtrl() increase on the frame of a mute detection).
+   */
+  if (echo == 1 ||
+      (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) {
+    if (stt->micVol > lastMicVol) {
+      stt->micVol = lastMicVol;
+    }
+  }
+
+  /* limit the gain */
+  if (stt->micVol > stt->maxLevel) {
+    stt->micVol = stt->maxLevel;
+  } else if (stt->micVol < stt->minOutput) {
+    stt->micVol = stt->minOutput;
+  }
+
+  *outMicLevel = WEBRTC_SPL_MIN(stt->micVol, stt->maxAnalog) >> stt->scale;
+
+  return 0;
+}
+
+int WebRtcAgc_Analyze(void* agcInst,
+                      const int16_t* const* in_near,
+                      size_t num_bands,
+                      size_t samples,
+                      int32_t inMicLevel,
+                      int32_t* outMicLevel,
+                      int16_t echo,
+                      uint8_t* saturationWarning,
+                      int32_t gains[11]) {
+  LegacyAgc* stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (stt == NULL) {
+    return -1;
+  }
+
+  if (stt->fs == 8000) {
+    if (samples != 80) {
+      return -1;
+    }
+  } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) {
+    if (samples != 160) {
+      return -1;
+    }
+  } else {
+    return -1;
+  }
+
+  *saturationWarning = 0;
+  // TODO(minyue): PUT IN RANGE CHECKING FOR INPUT LEVELS
+  *outMicLevel = inMicLevel;
+
+  int32_t error =
+      WebRtcAgc_ComputeDigitalGains(&stt->digitalAgc, in_near, num_bands,
+                                    stt->fs, stt->lowLevelSignal, gains);
+  if (error == -1) {
+    return -1;
+  }
+
+  if (stt->agcMode < kAgcModeFixedDigital &&
+      (stt->lowLevelSignal == 0 || stt->agcMode != kAgcModeAdaptiveDigital)) {
+    if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevel, outMicLevel,
+                                stt->vadMic.logRatio, echo,
+                                saturationWarning) == -1) {
+      return -1;
+    }
+  }
+
+  /* update queue */
+  if (stt->inQueue > 1) {
+    memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t));
+    memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1],
+           5 * sizeof(int32_t));
+  }
+
+  if (stt->inQueue > 0) {
+    stt->inQueue--;
+  }
+
+  return 0;
+}
+
+int WebRtcAgc_Process(const void* agcInst,
+                      const int32_t gains[11],
+                      const int16_t* const* in_near,
+                      size_t num_bands,
+                      int16_t* const* out) {
+  const LegacyAgc* stt = (const LegacyAgc*)agcInst;
+  return WebRtcAgc_ApplyDigitalGains(gains, num_bands, stt->fs, in_near, out);
+}
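WebRtcAgc_Analyze() and WebRtcAgc_Process() above form a two-step pipeline: the first call derives the per-millisecond gain vector (and, in adaptive-analog mode, the new mic volume), the second applies it. A minimal per-frame driver, assuming a mono 16 kHz capture stream (160 samples per 10 ms frame) and an already created and initialized instance; agc and mic_level_in are placeholders for caller-owned state:

int16_t frame[160];                    // 10 ms of mono 16 kHz audio
const int16_t* in_bands[1] = {frame};
int16_t out_frame[160];
int16_t* out_bands[1] = {out_frame};
int32_t gains[11];
int32_t mic_level_out = 0;
uint8_t saturation_warning = 0;
if (WebRtcAgc_Analyze(agc, in_bands, /*num_bands=*/1, /*samples=*/160,
                      mic_level_in, &mic_level_out, /*echo=*/0,
                      &saturation_warning, gains) == 0) {
  WebRtcAgc_Process(agc, gains, in_bands, /*num_bands=*/1, out_bands);
  // mic_level_out now holds the volume to request from the OS mixer.
}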
+
+int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig agcConfig) {
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (stt == NULL) {
+    return -1;
+  }
+
+  if (stt->initFlag != kInitCheck) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  if (agcConfig.limiterEnable != kAgcFalse &&
+      agcConfig.limiterEnable != kAgcTrue) {
+    stt->lastError = AGC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  stt->limiterEnable = agcConfig.limiterEnable;
+  stt->compressionGaindB = agcConfig.compressionGaindB;
+  if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) {
+    stt->lastError = AGC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  stt->targetLevelDbfs = agcConfig.targetLevelDbfs;
+
+  if (stt->agcMode == kAgcModeFixedDigital) {
+    /* Adjust for different parameter interpretation in FixedDigital mode */
+    stt->compressionGaindB += agcConfig.targetLevelDbfs;
+  }
+
+  /* Update threshold levels for analog adaptation */
+  WebRtcAgc_UpdateAgcThresholds(stt);
+
+  /* Recalculate gain table */
+  if (WebRtcAgc_CalculateGainTable(
+          &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB,
+          stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) {
+    return -1;
+  }
+  /* Store the config in a WebRtcAgcConfig */
+  stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB;
+  stt->usedConfig.limiterEnable = agcConfig.limiterEnable;
+  stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs;
+
+  return 0;
+}
+
+int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config) {
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (stt == NULL) {
+    return -1;
+  }
+
+  if (config == NULL) {
+    stt->lastError = AGC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (stt->initFlag != kInitCheck) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  config->limiterEnable = stt->usedConfig.limiterEnable;
+  config->targetLevelDbfs = stt->usedConfig.targetLevelDbfs;
+  config->compressionGaindB = stt->usedConfig.compressionGaindB;
+
+  return 0;
+}
+
+void* WebRtcAgc_Create() {
+  LegacyAgc* stt = static_cast<LegacyAgc*>(malloc(sizeof(LegacyAgc)));
+
+  stt->initFlag = 0;
+  stt->lastError = 0;
+
+  return stt;
+}
+
+void WebRtcAgc_Free(void* state) {
+  LegacyAgc* stt;
+
+  stt = reinterpret_cast<LegacyAgc*>(state);
+  free(stt);
+}
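Together with WebRtcAgc_Init() below, the functions above give the whole instance lifecycle. A minimal sketch, assuming a platform mixer range of [0, 255] and adaptive-analog mode; the config values are the defaults documented in gain_control.h, and error handling is trimmed for brevity:

void* agc = WebRtcAgc_Create();
if (WebRtcAgc_Init(agc, /*minLevel=*/0, /*maxLevel=*/255,
                   kAgcModeAdaptiveAnalog, /*fs=*/16000) == 0) {
  WebRtcAgcConfig config;
  config.targetLevelDbfs = 3;    // -3 dBOv
  config.compressionGaindB = 9;  // default fixed gain
  config.limiterEnable = kAgcTrue;
  WebRtcAgc_set_config(agc, config);
  // ... per-frame WebRtcAgc_Analyze() / WebRtcAgc_Process() calls ...
}
WebRtcAgc_Free(agc);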
+
+/* minLevel - Minimum volume level
+ * maxLevel - Maximum volume level
+ */
+int WebRtcAgc_Init(void* agcInst,
+                   int32_t minLevel,
+                   int32_t maxLevel,
+                   int16_t agcMode,
+                   uint32_t fs) {
+  int32_t max_add, tmp32;
+  int16_t i;
+  int tmpNorm;
+  LegacyAgc* stt;
+
+  /* typecast state pointer */
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  /* Analog AGC variables */
+  stt->envSum = 0;
+
+  /* mode = 0 - Only saturation protection
+   *        1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3
+   *            dBOv)]
+   *        2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3
+   *            dBOv)]
+   *        3 - Fixed Digital Gain [compressionGaindB (default 8 dB)]
+   */
+  if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) {
+    return -1;
+  }
+  stt->agcMode = agcMode;
+  stt->fs = fs;
+
+  /* initialize input VAD */
+  WebRtcAgc_InitVad(&stt->vadMic);
+
+  /* If the volume range is smaller than 0-256 then
+   * the levels are shifted up to Q8-domain */
+  tmpNorm = WebRtcSpl_NormU32((uint32_t)maxLevel);
+  stt->scale = tmpNorm - 23;
+  if (stt->scale < 0) {
+    stt->scale = 0;
+  }
+  // TODO(bjornv): Investigate if we really need to scale up a small range now
+  // when we have a guard against zero-increments. For now, we do not support
+  // scale up (scale = 0).
+  stt->scale = 0;
+  maxLevel <<= stt->scale;
+  minLevel <<= stt->scale;
+
+  /* Make minLevel and maxLevel static in AdaptiveDigital */
+  if (stt->agcMode == kAgcModeAdaptiveDigital) {
+    minLevel = 0;
+    maxLevel = 255;
+    stt->scale = 0;
+  }
+  /* The maximum supplemental volume range is based on a vague idea
+   * of how much lower the gain will be than the real analog gain. */
+  max_add = (maxLevel - minLevel) / 4;
+
+  /* Minimum/maximum volume level that can be set */
+  stt->minLevel = minLevel;
+  stt->maxAnalog = maxLevel;
+  stt->maxLevel = maxLevel + max_add;
+  stt->maxInit = stt->maxLevel;
+
+  stt->zeroCtrlMax = stt->maxAnalog;
+  stt->lastInMicLevel = 0;
+
+  /* Initialize micVol parameter */
+  stt->micVol = stt->maxAnalog;
+  if (stt->agcMode == kAgcModeAdaptiveDigital) {
+    stt->micVol = 127; /* Mid-point of mic level */
+  }
+  stt->micRef = stt->micVol;
+  stt->micGainIdx = 127;
+
+  /* Minimum output volume is 4% higher than the available lowest volume
+   * level. */
+  tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8;
+  stt->minOutput = (stt->minLevel + tmp32);
+
+  stt->msTooLow = 0;
+  stt->msTooHigh = 0;
+  stt->changeToSlowMode = 0;
+  stt->firstCall = 0;
+  stt->msZero = 0;
+  stt->muteGuardMs = 0;
+  stt->gainTableIdx = 0;
+
+  stt->msecSpeechInnerChange = kMsecSpeechInner;
+  stt->msecSpeechOuterChange = kMsecSpeechOuter;
+
+  stt->activeSpeech = 0;
+  stt->Rxx16_LPw32Max = 0;
+
+  stt->vadThreshold = kNormalVadThreshold;
+  stt->inActive = 0;
+
+  for (i = 0; i < kRxxBufferLen; i++) {
+    stt->Rxx16_vectorw32[i] = (int32_t)1000; /* -54dBm0 */
+  }
+  stt->Rxx160w32 = 125 * kRxxBufferLen; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */
+
+  stt->Rxx16pos = 0;
+  stt->Rxx16_LPw32 = (int32_t)16284; /* Q(-4) */
+
+  for (i = 0; i < 5; i++) {
+    stt->Rxx16w32_array[0][i] = 0;
+  }
+  for (i = 0; i < 10; i++) {
+    stt->env[0][i] = 0;
+    stt->env[1][i] = 0;
+  }
+  stt->inQueue = 0;
+
+  WebRtcSpl_MemSetW32(stt->filterState, 0, 8);
+
+  stt->initFlag = kInitCheck;
+  // Default config settings.
+  stt->defaultConfig.limiterEnable = kAgcTrue;
+  stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL;
+  stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN;
+
+  if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) {
+    stt->lastError = AGC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+  stt->Rxx160_LPw32 = stt->analogTargetLevel;  // Initialize rms value
+
+  stt->lowLevelSignal = 0;
+
+  /* Only positive values are allowed that are not too large */
+  if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) {
+    return -1;
+  } else {
+    return 0;
+  }
+}
+
+}  // namespace webrtc
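A recurring idiom in WebRtcAgc_ProcessAnalog() above is worth spelling out: volume moves are computed as a Q15 fraction of the level span above minLevel, e.g. 29591/32768 ~= 0.903 in the saturation branch and 31130/32768 ~= 0.95 in the outer-limit branch. A sketch of the pattern, with a function name of our own choosing:

// Scale the portion of the mic volume above min_level by a Q15 factor.
// factor_q15 = 29591 reproduces the "micVol *= 0.903" saturation step above.
int32_t ScaleAboveMin(int32_t mic_vol, int32_t min_level, uint16_t factor_q15) {
  uint32_t span = (uint32_t)(mic_vol - min_level);
  return (int32_t)((factor_q15 * span) >> 15) + min_level;
}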
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h
new file mode 100644
index 0000000000..22cd924a93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
+
+#include "modules/audio_processing/agc/legacy/digital_agc.h"
+#include "modules/audio_processing/agc/legacy/gain_control.h"
+
+namespace webrtc {
+
+/* Analog Automatic Gain Control variables:
+ * Constant declarations (inner limits inside which no changes are done)
+ * In the beginning the range is narrower, and it widens as soon as the
+ * measure 'Rxx160_LP' is inside it. Currently the starting limits are
+ * -22.2 +/- 1 dBm0 and the final limits -22.2 +/- 2.5 dBm0. These levels
+ * make the speech signal go towards -25.4 dBm0 (-31.4 dBov). Tuned with
+ * wbfile-31.4dBov.pcm
+ * The limits are created by running the AGC with a file having the desired
+ * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
+ * by out=10*log10(in/260537279.7); Set the target level to the average level
+ * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
+ * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
+ */
+constexpr int16_t kRxxBufferLen = 10;
+
+static const int16_t kMsecSpeechInner = 520;
+static const int16_t kMsecSpeechOuter = 340;
+
+static const int16_t kNormalVadThreshold = 400;
+
+static const int16_t kAlphaShortTerm = 6;  // 1 / 2^6 ~= 0.0156
+static const int16_t kAlphaLongTerm = 10;  // 1 / 2^10 ~= 0.000977
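The dBfs annotations in the struct below follow from the recipe in the comment above: each limit is the squared full-scale amplitude at the given level, accumulated over a 16-sample block and stored in Q(-7). Spelled out with the header's own example formula (the resulting values are the ones quoted in the struct comments):

level(X dBfs) = round((32767 * 10^(X / 20))^2 * 16 / 2^7)
level(-22) ~=  846805   ->  analogTargetLevel = kRxxBufferLen *  846805
level(-21) ~= 1066064   ->  startUpperLimit   = kRxxBufferLen * 1066064
level(-23) ~=  672641   ->  startLowerLimit   = kRxxBufferLen *  672641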
+typedef struct {
+  // Configurable parameters/variables
+  uint32_t fs;                // Sampling frequency
+  int16_t compressionGaindB;  // Fixed gain level in dB
+  int16_t targetLevelDbfs;    // Target level in -dBfs of envelope (default -3)
+  int16_t agcMode;            // Hard coded mode (adaptAna/adaptDig/fixedDig)
+  uint8_t limiterEnable;      // Enabling limiter (on/off (default off))
+  WebRtcAgcConfig defaultConfig;
+  WebRtcAgcConfig usedConfig;
+
+  // General variables
+  int16_t initFlag;
+  int16_t lastError;
+
+  // Target level parameters
+  // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
+  int32_t analogTargetLevel;    // = kRxxBufferLen * 846805;  -22 dBfs
+  int32_t startUpperLimit;      // = kRxxBufferLen * 1066064; -21 dBfs
+  int32_t startLowerLimit;      // = kRxxBufferLen * 672641;  -23 dBfs
+  int32_t upperPrimaryLimit;    // = kRxxBufferLen * 1342095; -20 dBfs
+  int32_t lowerPrimaryLimit;    // = kRxxBufferLen * 534298;  -24 dBfs
+  int32_t upperSecondaryLimit;  // = kRxxBufferLen * 2677832; -17 dBfs
+  int32_t lowerSecondaryLimit;  // = kRxxBufferLen * 267783;  -27 dBfs
+  uint16_t targetIdx;           // Table index for corresponding target level
+  int16_t analogTarget;         // Digital reference level in ENV scale
+
+  // Analog AGC specific variables
+  int32_t filterState[8];  // For downsampling wb to nb
+  int32_t upperLimit;      // Upper limit for mic energy
+  int32_t lowerLimit;      // Lower limit for mic energy
+  int32_t Rxx160w32;       // Average energy for one frame
+  int32_t Rxx16_LPw32;     // Low pass filtered subframe energies
+  int32_t Rxx160_LPw32;    // Low pass filtered frame energies
+  int32_t Rxx16_LPw32Max;  // Keeps track of largest energy subframe
+  int32_t Rxx16_vectorw32[kRxxBufferLen];  // Array with subframe energies
+  int32_t Rxx16w32_array[2][5];            // Energy values of microphone signal
+  int32_t env[2][10];                      // Envelope values of subframes
+
+  int16_t Rxx16pos;          // Current position in the Rxx16_vectorw32
+  int16_t envSum;            // Filtered scaled envelope in subframes
+  int16_t vadThreshold;      // Threshold for VAD decision
+  int16_t inActive;          // Inactive time in milliseconds
+  int16_t msTooLow;          // Milliseconds of speech at a too low level
+  int16_t msTooHigh;         // Milliseconds of speech at a too high level
+  int16_t changeToSlowMode;  // Change to slow mode after some time at target
+  int16_t firstCall;         // First call to the process-function
+  int16_t msZero;            // Milliseconds of zero input
+  int16_t msecSpeechOuterChange;  // Min ms of speech between volume changes
+  int16_t msecSpeechInnerChange;  // Min ms of speech between volume changes
+  int16_t activeSpeech;           // Milliseconds of active speech
+  int16_t muteGuardMs;            // Counter to prevent mute action
+  int16_t inQueue;                // 10 ms batch indicator
+
+  // Microphone level variables
+  int32_t micRef;         // Remember ref. mic level for virtual mic
+  uint16_t gainTableIdx;  // Current position in virtual gain table
+  int32_t micGainIdx;     // Gain index of mic level to increase slowly
+  int32_t micVol;         // Remember volume between frames
+  int32_t maxLevel;       // Max possible vol level, incl dig gain
+  int32_t maxAnalog;      // Maximum possible analog volume level
+  int32_t maxInit;        // Initial value of "max"
+  int32_t minLevel;       // Minimum possible volume level
+  int32_t minOutput;      // Minimum output volume level
+  int32_t zeroCtrlMax;    // Remember max gain => don't amp low input
+  int32_t lastInMicLevel;
+
+  int16_t scale;  // Scale factor for internal volume levels
+  // Structs for VAD and digital_agc
+  AgcVad vadMic;
+  DigitalAgc digitalAgc;
+
+  int16_t lowLevelSignal;
+} LegacyAgc;
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc
new file mode 100644
index 0000000000..4cd86acba8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/legacy/digital_agc.h"
+
+#include <string.h>
+
+#include "modules/audio_processing/agc/legacy/gain_control.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// To generate the gaintable, copy&paste the following lines to a Matlab window:
+// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
+// zeros = 0:31; lvl = 2.^(1-zeros);
+// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
+// B = MaxGain - MinGain;
+// gains = round(2^16*10.^(0.05 * (MinGain + B * (
+//  log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) /
+//  log(1/(1+exp(Knee*B))))));
+// fprintf(1, '\t%i, %i, %i, %i,\n', gains);
+// % Matlab code for plotting the gain and input/output level characteristic
+// (copy/paste the following 3 lines):
+// in = 10*log10(lvl); out = 20*log10(gains/65536);
+// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input
+// (dB)'); ylabel('Gain (dB)');
+// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on;
+// xlabel('Input (dB)'); ylabel('Output (dB)');
+// zoom on;
+
+// Generator table for y=log2(1+e^x) in Q8.
+enum { kGenFuncTableSize = 128 }; +static const uint16_t kGenFuncTable[kGenFuncTableSize] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, + 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, + 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819, + 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881, + 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944, + 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006, + 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069, + 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194, + 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257, + 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320, + 44689, 45058, 45428, 45797, 46166, 46536, 46905}; + +static const int16_t kAvgDecayTime = 250; // frames; < 3000 + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13)) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) \ + ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) + +} // namespace + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t digCompGaindB, // Q0 + int16_t targetLevelDbfs, // Q0 + uint8_t limiterEnable, + int16_t analogTarget) { // Q0 + // This function generates the compressor gain table used in the fixed digital + // part. + uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; + int32_t inLevel, limiterLvl; + int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const uint16_t kLog10 = 54426; // log2(10) in Q14 + const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 + const uint16_t kLogE_1 = 23637; // log2(e) in Q14 + uint16_t constMaxGain; + uint16_t tmpU16, intPart, fracPart; + const int16_t kCompRatio = 3; + int16_t limiterOffset = 0; // Limiter offset + int16_t limiterIdx, limiterLvlX; + int16_t constLinApprox, maxGain, diffGain; + int16_t i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants + // kLogE_1 = 23637; // log2(e) in Q14 + // kLog10 = 54426; // log2(10) in Q14 + // kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = maxGain * kCompRatio; + if ((digCompGaindB <= analogTarget) && (limiterEnable)) { + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v + tmp32no1 = digCompGaindB * (kCompRatio - 1); + diffGain = + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + if (diffGain < 0 || diffGain >= kGenFuncTableSize) { + RTC_DCHECK(0); + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13), + kLog10_2 / 2); + tmp16no1 = + WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table 
lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to + // linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) { + // Calculate scaled input level (compressor): + // inLevel = + // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. + absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (uint16_t)(absInLevel >> 14); + fracPart = + (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = tmpU16 * fracPart; // Q22 + tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 + logApprox = tmpU32no1 >> 8; // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) { + // Not enough space for multiplication + tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) { + zerosScale = 9 - zeros; + tmpU32no1 >>= zerosScale; // Q(zeros+13) + } else { + tmpU32no2 >>= zeros - 9; // Q22 + } + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 >>= 6; // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) { + logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14 + } + } + numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14 + numFIX -= (int32_t)logApprox * diffGain; // Q14 + + // Calculate ratio + // Shift `numFIX` as much as possible. + // Ensure we avoid wrap-around in `den` as well. + if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // `den` is Q8. + zeros = WebRtcSpl_NormW32(numFIX); + } else { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX *= 1 << zeros; // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1) + y32 = numFIX / tmp32no1; // in Q15 + // This is to do rounding in Q14. + y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1); + + if (limiterEnable && (i < limiterIdx)) { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= limiterLvl * (1 << 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) { + tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 + tmp32 >>= 13; // In Q14. + } else { + tmp32 = y32 * kLog10 + 8192; // in Q28 + tmp32 >>= 14; // In Q14. 
+ } + tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) { + intPart = (int16_t)(tmp32 >> 14); + fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 + if ((fracPart >> 13) != 0) { + tmp16 = (2 << 14) - constLinApprox; + tmp32no2 = (1 << 14) - fracPart; + tmp32no2 *= tmp16; + tmp32no2 >>= 13; + tmp32no2 = (1 << 14) - tmp32no2; + } else { + tmp16 = constLinApprox - (1 << 14); + tmp32no2 = (fracPart * tmp16) >> 13; + } + fracPart = (uint16_t)tmp32no2; + gainTable[i] = + (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else { + gainTable[i] = 0; + } + } + + return 0; +} + +int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { + if (agcMode == kAgcModeFixedDigital) { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, + const int16_t* in_far, + size_t nrSamples) { + RTC_DCHECK(stt); + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +// Gains is an 11 element long array (one value per ms, incl start & end). +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt, + const int16_t* const* in_near, + size_t num_bands, + uint32_t FS, + int16_t lowlevelSignal, + int32_t gains[11]) { + int32_t tmp32; + int32_t env[10]; + int32_t max_nrg; + int32_t cur_level; + int32_t gain32; + int16_t logratio; + int16_t lower_thr, upper_thr; + int16_t zeros = 0, zeros_fast, frac = 0; + int16_t decay; + int16_t gate, gain_adj; + int16_t k; + size_t n, L; + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + } else { + return -1; + } + + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) { + tmp32 = 3 * logratio; + logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) { + decay = 0; + } else { + // decay = (int16_t)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = (lower_thr - logratio) * 65; + decay = (int16_t)(tmp32 >> 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) { + if (stt->vadNearend.stdLongTerm < 4000) { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) { + // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> + // 12); + tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; + decay = (int16_t)(tmp32 >> 12); + } + + if (lowlevelSignal != 0) { + decay = 0; + } + } + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) { + int32_t nrg = 
in_near[0][k * L + n] * in_near[0][k * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = + AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) { + // increase capacitorSlow + stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), + stt->capacitorSlow); + } else { + // decrease capacitorSlow + stt->capacitorSlow = + AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) { + cur_level = stt->capacitorFast; + } else { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((uint32_t)cur_level); + if (cur_level == 0) { + zeros = 31; + } + tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF; + frac = (int16_t)(tmp32 >> 19); // Q12. + // Interpolate between gainTable[zeros] and gainTable[zeros-1]. + tmp32 = + ((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >> + 12; + gains[k + 1] = stt->gainTable[zeros] + tmp32; + } + + // Gate processing (lower gain during absence of speech) + zeros = (zeros << 9) - (frac >> 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); + if (stt->capacitorFast == 0) { + zeros_fast = 31; + } + tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; + zeros_fast <<= 9; + zeros_fast -= (int16_t)(tmp32 >> 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) { + stt->gatePrevious = 0; + } else { + tmp32 = stt->gatePrevious * 7; + gate = (int16_t)((gate + tmp32) >> 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) { + if (gate < 2500) { + gain_adj = (2500 - gate) >> 5; + } else { + gain_adj = 0; + } + for (k = 0; k < 10; k++) { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { + // To prevent wraparound + tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; + tmp32 *= 178 + gain_adj; + } else { + tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj); + tmp32 >>= 8; + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) { + // Find a shift of gains[k + 1] such that it can be squared without + // overflow, but at least by 10 bits. 
+ zeros = 10; + if (gains[k + 1] > 47452159) { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + // check for overflow + while (AGC_MUL32((env[k] >> 12) + 1, gain32) > + WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) { + // Prevent wrap around + gains[k + 1] = (gains[k + 1] / 256) * 253; + } else { + gains[k + 1] = (gains[k + 1] * 253) / 256; + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) { + if (gains[k] > gains[k + 1]) { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + return 0; +} + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out) { + // Apply gain + // handle first sub frame separately + size_t L; + int16_t L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + L2 = 3; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + L2 = 4; + } else { + return -1; + } + + for (size_t i = 0; i < num_bands; ++i) { + if (in_near[i] != out[i]) { + // Only needed if they don't already point to the same place. + memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); + } + } + + // iterate over samples + int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2)); + int32_t gain32 = gains[0] * (1 << 4); + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16; + if (out_tmp > 4095) { + out[i][n] = (int16_t)32767; + } else if (out_tmp < -4096) { + out[i][n] = (int16_t)-32768; + } else { + int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16; + out[i][n] = (int16_t)tmp32; + } + } + + gain32 += delta; + } + // iterate over subframes + for (int k = 1; k < 10; k++) { + delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); + gain32 = gains[k] * (1 << 4); + // iterate over samples + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4); + tmp64 = tmp64 >> 16; + if (tmp64 > 32767) { + out[i][k * L + n] = 32767; + } else if (tmp64 < -32768) { + out[i][k * L + n] = -32768; + } else { + out[i][k * L + n] = (int16_t)(tmp64); + } + } + gain32 += delta; + } + } + return 0; +} + +void WebRtcAgc_InitVad(AgcVad* state) { + int16_t k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) + // average input level (Q10) + state->meanLongTerm = 15 << 10; + + // variance of input level (Q8) + state->varianceLongTerm = 500 << 8; + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = 15 << 10; + + // short-term variance of input level (Q8) + state->varianceShortTerm = 500 << 8; + + state->stdShortTerm = + 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) { + // downsampling filter + state->downState[k] = 0; + } +} + +int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples) { // (i) number of samples + uint32_t nrg; + int32_t out, tmp32, tmp32b; + uint16_t tmpU16; + int16_t k, subfr, tmp16; + int16_t 
buf1[8]; + int16_t buf2[4]; + int16_t HPstate; + int16_t zeros, dB; + int64_t tmp64; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) { + // downsample to 4 kHz + if (nrSamples == 160) { + for (k = 0; k < 8; k++) { + tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; + tmp32 >>= 1; + buf1[k] = (int16_t)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) { + out = buf2[k] + HPstate; + tmp32 = 600 * out; + HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); + + // Add 'out * out / 2**6' to 'nrg' in a non-overflowing + // way. Guaranteed to work as long as 'out * out / 2**6' fits in + // an int32_t. + nrg += out * (out / (1 << 6)); + nrg += out * (out % (1 << 6)) / (1 << 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = (15 - zeros) * (1 << 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = state->meanShortTerm * 15 + dB; + state->meanShortTerm = (int16_t)(tmp32 >> 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceShortTerm * 15; + state->varianceShortTerm = tmp32 / 16; + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanShortTerm * state->meanShortTerm; + tmp32 = (state->varianceShortTerm << 12) - tmp32; + state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = state->meanLongTerm * state->counter + dB; + state->meanLongTerm = + WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceLongTerm * state->counter; + state->varianceLongTerm = + WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanLongTerm * state->meanLongTerm; + tmp32 = (state->varianceLongTerm << 12) - tmp32; + state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = 3 << 12; + // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in + // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() + // was used, which did an intermediate cast to (int16_t), hence losing + // significant bits. This cause logRatio to max out positive, rather than + // negative. This is a bug, but has very little significance. 
+ tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = (13 << 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp64 = tmp32; + tmp64 += tmp32b >> 10; + tmp64 >>= 6; + + // limit + if (tmp64 > 2048) { + tmp64 = 2048; + } else if (tmp64 < -2048) { + tmp64 = -2048; + } + state->logRatio = (int16_t)tmp64; + + return state->logRatio; // Q10 +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h new file mode 100644 index 0000000000..223c74b9bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +typedef struct { + int32_t downState[8]; + int16_t HPstate; + int16_t counter; + int16_t logRatio; // log( P(active) / P(inactive) ) (Q10) + int16_t meanLongTerm; // Q10 + int32_t varianceLongTerm; // Q8 + int16_t stdLongTerm; // Q10 + int16_t meanShortTerm; // Q10 + int32_t varianceShortTerm; // Q8 + int16_t stdShortTerm; // Q10 +} AgcVad; // total = 54 bytes + +typedef struct { + int32_t capacitorSlow; + int32_t capacitorFast; + int32_t gain; + int32_t gainTable[32]; + int16_t gatePrevious; + int16_t agcMode; + AgcVad vadNearend; + AgcVad vadFarend; +} DigitalAgc; + +int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode); + +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* digitalAgcInst, + const int16_t* const* inNear, + size_t num_bands, + uint32_t FS, + int16_t lowLevelSignal, + int32_t gains[11]); + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out); + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst, + const int16_t* inFar, + size_t nrSamples); + +void WebRtcAgc_InitVad(AgcVad* vadInst); + +int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples); // (i) number of samples + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t compressionGaindB, // Q0 (in dB) + int16_t targetLevelDbfs, // Q0 (in dB) + uint8_t limiterEnable, + int16_t analogTarget); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h new file mode 100644 index 0000000000..6010a988fa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+enum {
+  kAgcModeUnchanged,
+  kAgcModeAdaptiveAnalog,
+  kAgcModeAdaptiveDigital,
+  kAgcModeFixedDigital
+};
+
+enum { kAgcFalse = 0, kAgcTrue };
+
+typedef struct {
+  int16_t targetLevelDbfs;    // default 3 (-3 dBOv)
+  int16_t compressionGaindB;  // default 9 dB
+  uint8_t limiterEnable;      // default kAgcTrue (on)
+} WebRtcAgcConfig;
+
+/*
+ * This function analyses the number of samples passed to the far-end
+ * functions and produces any error code that could arise.
+ *
+ * Input:
+ *      - agcInst           : AGC instance.
+ *      - samples           : Number of samples in input vector.
+ *
+ * Return value:
+ *                          : 0 - Normal operation.
+ *                          : -1 - Error.
+ */
+int WebRtcAgc_GetAddFarendError(void* state, size_t samples);
+
+/*
+ * This function processes a 10 ms frame of far-end speech to determine
+ * if there is active speech. The length of the input speech vector must be
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000).
+ *
+ * Input:
+ *      - agcInst           : AGC instance.
+ *      - inFar             : Far-end input speech vector
+ *      - samples           : Number of samples in input vector
+ *
+ * Return value:
+ *                          : 0 - Normal operation.
+ *                          : -1 - Error
+ */
+int WebRtcAgc_AddFarend(void* agcInst, const int16_t* inFar, size_t samples);
+
+/*
+ * This function processes a 10 ms frame of microphone speech to determine
+ * if there is active speech. The length of the input speech vector must be
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000). For very low input levels, the input signal is increased in level
+ * by multiplying and overwriting the samples in inMic[].
+ *
+ * This function should be called before any further processing of the
+ * near-end microphone signal.
+ *
+ * Input:
+ *      - agcInst           : AGC instance.
+ *      - inMic             : Microphone input speech vector for each band
+ *      - num_bands         : Number of bands in input vector
+ *      - samples           : Number of samples in input vector
+ *
+ * Return value:
+ *                          : 0 - Normal operation.
+ *                          : -1 - Error
+ */
+int WebRtcAgc_AddMic(void* agcInst,
+                     int16_t* const* inMic,
+                     size_t num_bands,
+                     size_t samples);
+
+/*
+ * This function replaces the analog microphone with a virtual one.
+ * It is a digital gain applied to the input signal and is used in the
+ * agcAdaptiveDigital mode where no microphone level is adjustable. The
+ * length of the input speech vector must be given in samples (80 when
+ * FS=8000, and 160 when FS=16000, FS=32000 or FS=48000).
+ *
+ * Input:
+ *      - agcInst           : AGC instance.
+ *      - inMic             : Microphone input speech vector for each band
+ *      - num_bands         : Number of bands in input vector
+ *      - samples           : Number of samples in input vector
+ *      - micLevelIn        : Input level of microphone (static)
+ *
+ * Output:
+ *      - inMic             : Microphone output after processing (L band)
+ *      - inMic_H           : Microphone output after processing (H band)
+ *      - micLevelOut       : Adjusted microphone level after processing
+ *
+ * Return value:
+ *                          : 0 - Normal operation.
+ * : -1 - Error + */ +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut); + +/* + * This function analyses a 10 ms frame and produces the analog and digital + * gains required to normalize the signal. The gain adjustments are done only + * during active periods of speech. The length of the speech vectors must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). The echo parameter can be used to ensure the AGC will not adjust + * upward in the presence of echo. + * + * This function should be called after processing the near-end microphone + * signal, in any case after any echo cancellation. + * + * Input: + * - agcInst : AGC instance + * - inNear : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * - samples : Number of samples in input/output vector + * - inMicLevel : Current microphone volume level + * - echo : Set to 0 if the signal passed to add_mic is + * almost certainly free of echo; otherwise set + * to 1. If you have no information regarding echo + * set to 0. + * + * Output: + * - outMicLevel : Adjusted microphone volume level + * - saturationWarning : A returned value of 1 indicates a saturation event + * has occurred and the volume cannot be further + * reduced. Otherwise will be set to 0. + * - gains : Vector of gains to apply for digital normalization + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Analyze(void* agcInst, + const int16_t* const* inNear, + size_t num_bands, + size_t samples, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning, + int32_t gains[11]); + +/* + * This function processes a 10 ms frame by applying precomputed digital gains. + * + * Input: + * - agcInst : AGC instance + * - gains : Vector of gains to apply for digital normalization + * - in_near : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * + * Output: + * - out : Gain-adjusted near-end speech vector + * : May be the same vector as the input. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Process(const void* agcInst, + const int32_t gains[11], + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out); + +/* + * This function sets the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * - config : config struct + * + * Output: + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config); + +/* + * This function returns the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * + * Output: + * - config : config struct + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config); + +/* + * This function creates and returns an AGC instance, which will contain the + * state information for one (duplex) channel. + */ +void* WebRtcAgc_Create(void); + +/* + * This function frees the AGC instance created at the beginning. + * + * Input: + * - agcInst : AGC instance. + */ +void WebRtcAgc_Free(void* agcInst); + +/* + * This function initializes an AGC instance. + * + * Input: + * - agcInst : AGC instance. 
+ * - minLevel : Minimum possible mic level + * - maxLevel : Maximum possible mic level + * - agcMode : 0 - Unchanged + * : 1 - Adaptive Analog Automatic Gain Control -3dBOv + * : 2 - Adaptive Digital Automatic Gain Control -3dBOv + * : 3 - Fixed Digital Gain 0dB + * - fs : Sampling frequency + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build new file mode 100644 index 0000000000..0188a8ac10 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] 
= "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build new file mode 100644 index 0000000000..9db9a639e7 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/utility.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = 
"WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("level_estimation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc new file mode 100644 index 0000000000..b0a1f53b97 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+
+#include <string.h>
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+    3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.5 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain.
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 =
+    static_cast<int>(kLowProbabilityThreshold * kProbQDomain);
+
+LoudnessHistogram::LoudnessHistogram()
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(),
+      hist_bin_index_(),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(0),
+      len_high_activity_(0) {
+  static_assert(
+      kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
+      "histogram bin centers incorrect size");
+}
+
+LoudnessHistogram::LoudnessHistogram(int window_size)
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(new int[window_size]),
+      hist_bin_index_(new int[window_size]),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(window_size),
+      len_high_activity_(0) {}
+
+LoudnessHistogram::~LoudnessHistogram() {}
+
+void LoudnessHistogram::Update(double rms, double activity_probability) {
+  // If the circular histogram is active, remove the oldest entry first.
+  if (len_circular_buffer_ > 0)
+    RemoveOldestEntryAndUpdate();
+
+  // Find the corresponding bin.
+  int hist_index = GetBinIndex(rms);
+  // To Q10 domain.
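+  // NOTE (editor's illustration, not upstream WebRTC code): "Q10" means the
+  // probability is stored as a fixed-point integer scaled by
+  // kProbQDomain = 1024 (2^10). For example, an activity probability of 0.2
+  // becomes floor(0.2 * 1024) = 204; converting back to the [0, 1] range is a
+  // division by 1024.0, as done in AudioContent().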
+  int prob_q10 =
+      static_cast<int>(floor(activity_probability * kProbQDomain));
+  InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
+// Does nothing if the buffer is not full yet.
+void LoudnessHistogram::RemoveOldestEntryAndUpdate() {
+  RTC_DCHECK_GT(len_circular_buffer_, 0);
+  // Do nothing if circular buffer is not full.
+  if (!buffer_is_full_)
+    return;
+
+  int oldest_prob = activity_probability_[buffer_index_];
+  int oldest_hist_index = hist_bin_index_[buffer_index_];
+  UpdateHist(-oldest_prob, oldest_hist_index);
+}
+
+void LoudnessHistogram::RemoveTransient() {
+  // Don't expect to be here if high-activity region is longer than
+  // `kTransientWidthThreshold` or there has not been any transient.
+  RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold);
+  int index =
+      (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1;
+  while (len_high_activity_ > 0) {
+    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
+    activity_probability_[index] = 0;
+    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
+    len_high_activity_--;
+  }
+}
+
+void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
+                                                   int hist_index) {
+  // Update the circular buffer if it is enabled.
+  if (len_circular_buffer_ > 0) {
+    // Removing transient.
+    if (activity_prob_q10 <= kLowProbThresholdQ10) {
+      // Lower than threshold probability, set it to zero.
+      activity_prob_q10 = 0;
+      // Check if this has been a transient.
+      if (len_high_activity_ <= kTransientWidthThreshold)
+        RemoveTransient();  // Remove this transient.
+      len_high_activity_ = 0;
+    } else if (len_high_activity_ <= kTransientWidthThreshold) {
+      len_high_activity_++;
+    }
+    // Updating the circular buffer.
+    activity_probability_[buffer_index_] = activity_prob_q10;
+    hist_bin_index_[buffer_index_] = hist_index;
+    // Increment the buffer index and check for wrap-around.
+    buffer_index_++;
+    if (buffer_index_ >= len_circular_buffer_) {
+      buffer_index_ = 0;
+      buffer_is_full_ = true;
+    }
+  }
+
+  num_updates_++;
+  if (num_updates_ < 0)
+    num_updates_--;
+
+  UpdateHist(activity_prob_q10, hist_index);
+}
+
+void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) {
+  bin_count_q10_[hist_index] += activity_prob_q10;
+  audio_content_q10_ += activity_prob_q10;
+}
+
+double LoudnessHistogram::AudioContent() const {
+  return audio_content_q10_ / kProbQDomain;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create() {
+  return new LoudnessHistogram;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create(int window_size) {
+  if (window_size < 0)
+    return NULL;
+  return new LoudnessHistogram(window_size);
+}
+
+void LoudnessHistogram::Reset() {
+  // Reset the histogram, audio-content and number of updates.
+  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
+  audio_content_q10_ = 0;
+  num_updates_ = 0;
+  // Empty the circular buffer.
+  buffer_index_ = 0;
+  buffer_is_full_ = false;
+  len_high_activity_ = 0;
+}
+
+int LoudnessHistogram::GetBinIndex(double rms) {
+  // First exclude overload cases.
+  if (rms <= kHistBinCenters[0]) {
+    return 0;
+  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
+    return kHistSize - 1;
+  } else {
+    // The quantizer is uniform in log domain. Alternatively we could do binary
+    // search in linear domain.
+    double rms_log = log(rms);
+
+    int index = static_cast<int>(
+        floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));
+    // The final decision is in linear domain.
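+    // NOTE (editor's illustration, not upstream WebRTC code): the uniform
+    // log-domain quantizer can land one bin low because the bins are centered
+    // in log domain. The code below therefore compares `rms` against the
+    // linear midpoint of the candidate bin and its right neighbor; e.g. for
+    // bins centered at 1.0 and ~1.187, any rms above ~1.094 moves up one bin.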
+    double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
+    if (rms > b) {
+      return index + 1;
+    }
+    return index;
+  }
+}
+
+double LoudnessHistogram::CurrentRms() const {
+  double p;
+  double mean_val = 0;
+  if (audio_content_q10_ > 0) {
+    double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
+    for (int n = 0; n < kHistSize; n++) {
+      p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
+      mean_val += p * kHistBinCenters[n];
+    }
+  } else {
+    mean_val = kHistBinCenters[0];
+  }
+  return mean_val;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h
new file mode 100644
index 0000000000..51b38714c2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
+
+#include <string.h>
+
+#include <memory>
+
+namespace webrtc {
+
+// This class implements the histogram of loudness with circular buffers so
+// that the histogram tracks the last T seconds of the loudness.
+class LoudnessHistogram {
+ public:
+  // Create a non-sliding LoudnessHistogram.
+  static LoudnessHistogram* Create();
+
+  // Create a sliding LoudnessHistogram, i.e. the histogram represents the last
+  // `window_size` samples.
+  static LoudnessHistogram* Create(int window_size);
+  ~LoudnessHistogram();
+
+  // Insert RMS and the corresponding activity probability.
+  void Update(double rms, double activity_probability);
+
+  // Reset the histogram, forget the past.
+  void Reset();
+
+  // Current loudness, which is actually the mean of histogram in loudness
+  // domain.
+  double CurrentRms() const;
+
+  // Sum of the histogram content.
+  double AudioContent() const;
+
+  // Number of times the histogram has been updated.
+  int num_updates() const { return num_updates_; }
+
+ private:
+  LoudnessHistogram();
+  explicit LoudnessHistogram(int window);
+
+  // Find the histogram bin associated with the given `rms`.
+  int GetBinIndex(double rms);
+
+  void RemoveOldestEntryAndUpdate();
+  void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
+  void UpdateHist(int activity_prob_q10, int hist_index);
+  void RemoveTransient();
+
+  // Number of histogram bins.
+  static const int kHistSize = 77;
+
+  // Number of times the histogram is updated.
+  int num_updates_;
+  // Audio content, this should be equal to the sum of the components of
+  // `bin_count_q10_`.
+  int64_t audio_content_q10_;
+
+  // LoudnessHistogram of input RMS in Q10 with `kHistSize` bins. In each
+  // Update() call, we increment the associated histogram-bin with the given
+  // probability. The increment is implemented in Q10 to avoid rounding errors.
+  int64_t bin_count_q10_[kHistSize];
+
+  // Circular buffer for probabilities.
+  std::unique_ptr<int[]> activity_probability_;
+  // Circular buffer for histogram-indices of probabilities.
+  std::unique_ptr<int[]> hist_bin_index_;
+  // Current index of the circular buffer, where the newest data is written;
+  // when the buffer is full it therefore also points at the oldest data.
+  int buffer_index_;
+  // Indicates whether the buffer is full, i.e. a wrap-around has occurred.
+  int buffer_is_full_;
+  // Size of circular buffer.
+  int len_circular_buffer_;
+  int len_high_activity_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc
new file mode 100644
index 0000000000..bbc0a7ee92
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Use CreateHistUnittestFile.m to generate the input file.
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc/utility.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+struct InputOutput {
+  double rms;
+  double activity_probability;
+  double audio_content;
+  double loudness;
+};
+
+const double kRelativeErrTol = 1e-10;
+
+class LoudnessHistogramTest : public ::testing::Test {
+ protected:
+  void RunTest(bool enable_circular_buff, absl::string_view filename);
+
+ private:
+  void TestClean();
+  std::unique_ptr<LoudnessHistogram> hist_;
+};
+
+void LoudnessHistogramTest::TestClean() {
+  EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02);
+  EXPECT_EQ(hist_->AudioContent(), 0);
+  EXPECT_EQ(hist_->num_updates(), 0);
+}
+
+void LoudnessHistogramTest::RunTest(bool enable_circular_buff,
+                                    absl::string_view filename) {
+  FILE* in_file = fopen(std::string(filename).c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+  if (enable_circular_buff) {
+    int buffer_size;
+    EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
+    hist_.reset(LoudnessHistogram::Create(buffer_size));
+  } else {
+    hist_.reset(LoudnessHistogram::Create());
+  }
+  TestClean();
+
+  InputOutput io;
+  int num_updates = 0;
+  while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
+    if (io.rms < 0) {
+      // We have to reset.
+      hist_->Reset();
+      TestClean();
+      num_updates = 0;
+      // Read the next chunk of input.
+      if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
+        break;
+    }
+    hist_->Update(io.rms, io.activity_probability);
+    num_updates++;
+    EXPECT_EQ(hist_->num_updates(), num_updates);
+    double audio_content = hist_->AudioContent();
+
+    double abs_err =
+        std::min(audio_content, io.audio_content) * kRelativeErrTol;
+
+    ASSERT_NEAR(audio_content, io.audio_content, abs_err);
+    double current_loudness = Linear2Loudness(hist_->CurrentRms());
+    abs_err =
+        std::min(fabs(current_loudness), fabs(io.loudness)) * kRelativeErrTol;
+    ASSERT_NEAR(current_loudness, io.loudness, abs_err);
+  }
+  fclose(in_file);
+}
+
+TEST_F(LoudnessHistogramTest, ActiveCircularBuffer) {
+  RunTest(true, test::ResourcePath(
+                    "audio_processing/agc/agc_with_circular_buffer", "dat")
+                    .c_str());
+}
+
+TEST_F(LoudnessHistogramTest, InactiveCircularBuffer) {
+  RunTest(false, test::ResourcePath(
+                     "audio_processing/agc/agc_no_circular_buffer", "dat")
+                     .c_str());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h
new file mode 100644
index 0000000000..3080e1563c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockAgc : public Agc {
+ public:
+  virtual ~MockAgc() {}
+  MOCK_METHOD(void, Process, (rtc::ArrayView<const int16_t> audio), (override));
+  MOCK_METHOD(bool, GetRmsErrorDb, (int* error), (override));
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(int, set_target_level_dbfs, (int level), (override));
+  MOCK_METHOD(int, target_level_dbfs, (), (const, override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.cc b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc
new file mode 100644
index 0000000000..2a87e5ce74
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/utility.h"
+
+#include <math.h>
+
+namespace webrtc {
+
+static const double kLog10 = 2.30258509299;
+static const double kLinear2DbScale = 20.0 / kLog10;
+static const double kLinear2LoudnessScale = 13.4 / kLog10;
+
+double Loudness2Db(double loudness) {
+  return loudness * kLinear2DbScale / kLinear2LoudnessScale;
+}
+
+double Linear2Loudness(double rms) {
+  if (rms == 0)
+    return -15;
+  return kLinear2LoudnessScale * log(rms);
+}
+
+double Db2Loudness(double db) {
+  return db * kLinear2LoudnessScale / kLinear2DbScale;
+}
+
+double Dbfs2Loudness(double dbfs) {
+  return Db2Loudness(90 + dbfs);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.h b/third_party/libwebrtc/modules/audio_processing/agc/utility.h
new file mode 100644
index 0000000000..56eec244a7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+#define MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+
+namespace webrtc {
+
+// TODO(turajs): Add description of function.
+double Loudness2Db(double loudness);
+
+double Linear2Loudness(double rms);
+
+double Db2Loudness(double db);
+
+double Dbfs2Loudness(double dbfs);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn
new file mode 100644
index 0000000000..bd59ad3dae
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn
@@ -0,0 +1,511 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") + +rtc_library("speech_level_estimator") { + sources = [ + "speech_level_estimator.cc", + "speech_level_estimator.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:api", + "..:apm_logging", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_library("adaptive_digital_gain_controller") { + sources = [ + "adaptive_digital_gain_controller.cc", + "adaptive_digital_gain_controller.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + ":gain_applier", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:metrics", + ] +} + +rtc_library("saturation_protector") { + sources = [ + "saturation_protector.cc", + "saturation_protector.h", + "saturation_protector_buffer.cc", + "saturation_protector_buffer.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:apm_logging", + "../../../rtc_base:checks", + "../../../rtc_base:safe_compare", + "../../../rtc_base:safe_minmax", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("biquad_filter") { + visibility = [ "./*" ] + sources = [ + "biquad_filter.cc", + "biquad_filter.h", + ] + deps = [ + "../../../api:array_view", + "../../../rtc_base:macromagic", + ] +} + +rtc_library("clipping_predictor") { + visibility = [ + "../agc:agc", + "./*", + ] + + sources = [ + "clipping_predictor.cc", + "clipping_predictor.h", + "clipping_predictor_level_buffer.cc", + "clipping_predictor_level_buffer.h", + ] + + deps = [ + ":gain_map", + "..:api", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("common") { + sources = [ "agc2_common.h" ] +} + +rtc_library("fixed_digital") { + sources = [ + "fixed_digital_level_estimator.cc", + "fixed_digital_level_estimator.h", + "interpolated_gain_curve.cc", + "interpolated_gain_curve.h", + "limiter.cc", + "limiter.h", + ] + + visibility = [ + "..:gain_controller2", + "../../audio_mixer:audio_mixer_impl", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_library("gain_applier") { + sources = [ + "gain_applier.cc", + "gain_applier.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + ":common", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_source_set("gain_map") { + visibility = [ + "..:analog_mic_simulation", + "../agc:agc", + "./*", + ] + + sources = [ "gain_map_internal.h" ] +} + +rtc_library("input_volume_controller") { + sources = [ + "input_volume_controller.cc", + "input_volume_controller.h", + 
"speech_probability_buffer.cc", + "speech_probability_buffer.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":clipping_predictor", + ":gain_map", + ":input_volume_stats_reporter", + "..:api", + "..:audio_buffer", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("noise_level_estimator") { + sources = [ + "noise_level_estimator.cc", + "noise_level_estimator.h", + ] + deps = [ + ":biquad_filter", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../system_wrappers", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] +} + +rtc_library("vad_wrapper") { + sources = [ + "vad_wrapper.cc", + "vad_wrapper.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":common", + ":cpu_features", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "rnn_vad", + "rnn_vad:rnn_vad_common", + ] +} + +rtc_library("cpu_features") { + sources = [ + "cpu_features.cc", + "cpu_features.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + ] +} + +rtc_library("speech_level_estimator_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "speech_level_estimator_unittest.cc" ] + deps = [ + ":common", + ":speech_level_estimator", + "..:api", + "..:apm_logging", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("adaptive_digital_gain_controller_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "adaptive_digital_gain_controller_unittest.cc" ] + + deps = [ + ":adaptive_digital_gain_controller", + ":common", + ":test_utils", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("gain_applier_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "gain_applier_unittest.cc" ] + deps = [ + ":gain_applier", + ":test_utils", + "..:audio_frame_view", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("saturation_protector_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ + "saturation_protector_buffer_unittest.cc", + "saturation_protector_unittest.cc", + ] + deps = [ + ":common", + ":saturation_protector", + "..:apm_logging", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("biquad_filter_unittests") { + testonly = true + sources = [ "biquad_filter_unittest.cc" ] + deps = [ + ":biquad_filter", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("fixed_digital_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = 
[ + "agc2_testing_common_unittest.cc", + "compute_interpolated_gain_curve.cc", + "compute_interpolated_gain_curve.h", + "fixed_digital_level_estimator_unittest.cc", + "interpolated_gain_curve_unittest.cc", + "limiter_db_gain_curve.cc", + "limiter_db_gain_curve.h", + "limiter_db_gain_curve_unittest.cc", + "limiter_unittest.cc", + ] + deps = [ + ":common", + ":fixed_digital", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../system_wrappers:metrics", + ] +} + +rtc_library("input_volume_controller_unittests") { + testonly = true + sources = [ + "clipping_predictor_level_buffer_unittest.cc", + "clipping_predictor_unittest.cc", + "input_volume_controller_unittest.cc", + "speech_probability_buffer_unittest.cc", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":clipping_predictor", + ":gain_map", + ":input_volume_controller", + "..:api", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + "../../../test:field_trial", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gtest", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("noise_estimator_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "noise_level_estimator_unittest.cc" ] + deps = [ + ":noise_level_estimator", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../api:function_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("vad_wrapper_unittests") { + testonly = true + sources = [ "vad_wrapper_unittest.cc" ] + deps = [ + ":common", + ":vad_wrapper", + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../rtc_base:safe_compare", + "../../../test:test_support", + ] +} + +rtc_library("test_utils") { + testonly = true + visibility = [ + ":*", + "..:audio_processing_unittests", + ] + sources = [ + "agc2_testing_common.cc", + "agc2_testing_common.h", + "vector_float_frame.cc", + "vector_float_frame.h", + ] + deps = [ + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + ] +} + +rtc_library("input_volume_stats_reporter") { + sources = [ + "input_volume_stats_reporter.cc", + "input_volume_stats_reporter.h", + ] + deps = [ + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("input_volume_stats_reporter_unittests") { + testonly = true + sources = [ "input_volume_stats_reporter_unittest.cc" ] + deps = [ + ":input_volume_stats_reporter", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + "../../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc new file mode 100644 index 0000000000..e8edab602c --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
@@ -0,0 +1,216 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr int kHeadroomHistogramMin = 0;
+constexpr int kHeadroomHistogramMax = 50;
+constexpr int kGainDbHistogramMax = 30;
+
+// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
+// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
+// safety margin to allow transient peaks to exceed the target peak level
+// without clipping.
+float ComputeGainDb(float input_level_dbfs,
+                    const AdaptiveDigitalConfig& config) {
+  // If the level is very low, apply the maximum gain.
+  if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
+    return config.max_gain_db;
+  }
+  // We expect to end up here most of the time: the level is below
+  // -headroom, but we can boost it to -headroom.
+  if (input_level_dbfs < -config.headroom_db) {
+    return -config.headroom_db - input_level_dbfs;
+  }
+  // The level is too high and we can't boost.
+  RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
+  return 0.0f;
+}
+
+// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
+// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
+// `target_gain_db` so that the output noise level equals
+// `max_output_noise_level_dbfs`.
+float LimitGainByNoise(float target_gain_db,
+                       float input_noise_level_dbfs,
+                       float max_output_noise_level_dbfs,
+                       ApmDataDumper& apm_data_dumper) {
+  const float max_allowed_gain_db =
+      max_output_noise_level_dbfs - input_noise_level_dbfs;
+  apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
+                          max_allowed_gain_db);
+  return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
+}
+
+float LimitGainByLowConfidence(float target_gain_db,
+                               float last_gain_db,
+                               float limiter_audio_level_dbfs,
+                               bool estimate_is_confident) {
+  if (estimate_is_confident ||
+      limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
+    return target_gain_db;
+  }
+  const float limiter_level_dbfs_before_gain =
+      limiter_audio_level_dbfs - last_gain_db;
+
+  // Compute a new gain so that `limiter_level_dbfs_before_gain` +
+  // `new_target_gain_db` is not greater than
+  // `kLimiterThresholdForAgcGainDbfs`.
+  const float new_target_gain_db = std::max(
+      kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
+  return std::min(new_target_gain_db, target_gain_db);
+}
+
+// Computes how the gain should change during this frame; returns the gain
+// difference in dB relative to `last_gain_db`.
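+// NOTE (editor's illustration, not upstream WebRTC code): for example, with a
+// target gain of +6 dB, a last gain of +2 dB, and max increase/decrease of
+// 3 dB, the desired change is +4 dB and the clamped result is +3 dB; if gain
+// increases are not allowed, the desired change is first capped at 0 dB.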
+float ComputeGainChangeThisFrameDb(float target_gain_db,
+                                   float last_gain_db,
+                                   bool gain_increase_allowed,
+                                   float max_gain_decrease_db,
+                                   float max_gain_increase_db) {
+  RTC_DCHECK_GT(max_gain_decrease_db, 0);
+  RTC_DCHECK_GT(max_gain_increase_db, 0);
+  float target_gain_difference_db = target_gain_db - last_gain_db;
+  if (!gain_increase_allowed) {
+    target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
+  }
+  return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
+                        max_gain_increase_db);
+}
+
+}  // namespace
+
+AdaptiveDigitalGainController::AdaptiveDigitalGainController(
+    ApmDataDumper* apm_data_dumper,
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+    int adjacent_speech_frames_threshold)
+    : apm_data_dumper_(apm_data_dumper),
+      gain_applier_(
+          /*hard_clip_samples=*/false,
+          /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
+      config_(config),
+      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+      max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
+                                   kFrameDurationMs / 1000.0f),
+      calls_since_last_gain_log_(0),
+      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
+      last_gain_db_(config_.initial_gain_db) {
+  RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
+  RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
+  RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
+  RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
+}
+
+void AdaptiveDigitalGainController::Process(const FrameInfo& info,
+                                            AudioFrameView<float> frame) {
+  RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
+  RTC_DCHECK_GE(frame.num_channels(), 1);
+  RTC_DCHECK(
+      frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
+      frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
+      << "`frame` does not look like a 10 ms frame for an APM supported sample "
+         "rate";
+
+  // Compute the input level used to select the desired gain.
+  RTC_DCHECK_GT(info.headroom_db, 0.0f);
+  const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
+
+  const float target_gain_db = LimitGainByLowConfidence(
+      LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
+                       info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
+                       *apm_data_dumper_),
+      last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
+
+  // Forbid increasing the gain until enough adjacent speech frames are
+  // observed.
+  bool first_confident_speech_frame = false;
+  if (info.speech_probability < kVadConfidenceThreshold) {
+    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
+  } else if (frames_to_gain_increase_allowed_ > 0) {
+    frames_to_gain_increase_allowed_--;
+    first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
+  }
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
+      frames_to_gain_increase_allowed_);
+
+  const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
+
+  float max_gain_increase_db = max_gain_change_db_per_10ms_;
+  if (first_confident_speech_frame) {
+    // No gain increase happened while waiting for a long enough speech
+    // sequence. Therefore, temporarily allow a faster gain increase.
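+    // NOTE (editor's illustration, not upstream WebRTC code): with a
+    // threshold of, say, 12 adjacent speech frames, the per-frame increase
+    // cap below becomes 12x the usual per-10 ms cap for this one frame,
+    // compensating for the increase withheld while waiting.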
+    RTC_DCHECK(gain_increase_allowed);
+    max_gain_increase_db *= adjacent_speech_frames_threshold_;
+  }
+
+  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
+      target_gain_db, last_gain_db_, gain_increase_allowed,
+      /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
+      max_gain_increase_db);
+
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
+                            target_gain_db - last_gain_db_);
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
+                            gain_change_this_frame_db);
+
+  // Optimization: avoid calling math functions if gain does not
+  // change.
+  if (gain_change_this_frame_db != 0.f) {
+    gain_applier_.SetGainFactor(
+        DbToRatio(last_gain_db_ + gain_change_this_frame_db));
+  }
+
+  gain_applier_.ApplyGain(frame);
+
+  // Remember that the gain has changed for the next iteration.
+  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
+                            last_gain_db_);
+
+  // Log every 10 seconds.
+  calls_since_last_gain_log_++;
+  if (calls_since_last_gain_log_ == 1000) {
+    calls_since_last_gain_log_ = 0;
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
+                                -info.speech_level_dbfs, 0, 100, 101);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
+                                -info.noise_rms_dbfs, 0, 100, 101);
+    RTC_HISTOGRAM_COUNTS_LINEAR(
+        "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
+        kHeadroomHistogramMax,
+        kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
+                                last_gain_db_, 0, kGainDbHistogramMax,
+                                kGainDbHistogramMax + 1);
+    RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
+                     << " | speech_dbfs: " << info.speech_level_dbfs
+                     << " | noise_dbfs: " << info.noise_rms_dbfs
+                     << " | headroom_db: " << info.headroom_db
+                     << " | gain_db: " << last_gain_db_;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
new file mode 100644
index 0000000000..01335e79db
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
+
+#include <memory>
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Selects the target digital gain, decides when and how quickly to adapt to
+// the target and applies the current gain to 10 ms frames.
+class AdaptiveDigitalGainController {
+ public:
+  // Information about a frame to process.
+  struct FrameInfo {
+    float speech_probability;     // Probability of speech in the [0, 1] range.
+    float speech_level_dbfs;      // Estimated speech level (dBFS).
+    bool speech_level_reliable;   // True with reliable speech level estimation.
+    float noise_rms_dbfs;         // Estimated noise RMS level (dBFS).
+    float headroom_db;            // Headroom (dB).
+    // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope_dbfs`.
+    float limiter_envelope_dbfs;  // Envelope level from the limiter (dBFS).
+  };
+
+  AdaptiveDigitalGainController(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold);
+  AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
+  AdaptiveDigitalGainController& operator=(
+      const AdaptiveDigitalGainController&) = delete;
+
+  // Analyzes `info`, updates the digital gain and applies it to a 10 ms
+  // `frame`. Supports any sample rate supported by APM.
+  void Process(const FrameInfo& info, AudioFrameView<float> frame);
+
+ private:
+  ApmDataDumper* const apm_data_dumper_;
+  GainApplier gain_applier_;
+
+  const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
+  const int adjacent_speech_frames_threshold_;
+  const float max_gain_change_db_per_10ms_;
+
+  int calls_since_last_gain_log_;
+  int frames_to_gain_increase_allowed_;
+  float last_gain_db_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build
new file mode 100644
index 0000000000..7d16c9a9f5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + 
+ OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_digital_gain_controller_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc new file mode 100644 index 0000000000..e95cbb5067 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMono = 1;
+constexpr int kStereo = 2;
+constexpr int kFrameLen10ms8kHz = 80;
+constexpr int kFrameLen10ms48kHz = 480;
+
+constexpr float kMaxSpeechProbability = 1.0f;
+
+// Constants used in place of estimated noise levels.
+constexpr float kNoNoiseDbfs = kMinLevelDbfs;
+constexpr float kWithNoiseDbfs = -20.0f;
+
+// Number of additional frames to process in the tests to ensure that the
+// tested adaptation processes have converged.
+constexpr int kNumExtraFrames = 10;
+
+constexpr float GetMaxGainChangePerFrameDb(
+    float max_gain_change_db_per_second) {
+  return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
+}
+
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr AdaptiveDigitalConfig kDefaultConfig{};
+
+// Helper to create initialized `AdaptiveDigitalGainController` objects.
+struct GainApplierHelper {
+  GainApplierHelper(const AdaptiveDigitalConfig& config,
+                    int adjacent_speech_frames_threshold)
+      : apm_data_dumper(0),
+        gain_applier(std::make_unique<AdaptiveDigitalGainController>(
+            &apm_data_dumper,
+            config,
+            adjacent_speech_frames_threshold)) {}
+  ApmDataDumper apm_data_dumper;
+  std::unique_ptr<AdaptiveDigitalGainController> gain_applier;
+};
+
+// Returns a `FrameInfo` sample to simulate noiseless speech detected with
+// maximum probability and with level, headroom and limiter envelope chosen
+// so that the resulting gain equals the default initial adaptive digital gain
+// i.e., no gain adaptation is expected.
+AdaptiveDigitalGainController::FrameInfo GetFrameInfoToNotAdapt(
+    const AdaptiveDigitalConfig& config) {
+  AdaptiveDigitalGainController::FrameInfo info;
+  info.speech_probability = kMaxSpeechProbability;
+  info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
+  info.speech_level_reliable = true;
+  info.noise_rms_dbfs = kNoNoiseDbfs;
+  info.headroom_db = config.headroom_db;
+  info.limiter_envelope_dbfs = -2.0f;
+  return info;
+}
+
+TEST(GainController2AdaptiveDigitalGainControllerTest,
+     GainApplierShouldNotCrash) {
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+  // Make one call with reasonable audio level values and settings.
+  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
+  helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
+                               fake_audio.float_frame_view());
+}
+
+// Checks that the maximum allowed gain is applied.
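+// NOTE (editor's illustration, not upstream WebRTC code): with the default
+// config, the test below feeds a -60 dBFS speech level for enough frames that
+// the ramped gain saturates at `max_gain_db`; the frame count is roughly
+// max_gain_db divided by the per-frame gain change, plus some slack frames.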
+// Checks that the maximum allowed gain is applied.
+TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) {
+  constexpr int kNumFramesToAdapt =
+      static_cast<int>(kDefaultConfig.max_gain_db /
+                       GetMaxGainChangePerFrameDb(
+                           kDefaultConfig.max_gain_change_db_per_second)) +
+      kNumExtraFrames;
+
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs = -60.0f;
+  float applied_gain;
+  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
+    helper.gain_applier->Process(info, fake_audio.float_frame_view());
+    applied_gain = fake_audio.float_frame_view().channel(0)[0];
+  }
+  const float applied_gain_db = 20.0f * std::log10f(applied_gain);
+  EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
+}
+
+TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+
+  constexpr float initial_level_dbfs = -25.0f;
+  constexpr float kMaxGainChangeDbPerFrame =
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
+  constexpr int kNumFramesToAdapt =
+      static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) +
+      kNumExtraFrames;
+
+  const float max_change_per_frame_linear =
+      DbToRatio(kMaxGainChangeDbPerFrame);
+
+  float last_gain_linear = 1.f;
+  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
+    AdaptiveDigitalGainController::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
+    info.speech_level_dbfs = initial_level_dbfs;
+    helper.gain_applier->Process(info, fake_audio.float_frame_view());
+    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
+    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
+              max_change_per_frame_linear);
+    last_gain_linear = current_gain_linear;
+  }
+
+  // Check that the same is true when gain decreases as well.
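+  // Raising the speech level to 0 dBFS drives the target gain down, so the
+  // applied gain must now ramp down, again bounded by
+  // `max_change_per_frame_linear` per 10 ms frame.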
+ for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = 0.f; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), + max_change_per_frame_linear); + last_gain_linear = current_gain_linear; + } +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + constexpr float initial_level_dbfs = -25.0f; + + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float maximal_difference = 0.0f; + float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db); + for (const auto& x : fake_audio.float_frame_view().channel(0)) { + const float difference = std::abs(x - current_value); + maximal_difference = std::max(maximal_difference, difference); + current_value = x; + } + + const float max_change_per_frame_linear = DbToRatio( + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second)); + const float max_change_per_sample = + max_change_per_frame_linear / kFrameLen10ms48kHz; + + EXPECT_LE(maximal_difference, max_change_per_sample); +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + constexpr float initial_level_dbfs = -25.0f; + constexpr int num_initial_frames = + kDefaultConfig.initial_gain_db / + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs) + << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + info.noise_rms_dbfs = kWithNoiseDbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); + } + } +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, + CanHandlePositiveSpeechLevels) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + // Make one call with positive audio level values and settings. 
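+  // (A speech level above 0 dBFS is outside the nominal range; the controller
+  // is only expected to handle it without crashing, not to adapt to it.)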
+  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs = 5.0f;
+  helper.gain_applier->Process(info, fake_audio.float_frame_view());
+}
+
+TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+
+  constexpr float initial_level_dbfs = -25.0f;
+  constexpr int num_initial_frames =
+      kDefaultConfig.initial_gain_db /
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
+  constexpr int num_frames = 50;
+
+  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
+      << "kWithNoiseDbfs is too low";
+
+  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    AdaptiveDigitalGainController::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
+    info.speech_level_dbfs = initial_level_dbfs;
+    info.limiter_envelope_dbfs = 1.0f;
+    info.speech_level_reliable = false;
+    helper.gain_applier->Process(info, fake_audio.float_frame_view());
+
+    // Wait so that the adaptive gain applier has time to lower the gain.
+    if (i > num_initial_frames) {
+      const float maximal_ratio =
+          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
+                            fake_audio.float_frame_view().channel(0).end());
+
+      EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
+    }
+  }
+}
+
+class AdaptiveDigitalGainControllerParametrizedTest
+    : public ::testing::TestWithParam<int> {
+ protected:
+  int adjacent_speech_frames_threshold() const { return GetParam(); }
+};
+
+TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
+       DoNotIncreaseGainWithTooFewSpeechFrames) {
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold());
+
+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs -= 12.0f;
+
+  float prev_gain = 0.0f;
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    helper.gain_applier->Process(info, audio.float_frame_view());
+    const float gain = audio.float_frame_view().channel(0)[0];
+    if (i > 0) {
+      EXPECT_EQ(prev_gain, gain);  // No gain increase applied.
+    }
+    prev_gain = gain;
+  }
+}
+
+TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
+       IncreaseGainWithEnoughSpeechFrames) {
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold());
+
+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs -= 12.0f;
+
+  float prev_gain = 0.0f;
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    helper.gain_applier->Process(info, audio.float_frame_view());
+    prev_gain = audio.float_frame_view().channel(0)[0];
+  }
+
+  // Process one more speech frame.
+  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+  helper.gain_applier->Process(info, audio.float_frame_view());
+
+  // An increased gain has been applied.
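+  // (That is, the gain applied to this extra frame exceeds the last gain
+  // observed while the adjacent-speech-frames counter was still below the
+  // threshold.)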
+ EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); +} + +INSTANTIATE_TEST_SUITE_P( + GainController2, + AdaptiveDigitalGainControllerParametrizedTest, + ::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h new file mode 100644 index 0000000000..4597bcd015 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ + +namespace webrtc { + +constexpr float kMinFloatS16Value = -32768.0f; +constexpr float kMaxFloatS16Value = 32767.0f; +constexpr float kMaxAbsFloatS16Value = 32768.0f; + +// Minimum audio level in dBFS scale for S16 samples. +constexpr float kMinLevelDbfs = -90.31f; + +constexpr int kFrameDurationMs = 10; +constexpr int kSubFramesInFrame = 20; +constexpr int kMaximalNumberOfSamplesPerChannel = 480; + +// Adaptive digital gain applier settings. + +// At what limiter levels should we start decreasing the adaptive digital gain. +constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f; + +// Number of milliseconds to wait to periodically reset the VAD. +constexpr int kVadResetPeriodMs = 1500; + +// Speech probability threshold to detect speech activity. +constexpr float kVadConfidenceThreshold = 0.95f; + +// Minimum number of adjacent speech frames having a sufficiently high speech +// probability to reliably detect speech activity. +constexpr int kAdjacentSpeechFramesThreshold = 12; + +// Number of milliseconds of speech frames to observe to make the estimator +// confident. +constexpr float kLevelEstimatorTimeToConfidenceMs = 400; +constexpr float kLevelEstimatorLeakFactor = + 1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs; + +// Saturation Protector settings. +constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f; +constexpr int kSaturationProtectorBufferSize = 4; + +// Number of interpolation points for each region of the limiter. +// These values have been tuned to limit the interpolated gain curve error given +// the limiter parameters and allowing a maximum error of +/- 32768^-1. +constexpr int kInterpolatedGainCurveKneePoints = 22; +constexpr int kInterpolatedGainCurveBeyondKneePoints = 10; +constexpr int kInterpolatedGainCurveTotalPoints = + kInterpolatedGainCurveKneePoints + kInterpolatedGainCurveBeyondKneePoints; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc new file mode 100644 index 0000000000..125e551b72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+std::vector<double> LinSpace(double l, double r, int num_points) {
+  RTC_CHECK_GE(num_points, 2);
+  std::vector<double> points(num_points);
+  const double step = (r - l) / (num_points - 1.0);
+  points[0] = l;
+  for (int i = 1; i < num_points - 1; i++) {
+    points[i] = static_cast<double>(l) + i * step;
+  }
+  points[num_points - 1] = r;
+  return points;
+}
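+
+// Example (mirrors the unit test below): LinSpace(-1.0, 2.0, 4) returns
+// {-1.0, 0.0, 1.0, 2.0}; the two end points are assigned exactly rather than
+// accumulated, avoiding floating-point drift.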
+
+WhiteNoiseGenerator::WhiteNoiseGenerator(int min_amplitude, int max_amplitude)
+    : rand_gen_(42),
+      min_amplitude_(min_amplitude),
+      max_amplitude_(max_amplitude) {
+  RTC_DCHECK_LT(min_amplitude_, max_amplitude_);
+  RTC_DCHECK_LE(kMinS16, min_amplitude_);
+  RTC_DCHECK_LE(min_amplitude_, kMaxS16);
+  RTC_DCHECK_LE(kMinS16, max_amplitude_);
+  RTC_DCHECK_LE(max_amplitude_, kMaxS16);
+}
+
+float WhiteNoiseGenerator::operator()() {
+  return static_cast<float>(rand_gen_.Rand(min_amplitude_, max_amplitude_));
+}
+
+SineGenerator::SineGenerator(float amplitude,
+                             float frequency_hz,
+                             int sample_rate_hz)
+    : amplitude_(amplitude),
+      frequency_hz_(frequency_hz),
+      sample_rate_hz_(sample_rate_hz),
+      x_radians_(0.0f) {
+  RTC_DCHECK_GT(amplitude_, 0);
+  RTC_DCHECK_LE(amplitude_, kMaxS16);
+}
+
+float SineGenerator::operator()() {
+  constexpr float kPi = 3.1415926536f;
+  x_radians_ += frequency_hz_ / sample_rate_hz_ * 2 * kPi;
+  if (x_radians_ >= 2 * kPi) {
+    x_radians_ -= 2 * kPi;
+  }
+  return amplitude_ * std::sinf(x_radians_);
+}
+
+PulseGenerator::PulseGenerator(float pulse_amplitude,
+                               float no_pulse_amplitude,
+                               float frequency_hz,
+                               int sample_rate_hz)
+    : pulse_amplitude_(pulse_amplitude),
+      no_pulse_amplitude_(no_pulse_amplitude),
+      samples_period_(
+          static_cast<int>(static_cast<float>(sample_rate_hz) / frequency_hz)),
+      sample_counter_(0) {
+  RTC_DCHECK_GE(pulse_amplitude_, kMinS16);
+  RTC_DCHECK_LE(pulse_amplitude_, kMaxS16);
+  RTC_DCHECK_GT(no_pulse_amplitude_, kMinS16);
+  RTC_DCHECK_LE(no_pulse_amplitude_, kMaxS16);
+  RTC_DCHECK_GT(sample_rate_hz, frequency_hz);
+}
+
+float PulseGenerator::operator()() {
+  sample_counter_++;
+  if (sample_counter_ >= samples_period_) {
+    sample_counter_ -= samples_period_;
+  }
+  return static_cast<float>(sample_counter_ == 0 ? pulse_amplitude_
+                                                 : no_pulse_amplitude_);
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h
new file mode 100644
index 0000000000..afed97e83b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+
+#include <limits>
+#include <vector>
+
+#include "rtc_base/random.h"
+
+namespace webrtc {
+namespace test {
+
+constexpr float kMinS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::min());
+constexpr float kMaxS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::max());
+
+// Level Estimator test parameters.
+constexpr float kDecayMs = 20.0f;
+
+// Limiter parameters.
+constexpr float kLimiterMaxInputLevelDbFs = 1.f;
+constexpr float kLimiterKneeSmoothnessDb = 1.f;
+constexpr float kLimiterCompressionRatio = 5.f;
+
+// Returns evenly spaced `num_points` numbers over a specified interval [l, r].
+std::vector<double> LinSpace(double l, double r, int num_points);
+
+// Generates white noise.
+class WhiteNoiseGenerator {
+ public:
+  WhiteNoiseGenerator(int min_amplitude, int max_amplitude);
+  float operator()();
+
+ private:
+  Random rand_gen_;
+  const int min_amplitude_;
+  const int max_amplitude_;
+};
+
+// Generates a sine function.
+class SineGenerator {
+ public:
+  SineGenerator(float amplitude, float frequency_hz, int sample_rate_hz);
+  float operator()();
+
+ private:
+  const float amplitude_;
+  const float frequency_hz_;
+  const int sample_rate_hz_;
+  float x_radians_;
+};
+
+// Generates periodic pulses.
+class PulseGenerator {
+ public:
+  PulseGenerator(float pulse_amplitude,
+                 float no_pulse_amplitude,
+                 float frequency_hz,
+                 int sample_rate_hz);
+  float operator()();
+
+ private:
+  const float pulse_amplitude_;
+  const float no_pulse_amplitude_;
+  const int samples_period_;
+  int sample_counter_;
+};
+
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc
new file mode 100644
index 0000000000..79c3cc95d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+
+TEST(GainController2TestingCommon, LinSpace) {
+  std::vector<double> points1 = test::LinSpace(-1.0, 2.0, 4);
+  const std::vector<double> expected_points1{{-1.0, 0.0, 1.0, 2.0}};
+  EXPECT_EQ(expected_points1, points1);
+
+  std::vector<double> points2 = test::LinSpace(0.0, 1.0, 4);
+  const std::vector<double> expected_points2{{0.0, 1.0 / 3.0, 2.0 / 3.0, 1.0}};
+  EXPECT_EQ(points2, expected_points2);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc
new file mode 100644
index 0000000000..c1b80d7320
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/biquad_filter.h"
+
+#include "rtc_base/arraysize.h"
+
+namespace webrtc {
+
+BiQuadFilter::BiQuadFilter(const Config& config)
+    : config_(config), state_({}) {}
+
+BiQuadFilter::~BiQuadFilter() = default;
+
+void BiQuadFilter::SetConfig(const Config& config) {
+  config_ = config;
+  state_ = {};
+}
+
+void BiQuadFilter::Reset() {
+  state_ = {};
+}
+
+void BiQuadFilter::Process(rtc::ArrayView<const float> x,
+                           rtc::ArrayView<float> y) {
+  RTC_DCHECK_EQ(x.size(), y.size());
+  const float config_a0 = config_.a[0];
+  const float config_a1 = config_.a[1];
+  const float config_b0 = config_.b[0];
+  const float config_b1 = config_.b[1];
+  const float config_b2 = config_.b[2];
+  float state_a0 = state_.a[0];
+  float state_a1 = state_.a[1];
+  float state_b0 = state_.b[0];
+  float state_b1 = state_.b[1];
+  for (size_t k = 0, x_size = x.size(); k < x_size; ++k) {
+    // Use a temporary variable for `x[k]` to allow in-place processing.
+    const float tmp = x[k];
+    float y_k = config_b0 * tmp + config_b1 * state_b0 + config_b2 * state_b1 -
+                config_a0 * state_a0 - config_a1 * state_a1;
+    state_b1 = state_b0;
+    state_b0 = tmp;
+    state_a1 = state_a0;
+    state_a0 = y_k;
+    y[k] = y_k;
+  }
+  state_.a[0] = state_a0;
+  state_.a[1] = state_a1;
+  state_.b[0] = state_b0;
+  state_.b[1] = state_b1;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h
new file mode 100644
index 0000000000..5273ff9386
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Transposed direct form I implementation of a bi-quad filter.
+//        b[0] + b[1] • z^(-1) + b[2] • z^(-2)
+// H(z) = ------------------------------------
+//          1 + a[1] • z^(-1) + a[2] • z^(-2)
+class BiQuadFilter {
+ public:
+  // Normalized filter coefficients.
+  // Computed as `[b, a] = scipy.signal.butter(N=2, Wn, btype)`.
+  struct Config {
+    float b[3];  // b[0], b[1], b[2].
+    float a[2];  // a[1], a[2].
+  };
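+
+  // Example (taken from the unit tests): a second-order Butterworth high-pass
+  // filter with a 60 Hz cut-off at 48 kHz, i.e.
+  // `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`, yields
+  // b = {0.99446179f, -1.98892358f, 0.99446179f} and
+  // a = {-1.98889291f, 0.98895425f}; the leading a[0] = 1 is implicit.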
+
+  explicit BiQuadFilter(const Config& config);
+  BiQuadFilter(const BiQuadFilter&) = delete;
+  BiQuadFilter& operator=(const BiQuadFilter&) = delete;
+  ~BiQuadFilter();
+
+  // Sets the filter configuration and resets the internal state.
+  void SetConfig(const Config& config);
+
+  // Zeroes the filter state.
+  void Reset();
+
+  // Filters `x` and writes the output in `y`, which must have the same length
+  // as `x`. In-place processing is supported.
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+
+ private:
+  Config config_;
+  struct State {
+    float b[2];
+    float a[2];
+  } state_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build
new file mode 100644
index 0000000000..f396f42e57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    CXXFLAGS += [
+        "-mfpu=neon"
+    ]
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("biquad_filter_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc
new file mode 100644
index 0000000000..a53036b08e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/biquad_filter.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kFrameSize = 8;
+constexpr int kNumFrames = 4;
+using FloatArraySequence =
+    std::array<std::array<float, kFrameSize>, kNumFrames>;
+
+constexpr FloatArraySequence kBiQuadInputSeq = {
+    {{{-87.166290f, -8.029022f, 101.619583f, -0.294296f, -5.825764f,
+       -8.890625f, 10.310432f, 54.845333f}},
+     {{-64.647644f, -6.883945f, 11.059189f, -95.242538f, -108.870834f,
+       11.024944f, 63.044102f, -52.709583f}},
+     {{-32.350529f, -18.108028f, -74.022339f, -8.986874f, -1.525581f,
+       103.705513f, 6.346226f, -14.319557f}},
+     {{22.645832f, -64.597153f, 55.462521f, -109.393188f, 10.117825f,
+       -40.019642f, -98.612228f, -8.330326f}}}};
+
+// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`.
+constexpr BiQuadFilter::Config kBiQuadConfig{
+    {0.99446179f, -1.98892358f, 0.99446179f},
+    {-1.98889291f, 0.98895425f}};
+
+// Comparing to scipy. The expected output is generated as follows:
+//   zi = np.float32([0, 0])
+//   for i in range(4):
+//     yn, zi = scipy.signal.lfilter(B, A, x[i], zi=zi)
+//     print(yn)
+constexpr FloatArraySequence kBiQuadOutputSeq = {
+    {{{-86.68354497f, -7.02175351f, 102.10290352f, -0.37487333f, -5.87205847f,
+       -8.85521608f, 10.33772563f, 54.51157181f}},
+     {{-64.92531604f, -6.76395978f, 11.15534507f, -94.68073341f,
+       -107.18177856f, 13.24642474f, 64.84288941f, -50.97822629f}},
+     {{-30.1579652f, -15.64850899f, -71.06662821f, -5.5883229f, 1.91175353f,
+       106.5572003f, 8.57183046f, -12.06298473f}},
+     {{24.84286614f, -62.18094158f, 57.91488056f, -106.65685933f,
+       13.38760103f, -36.60367134f, -94.44880104f, -3.59920354f}}}};
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views such
+// that their relative error is above a given threshold. If the expected value
+// of a pair is 0, `tolerance` is used to check the absolute error.
+void ExpectNearRelative(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        const float tolerance) {
+  // The relative error is undefined when the expected value is 0.
+  // When that happens, check the absolute error instead. `safe_den` is used
+  // below to implement such logic.
+  auto safe_den = [](float x) { return (x == 0.0f) ? 1.0f : std::fabs(x); };
+  ASSERT_EQ(expected.size(), computed.size());
+  for (size_t i = 0; i < expected.size(); ++i) {
+    const float abs_diff = std::fabs(expected[i] - computed[i]);
+    // No failure when the values are equal.
+    if (abs_diff == 0.0f) {
+      continue;
+    }
+    SCOPED_TRACE(i);
+    SCOPED_TRACE(expected[i]);
+    SCOPED_TRACE(computed[i]);
+    EXPECT_LE(abs_diff / safe_den(expected[i]), tolerance);
+  }
+}
+
+// Checks that filtering works when different containers are used both as input
+// and as output.
+TEST(BiQuadFilterTest, FilterNotInPlace) {
+  BiQuadFilter filter(kBiQuadConfig);
+  std::array<float, kFrameSize> samples;
+
+  // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+
+  for (int i = 0; i < kNumFrames; ++i) {
+    SCOPED_TRACE(i);
+    filter.Process(kBiQuadInputSeq[i], samples);
+    ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f);
+  }
+}
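+
+// (A relative tolerance is used above because the expected outputs span more
+// than two orders of magnitude; a single absolute tolerance would be either
+// too loose for the small values or too tight for the large ones.)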
+
+// Checks that filtering works when the same container is used both as input
+// and as output.
+TEST(BiQuadFilterTest, FilterInPlace) {
+  BiQuadFilter filter(kBiQuadConfig);
+  std::array<float, kFrameSize> samples;
+
+  // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+
+  for (int i = 0; i < kNumFrames; ++i) {
+    SCOPED_TRACE(i);
+    std::copy(kBiQuadInputSeq[i].begin(), kBiQuadInputSeq[i].end(),
+              samples.begin());
+    filter.Process({samples}, {samples});
+    ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f);
+  }
+}
+
+// Checks that different configurations produce different outputs.
+TEST(BiQuadFilterTest, SetConfigDifferentOutput) {
+  BiQuadFilter filter(/*config=*/{{0.97803048f, -1.95606096f, 0.97803048f},
+                                  {-1.95557824f, 0.95654368f}});
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.SetConfig(
+      {{0.09763107f, 0.19526215f, 0.09763107f}, {-0.94280904f, 0.33333333f}});
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_NE(samples1, samples2);
+}
+
+// Checks that when `SetConfig()` is called but the filter coefficients are the
+// same, the filter state is reset.
+TEST(BiQuadFilterTest, SetConfigResetsState) {
+  BiQuadFilter filter(kBiQuadConfig);
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.SetConfig(kBiQuadConfig);
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_EQ(samples1, samples2);
+}
+
+// Checks that when `Reset()` is called the filter state is reset.
+TEST(BiQuadFilterTest, Reset) {
+  BiQuadFilter filter(kBiQuadConfig);
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.Reset();
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_EQ(samples1, samples2);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
new file mode 100644
index 0000000000..fd759c63e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kClippingPredictorMaxGainChange = 15;
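+
+// Note: `kGainMap` (gain_map_internal.h) maps each analog mic volume in
+// [0, 255] to an estimated gain in dB; ComputeVolumeUpdate() below walks that
+// map one volume step at a time to translate a dB gain error back into a
+// volume change.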
+
+// Returns an input volume in the [`min_input_volume`, `max_input_volume`]
+// range that reduces `gain_error_db`, which is a gain error estimated when
+// `input_volume` was applied, according to a fixed gain map.
+int ComputeVolumeUpdate(int gain_error_db,
+                        int input_volume,
+                        int min_input_volume,
+                        int max_input_volume) {
+  RTC_DCHECK_GE(input_volume, 0);
+  RTC_DCHECK_LE(input_volume, max_input_volume);
+  if (gain_error_db == 0) {
+    return input_volume;
+  }
+  int new_volume = input_volume;
+  if (gain_error_db > 0) {
+    while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
+           new_volume < max_input_volume) {
+      ++new_volume;
+    }
+  } else {
+    while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
+           new_volume > min_input_volume) {
+      --new_volume;
+    }
+  }
+  return new_volume;
+}
+
+float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
+  const float crest_factor =
+      FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
+  return crest_factor;
+}
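+
+// Illustrative numbers: a pure sine has a crest factor of
+// 20*log10(sqrt(2)) ~= 3 dB, whereas clean speech typically measures well
+// above 10 dB; a sudden crest factor drop relative to the reference window
+// therefore hints that peaks are being flattened, i.e. clipping.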
+
+// Crest factor-based clipping prediction and clipped level step estimation.
+class ClippingEventPredictor : public ClippingPredictor {
+ public:
+  // ClippingEventPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and an estimation peak threshold `clipping_threshold` and a crest factor
+  // drop threshold `crest_factor_margin` (both in dB).
+  ClippingEventPredictor(int num_channels,
+                         int window_length,
+                         int reference_window_length,
+                         int reference_window_delay,
+                         float clipping_threshold,
+                         float crest_factor_margin)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        crest_factor_margin_(crest_factor_margin) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
+  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
+  ~ClippingEventPredictor() {}
+
+  void Reset() {
+    const int num_channels = ch_buffers_.size();
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_[i]->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics in
+  // `ch_buffers_`.
+  void Analyze(const AudioFrameView<const float>& frame) {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step equal to `default_clipped_level_step_`
+  // if at least `GetMinFramesProcessed()` frames have been processed since the
+  // last reset and a clipping event is predicted. `level`, `min_mic_level`,
+  // and `max_mic_level` are limited to [0, 255] and `default_step` to
+  // [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const {
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    if (PredictClippingEvent(channel)) {
+      const int new_level =
+          rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
+      const int step = level - new_level;
+      if (step > 0) {
+        return step;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping events based on the processed audio frames. Returns
+  // true if a clipping event is likely.
+  bool PredictClippingEvent(int channel) const {
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return false;
+    }
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return false;
+    }
+    const float crest_factor = ComputeCrestFactor(metrics.value());
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    if (crest_factor < reference_crest_factor - crest_factor_margin_) {
+      return true;
+    }
+    return false;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;
+  const float crest_factor_margin_;
+};
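+
+// Note: CreateClippingPredictor() at the bottom of this file instantiates
+// ClippingEventPredictor above for `Mode::kClippingEventPrediction`, and
+// ClippingPeakPredictor below for both
+// `Mode::kAdaptiveStepClippingPeakPrediction` and
+// `Mode::kFixedStepClippingPeakPrediction`.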
+
+// Performs crest factor-based clipping peak prediction.
+class ClippingPeakPredictor : public ClippingPredictor {
+ public:
+  // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to
+  // values higher than zero); window size `window_length` and reference
+  // window size `reference_window_length` (both referring to the number of
+  // frames in the respective sliding windows and limited to values higher
+  // than zero); reference window delay `reference_window_delay` (delay in
+  // frames, limited to values zero and higher with an additional requirement
+  // of `window_length` < `reference_window_length` +
+  // `reference_window_delay`); and a clipping prediction threshold
+  // `clipping_threshold` (in dB). Adaptive clipped level step estimation is
+  // used if `adaptive_step_estimation` is true.
+  explicit ClippingPeakPredictor(int num_channels,
+                                 int window_length,
+                                 int reference_window_length,
+                                 int reference_window_delay,
+                                 int clipping_threshold,
+                                 bool adaptive_step_estimation)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        adaptive_step_estimation_(adaptive_step_estimation) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
+  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
+  ~ClippingPeakPredictor() {}
+
+  void Reset() {
+    const int num_channels = ch_buffers_.size();
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_[i]->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics in
+  // `ch_buffers_`.
+  void Analyze(const AudioFrameView<const float>& frame) {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
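+
+  // Note: the `average` field pushed above holds the mean of the squared
+  // samples; ComputeCrestFactor() takes its square root (the RMS) before
+  // converting to dBFS.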
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step (equal to
+  // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
+  // least `GetMinFramesProcessed()` frames have been processed since the last
+  // reset and a clipping event is predicted. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const {
+    RTC_DCHECK_GE(channel, 0);
+    RTC_DCHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    absl::optional<float> estimate_db = EstimatePeakValue(channel);
+    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
+      int step = 0;
+      if (!adaptive_step_estimation_) {
+        step = default_step;
+      } else {
+        const int estimated_gain_change =
+            rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
+                           -kClippingPredictorMaxGainChange, 0);
+        step =
+            std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
+                                                 min_mic_level, max_mic_level),
+                     default_step);
+      }
+      const int new_level =
+          rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
+      if (level > new_level) {
+        return level - new_level;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping sample peaks based on the processed audio frames.
+  // Returns the estimated peak value if clipping is predicted. Otherwise
+  // returns absl::nullopt.
+  absl::optional<float> EstimatePeakValue(int channel) const {
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return absl::nullopt;
+    }
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return absl::nullopt;
+    }
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    const float& mean_squares = metrics.value().average;
+    const float projected_peak =
+        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
+    return projected_peak;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const int clipping_threshold_;
+  const bool adaptive_step_estimation_;
+};
+
+} // namespace
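+
+// Usage sketch (illustrative; not exercised in this file): the factory below
+// is driven by the AGC1 analog controller config, e.g.
+//
+//   AudioProcessing::Config::GainController1::AnalogGainController::
+//       ClippingPredictor config;
+//   config.enabled = true;  // `config.mode` selects one of the cases below.
+//   std::unique_ptr<ClippingPredictor> predictor =
+//       CreateClippingPredictor(/*num_channels=*/1, config);
+//   // Then, for every 10 ms capture frame:
+//   //   predictor->Analyze(frame);
+//   //   predictor->EstimateClippedLevelStep(...);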
+
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1::AnalogGainController::
+        ClippingPredictor& config) {
+  if (!config.enabled) {
+    RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
+    return nullptr;
+  }
+  RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
+  using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+      AnalogGainController::ClippingPredictor::Mode;
+  switch (config.mode) {
+    case ClippingPredictorMode::kClippingEventPrediction:
+      return std::make_unique<ClippingEventPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          config.crest_factor_margin);
+    case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/true);
+    case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/false);
+  }
+  RTC_DCHECK_NOTREACHED();
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h
new file mode 100644
index 0000000000..14612508c0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Frame-wise clipping prediction and clipped level step estimation. Analyzes
+// 10 ms multi-channel frames and estimates an analog mic level decrease step
+// to possibly avoid clipping when predicted. `Analyze()` and
+// `EstimateClippedLevelStep()` can be called in any order.
+class ClippingPredictor {
+ public:
+  virtual ~ClippingPredictor() = default;
+
+  virtual void Reset() = 0;
+
+  // Analyzes a 10 ms multi-channel audio frame.
+  virtual void Analyze(const AudioFrameView<const float>& frame) = 0;
+
+  // Predicts if clipping is going to occur for the specified `channel` in the
+  // near-future and, if so, returns a recommended analog mic level decrease
+  // step. Returns absl::nullopt if clipping is not predicted.
+  // `level` is the current analog mic level, `default_step` is the amount the
+  // mic level is lowered by the analog controller with every clipping event,
+  // and `min_mic_level` and `max_mic_level` define the range of allowed
+  // analog mic levels.
+  virtual absl::optional<int> EstimateClippedLevelStep(
+      int channel,
+      int level,
+      int default_step,
+      int min_mic_level,
+      int max_mic_level) const = 0;
+};
+
+// Creates a ClippingPredictor based on the provided `config`. When enabled,
+// the following must hold for `config`:
+// `window_length < reference_window_length + reference_window_delay`.
+// Returns `nullptr` if `config.enabled` is false.
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1::AnalogGainController::
+        ClippingPredictor& config);
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build
new file mode 100644
index 0000000000..9cddd69abe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("clipping_predictor_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc new file mode 100644 index 0000000000..fe4cf2a154 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+bool ClippingPredictorLevelBuffer::Level::operator==(const Level& level) const {
+  constexpr float kEpsilon = 1e-6f;
+  return std::fabs(average - level.average) < kEpsilon &&
+         std::fabs(max - level.max) < kEpsilon;
+}
+
+ClippingPredictorLevelBuffer::ClippingPredictorLevelBuffer(int capacity)
+    : tail_(-1), size_(0), data_(std::max(1, capacity)) {
+  if (capacity > kMaxCapacity) {
+    RTC_LOG(LS_WARNING) << "[agc]: ClippingPredictorLevelBuffer exceeds the "
+                        << "maximum allowed capacity. Capacity: " << capacity;
+  }
+  RTC_DCHECK(!data_.empty());
+}
+
+void ClippingPredictorLevelBuffer::Reset() {
+  tail_ = -1;
+  size_ = 0;
+}
+
+void ClippingPredictorLevelBuffer::Push(Level level) {
+  ++tail_;
+  if (tail_ == Capacity()) {
+    tail_ = 0;
+  }
+  if (size_ < Capacity()) {
+    size_++;
+  }
+  data_[tail_] = level;
+}
+
+// TODO(bugs.webrtc.org/12774): Optimize partial computation for long buffers.
+absl::optional<ClippingPredictorLevelBuffer::Level>
+ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
+                                                    int num_items) const {
+  RTC_DCHECK_GE(delay, 0);
+  RTC_DCHECK_LT(delay, Capacity());
+  RTC_DCHECK_GT(num_items, 0);
+  RTC_DCHECK_LE(num_items, Capacity());
+  RTC_DCHECK_LE(delay + num_items, Capacity());
+  if (delay + num_items > Size()) {
+    return absl::nullopt;
+  }
+  float sum = 0.0f;
+  float max = 0.0f;
+  for (int i = 0; i < num_items && i < Size(); ++i) {
+    int idx = tail_ - delay - i;
+    if (idx < 0) {
+      idx += Capacity();
+    }
+    sum += data_[idx].average;
+    max = std::fmax(data_[idx].max, max);
+  }
+  return absl::optional<Level>({sum / static_cast<float>(num_items), max});
+}
+
+}  // namespace webrtc
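The wrap-around arithmetic in `ComputePartialMetrics()` is the heart of the ring buffer: the newest item lives at `tail_`, and older items are reached by stepping backwards modulo the capacity. A minimal standalone sketch of that indexing (the helper name is hypothetical, not part of the patch):

    // Maps (delay, i) to a physical slot the way ComputePartialMetrics() does.
    // Precondition (enforced by the DCHECKs above): delay + i < capacity.
    int PhysicalIndex(int tail, int delay, int i, int capacity) {
      int idx = tail - delay - i;  // Step back from the newest item.
      if (idx < 0) {
        idx += capacity;  // Wrap around at most once.
      }
      return idx;
    }

    // E.g., with capacity 3 and the newest item in slot 0 (tail == 0),
    // PhysicalIndex(0, 1, 0, 3) returns 2: the second most recent push
    // wrapped to the last physical slot.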
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h
new file mode 100644
index 0000000000..c9032773a6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+// A circular buffer to store frame-wise `Level` items for clipping prediction.
+// The current implementation is not optimized for large buffer lengths.
+class ClippingPredictorLevelBuffer {
+ public:
+  struct Level {
+    float average;
+    float max;
+    bool operator==(const Level& level) const;
+  };
+
+  // Recommended maximum capacity. It is possible to create a buffer with a
+  // larger capacity, but the implementation is not optimized for large values.
+  static constexpr int kMaxCapacity = 100;
+
+  // Ctor. Sets the buffer capacity to max(1, `capacity`) and logs a warning
+  // message if the capacity is greater than `kMaxCapacity`.
+  explicit ClippingPredictorLevelBuffer(int capacity);
+  ~ClippingPredictorLevelBuffer() {}
+  ClippingPredictorLevelBuffer(const ClippingPredictorLevelBuffer&) = delete;
+  ClippingPredictorLevelBuffer& operator=(const ClippingPredictorLevelBuffer&) =
+      delete;
+
+  void Reset();
+
+  // Returns the current number of items stored in the buffer.
+  int Size() const { return size_; }
+
+  // Returns the capacity of the buffer.
+  int Capacity() const { return data_.size(); }
+
+  // Adds a `level` item into the circular buffer `data_`. Stores at most
+  // `Capacity()` items. If more items are pushed, the new item replaces the
+  // least recently pushed item.
+  void Push(Level level);
+
+  // If at least `num_items` + `delay` items have been pushed, returns the
+  // average and maximum value for the `num_items` most recently pushed items
+  // at delays from `delay` to `delay` + `num_items` - 1 (a delay equal to zero
+  // corresponds to the most recently pushed item). The value of `delay` is
+  // limited to [0, N] and `num_items` to [1, M] where N + M is the capacity of
+  // the buffer.
+  absl::optional<Level> ComputePartialMetrics(int delay, int num_items) const;
+
+ private:
+  int tail_;
+  int size_;
+  std::vector<Level> data_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
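A minimal usage sketch of the API above, mirroring the unit tests that follow (the helper name `LevelBufferExample` is illustrative, not upstream code):

    #include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"

    bool LevelBufferExample() {
      webrtc::ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
      buffer.Push({/*average=*/1.0f, /*max=*/2.0f});
      buffer.Push({/*average=*/3.0f, /*max=*/6.0f});
      // Newest item only: {3, 6}.
      auto newest = buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1);
      // Both items: average (1 + 3) / 2 = 2, max = 6.
      auto both = buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2);
      // After a reset only one item is available, so a two-item query fails.
      buffer.Reset();
      buffer.Push({/*average=*/5.0f, /*max=*/10.0f});
      auto partial = buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2);
      return newest.has_value() && both.has_value() && !partial.has_value();
    }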
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc
new file mode 100644
index 0000000000..7af9a436c9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+class ClippingPredictorLevelBufferParametrization
+    : public ::testing::TestWithParam<int> {
+ protected:
+  int capacity() const { return GetParam(); }
+};
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckEmptyBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 0);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckHalfEmptyBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < buffer.Capacity() / 2; ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1) / 2);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckFullBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < buffer.Capacity(); ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckLargeBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < 2 * buffer.Capacity(); ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckSizeAfterReset) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  buffer.Push({1, 1});
+  buffer.Push({1, 1});
+  buffer.Reset();
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 0);
+  buffer.Push({1, 1});
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 1);
+}
+
+INSTANTIATE_TEST_SUITE_P(ClippingPredictorLevelBufferTest,
+                         ClippingPredictorLevelBufferParametrization,
+                         ::testing::Values(-1, 0, 1, 123));
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterFullBuffer) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 2});
+  buffer.Push({3, 6});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{3, 6})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{1, 2})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{2, 6})));
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterPushBeyondCapacity) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 1});
+  buffer.Push({3, 6});
+  buffer.Push({5, 10});
+  buffer.Push({7, 14});
+  buffer.Push({6, 12});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 12})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6.5f, 14})));
+}
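+
+// A worked trace of the eviction behavior exercised above, assuming capacity
+// 2: after pushing {1, 1}, {3, 6}, {5, 10}, {7, 14}, {6, 12}, only the last
+// two items survive ({7, 14}, then {6, 12}). Hence delay 0 yields {6, 12},
+// delay 1 yields {7, 14}, and the two-item window averages to
+// (7 + 6) / 2 = 6.5 with max(14, 12) = 14.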
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterTooFewItems) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/4);
+  buffer.Push({1, 2});
+  buffer.Push({3, 6});
+  EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/3),
+            absl::nullopt);
+  EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/2, /*num_items=*/1),
+            absl::nullopt);
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterReset) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 2});
+  buffer.Reset();
+  buffer.Push({5, 10});
+  buffer.Push({7, 14});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{5, 10})));
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc
new file mode 100644
index 0000000000..af73107749
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+
+#include <cstdint>
+#include <limits>
+#include <tuple>
+
+#include "rtc_base/checks.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor::Mode;
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kSamplesPerChannel = kSampleRateHz / 100;
+constexpr int kMaxMicLevel = 255;
+constexpr int kMinMicLevel = 12;
+constexpr int kDefaultClippedLevelStep = 15;
+constexpr float kMaxSampleS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::max());
+
+// Threshold in dB corresponding to a signal with an amplitude equal to 99% of
+// the dynamic range - i.e., computed as `20*log10(0.99)`.
+constexpr float kClippingThresholdDb = -0.08729610804900176f;
+
+void CallAnalyze(int num_calls,
+                 const AudioFrameView<const float>& frame,
+                 ClippingPredictor& predictor) {
+  for (int i = 0; i < num_calls; ++i) {
+    predictor.Analyze(frame);
+  }
+}
+
+// Creates and analyzes an audio frame with a non-zero (approx. 4.15dB) crest
+// factor.
+void AnalyzeNonZeroCrestFactorAudio(int num_calls,
+                                    int num_channels,
+                                    float peak_ratio,
+                                    ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.0f);
+  std::vector<float*> audio(num_channels);
+  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.0f);
+  for (int channel = 0; channel < num_channels; ++channel) {
+    audio[channel] = &audio_data[channel * kSamplesPerChannel];
+    for (int sample = 0; sample < kSamplesPerChannel; sample += 10) {
+      audio[channel][sample] = 0.1f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 1] = 0.2f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 2] = 0.3f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 3] = 0.4f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 4] = 0.5f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 5] = 0.6f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 6] = 0.7f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 7] = 0.8f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 8] = 0.9f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 9] = 1.0f * peak_ratio * kMaxSampleS16;
+    }
+  }
+  AudioFrameView<const float> frame(audio.data(), num_channels,
+                                    kSamplesPerChannel);
+  CallAnalyze(num_calls, frame, predictor);
+}
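+
+// The quoted "approx. 4.15dB" crest factor can be verified by hand: each run
+// of ten samples ramps 0.1, 0.2, ..., 1.0 (times peak_ratio), so the peak is
+// 1.0 * peak_ratio while the RMS is sqrt((0.01 + 0.04 + ... + 1.0) / 10) =
+// sqrt(0.385) ~= 0.6205 times peak_ratio, giving 20 * log10(1 / 0.6205) ~=
+// 4.15 dB.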
+
+void CheckChannelEstimatesWithValue(int num_channels,
+                                    int level,
+                                    int default_step,
+                                    int min_mic_level,
+                                    int max_mic_level,
+                                    const ClippingPredictor& predictor,
+                                    int expected) {
+  for (int i = 0; i < num_channels; ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_THAT(predictor.EstimateClippedLevelStep(
+                    i, level, default_step, min_mic_level, max_mic_level),
+                Optional(Eq(expected)));
+  }
+}
+
+void CheckChannelEstimatesWithoutValue(int num_channels,
+                                       int level,
+                                       int default_step,
+                                       int min_mic_level,
+                                       int max_mic_level,
+                                       const ClippingPredictor& predictor) {
+  for (int i = 0; i < num_channels; ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_EQ(predictor.EstimateClippedLevelStep(i, level, default_step,
+                                                 min_mic_level, max_mic_level),
+              absl::nullopt);
+  }
+}
+
+// Creates and analyzes an audio frame with a zero crest factor.
+void AnalyzeZeroCrestFactorAudio(int num_calls,
+                                 int num_channels,
+                                 float peak_ratio,
+                                 ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.f);
+  std::vector<float*> audio(num_channels);
+  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.f);
+  for (int channel = 0; channel < num_channels; ++channel) {
+    audio[channel] = &audio_data[channel * kSamplesPerChannel];
+    for (int sample = 0; sample < kSamplesPerChannel; ++sample) {
+      audio[channel][sample] = peak_ratio * kMaxSampleS16;
+    }
+  }
+  auto frame = AudioFrameView<const float>(audio.data(), num_channels,
+                                           kSamplesPerChannel);
+  CallAnalyze(num_calls, frame, predictor);
+}
+
+TEST(ClippingPeakPredictorTest, NoPredictorCreated) {
+  auto predictor =
+      CreateClippingPredictor(kNumChannels, /*config=*/{/*enabled=*/false});
+  EXPECT_FALSE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, ClippingEventPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels,
+      /*config=*/{/*enabled=*/true,
+                  /*mode=*/ClippingPredictorMode::kClippingEventPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, AdaptiveStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels, /*config=*/{
+          /*enabled=*/true,
+          /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, FixedStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels, /*config=*/{
+          /*enabled=*/true,
+          /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+class ClippingPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<int, int, int, int>> {
+ protected:
+  int num_channels() const { return std::get<0>(GetParam()); }
+  ClippingPredictorConfig GetConfig(ClippingPredictorMode mode) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/mode,
+            /*window_length=*/std::get<1>(GetParam()),
+            /*reference_window_length=*/std::get<2>(GetParam()),
+            /*reference_window_delay=*/std::get<3>(GetParam()),
+            /*clipping_threshold=*/-1.0f,
+            /*crest_factor_margin=*/0.5f};
+  }
+};
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorEstimateAfterCrestFactorDrop) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, num_channels(),
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(
+      num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
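+
+// Timing scheme shared by the parameterized tests: first
+// `reference_window_length + reference_window_delay - window_length` frames
+// are analyzed, after which no estimate is expected yet, and then
+// `window_length` more frames fill the observation window, for a total of
+// `reference_window_length + reference_window_delay` analyzed frames. With
+// all three lengths equal to 5, that is 5 warm-up frames plus 5 observed
+// frames.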
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorEstimateAfterHighCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(
+      num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+                         ClippingPredictorParameterization,
+                         ::testing::Combine(::testing::Values(1, 5),
+                                            ::testing::Values(1, 5, 10),
+                                            ::testing::Values(1, 5),
+                                            ::testing::Values(0, 1, 5)));
+
+class ClippingEventPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<float, float>> {
+ protected:
+  ClippingPredictorConfig GetConfig() const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+            /*window_length=*/5,
+            /*reference_window_length=*/5,
+            /*reference_window_delay=*/5,
+            /*clipping_threshold=*/std::get<0>(GetParam()),
+            /*crest_factor_margin=*/std::get<1>(GetParam())};
+  }
+};
+
+TEST_P(ClippingEventPredictorParameterization,
+       CheckEstimateAfterCrestFactorDrop) {
+  const ClippingPredictorConfig config = GetConfig();
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+  // TODO(bugs.webrtc.org/12774): Remove 4.15f threshold and split tests.
+  if (config.clipping_threshold < kClippingThresholdDb &&
+      config.crest_factor_margin < 4.15f) {
+    CheckChannelEstimatesWithValue(
+        kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+  } else {
+    CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+                         ClippingEventPredictorParameterization,
+                         ::testing::Combine(::testing::Values(-1.0f, 0.0f),
+                                            ::testing::Values(3.0f, 4.16f)));
+
+class ClippingPredictorModeParameterization
+    : public ::testing::TestWithParam<ClippingPredictorMode> {
+ protected:
+  ClippingPredictorConfig GetConfig(float clipping_threshold_dbfs) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/GetParam(),
+            /*window_length=*/5,
+            /*reference_window_length=*/5,
+            /*reference_window_delay=*/5,
+            /*clipping_threshold=*/clipping_threshold_dbfs,
+            /*crest_factor_margin=*/3.0f};
+  }
+};
+
+TEST_P(ClippingPredictorModeParameterization,
+       CheckEstimateAfterHighCrestFactorWithNoClippingMargin) {
+  const ClippingPredictorConfig config = GetConfig(
+      /*clipping_threshold_dbfs=*/0.0f);
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // Since the clipping threshold is set to 0 dBFS, `EstimateClippedLevelStep()`
+  // is expected to return an unavailable value.
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST_P(ClippingPredictorModeParameterization,
+       CheckEstimateAfterHighCrestFactorWithClippingMargin) {
+  const ClippingPredictorConfig config =
+      GetConfig(/*clipping_threshold_dbfs=*/-1.0f);
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+  const float expected_step =
+      config.mode == ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction
+          ? 17
+          : kDefaultClippedLevelStep;
+  CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+                                 kDefaultClippedLevelStep, kMinMicLevel,
+                                 kMaxMicLevel, *predictor, expected_step);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    GainController1ClippingPredictor,
+    ClippingPredictorModeParameterization,
+    ::testing::Values(
+        ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+        ClippingPredictorMode::kFixedStepClippingPeakPrediction));
+
+TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f,
+      /*crest_factor_margin=*/3.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  predictor->Reset();
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  predictor->Reset();
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+                                 kDefaultClippedLevelStep, kMinMicLevel,
+                                 kMaxMicLevel, *predictor, /*expected=*/17);
+}
+
+TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+ constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue( + kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build new file mode 100644 index 0000000000..274cae23a3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True 
+ DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc new file mode 100644 index 0000000000..221b499e32 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/agc2/limiter_db_gain_curve.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+std::pair<double, double> ComputeLinearApproximationParams(
+    const LimiterDbGainCurve* limiter,
+    const double x) {
+  const double m = limiter->GetGainFirstDerivativeLinear(x);
+  const double q = limiter->GetGainLinear(x) - m * x;
+  return {m, q};
+}
+
+double ComputeAreaUnderPiecewiseLinearApproximation(
+    const LimiterDbGainCurve* limiter,
+    const double x0,
+    const double x1) {
+  RTC_CHECK_LT(x0, x1);
+
+  // Linear approximation in x0 and x1.
+  double m0, q0, m1, q1;
+  std::tie(m0, q0) = ComputeLinearApproximationParams(limiter, x0);
+  std::tie(m1, q1) = ComputeLinearApproximationParams(limiter, x1);
+
+  // Intersection point between two adjacent linear pieces.
+  RTC_CHECK_NE(m1, m0);
+  const double x_split = (q0 - q1) / (m1 - m0);
+  RTC_CHECK_LT(x0, x_split);
+  RTC_CHECK_LT(x_split, x1);
+
+  auto area_under_linear_piece = [](double x_l, double x_r, double m,
+                                    double q) {
+    return x_r * (m * x_r / 2.0 + q) - x_l * (m * x_l / 2.0 + q);
+  };
+  return area_under_linear_piece(x0, x_split, m0, q0) +
+         area_under_linear_piece(x_split, x1, m1, q1);
+}
+
+// Computes the approximation error in the limiter region for a given interval.
+// The error is computed as the difference between the areas beneath the
+// limiter curve to approximate and its linear under-approximation.
+double LimiterUnderApproximationNegativeError(const LimiterDbGainCurve* limiter,
+                                              const double x0,
+                                              const double x1) {
+  const double area_limiter = limiter->GetGainIntegralLinear(x0, x1);
+  const double area_interpolated_curve =
+      ComputeAreaUnderPiecewiseLinearApproximation(limiter, x0, x1);
+  RTC_CHECK_GE(area_limiter, area_interpolated_curve);
+  return area_limiter - area_interpolated_curve;
+}
+
+// Automatically finds where to sample the beyond-knee region of a limiter using
+// a greedy optimization algorithm that iteratively decreases the approximation
+// error.
+// The solution is sub-optimal because the algorithm is greedy and the points
+// are assigned by halving intervals (starting with the whole beyond-knee region
+// as a single interval). However, even if sub-optimal, this algorithm works
+// well in practice and it is efficiently implemented using priority queues.
+std::vector<double> SampleLimiterRegion(const LimiterDbGainCurve* limiter) {
+  static_assert(kInterpolatedGainCurveBeyondKneePoints > 2, "");
+
+  struct Interval {
+    Interval() = default;  // Ctor required by std::priority_queue.
+    Interval(double l, double r, double e) : x0(l), x1(r), error(e) {
+      RTC_CHECK(x0 < x1);
+    }
+    bool operator<(const Interval& other) const { return error < other.error; }
+
+    double x0;
+    double x1;
+    double error;
+  };
+
+  std::priority_queue<Interval, std::vector<Interval>> q;
+  q.emplace(limiter->limiter_start_linear(), limiter->max_input_level_linear(),
+            LimiterUnderApproximationNegativeError(
+                limiter, limiter->limiter_start_linear(),
+                limiter->max_input_level_linear()));
+
+  // Iteratively find points by halving the interval with greatest error.
+  while (q.size() < kInterpolatedGainCurveBeyondKneePoints) {
+    // Get the interval with highest error.
+    const auto interval = q.top();
+    q.pop();
+
+    // Split `interval` and enqueue.
+    double x_split = (interval.x0 + interval.x1) / 2.0;
+    q.emplace(interval.x0, x_split,
+              LimiterUnderApproximationNegativeError(limiter, interval.x0,
+                                                     x_split));  // Left.
+    q.emplace(x_split, interval.x1,
+              LimiterUnderApproximationNegativeError(limiter, x_split,
+                                                     interval.x1));  // Right.
+  }
+
+  // Copy x1 values and sort them.
+  RTC_CHECK_EQ(q.size(), kInterpolatedGainCurveBeyondKneePoints);
+  std::vector<double> samples(kInterpolatedGainCurveBeyondKneePoints);
+  for (size_t i = 0; i < kInterpolatedGainCurveBeyondKneePoints; ++i) {
+    const auto interval = q.top();
+    q.pop();
+    samples[i] = interval.x1;
+  }
+  RTC_CHECK(q.empty());
+  std::sort(samples.begin(), samples.end());
+
+  return samples;
+}
+
+// Compute the parameters to over-approximate the knee region via linear
+// interpolation. Over-approximating is saturation-safe since the knee region is
+// convex.
+void PrecomputeKneeApproxParams(const LimiterDbGainCurve* limiter,
+                                test::InterpolatedParameters* parameters) {
+  static_assert(kInterpolatedGainCurveKneePoints > 2, "");
+  // Get `kInterpolatedGainCurveKneePoints` - 1 equally spaced points.
+  const std::vector<double> points = test::LinSpace(
+      limiter->knee_start_linear(), limiter->limiter_start_linear(),
+      kInterpolatedGainCurveKneePoints - 1);
+
+  // Set the first two points. The second is computed to help with the beginning
+  // of the knee region, which has high curvature.
+  parameters->computed_approximation_params_x[0] = points[0];
+  parameters->computed_approximation_params_x[1] =
+      (points[0] + points[1]) / 2.0;
+  // Copy the remaining points.
+  std::copy(std::begin(points) + 1, std::end(points),
+            std::begin(parameters->computed_approximation_params_x) + 2);
+
+  // Compute (m, q) pairs for each linear piece y = mx + q.
+  for (size_t i = 0; i < kInterpolatedGainCurveKneePoints - 1; ++i) {
+    const double x0 = parameters->computed_approximation_params_x[i];
+    const double x1 = parameters->computed_approximation_params_x[i + 1];
+    const double y0 = limiter->GetGainLinear(x0);
+    const double y1 = limiter->GetGainLinear(x1);
+    RTC_CHECK_NE(x1, x0);
+    parameters->computed_approximation_params_m[i] = (y1 - y0) / (x1 - x0);
+    parameters->computed_approximation_params_q[i] =
+        y0 - parameters->computed_approximation_params_m[i] * x0;
+  }
+}
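+
+// A worked trace of SampleLimiterRegion() above, assuming (hypothetically)
+// 4 beyond-knee points: the queue starts with a single interval covering the
+// whole beyond-knee region; every iteration pops the interval with the
+// largest under-approximation error and pushes its two halves, so the queue
+// grows 1 -> 2 -> 3 -> 4. The sorted x1 values of the final intervals become
+// the sampling points.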
+
+// Compute the parameters to under-approximate the beyond-knee region via linear
+// interpolation and greedy sampling. Under-approximating is saturation-safe
+// since the beyond-knee region is concave.
+void PrecomputeBeyondKneeApproxParams(
+    const LimiterDbGainCurve* limiter,
+    test::InterpolatedParameters* parameters) {
+  // Find points on which the linear pieces are tangent to the gain curve.
+  const auto samples = SampleLimiterRegion(limiter);
+
+  // Parametrize each linear piece.
+  double m, q;
+  std::tie(m, q) = ComputeLinearApproximationParams(
+      limiter,
+      parameters
+          ->computed_approximation_params_x[kInterpolatedGainCurveKneePoints -
+                                            1]);
+  parameters
+      ->computed_approximation_params_m[kInterpolatedGainCurveKneePoints - 1] =
+      m;
+  parameters
+      ->computed_approximation_params_q[kInterpolatedGainCurveKneePoints - 1] =
+      q;
+  for (size_t i = 0; i < samples.size(); ++i) {
+    std::tie(m, q) = ComputeLinearApproximationParams(limiter, samples[i]);
+    parameters
+        ->computed_approximation_params_m[i +
+                                          kInterpolatedGainCurveKneePoints] = m;
+    parameters
+        ->computed_approximation_params_q[i +
+                                          kInterpolatedGainCurveKneePoints] = q;
+  }
+
+  // Find the point of intersection between adjacent linear pieces. They will be
+  // used as boundaries between adjacent linear pieces.
+  for (size_t i = kInterpolatedGainCurveKneePoints;
+       i < kInterpolatedGainCurveKneePoints +
+               kInterpolatedGainCurveBeyondKneePoints;
+       ++i) {
+    RTC_CHECK_NE(parameters->computed_approximation_params_m[i],
+                 parameters->computed_approximation_params_m[i - 1]);
+    parameters->computed_approximation_params_x[i] =
+        (  // Formula: (q0 - q1) / (m1 - m0).
+            parameters->computed_approximation_params_q[i - 1] -
+            parameters->computed_approximation_params_q[i]) /
+        (parameters->computed_approximation_params_m[i] -
+         parameters->computed_approximation_params_m[i - 1]);
+  }
+}
+
+}  // namespace
+
+namespace test {
+
+InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams() {
+  InterpolatedParameters parameters;
+  LimiterDbGainCurve limiter;
+  parameters.computed_approximation_params_x.fill(0.0f);
+  parameters.computed_approximation_params_m.fill(0.0f);
+  parameters.computed_approximation_params_q.fill(0.0f);
+  PrecomputeKneeApproxParams(&limiter, &parameters);
+  PrecomputeBeyondKneeApproxParams(&limiter, &parameters);
+  return parameters;
+}
+}  // namespace test
+}  // namespace webrtc
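The tables produced above feed the piece-wise linear gain curve used by the fixed digital limiter (see interpolated_gain_curve.cc in this patch). A minimal sketch of how a caller might inspect the result (the function `PrintFirstPiece` is illustrative, not upstream code):

    #include <cstdio>

    #include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h"

    void PrintFirstPiece() {
      const webrtc::test::InterpolatedParameters params =
          webrtc::test::ComputeInterpolatedGainCurveApproximationParams();
      // Piece i approximates the gain as m * x + q, with the x values acting
      // as boundaries between adjacent pieces.
      std::printf("x=%f m=%f q=%f\n",
                  params.computed_approximation_params_x[0],
                  params.computed_approximation_params_m[0],
                  params.computed_approximation_params_q[0]);
    }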
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h
new file mode 100644
index 0000000000..08b676f5fd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_
+
+#include <array>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+
+namespace webrtc {
+
+namespace test {
+
+// Parameters for interpolated gain curve using under-approximation to
+// avoid saturation.
+//
+// The saturation gain is defined in order to let hard-clipping occur for
+// those samples having a level that falls in the saturation region. It is an
+// upper bound of the actual gain to apply - i.e., that returned by the
+// limiter.
+
+// Knee and beyond-knee regions approximation parameters.
+// The gain curve is approximated as a piece-wise linear function.
+// `approx_params_x_` are the boundaries between adjacent linear pieces,
+// `approx_params_m_` and `approx_params_q_` are the slope and the y-intercept
+// values of each piece.
+struct InterpolatedParameters {
+  std::array<float, kInterpolatedGainCurveTotalPoints>
+      computed_approximation_params_x;
+  std::array<float, kInterpolatedGainCurveTotalPoints>
+      computed_approximation_params_m;
+  std::array<float, kInterpolatedGainCurveTotalPoints>
+      computed_approximation_params_q;
+};
+
+InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams();
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc
new file mode 100644
index 0000000000..cced7614bc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/cpu_features.h"
+
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/arch.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+
+namespace webrtc {
+
+std::string AvailableCpuFeatures::ToString() const {
+  char buf[64];
+  rtc::SimpleStringBuilder builder(buf);
+  bool first = true;
+  if (sse2) {
+    builder << (first ? "SSE2" : "_SSE2");
+    first = false;
+  }
+  if (avx2) {
+    builder << (first ? "AVX2" : "_AVX2");
+    first = false;
+  }
+  if (neon) {
+    builder << (first ? "NEON" : "_NEON");
+    first = false;
+  }
+  if (first) {
+    return "none";
+  }
+  return builder.str();
+}
+
+// Detects available CPU features.
+AvailableCpuFeatures GetAvailableCpuFeatures() {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  return {/*sse2=*/GetCPUInfo(kSSE2) != 0,
+          /*avx2=*/GetCPUInfo(kAVX2) != 0,
+          /*neon=*/false};
+#elif defined(WEBRTC_HAS_NEON)
+  return {/*sse2=*/false,
+          /*avx2=*/false,
+          /*neon=*/true};
+#else
+  return {/*sse2=*/false,
+          /*avx2=*/false,
+          /*neon=*/false};
+#endif
+}
+
+AvailableCpuFeatures NoAvailableCpuFeatures() {
+  return {/*sse2=*/false, /*avx2=*/false, /*neon=*/false};
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h
new file mode 100644
index 0000000000..54ddfb3055
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_
+
+#include <string>
+
+namespace webrtc {
+
+// Collection of flags indicating which CPU features are available on the
+// current platform. True means available.
+struct AvailableCpuFeatures { + AvailableCpuFeatures(bool sse2, bool avx2, bool neon) + : sse2(sse2), avx2(avx2), neon(neon) {} + // Intel. + bool sse2; + bool avx2; + // ARM. + bool neon; + std::string ToString() const; +}; + +// Detects what CPU features are available. +AvailableCpuFeatures GetAvailableCpuFeatures(); + +// Returns the CPU feature flags all set to false. +AvailableCpuFeatures NoAvailableCpuFeatures(); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build new file mode 100644 index 0000000000..a4572251ad --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + 
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("cpu_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build new file mode 100644 index 0000000000..3a54fc3171 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build @@ -0,0 +1,235 @@ +# This Source 
Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = 
"0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("fixed_digital_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc new file mode 100644 index 0000000000..1995b24913 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kInitialFilterStateLevel = 0.0f;
+
+// Instant attack.
+constexpr float kAttackFilterConstant = 0.0f;
+
+// Limiter decay constant.
+// Computed as `10 ** (-1/20 * subframe_duration / kDecayMs)` where:
+// - `subframe_duration` is `kFrameDurationMs / kSubFramesInFrame`;
+// - `kDecayMs` is defined in agc2_testing_common.h.
+constexpr float kDecayFilterConstant = 0.9971259f;
+
+}  // namespace
+
+FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(
+    int sample_rate_hz,
+    ApmDataDumper* apm_data_dumper)
+    : apm_data_dumper_(apm_data_dumper),
+      filter_state_level_(kInitialFilterStateLevel) {
+  SetSampleRate(sample_rate_hz);
+  CheckParameterCombination();
+  RTC_DCHECK(apm_data_dumper_);
+  apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
+}
+
+void FixedDigitalLevelEstimator::CheckParameterCombination() {
+  RTC_DCHECK_GT(samples_in_frame_, 0);
+  RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_);
+  RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0);
+  RTC_DCHECK_GT(samples_in_sub_frame_, 1);
+}
+
+std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
+    const AudioFrameView<const float>& float_frame) {
+  RTC_DCHECK_GT(float_frame.num_channels(), 0);
+  RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);
+
+  // Compute max envelope without smoothing.
+  std::array<float, kSubFramesInFrame> envelope{};
+  for (int channel_idx = 0; channel_idx < float_frame.num_channels();
+       ++channel_idx) {
+    const auto channel = float_frame.channel(channel_idx);
+    for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+      for (int sample_in_sub_frame = 0;
+           sample_in_sub_frame < samples_in_sub_frame_;
+           ++sample_in_sub_frame) {
+        envelope[sub_frame] =
+            std::max(envelope[sub_frame],
+                     std::abs(channel[sub_frame * samples_in_sub_frame_ +
+                                      sample_in_sub_frame]));
+      }
+    }
+  }
+
+  // Make sure envelope increases happen one step earlier so that the
+  // corresponding *gain decrease* doesn't miss a sudden signal
+  // increase due to interpolation.
+  for (int sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) {
+    if (envelope[sub_frame] < envelope[sub_frame + 1]) {
+      envelope[sub_frame] = envelope[sub_frame + 1];
+    }
+  }
+
+  // Add attack / decay smoothing.
+  for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+    const float envelope_value = envelope[sub_frame];
+    if (envelope_value > filter_state_level_) {
+      envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) +
+                            filter_state_level_ * kAttackFilterConstant;
+    } else {
+      envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) +
+                            filter_state_level_ * kDecayFilterConstant;
+    }
+    filter_state_level_ = envelope[sub_frame];
+
+    // Dump data for debug.
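+    // (With WEBRTC_APM_DEBUG_DUMP defined to "0" in these builds, the
+    // DumpRaw() calls below compile to no-ops; otherwise they record the
+    // channel-0 sub-frame samples and the smoothed level per sub-frame.)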
+    RTC_DCHECK(apm_data_dumper_);
+    const auto channel = float_frame.channel(0);
+    apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
+                              samples_in_sub_frame_,
+                              &channel[sub_frame * samples_in_sub_frame_]);
+    apm_data_dumper_->DumpRaw("agc2_level_estimator_level",
+                              envelope[sub_frame]);
+  }
+
+  return envelope;
+}
+
+void FixedDigitalLevelEstimator::SetSampleRate(int sample_rate_hz) {
+  samples_in_frame_ =
+      rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs, 1000);
+  samples_in_sub_frame_ =
+      rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);
+  CheckParameterCombination();
+}
+
+void FixedDigitalLevelEstimator::Reset() {
+  filter_state_level_ = kInitialFilterStateLevel;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h
new file mode 100644
index 0000000000..d26b55950c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+// Produces a smooth signal level estimate from an input audio
+// stream. The estimate smoothing is done through exponential
+// filtering.
+class FixedDigitalLevelEstimator {
+ public:
+  // Sample rates are allowed if the number of samples in a frame
+  // (sample_rate_hz * kFrameDurationMs / 1000) is divisible by
+  // kSubFramesInFrame. For kFrameDurationMs=10 and
+  // kSubFramesInFrame=20, this means that sample_rate_hz has to be
+  // divisible by 2000.
+  FixedDigitalLevelEstimator(int sample_rate_hz,
+                             ApmDataDumper* apm_data_dumper);
+
+  FixedDigitalLevelEstimator(const FixedDigitalLevelEstimator&) = delete;
+  FixedDigitalLevelEstimator& operator=(const FixedDigitalLevelEstimator&) =
+      delete;
+
+  // The input is assumed to be in FloatS16 format. Scaled input will
+  // produce similarly scaled output. A frame with kFrameDurationMs
+  // ms of audio produces a level estimate in the same scale. The
+  // level estimate contains kSubFramesInFrame values.
+  std::array<float, kSubFramesInFrame> ComputeLevel(
+      const AudioFrameView<const float>& float_frame);
+
+  // Rate may be changed at any time (but not concurrently) from the
+  // value passed to the constructor. The class is not thread safe.
+  void SetSampleRate(int sample_rate_hz);
+
+  // Resets the level estimator internal state.
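+  // (Only the smoothing filter state is cleared; the sample rate configured
+  // at construction or via SetSampleRate() is kept.)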
+  void Reset();
+
+  float LastAudioLevel() const { return filter_state_level_; }
+
+ private:
+  void CheckParameterCombination();
+
+  ApmDataDumper* const apm_data_dumper_ = nullptr;
+  float filter_state_level_;
+  int samples_in_frame_;
+  int samples_in_sub_frame_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
new file mode 100644
index 0000000000..97b421d04c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
@@ -0,0 +1,159 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <limits>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kInputLevel = 10000.f;
+
+// Run audio at specified settings through the level estimator, and
+// verify that the output level falls within the bounds.
+void TestLevelEstimator(int sample_rate_hz,
+                        int num_channels,
+                        float input_level_linear_scale,
+                        float expected_min,
+                        float expected_max) {
+  ApmDataDumper apm_data_dumper(0);
+  FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+  const VectorFloatFrame vectors_with_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100),
+      input_level_linear_scale);
+
+  for (int i = 0; i < 500; ++i) {
+    const auto level = level_estimator.ComputeLevel(
+        vectors_with_float_frame.float_frame_view());
+
+    // Give the estimator some time to ramp up.
+    if (i < 50) {
+      continue;
+    }
+
+    for (const auto& x : level) {
+      EXPECT_LE(expected_min, x);
+      EXPECT_LE(x, expected_max);
+    }
+  }
+}
+
+// Returns the time it takes for the level estimator to decrease its level
+// estimate by 'level_reduction_db'.
+float TimeMsToDecreaseLevel(int sample_rate_hz,
+                            int num_channels,
+                            float input_level_db,
+                            float level_reduction_db) {
+  const float input_level = DbfsToFloatS16(input_level_db);
+  RTC_DCHECK_GT(level_reduction_db, 0);
+
+  const VectorFloatFrame vectors_with_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level);
+
+  ApmDataDumper apm_data_dumper(0);
+  FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+  // Give the level estimator plenty of time to ramp up and stabilize.
+  float last_level = 0.f;
+  for (int i = 0; i < 500; ++i) {
+    const auto level_envelope = level_estimator.ComputeLevel(
+        vectors_with_float_frame.float_frame_view());
+    last_level = *level_envelope.rbegin();
+  }
+
+  // Set input to 0.
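+  // (From this point the envelope should decay exponentially. The
+  // TimeToDecrease* tests below rely on the expected decay time being
+  // roughly level_reduction_db * test::kDecayMs ms, asserted with a 10%
+  // margin.)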
+  VectorFloatFrame vectors_with_zero_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0);
+
+  const float reduced_level_linear =
+      DbfsToFloatS16(input_level_db - level_reduction_db);
+  int sub_frames_until_level_reduction = 0;
+  while (last_level > reduced_level_linear) {
+    const auto level_envelope = level_estimator.ComputeLevel(
+        vectors_with_zero_float_frame.float_frame_view());
+    for (const auto& v : level_envelope) {
+      EXPECT_LT(v, last_level);
+      sub_frames_until_level_reduction++;
+      last_level = v;
+      if (last_level <= reduced_level_linear) {
+        break;
+      }
+    }
+  }
+  return static_cast<float>(sub_frames_until_level_reduction) *
+         kFrameDurationMs / kSubFramesInFrame;
+}
+}  // namespace
+
+TEST(GainController2FixedDigitalLevelEstimator, EstimatorShouldNotCrash) {
+  TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
+                     std::numeric_limits<float>::max());
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     EstimatorShouldEstimateConstantLevel) {
+  TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
+                     kInputLevel * 1.01);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     EstimatorShouldEstimateConstantLevelForManyChannels) {
+  constexpr size_t num_channels = 10;
+  TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
+                     kInputLevel * 1.01);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator, TimeToDecreaseForLowLevel) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kInitialLowLevel = -40;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     TimeToDecreaseForFullScaleLevel) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     TimeToDecreaseForMultipleChannels) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+  constexpr size_t kNumChannels = 10;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc
new file mode 100644
index 0000000000..f9e276d3a8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+// Returns true when the gain factor is so close to 1 that it would
+// not affect int16 samples.
+bool GainCloseToOne(float gain_factor) {
+  return 1.f - 1.f / kMaxFloatS16Value <= gain_factor &&
+         gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
+}
+
+void ClipSignal(AudioFrameView<float> signal) {
+  for (int k = 0; k < signal.num_channels(); ++k) {
+    rtc::ArrayView<float> channel_view = signal.channel(k);
+    for (auto& sample : channel_view) {
+      sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
+    }
+  }
+}
+
+void ApplyGainWithRamping(float last_gain_linear,
+                          float gain_at_end_of_frame_linear,
+                          float inverse_samples_per_channel,
+                          AudioFrameView<float> float_frame) {
+  // Do not modify the signal.
+  if (last_gain_linear == gain_at_end_of_frame_linear &&
+      GainCloseToOne(gain_at_end_of_frame_linear)) {
+    return;
+  }
+
+  // Gain is constant and different from 1.
+  if (last_gain_linear == gain_at_end_of_frame_linear) {
+    for (int k = 0; k < float_frame.num_channels(); ++k) {
+      rtc::ArrayView<float> channel_view = float_frame.channel(k);
+      for (auto& sample : channel_view) {
+        sample *= gain_at_end_of_frame_linear;
+      }
+    }
+    return;
+  }
+
+  // The gain changes. We have to change slowly to avoid discontinuities.
+  const float increment = (gain_at_end_of_frame_linear - last_gain_linear) *
+                          inverse_samples_per_channel;
+  float gain = last_gain_linear;
+  for (int i = 0; i < float_frame.samples_per_channel(); ++i) {
+    for (int ch = 0; ch < float_frame.num_channels(); ++ch) {
+      float_frame.channel(ch)[i] *= gain;
+    }
+    gain += increment;
+  }
+}
+
+}  // namespace
+
+GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor)
+    : hard_clip_samples_(hard_clip_samples),
+      last_gain_factor_(initial_gain_factor),
+      current_gain_factor_(initial_gain_factor) {}
+
+void GainApplier::ApplyGain(AudioFrameView<float> signal) {
+  if (static_cast<int>(signal.samples_per_channel()) != samples_per_channel_) {
+    Initialize(signal.samples_per_channel());
+  }
+
+  ApplyGainWithRamping(last_gain_factor_, current_gain_factor_,
+                       inverse_samples_per_channel_, signal);
+
+  last_gain_factor_ = current_gain_factor_;
+
+  if (hard_clip_samples_) {
+    ClipSignal(signal);
+  }
+}
+
+// TODO(bugs.webrtc.org/7494): Remove once switched to gains in dB.
+void GainApplier::SetGainFactor(float gain_factor) {
+  RTC_DCHECK_GT(gain_factor, 0.f);
+  current_gain_factor_ = gain_factor;
+}
+
+void GainApplier::Initialize(int samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  samples_per_channel_ = static_cast<int>(samples_per_channel);
+  inverse_samples_per_channel_ = 1.f / samples_per_channel_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h
new file mode 100644
index 0000000000..ba8a4a4cd2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
+
+#include <stddef.h>
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+class GainApplier {
+ public:
+  GainApplier(bool hard_clip_samples, float initial_gain_factor);
+
+  void ApplyGain(AudioFrameView<float> signal);
+  void SetGainFactor(float gain_factor);
+  float GetGainFactor() const { return current_gain_factor_; }
+
+ private:
+  void Initialize(int samples_per_channel);
+
+  // Whether to clip samples after gain is applied. If 'true', result
+  // will fit in FloatS16 range.
+  const bool hard_clip_samples_;
+  float last_gain_factor_;
+
+  // If this value is not equal to 'last_gain_factor', gain will be
+  // ramped from 'last_gain_factor_' to this value during the next
+  // 'ApplyGain'.
+  float current_gain_factor_;
+  int samples_per_channel_ = -1;
+  float inverse_samples_per_channel_ = -1.f;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build
new file mode 100644
index 0000000000..394aa109fa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+  ### DO NOT edit it by hand. ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] ==
"Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + 
DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("gain_applier_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc
new file mode 100644
index 0000000000..3296345e62
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+TEST(AutomaticGainController2GainApplier, InitialGainIsRespected) {
+  constexpr float initial_signal_level = 123.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(true, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              initial_signal_level * gain_factor, 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, ClippingIsDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(true, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              std::numeric_limits<int16_t>::max(), 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(false, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              initial_signal_level * gain_factor, 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, RampingIsDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float initial_gain_factor = 1.f;
+  constexpr float target_gain_factor = 0.5f;
+  constexpr int num_channels = 3;
+  constexpr int samples_per_channel = 4;
+  VectorFloatFrame fake_audio(num_channels, samples_per_channel,
+                              initial_signal_level);
+  GainApplier gain_applier(false, initial_gain_factor);
+
+  gain_applier.SetGainFactor(target_gain_factor);
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+
+  // The maximal gain change should be close to that in linear interpolation.
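+  // (In this test the gain ramps from 1.0 to 0.5 over 4 samples, i.e. by
+  // 0.125 per sample, so the largest per-sample signal change should be
+  // close to 30000 * 0.5 / 4 = 3750.)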
+ for (size_t channel = 0; channel < num_channels; ++channel) { + float max_signal_change = 0.f; + float last_signal_level = initial_signal_level; + for (const auto sample : fake_audio.float_frame_view().channel(channel)) { + const float current_change = fabs(last_signal_level - sample); + max_signal_change = std::max(max_signal_change, current_change); + last_signal_level = sample; + } + const float total_gain_change = + fabs((initial_gain_factor - target_gain_factor) * initial_signal_level); + EXPECT_NEAR(max_signal_change, total_gain_change / samples_per_channel, + 0.1f); + } + + // Next frame should have the desired level. + VectorFloatFrame next_fake_audio_frame(num_channels, samples_per_channel, + initial_signal_level); + gain_applier.ApplyGain(next_fake_audio_frame.float_frame_view()); + + // The last sample should have the new gain. + EXPECT_NEAR(next_fake_audio_frame.float_frame_view().channel(0)[0], + initial_signal_level * target_gain_factor, 0.1f); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build new file mode 100644 index 0000000000..5b1d68415a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + 
DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_map_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h new file mode 100644 index 0000000000..7c669fc9dd --- 
/dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ + +namespace webrtc { + +static constexpr int kGainMapSize = 256; +// Maps input volumes, which are values in the [0, 255] range, to gains in dB. +// The values below are generated with numpy as follows: +// SI = 2 # Initial slope. +// SF = 0.25 # Final slope. +// D = 8/256 # Quantization factor. +// x = np.linspace(0, 255, 256) # Input volumes. +// y = (SF * x + (SI - SF) * (1 - np.exp(-D*x)) / D - 56).round() +static const int kGainMap[kGainMapSize] = { + -56, -54, -52, -50, -48, -47, -45, -43, -42, -40, -38, -37, -35, -34, -33, + -31, -30, -29, -27, -26, -25, -24, -23, -22, -20, -19, -18, -17, -16, -15, + -14, -14, -13, -12, -11, -10, -9, -8, -8, -7, -6, -5, -5, -4, -3, + -2, -2, -1, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + 13, 14, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 18, 19, + 19, 19, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 23, 24, 24, + 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28, + 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33, + 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, + 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 41, + 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 44, 44, 44, 44, 45, + 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, 48, 48, + 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52, + 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 56, 56, + 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, + 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc new file mode 100644 index 0000000000..bcc650fb3e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/input_volume_controller.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+// Amount of error we tolerate in the microphone input volume (presumably due
+// to OS quantization) before we assume the user has manually adjusted the
+// volume.
+constexpr int kVolumeQuantizationSlack = 25;
+
+constexpr int kMaxInputVolume = 255;
+static_assert(kGainMapSize > kMaxInputVolume, "gain map too small");
+
+// Maximum absolute RMS error.
+constexpr int KMaxAbsRmsErrorDbfs = 15;
+static_assert(KMaxAbsRmsErrorDbfs > 0, "");
+
+using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+
+// TODO(webrtc:7494): Hardcode clipping predictor parameters and remove this
+// function after no longer needed in the ctor.
+Agc1ClippingPredictorConfig CreateClippingPredictorConfig(bool enabled) {
+  Agc1ClippingPredictorConfig config;
+  config.enabled = enabled;
+
+  return config;
+}
+
+// Returns an input volume in the [`min_input_volume`, `kMaxInputVolume`] range
+// that reduces `gain_error_db`, which is a gain error estimated when
+// `input_volume` was applied, according to a fixed gain map.
+int ComputeVolumeUpdate(int gain_error_db,
+                        int input_volume,
+                        int min_input_volume) {
+  RTC_DCHECK_GE(input_volume, 0);
+  RTC_DCHECK_LE(input_volume, kMaxInputVolume);
+  if (gain_error_db == 0) {
+    return input_volume;
+  }
+
+  int new_volume = input_volume;
+  if (gain_error_db > 0) {
+    while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
+           new_volume < kMaxInputVolume) {
+      ++new_volume;
+    }
+  } else {
+    while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
+           new_volume > min_input_volume) {
+      --new_volume;
+    }
+  }
+  return new_volume;
+}
+
+// Returns the proportion of samples in the buffer which are at full-scale
+// (and presumably clipped).
+float ComputeClippedRatio(const float* const* audio,
+                          size_t num_channels,
+                          size_t samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  int num_clipped = 0;
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    int num_clipped_in_ch = 0;
+    for (size_t i = 0; i < samples_per_channel; ++i) {
+      RTC_DCHECK(audio[ch]);
+      if (audio[ch][i] >= 32767.0f || audio[ch][i] <= -32768.0f) {
+        ++num_clipped_in_ch;
+      }
+    }
+    num_clipped = std::max(num_clipped, num_clipped_in_ch);
+  }
+  return static_cast<float>(num_clipped) / (samples_per_channel);
+}
+
+void LogClippingMetrics(int clipping_rate) {
+  RTC_LOG(LS_INFO) << "[AGC2] Input clipping rate: " << clipping_rate << "%";
+  RTC_HISTOGRAM_COUNTS_LINEAR(/*name=*/"WebRTC.Audio.Agc.InputClippingRate",
+                              /*sample=*/clipping_rate, /*min=*/0, /*max=*/100,
+                              /*bucket_count=*/50);
+}
+
+// Compares `speech_level_dbfs` to the [`target_range_min_dbfs`,
+// `target_range_max_dbfs`] range and returns the error to be compensated via
+// input volume adjustment. Returns a positive value when the level is below
+// the range, a negative value when the level is above the range, zero
+// otherwise.
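+// For example, with the default target range [-50, -30] dBFS: a speech level
+// of -64 dBFS yields +14 dB (raise the volume), -20 dBFS yields -10 dB
+// (lower the volume) and -40 dBFS, being inside the range, yields 0.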
+int GetSpeechLevelRmsErrorDb(float speech_level_dbfs,
+                             int target_range_min_dbfs,
+                             int target_range_max_dbfs) {
+  constexpr float kMinSpeechLevelDbfs = -90.0f;
+  constexpr float kMaxSpeechLevelDbfs = 30.0f;
+  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
+  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
+  speech_level_dbfs = rtc::SafeClamp(
+      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
+
+  int rms_error_db = 0;
+  if (speech_level_dbfs > target_range_max_dbfs) {
+    rms_error_db = std::round(target_range_max_dbfs - speech_level_dbfs);
+  } else if (speech_level_dbfs < target_range_min_dbfs) {
+    rms_error_db = std::round(target_range_min_dbfs - speech_level_dbfs);
+  }
+
+  return rms_error_db;
+}
+
+}  // namespace
+
+MonoInputVolumeController::MonoInputVolumeController(
+    int min_input_volume_after_clipping,
+    int min_input_volume,
+    int update_input_volume_wait_frames,
+    float speech_probability_threshold,
+    float speech_ratio_threshold)
+    : min_input_volume_(min_input_volume),
+      min_input_volume_after_clipping_(min_input_volume_after_clipping),
+      max_input_volume_(kMaxInputVolume),
+      update_input_volume_wait_frames_(
+          std::max(update_input_volume_wait_frames, 1)),
+      speech_probability_threshold_(speech_probability_threshold),
+      speech_ratio_threshold_(speech_ratio_threshold) {
+  RTC_DCHECK_GE(min_input_volume_, 0);
+  RTC_DCHECK_LE(min_input_volume_, 255);
+  RTC_DCHECK_GE(min_input_volume_after_clipping_, 0);
+  RTC_DCHECK_LE(min_input_volume_after_clipping_, 255);
+  RTC_DCHECK_GE(max_input_volume_, 0);
+  RTC_DCHECK_LE(max_input_volume_, 255);
+  RTC_DCHECK_GE(update_input_volume_wait_frames_, 0);
+  RTC_DCHECK_GE(speech_probability_threshold_, 0.0f);
+  RTC_DCHECK_LE(speech_probability_threshold_, 1.0f);
+  RTC_DCHECK_GE(speech_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LE(speech_ratio_threshold_, 1.0f);
+}
+
+MonoInputVolumeController::~MonoInputVolumeController() = default;
+
+void MonoInputVolumeController::Initialize() {
+  max_input_volume_ = kMaxInputVolume;
+  capture_output_used_ = true;
+  check_volume_on_next_process_ = true;
+  frames_since_update_input_volume_ = 0;
+  speech_frames_since_update_input_volume_ = 0;
+  is_first_frame_ = true;
+}
+
+// A speech segment is considered active if at least
+// `update_input_volume_wait_frames_` new frames have been processed since the
+// previous update and the ratio of non-silence frames (i.e., frames with a
+// `speech_probability` higher than `speech_probability_threshold_`) is at
+// least `speech_ratio_threshold_`.
+void MonoInputVolumeController::Process(absl::optional<int> rms_error_db,
+                                        float speech_probability) {
+  if (check_volume_on_next_process_) {
+    check_volume_on_next_process_ = false;
+    // We have to wait until the first process call to check the volume,
+    // because Chromium doesn't guarantee it to be valid any earlier.
+    CheckVolumeAndReset();
+  }
+
+  // Count frames with a high speech probability as speech.
+  if (speech_probability >= speech_probability_threshold_) {
+    ++speech_frames_since_update_input_volume_;
+  }
+
+  // Reset the counters and maybe update the input volume.
+  if (++frames_since_update_input_volume_ >=
+      update_input_volume_wait_frames_) {
+    const float speech_ratio =
+        static_cast<float>(speech_frames_since_update_input_volume_) /
+        static_cast<float>(update_input_volume_wait_frames_);
+
+    // Always reset the counters regardless of whether the volume changes or
+    // not.
+    frames_since_update_input_volume_ = 0;
+    speech_frames_since_update_input_volume_ = 0;
+
+    // Update the input volume if allowed.
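+    // (With the defaults in InputVolumeController::Config this point is
+    // reached at most once every 100 frames, and an update is applied only
+    // if at least 60% of those frames were counted as speech.)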
+ if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_ && + rms_error_db.has_value()) { + UpdateInputVolume(*rms_error_db); + } + } + + is_first_frame_ = false; +} + +void MonoInputVolumeController::HandleClipping(int clipped_level_step) { + RTC_DCHECK_GT(clipped_level_step, 0); + // Always decrease the maximum input volume, even if the current input volume + // is below threshold. + SetMaxLevel(std::max(min_input_volume_after_clipping_, + max_input_volume_ - clipped_level_step)); + if (log_to_histograms_) { + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", + last_recommended_input_volume_ - clipped_level_step >= + min_input_volume_after_clipping_); + } + if (last_recommended_input_volume_ > min_input_volume_after_clipping_) { + // Don't try to adjust the input volume if we're already below the limit. As + // a consequence, if the user has brought the input volume above the limit, + // we will still not react until the postproc updates the input volume. + SetInputVolume( + std::max(min_input_volume_after_clipping_, + last_recommended_input_volume_ - clipped_level_step)); + frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = false; + } +} + +void MonoInputVolumeController::SetInputVolume(int new_volume) { + int applied_input_volume = recommended_input_volume_; + if (applied_input_volume == 0) { + RTC_DLOG(LS_INFO) + << "[AGC2] The applied input volume is zero, taking no action."; + return; + } + if (applied_input_volume < 0 || applied_input_volume > kMaxInputVolume) { + RTC_LOG(LS_ERROR) << "[AGC2] Invalid value for the applied input volume: " + << applied_input_volume; + return; + } + + // Detect manual input volume adjustments by checking if the + // `applied_input_volume` is outside of the `[last_recommended_input_volume_ - + // kVolumeQuantizationSlack, last_recommended_input_volume_ + + // kVolumeQuantizationSlack]` range. + if (applied_input_volume > + last_recommended_input_volume_ + kVolumeQuantizationSlack || + applied_input_volume < + last_recommended_input_volume_ - kVolumeQuantizationSlack) { + RTC_DLOG(LS_INFO) + << "[AGC2] The input volume was manually adjusted. Updating " + "stored input volume from " + << last_recommended_input_volume_ << " to " << applied_input_volume; + last_recommended_input_volume_ = applied_input_volume; + // Always allow the user to increase the volume. + if (last_recommended_input_volume_ > max_input_volume_) { + SetMaxLevel(last_recommended_input_volume_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. 
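+    // (Clearing the counters ensures that a full observation window elapses
+    // before the next automatic volume update.)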
+ frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = false; + return; + } + + new_volume = std::min(new_volume, max_input_volume_); + if (new_volume == last_recommended_input_volume_) { + return; + } + + recommended_input_volume_ = new_volume; + RTC_DLOG(LS_INFO) << "[AGC2] Applied input volume: " << applied_input_volume + << " | last recommended input volume: " + << last_recommended_input_volume_ + << " | newly recommended input volume: " << new_volume; + last_recommended_input_volume_ = new_volume; +} + +void MonoInputVolumeController::SetMaxLevel(int input_volume) { + RTC_DCHECK_GE(input_volume, min_input_volume_after_clipping_); + max_input_volume_ = input_volume; + RTC_DLOG(LS_INFO) << "[AGC2] Maximum input volume updated: " + << max_input_volume_; +} + +void MonoInputVolumeController::HandleCaptureOutputUsedChange( + bool capture_output_used) { + if (capture_output_used_ == capture_output_used) { + return; + } + capture_output_used_ = capture_output_used; + + if (capture_output_used) { + // When we start using the output, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +int MonoInputVolumeController::CheckVolumeAndReset() { + int input_volume = recommended_input_volume_; + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of `input_volume` == 0 we should raise it + // so the AGC can do its job properly. + if (input_volume == 0 && !startup_) { + RTC_DLOG(LS_INFO) + << "[AGC2] The applied input volume is zero, taking no action."; + return 0; + } + if (input_volume < 0 || input_volume > kMaxInputVolume) { + RTC_LOG(LS_ERROR) << "[AGC2] Invalid value for the applied input volume: " + << input_volume; + return -1; + } + RTC_DLOG(LS_INFO) << "[AGC2] Initial input volume: " << input_volume; + + if (input_volume < min_input_volume_) { + input_volume = min_input_volume_; + RTC_DLOG(LS_INFO) + << "[AGC2] The initial input volume is too low, raising to " + << input_volume; + recommended_input_volume_ = input_volume; + } + + last_recommended_input_volume_ = input_volume; + startup_ = false; + frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = true; + + return 0; +} + +void MonoInputVolumeController::UpdateInputVolume(int rms_error_db) { + RTC_DLOG(LS_INFO) << "[AGC2] RMS error: " << rms_error_db << " dB"; + // Prevent too large microphone input volume changes by clamping the RMS + // error. 
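+  // (E.g. with KMaxAbsRmsErrorDbfs = 15, a +30 dB error is applied as +15 dB
+  // now; the remainder is compensated by later updates.)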
+  rms_error_db =
+      rtc::SafeClamp(rms_error_db, -KMaxAbsRmsErrorDbfs, KMaxAbsRmsErrorDbfs);
+  if (rms_error_db == 0) {
+    return;
+  }
+  SetInputVolume(ComputeVolumeUpdate(
+      rms_error_db, last_recommended_input_volume_, min_input_volume_));
+}
+
+InputVolumeController::InputVolumeController(int num_capture_channels,
+                                             const Config& config)
+    : num_capture_channels_(num_capture_channels),
+      min_input_volume_(config.min_input_volume),
+      capture_output_used_(true),
+      clipped_level_step_(config.clipped_level_step),
+      clipped_ratio_threshold_(config.clipped_ratio_threshold),
+      clipped_wait_frames_(config.clipped_wait_frames),
+      clipping_predictor_(CreateClippingPredictor(
+          num_capture_channels,
+          CreateClippingPredictorConfig(config.enable_clipping_predictor))),
+      use_clipping_predictor_step_(
+          !!clipping_predictor_ &&
+          CreateClippingPredictorConfig(config.enable_clipping_predictor)
+              .use_predicted_step),
+      frames_since_clipped_(config.clipped_wait_frames),
+      clipping_rate_log_counter_(0),
+      clipping_rate_log_(0.0f),
+      target_range_max_dbfs_(config.target_range_max_dbfs),
+      target_range_min_dbfs_(config.target_range_min_dbfs),
+      channel_controllers_(num_capture_channels) {
+  RTC_LOG(LS_INFO)
+      << "[AGC2] Input volume controller enabled. Minimum input volume: "
+      << min_input_volume_;
+
+  for (auto& controller : channel_controllers_) {
+    controller = std::make_unique<MonoInputVolumeController>(
+        config.clipped_level_min, min_input_volume_,
+        config.update_input_volume_wait_frames,
+        config.speech_probability_threshold, config.speech_ratio_threshold);
+  }
+
+  RTC_DCHECK(!channel_controllers_.empty());
+  RTC_DCHECK_GT(clipped_level_step_, 0);
+  RTC_DCHECK_LE(clipped_level_step_, 255);
+  RTC_DCHECK_GT(clipped_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LT(clipped_ratio_threshold_, 1.0f);
+  RTC_DCHECK_GT(clipped_wait_frames_, 0);
+  channel_controllers_[0]->ActivateLogging();
+}
+
+InputVolumeController::~InputVolumeController() {}
+
+void InputVolumeController::Initialize() {
+  for (auto& controller : channel_controllers_) {
+    controller->Initialize();
+  }
+  capture_output_used_ = true;
+
+  AggregateChannelLevels();
+  clipping_rate_log_ = 0.0f;
+  clipping_rate_log_counter_ = 0;
+
+  applied_input_volume_ = absl::nullopt;
+}
+
+void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
+                                              const AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(applied_input_volume, 0);
+  RTC_DCHECK_LE(applied_input_volume, 255);
+
+  SetAppliedInputVolume(applied_input_volume);
+
+  RTC_DCHECK_EQ(audio_buffer.num_channels(), channel_controllers_.size());
+  const float* const* audio = audio_buffer.channels_const();
+  size_t samples_per_channel = audio_buffer.num_frames();
+  RTC_DCHECK(audio);
+
+  AggregateChannelLevels();
+  if (!capture_output_used_) {
+    return;
+  }
+
+  if (!!clipping_predictor_) {
+    AudioFrameView<const float> frame = AudioFrameView<const float>(
+        audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+    clipping_predictor_->Analyze(frame);
+  }
+
+  // Check for clipped samples. We do this in the preprocessing phase in order
+  // to catch clipped echo as well.
+  //
+  // If we find a sufficiently clipped frame, drop the current microphone
+  // input volume and enforce a new maximum input volume, lowered by the same
+  // amount from the current maximum. This harsh treatment is an effort to
+  // avoid repeated clipped echo events.
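+  // (E.g. with the default clipped_level_step of 15, a clipping event at
+  // input volume 200 lowers the recommendation to 185 and the maximum from
+  // 255 to 240; repeated events keep walking both down, but never below
+  // min_input_volume_after_clipping_.)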
+  float clipped_ratio =
+      ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
+  clipping_rate_log_ = std::max(clipped_ratio, clipping_rate_log_);
+  clipping_rate_log_counter_++;
+  constexpr int kNumFramesIn30Seconds = 3000;
+  if (clipping_rate_log_counter_ == kNumFramesIn30Seconds) {
+    LogClippingMetrics(std::round(100.0f * clipping_rate_log_));
+    clipping_rate_log_ = 0.0f;
+    clipping_rate_log_counter_ = 0;
+  }
+
+  if (frames_since_clipped_ < clipped_wait_frames_) {
+    ++frames_since_clipped_;
+    return;
+  }
+
+  const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+  bool clipping_predicted = false;
+  int predicted_step = 0;
+  if (!!clipping_predictor_) {
+    for (int channel = 0; channel < num_capture_channels_; ++channel) {
+      const auto step = clipping_predictor_->EstimateClippedLevelStep(
+          channel, recommended_input_volume_, clipped_level_step_,
+          channel_controllers_[channel]->min_input_volume_after_clipping(),
+          kMaxInputVolume);
+      if (step.has_value()) {
+        predicted_step = std::max(predicted_step, step.value());
+        clipping_predicted = true;
+      }
+    }
+  }
+
+  if (clipping_detected) {
+    RTC_DLOG(LS_INFO) << "[AGC2] Clipping detected (ratio: " << clipped_ratio
+                      << ")";
+  }
+
+  int step = clipped_level_step_;
+  if (clipping_predicted) {
+    predicted_step = std::max(predicted_step, clipped_level_step_);
+    RTC_DLOG(LS_INFO) << "[AGC2] Clipping predicted (volume down step: "
+                      << predicted_step << ")";
+    if (use_clipping_predictor_step_) {
+      step = predicted_step;
+    }
+  }
+
+  if (clipping_detected ||
+      (clipping_predicted && use_clipping_predictor_step_)) {
+    for (auto& state_ch : channel_controllers_) {
+      state_ch->HandleClipping(step);
+    }
+    frames_since_clipped_ = 0;
+    if (!!clipping_predictor_) {
+      clipping_predictor_->Reset();
+    }
+  }
+
+  AggregateChannelLevels();
+}
+
+absl::optional<int> InputVolumeController::RecommendInputVolume(
+    float speech_probability,
+    absl::optional<float> speech_level_dbfs) {
+  // Only process if applied input volume is set.
+  if (!applied_input_volume_.has_value()) {
+    RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
+    return absl::nullopt;
+  }
+
+  AggregateChannelLevels();
+  const int volume_after_clipping_handling = recommended_input_volume_;
+
+  if (!capture_output_used_) {
+    return applied_input_volume_;
+  }
+
+  absl::optional<int> rms_error_db;
+  if (speech_level_dbfs.has_value()) {
+    // Compute the error for all frames (both speech and non-speech frames).
+    rms_error_db = GetSpeechLevelRmsErrorDb(
+        *speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_);
+  }
+
+  for (auto& controller : channel_controllers_) {
+    controller->Process(rms_error_db, speech_probability);
+  }
+
+  AggregateChannelLevels();
+  if (volume_after_clipping_handling != recommended_input_volume_) {
+    // The recommended input volume was adjusted in order to match the target
+    // level.
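+    // (Metrics-only histogram update; it does not change the returned
+    // recommendation.)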
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h
new file mode 100644
index 0000000000..21405542dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class MonoInputVolumeController;
+
+// The input volume controller recommends what volume to use, handles volume
+// changes and clipping detection and prediction. In particular, it handles
+// changes triggered by the user (e.g., volume set to zero by a HW mute
+// button). This class is not thread-safe.
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class InputVolumeController final {
+ public:
+  // Config for the constructor.
+  struct Config {
+    // Minimum input volume that can be recommended. Not enforced when the
+    // applied input volume is zero outside startup.
+    int min_input_volume = 20;
+    // Lowest input volume level that will be applied in response to clipping.
+    int clipped_level_min = 70;
+    // Amount the input volume level is lowered with every clipping event.
+    // Limited to (0, 255].
+    int clipped_level_step = 15;
+    // Proportion of clipped samples required to declare a clipping event.
+    // Limited to (0.0f, 1.0f).
+    float clipped_ratio_threshold = 0.1f;
+    // Time in frames to wait after a clipping event before checking again.
+    // Limited to values higher than 0.
+    int clipped_wait_frames = 300;
+    // Enables clipping prediction functionality.
+    bool enable_clipping_predictor = false;
+    // Speech level target range (dBFS). If the speech level is in the range
+    // [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume
+    // adjustments are done based on the speech level. For speech levels below
+    // and above the range, the targets `target_range_min_dbfs` and
+    // `target_range_max_dbfs` are used, respectively.
+    int target_range_max_dbfs = -30;
+    int target_range_min_dbfs = -50;
+    // Number of wait frames between the recommended input volume updates.
+    int update_input_volume_wait_frames = 100;
+    // Speech probability threshold: speech probabilities below the threshold
+    // are considered silence. Limited to [0.0f, 1.0f].
+    float speech_probability_threshold = 0.7f;
+    // Minimum speech frame ratio for volume updates to be allowed. Limited to
+    // [0.0f, 1.0f].
+    float speech_ratio_threshold = 0.6f;
+  };
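The three clipping fields interact as follows: each clipping event lowers the volume by `clipped_level_step`, never below `clipped_level_min`, and further reductions are suppressed for `clipped_wait_frames` frames. Below is a minimal standalone sketch of that gating (editor's illustration, not part of the patch), assuming the default values above and clipping detected on every 10 ms frame:

#include <algorithm>
#include <iostream>

int main() {
  const int clipped_level_min = 70;     // Mirrors Config::clipped_level_min.
  const int clipped_level_step = 15;    // Mirrors Config::clipped_level_step.
  const int clipped_wait_frames = 300;  // Mirrors Config::clipped_wait_frames.
  int volume = 255;
  int frames_since_clipped = clipped_wait_frames;
  for (int frame = 0; frame < 1000; ++frame) {
    if (frames_since_clipped < clipped_wait_frames) {
      ++frames_since_clipped;  // Still in the wait period: no further drop.
      continue;
    }
    // Clipping detected (assumed on every frame here): lower the volume.
    volume = std::max(volume - clipped_level_step, clipped_level_min);
    frames_since_clipped = 0;
  }
  // Four drops fit in 1000 frames with a 300-frame wait: 255 -> 195.
  std::cout << volume << "\n";
  return 0;
}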
+
+  // Ctor. `num_capture_channels` specifies the number of channels for the
+  // audio passed to `AnalyzeInputAudio()` and `RecommendInputVolume()`.
+  InputVolumeController(int num_capture_channels, const Config& config);
+
+  ~InputVolumeController();
+  InputVolumeController(const InputVolumeController&) = delete;
+  InputVolumeController& operator=(const InputVolumeController&) = delete;
+
+  // TODO(webrtc:7494): Integrate initialization into ctor and remove.
+  void Initialize();
+
+  // Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
+  // the analysis can be performed before digital processing operations take
+  // place (e.g., echo cancellation). The analysis consists of input clipping
+  // detection and prediction (if enabled).
+  void AnalyzeInputAudio(int applied_input_volume,
+                         const AudioBuffer& audio_buffer);
+
+  // Adjusts the recommended input volume upwards/downwards based on the
+  // result of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if
+  // specified). Must be called after `AnalyzeInputAudio()`. The value of
+  // `speech_probability` is expected to be in the range [0, 1] and
+  // `speech_level_dbfs` in the range [-90, 30]; both should be estimated
+  // after echo cancellation and noise suppression are applied. Returns a
+  // non-empty input volume recommendation if available. If
+  // `capture_output_used_` is false, returns the applied input volume.
+  absl::optional<int> RecommendInputVolume(
+      float speech_probability,
+      absl::optional<float> speech_level_dbfs);
+
+  // Stores whether the capture output will be used or not. Call when the
+  // capture stream output has been flagged to be used/not-used. If unused,
+  // the controller disregards all incoming audio.
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Returns true if clipping prediction is enabled.
+  // TODO(bugs.webrtc.org/7494): Deprecate this method.
+  bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
+
+  // Returns true if clipping prediction is used to adjust the input volume.
+  // TODO(bugs.webrtc.org/7494): Deprecate this method.
+  bool use_clipping_predictor_step() const {
+    return use_clipping_predictor_step_;
+  }
+
+  // Only use for testing: use `RecommendInputVolume()` elsewhere.
+  // Returns the value of a member variable, needed for testing
+  // `AnalyzeInputAudio()`.
+  int recommended_input_volume() const { return recommended_input_volume_; }
+
+  // Only use for testing.
+  bool capture_output_used() const { return capture_output_used_; }
+
+ private:
+  friend class InputVolumeControllerTestHelper;
+
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDefault);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDisabled);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
+                           MinInputVolumeOutOfRangeAbove);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
+                           MinInputVolumeOutOfRangeBelow);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeEnabled50);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
+                           ClippingParametersVerified);
+
+  // Sets the applied input volume and resets the recommended input volume.
+  void SetAppliedInputVolume(int level);
+
+  void AggregateChannelLevels();
+
+  const int num_capture_channels_;
+
+  // Minimum input volume that can be recommended.
+  const int min_input_volume_;
+
+  // TODO(bugs.webrtc.org/7494): Once
+  // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a trivial
+  // getter, leave uninitialized.
+  // Recommended input volume. After `SetAppliedInputVolume()` is called, it
+  // holds the observed input volume. Possibly updated by
+  // `AnalyzeInputAudio()` and `RecommendInputVolume()`; after these calls,
+  // holds the recommended input volume.
+  int recommended_input_volume_ = 0;
+  // Applied input volume. After `SetAppliedInputVolume()` is called, it holds
+  // the current applied volume.
+  absl::optional<int> applied_input_volume_;
+
+  bool capture_output_used_;
+
+  // Clipping detection and prediction.
+  const int clipped_level_step_;
+  const float clipped_ratio_threshold_;
+  const int clipped_wait_frames_;
+  const std::unique_ptr<ClippingPredictor> clipping_predictor_;
+  const bool use_clipping_predictor_step_;
+  int frames_since_clipped_;
+  int clipping_rate_log_counter_;
+  float clipping_rate_log_;
+
+  // Target range minimum and maximum. If the speech level is in the range
+  // [`target_range_min_dbfs`, `target_range_max_dbfs`], no volume adjustments
+  // take place. Instead, the digital gain controller is assumed to adapt to
+  // compensate for the speech level RMS error.
+  const int target_range_max_dbfs_;
+  const int target_range_min_dbfs_;
+
+  // Channel controllers updating the gain upwards/downwards.
+  std::vector<std::unique_ptr<MonoInputVolumeController>> channel_controllers_;
+  int channel_controlling_gain_ = 0;
+};
+
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class MonoInputVolumeController {
+ public:
+  MonoInputVolumeController(int min_input_volume_after_clipping,
+                            int min_input_volume,
+                            int update_input_volume_wait_frames,
+                            float speech_probability_threshold,
+                            float speech_ratio_threshold);
+  ~MonoInputVolumeController();
+  MonoInputVolumeController(const MonoInputVolumeController&) = delete;
+  MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
+      delete;
+
+  void Initialize();
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Sets the current input volume.
+  void set_stream_analog_level(int input_volume) {
+    recommended_input_volume_ = input_volume;
+  }
+
+  // Lowers the recommended input volume in response to clipping based on the
+  // suggested reduction `clipped_level_step`. Must be called after
+  // `set_stream_analog_level()`.
+  void HandleClipping(int clipped_level_step);
+
+  // TODO(bugs.webrtc.org/7494): Rename; audio is not passed to the method
+  // anymore.
+  // Adjusts the recommended input volume upwards/downwards depending on the
+  // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
+  // allowed for active speech segments and when `rms_error_dbfs` is not
+  // empty. Must be called after `HandleClipping()`.
+  void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
+
+  // Returns the recommended input volume. Must be called after `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  void ActivateLogging() { log_to_histograms_ = true; }
+
+  int min_input_volume_after_clipping() const {
+    return min_input_volume_after_clipping_;
+  }
+
+  // Only used for testing.
+  int min_input_volume() const { return min_input_volume_; }
+
+ private:
+  // Sets a new input volume, after first checking that it hasn't been updated
+  // by the user, in which case no action is taken.
+  void SetInputVolume(int new_volume);
+
+  // Sets the maximum input volume that the input volume controller is allowed
+  // to apply. The volume must be at least `kClippedLevelMin`.
+  void SetMaxLevel(int level);
+
+  int CheckVolumeAndReset();
+
+  // Updates the recommended input volume. If the volume slider needs to be
+  // moved, we check first if the user has adjusted it, in which case we take
+  // no action and cache the updated level.
+  void UpdateInputVolume(int rms_error_dbfs);
+
+  const int min_input_volume_;
+  const int min_input_volume_after_clipping_;
+  int max_input_volume_;
+
+  int last_recommended_input_volume_ = 0;
+
+  bool capture_output_used_ = true;
+  bool check_volume_on_next_process_ = true;
+  bool startup_ = true;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
+  // input volume.
+  // Recommended input volume. After `set_stream_analog_level()` is
+  // called, it holds the observed applied input volume. Possibly updated by
+  // `HandleClipping()` and `Process()`; after these calls, holds the
+  // recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  bool log_to_histograms_ = false;
+
+  // Counters for frames and speech frames since the last update in the
+  // recommended input volume.
+  const int update_input_volume_wait_frames_;
+  int frames_since_update_input_volume_ = 0;
+  int speech_frames_since_update_input_volume_ = 0;
+  bool is_first_frame_ = true;
+
+  // Speech probability threshold for a frame to be considered speech (instead
+  // of silence). Limited to [0.0f, 1.0f].
+  const float speech_probability_threshold_;
+  // Minimum ratio of speech frames. Limited to [0.0f, 1.0f].
+ const float speech_ratio_threshold_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build new file mode 100644 index 0000000000..582eb326f3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + 
DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("input_volume_controller_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc new file mode 100644 index 0000000000..d1bdcf25a5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -0,0 +1,1857 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/input_volume_controller.h"
+
+#include <algorithm>
+#include <fstream>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/metrics.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoAll;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+namespace {
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kInitialInputVolume = 128;
+constexpr int kClippedMin = 165;  // Arbitrary, but different from the default.
+constexpr float kAboveClippedThreshold = 0.2f;
+constexpr int kMinMicLevel = 20;
+constexpr int kClippedLevelStep = 15;
+constexpr float kClippedRatioThreshold = 0.1f;
+constexpr int kClippedWaitFrames = 300;
+constexpr float kHighSpeechProbability = 0.7f;
+constexpr float kLowSpeechProbability = 0.1f;
+constexpr float kSpeechLevel = -25.0f;
+constexpr float kSpeechProbabilityThreshold = 0.5f;
+constexpr float kSpeechRatioThreshold = 0.8f;
+
+constexpr float kMinSample = std::numeric_limits<int16_t>::min();
+constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+
+using InputVolumeControllerConfig = InputVolumeController::Config;
+
+constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{};
+
+std::unique_ptr<InputVolumeController> CreateInputVolumeController(
+    int clipped_level_step = kClippedLevelStep,
+    float clipped_ratio_threshold = kClippedRatioThreshold,
+    int clipped_wait_frames = kClippedWaitFrames,
+    bool enable_clipping_predictor = false,
+    int update_input_volume_wait_frames = 0) {
+  InputVolumeControllerConfig config{
+      .min_input_volume = kMinMicLevel,
+      .clipped_level_min = kClippedMin,
+      .clipped_level_step = clipped_level_step,
+      .clipped_ratio_threshold = clipped_ratio_threshold,
+      .clipped_wait_frames = clipped_wait_frames,
+      .enable_clipping_predictor = enable_clipping_predictor,
+      .target_range_max_dbfs = -18,
+      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = update_input_volume_wait_frames,
+      .speech_probability_threshold = kSpeechProbabilityThreshold,
+      .speech_ratio_threshold = kSpeechRatioThreshold,
+  };
+
+  return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1,
+                                                 config);
+}
+
+// (Over)writes `samples_value` for the samples in `audio_buffer`.
+// When `clipped_ratio`, a value in [0, 1], is greater than 0, the
+// corresponding fraction of the frame is set to a full scale value to
+// simulate clipping.
+void WriteAudioBufferSamples(float samples_value,
+                             float clipped_ratio,
+                             AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  RTC_DCHECK_GE(clipped_ratio, 0.0f);
+  RTC_DCHECK_LE(clipped_ratio, 1.0f);
+  int num_channels = audio_buffer.num_channels();
+  int num_samples = audio_buffer.num_frames();
+  int num_clipping_samples = clipped_ratio * num_samples;
+  for (int ch = 0; ch < num_channels; ++ch) {
+    int i = 0;
+    for (; i < num_clipping_samples; ++i) {
+      audio_buffer.channels()[ch][i] = 32767.0f;
+    }
+    for (; i < num_samples; ++i) {
+      audio_buffer.channels()[ch][i] = samples_value;
+    }
+  }
+}
+
+// (Over)writes samples in `audio_buffer`. Alternates `samples_value` and
+// zero.
+void WriteAlternatingAudioBufferSamples(float samples_value,
+                                        AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  const int num_channels = audio_buffer.num_channels();
+  const int num_frames = audio_buffer.num_frames();
+  for (int ch = 0; ch < num_channels; ++ch) {
+    for (int i = 0; i < num_frames; i += 2) {
+      audio_buffer.channels()[ch][i] = samples_value;
+      audio_buffer.channels()[ch][i + 1] = 0.0f;
+    }
+  }
+}
+
+// Reads a given number of 10 ms chunks from a PCM file and feeds them to
+// `InputVolumeController`.
+class SpeechSamplesReader {
+ private:
+  // Recording properties.
+  static constexpr int kPcmSampleRateHz = 16000;
+  static constexpr int kPcmNumChannels = 1;
+  static constexpr int kPcmBytesPerSamples = sizeof(int16_t);
+
+ public:
+  SpeechSamplesReader()
+      : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"),
+            std::ios::binary | std::ios::ate),
+        audio_buffer_(kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels),
+        buffer_(audio_buffer_.num_frames()),
+        buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) {
+    RTC_CHECK(is_);
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file,
+  // applies `gain_db` and feeds the frames into `controller` by calling
+  // `AnalyzeInputAudio()` and `RecommendInputVolume()` for each frame. Reads
+  // the number of 10 ms frames available in the PCM file if `num_frames` is
+  // too large - i.e., does not loop. `speech_probability` and
+  // `speech_level_dbfs` are passed to `RecommendInputVolume()`.
+  int Feed(int num_frames,
+           int applied_input_volume,
+           int gain_db,
+           float speech_probability,
+           absl::optional<float> speech_level_dbfs,
+           InputVolumeController& controller) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+      controller.AnalyzeInputAudio(applied_input_volume, audio_buffer_);
+      const auto recommended_input_volume = controller.RecommendInputVolume(
+          speech_probability, speech_level_dbfs);
+
+      // Expect no errors: applied volume set for every frame;
+      // `RecommendInputVolume()` returns a non-empty value.
+      EXPECT_TRUE(recommended_input_volume.has_value());
+
+      applied_input_volume = *recommended_input_volume;
+    }
+    return applied_input_volume;
+  }
+
+ private:
+  std::ifstream is_;
+  AudioBuffer audio_buffer_;
+  std::vector<int16_t> buffer_;
+  const std::streamsize buffer_num_bytes_;
+};
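Typical use of the reader, sketched for orientation (editor's illustration, not part of the patch; the argument values are arbitrary):

// Feeds 100 frames of the PCM fixture, amplified by 6 dB, with confident
// speech at -42 dBFS, and returns the volume the controller settles on.
SpeechSamplesReader reader;
std::unique_ptr<InputVolumeController> controller =
    CreateInputVolumeController();
controller->Initialize();
const int final_volume = reader.Feed(/*num_frames=*/100,
                                     /*applied_input_volume=*/100,
                                     /*gain_db=*/6,
                                     /*speech_probability=*/0.9f,
                                     /*speech_level_dbfs=*/-42.0f,
                                     *controller);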
+
+// Runs the `MonoInputVolumeController` processing sequence following the API
+// contract. Returns the updated recommended input volume.
+float UpdateRecommendedInputVolume(MonoInputVolumeController& mono_controller,
+                                   int applied_input_volume,
+                                   float speech_probability,
+                                   absl::optional<int> rms_error_dbfs) {
+  mono_controller.set_stream_analog_level(applied_input_volume);
+  EXPECT_EQ(mono_controller.recommended_analog_level(), applied_input_volume);
+  mono_controller.Process(rms_error_dbfs, speech_probability);
+  return mono_controller.recommended_analog_level();
+}
+
+}  // namespace
+
+// TODO(bugs.webrtc.org/12874): Use constexpr struct with designated
+// initializers once fixed.
+constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
+  InputVolumeControllerConfig config{
+      .clipped_level_min = kClippedMin,
+      .clipped_level_step = kClippedLevelStep,
+      .clipped_ratio_threshold = kClippedRatioThreshold,
+      .clipped_wait_frames = kClippedWaitFrames,
+      .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled,
+      .target_range_max_dbfs = -18,
+      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = 0,
+      .speech_probability_threshold = 0.5f,
+      .speech_ratio_threshold = 1.0f,
+  };
+  return config;
+}
+
+// Helper class that provides an `InputVolumeController` instance with an
+// `AudioBuffer` instance and `CallAgcSequence()`, a helper method that runs
+// the `InputVolumeController` instance on the `AudioBuffer` instance while
+// sticking to the API contract.
+class InputVolumeControllerTestHelper {
+ public:
+  // Ctor. Initializes `audio_buffer` with zeros.
+  // TODO(bugs.webrtc.org/7494): Remove the default argument.
+  InputVolumeControllerTestHelper(const InputVolumeController::Config& config =
+                                      GetInputVolumeControllerTestConfig())
+      : audio_buffer(kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels),
+        controller(/*num_capture_channels=*/1, config) {
+    controller.Initialize();
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+  }
+
+  // Calls the sequence of `InputVolumeController` methods according to the
+  // API contract, namely:
+  // - Sets the applied input volume;
+  // - Uses `audio_buffer` to call `AnalyzeInputAudio()` and
+  //   `RecommendInputVolume()`.
+  // Returns the recommended input volume.
+  absl::optional<int> CallAgcSequence(int applied_input_volume,
+                                      float speech_probability,
+                                      absl::optional<float> speech_level_dbfs,
+                                      int num_calls = 1) {
+    RTC_DCHECK_GE(num_calls, 1);
+    absl::optional<int> volume = applied_input_volume;
+    for (int i = 0; i < num_calls; ++i) {
+      // Repeat the initial volume if `RecommendInputVolume()` doesn't return
+      // a value.
+      controller.AnalyzeInputAudio(volume.value_or(applied_input_volume),
+                                   audio_buffer);
+      volume = controller.RecommendInputVolume(speech_probability,
+                                               speech_level_dbfs);
+
+      // Allow deviation from the API contract: `RecommendInputVolume()`
+      // doesn't return a recommended input volume.
+      if (volume.has_value()) {
+        EXPECT_EQ(*volume, controller.recommended_input_volume());
+      }
+    }
+    return volume;
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  int CallRecommendInputVolume(int num_calls,
+                               int initial_volume,
+                               float speech_probability,
+                               absl::optional<float> speech_level_dbfs) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    // Create non-clipping audio for `AnalyzeInputAudio()`.
+    WriteAlternatingAudioBufferSamples(0.1f * kMaxSample, audio_buffer);
+    int volume = initial_volume;
+    for (int i = 0; i < num_calls; ++i) {
+      controller.AnalyzeInputAudio(volume, audio_buffer);
+      const auto recommended_input_volume = controller.RecommendInputVolume(
+          speech_probability, speech_level_dbfs);
+
+      // Expect no errors: applied volume set for every frame;
+      // `RecommendInputVolume()` returns a non-empty value.
+      EXPECT_TRUE(recommended_input_volume.has_value());
+
+      volume = *recommended_input_volume;
+    }
+    return volume;
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallAnalyzeInputAudio(int num_calls, float clipped_ratio) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    RTC_DCHECK_GE(clipped_ratio, 0.0f);
+    RTC_DCHECK_LE(clipped_ratio, 1.0f);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio,
+                            audio_buffer);
+    for (int i = 0; i < num_calls; ++i) {
+      controller.AnalyzeInputAudio(controller.recommended_input_volume(),
+                                   audio_buffer);
+    }
+  }
+
+  AudioBuffer audio_buffer;
+  InputVolumeController controller;
+};
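The tests below mostly follow one pattern with this helper, sketched here for orientation (editor's illustration, not part of the patch; values are arbitrary):

// Ten API-contract iterations starting from volume 128 with confident speech
// at -25 dBFS; the returned recommendation matches the controller's getter.
InputVolumeControllerTestHelper helper;  // Uses the test config above.
absl::optional<int> volume = helper.CallAgcSequence(
    /*applied_input_volume=*/128,
    /*speech_probability=*/0.9f,
    /*speech_level_dbfs=*/-25.0f,
    /*num_calls=*/10);
EXPECT_TRUE(volume.has_value());
EXPECT_EQ(*volume, helper.controller.recommended_input_volume());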
+
+class InputVolumeControllerChannelSampleRateTest
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int GetNumChannels() const { return std::get<0>(GetParam()); }
+  int GetSampleRateHz() const { return std::get<1>(GetParam()); }
+};
+
+TEST_P(InputVolumeControllerChannelSampleRateTest, CheckIsAlive) {
+  const int num_channels = GetNumChannels();
+  const int sample_rate_hz = GetSampleRateHz();
+
+  constexpr InputVolumeController::Config kConfig{.enable_clipping_predictor =
+                                                      true};
+  InputVolumeController controller(num_channels, kConfig);
+  controller.Initialize();
+  AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz,
+                     num_channels, sample_rate_hz, num_channels);
+
+  constexpr int kStartupVolume = 100;
+  int applied_initial_volume = kStartupVolume;
+
+  // Trigger a downward adaptation with clipping.
+  constexpr int kLevelWithinTargetDbfs =
+      (kConfig.target_range_min_dbfs + kConfig.target_range_max_dbfs) / 2;
+  WriteAlternatingAudioBufferSamples(/*samples_value=*/kMaxSample, buffer);
+  const int initial_volume1 = applied_initial_volume;
+  for (int i = 0; i < 400; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kLowSpeechProbability,
+        /*speech_level_dbfs=*/kLevelWithinTargetDbfs);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  ASSERT_LT(controller.recommended_input_volume(), initial_volume1);
+
+  // Fill in audio that does not clip.
+  WriteAlternatingAudioBufferSamples(/*samples_value=*/1234.5f, buffer);
+
+  // Trigger an upward adaptation.
+  const int initial_volume2 = controller.recommended_input_volume();
+  for (int i = 0; i < kConfig.clipped_wait_frames; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kHighSpeechProbability,
+        /*speech_level_dbfs=*/kConfig.target_range_min_dbfs - 5);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  EXPECT_GT(controller.recommended_input_volume(), initial_volume2);
+
+  // Trigger a downward adaptation.
+  const int initial_volume = controller.recommended_input_volume();
+  for (int i = 0; i < kConfig.update_input_volume_wait_frames; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kHighSpeechProbability,
+        /*speech_level_dbfs=*/kConfig.target_range_max_dbfs + 5);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  EXPECT_LT(controller.recommended_input_volume(), initial_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    InputVolumeControllerChannelSampleRateTest,
+    ::testing::Combine(::testing::Values(1, 2, 3, 6),
+                       ::testing::Values(8000, 16000, 32000, 48000)));
+
+class InputVolumeControllerParametrizedTest
+    : public ::testing::TestWithParam<int> {};
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeAboveMin) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+
+  EXPECT_EQ(*helper.CallAgcSequence(/*applied_input_volume=*/128,
+                                    /*speech_probability=*/0.9f,
+                                    /*speech_level_dbfs=*/-80),
+            128);
+}
+
+TEST_P(
+    InputVolumeControllerParametrizedTest,
+    StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeMaybeBelowMin) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+
+  EXPECT_GE(*helper.CallAgcSequence(/*applied_input_volume=*/10,
+                                    /*speech_probability=*/0.9f,
+                                    /*speech_level_dbfs=*/-80),
+            10);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeRespectedWhenAppliedVolumeNonZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  // Volume change possible; speech level below the digital gain window.
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1,
+                                       /*speech_probability=*/0.9f,
+                                       /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, kMinInputVolume);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       MinVolumeRepeatedlyRespectedWhenAppliedVolumeNonZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  // Volume change possible; speech level below the digital gain window.
+  for (int i = 0; i < 100; ++i) {
+    const int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1,
+                                               /*speech_probability=*/0.9f,
+                                               /*speech_level_dbfs=*/-80);
+    EXPECT_GE(volume, kMinInputVolume);
+  }
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeRespectedOnceWhenAppliedVolumeZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/0,
+                                       /*speech_probability=*/0.9f,
+                                       /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, kMinInputVolume);
+
+  // No change of volume regardless of a speech level below the digital gain
+  // window; applied volume is zero.
+  volume = *helper.CallAgcSequence(/*applied_input_volume=*/0,
+                                   /*speech_probability=*/0.9f,
+                                   /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, 0);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, MicVolumeResponseToRmsError) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -23.0f);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -28.0f);
+
+  // Above the digital gain's window; volume should be increased.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -29.0f);
+  EXPECT_EQ(volume, 128);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -38.0f);
+  EXPECT_EQ(volume, 156);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -23.0f);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -18.0f);
+
+  // Below the digital gain's window; volume should be decreased.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -17.0f);
+  EXPECT_EQ(volume, 155);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -17.0f);
+  EXPECT_EQ(volume, 151);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -9.0f);
+  EXPECT_EQ(volume, 119);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  const int min_input_volume = GetParam();
+  config.min_input_volume = min_input_volume;
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Maximum upwards change is limited.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 183);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 243);
+
+  // Won't go higher than the maximum.
+ volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 255); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 254); + + // Maximum downwards change is limited. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 194); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 137); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 88); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 54); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 33); + + // Won't go lower than the minimum. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, std::max(18, min_input_volume)); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, std::max(12, min_input_volume)); +} + +TEST_P(InputVolumeControllerParametrizedTest, NoActionWhileMuted) { + InputVolumeControllerTestHelper helper_1( + /*config=*/{.min_input_volume = GetParam()}); + InputVolumeControllerTestHelper helper_2( + /*config=*/{.min_input_volume = GetParam()}); + + int volume_1 = *helper_1.CallAgcSequence(/*applied_input_volume=*/255, + kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + int volume_2 = *helper_2.CallAgcSequence(/*applied_input_volume=*/255, + kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + + EXPECT_EQ(volume_1, 255); + EXPECT_EQ(volume_2, 255); + + helper_2.controller.HandleCaptureOutputUsedChange(false); + + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + + volume_1 = + *helper_1.CallAgcSequence(volume_1, kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + volume_2 = + *helper_2.CallAgcSequence(volume_2, kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + + EXPECT_LT(volume_1, 255); + EXPECT_EQ(volume_2, 255); +} + +TEST_P(InputVolumeControllerParametrizedTest, + UnmutingChecksVolumeWithoutRaising) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.controller.HandleCaptureOutputUsedChange(false); + helper.controller.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 127; + + // SetMicVolume should not be called. 
+ EXPECT_EQ( + helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, + kHighSpeechProbability, kSpeechLevel), + kInputVolume); +} + +TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) { + const int min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = min_input_volume}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.controller.HandleCaptureOutputUsedChange(false); + helper.controller.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 11; + + EXPECT_EQ( + helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, + kHighSpeechProbability, kSpeechLevel), + min_input_volume); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ManualLevelChangeResultsInNoSetMicCall) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + ASSERT_NE(volume, 154); + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/154, kHighSpeechProbability, -29.0f); + EXPECT_EQ(volume, 154); + + // Do the same thing, except downwards now. + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/100, kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 100); + + // And finally verify the AGC continues working without a manual change. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 99); +} + +TEST_P(InputVolumeControllerParametrizedTest, + RecoveryAfterManualLevelChangeFromMax) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 183); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 243); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 255); + + // Manual change does not result in SetMicVolume call. + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/50, kHighSpeechProbability, -17.0f); + EXPECT_EQ(helper.controller.recommended_input_volume(), 50); + + // Continues working as usual afterwards. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -38.0f); + + EXPECT_EQ(volume, 65); +} + +// Checks that the minimum input volume is enforced during the upward adjustment +// of the input volume. 
+TEST_P(InputVolumeControllerParametrizedTest, + EnforceMinInputVolumeDuringUpwardsAdjustment) { + const int min_input_volume = GetParam(); + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = min_input_volume; + InputVolumeControllerTestHelper helper(config); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // Manual change below min, but strictly positive, otherwise no action will be + // taken. + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f); + + // Trigger an upward adjustment of the input volume. + EXPECT_EQ(volume, min_input_volume); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -29.0f); + EXPECT_EQ(volume, min_input_volume); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -30.0f); + EXPECT_EQ(volume, min_input_volume); + + // After a number of consistently low speech level observations, the input + // volume is eventually raised above the minimum. + volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume, + kHighSpeechProbability, -38.0f); + EXPECT_GT(volume, min_input_volume); +} + +// Checks that, when the min mic level override is specified, AGC immediately +// applies the minimum mic level after the mic level is manually set below the +// minimum gain to enforce. +TEST_P(InputVolumeControllerParametrizedTest, + RecoveryAfterManualLevelChangeBelowMin) { + const int min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = min_input_volume}); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // Manual change below min, but strictly positive, otherwise + // AGC won't take any action. 
+ volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, min_input_volume); +} + +TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/100, /*clipped_ratio=*/0); + EXPECT_EQ(helper.controller.recommended_input_volume(), 128); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingUnderThresholdHasNoImpact) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.099); + EXPECT_EQ(helper.controller.recommended_input_volume(), 128); +} + +TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.2); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); +} + +TEST_P(InputVolumeControllerParametrizedTest, + WaitingPeriodBetweenClippingChecks) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallAnalyzeInputAudio(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 225); +} + +TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + helper.CallAgcSequence(/*applied_input_volume=*/180, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1000, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingMaxIsRespectedWhenEqualToLevel) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/240, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingMaxIsRespectedWhenHigherThanLevel) { + InputVolumeControllerConfig config = 
GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/200, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  int volume = helper.controller.recommended_input_volume();
+  EXPECT_EQ(volume, 185);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 240);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 240);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/225, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  EXPECT_EQ(helper.controller.recommended_input_volume(), 210);
+
+  // User changed the volume.
+  int volume = helper.CallRecommendInputVolume(
+      /*num_calls=*/1, /*initial_volume=*/250, kHighSpeechProbability, -32.0f);
+  EXPECT_EQ(volume, 250);
+
+  // Move down...
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -8.0f);
+  EXPECT_EQ(volume, 210);
+  // And back up to the new max established by the user.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 250);
+  // Will not move above the new maximum.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 250);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       ClippingDoesNotPullLowVolumeBackUp) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/80, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  int initial_volume = helper.controller.recommended_input_volume();
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  EXPECT_EQ(helper.controller.recommended_input_volume(), initial_volume);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, TakesNoActionOnZeroMicVolume) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  EXPECT_EQ(
+      helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/0,
+                                      kHighSpeechProbability, -48.0f),
+      0);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/255,
+                                       kHighSpeechProbability, kSpeechLevel,
+                                       /*num_calls=*/1);
+
+  EXPECT_EQ(volume, 255);
+
+  WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer);
+  volume = *helper.CallAgcSequence(volume, kHighSpeechProbability,
+                                   kSpeechLevel,
+                                   /*num_calls=*/100);
+
+  EXPECT_EQ(volume, 255);
+
+  WriteAlternatingAudioBufferSamples(kMaxSample, helper.audio_buffer);
+  volume =
+      *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel,
+                              /*num_calls=*/100);
+
+  EXPECT_EQ(volume, 240);
+}
+
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`.
+// Verifies that configurable clipping parameters are initialized as intended.
+TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames);
+  controller->Initialize();
+  EXPECT_EQ(controller->clipped_level_step_, kClippedLevelStep);
+  EXPECT_EQ(controller->clipped_ratio_threshold_, kClippedRatioThreshold);
+  EXPECT_EQ(controller->clipped_wait_frames_, kClippedWaitFrames);
+  std::unique_ptr<InputVolumeController> controller_custom =
+      CreateInputVolumeController(/*clipped_level_step=*/10,
+                                  /*clipped_ratio_threshold=*/0.2f,
+                                  /*clipped_wait_frames=*/50);
+  controller_custom->Initialize();
+  EXPECT_EQ(controller_custom->clipped_level_step_, 10);
+  EXPECT_EQ(controller_custom->clipped_ratio_threshold_, 0.2f);
+  EXPECT_EQ(controller_custom->clipped_wait_frames_, 50);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       DisableClippingPredictorDisablesClippingPredictor) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false);
+  controller->Initialize();
+
+  EXPECT_FALSE(controller->clipping_predictor_enabled());
+  EXPECT_FALSE(controller->use_clipping_predictor_step());
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       EnableClippingPredictorEnablesClippingPredictor) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/true);
+  controller->Initialize();
+
+  EXPECT_TRUE(controller->clipping_predictor_enabled());
+  EXPECT_TRUE(controller->use_clipping_predictor_step());
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       DisableClippingPredictorDoesNotLowerVolume) {
+  int volume = 255;
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.enable_clipping_predictor = false;
+  auto helper = InputVolumeControllerTestHelper(config);
+  helper.controller.Initialize();
+
+  EXPECT_FALSE(helper.controller.clipping_predictor_enabled());
+  EXPECT_FALSE(helper.controller.use_clipping_predictor_step());
+
+  // Expect no change when clipping prediction is disabled.
+  for (int j = 0; j < 31; ++j) {
+    WriteAlternatingAudioBufferSamples(0.99f * kMaxSample,
+                                       helper.audio_buffer);
+    volume =
+        *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel,
+                                /*num_calls=*/5);
+
+    WriteAudioBufferSamples(0.99f * kMaxSample, /*clipped_ratio=*/0.0f,
+                            helper.audio_buffer);
+    volume =
+        *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel,
+                                /*num_calls=*/5);
+
+    EXPECT_EQ(volume, 255);
+  }
+}
+
+// TODO(bugs.webrtc.org/7494): Split into several smaller tests.
+TEST_P(InputVolumeControllerParametrizedTest,
+       UsedClippingPredictionsProduceLowerAnalogLevels) {
+  constexpr int kInitialLevel = 255;
+  constexpr float kCloseToClippingPeakRatio = 0.99f;
+  int volume_1 = kInitialLevel;
+  int volume_2 = kInitialLevel;
+
+  // Create two helpers, one with clipping prediction and one without.
+ auto config_1 = GetInputVolumeControllerTestConfig(); + auto config_2 = GetInputVolumeControllerTestConfig(); + config_1.enable_clipping_predictor = true; + config_2.enable_clipping_predictor = false; + auto helper_1 = InputVolumeControllerTestHelper(config_1); + auto helper_2 = InputVolumeControllerTestHelper(config_2); + helper_1.controller.Initialize(); + helper_2.controller.Initialize(); + + EXPECT_TRUE(helper_1.controller.clipping_predictor_enabled()); + EXPECT_FALSE(helper_2.controller.clipping_predictor_enabled()); + EXPECT_TRUE(helper_1.controller.use_clipping_predictor_step()); + + // Expect a change if clipping prediction is enabled. + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect no change during waiting. + for (int i = 0; i < kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + } + + // Expect a change when the prediction step is used. 
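+ // (Expected arithmetic, using the constants above: helper_1 has already
+ // dropped to kInitialLevel - kClippedLevelStep; the used prediction lowers
+ // it by one more step to kInitialLevel - 2 * kClippedLevelStep, while
+ // helper_2, without the predictor, stays at kInitialLevel.)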
+ WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect no change when clipping is not detected or predicted. + for (int i = 0; i < 2 * kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, + helper_1.audio_buffer); + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + } + + EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect a change for clipping frames. + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 1); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 1); + + EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); + + // Expect no change during waiting. + for (int i = 0; i < kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, + helper_1.audio_buffer); + WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + } + + EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); + + // Expect a change for clipping frames. 
+ WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 1); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 1); + + EXPECT_EQ(volume_1, kInitialLevel - 4 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel - 2 * kClippedLevelStep); +} + +// Checks that passing an empty speech level has no effect on the input volume. +TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) { + InputVolumeController controller(kNumChannels, + GetInputVolumeControllerTestConfig()); + controller.Initialize(); + + // Feed speech with low energy that would trigger an upward adaptation of + // the analog level if the speech probability were not low and the RMS error + // not empty. + constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + int volume = reader.Feed(kNumFrames, kInitialInputVolume, kGainDb, + kLowSpeechProbability, absl::nullopt, controller); + + // Check that no adaptation occurs. + ASSERT_EQ(volume, kInitialInputVolume); +} + +// Checks that the recommended input volume is not updated unless enough +// frames have been processed after the previous update. +TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) { + constexpr int kInputVolume = kInitialInputVolume; + std::unique_ptr<InputVolumeController> controller_wait_0 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/0); + std::unique_ptr<InputVolumeController> controller_wait_100 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/100); + controller_wait_0->Initialize(); + controller_wait_100->Initialize(); + + SpeechSamplesReader reader_1; + SpeechSamplesReader reader_2; + int volume_wait_0 = reader_1.Feed( + /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability, + /*speech_level_dbfs=*/-42.0f, *controller_wait_0); + int volume_wait_100 = reader_2.Feed( + /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability, + /*speech_level_dbfs=*/-42.0f, *controller_wait_100); + + // Check that adaptation only occurs if enough frames have been processed. + ASSERT_GT(volume_wait_0, kInputVolume); + ASSERT_EQ(volume_wait_100, kInputVolume); + + volume_wait_0 = + reader_1.Feed(/*num_frames=*/1, volume_wait_0, + /*gain_db=*/0, kHighSpeechProbability, + /*speech_level_dbfs=*/-42.0f, *controller_wait_0); + volume_wait_100 = + reader_2.Feed(/*num_frames=*/1, volume_wait_100, + /*gain_db=*/0, kHighSpeechProbability, + /*speech_level_dbfs=*/-42.0f, *controller_wait_100); + + // Check that adaptation only occurs when enough frames have been processed. + ASSERT_GT(volume_wait_0, kInputVolume); + ASSERT_GT(volume_wait_100, kInputVolume); +} + +INSTANTIATE_TEST_SUITE_P(, + InputVolumeControllerParametrizedTest, + ::testing::Values(12, 20)); + +TEST(InputVolumeControllerTest, + MinInputVolumeEnforcedWithClippingWhenAboveClippedLevelMin) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = 80, .clipped_level_min = 70}); + + // Trigger a downward adjustment caused by clipping input. Use a low speech + // probability to limit the volume changes to clipping handling.
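+ // (The clipping handler may lower the volume, but never below
+ // max(min_input_volume, clipped_level_min) = max(80, 70) = 80, which is
+ // what the expectation below verifies.)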
+ WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + helper.audio_buffer); + constexpr int kNumCalls = 800; + helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability, + /*speech_level_dbfs=*/-18.0f, kNumCalls); + + EXPECT_EQ(helper.controller.recommended_input_volume(), 80); +} + +TEST(InputVolumeControllerTest, + ClippedLevelMinEnforcedWithClippingWhenAboveMinInputVolume) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = 70, .clipped_level_min = 80}); + + // Trigger a downward adjustment caused by clipping input. Use a low speech + // probability to limit the volume changes to clipping handling. + WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + helper.audio_buffer); + constexpr int kNumCalls = 800; + helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability, + /*speech_level_dbfs=*/-18.0f, kNumCalls); + + EXPECT_EQ(helper.controller.recommended_input_volume(), 80); +} + +TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) { + constexpr int kInputVolume = kInitialInputVolume; + // Create two input volume controllers with 10 frames between volume updates, + // a minimum speech ratio of 0.8 and a speech probability threshold of 0.5. + std::unique_ptr<InputVolumeController> controller_1 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + std::unique_ptr<InputVolumeController> controller_2 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + controller_1->Initialize(); + controller_2->Initialize(); + + SpeechSamplesReader reader_1; + SpeechSamplesReader reader_2; + + int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, + /*speech_probability=*/0.7f, + /*speech_level_dbfs=*/-42.0f, *controller_1); + int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, + /*speech_probability=*/0.4f, + /*speech_level_dbfs=*/-42.0f, *controller_2); + + ASSERT_EQ(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); + + volume_1 = reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0, + /*speech_probability=*/0.4f, + /*speech_level_dbfs=*/-42.0f, *controller_1); + volume_2 = reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0, + /*speech_probability=*/0.4f, + /*speech_level_dbfs=*/-42.0f, *controller_2); + + ASSERT_EQ(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); + + volume_1 = reader_1.Feed( + /*num_frames=*/7, volume_1, /*gain_db=*/0, + /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_1); + volume_2 = reader_2.Feed( + /*num_frames=*/7, volume_2, /*gain_db=*/0, + /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_2); + + ASSERT_GT(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); +} + +TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) { + constexpr int kInputVolume = kInitialInputVolume; + // Create two input volume controllers with the exact same settings and + // 10 frames between volume updates.
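+ // (Frames with speech probability below the 0.5 threshold do not count as
+ // speech, so `controller_2`, which is only fed probability 0.49, never
+ // accumulates enough speech frames to update the volume.)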
+ std::unique_ptr<InputVolumeController> controller_1 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + std::unique_ptr<InputVolumeController> controller_2 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + controller_1->Initialize(); + controller_2->Initialize(); + + SpeechSamplesReader reader_1; + SpeechSamplesReader reader_2; + + // Process two sets of inputs: use `reader_1` to process inputs that cause + // the volume to be adjusted once enough frames have been processed and + // `reader_2` to process inputs that do not trigger an adjustment. + int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, + /*speech_probability=*/0.5f, + /*speech_level_dbfs=*/-42.0f, *controller_1); + int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, + /*speech_probability=*/0.49f, + /*speech_level_dbfs=*/-42.0f, *controller_2); + + ASSERT_EQ(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); + + reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0, + /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0, + /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f, + *controller_2); + + ASSERT_EQ(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); + + volume_1 = reader_1.Feed( + /*num_frames=*/7, volume_1, /*gain_db=*/0, + /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_1); + volume_2 = reader_2.Feed( + /*num_frames=*/7, volume_2, /*gain_db=*/0, + /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_2); + + ASSERT_GT(volume_1, kInputVolume); + ASSERT_EQ(volume_2, kInputVolume); +} + +TEST(InputVolumeControllerTest, + DoNotLogRecommendedInputVolumeOnChangeToMatchTarget) { + metrics::Reset(); + + SpeechSamplesReader reader; + auto controller = CreateInputVolumeController(); + controller->Initialize(); + // Trigger a downward volume change by inputting audio that clips. Pass a + // speech level that falls in the target range to make sure that the + // adaptation is not made to match the target range. + constexpr int kStartupVolume = 255; + const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, + /*gain_db=*/50, kHighSpeechProbability, + /*speech_level_dbfs=*/-20.0f, *controller); + ASSERT_LT(volume, kStartupVolume); + EXPECT_METRIC_THAT( + metrics::Samples( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), + ::testing::IsEmpty()); +} + +TEST(InputVolumeControllerTest, + LogRecommendedInputVolumeOnUpwardChangeToMatchTarget) { + metrics::Reset(); + + SpeechSamplesReader reader; + auto controller = CreateInputVolumeController(); + controller->Initialize(); + constexpr int kStartupVolume = 100; + // Trigger an upward volume change by inputting audio that does not clip and + // by passing a speech level below the target range.
+ const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, + /*gain_db=*/-6, kHighSpeechProbability, + /*speech_level_dbfs=*/-50.0f, *controller); + ASSERT_GT(volume, kStartupVolume); + EXPECT_METRIC_THAT( + metrics::Samples( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), + ::testing::Not(::testing::IsEmpty())); +} + +TEST(InputVolumeControllerTest, + LogRecommendedInputVolumeOnDownwardChangeToMatchTarget) { + metrics::Reset(); + + SpeechSamplesReader reader; + auto controller = CreateInputVolumeController(); + controller->Initialize(); + constexpr int kStartupVolume = 100; + // Trigger a downward volume change by inputting audio that does not clip and + // by passing a speech level above the target range. + const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, + /*gain_db=*/-6, kHighSpeechProbability, + /*speech_level_dbfs=*/-5.0f, *controller); + ASSERT_LT(volume, kStartupVolume); + EXPECT_METRIC_THAT( + metrics::Samples( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), + ::testing::Not(::testing::IsEmpty())); +} + +TEST(MonoInputVolumeControllerTest, CheckHandleClippingLowersVolume) { + constexpr int kInitialInputVolume = 100; + constexpr int kInputVolumeStep = 29; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/70, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + UpdateRecommendedInputVolume(mono_controller, kInitialInputVolume, + kLowSpeechProbability, + /*rms_error_dbfs=*/-10.0f); + + mono_controller.HandleClipping(kInputVolumeStep); + + EXPECT_EQ(mono_controller.recommended_analog_level(), + kInitialInputVolume - kInputVolumeStep); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); +
MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, + /*speech_probability_threshold=*/0.7, + /*speech_ratio_threshold=*/0.8); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, -30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, 14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, 14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 
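+ // (RMS errors beyond the clamping range produce the same volume change as
+ // an error exactly at the bound, which is why `volume_2` and `volume_3`
+ // are expected to be equal below.)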
+ int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, 15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, 30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, 15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, 30.0f); + + EXPECT_GT(volume_1, kInitialInputVolume); + EXPECT_GT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessRmsErrorDecreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, -10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, 10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, CheckClippedLevelMinIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kClippedLevelMin = 70; + MonoInputVolumeController mono_controller_1( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + // Process one frame to reset the state for `HandleClipping()`. 
+ EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + + mono_controller_1.HandleClipping(29); + mono_controller_2.HandleClipping(31); + + EXPECT_EQ(mono_controller_2.recommended_analog_level(), kClippedLevelMin); + EXPECT_LT(mono_controller_2.recommended_analog_level(), + mono_controller_1.recommended_analog_level()); +} + +TEST(MonoInputVolumeControllerTest, CheckMinMicLevelIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kMinMicLevel = 64; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, kMinMicLevel); +} + +TEST(MonoInputVolumeControllerTest, + CheckUpdateInputVolumeWaitFramesIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/1, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_2, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckSpeechProbabilityThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr float kSpeechProbabilityThreshold = 0.8f; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + 
MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = + UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + int volume_2 = + UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, volume_1, kSpeechProbabilityThreshold - 0.1f, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, CheckSpeechRatioThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kLowSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessEmptyRmsErrorDoesNotLowerVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, 
volume_1, kHighSpeechProbability, absl::nullopt); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc new file mode 100644 index 0000000000..05624b1f92 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/input_volume_stats_reporter.h" + +#include <cmath> + +#include "absl/strings/string_view.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +using InputVolumeType = InputVolumeStatsReporter::InputVolumeType; + +constexpr int kFramesIn60Seconds = 6000; +constexpr int kMinInputVolume = 0; +constexpr int kMaxInputVolume = 255; +constexpr int kMaxUpdate = kMaxInputVolume - kMinInputVolume; + +int ComputeAverageUpdate(int sum_updates, int num_updates) { + RTC_DCHECK_GE(sum_updates, 0); + RTC_DCHECK_LE(sum_updates, kMaxUpdate * kFramesIn60Seconds); + RTC_DCHECK_GE(num_updates, 0); + RTC_DCHECK_LE(num_updates, kFramesIn60Seconds); + if (num_updates == 0) { + return 0; + } + return std::round(static_cast<float>(sum_updates) / + static_cast<float>(num_updates)); +} + +constexpr absl::string_view MetricNamePrefix( + InputVolumeType input_volume_type) { + switch (input_volume_type) { + case InputVolumeType::kApplied: + return "WebRTC.Audio.Apm.AppliedInputVolume."; + case InputVolumeType::kRecommended: + return "WebRTC.Audio.Apm.RecommendedInputVolume."; + } +} + +metrics::Histogram* CreateVolumeHistogram(InputVolumeType input_volume_type) { + char buffer[64]; + rtc::SimpleStringBuilder builder(buffer); + builder << MetricNamePrefix(input_volume_type) << "OnChange"; + return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(), + /*min=*/1, + /*max=*/kMaxInputVolume, + /*bucket_count=*/50); +} + +metrics::Histogram* CreateRateHistogram(InputVolumeType input_volume_type, + absl::string_view name) { + char buffer[64]; + rtc::SimpleStringBuilder builder(buffer); + builder << MetricNamePrefix(input_volume_type) << name; + return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(), + /*min=*/1, + /*max=*/kFramesIn60Seconds, + /*bucket_count=*/50); +} + +metrics::Histogram* CreateAverageHistogram(InputVolumeType input_volume_type, + absl::string_view name) { + char buffer[64]; + rtc::SimpleStringBuilder builder(buffer); + builder << MetricNamePrefix(input_volume_type) << name; + return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(), + /*min=*/1, + /*max=*/kMaxUpdate, + /*bucket_count=*/50); +} + +} // namespace + +InputVolumeStatsReporter::InputVolumeStatsReporter(InputVolumeType type) + : histograms_( + {.on_volume_change = CreateVolumeHistogram(type), + .decrease_rate =
CreateRateHistogram(type, "DecreaseRate"), + .decrease_average = CreateAverageHistogram(type, "DecreaseAverage"), + .increase_rate = CreateRateHistogram(type, "IncreaseRate"), + .increase_average = CreateAverageHistogram(type, "IncreaseAverage"), + .update_rate = CreateRateHistogram(type, "UpdateRate"), + .update_average = CreateAverageHistogram(type, "UpdateAverage")}), + cannot_log_stats_(!histograms_.AllPointersSet()) { + if (cannot_log_stats_) { + RTC_LOG(LS_WARNING) << "Will not log any `" << MetricNamePrefix(type) + << "*` histogram stats."; + } +} + +InputVolumeStatsReporter::~InputVolumeStatsReporter() = default; + +void InputVolumeStatsReporter::UpdateStatistics(int input_volume) { + if (cannot_log_stats_) { + // Since the stats cannot be logged, do not bother updating them. + return; + } + + RTC_DCHECK_GE(input_volume, kMinInputVolume); + RTC_DCHECK_LE(input_volume, kMaxInputVolume); + if (previous_input_volume_.has_value() && + input_volume != previous_input_volume_.value()) { + // Update stats when the input volume changes. + metrics::HistogramAdd(histograms_.on_volume_change, input_volume); + // Update stats that are periodically logged. + const int volume_change = input_volume - previous_input_volume_.value(); + if (volume_change < 0) { + ++volume_update_stats_.num_decreases; + volume_update_stats_.sum_decreases -= volume_change; + } else { + ++volume_update_stats_.num_increases; + volume_update_stats_.sum_increases += volume_change; + } + } + // Periodically log input volume change metrics. + if (++log_volume_update_stats_counter_ >= kFramesIn60Seconds) { + LogVolumeUpdateStats(); + volume_update_stats_ = {}; + log_volume_update_stats_counter_ = 0; + } + previous_input_volume_ = input_volume; +} + +void InputVolumeStatsReporter::LogVolumeUpdateStats() const { + // Decrease rate and average. + metrics::HistogramAdd(histograms_.decrease_rate, + volume_update_stats_.num_decreases); + if (volume_update_stats_.num_decreases > 0) { + int average_decrease = ComputeAverageUpdate( + volume_update_stats_.sum_decreases, volume_update_stats_.num_decreases); + metrics::HistogramAdd(histograms_.decrease_average, average_decrease); + } + // Increase rate and average. + metrics::HistogramAdd(histograms_.increase_rate, + volume_update_stats_.num_increases); + if (volume_update_stats_.num_increases > 0) { + int average_increase = ComputeAverageUpdate( + volume_update_stats_.sum_increases, volume_update_stats_.num_increases); + metrics::HistogramAdd(histograms_.increase_average, average_increase); + } + // Update rate and average. 
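+ // Worked example, assuming 3 decreases summing to 6 and 1 increase of 4:
+ // the update rate is 4 and the average update is round((6 + 4) / 4.0) = 3.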
+ int num_updates = + volume_update_stats_.num_decreases + volume_update_stats_.num_increases; + metrics::HistogramAdd(histograms_.update_rate, num_updates); + if (num_updates > 0) { + int average_update = ComputeAverageUpdate( + volume_update_stats_.sum_decreases + volume_update_stats_.sum_increases, + num_updates); + metrics::HistogramAdd(histograms_.update_average, average_update); + } +} + +void UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(int volume) { + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget", volume, + 1, kMaxInputVolume, 50); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h new file mode 100644 index 0000000000..31b110031c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_ + +#include "absl/types/optional.h" +#include "rtc_base/gtest_prod_util.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +// Input volume statistics calculator. Computes aggregate stats based on the +// framewise input volume observed by `UpdateStatistics()`. Periodically logs +// the statistics into a histogram. +class InputVolumeStatsReporter { + public: + enum class InputVolumeType { + kApplied = 0, + kRecommended = 1, + }; + + explicit InputVolumeStatsReporter(InputVolumeType input_volume_type); + InputVolumeStatsReporter(const InputVolumeStatsReporter&) = delete; + InputVolumeStatsReporter operator=(const InputVolumeStatsReporter&) = delete; + ~InputVolumeStatsReporter(); + + // Updates the stats based on `input_volume`. Periodically logs the stats into + // a histogram. + void UpdateStatistics(int input_volume); + + private: + FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsForEmptyStats); + FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterNoVolumeChange); + FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeIncrease); + FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeDecrease); + FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterReset); + + // Stores input volume update stats to enable calculation of update rate and + // average update separately for volume increases and decreases. + struct VolumeUpdateStats { + int num_decreases = 0; + int num_increases = 0; + int sum_decreases = 0; + int sum_increases = 0; + } volume_update_stats_; + + // Returns a copy of the stored statistics. Use only for testing. + VolumeUpdateStats volume_update_stats() const { return volume_update_stats_; } + + // Computes aggregate stats and logs them into a histogram. + void LogVolumeUpdateStats() const; + + // Histograms.
+ struct Histograms { + metrics::Histogram* const on_volume_change; + metrics::Histogram* const decrease_rate; + metrics::Histogram* const decrease_average; + metrics::Histogram* const increase_rate; + metrics::Histogram* const increase_average; + metrics::Histogram* const update_rate; + metrics::Histogram* const update_average; + bool AllPointersSet() const { + return !!on_volume_change && !!decrease_rate && !!decrease_average && + !!increase_rate && !!increase_average && !!update_rate && + !!update_average; + } + } histograms_; + + // True if the stats cannot be logged. + const bool cannot_log_stats_; + + int log_volume_update_stats_counter_ = 0; + absl::optional<int> previous_input_volume_ = absl::nullopt; +}; + +// Updates the histogram that keeps track of recommended input volume changes +// required in order to match the target level in the input volume adaptation +// process. +void UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(int volume); + +} // namespace webrtc + +#endif  // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build new file mode 100644 index 0000000000..5a662f4079 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] ==
"Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += 
[ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("input_volume_stats_reporter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc new file mode 100644 index 0000000000..e762c1fb59 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/input_volume_stats_reporter.h" + +#include "absl/strings/string_view.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/metrics.h" +#include "test/gmock.h" + +namespace webrtc { +namespace { + +using InputVolumeType = InputVolumeStatsReporter::InputVolumeType; + +constexpr int kFramesIn60Seconds = 6000; + +constexpr absl::string_view kLabelPrefix = "WebRTC.Audio.Apm."; + +class InputVolumeStatsReporterTest + : public ::testing::TestWithParam<InputVolumeType> { + public: + InputVolumeStatsReporterTest() { metrics::Reset(); } + + protected: + InputVolumeType InputVolumeType() const { return GetParam(); } + std::string VolumeLabel() const { + return (rtc::StringBuilder(kLabelPrefix) << VolumeTypeLabel() << "OnChange") + .str(); + } + std::string DecreaseRateLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "DecreaseRate") + .str(); + } + std::string DecreaseAverageLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "DecreaseAverage") + .str(); + } + std::string IncreaseRateLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "IncreaseRate") + .str(); + } + std::string IncreaseAverageLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "IncreaseAverage") + .str(); + } + std::string UpdateRateLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "UpdateRate") + .str(); + } + std::string UpdateAverageLabel() const { + return (rtc::StringBuilder(kLabelPrefix) + << VolumeTypeLabel() << "UpdateAverage") + .str(); + } + + private: + absl::string_view VolumeTypeLabel() const { + switch (InputVolumeType()) { + case InputVolumeType::kApplied: + return "AppliedInputVolume."; + case InputVolumeType::kRecommended: + return "RecommendedInputVolume."; + } + } +}; + +TEST_P(InputVolumeStatsReporterTest, CheckVolumeOnChangeIsEmpty) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(10); + EXPECT_METRIC_THAT(metrics::Samples(VolumeLabel()), ::testing::ElementsAre()); +} + +TEST_P(InputVolumeStatsReporterTest, CheckRateAverageStatsEmpty) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + constexpr int kInputVolume = 10; + stats_reporter.UpdateStatistics(kInputVolume); + // Update almost until the periodic logging and reset.
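+ // (The initial call plus the kFramesIn60Seconds - 2 updates below leave the
+ // internal counter just under the logging threshold, so no histogram
+ // samples are expected yet.)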
+ for (int i = 0; i < kFramesIn60Seconds - 2; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume + 2); + stats_reporter.UpdateStatistics(kInputVolume); + } + EXPECT_METRIC_THAT(metrics::Samples(UpdateRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(DecreaseRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(IncreaseRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(UpdateAverageLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(DecreaseAverageLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(IncreaseAverageLabel()), + ::testing::ElementsAre()); +} + +TEST_P(InputVolumeStatsReporterTest, CheckSamples) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + + constexpr int kInputVolume1 = 10; + stats_reporter.UpdateStatistics(kInputVolume1); + // Update until periodic logging. + constexpr int kInputVolume2 = 12; + for (int i = 0; i < kFramesIn60Seconds; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume2); + stats_reporter.UpdateStatistics(kInputVolume1); + } + // Update until periodic logging. + constexpr int kInputVolume3 = 13; + for (int i = 0; i < kFramesIn60Seconds; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume3); + stats_reporter.UpdateStatistics(kInputVolume1); + } + + // Check volume changes stats. + EXPECT_METRIC_THAT( + metrics::Samples(VolumeLabel()), + ::testing::ElementsAre( + ::testing::Pair(kInputVolume1, kFramesIn60Seconds), + ::testing::Pair(kInputVolume2, kFramesIn60Seconds / 2), + ::testing::Pair(kInputVolume3, kFramesIn60Seconds / 2))); + + // Check volume change rate stats. + EXPECT_METRIC_THAT( + metrics::Samples(UpdateRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds - 1, 1), + ::testing::Pair(kFramesIn60Seconds, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(DecreaseRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds / 2 - 1, 1), + ::testing::Pair(kFramesIn60Seconds / 2, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(IncreaseRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds / 2, 2))); + + // Check volume change average stats. 
+ EXPECT_METRIC_THAT( + metrics::Samples(UpdateAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(DecreaseAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(IncreaseAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); +} +} // namespace + +TEST_P(InputVolumeStatsReporterTest, CheckVolumeUpdateStatsForEmptyStats) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 0); + EXPECT_EQ(update_stats.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterNoVolumeChange) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 0); + EXPECT_EQ(update_stats.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeIncrease) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume + 4); + stats_reporter.UpdateStatistics(kInputVolume + 5); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 2); + EXPECT_EQ(update_stats.sum_increases, 5); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeDecrease) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume - 4); + stats_reporter.UpdateStatistics(kInputVolume - 5); + const auto& stats_update = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_update.num_decreases, 2); + EXPECT_EQ(stats_update.sum_decreases, 5); + EXPECT_EQ(stats_update.num_increases, 0); + EXPECT_EQ(stats_update.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, CheckVolumeUpdateStatsAfterReset) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + constexpr int kInputVolume = 10; + stats_reporter.UpdateStatistics(kInputVolume); + // Update until the periodic reset. 
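+ // (As in the rate/average test above, the initial call plus this loop leave
+ // the counter at kFramesIn60Seconds - 1; the single update that follows
+ // reaches the threshold, logs the histograms and clears the stats.)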
+ for (int i = 0; i < kFramesIn60Seconds - 2; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume + 2); + stats_reporter.UpdateStatistics(kInputVolume); + } + const auto& stats_before_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_before_reset.num_decreases, kFramesIn60Seconds / 2 - 1); + EXPECT_EQ(stats_before_reset.sum_decreases, kFramesIn60Seconds - 2); + EXPECT_EQ(stats_before_reset.num_increases, kFramesIn60Seconds / 2 - 1); + EXPECT_EQ(stats_before_reset.sum_increases, kFramesIn60Seconds - 2); + stats_reporter.UpdateStatistics(kInputVolume + 2); + const auto& stats_during_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_during_reset.num_decreases, 0); + EXPECT_EQ(stats_during_reset.sum_decreases, 0); + EXPECT_EQ(stats_during_reset.num_increases, 0); + EXPECT_EQ(stats_during_reset.sum_increases, 0); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume + 3); + const auto& stats_after_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_after_reset.num_decreases, 1); + EXPECT_EQ(stats_after_reset.sum_decreases, 2); + EXPECT_EQ(stats_after_reset.num_increases, 1); + EXPECT_EQ(stats_after_reset.sum_increases, 3); +} + +INSTANTIATE_TEST_SUITE_P(, + InputVolumeStatsReporterTest, + ::testing::Values(InputVolumeType::kApplied, + InputVolumeType::kRecommended)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc new file mode 100644 index 0000000000..bb6e038514 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/interpolated_gain_curve.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_x_;
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_m_;
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_q_;
+
+InterpolatedGainCurve::InterpolatedGainCurve(
+    ApmDataDumper* apm_data_dumper,
+    absl::string_view histogram_name_prefix)
+    : region_logger_(
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Identity")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Knee")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Limiter")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix
+           << ".FixedDigitalGainCurveRegion.Saturation")
+              .str()),
+      apm_data_dumper_(apm_data_dumper) {}
+
+InterpolatedGainCurve::~InterpolatedGainCurve() {
+  if (stats_.available) {
+    RTC_DCHECK(apm_data_dumper_);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_identity",
+                              stats_.look_ups_identity_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_knee",
+                              stats_.look_ups_knee_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_limiter",
+                              stats_.look_ups_limiter_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_saturation",
+                              stats_.look_ups_saturation_region);
+    region_logger_.LogRegionStats(stats_);
+  }
+}
+
+InterpolatedGainCurve::RegionLogger::RegionLogger(
+    absl::string_view identity_histogram_name,
+    absl::string_view knee_histogram_name,
+    absl::string_view limiter_histogram_name,
+    absl::string_view saturation_histogram_name)
+    : identity_histogram(
+          metrics::HistogramFactoryGetCounts(identity_histogram_name,
+                                             1,
+                                             10000,
+                                             50)),
+      knee_histogram(metrics::HistogramFactoryGetCounts(knee_histogram_name,
+                                                        1,
+                                                        10000,
+                                                        50)),
+      limiter_histogram(
+          metrics::HistogramFactoryGetCounts(limiter_histogram_name,
+                                             1,
+                                             10000,
+                                             50)),
+      saturation_histogram(
+          metrics::HistogramFactoryGetCounts(saturation_histogram_name,
+                                             1,
+                                             10000,
+                                             50)) {}
+
+InterpolatedGainCurve::RegionLogger::~RegionLogger() = default;
+
+void InterpolatedGainCurve::RegionLogger::LogRegionStats(
+    const InterpolatedGainCurve::Stats& stats) const {
+  using Region = InterpolatedGainCurve::GainCurveRegion;
+  const int duration_s =
+      stats.region_duration_frames / (1000 / kFrameDurationMs);
+
+  switch (stats.region) {
+    case Region::kIdentity: {
+      if (identity_histogram) {
+        metrics::HistogramAdd(identity_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kKnee: {
+      if (knee_histogram) {
+        metrics::HistogramAdd(knee_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kLimiter: {
+      if (limiter_histogram) {
+        metrics::HistogramAdd(limiter_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kSaturation: {
+      if (saturation_histogram) {
+        metrics::HistogramAdd(saturation_histogram, duration_s);
+      }
+      break;
+    }
+    default: {
+      RTC_DCHECK_NOTREACHED();
+    }
+  }
+}
+
+void InterpolatedGainCurve::UpdateStats(float input_level) const {
+  stats_.available = true;
+
+  GainCurveRegion region;
+
+  if (input_level <
+      approximation_params_x_[0]) {
+    stats_.look_ups_identity_region++;
+    region = GainCurveRegion::kIdentity;
+  } else if (input_level <
+             approximation_params_x_[kInterpolatedGainCurveKneePoints - 1]) {
+    stats_.look_ups_knee_region++;
+    region = GainCurveRegion::kKnee;
+  } else if (input_level < kMaxInputLevelLinear) {
+    stats_.look_ups_limiter_region++;
+    region = GainCurveRegion::kLimiter;
+  } else {
+    stats_.look_ups_saturation_region++;
+    region = GainCurveRegion::kSaturation;
+  }
+
+  if (region == stats_.region) {
+    ++stats_.region_duration_frames;
+  } else {
+    region_logger_.LogRegionStats(stats_);
+
+    stats_.region_duration_frames = 0;
+    stats_.region = region;
+  }
+}
+
+// Looks up a gain to apply given a non-negative input level.
+// The cost of this operation depends on the region in which `input_level`
+// falls.
+// For the identity and the saturation regions the cost is O(1).
+// For the other regions, namely knee and limiter, the cost is
+// O(2 + log2(`kInterpolatedGainCurveTotalPoints`)), plus O(1) for the
+// linear interpolation (one product and one sum).
+float InterpolatedGainCurve::LookUpGainToApply(float input_level) const {
+  UpdateStats(input_level);
+
+  if (input_level <= approximation_params_x_[0]) {
+    // Identity region.
+    return 1.0f;
+  }
+
+  if (input_level >= kMaxInputLevelLinear) {
+    // Saturating lower bound. The saturating samples exactly hit the clipping
+    // level. This method has the lowest harmonic distortion, but it may
+    // reduce the amplitude of the non-saturating samples too much.
+    return 32768.f / input_level;
+  }
+
+  // Knee and limiter regions; find the linear piece index. Spelling
+  // out the complete type was the only way to silence both the clang
+  // plugin and the windows compilers.
+  std::array<float, kInterpolatedGainCurveTotalPoints>::const_iterator it =
+      std::lower_bound(approximation_params_x_.begin(),
+                       approximation_params_x_.end(), input_level);
+  const size_t index = std::distance(approximation_params_x_.begin(), it) - 1;
+  RTC_DCHECK_LE(0, index);
+  RTC_DCHECK_LT(index, approximation_params_m_.size());
+  RTC_DCHECK_LE(approximation_params_x_[index], input_level);
+  if (index < approximation_params_m_.size() - 1) {
+    RTC_DCHECK_LE(input_level, approximation_params_x_[index + 1]);
+  }
+
+  // Piece-wise linear interpolation.
+  const float gain = approximation_params_m_[index] * input_level +
+                     approximation_params_q_[index];
+  RTC_DCHECK_LE(0.f, gain);
+  return gain;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h
new file mode 100644
index 0000000000..8dd3e48f21
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_
+
+#include <array>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+constexpr float kInputLevelScalingFactor = 32768.0f;
+
+// Defined as DbfsToLinear(kLimiterMaxInputLevelDbFs).
+constexpr float kMaxInputLevelLinear = static_cast<float>(36766.300710566735);
+
+// Interpolated gain curve using under-approximation to avoid saturation.
+//
+// The goal of this class is to allow fast look-ups that produce an accurate
+// estimate of the gain to apply for a given estimated input level.
+class InterpolatedGainCurve {
+ public:
+  enum class GainCurveRegion {
+    kIdentity = 0,
+    kKnee = 1,
+    kLimiter = 2,
+    kSaturation = 3
+  };
+
+  struct Stats {
+    // Region in which the output level equals the input one.
+    size_t look_ups_identity_region = 0;
+    // Smoothing between the identity and the limiter regions.
+    size_t look_ups_knee_region = 0;
+    // Limiter region in which the output and input levels are linearly
+    // related.
+    size_t look_ups_limiter_region = 0;
+    // Region in which saturation may occur since the input level is beyond
+    // the maximum expected by the limiter.
+    size_t look_ups_saturation_region = 0;
+    // True if stats have been populated.
+    bool available = false;
+
+    // The current region, and for how many frames the level has been
+    // in that region.
+    GainCurveRegion region = GainCurveRegion::kIdentity;
+    int64_t region_duration_frames = 0;
+  };
+
+  InterpolatedGainCurve(ApmDataDumper* apm_data_dumper,
+                        absl::string_view histogram_name_prefix);
+  ~InterpolatedGainCurve();
+
+  InterpolatedGainCurve(const InterpolatedGainCurve&) = delete;
+  InterpolatedGainCurve& operator=(const InterpolatedGainCurve&) = delete;
+
+  Stats get_stats() const { return stats_; }
+
+  // Given a non-negative input level (linear scale), returns a scalar factor
+  // to apply to a sub-frame. Levels above kLimiterMaxInputLevelDbFs will be
+  // reduced to 0 dBFS after applying this gain.
+  float LookUpGainToApply(float input_level) const;
+
+ private:
+  // For comparing 'approximation_params_*_' with ones computed by
+  // ComputeInterpolatedGainCurve.
+  FRIEND_TEST_ALL_PREFIXES(GainController2InterpolatedGainCurve,
+                           CheckApproximationParams);
+
+  struct RegionLogger {
+    metrics::Histogram* identity_histogram;
+    metrics::Histogram* knee_histogram;
+    metrics::Histogram* limiter_histogram;
+    metrics::Histogram* saturation_histogram;
+
+    RegionLogger(absl::string_view identity_histogram_name,
+                 absl::string_view knee_histogram_name,
+                 absl::string_view limiter_histogram_name,
+                 absl::string_view saturation_histogram_name);
+
+    ~RegionLogger();
+
+    void LogRegionStats(const InterpolatedGainCurve::Stats& stats) const;
+  } region_logger_;
+
+  void UpdateStats(float input_level) const;
+
+  ApmDataDumper* const apm_data_dumper_;
+
+  static constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+      approximation_params_x_ = {
+          {30057.296875, 30148.986328125, 30240.67578125, 30424.052734375,
+           30607.4296875, 30790.806640625, 30974.18359375, 31157.560546875,
+           31340.939453125, 31524.31640625, 31707.693359375, 31891.0703125,
+           32074.447265625, 32257.82421875, 32441.201171875, 32624.580078125,
+           32807.95703125, 32991.33203125, 33174.7109375, 33358.08984375,
+           33541.46484375, 33724.84375, 33819.53515625, 34009.5390625,
+           34200.05859375, 34389.81640625, 34674.48828125, 35054.375,
+           35434.86328125, 35814.81640625, 36195.16796875, 36575.03125}};
+  static constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+      approximation_params_m_ = {
+          {-3.515235675877192989e-07, -1.050251626111275982e-06,
+           -2.085213736791047268e-06, -3.443004743530764244e-06,
+           -4.773849468620028347e-06, -6.077375928725814447e-06,
+           -7.353257842623861507e-06, -8.601219633419532329e-06,
+           -9.821013009059242904e-06, -1.101243378798244521e-05,
+           -1.217532644659513608e-05, -1.330956911260727793e-05,
+           -1.441507538402220234e-05, -1.549179251014720649e-05,
+           -1.653970684856176376e-05, -1.755882840370759368e-05,
+           -1.854918446042574942e-05, -1.951086778717581183e-05,
+           -2.044398024736437947e-05, -2.1348627342376858e-05,
+           -2.222496914328075945e-05, -2.265374678245279938e-05,
+           -2.242570917587727308e-05, -2.220122041762806475e-05,
+           -2.19802095671184361e-05, -2.176260204578284174e-05,
+           -2.133731686626560986e-05, -2.092481918225530535e-05,
+           -2.052459603874012828e-05, -2.013615448959171772e-05,
+           -1.975903069251216948e-05, -1.939277899509761482e-05}};
+
+  static constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+      approximation_params_q_ = {
+          {1.010565876960754395, 1.031631827354431152, 1.062929749488830566,
+           1.104239225387573242, 1.144973039627075195, 1.185109615325927734,
+           1.224629044532775879, 1.263512492179870605, 1.301741957664489746,
+           1.339300632476806641, 1.376173257827758789, 1.412345528602600098,
+           1.447803974151611328, 1.482536554336547852, 1.516532182693481445,
+           1.549780607223510742, 1.582272171974182129, 1.613999366760253906,
+           1.644955039024353027, 1.675132393836975098, 1.704526185989379883,
+           1.718986630439758301, 1.711274504661560059, 1.703639745712280273,
+           1.696081161499023438, 1.688597679138183594, 1.673851132392883301,
+           1.659391283988952637, 1.645209431648254395, 1.631297469139099121,
+           1.617647409439086914, 1.604251742362976074}};
+
+  // Stats.
+  mutable Stats stats_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc
new file mode 100644
index 0000000000..7861ae997d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/interpolated_gain_curve.h"
+
+#include <cmath>
+#include <type_traits>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h"
+#include "modules/audio_processing/agc2/limiter_db_gain_curve.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr double kLevelEpsilon = 1e-2 * kMaxAbsFloatS16Value;
+constexpr float kInterpolatedGainCurveTolerance = 1.f / 32768.f;
+ApmDataDumper apm_data_dumper(0);
+static_assert(std::is_trivially_destructible<LimiterDbGainCurve>::value, "");
+const LimiterDbGainCurve limiter;
+
+}  // namespace
+
+TEST(GainController2InterpolatedGainCurve, CreateUse) {
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      kLevelEpsilon, DbfsToFloatS16(limiter.max_input_level_db() + 1), 500);
+  for (const auto level : levels) {
+    EXPECT_GE(igc.LookUpGainToApply(level), 0.0f);
+  }
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckValidOutput) {
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      kLevelEpsilon, limiter.max_input_level_linear() * 2.0, 500);
+  for (const auto level : levels) {
+    SCOPED_TRACE(std::to_string(level));
+    const float gain = igc.LookUpGainToApply(level);
+    EXPECT_LE(0.0f, gain);
+    EXPECT_LE(gain, 1.0f);
+  }
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckMonotonicity) {
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      kLevelEpsilon, limiter.max_input_level_linear() + kLevelEpsilon + 0.5,
+      500);
+  float prev_gain = igc.LookUpGainToApply(0.0f);
+  for (const auto level : levels) {
+    const float gain = igc.LookUpGainToApply(level);
+    EXPECT_GE(prev_gain, gain);
+    prev_gain = gain;
+  }
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckApproximation) {
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      kLevelEpsilon, limiter.max_input_level_linear() - kLevelEpsilon, 500);
+  for (const auto level : levels) {
+    SCOPED_TRACE(std::to_string(level));
+    EXPECT_LT(
+        std::fabs(limiter.GetGainLinear(level) - igc.LookUpGainToApply(level)),
+        kInterpolatedGainCurveTolerance);
+  }
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckRegionBoundaries) {
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const std::vector<double> levels{
+      {kLevelEpsilon, limiter.knee_start_linear() + kLevelEpsilon,
+       limiter.limiter_start_linear() +
kLevelEpsilon, + limiter.max_input_level_linear() + kLevelEpsilon}}; + for (const auto level : levels) { + igc.LookUpGainToApply(level); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(1ul, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(1ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckIdentityRegion) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(kLevelEpsilon, limiter.knee_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + EXPECT_EQ(1.0f, igc.LookUpGainToApply(level)); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(0ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(limiter.knee_start_linear() + kLevelEpsilon, + limiter.limiter_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). + EXPECT_LE(igc.LookUpGainToApply(level), + limiter.GetGainLinear(level) + 1e-7); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(0ul, stats.look_ups_identity_region); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationBeyondKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + limiter.limiter_start_linear() + kLevelEpsilon, + limiter.max_input_level_linear() - kLevelEpsilon, kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). 
+    EXPECT_LE(igc.LookUpGainToApply(level),
+              limiter.GetGainLinear(level) + 1e-7);
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_limiter_region);
+  EXPECT_EQ(0ul, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve,
+     CheckNoOverApproximationWithSaturation) {
+  constexpr size_t kNumSteps = 3;
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      limiter.max_input_level_linear() + kLevelEpsilon,
+      limiter.max_input_level_linear() + kLevelEpsilon + 0.5, kNumSteps);
+  for (const auto level : levels) {
+    SCOPED_TRACE(std::to_string(level));
+    EXPECT_LE(igc.LookUpGainToApply(level), limiter.GetGainLinear(level));
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(0ul, stats.look_ups_limiter_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckApproximationParams) {
+  test::InterpolatedParameters parameters =
+      test::ComputeInterpolatedGainCurveApproximationParams();
+
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  for (size_t i = 0; i < kInterpolatedGainCurveTotalPoints; ++i) {
+    // The tolerance levels are chosen to account for deviations due
+    // to computing with single precision floating point numbers.
+    EXPECT_NEAR(igc.approximation_params_x_[i],
+                parameters.computed_approximation_params_x[i], 0.9f);
+    EXPECT_NEAR(igc.approximation_params_m_[i],
+                parameters.computed_approximation_params_m[i], 0.00001f);
+    EXPECT_NEAR(igc.approximation_params_q_[i],
+                parameters.computed_approximation_params_q[i], 0.001f);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
new file mode 100644
index 0000000000..7a1e2202be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/limiter.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+// This constant affects the way scaling factors are interpolated for the first
+// sub-frame of a frame. Only when the first sub-frame has an estimated level
+// greater than that of the previously analyzed sub-frame is linear
+// interpolation replaced with a power function, which reduces the chances of
+// over-shooting (and hence saturation) at the cost of some fixed gain
+// effectiveness.
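+// For instance, with p = 8, a quarter of the way into the sub-frame the
+// weight (1 - i / n)^p applied to the previous factor is 0.75^8, roughly
+// 0.10, so the gain is already close to the new (lower) scaling factor,
+// whereas linear interpolation would still weight the previous factor by
+// 0.75.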
+constexpr float kAttackFirstSubframeInterpolationPower = 8.0f;
+
+void InterpolateFirstSubframe(float last_factor,
+                              float current_factor,
+                              rtc::ArrayView<float> subframe) {
+  const int n = rtc::dchecked_cast<int>(subframe.size());
+  constexpr float p = kAttackFirstSubframeInterpolationPower;
+  for (int i = 0; i < n; ++i) {
+    // Use a floating point division so that i / n is a fraction in [0, 1).
+    subframe[i] =
+        std::pow(1.f - static_cast<float>(i) / n, p) *
+            (last_factor - current_factor) +
+        current_factor;
+  }
+}
+
+void ComputePerSampleSubframeFactors(
+    const std::array<float, kSubFramesInFrame + 1>& scaling_factors,
+    int samples_per_channel,
+    rtc::ArrayView<float> per_sample_scaling_factors) {
+  const int num_subframes = scaling_factors.size() - 1;
+  const int subframe_size =
+      rtc::CheckedDivExact(samples_per_channel, num_subframes);
+
+  // Handle first sub-frame differently in case of attack.
+  const bool is_attack = scaling_factors[0] > scaling_factors[1];
+  if (is_attack) {
+    InterpolateFirstSubframe(
+        scaling_factors[0], scaling_factors[1],
+        rtc::ArrayView<float>(
+            per_sample_scaling_factors.subview(0, subframe_size)));
+  }
+
+  for (int i = is_attack ? 1 : 0; i < num_subframes; ++i) {
+    const int subframe_start = i * subframe_size;
+    const float scaling_start = scaling_factors[i];
+    const float scaling_end = scaling_factors[i + 1];
+    const float scaling_diff = (scaling_end - scaling_start) / subframe_size;
+    for (int j = 0; j < subframe_size; ++j) {
+      per_sample_scaling_factors[subframe_start + j] =
+          scaling_start + scaling_diff * j;
+    }
+  }
+}
+
+void ScaleSamples(rtc::ArrayView<const float> per_sample_scaling_factors,
+                  AudioFrameView<float> signal) {
+  const int samples_per_channel = signal.samples_per_channel();
+  RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size());
+  for (int i = 0; i < signal.num_channels(); ++i) {
+    rtc::ArrayView<float> channel = signal.channel(i);
+    for (int j = 0; j < samples_per_channel; ++j) {
+      channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j],
+                                  kMinFloatS16Value, kMaxFloatS16Value);
+    }
+  }
+}
+
+void CheckLimiterSampleRate(int sample_rate_hz) {
+  // Check that per_sample_scaling_factors_ is large enough.
+  RTC_DCHECK_LE(sample_rate_hz,
+                kMaximalNumberOfSamplesPerChannel * 1000 / kFrameDurationMs);
+}
+
+}  // namespace
+
+Limiter::Limiter(int sample_rate_hz,
+                 ApmDataDumper* apm_data_dumper,
+                 absl::string_view histogram_name)
+    : interp_gain_curve_(apm_data_dumper, histogram_name),
+      level_estimator_(sample_rate_hz, apm_data_dumper),
+      apm_data_dumper_(apm_data_dumper) {
+  CheckLimiterSampleRate(sample_rate_hz);
+}
+
+Limiter::~Limiter() = default;
+
+void Limiter::Process(AudioFrameView<float> signal) {
+  const std::array<float, kSubFramesInFrame> level_estimate =
+      level_estimator_.ComputeLevel(signal);
+
+  RTC_DCHECK_EQ(level_estimate.size() + 1, scaling_factors_.size());
+  scaling_factors_[0] = last_scaling_factor_;
+  std::transform(level_estimate.begin(), level_estimate.end(),
+                 scaling_factors_.begin() + 1, [this](float x) {
+                   return interp_gain_curve_.LookUpGainToApply(x);
+                 });
+
+  const int samples_per_channel = signal.samples_per_channel();
+  RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);
+
+  auto per_sample_scaling_factors = rtc::ArrayView<float>(
+      &per_sample_scaling_factors_[0], samples_per_channel);
+  ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel,
+                                  per_sample_scaling_factors);
+  ScaleSamples(per_sample_scaling_factors, signal);
+
+  last_scaling_factor_ = scaling_factors_.back();
+
+  // Dump data for debug.
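+  // (These dumps compile to no-ops when WEBRTC_APM_DEBUG_DUMP is "0", as in
+  // the generated moz.build configs below.)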
+  apm_data_dumper_->DumpRaw("agc2_limiter_last_scaling_factor",
+                            last_scaling_factor_);
+  apm_data_dumper_->DumpRaw(
+      "agc2_limiter_region",
+      static_cast<int>(interp_gain_curve_.get_stats().region));
+}
+
+InterpolatedGainCurve::Stats Limiter::GetGainCurveStats() const {
+  return interp_gain_curve_.get_stats();
+}
+
+void Limiter::SetSampleRate(int sample_rate_hz) {
+  CheckLimiterSampleRate(sample_rate_hz);
+  level_estimator_.SetSampleRate(sample_rate_hz);
+}
+
+void Limiter::Reset() {
+  level_estimator_.Reset();
+}
+
+float Limiter::LastAudioLevel() const {
+  return level_estimator_.LastAudioLevel();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h
new file mode 100644
index 0000000000..d4d556349c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_
+
+#include <array>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+#include "modules/audio_processing/agc2/interpolated_gain_curve.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+class Limiter {
+ public:
+  Limiter(int sample_rate_hz,
+          ApmDataDumper* apm_data_dumper,
+          absl::string_view histogram_name_prefix);
+  Limiter(const Limiter& limiter) = delete;
+  Limiter& operator=(const Limiter& limiter) = delete;
+  ~Limiter();
+
+  // Applies limiter and hard-clipping to `signal`.
+  void Process(AudioFrameView<float> signal);
+  InterpolatedGainCurve::Stats GetGainCurveStats() const;
+
+  // Supported rates must be
+  // * supported by FixedDigitalLevelEstimator
+  // * below kMaximalNumberOfSamplesPerChannel*1000/kFrameDurationMs
+  //   so that samples_per_channel fits in the
+  //   per_sample_scaling_factors_ array.
+  void SetSampleRate(int sample_rate_hz);
+
+  // Resets the internal state.
+  void Reset();
+
+  float LastAudioLevel() const;
+
+ private:
+  const InterpolatedGainCurve interp_gain_curve_;
+  FixedDigitalLevelEstimator level_estimator_;
+  ApmDataDumper* const apm_data_dumper_ = nullptr;
+
+  // Work array containing the sub-frame scaling factors to be interpolated.
+  std::array<float, kSubFramesInFrame + 1> scaling_factors_ = {};
+  std::array<float, kMaximalNumberOfSamplesPerChannel>
+      per_sample_scaling_factors_ = {};
+  float last_scaling_factor_ = 1.f;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc
new file mode 100644
index 0000000000..d47c0b2e17
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/limiter_db_gain_curve.h"
+
+#include <cmath>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+double ComputeKneeStart(double max_input_level_db,
+                        double knee_smoothness_db,
+                        double compression_ratio) {
+  RTC_CHECK_LT((compression_ratio - 1.0) * knee_smoothness_db /
+                   (2.0 * compression_ratio),
+               max_input_level_db);
+  return -knee_smoothness_db / 2.0 -
+         max_input_level_db / (compression_ratio - 1.0);
+}
+
+std::array<double, 3> ComputeKneeRegionPolynomial(double knee_start_dbfs,
+                                                  double knee_smoothness_db,
+                                                  double compression_ratio) {
+  const double a = (1.0 - compression_ratio) /
+                   (2.0 * knee_smoothness_db * compression_ratio);
+  const double b = 1.0 - 2.0 * a * knee_start_dbfs;
+  const double c = a * knee_start_dbfs * knee_start_dbfs;
+  return {{a, b, c}};
+}
+
+double ComputeLimiterD1(double max_input_level_db, double compression_ratio) {
+  return (std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) *
+          (1.0 - compression_ratio) / compression_ratio) /
+         kMaxAbsFloatS16Value;
+}
+
+constexpr double ComputeLimiterD2(double compression_ratio) {
+  return (1.0 - 2.0 * compression_ratio) / compression_ratio;
+}
+
+double ComputeLimiterI2(double max_input_level_db,
+                        double compression_ratio,
+                        double gain_curve_limiter_i1) {
+  RTC_CHECK_NE(gain_curve_limiter_i1, 0.f);
+  return std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) /
+         gain_curve_limiter_i1 /
+         std::pow(kMaxAbsFloatS16Value, gain_curve_limiter_i1 - 1);
+}
+
+}  // namespace
+
+LimiterDbGainCurve::LimiterDbGainCurve()
+    : max_input_level_linear_(DbfsToFloatS16(max_input_level_db_)),
+      knee_start_dbfs_(ComputeKneeStart(max_input_level_db_,
+                                        knee_smoothness_db_,
+                                        compression_ratio_)),
+      knee_start_linear_(DbfsToFloatS16(knee_start_dbfs_)),
+      limiter_start_dbfs_(knee_start_dbfs_ + knee_smoothness_db_),
+      limiter_start_linear_(DbfsToFloatS16(limiter_start_dbfs_)),
+      knee_region_polynomial_(ComputeKneeRegionPolynomial(knee_start_dbfs_,
+                                                          knee_smoothness_db_,
+                                                          compression_ratio_)),
+      gain_curve_limiter_d1_(
+          ComputeLimiterD1(max_input_level_db_, compression_ratio_)),
+      gain_curve_limiter_d2_(ComputeLimiterD2(compression_ratio_)),
+      gain_curve_limiter_i1_(1.0 / compression_ratio_),
+      gain_curve_limiter_i2_(ComputeLimiterI2(max_input_level_db_,
+                                              compression_ratio_,
+                                              gain_curve_limiter_i1_)) {
+  static_assert(knee_smoothness_db_ > 0.0f, "");
+  static_assert(compression_ratio_ > 1.0f, "");
+  RTC_CHECK_GE(max_input_level_db_, knee_start_dbfs_ + knee_smoothness_db_);
+}
+
+constexpr double LimiterDbGainCurve::max_input_level_db_;
+constexpr double LimiterDbGainCurve::knee_smoothness_db_;
+constexpr double LimiterDbGainCurve::compression_ratio_;
+
+double LimiterDbGainCurve::GetOutputLevelDbfs(double input_level_dbfs) const {
+  if (input_level_dbfs < knee_start_dbfs_) {
+    return input_level_dbfs;
+  } else if (input_level_dbfs < limiter_start_dbfs_) {
+    return GetKneeRegionOutputLevelDbfs(input_level_dbfs);
+  }
+  return GetCompressorRegionOutputLevelDbfs(input_level_dbfs);
+}
+
+double LimiterDbGainCurve::GetGainLinear(double input_level_linear) const {
+  if (input_level_linear < knee_start_linear_) {
+    return 1.0;
+  }
+  return DbfsToFloatS16(
+             GetOutputLevelDbfs(FloatS16ToDbfs(input_level_linear))) /
+         input_level_linear;
+}
+
+// Computes the first derivative of GetGainLinear() in `x`.
+double LimiterDbGainCurve::GetGainFirstDerivativeLinear(double x) const {
+  // Beyond-knee region only.
+  RTC_CHECK_GE(x, limiter_start_linear_ - 1e-7 * kMaxAbsFloatS16Value);
+  return gain_curve_limiter_d1_ *
+         std::pow(x / kMaxAbsFloatS16Value, gain_curve_limiter_d2_);
+}
+
+// Computes the integral of GetGainLinear() in the range [x0, x1].
+double LimiterDbGainCurve::GetGainIntegralLinear(double x0, double x1) const {
+  RTC_CHECK_LE(x0, x1);                     // Valid interval.
+  RTC_CHECK_GE(x0, limiter_start_linear_);  // Beyond-knee region only.
+  auto limiter_integral = [this](const double& x) {
+    return gain_curve_limiter_i2_ * std::pow(x, gain_curve_limiter_i1_);
+  };
+  return limiter_integral(x1) - limiter_integral(x0);
+}
+
+double LimiterDbGainCurve::GetKneeRegionOutputLevelDbfs(
+    double input_level_dbfs) const {
+  return knee_region_polynomial_[0] * input_level_dbfs * input_level_dbfs +
+         knee_region_polynomial_[1] * input_level_dbfs +
+         knee_region_polynomial_[2];
+}
+
+double LimiterDbGainCurve::GetCompressorRegionOutputLevelDbfs(
+    double input_level_dbfs) const {
+  return (input_level_dbfs - max_input_level_db_) / compression_ratio_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h
new file mode 100644
index 0000000000..9086e26739
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_
+
+#include <array>
+
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+
+namespace webrtc {
+
+// A class for computing a limiter gain curve (in dB scale) given a set of
+// hard-coded parameters (namely, test::kLimiterMaxInputLevelDbFs,
+// test::kLimiterKneeSmoothnessDb, and test::kLimiterCompressionRatio). The
+// generated curve consists of four regions: identity (linear), knee
+// (quadratic polynomial), compression (linear), saturation (linear). The
+// aforementioned constants are used to shape the different regions.
+class LimiterDbGainCurve {
+ public:
+  LimiterDbGainCurve();
+
+  double max_input_level_db() const { return max_input_level_db_; }
+  double max_input_level_linear() const { return max_input_level_linear_; }
+  double knee_start_linear() const { return knee_start_linear_; }
+  double limiter_start_linear() const { return limiter_start_linear_; }
+
+  // These methods can be marked 'constexpr' in C++14.
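+  // Region by region, with x the input level in dBFS, CR the compression
+  // ratio and {a, b, c} the knee polynomial coefficients, the output level is
+  //   x                     below the knee (identity),
+  //   a * x^2 + b * x + c   within the knee, and
+  //   (x - max_input_level_db_) / CR   in the compression region.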
+  double GetOutputLevelDbfs(double input_level_dbfs) const;
+  double GetGainLinear(double input_level_linear) const;
+  double GetGainFirstDerivativeLinear(double x) const;
+  double GetGainIntegralLinear(double x0, double x1) const;
+
+ private:
+  double GetKneeRegionOutputLevelDbfs(double input_level_dbfs) const;
+  double GetCompressorRegionOutputLevelDbfs(double input_level_dbfs) const;
+
+  static constexpr double max_input_level_db_ =
+      test::kLimiterMaxInputLevelDbFs;
+  static constexpr double knee_smoothness_db_ = test::kLimiterKneeSmoothnessDb;
+  static constexpr double compression_ratio_ = test::kLimiterCompressionRatio;
+
+  const double max_input_level_linear_;
+
+  // Do not modify signal with level <= knee_start_dbfs_.
+  const double knee_start_dbfs_;
+  const double knee_start_linear_;
+
+  // The upper end of the knee region, which is between knee_start_dbfs_ and
+  // limiter_start_dbfs_.
+  const double limiter_start_dbfs_;
+  const double limiter_start_linear_;
+
+  // Coefficients {a, b, c} of the knee region polynomial
+  // a * x^2 + b * x + c in the dB scale.
+  const std::array<double, 3> knee_region_polynomial_;
+
+  // Parameters for the computation of the first derivative of GetGainLinear().
+  const double gain_curve_limiter_d1_;
+  const double gain_curve_limiter_d2_;
+
+  // Parameters for the computation of the integral of GetGainLinear().
+  const double gain_curve_limiter_i1_;
+  const double gain_curve_limiter_i2_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc
new file mode 100644
index 0000000000..049c8d568e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" + +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(FixedDigitalGainController2Limiter, ConstructDestruct) { + LimiterDbGainCurve l; +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeMonotone) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(last_output_level, current_output_level); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeContinuous) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + constexpr float kMaxDelta = 0.5f; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(current_output_level, last_output_level + kMaxDelta); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, OutputGainShouldBeLessThanFullScale) { + LimiterDbGainCurve l; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + EXPECT_LE(current_output_level, 0.f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc new file mode 100644 index 0000000000..e662a7fc89 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/limiter.h" + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(Limiter, LimiterShouldConstructAndRun) { + const int sample_rate_hz = 48000; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100, + kMaxAbsFloatS16Value); + limiter.Process(vectors_with_float_frame.float_frame_view()); +} + +TEST(Limiter, OutputVolumeAboveThreshold) { + const int sample_rate_hz = 48000; + const float input_level = + (kMaxAbsFloatS16Value + DbfsToFloatS16(test::kLimiterMaxInputLevelDbFs)) / + 2.f; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + // Give the level estimator time to adapt. 
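+  // (Five frames, i.e. 50 ms, are assumed to be enough for the level
+  // estimate to settle; the exact number needed depends on the attack time
+  // of FixedDigitalLevelEstimator.)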
+  for (int i = 0; i < 5; ++i) {
+    VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100,
+                                              input_level);
+    limiter.Process(vectors_with_float_frame.float_frame_view());
+  }
+
+  VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100,
+                                            input_level);
+  limiter.Process(vectors_with_float_frame.float_frame_view());
+  rtc::ArrayView<const float> channel =
+      vectors_with_float_frame.float_frame_view().channel(0);
+
+  for (const auto& sample : channel) {
+    EXPECT_LT(0.9f * kMaxAbsFloatS16Value, sample);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc
new file mode 100644
index 0000000000..691513b509
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kFramesPerSecond = 100;
+
+float FrameEnergy(const AudioFrameView<const float>& audio) {
+  float energy = 0.0f;
+  for (int k = 0; k < audio.num_channels(); ++k) {
+    float channel_energy =
+        std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f,
+                        [](float a, float b) -> float { return a + b * b; });
+    energy = std::max(channel_energy, energy);
+  }
+  return energy;
+}
+
+float EnergyToDbfs(float signal_energy, int num_samples) {
+  RTC_DCHECK_GE(signal_energy, 0.0f);
+  const float rms_square = signal_energy / num_samples;
+  constexpr float kMinDbfs = -90.30899869919436f;
+  if (rms_square <= 1.0f) {
+    return kMinDbfs;
+  }
+  return 10.0f * std::log10(rms_square) + kMinDbfs;
+}
+
+// Updates the noise floor with instant decay and slow attack. This tuning is
+// specific to AGC2, so that (i) it can promptly increase the gain if the noise
+// floor drops (instant decay) and (ii) in case of music or fast speech, due to
+// which the noise floor can be overestimated, the gain reduction is slowed
+// down.
+float SmoothNoiseFloorEstimate(float current_estimate, float new_estimate) {
+  constexpr float kAttack = 0.5f;
+  if (current_estimate < new_estimate) {
+    // Attack phase.
+    return kAttack * new_estimate + (1.0f - kAttack) * current_estimate;
+  }
+  // Instant decay.
+  return new_estimate;
+}
+
+class NoiseFloorEstimator : public NoiseLevelEstimator {
+ public:
+  // Update the noise floor every 5 seconds.
+  static constexpr int kUpdatePeriodNumFrames = 500;
+  static_assert(kUpdatePeriodNumFrames >= 200,
+                "A too small value may cause noise level overestimation.");
+  static_assert(kUpdatePeriodNumFrames <= 1500,
+                "A too large value may make AGC2 slow at reacting to increased "
+                "noise levels.");
+
+  NoiseFloorEstimator(ApmDataDumper* data_dumper) : data_dumper_(data_dumper) {
+    RTC_DCHECK(data_dumper_);
+    // Initially assume that 48 kHz will be used. `Analyze()` will detect the
+    // used sample rate and call `Initialize()` again if needed.
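+    // E.g., if 10 ms frames with 160 samples per channel arrive later, the
+    // inferred rate is 160 * kFramesPerSecond = 16000 Hz and the state is
+    // re-initialized accordingly.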
+    Initialize(/*sample_rate_hz=*/48000);
+  }
+  NoiseFloorEstimator(const NoiseFloorEstimator&) = delete;
+  NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete;
+  ~NoiseFloorEstimator() = default;
+
+  float Analyze(const AudioFrameView<const float>& frame) override {
+    // Detect sample rate changes.
+    const int sample_rate_hz =
+        static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);
+    if (sample_rate_hz != sample_rate_hz_) {
+      Initialize(sample_rate_hz);
+    }
+
+    const float frame_energy = FrameEnergy(frame);
+    if (frame_energy <= min_noise_energy_) {
+      // Ignore frames when muted or below the minimum measurable energy.
+      if (data_dumper_)
+        data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
+                              noise_energy_);
+      return EnergyToDbfs(noise_energy_,
+                          static_cast<int>(frame.samples_per_channel()));
+    }
+
+    if (preliminary_noise_energy_set_) {
+      preliminary_noise_energy_ =
+          std::min(preliminary_noise_energy_, frame_energy);
+    } else {
+      preliminary_noise_energy_ = frame_energy;
+      preliminary_noise_energy_set_ = true;
+    }
+    if (data_dumper_)
+      data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
+                            preliminary_noise_energy_);
+
+    if (counter_ == 0) {
+      // Full period observed.
+      first_period_ = false;
+      // Update the estimated noise floor energy with the preliminary
+      // estimation.
+      noise_energy_ = SmoothNoiseFloorEstimate(
+          /*current_estimate=*/noise_energy_,
+          /*new_estimate=*/preliminary_noise_energy_);
+      // Reset for a new observation period.
+      counter_ = kUpdatePeriodNumFrames;
+      preliminary_noise_energy_set_ = false;
+    } else if (first_period_) {
+      // While analyzing the signal during the initial period, continuously
+      // update the estimated noise energy, which is monotonic.
+      noise_energy_ = preliminary_noise_energy_;
+      counter_--;
+    } else {
+      // During the observation period it's only allowed to lower the energy.
+      noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_);
+      counter_--;
+    }
+
+    float noise_rms_dbfs = EnergyToDbfs(
+        noise_energy_, static_cast<int>(frame.samples_per_channel()));
+    if (data_dumper_)
+      data_dumper_->DumpRaw("agc2_noise_rms_dbfs", noise_rms_dbfs);
+
+    return noise_rms_dbfs;
+  }
+
+ private:
+  void Initialize(int sample_rate_hz) {
+    sample_rate_hz_ = sample_rate_hz;
+    first_period_ = true;
+    preliminary_noise_energy_set_ = false;
+    // Initialize the minimum noise energy to -84 dBFS.
+    min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond;
+    preliminary_noise_energy_ = min_noise_energy_;
+    noise_energy_ = min_noise_energy_;
+    counter_ = kUpdatePeriodNumFrames;
+  }
+
+  ApmDataDumper* const data_dumper_;
+  int sample_rate_hz_;
+  float min_noise_energy_;
+  bool first_period_;
+  bool preliminary_noise_energy_set_;
+  float preliminary_noise_energy_;
+  float noise_energy_;
+  int counter_;
+};
+
+}  // namespace
+
+std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator(
+    ApmDataDumper* data_dumper) {
+  return std::make_unique<NoiseFloorEstimator>(data_dumper);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h
new file mode 100644
index 0000000000..9f3b957486
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
+
+#include <memory>
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+// Noise level estimator interface.
+class NoiseLevelEstimator {
+ public:
+  virtual ~NoiseLevelEstimator() = default;
+  // Analyzes a 10 ms `frame`, updates the noise level estimation and returns
+  // the value for the latter in dBFS.
+  virtual float Analyze(const AudioFrameView<const float>& frame) = 0;
+};
+
+// Creates a noise level estimator based on noise floor detection.
+std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator(
+    ApmDataDumper* data_dumper);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build
new file mode 100644
index 0000000000..ba000d3862
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0"
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += 
[ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("noise_level_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc new file mode 100644 index 0000000000..8168c5a229 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/noise_level_estimator.h" + +#include +#include +#include +#include + +#include "api/function_view.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr int kNumIterations = 200; +constexpr int kFramesPerSecond = 100; + +// Runs the noise estimator on audio generated by 'sample_generator' +// for kNumIterations. Returns the last noise level estimate. +float RunEstimator(rtc::FunctionView sample_generator, + NoiseLevelEstimator& estimator, + int sample_rate_hz) { + const int samples_per_channel = + rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond); + VectorFloatFrame signal(1, samples_per_channel, 0.0f); + for (int i = 0; i < kNumIterations; ++i) { + AudioFrameView frame_view = signal.float_frame_view(); + for (int j = 0; j < samples_per_channel; ++j) { + frame_view.channel(0)[j] = sample_generator(); + } + estimator.Analyze(frame_view); + } + return estimator.Analyze(signal.float_frame_view()); +} + +class NoiseEstimatorParametrization : public ::testing::TestWithParam { + protected: + int sample_rate_hz() const { return GetParam(); } +}; + +// Checks that full scale white noise maps to about -5.5 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithRandomNoise) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::WhiteNoiseGenerator gen(/*min_amplitude=*/test::kMinS16, + /*max_amplitude=*/test::kMaxS16); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -5.5f, 0.5f); +} + +// Checks that a full scale sine wave maps to about -3 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithSineTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::SineGenerator gen(/*amplitude=*/test::kMaxS16, /*frequency_hz=*/600.0f, + sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -3.0f, 0.1f); +} + +// Check that sufficiently spaced periodic pulses do not raise the estimated +// noise floor, which is determined by the amplitude of the non-pulse samples. 
+TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithPulseTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + constexpr float kNoPulseAmplitude = 10.0f; + test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, kNoPulseAmplitude, + /*frequency_hz=*/20.0f, sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + const float expected_noise_floor_dbfs = + 20.0f * std::log10f(kNoPulseAmplitude / test::kMaxS16); + EXPECT_NEAR(noise_level_dbfs, expected_noise_floor_dbfs, 0.5f); +} + +INSTANTIATE_TEST_SUITE_P(GainController2NoiseEstimator, + NoiseEstimatorParametrization, + ::testing::Values(8000, 16000, 32000, 48000)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn new file mode 100644 index 0000000000..d709eb3699 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -0,0 +1,334 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +rtc_library("rnn_vad") { + visibility = [ "../*" ] + sources = [ + "features_extraction.cc", + "features_extraction.h", + "rnn.cc", + "rnn.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + "..:biquad_filter", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] +} + +rtc_library("rnn_vad_auto_correlation") { + sources = [ + "auto_correlation.cc", + "auto_correlation.h", + ] + deps = [ + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_common") { + # TODO(alessiob): Make this target visibility private. 
+ visibility = [ + ":*", + "..:vad_wrapper", + ] + sources = [ "common.h" ] + deps = [ + "../../../../rtc_base/system:arch", + "../../../../system_wrappers", + ] +} + +rtc_library("rnn_vad_lp_residual") { + sources = [ + "lp_residual.cc", + "lp_residual.h", + ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +rtc_source_set("rnn_vad_layers") { + sources = [ + "rnn_fc.cc", + "rnn_fc.h", + "rnn_gru.cc", + "rnn_gru.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../api:function_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("vector_math") { + sources = [ "vector_math.h" ] + deps = [ + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] +} + +if (current_cpu == "x86" || current_cpu == "x64") { + rtc_library("vector_math_avx2") { + sources = [ "vector_math_avx2.cc" ] + if (is_win && !build_with_mozilla) { + cflags = [ "/arch:AVX2" ] + } else { + cflags = [ + "-mavx2", + "-mfma", + ] + } + deps = [ + ":vector_math", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + ] + } +} + +rtc_library("rnn_vad_pitch") { + sources = [ + "pitch_search.cc", + "pitch_search.h", + "pitch_search_internal.cc", + "pitch_search_internal.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:gtest_prod", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } +} + +rtc_source_set("rnn_vad_ring_buffer") { + sources = [ "ring_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_source_set("rnn_vad_sequence_buffer") { + sources = [ "sequence_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_library("rnn_vad_spectral_features") { + sources = [ + "spectral_features.cc", + "spectral_features.h", + "spectral_features_internal.cc", + "spectral_features_internal.h", + ] + deps = [ + ":rnn_vad_common", + ":rnn_vad_ring_buffer", + ":rnn_vad_symmetric_matrix_buffer", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_symmetric_matrix_buffer") { + sources = [ "symmetric_matrix_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +if (rtc_include_tests) { + rtc_library("test_utils") { + testonly = true + sources = [ + 
"test_utils.cc", + "test_utils.h", + ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../api:scoped_refptr", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../test:fileutils", + "../../../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + unittest_resources = [ + "../../../../resources/audio_processing/agc2/rnn_vad/band_energies.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_search_int.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/samples.pcm", + "../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat", + ] + + if (is_ios) { + bundle_data("unittests_bundle_data") { + testonly = true + sources = unittest_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_library("unittests") { + testonly = true + sources = [ + "auto_correlation_unittest.cc", + "features_extraction_unittest.cc", + "lp_residual_unittest.cc", + "pitch_search_internal_unittest.cc", + "pitch_search_unittest.cc", + "ring_buffer_unittest.cc", + "rnn_fc_unittest.cc", + "rnn_gru_unittest.cc", + "rnn_unittest.cc", + "rnn_vad_unittest.cc", + "sequence_buffer_unittest.cc", + "spectral_features_internal_unittest.cc", + "spectral_features_unittest.cc", + "symmetric_matrix_buffer_unittest.cc", + "vector_math_unittest.cc", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad", + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_ring_buffer", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + ":rnn_vad_symmetric_matrix_buffer", + ":test_utils", + ":vector_math", + "..:cpu_features", + "../..:audioproc_test_utils", + "../../../../api:array_view", + "../../../../common_audio/", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base:stringutils", + "../../../../rtc_base/system:arch", + "../../../../test:test_support", + "../../utility:pffft_wrapper", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/memory" ] + data = unittest_resources + if (is_ios) { + deps += [ ":unittests_bundle_data" ] + } + } + + if (!build_with_chromium) { + rtc_executable("rnn_vad_tool") { + testonly = true + sources = [ "rnn_vad_tool.cc" ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "..:cpu_features", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS new file mode 100644 index 0000000000..773c2d7edd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+third_party/rnnoise", +] diff --git 
a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc
new file mode 100644
index 0000000000..3ddeec8dba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h"
+
+#include
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kAutoCorrelationFftOrder = 9;  // Length-512 FFT.
+static_assert(1 << kAutoCorrelationFftOrder >
+                  kNumLags12kHz + kBufSize12kHz - kMaxPitch12kHz,
+              "");
+
+}  // namespace
+
+AutoCorrelationCalculator::AutoCorrelationCalculator()
+    : fft_(1 << kAutoCorrelationFftOrder, Pffft::FftType::kReal),
+      tmp_(fft_.CreateBuffer()),
+      X_(fft_.CreateBuffer()),
+      H_(fft_.CreateBuffer()) {}
+
+AutoCorrelationCalculator::~AutoCorrelationCalculator() = default;
+
+// The auto-correlation coefficients are computed as follows:
+// |.........|...........|  <- pitch buffer
+//           [ x (fixed) ]
+// [   y_0   ]
+//         [ y_{m-1} ]
+// x and y are sub-arrays of equal length; x is never moved, whereas y slides.
+// The cross-correlation between y_0 and x corresponds to the auto-correlation
+// for the maximum pitch period. Hence, the first value in `auto_corr` has an
+// inverted lag equal to 0 that corresponds to a lag equal to the maximum
+// pitch period.
+void AutoCorrelationCalculator::ComputeOnPitchBuffer(
+    rtc::ArrayView<const float, kBufSize12kHz> pitch_buf,
+    rtc::ArrayView<float, kNumLags12kHz> auto_corr) {
+  RTC_DCHECK_LT(auto_corr.size(), kMaxPitch12kHz);
+  RTC_DCHECK_GT(pitch_buf.size(), kMaxPitch12kHz);
+  constexpr int kFftFrameSize = 1 << kAutoCorrelationFftOrder;
+  constexpr int kConvolutionLength = kBufSize12kHz - kMaxPitch12kHz;
+  static_assert(kConvolutionLength == kFrameSize20ms12kHz,
+                "Mismatch between pitch buffer size, frame size and maximum "
+                "pitch period.");
+  static_assert(kFftFrameSize > kNumLags12kHz + kConvolutionLength,
+                "The FFT length is not sufficiently big to avoid cyclic "
+                "convolution errors.");
+  auto tmp = tmp_->GetView();
+
+  // Compute the FFT for the reversed reference frame - i.e.,
+  // pitch_buf[-kConvolutionLength:].
+  std::reverse_copy(pitch_buf.end() - kConvolutionLength, pitch_buf.end(),
+                    tmp.begin());
+  std::fill(tmp.begin() + kConvolutionLength, tmp.end(), 0.f);
+  fft_.ForwardTransform(*tmp_, H_.get(), /*ordered=*/false);
+
+  // Compute the FFT for the sliding frames chunk. The sliding frames are
+  // defined as pitch_buf[i:i+kConvolutionLength] where i in
+  // [0, kNumLags12kHz). The chunk includes all of them, hence it is
+  // defined as pitch_buf[:kNumLags12kHz+kConvolutionLength].
+  std::copy(pitch_buf.begin(),
+            pitch_buf.begin() + kConvolutionLength + kNumLags12kHz,
+            tmp.begin());
+  std::fill(tmp.begin() + kNumLags12kHz + kConvolutionLength, tmp.end(), 0.f);
+  fft_.ForwardTransform(*tmp_, X_.get(), /*ordered=*/false);
+
+  // Convolve in the frequency domain.
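// Illustrative reference (editorial, not part of the upstream change): the
// FFT-based convolution below is equivalent to the direct
// O(kNumLags12kHz * kConvolutionLength) computation sketched here, where each
// output is the dot product between the fixed most recent 20 ms frame and the
// frame starting at the given inverted lag:
//
//   for (int i = 0; i < kNumLags12kHz; ++i) {
//     float sum = 0.f;
//     for (int j = 0; j < kConvolutionLength; ++j) {
//       sum += pitch_buf[kMaxPitch12kHz + j] * pitch_buf[i + j];
//     }
//     auto_corr[i] = sum;
//   }
//
// The FFT path computes all of these dot products at once in O(N log N).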
+  constexpr float kScalingFactor = 1.f / static_cast<float>(kFftFrameSize);
+  std::fill(tmp.begin(), tmp.end(), 0.f);
+  fft_.FrequencyDomainConvolve(*X_, *H_, tmp_.get(), kScalingFactor);
+  fft_.BackwardTransform(*tmp_, tmp_.get(), /*ordered=*/false);
+
+  // Extract the auto-correlation coefficients.
+  std::copy(tmp.begin() + kConvolutionLength - 1,
+            tmp.begin() + kConvolutionLength + kNumLags12kHz - 1,
+            auto_corr.begin());
+}
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h
new file mode 100644
index 0000000000..1ae5054567
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Class to compute the auto correlation on the pitch buffer for a target pitch
+// interval.
+class AutoCorrelationCalculator {
+ public:
+  AutoCorrelationCalculator();
+  AutoCorrelationCalculator(const AutoCorrelationCalculator&) = delete;
+  AutoCorrelationCalculator& operator=(const AutoCorrelationCalculator&) =
+      delete;
+  ~AutoCorrelationCalculator();
+
+  // Computes the auto-correlation coefficients for a target pitch interval.
+  // `auto_corr` indexes are inverted lags.
+  void ComputeOnPitchBuffer(
+      rtc::ArrayView<const float, kBufSize12kHz> pitch_buf,
+      rtc::ArrayView<float, kNumLags12kHz> auto_corr);
+
+ private:
+  Pffft fft_;
+  std::unique_ptr<Pffft::FloatBuffer> tmp_;
+  std::unique_ptr<Pffft::FloatBuffer> X_;
+  std::unique_ptr<Pffft::FloatBuffer> H_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc
new file mode 100644
index 0000000000..76001ed7b7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Checks that the auto correlation function produces output within tolerance +// given test input data. +TEST(RnnVadTest, PitchBufferAutoCorrelationWithinTolerance) { + PitchTestData test_data; + std::array pitch_buf_decimated; + Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated); + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + AutoCorrelationCalculator auto_corr_calculator; + auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated, + computed_output); + } + auto auto_corr_view = test_data.AutoCorrelation12kHzView(); + ExpectNearAbsolute({auto_corr_view.data(), auto_corr_view.size()}, + computed_output, 3e-3f); +} + +// Checks that the auto correlation function computes the right thing for a +// simple use case. +TEST(RnnVadTest, CheckAutoCorrelationOnConstantPitchBuffer) { + // Create constant signal with no pitch. + std::array pitch_buf_decimated; + std::fill(pitch_buf_decimated.begin(), pitch_buf_decimated.end(), 1.f); + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + AutoCorrelationCalculator auto_corr_calculator; + auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated, + computed_output); + } + // The expected output is a vector filled with the same expected + // auto-correlation value. The latter equals the length of a 20 ms frame. + constexpr int kFrameSize20ms12kHz = kFrameSize20ms24kHz / 2; + std::array expected_output; + std::fill(expected_output.begin(), expected_output.end(), + static_cast(kFrameSize20ms12kHz)); + ExpectNearAbsolute(expected_output, computed_output, 4e-5f); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h new file mode 100644 index 0000000000..c099373200 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ + +#include + +namespace webrtc { +namespace rnn_vad { + +constexpr double kPi = 3.14159265358979323846; + +constexpr int kSampleRate24kHz = 24000; +constexpr int kFrameSize10ms24kHz = kSampleRate24kHz / 100; +constexpr int kFrameSize20ms24kHz = kFrameSize10ms24kHz * 2; + +// Pitch buffer. +constexpr int kMinPitch24kHz = kSampleRate24kHz / 800; // 0.00125 s. +constexpr int kMaxPitch24kHz = kSampleRate24kHz / 62.5; // 0.016 s. 
+constexpr int kBufSize24kHz = kMaxPitch24kHz + kFrameSize20ms24kHz; +static_assert((kBufSize24kHz & 1) == 0, "The buffer size must be even."); + +// 24 kHz analysis. +// Define a higher minimum pitch period for the initial search. This is used to +// avoid searching for very short periods, for which a refinement step is +// responsible. +constexpr int kInitialMinPitch24kHz = 3 * kMinPitch24kHz; +static_assert(kMinPitch24kHz < kInitialMinPitch24kHz, ""); +static_assert(kInitialMinPitch24kHz < kMaxPitch24kHz, ""); +static_assert(kMaxPitch24kHz > kInitialMinPitch24kHz, ""); +// Number of (inverted) lags during the initial pitch search phase at 24 kHz. +constexpr int kInitialNumLags24kHz = kMaxPitch24kHz - kInitialMinPitch24kHz; +// Number of (inverted) lags during the pitch search refinement phase at 24 kHz. +constexpr int kRefineNumLags24kHz = kMaxPitch24kHz + 1; +static_assert( + kRefineNumLags24kHz > kInitialNumLags24kHz, + "The refinement step must search the pitch in an extended pitch range."); + +// 12 kHz analysis. +constexpr int kSampleRate12kHz = 12000; +constexpr int kFrameSize10ms12kHz = kSampleRate12kHz / 100; +constexpr int kFrameSize20ms12kHz = kFrameSize10ms12kHz * 2; +constexpr int kBufSize12kHz = kBufSize24kHz / 2; +constexpr int kInitialMinPitch12kHz = kInitialMinPitch24kHz / 2; +constexpr int kMaxPitch12kHz = kMaxPitch24kHz / 2; +static_assert(kMaxPitch12kHz > kInitialMinPitch12kHz, ""); +// The inverted lags for the pitch interval [`kInitialMinPitch12kHz`, +// `kMaxPitch12kHz`] are in the range [0, `kNumLags12kHz`]. +constexpr int kNumLags12kHz = kMaxPitch12kHz - kInitialMinPitch12kHz; + +// 48 kHz constants. +constexpr int kMinPitch48kHz = kMinPitch24kHz * 2; +constexpr int kMaxPitch48kHz = kMaxPitch24kHz * 2; + +// Spectral features. +constexpr int kNumBands = 22; +constexpr int kNumLowerBands = 6; +static_assert((0 < kNumLowerBands) && (kNumLowerBands < kNumBands), ""); +constexpr int kCepstralCoeffsHistorySize = 8; +static_assert(kCepstralCoeffsHistorySize > 2, + "The history size must at least be 3 to compute first and second " + "derivatives."); + +constexpr int kFeatureVectorSize = 42; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc new file mode 100644 index 0000000000..502023428d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`. 
+constexpr BiQuadFilter::Config kHpfConfig24k{ + {0.99446179f, -1.98892358f, 0.99446179f}, + {-1.98889291f, 0.98895425f}}; + +} // namespace + +FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features) + : use_high_pass_filter_(false), + hpf_(kHpfConfig24k), + pitch_buf_24kHz_(), + pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()), + lp_residual_(kBufSize24kHz), + lp_residual_view_(lp_residual_.data(), kBufSize24kHz), + pitch_estimator_(cpu_features), + reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) { + RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size()); + Reset(); +} + +FeaturesExtractor::~FeaturesExtractor() = default; + +void FeaturesExtractor::Reset() { + pitch_buf_24kHz_.Reset(); + spectral_features_extractor_.Reset(); + if (use_high_pass_filter_) { + hpf_.Reset(); + } +} + +bool FeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView samples, + rtc::ArrayView feature_vector) { + // Pre-processing. + if (use_high_pass_filter_) { + std::array samples_filtered; + hpf_.Process(samples, samples_filtered); + // Feed buffer with the pre-processed version of `samples`. + pitch_buf_24kHz_.Push(samples_filtered); + } else { + // Feed buffer with `samples`. + pitch_buf_24kHz_.Push(samples); + } + // Extract the LP residual. + float lpc_coeffs[kNumLpcCoefficients]; + ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs); + ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_); + // Estimate pitch on the LP-residual and write the normalized pitch period + // into the output vector (normalization based on training data stats). + pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_); + feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300); + // Extract lagged frames (according to the estimated pitch period). + RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz); + auto lagged_frame = pitch_buf_24kHz_view_.subview( + kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz); + // Analyze reference and lagged frames checking if silence has been detected + // and write the feature vector. + return spectral_features_extractor_.CheckSilenceComputeFeatures( + reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz}, + {feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands}, + {feature_vector.data(), kNumLowerBands}, + {feature_vector.data() + kNumBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands}, + &feature_vector[kFeatureVectorSize - 1]); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h new file mode 100644 index 0000000000..d47a85bfb0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/biquad_filter.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" +#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +namespace webrtc { +namespace rnn_vad { + +// Feature extractor to feed the VAD RNN. +class FeaturesExtractor { + public: + explicit FeaturesExtractor(const AvailableCpuFeatures& cpu_features); + FeaturesExtractor(const FeaturesExtractor&) = delete; + FeaturesExtractor& operator=(const FeaturesExtractor&) = delete; + ~FeaturesExtractor(); + void Reset(); + // Analyzes the samples, computes the feature vector and returns true if + // silence is detected (false if not). When silence is detected, + // `feature_vector` is partially written and therefore must not be used to + // feed the VAD RNN. + bool CheckSilenceComputeFeatures( + rtc::ArrayView samples, + rtc::ArrayView feature_vector); + + private: + const bool use_high_pass_filter_; + // TODO(bugs.webrtc.org/7494): Remove HPF depending on how AGC2 is used in APM + // and on whether an HPF is already used as pre-processing step in APM. + BiQuadFilter hpf_; + SequenceBuffer + pitch_buf_24kHz_; + rtc::ArrayView pitch_buf_24kHz_view_; + std::vector lp_residual_; + rtc::ArrayView lp_residual_view_; + PitchEstimator pitch_estimator_; + rtc::ArrayView reference_frame_view_; + SpectralFeaturesExtractor spectral_features_extractor_; + int pitch_period_48kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc new file mode 100644 index 0000000000..96f956adfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include +#include + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int ceil(int n, int m) { + return (n + m - 1) / m; +} + +// Number of 10 ms frames required to fill a pitch buffer having size +// `kBufSize24kHz`. +constexpr int kNumTestDataFrames = ceil(kBufSize24kHz, kFrameSize10ms24kHz); +// Number of samples for the test data. +constexpr int kNumTestDataSize = kNumTestDataFrames * kFrameSize10ms24kHz; + +// Verifies that the pitch in Hz is in the detectable range. 
+bool PitchIsValid(float pitch_hz) { + const int pitch_period = static_cast(kSampleRate24kHz) / pitch_hz; + return kInitialMinPitch24kHz <= pitch_period && + pitch_period <= kMaxPitch24kHz; +} + +void CreatePureTone(float amplitude, float freq_hz, rtc::ArrayView dst) { + for (int i = 0; rtc::SafeLt(i, dst.size()); ++i) { + dst[i] = amplitude * std::sin(2.f * kPi * freq_hz * i / kSampleRate24kHz); + } +} + +// Feeds `features_extractor` with `samples` splitting it in 10 ms frames. +// For every frame, the output is written into `feature_vector`. Returns true +// if silence is detected in the last frame. +bool FeedTestData(FeaturesExtractor& features_extractor, + rtc::ArrayView samples, + rtc::ArrayView feature_vector) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + bool is_silence = true; + const int num_frames = samples.size() / kFrameSize10ms24kHz; + for (int i = 0; i < num_frames; ++i) { + is_silence = features_extractor.CheckSilenceComputeFeatures( + {samples.data() + i * kFrameSize10ms24kHz, kFrameSize10ms24kHz}, + feature_vector); + } + return is_silence; +} + +// Extracts the features for two pure tones and verifies that the pitch field +// values reflect the known tone frequencies. +TEST(RnnVadTest, FeatureExtractionLowHighPitch) { + constexpr float amplitude = 1000.f; + constexpr float low_pitch_hz = 150.f; + constexpr float high_pitch_hz = 250.f; + ASSERT_TRUE(PitchIsValid(low_pitch_hz)); + ASSERT_TRUE(PitchIsValid(high_pitch_hz)); + + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + FeaturesExtractor features_extractor(cpu_features); + std::vector samples(kNumTestDataSize); + std::vector feature_vector(kFeatureVectorSize); + ASSERT_EQ(kFeatureVectorSize, rtc::dchecked_cast(feature_vector.size())); + rtc::ArrayView feature_vector_view( + feature_vector.data(), kFeatureVectorSize); + + // Extract the normalized scalar feature that is proportional to the estimated + // pitch period. + constexpr int pitch_feature_index = kFeatureVectorSize - 2; + // Low frequency tone - i.e., high period. + CreatePureTone(amplitude, low_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float high_pitch_period = feature_vector_view[pitch_feature_index]; + // High frequency tone - i.e., low period. + features_extractor.Reset(); + CreatePureTone(amplitude, high_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float low_pitch_period = feature_vector_view[pitch_feature_index]; + // Check. + EXPECT_LT(low_pitch_period, high_pitch_period); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc new file mode 100644 index 0000000000..484bfba459 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" + +#include +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Computes auto-correlation coefficients for `x` and writes them in +// `auto_corr`. The lag values are in {0, ..., max_lag - 1}, where max_lag +// equals the size of `auto_corr`. +void ComputeAutoCorrelation( + rtc::ArrayView x, + rtc::ArrayView auto_corr) { + constexpr int max_lag = auto_corr.size(); + RTC_DCHECK_LT(max_lag, x.size()); + for (int lag = 0; lag < max_lag; ++lag) { + auto_corr[lag] = + std::inner_product(x.begin(), x.end() - lag, x.begin() + lag, 0.f); + } +} + +// Applies denoising to the auto-correlation coefficients. +void DenoiseAutoCorrelation( + rtc::ArrayView auto_corr) { + // Assume -40 dB white noise floor. + auto_corr[0] *= 1.0001f; + // Hard-coded values obtained as + // [np.float32((0.008*0.008*i*i)) for i in range(1,5)]. + auto_corr[1] -= auto_corr[1] * 0.000064f; + auto_corr[2] -= auto_corr[2] * 0.000256f; + auto_corr[3] -= auto_corr[3] * 0.000576f; + auto_corr[4] -= auto_corr[4] * 0.001024f; + static_assert(kNumLpcCoefficients == 5, "Update `auto_corr`."); +} + +// Computes the initial inverse filter coefficients given the auto-correlation +// coefficients of an input frame. +void ComputeInitialInverseFilterCoefficients( + rtc::ArrayView auto_corr, + rtc::ArrayView lpc_coeffs) { + float error = auto_corr[0]; + for (int i = 0; i < kNumLpcCoefficients - 1; ++i) { + float reflection_coeff = 0.f; + for (int j = 0; j < i; ++j) { + reflection_coeff += lpc_coeffs[j] * auto_corr[i - j]; + } + reflection_coeff += auto_corr[i + 1]; + + // Avoid division by numbers close to zero. + constexpr float kMinErrorMagnitude = 1e-6f; + if (std::fabs(error) < kMinErrorMagnitude) { + error = std::copysign(kMinErrorMagnitude, error); + } + + reflection_coeff /= -error; + // Update LPC coefficients and total error. + lpc_coeffs[i] = reflection_coeff; + for (int j = 0; j < ((i + 1) >> 1); ++j) { + const float tmp1 = lpc_coeffs[j]; + const float tmp2 = lpc_coeffs[i - 1 - j]; + lpc_coeffs[j] = tmp1 + reflection_coeff * tmp2; + lpc_coeffs[i - 1 - j] = tmp2 + reflection_coeff * tmp1; + } + error -= reflection_coeff * reflection_coeff * error; + if (error < 0.001f * auto_corr[0]) { + break; + } + } +} + +} // namespace + +void ComputeAndPostProcessLpcCoefficients( + rtc::ArrayView x, + rtc::ArrayView lpc_coeffs) { + std::array auto_corr; + ComputeAutoCorrelation(x, auto_corr); + if (auto_corr[0] == 0.f) { // Empty frame. + std::fill(lpc_coeffs.begin(), lpc_coeffs.end(), 0); + return; + } + DenoiseAutoCorrelation(auto_corr); + std::array lpc_coeffs_pre{}; + ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre); + // LPC coefficients post-processing. + // TODO(bugs.webrtc.org/9076): Consider removing these steps. 
+ lpc_coeffs_pre[0] *= 0.9f; + lpc_coeffs_pre[1] *= 0.9f * 0.9f; + lpc_coeffs_pre[2] *= 0.9f * 0.9f * 0.9f; + lpc_coeffs_pre[3] *= 0.9f * 0.9f * 0.9f * 0.9f; + constexpr float kC = 0.8f; + lpc_coeffs[0] = lpc_coeffs_pre[0] + kC; + lpc_coeffs[1] = lpc_coeffs_pre[1] + kC * lpc_coeffs_pre[0]; + lpc_coeffs[2] = lpc_coeffs_pre[2] + kC * lpc_coeffs_pre[1]; + lpc_coeffs[3] = lpc_coeffs_pre[3] + kC * lpc_coeffs_pre[2]; + lpc_coeffs[4] = kC * lpc_coeffs_pre[3]; + static_assert(kNumLpcCoefficients == 5, "Update `lpc_coeffs(_pre)`."); +} + +void ComputeLpResidual( + rtc::ArrayView lpc_coeffs, + rtc::ArrayView x, + rtc::ArrayView y) { + RTC_DCHECK_GT(x.size(), kNumLpcCoefficients); + RTC_DCHECK_EQ(x.size(), y.size()); + // The code below implements the following operation: + // y[i] = x[i] + dot_product({x[i], ..., x[i - kNumLpcCoefficients + 1]}, + // lpc_coeffs) + // Edge case: i < kNumLpcCoefficients. + y[0] = x[0]; + for (int i = 1; i < kNumLpcCoefficients; ++i) { + y[i] = + std::inner_product(x.crend() - i, x.crend(), lpc_coeffs.cbegin(), x[i]); + } + // Regular case. + auto last = x.crend(); + for (int i = kNumLpcCoefficients; rtc::SafeLt(i, y.size()); ++i, --last) { + y[i] = std::inner_product(last - kNumLpcCoefficients, last, + lpc_coeffs.cbegin(), x[i]); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h new file mode 100644 index 0000000000..d04c536ec1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ + +#include + +#include "api/array_view.h" + +namespace webrtc { +namespace rnn_vad { + +// Linear predictive coding (LPC) inverse filter length. +constexpr int kNumLpcCoefficients = 5; + +// Given a frame `x`, computes a post-processed version of LPC coefficients +// tailored for pitch estimation. +void ComputeAndPostProcessLpcCoefficients( + rtc::ArrayView x, + rtc::ArrayView lpc_coeffs); + +// Computes the LP residual for the input frame `x` and the LPC coefficients +// `lpc_coeffs`. `y` and `x` can point to the same array for in-place +// computation. +void ComputeLpResidual( + rtc::ArrayView lpc_coeffs, + rtc::ArrayView x, + rtc::ArrayView y); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc new file mode 100644 index 0000000000..7b3a4a3f65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" + +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Checks that the LP residual can be computed on an empty frame. +TEST(RnnVadTest, LpResidualOfEmptyFrame) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + // Input frame (empty, i.e., all samples set to 0). + std::array empty_frame; + empty_frame.fill(0.f); + // Compute inverse filter coefficients. + std::array lpc; + ComputeAndPostProcessLpcCoefficients(empty_frame, lpc); + // Compute LP residual. + std::array lp_residual; + ComputeLpResidual(lpc, empty_frame, lp_residual); +} + +// Checks that the computed LP residual is bit-exact given test input data. +TEST(RnnVadTest, LpResidualPipelineBitExactness) { + // Input and expected output readers. + ChunksFileReader pitch_buffer_reader = CreatePitchBuffer24kHzReader(); + ChunksFileReader lp_pitch_reader = CreateLpResidualAndPitchInfoReader(); + + // Buffers. + std::vector pitch_buffer_24kHz(kBufSize24kHz); + std::array lpc; + std::vector computed_lp_residual(kBufSize24kHz); + std::vector expected_lp_residual(kBufSize24kHz); + + // Test length. + const int num_frames = + std::min(pitch_buffer_reader.num_chunks, 300); // Max 3 s. + ASSERT_GE(lp_pitch_reader.num_chunks, num_frames); + + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + for (int i = 0; i < num_frames; ++i) { + SCOPED_TRACE(i); + // Read input. + ASSERT_TRUE(pitch_buffer_reader.reader->ReadChunk(pitch_buffer_24kHz)); + // Read expected output (ignore pitch gain and period). + ASSERT_TRUE(lp_pitch_reader.reader->ReadChunk(expected_lp_residual)); + lp_pitch_reader.reader->SeekForward(2); // Pitch period and strength. + // Check every 200 ms. + if (i % 20 == 0) { + ComputeAndPostProcessLpcCoefficients(pitch_buffer_24kHz, lpc); + ComputeLpResidual(lpc, pitch_buffer_24kHz, computed_lp_residual); + ExpectNearAbsolute(expected_lp_residual, computed_lp_residual, kFloatMin); + } + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc new file mode 100644 index 0000000000..419620fc0c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { + +PitchEstimator::PitchEstimator(const AvailableCpuFeatures& cpu_features) + : cpu_features_(cpu_features), + y_energy_24kHz_(kRefineNumLags24kHz, 0.f), + pitch_buffer_12kHz_(kBufSize12kHz), + auto_correlation_12kHz_(kNumLags12kHz) {} + +PitchEstimator::~PitchEstimator() = default; + +int PitchEstimator::Estimate( + rtc::ArrayView pitch_buffer) { + rtc::ArrayView pitch_buffer_12kHz_view( + pitch_buffer_12kHz_.data(), kBufSize12kHz); + RTC_DCHECK_EQ(pitch_buffer_12kHz_.size(), pitch_buffer_12kHz_view.size()); + rtc::ArrayView auto_correlation_12kHz_view( + auto_correlation_12kHz_.data(), kNumLags12kHz); + RTC_DCHECK_EQ(auto_correlation_12kHz_.size(), + auto_correlation_12kHz_view.size()); + + // TODO(bugs.chromium.org/10480): Use `cpu_features_` to estimate pitch. + // Perform the initial pitch search at 12 kHz. + Decimate2x(pitch_buffer, pitch_buffer_12kHz_view); + auto_corr_calculator_.ComputeOnPitchBuffer(pitch_buffer_12kHz_view, + auto_correlation_12kHz_view); + CandidatePitchPeriods pitch_periods = ComputePitchPeriod12kHz( + pitch_buffer_12kHz_view, auto_correlation_12kHz_view, cpu_features_); + // The refinement is done using the pitch buffer that contains 24 kHz samples. + // Therefore, adapt the inverted lags in `pitch_candidates_inv_lags` from 12 + // to 24 kHz. + pitch_periods.best *= 2; + pitch_periods.second_best *= 2; + + // Refine the initial pitch period estimation from 12 kHz to 48 kHz. + // Pre-compute frame energies at 24 kHz. + rtc::ArrayView y_energy_24kHz_view( + y_energy_24kHz_.data(), kRefineNumLags24kHz); + RTC_DCHECK_EQ(y_energy_24kHz_.size(), y_energy_24kHz_view.size()); + ComputeSlidingFrameSquareEnergies24kHz(pitch_buffer, y_energy_24kHz_view, + cpu_features_); + // Estimation at 48 kHz. + const int pitch_lag_48kHz = ComputePitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, pitch_periods, cpu_features_); + last_pitch_48kHz_ = ComputeExtendedPitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, + /*initial_pitch_period_48kHz=*/kMaxPitch48kHz - pitch_lag_48kHz, + last_pitch_48kHz_, cpu_features_); + return last_pitch_48kHz_.period; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h new file mode 100644 index 0000000000..42c448eb56 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "rtc_base/gtest_prod_util.h" + +namespace webrtc { +namespace rnn_vad { + +// Pitch estimator. +class PitchEstimator { + public: + explicit PitchEstimator(const AvailableCpuFeatures& cpu_features); + PitchEstimator(const PitchEstimator&) = delete; + PitchEstimator& operator=(const PitchEstimator&) = delete; + ~PitchEstimator(); + // Returns the estimated pitch period at 48 kHz. + int Estimate(rtc::ArrayView pitch_buffer); + + private: + FRIEND_TEST_ALL_PREFIXES(RnnVadTest, PitchSearchWithinTolerance); + float GetLastPitchStrengthForTesting() const { + return last_pitch_48kHz_.strength; + } + + const AvailableCpuFeatures cpu_features_; + PitchInfo last_pitch_48kHz_{}; + AutoCorrelationCalculator auto_corr_calculator_; + std::vector y_energy_24kHz_; + std::vector pitch_buffer_12kHz_; + std::vector auto_correlation_12kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc new file mode 100644 index 0000000000..e8c912518d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" + +#include + +#include +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +float ComputeAutoCorrelation( + int inverted_lag, + rtc::ArrayView pitch_buffer, + const VectorMath& vector_math) { + RTC_DCHECK_LT(inverted_lag, kBufSize24kHz); + RTC_DCHECK_LT(inverted_lag, kRefineNumLags24kHz); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + return vector_math.DotProduct( + pitch_buffer.subview(/*offset=*/kMaxPitch24kHz), + pitch_buffer.subview(inverted_lag, kFrameSize20ms24kHz)); +} + +// Given an auto-correlation coefficient `curr_auto_correlation` and its +// neighboring values `prev_auto_correlation` and `next_auto_correlation` +// computes a pseudo-interpolation offset to be applied to the pitch period +// associated to `curr`. The output is a lag in {-1, 0, +1}. +// TODO(bugs.webrtc.org/9076): Consider removing this method. 
+// `GetPitchPseudoInterpolationOffset()` is relevant only if the spectral
+// analysis works at a sample rate that is twice that of the pitch buffer;
+// in particular, it is not relevant for the estimated pitch period feature fed
+// into the RNN.
+int GetPitchPseudoInterpolationOffset(float prev_auto_correlation,
+                                      float curr_auto_correlation,
+                                      float next_auto_correlation) {
+  if ((next_auto_correlation - prev_auto_correlation) >
+      0.7f * (curr_auto_correlation - prev_auto_correlation)) {
+    return 1;  // `next_auto_correlation` is the largest auto-correlation
+               // coefficient.
+  } else if ((prev_auto_correlation - next_auto_correlation) >
+             0.7f * (curr_auto_correlation - next_auto_correlation)) {
+    return -1;  // `prev_auto_correlation` is the largest auto-correlation
+                // coefficient.
+  }
+  return 0;
+}
+
+// Refines a pitch period `lag` encoded as lag with pseudo-interpolation. The
+// output sample rate is twice that of `lag`.
+int PitchPseudoInterpolationLagPitchBuf(
+    int lag,
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    const VectorMath& vector_math) {
+  int offset = 0;
+  // Cannot apply pseudo-interpolation at the boundaries.
+  if (lag > 0 && lag < kMaxPitch24kHz) {
+    const int inverted_lag = kMaxPitch24kHz - lag;
+    offset = GetPitchPseudoInterpolationOffset(
+        ComputeAutoCorrelation(inverted_lag + 1, pitch_buffer, vector_math),
+        ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math),
+        ComputeAutoCorrelation(inverted_lag - 1, pitch_buffer, vector_math));
+  }
+  return 2 * lag + offset;
+}
+
+// Integer multipliers used in ComputeExtendedPitchPeriod48kHz() when
+// looking for sub-harmonics.
+// The values have been chosen to serve the following algorithm. Given the
+// initial pitch period T, we examine whether one of its harmonics is the true
+// fundamental frequency. We consider T/k with k in {2, ..., 15}. For each of
+// these harmonics, in addition to its own pitch strength, we choose one
+// multiple of its pitch period, n*T/k, to validate it (by averaging their
+// pitch strengths). The multiplier n is chosen so that n*T/k is used only one
+// time over all k. When for example k = 4, we should also expect a peak at
+// 3*T/4. When k = 8 instead we don't want to look at 2*T/8, since we have
+// already checked T/4 before. Instead, we look at T*3/8.
+// The array can be generated in Python as follows:
+//   from fractions import Fraction
+//   # Smallest positive integer not in X.
+//   def mex(X):
+//     for i in range(1, int(max(X)+2)):
+//       if i not in X:
+//         return i
+//   # Visited multiples of the period.
+//   S = {1}
+//   for n in range(2, 16):
+//     sn = mex({n * i for i in S} | {1})
+//     S = S | {Fraction(1, n), Fraction(sn, n)}
+//     print(sn, end=', ')
+constexpr std::array<int, 14> kSubHarmonicMultipliers = {
+    {3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}};
+
+struct Range {
+  int min;
+  int max;
+};
+
+// Number of analyzed pitches to the left (right) of a pitch candidate.
+constexpr int kPitchNeighborhoodRadius = 2;
+
+// Creates a pitch period interval centered in `inverted_lag` with hard-coded
+// radius. Clipping is applied so that the interval is always valid for a
+// 24 kHz pitch buffer.
+Range CreateInvertedLagRange(int inverted_lag) {
+  return {std::max(inverted_lag - kPitchNeighborhoodRadius, 0),
+          std::min(inverted_lag + kPitchNeighborhoodRadius,
+                   kInitialNumLags24kHz - 1)};
+}
+
+constexpr int kNumPitchCandidates = 2;  // Best and second best.
+// Maximum number of analyzed pitch periods.
+constexpr int kMaxPitchPeriods24kHz = + kNumPitchCandidates * (2 * kPitchNeighborhoodRadius + 1); + +// Collection of inverted lags. +class InvertedLagsIndex { + public: + InvertedLagsIndex() : num_entries_(0) {} + // Adds an inverted lag to the index. Cannot add more than + // `kMaxPitchPeriods24kHz` values. + void Append(int inverted_lag) { + RTC_DCHECK_LT(num_entries_, kMaxPitchPeriods24kHz); + inverted_lags_[num_entries_++] = inverted_lag; + } + const int* data() const { return inverted_lags_.data(); } + int size() const { return num_entries_; } + + private: + std::array inverted_lags_; + int num_entries_; +}; + +// Computes the auto correlation coefficients for the inverted lags in the +// closed interval `inverted_lags`. Updates `inverted_lags_index` by appending +// the inverted lags for the computed auto correlation values. +void ComputeAutoCorrelation( + Range inverted_lags, + rtc::ArrayView pitch_buffer, + rtc::ArrayView auto_correlation, + InvertedLagsIndex& inverted_lags_index, + const VectorMath& vector_math) { + // Check valid range. + RTC_DCHECK_LE(inverted_lags.min, inverted_lags.max); + // Trick to avoid zero initialization of `auto_correlation`. + // Needed by the pseudo-interpolation. + if (inverted_lags.min > 0) { + auto_correlation[inverted_lags.min - 1] = 0.f; + } + if (inverted_lags.max < kInitialNumLags24kHz - 1) { + auto_correlation[inverted_lags.max + 1] = 0.f; + } + // Check valid `inverted_lag` indexes. + RTC_DCHECK_GE(inverted_lags.min, 0); + RTC_DCHECK_LT(inverted_lags.max, kInitialNumLags24kHz); + for (int inverted_lag = inverted_lags.min; inverted_lag <= inverted_lags.max; + ++inverted_lag) { + auto_correlation[inverted_lag] = + ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math); + inverted_lags_index.Append(inverted_lag); + } +} + +// Searches the strongest pitch period at 24 kHz and returns its inverted lag at +// 48 kHz. +int ComputePitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView inverted_lags, + rtc::ArrayView auto_correlation, + rtc::ArrayView y_energy, + const VectorMath& vector_math) { + static_assert(kMaxPitch24kHz > kInitialNumLags24kHz, ""); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + int best_inverted_lag = 0; // Pitch period. + float best_numerator = -1.f; // Pitch strength numerator. + float best_denominator = 0.f; // Pitch strength denominator. + for (int inverted_lag : inverted_lags) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + // Auto-correlation energy normalized by frame energy. + const float numerator = + auto_correlation[inverted_lag] * auto_correlation[inverted_lag]; + const float denominator = y_energy[inverted_lag]; + // Compare numerator/denominator ratios without using divisions. + if (numerator * best_denominator > best_numerator * denominator) { + best_inverted_lag = inverted_lag; + best_numerator = numerator; + best_denominator = denominator; + } + } + } + // Pseudo-interpolation to transform `best_inverted_lag` (24 kHz pitch) to a + // 48 kHz pitch period. + if (best_inverted_lag == 0 || best_inverted_lag >= kInitialNumLags24kHz - 1) { + // Cannot apply pseudo-interpolation at the boundaries. 
+ return best_inverted_lag * 2; + } + int offset = GetPitchPseudoInterpolationOffset( + auto_correlation[best_inverted_lag + 1], + auto_correlation[best_inverted_lag], + auto_correlation[best_inverted_lag - 1]); + // TODO(bugs.webrtc.org/9076): When retraining, check if `offset` below should + // be subtracted since `inverted_lag` is an inverted lag but offset is a lag. + return 2 * best_inverted_lag + offset; +} + +// Returns an alternative pitch period for `pitch_period` given a `multiplier` +// and a `divisor` of the period. +constexpr int GetAlternativePitchPeriod(int pitch_period, + int multiplier, + int divisor) { + RTC_DCHECK_GT(divisor, 0); + // Same as `round(multiplier * pitch_period / divisor)`. + return (2 * multiplier * pitch_period + divisor) / (2 * divisor); +} + +// Returns true if the alternative pitch period is stronger than the initial one +// given the last estimated pitch and the value of `period_divisor` used to +// compute the alternative pitch period via `GetAlternativePitchPeriod()`. +bool IsAlternativePitchStrongerThanInitial(PitchInfo last, + PitchInfo initial, + PitchInfo alternative, + int period_divisor) { + // Initial pitch period candidate thresholds for a sample rate of 24 kHz. + // Computed as [5*k*k for k in range(16)]. + constexpr std::array kInitialPitchPeriodThresholds = { + {20, 45, 80, 125, 180, 245, 320, 405, 500, 605, 720, 845, 980, 1125}}; + static_assert( + kInitialPitchPeriodThresholds.size() == kSubHarmonicMultipliers.size(), + ""); + RTC_DCHECK_GE(last.period, 0); + RTC_DCHECK_GE(initial.period, 0); + RTC_DCHECK_GE(alternative.period, 0); + RTC_DCHECK_GE(period_divisor, 2); + // Compute a term that lowers the threshold when `alternative.period` is close + // to the last estimated period `last.period` - i.e., pitch tracking. + float lower_threshold_term = 0.f; + if (std::abs(alternative.period - last.period) <= 1) { + // The candidate pitch period is within 1 sample from the last one. + // Make the candidate at `alternative.period` very easy to be accepted. + lower_threshold_term = last.strength; + } else if (std::abs(alternative.period - last.period) == 2 && + initial.period > + kInitialPitchPeriodThresholds[period_divisor - 2]) { + // The candidate pitch period is 2 samples far from the last one and the + // period `initial.period` (from which `alternative.period` has been + // derived) is greater than a threshold. Make `alternative.period` easy to + // be accepted. + lower_threshold_term = 0.5f * last.strength; + } + // Set the threshold based on the strength of the initial estimate + // `initial.period`. Also reduce the chance of false positives caused by a + // bias towards high frequencies (originating from short-term correlations). + float threshold = + std::max(0.3f, 0.7f * initial.strength - lower_threshold_term); + if (alternative.period < 3 * kMinPitch24kHz) { + // High frequency. + threshold = std::max(0.4f, 0.85f * initial.strength - lower_threshold_term); + } else if (alternative.period < 2 * kMinPitch24kHz) { + // Even higher frequency. + threshold = std::max(0.5f, 0.9f * initial.strength - lower_threshold_term); + } + return alternative.strength > threshold; +} + +} // namespace + +void Decimate2x(rtc::ArrayView src, + rtc::ArrayView dst) { + // TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter. 
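+  // Keeping only the even-indexed samples halves the sample rate. Without a
+  // low-pass filter, content above 6 kHz aliases into the 12 kHz signal; this
+  // is tolerated here since the decimated buffer only drives the coarse
+  // candidate search in ComputePitchPeriod12kHz().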
+ static_assert(2 * kBufSize12kHz == kBufSize24kHz, ""); + for (int i = 0; i < kBufSize12kHz; ++i) { + dst[i] = src[2 * i]; + } +} + +void ComputeSlidingFrameSquareEnergies24kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + AvailableCpuFeatures cpu_features) { + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms24kHz < kBufSize24kHz, ""); + const auto frame_20ms_view = pitch_buffer.subview(0, kFrameSize20ms24kHz); + float yy = vector_math.DotProduct(frame_20ms_view, frame_20ms_view); + y_energy[0] = yy; + static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, ""); + static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, ""); + for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) { + yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag]; + yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] * + pitch_buffer[inverted_lag + kFrameSize20ms24kHz]; + yy = std::max(1.f, yy); + y_energy[inverted_lag + 1] = yy; + } +} + +CandidatePitchPeriods ComputePitchPeriod12kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView auto_correlation, + AvailableCpuFeatures cpu_features) { + static_assert(kMaxPitch12kHz > kNumLags12kHz, ""); + static_assert(kMaxPitch12kHz < kBufSize12kHz, ""); + + // Stores a pitch candidate period and strength information. + struct PitchCandidate { + // Pitch period encoded as inverted lag. + int period_inverted_lag = 0; + // Pitch strength encoded as a ratio. + float strength_numerator = -1.f; + float strength_denominator = 0.f; + // Compare the strength of two pitch candidates. + bool HasStrongerPitchThan(const PitchCandidate& b) const { + // Comparing the numerator/denominator ratios without using divisions. + return strength_numerator * b.strength_denominator > + b.strength_numerator * strength_denominator; + } + }; + + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms12kHz + 1 < kBufSize12kHz, ""); + const auto frame_view = pitch_buffer.subview(0, kFrameSize20ms12kHz + 1); + float denominator = 1.f + vector_math.DotProduct(frame_view, frame_view); + // Search best and second best pitches by looking at the scaled + // auto-correlation. + PitchCandidate best; + PitchCandidate second_best; + second_best.period_inverted_lag = 1; + for (int inverted_lag = 0; inverted_lag < kNumLags12kHz; ++inverted_lag) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + PitchCandidate candidate{ + inverted_lag, + auto_correlation[inverted_lag] * auto_correlation[inverted_lag], + denominator}; + if (candidate.HasStrongerPitchThan(second_best)) { + if (candidate.HasStrongerPitchThan(best)) { + second_best = best; + best = candidate; + } else { + second_best = candidate; + } + } + } + // Update `squared_energy_y` for the next inverted lag. + const float y_old = pitch_buffer[inverted_lag]; + const float y_new = pitch_buffer[inverted_lag + kFrameSize20ms12kHz]; + denominator -= y_old * y_old; + denominator += y_new * y_new; + denominator = std::max(0.f, denominator); + } + return {best.period_inverted_lag, second_best.period_inverted_lag}; +} + +int ComputePitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + CandidatePitchPeriods pitch_candidates, + AvailableCpuFeatures cpu_features) { + // Compute the auto-correlation terms only for neighbors of the two pitch + // candidates (best and second best). 
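+  // Each candidate contributes a neighborhood of 2 * kPitchNeighborhoodRadius
+  // + 1 inverted lags, so at most kMaxPitchPeriods24kHz (10) auto-correlation
+  // values are computed instead of one per lag in [0, kInitialNumLags24kHz).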
+ std::array auto_correlation; + InvertedLagsIndex inverted_lags_index; + // Create two inverted lag ranges so that `r1` precedes `r2`. + const bool swap_candidates = + pitch_candidates.best > pitch_candidates.second_best; + const Range r1 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.second_best : pitch_candidates.best); + const Range r2 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.best : pitch_candidates.second_best); + // Check valid ranges. + RTC_DCHECK_LE(r1.min, r1.max); + RTC_DCHECK_LE(r2.min, r2.max); + // Check `r1` precedes `r2`. + RTC_DCHECK_LE(r1.min, r2.min); + RTC_DCHECK_LE(r1.max, r2.max); + VectorMath vector_math(cpu_features); + if (r1.max + 1 >= r2.min) { + // Overlapping or adjacent ranges. + ComputeAutoCorrelation({r1.min, r2.max}, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } else { + // Disjoint ranges. + ComputeAutoCorrelation(r1, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + ComputeAutoCorrelation(r2, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } + return ComputePitchPeriod48kHz(pitch_buffer, inverted_lags_index, + auto_correlation, y_energy, vector_math); +} + +PitchInfo ComputeExtendedPitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + int initial_pitch_period_48kHz, + PitchInfo last_pitch_48kHz, + AvailableCpuFeatures cpu_features) { + RTC_DCHECK_LE(kMinPitch48kHz, initial_pitch_period_48kHz); + RTC_DCHECK_LE(initial_pitch_period_48kHz, kMaxPitch48kHz); + + // Stores information for a refined pitch candidate. + struct RefinedPitchCandidate { + int period; + float strength; + // Additional strength data used for the final pitch estimation. + float xy; // Auto-correlation. + float y_energy; // Energy of the sliding frame `y`. + }; + + const float x_energy = y_energy[kMaxPitch24kHz]; + const auto pitch_strength = [x_energy](float xy, float y_energy) { + RTC_DCHECK_GE(x_energy * y_energy, 0.f); + return xy / std::sqrt(1.f + x_energy * y_energy); + }; + VectorMath vector_math(cpu_features); + + // Initialize the best pitch candidate with `initial_pitch_period_48kHz`. + RefinedPitchCandidate best_pitch; + best_pitch.period = + std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1); + best_pitch.xy = ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, + pitch_buffer, vector_math); + best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period]; + best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy); + // Keep a copy of the initial pitch candidate. + const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength}; + // 24 kHz version of the last estimated pitch. + const PitchInfo last_pitch{last_pitch_48kHz.period / 2, + last_pitch_48kHz.strength}; + + // Find `max_period_divisor` such that the result of + // `GetAlternativePitchPeriod(initial_pitch_period, 1, max_period_divisor)` + // equals `kMinPitch24kHz`. + const int max_period_divisor = + (2 * initial_pitch.period) / (2 * kMinPitch24kHz - 1); + for (int period_divisor = 2; period_divisor <= max_period_divisor; + ++period_divisor) { + PitchInfo alternative_pitch; + alternative_pitch.period = GetAlternativePitchPeriod( + initial_pitch.period, /*multiplier=*/1, period_divisor); + RTC_DCHECK_GE(alternative_pitch.period, kMinPitch24kHz); + // When looking at `alternative_pitch.period`, we also look at one of its + // sub-harmonics. `kSubHarmonicMultipliers` is used to know where to look. 
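+    // For instance, with `period_divisor` equal to 4, the candidate period
+    // T/4 is validated together with 3*T/4 (the multiplier 3 comes from
+    // `kSubHarmonicMultipliers[2]`), so that each multiple of the initial
+    // period is inspected at most once across all divisors.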
+ // `period_divisor` == 2 is a special case since `dual_alternative_period` + // might be greater than the maximum pitch period. + int dual_alternative_period = GetAlternativePitchPeriod( + initial_pitch.period, kSubHarmonicMultipliers[period_divisor - 2], + period_divisor); + RTC_DCHECK_GT(dual_alternative_period, 0); + if (period_divisor == 2 && dual_alternative_period > kMaxPitch24kHz) { + dual_alternative_period = initial_pitch.period; + } + RTC_DCHECK_NE(alternative_pitch.period, dual_alternative_period) + << "The lower pitch period and the additional sub-harmonic must not " + "coincide."; + // Compute an auto-correlation score for the primary pitch candidate + // `alternative_pitch.period` by also looking at its possible sub-harmonic + // `dual_alternative_period`. + const float xy_primary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - alternative_pitch.period, pitch_buffer, vector_math); + // TODO(webrtc:10480): Copy `xy_primary_period` if the secondary period is + // equal to the primary one. + const float xy_secondary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - dual_alternative_period, pitch_buffer, vector_math); + const float xy = 0.5f * (xy_primary_period + xy_secondary_period); + const float yy = + 0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] + + y_energy[kMaxPitch24kHz - dual_alternative_period]); + alternative_pitch.strength = pitch_strength(xy, yy); + + // Maybe update best period. + if (IsAlternativePitchStrongerThanInitial( + last_pitch, initial_pitch, alternative_pitch, period_divisor)) { + best_pitch = {alternative_pitch.period, alternative_pitch.strength, xy, + yy}; + } + } + + // Final pitch strength and period. + best_pitch.xy = std::max(0.f, best_pitch.xy); + RTC_DCHECK_LE(0.f, best_pitch.y_energy); + float final_pitch_strength = + (best_pitch.y_energy <= best_pitch.xy) + ? 1.f + : best_pitch.xy / (best_pitch.y_energy + 1.f); + final_pitch_strength = std::min(best_pitch.strength, final_pitch_strength); + int final_pitch_period_48kHz = std::max( + kMinPitch48kHz, PitchPseudoInterpolationLagPitchBuf( + best_pitch.period, pitch_buffer, vector_math)); + + return {final_pitch_period_48kHz, final_pitch_strength}; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h new file mode 100644 index 0000000000..aa2dd13745 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// Performs 2x decimation without any anti-aliasing filter. 
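+// `src` is the 24 kHz pitch buffer; `dst` receives the 12 kHz buffer used by
+// the coarse candidate search (see `ComputePitchPeriod12kHz()` below).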
+void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
+                rtc::ArrayView<float, kBufSize12kHz> dst);
+
+// Key concepts and keywords used below in this file.
+//
+// The pitch estimation relies on a pitch buffer, which is an array-like data
+// structure designed as follows:
+//
+// |....A....|.....B.....|
+//
+// The part on the left, named `A`, contains the oldest samples, whereas `B`
+// contains the most recent ones. The size of `A` corresponds to the maximum
+// pitch period, that of `B` to the analysis frame size (e.g., 16 ms and 20 ms
+// respectively).
+//
+// Pitch estimation is essentially based on the analysis of two 20 ms frames
+// extracted from the pitch buffer. One frame, called `x`, is kept fixed and
+// corresponds to `B` - i.e., the most recent 20 ms. The other frame, called
+// `y`, is extracted from different parts of the buffer instead.
+//
+// The offset between `x` and `y` corresponds to a specific pitch period.
+// For instance, if `y` is positioned at the beginning of the pitch buffer, then
+// the cross-correlation between `x` and `y` can be used as an indication of the
+// pitch strength for the maximum pitch period.
+//
+// Such an offset can be encoded in two ways:
+// - As a lag, which is the index in the pitch buffer for the first item in `y`
+// - As an inverted lag, which is the number of samples between the beginning of
+//   `x` and the end of `y`
+//
+// |---->| lag
+// |....A....|.....B.....|
+// |<--| inverted lag
+// |.....y.....| `y` 20 ms frame
+//
+// The inverted lag has the advantage of being directly proportional to the
+// corresponding pitch period.
+
+// Computes the sum of squared samples for every sliding frame `y` in the pitch
+// buffer. The indexes of `y_energy` are inverted lags.
+void ComputeSlidingFrameSquareEnergies24kHz(
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    rtc::ArrayView<float, kRefineNumLags24kHz> y_energy,
+    AvailableCpuFeatures cpu_features);
+
+// Top-2 pitch period candidates. Unit: number of samples - i.e., inverted lags.
+struct CandidatePitchPeriods {
+  int best;
+  int second_best;
+};
+
+// Computes the candidate pitch periods at 12 kHz given a view on the 12 kHz
+// pitch buffer and the auto-correlation values (having inverted lags as
+// indexes).
+CandidatePitchPeriods ComputePitchPeriod12kHz(
+    rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer,
+    rtc::ArrayView<const float, kNumLags12kHz> auto_correlation,
+    AvailableCpuFeatures cpu_features);
+
+// Computes the pitch period at 48 kHz given a view on the 24 kHz pitch buffer,
+// the energies for the sliding frames `y` at 24 kHz and the pitch period
+// candidates at 24 kHz (encoded as inverted lags).
+int ComputePitchPeriod48kHz(
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
+    CandidatePitchPeriods pitch_candidates_24kHz,
+    AvailableCpuFeatures cpu_features);
+
+struct PitchInfo {
+  int period;
+  float strength;
+};
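+// For reference, the period-to-inverted-lag mapping used by the functions in
+// this file: a pitch period of T samples at 24 kHz is probed with the sliding
+// frame `y` that starts T samples before `x`. The helper below is an editor's
+// sketch with a hypothetical name, not part of the upstream API:
+constexpr int InvertedLagFromPeriod24kHz(int period_24kHz) {
+  return kMaxPitch24kHz - period_24kHz;
+}
+
+// Computes the pitch period at 48 kHz searching in an extended pitch range
+// given a view on the 24 kHz pitch buffer, the energies for the sliding frames
+// `y` at 24 kHz, the initial 48 kHz estimation (computed by
+// `ComputePitchPeriod48kHz()`) and the last estimated pitch.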
+PitchInfo ComputeExtendedPitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + int initial_pitch_period_48kHz, + PitchInfo last_pitch_48kHz, + AvailableCpuFeatures cpu_features); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc new file mode 100644 index 0000000000..2a6e68f157 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" + +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "rtc_base/strings/string_builder.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kTestPitchPeriodsLow = 3 * kMinPitch48kHz / 2; +constexpr int kTestPitchPeriodsHigh = (3 * kMinPitch48kHz + kMaxPitch48kHz) / 2; + +constexpr float kTestPitchStrengthLow = 0.35f; +constexpr float kTestPitchStrengthHigh = 0.75f; + +template +std::string PrintTestIndexAndCpuFeatures( + const ::testing::TestParamInfo& info) { + rtc::StringBuilder builder; + builder << info.index << "_" << info.param.cpu_features.ToString(); + return builder.str(); +} + +// Finds the relevant CPU features combinations to test. +std::vector GetCpuFeaturesToTest() { + std::vector v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + return v; +} + +// Checks that the frame-wise sliding square energy function produces output +// within tolerance given test input data. +TEST(RnnVadTest, ComputeSlidingFrameSquareEnergies24kHzWithinTolerance) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::array computed_output; + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + computed_output, cpu_features); + auto square_energies_view = test_data.SquareEnergies24kHzView(); + ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()}, + computed_output, 1e-3f); +} + +// Checks that the estimated pitch period is bit-exact given test input data. 
+TEST(RnnVadTest, ComputePitchPeriod12kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::array pitch_buf_decimated; + Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated); + CandidatePitchPeriods pitch_candidates; + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + pitch_candidates = ComputePitchPeriod12kHz( + pitch_buf_decimated, test_data.AutoCorrelation12kHzView(), cpu_features); + EXPECT_EQ(pitch_candidates.best, 140); + EXPECT_EQ(pitch_candidates.second_best, 142); +} + +// Checks that the refined pitch period is bit-exact given test input data. +TEST(RnnVadTest, ComputePitchPeriod48kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{280, 284}, cpu_features), + 560); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{260, 284}, cpu_features), + 568); +} + +struct PitchCandidatesParameters { + CandidatePitchPeriods pitch_candidates; + AvailableCpuFeatures cpu_features; +}; + +class PitchCandidatesParametrization + : public ::testing::TestWithParam {}; + +// Checks that the result of `ComputePitchPeriod48kHz()` does not depend on the +// order of the input pitch candidates. +TEST_P(PitchCandidatesParametrization, + ComputePitchPeriod48kHzOrderDoesNotMatter) { + const PitchCandidatesParameters params = GetParam(); + const CandidatePitchPeriods swapped_pitch_candidates{ + params.pitch_candidates.second_best, params.pitch_candidates.best}; + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + params.pitch_candidates, params.cpu_features), + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + swapped_pitch_candidates, params.cpu_features)); +} + +std::vector CreatePitchCandidatesParameters() { + std::vector v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + v.push_back({{0, 2}, cpu_features}); + v.push_back({{260, 284}, cpu_features}); + v.push_back({{280, 284}, cpu_features}); + v.push_back( + {{kInitialNumLags24kHz - 2, kInitialNumLags24kHz - 1}, cpu_features}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + PitchCandidatesParametrization, + ::testing::ValuesIn(CreatePitchCandidatesParameters()), + PrintTestIndexAndCpuFeatures); + +struct ExtendedPitchPeriodSearchParameters { + int initial_pitch_period; + PitchInfo last_pitch; + PitchInfo expected_pitch; + AvailableCpuFeatures cpu_features; +}; + +class ExtendedPitchPeriodSearchParametrizaion + : public ::testing::TestWithParam {}; + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch strength is within tolerance given test input data. 
+TEST_P(ExtendedPitchPeriodSearchParametrizaion, + PeriodBitExactnessGainWithinTolerance) { + const ExtendedPitchPeriodSearchParameters params = GetParam(); + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + const auto computed_output = ComputeExtendedPitchPeriod48kHz( + test_data.PitchBuffer24kHzView(), y_energy_view, + params.initial_pitch_period, params.last_pitch, params.cpu_features); + EXPECT_EQ(params.expected_pitch.period, computed_output.period); + EXPECT_NEAR(params.expected_pitch.strength, computed_output.strength, 1e-6f); +} + +std::vector +CreateExtendedPitchPeriodSearchParameters() { + std::vector v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + for (int last_pitch_period : + {kTestPitchPeriodsLow, kTestPitchPeriodsHigh}) { + for (float last_pitch_strength : + {kTestPitchStrengthLow, kTestPitchStrengthHigh}) { + v.push_back({kTestPitchPeriodsLow, + {last_pitch_period, last_pitch_strength}, + {91, -0.0188608f}, + cpu_features}); + v.push_back({kTestPitchPeriodsHigh, + {last_pitch_period, last_pitch_strength}, + {475, -0.0904344f}, + cpu_features}); + } + } + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + ExtendedPitchPeriodSearchParametrizaion, + ::testing::ValuesIn(CreateExtendedPitchPeriodSearchParameters()), + PrintTestIndexAndCpuFeatures); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc new file mode 100644 index 0000000000..79b44b995c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include +#include + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch gain is within tolerance given test input data. +TEST(RnnVadTest, PitchSearchWithinTolerance) { + ChunksFileReader reader = CreateLpResidualAndPitchInfoReader(); + const int num_frames = std::min(reader.num_chunks, 300); // Max 3 s. + std::vector lp_residual(kBufSize24kHz); + float expected_pitch_period, expected_pitch_strength; + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + PitchEstimator pitch_estimator(cpu_features); + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+    // FloatingPointExceptionObserver fpe_observer;
+    for (int i = 0; i < num_frames; ++i) {
+      SCOPED_TRACE(i);
+      ASSERT_TRUE(reader.reader->ReadChunk(lp_residual));
+      ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_period));
+      ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_strength));
+      int pitch_period =
+          pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz});
+      EXPECT_EQ(expected_pitch_period, pitch_period);
+      EXPECT_NEAR(expected_pitch_strength,
+                  pitch_estimator.GetLastPitchStrengthForTesting(), 15e-6f);
+    }
+  }
+}
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h
new file mode 100644
index 0000000000..a6f7fdd1a6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Ring buffer for N arrays of type T, each one of size S.
+template <typename T, int S, int N>
+class RingBuffer {
+  static_assert(S > 0, "");
+  static_assert(N > 0, "");
+  static_assert(std::is_arithmetic<T>::value,
+                "Integral or floating point required.");
+
+ public:
+  RingBuffer() : tail_(0) {}
+  RingBuffer(const RingBuffer&) = delete;
+  RingBuffer& operator=(const RingBuffer&) = delete;
+  ~RingBuffer() = default;
+  // Set the ring buffer values to zero.
+  void Reset() { buffer_.fill(0); }
+  // Replace the least recently pushed array in the buffer with `new_values`.
+  void Push(rtc::ArrayView<const T, S> new_values) {
+    std::memcpy(buffer_.data() + S * tail_, new_values.data(), S * sizeof(T));
+    tail_ += 1;
+    if (tail_ == N)
+      tail_ = 0;
+  }
+  // Return an array view onto the array with a given delay. A view on the most
+  // recently and the least recently pushed arrays is returned when `delay` is
+  // 0 and N - 1 respectively.
+  rtc::ArrayView<const T, S> GetArrayView(int delay) const {
+    RTC_DCHECK_LE(0, delay);
+    RTC_DCHECK_LT(delay, N);
+    int offset = tail_ - 1 - delay;
+    if (offset < 0)
+      offset += N;
+    return {buffer_.data() + S * offset, S};
+  }
+
+ private:
+  int tail_;  // Index of the least recently pushed sub-array.
+  std::array<T, S * N> buffer_{};
+};
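+
+// Usage sketch (hypothetical values): a ring buffer holding the last 3
+// snapshots of a 2-element array.
+//   RingBuffer<float, 2, 3> buf;
+//   std::array<float, 2> x{1.f, 2.f};
+//   buf.Push(x);               // `x` becomes the most recent entry.
+//   x = {3.f, 4.f};
+//   buf.Push(x);
+//   buf.GetArrayView(0);       // View on {3, 4} (delay 0: last pushed).
+//   buf.GetArrayView(1);       // View on {1, 2} (delay 1).
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc
new file mode 100644
index 0000000000..d11d4eac3e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.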
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" + +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Compare the elements of two given array views. +template +void ExpectEq(rtc::ArrayView a, rtc::ArrayView b) { + for (int i = 0; i < S; ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(a[i], b[i]); + } +} + +// Test push/read sequences. +template +void TestRingBuffer() { + SCOPED_TRACE(N); + SCOPED_TRACE(S); + std::array prev_pushed_array; + std::array pushed_array; + rtc::ArrayView pushed_array_view(pushed_array.data(), S); + + // Init. + RingBuffer ring_buf; + ring_buf.GetArrayView(0); + pushed_array.fill(0); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + + // Push N times and check most recent and second most recent. + for (T v = 1; v <= static_cast(N); ++v) { + SCOPED_TRACE(v); + prev_pushed_array = pushed_array; + pushed_array.fill(v); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + if (N > 1) { + pushed_array.fill(v - 1); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(1)); + } + } + + // Check buffer. + for (int delay = 2; delay < N; ++delay) { + SCOPED_TRACE(delay); + T expected_value = N - static_cast(delay); + pushed_array.fill(expected_value); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(delay)); + } +} + +// Check that for different delays, different views are returned. +TEST(RnnVadTest, RingBufferArrayViews) { + constexpr int s = 3; + constexpr int n = 4; + RingBuffer ring_buf; + std::array pushed_array; + pushed_array.fill(1); + for (int k = 0; k <= n; ++k) { // Push data n + 1 times. + SCOPED_TRACE(k); + // Check array views. + for (int i = 0; i < n; ++i) { + SCOPED_TRACE(i); + auto view_i = ring_buf.GetArrayView(i); + for (int j = i + 1; j < n; ++j) { + SCOPED_TRACE(j); + auto view_j = ring_buf.GetArrayView(j); + EXPECT_NE(view_i, view_j); + } + } + ring_buf.Push(pushed_array); + } +} + +TEST(RnnVadTest, RingBufferUnsigned) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +TEST(RnnVadTest, RingBufferSigned) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +TEST(RnnVadTest, RingBufferFloating) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc new file mode 100644 index 0000000000..475bef9775 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "rtc_base/checks.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +using ::rnnoise::kInputLayerInputSize; +static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerOutputSize; +static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +using ::rnnoise::kHiddenGruBias; +using ::rnnoise::kHiddenGruRecurrentWeights; +using ::rnnoise::kHiddenGruWeights; +using ::rnnoise::kHiddenLayerOutputSize; +static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, ""); + +using ::rnnoise::kOutputDenseBias; +using ::rnnoise::kOutputDenseWeights; +using ::rnnoise::kOutputLayerOutputSize; +static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +} // namespace + +RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) + : input_(kInputLayerInputSize, + kInputLayerOutputSize, + kInputDenseBias, + kInputDenseWeights, + ActivationFunction::kTansigApproximated, + cpu_features, + /*layer_name=*/"FC1"), + hidden_(kInputLayerOutputSize, + kHiddenLayerOutputSize, + kHiddenGruBias, + kHiddenGruWeights, + kHiddenGruRecurrentWeights, + cpu_features, + /*layer_name=*/"GRU1"), + output_(kHiddenLayerOutputSize, + kOutputLayerOutputSize, + kOutputDenseBias, + kOutputDenseWeights, + ActivationFunction::kSigmoidApproximated, + // The output layer is just 24x1. The unoptimized code is faster. + NoAvailableCpuFeatures(), + /*layer_name=*/"FC2") { + // Input-output chaining size checks. + RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) + << "The input and the hidden layers sizes do not match."; + RTC_DCHECK_EQ(hidden_.size(), output_.input_size()) + << "The hidden and the output layers sizes do not match."; +} + +RnnVad::~RnnVad() = default; + +void RnnVad::Reset() { + hidden_.Reset(); +} + +float RnnVad::ComputeVadProbability( + rtc::ArrayView feature_vector, + bool is_silence) { + if (is_silence) { + Reset(); + return 0.f; + } + input_.ComputeOutput(feature_vector); + hidden_.ComputeOutput(input_); + output_.ComputeOutput(hidden_); + RTC_DCHECK_EQ(output_.size(), 1); + return output_.data()[0]; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h new file mode 100644 index 0000000000..3148f1b3ff --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ + +#include +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +namespace webrtc { +namespace rnn_vad { + +// Recurrent network with hard-coded architecture and weights for voice activity +// detection. +class RnnVad { + public: + explicit RnnVad(const AvailableCpuFeatures& cpu_features); + RnnVad(const RnnVad&) = delete; + RnnVad& operator=(const RnnVad&) = delete; + ~RnnVad(); + void Reset(); + // Observes `feature_vector` and `is_silence`, updates the RNN and returns the + // current voice probability. + float ComputeVadProbability( + rtc::ArrayView feature_vector, + bool is_silence); + + private: + FullyConnectedLayer input_; + GatedRecurrentLayer hidden_; + FullyConnectedLayer output_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc new file mode 100644 index 0000000000..91501fb6e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +std::vector GetScaledParams(rtc::ArrayView params) { + std::vector scaled_params(params.size()); + std::transform(params.begin(), params.end(), scaled_params.begin(), + [](int8_t x) -> float { + return ::rnnoise::kWeightsScale * static_cast(x); + }); + return scaled_params; +} + +// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this +// function to improve setup time. +// Casts and scales `weights` and re-arranges the layout. +std::vector PreprocessWeights(rtc::ArrayView weights, + int output_size) { + if (output_size == 1) { + return GetScaledParams(weights); + } + // Transpose, scale and cast. 
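+  // The source layout stores, for each input `i`, one weight per output unit
+  // (`weights[i * output_size + o]`); the destination groups each output
+  // unit's weights contiguously so that ComputeOutput() can issue a single
+  // DotProduct() per unit.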
+ const int input_size = rtc::CheckedDivExact( + rtc::dchecked_cast(weights.size()), output_size); + std::vector w(weights.size()); + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < input_size; ++i) { + w[o * input_size + i] = rnnoise::kWeightsScale * + static_cast(weights[i * output_size + o]); + } + } + return w; +} + +rtc::FunctionView GetActivationFunction( + ActivationFunction activation_function) { + switch (activation_function) { + case ActivationFunction::kTansigApproximated: + return ::rnnoise::TansigApproximated; + case ActivationFunction::kSigmoidApproximated: + return ::rnnoise::SigmoidApproximated; + } +} + +} // namespace + +FullyConnectedLayer::FullyConnectedLayer( + const int input_size, + const int output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(GetScaledParams(bias)), + weights_(PreprocessWeights(weights, output_size)), + vector_math_(cpu_features), + activation_function_(GetActivationFunction(activation_function)) { + RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits) + << "Insufficient FC layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::ComputeOutput(rtc::ArrayView input) { + RTC_DCHECK_EQ(input.size(), input_size_); + rtc::ArrayView weights(weights_); + for (int o = 0; o < output_size_; ++o) { + output_[o] = activation_function_( + bias_[o] + vector_math_.DotProduct( + input, weights.subview(o * input_size_, input_size_))); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h new file mode 100644 index 0000000000..d23957a6f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/function_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Activation function for a neural network cell. +enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated }; + +// Maximum number of units for an FC layer. +constexpr int kFullyConnectedLayerMaxUnits = 24; + +// Fully-connected layer with a custom activation function which owns the output +// buffer. +class FullyConnectedLayer { + public: + // Ctor. 
`output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
+  FullyConnectedLayer(int input_size,
+                      int output_size,
+                      rtc::ArrayView<const int8_t> bias,
+                      rtc::ArrayView<const int8_t> weights,
+                      ActivationFunction activation_function,
+                      const AvailableCpuFeatures& cpu_features,
+                      absl::string_view layer_name);
+  FullyConnectedLayer(const FullyConnectedLayer&) = delete;
+  FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
+  ~FullyConnectedLayer();
+
+  // Returns the size of the input vector.
+  int input_size() const { return input_size_; }
+  // Returns the pointer to the first element of the output buffer.
+  const float* data() const { return output_.data(); }
+  // Returns the size of the output buffer.
+  int size() const { return output_size_; }
+
+  // Computes the fully-connected layer output.
+  void ComputeOutput(rtc::ArrayView<const float> input);
+
+ private:
+  const int input_size_;
+  const int output_size_;
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
+  const VectorMath vector_math_;
+  rtc::FunctionView<float(float)> activation_function_;
+  // Over-allocated array with size equal to `output_size_`.
+  std::array<float, kFullyConnectedLayerMaxUnits> output_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
new file mode 100644
index 0000000000..ff9bb18bc2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/system/arch.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+using ::rnnoise::kInputDenseBias;
+using ::rnnoise::kInputDenseWeights;
+using ::rnnoise::kInputLayerInputSize;
+using ::rnnoise::kInputLayerOutputSize;
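+
+// For each output unit o, `FullyConnectedLayer::ComputeOutput()` evaluates
+// (sketch of the operation, with f the configured activation):
+//   output[o] = f(bias[o] + dot(input, weights[o*input_size : (o+1)*input_size]))
+// For instance, with input_size = 2, output_size = 1, bias = {0}, weights =
+// {1, -1} and the sigmoid activation, the input {2, 1} maps to
+// sigmoid(2 - 1) ~= 0.73.
+
+// Fully connected layer test data.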
+constexpr std::array kFullyConnectedInputVector = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; +constexpr std::array kFullyConnectedExpectedOutput = { + -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f, + -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f, + 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f, + 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f}; + +class RnnFcParametrization + : public ::testing::TestWithParam {}; + +// Checks that the output of a fully connected layer is within tolerance given +// test input data. +TEST_P(RnnFcParametrization, CheckFullyConnectedLayerOutput) { + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, + /*cpu_features=*/GetParam(), + /*layer_name=*/"FC"); + fc.ComputeOutput(kFullyConnectedInputVector); + ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f); +} + +TEST_P(RnnFcParametrization, DISABLED_BenchmarkFullyConnectedLayer) { + const AvailableCpuFeatures cpu_features = GetParam(); + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, cpu_features, + /*layer_name=*/"FC"); + + constexpr int kNumTests = 10000; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + fc.ComputeOutput(kFullyConnectedInputVector); + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | " + << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. +std::vector GetCpuFeaturesToTest() { + std::vector v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnFcParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc new file mode 100644 index 0000000000..ef37410caa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kNumGruGates = 3; // Update, reset, output. + +std::vector PreprocessGruTensor(rtc::ArrayView tensor_src, + int output_size) { + // Transpose, cast and scale. + // `n` is the size of the first dimension of the 3-dim tensor `weights`. + const int n = rtc::CheckedDivExact(rtc::dchecked_cast(tensor_src.size()), + output_size * kNumGruGates); + const int stride_src = kNumGruGates * output_size; + const int stride_dst = n * output_size; + std::vector tensor_dst(tensor_src.size()); + for (int g = 0; g < kNumGruGates; ++g) { + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < n; ++i) { + tensor_dst[g * stride_dst + o * n + i] = + ::rnnoise::kWeightsScale * + static_cast( + tensor_src[i * stride_src + g * output_size + o]); + } + } + } + return tensor_dst; +} + +// Computes the output for the update or the reset gate. +// Operation: `g = sigmoid(W^T∙i + R^T∙s + b)` where +// - `g`: output gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `s`: state gate vector +// - `b`: bias vector +void ComputeUpdateResetGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView input, + rtc::ArrayView state, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + rtc::ArrayView gate) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_EQ(state.size(), output_size); + RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_GE(gate.size(), output_size); // `gate` is over-allocated. + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + state, recurrent_weights.subview(o * output_size, output_size)); + gate[o] = ::rnnoise::SigmoidApproximated(x); + } +} + +// Computes the output for the state gate. +// Operation: `s' = u .* s + (1 - u) .* ReLU(W^T∙i + R^T∙(s .* r) + b)` where +// - `s'`: output state gate vector +// - `s`: previous state gate vector +// - `u`: update gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `r`: reset gate vector +// - `b`: bias vector +// - `.*` element-wise product +void ComputeStateGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView input, + rtc::ArrayView update, + rtc::ArrayView reset, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + rtc::ArrayView state) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_GE(update.size(), output_size); // `update` is over-allocated. + RTC_DCHECK_GE(reset.size(), output_size); // `reset` is over-allocated. 
+ RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_EQ(state.size(), output_size); + std::array reset_x_state; + for (int o = 0; o < output_size; ++o) { + reset_x_state[o] = state[o] * reset[o]; + } + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + {reset_x_state.data(), static_cast(output_size)}, + recurrent_weights.subview(o * output_size, output_size)); + state[o] = update[o] * state[o] + (1.f - update[o]) * std::max(0.f, x); + } +} + +} // namespace + +GatedRecurrentLayer::GatedRecurrentLayer( + const int input_size, + const int output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + const rtc::ArrayView recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(PreprocessGruTensor(bias, output_size)), + weights_(PreprocessGruTensor(weights, output_size)), + recurrent_weights_(PreprocessGruTensor(recurrent_weights, output_size)), + vector_math_(cpu_features) { + RTC_DCHECK_LE(output_size_, kGruLayerMaxUnits) + << "Insufficient GRU layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(kNumGruGates * input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_ * output_size_, + recurrent_weights_.size()) + << "Mismatching input-output size and recurrent weight coefficients array" + " size (" + << layer_name << ")."; + Reset(); +} + +GatedRecurrentLayer::~GatedRecurrentLayer() = default; + +void GatedRecurrentLayer::Reset() { + state_.fill(0.f); +} + +void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView input) { + RTC_DCHECK_EQ(input.size(), input_size_); + + // The tensors below are organized as a sequence of flattened tensors for the + // `update`, `reset` and `state` gates. + rtc::ArrayView bias(bias_); + rtc::ArrayView weights(weights_); + rtc::ArrayView recurrent_weights(recurrent_weights_); + // Strides to access to the flattened tensors for a specific gate. + const int stride_weights = input_size_ * output_size_; + const int stride_recurrent_weights = output_size_ * output_size_; + + rtc::ArrayView state(state_.data(), output_size_); + + // Update gate. + std::array update; + ComputeUpdateResetGate( + input_size_, output_size_, vector_math_, input, state, + bias.subview(0, output_size_), weights.subview(0, stride_weights), + recurrent_weights.subview(0, stride_recurrent_weights), update); + // Reset gate. + std::array reset; + ComputeUpdateResetGate(input_size_, output_size_, vector_math_, input, state, + bias.subview(output_size_, output_size_), + weights.subview(stride_weights, stride_weights), + recurrent_weights.subview(stride_recurrent_weights, + stride_recurrent_weights), + reset); + // State gate. 
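+  // Unlike the update and reset gates, the state gate uses a ReLU activation
+  // and updates `state` in place; the state vector also serves as the layer
+  // output (see `data()`).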
+ ComputeStateGate(input_size_, output_size_, vector_math_, input, update, + reset, bias.subview(2 * output_size_, output_size_), + weights.subview(2 * stride_weights, stride_weights), + recurrent_weights.subview(2 * stride_recurrent_weights, + stride_recurrent_weights), + state); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h new file mode 100644 index 0000000000..3407dfcdf1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Maximum number of units for a GRU layer. +constexpr int kGruLayerMaxUnits = 24; + +// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as +// activation functions for the update/reset and output gates respectively. +class GatedRecurrentLayer { + public: + // Ctor. `output_size` cannot be greater than `kGruLayerMaxUnits`. + GatedRecurrentLayer(int input_size, + int output_size, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name); + GatedRecurrentLayer(const GatedRecurrentLayer&) = delete; + GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete; + ~GatedRecurrentLayer(); + + // Returns the size of the input vector. + int input_size() const { return input_size_; } + // Returns the pointer to the first element of the output buffer. + const float* data() const { return state_.data(); } + // Returns the size of the output buffer. + int size() const { return output_size_; } + + // Resets the GRU state. + void Reset(); + // Computes the recurrent layer output and updates the status. + void ComputeOutput(rtc::ArrayView input); + + private: + const int input_size_; + const int output_size_; + const std::vector bias_; + const std::vector weights_; + const std::vector recurrent_weights_; + const VectorMath vector_math_; + // Over-allocated array with size equal to `output_size_`. + std::array state_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc new file mode 100644 index 0000000000..88ae72803a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h"
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+void TestGatedRecurrentLayer(
+    GatedRecurrentLayer& gru,
+    rtc::ArrayView<const float> input_sequence,
+    rtc::ArrayView<const float> expected_output_sequence) {
+  const int input_sequence_length = rtc::CheckedDivExact(
+      rtc::dchecked_cast<int>(input_sequence.size()), gru.input_size());
+  const int output_sequence_length = rtc::CheckedDivExact(
+      rtc::dchecked_cast<int>(expected_output_sequence.size()), gru.size());
+  ASSERT_EQ(input_sequence_length, output_sequence_length)
+      << "The test data length is invalid.";
+  // Feed the GRU layer and check the output at every step.
+  gru.Reset();
+  for (int i = 0; i < input_sequence_length; ++i) {
+    SCOPED_TRACE(i);
+    gru.ComputeOutput(
+        input_sequence.subview(i * gru.input_size(), gru.input_size()));
+    const auto expected_output =
+        expected_output_sequence.subview(i * gru.size(), gru.size());
+    ExpectNearAbsolute(expected_output, gru, 3e-6f);
+  }
+}
+
+// Gated recurrent units layer test data.
+constexpr int kGruInputSize = 5;
+constexpr int kGruOutputSize = 4;
+constexpr std::array<int8_t, 12> kGruBias = {96,   -99, -81, -114, 49,  119,
+                                             -118, 68,  -76, 91,   121, 125};
+constexpr std::array<int8_t, 60> kGruWeights = {
+    // Input 0.
+    124, 9, 1, 116,        // Update.
+    -66, -21, -118, -110,  // Reset.
+    104, 75, -23, -51,     // Output.
+    // Input 1.
+    -72, -111, 47, 93,   // Update.
+    77, -98, 41, -8,     // Reset.
+    40, -23, -43, -107,  // Output.
+    // Input 2.
+    9, -73, 30, -32,      // Update.
+    -2, 64, -26, 91,      // Reset.
+    -48, -24, -28, -104,  // Output.
+    // Input 3.
+    74, -46, 116, 15,    // Update.
+    32, 52, -126, -38,   // Reset.
+    -121, 12, -16, 110,  // Output.
+    // Input 4.
+    -95, 66, -103, -35,  // Update.
+    -38, 3, -126, -61,   // Reset.
+    28, 98, -117, -43    // Output.
+};
+constexpr std::array<int8_t, 48> kGruRecurrentWeights = {
+    // Output 0.
+    -3, 87, 50, 51,     // Update.
+    -22, 27, -39, 62,   // Reset.
+    31, -83, -52, -48,  // Output.
+    // Output 1.
+    -6, 83, -19, 104,  // Update.
+    105, 48, 23, 68,   // Reset.
+    23, 40, 7, -120,   // Output.
+    // Output 2.
+    64, -62, 117, 85,     // Update.
+    51, -43, 54, -105,    // Reset.
+    120, 56, -128, -107,  // Output.
+    // Output 3.
+    39, 50, -17, -47,   // Update.
+    -117, 14, 108, 12,  // Reset.
+    -7, -72, 103, -87,  // Output.
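+    // (As with `kGruBias` and `kGruWeights` above, these int8 coefficients
+    // are cast to float, scaled and transposed by `PreprocessGruTensor()` in
+    // the `GatedRecurrentLayer` ctor before being used.)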
+};
+constexpr std::array<float, 20> kGruInputSequence = {
+    0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f,
+    0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f,
+    0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f,
+    0.24517593f, 0.47657707f, 0.57064998f, 0.435184f,   0.19319285f};
+constexpr std::array<float, 16> kGruExpectedOutputSequence = {
+    0.0239123f,  0.5773077f,  0.f,         0.f,
+    0.01282811f, 0.64330572f, 0.f,         0.04863098f,
+    0.00781069f, 0.75267816f, 0.f,         0.02579715f,
+    0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f};
+
+class RnnGruParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+// Checks that the output of a GRU layer is within tolerance given test input
+// data.
+TEST_P(RnnGruParametrization, CheckGatedRecurrentLayer) {
+  GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
+                          kGruRecurrentWeights,
+                          /*cpu_features=*/GetParam(),
+                          /*layer_name=*/"GRU");
+  TestGatedRecurrentLayer(gru, kGruInputSequence, kGruExpectedOutputSequence);
+}
+
+TEST_P(RnnGruParametrization, DISABLED_BenchmarkGatedRecurrentLayer) {
+  // Prefetch test data.
+  std::unique_ptr<FileReader> reader = CreateGruInputReader();
+  std::vector<float> gru_input_sequence(reader->size());
+  reader->ReadChunk(gru_input_sequence);
+
+  using ::rnnoise::kHiddenGruBias;
+  using ::rnnoise::kHiddenGruRecurrentWeights;
+  using ::rnnoise::kHiddenGruWeights;
+  using ::rnnoise::kHiddenLayerOutputSize;
+  using ::rnnoise::kInputLayerOutputSize;
+
+  GatedRecurrentLayer gru(kInputLayerOutputSize, kHiddenLayerOutputSize,
+                          kHiddenGruBias, kHiddenGruWeights,
+                          kHiddenGruRecurrentWeights,
+                          /*cpu_features=*/GetParam(),
+                          /*layer_name=*/"GRU");
+
+  rtc::ArrayView<const float> input_sequence(gru_input_sequence);
+  ASSERT_EQ(input_sequence.size() % kInputLayerOutputSize,
+            static_cast<size_t>(0));
+  const int input_sequence_length =
+      input_sequence.size() / kInputLayerOutputSize;
+
+  constexpr int kNumTests = 100;
+  ::webrtc::test::PerformanceTimer perf_timer(kNumTests);
+  for (int k = 0; k < kNumTests; ++k) {
+    perf_timer.StartTimer();
+    for (int i = 0; i < input_sequence_length; ++i) {
+      gru.ComputeOutput(
+          input_sequence.subview(i * gru.input_size(), gru.input_size()));
+    }
+    perf_timer.StopTimer();
+  }
+  RTC_LOG(LS_INFO) << (perf_timer.GetDurationAverage() / 1000) << " +/- "
+                   << (perf_timer.GetDurationStandardDeviation() / 1000)
+                   << " ms";
+}
+
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back(NoAvailableCpuFeatures());
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  if (available.avx2) {
+    v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.neon) {
+    v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    RnnGruParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
new file mode 100644
index 0000000000..4c5409a14e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr std::array<float, kFeatureVectorSize> kFeatures = {
+    -1.00131f,   -0.627069f, -7.81097f,  7.86285f,    -2.87145f,  3.32365f,
+    -0.653161f,  0.529839f,  -0.425307f, 0.25583f,    0.235094f,  0.230527f,
+    -0.144687f,  0.182785f,  0.57102f,   0.125039f,   0.479482f,  -0.0255439f,
+    -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f,  3.09065f,
+    1.42628f,    -0.85235f,  -0.220207f, -0.811163f,  2.09032f,   -2.01425f,
+    -0.690268f,  -0.925327f, -0.541354f, 0.58455f,    -0.606726f, -0.0372358f,
+    0.565991f,   0.435854f,  0.420812f,  0.162198f,   -2.13f,     10.0089f};
+
+void WarmUpRnnVad(RnnVad& rnn_vad) {
+  for (int i = 0; i < 10; ++i) {
+    rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
+  }
+}
+
+// Checks that the speech probability is zero with silence.
+TEST(RnnVadTest, CheckZeroProbabilityWithSilence) {
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
+  WarmUpRnnVad(rnn_vad);
+  EXPECT_EQ(rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true), 0.f);
+}
+
+// Checks that the same output is produced after reset given the same input
+// sequence.
+TEST(RnnVadTest, CheckRnnVadReset) {
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
+  WarmUpRnnVad(rnn_vad);
+  float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
+  rnn_vad.Reset();
+  WarmUpRnnVad(rnn_vad);
+  float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
+  EXPECT_EQ(pre, post);
+}
+
+// Checks that the same output is produced after silence is observed given the
+// same input sequence.
+TEST(RnnVadTest, CheckRnnVadSilence) {
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
+  WarmUpRnnVad(rnn_vad);
+  float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
+  rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true);
+  WarmUpRnnVad(rnn_vad);
+  float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
+  EXPECT_EQ(pre, post);
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build
new file mode 100644
index 0000000000..3f00e43e7c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_auto_correlation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build new file mode 100644 index 0000000000..64d3371d81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build new file mode 100644 index 0000000000..82e5302312 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build new file mode 100644 index 0000000000..7dd4619965 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_layers_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build new file mode 100644 index 0000000000..1fcbbbe408 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_lp_residual_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build new file mode 100644 index 0000000000..6a69de9e29 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + 
DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_pitch_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build new file mode 100644 index 0000000000..d2a4eb7261 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_ring_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build new file mode 100644 index 0000000000..c6913470c3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_sequence_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build new file mode 100644 index 0000000000..8298c7e091 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + 
DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_spectral_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build new file mode 100644 index 0000000000..acb9330454 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_symmetric_matrix_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc new file mode 100644 index 0000000000..a0e1242eb4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/wav_file.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_compare.h" + +ABSL_FLAG(std::string, i, "", "Path to the input wav file"); +ABSL_FLAG(std::string, f, "", "Path to the output features file"); +ABSL_FLAG(std::string, o, "", "Path to the output VAD probabilities file"); + +namespace webrtc { +namespace rnn_vad { +namespace test { + +int main(int argc, char* argv[]) { + absl::ParseCommandLine(argc, argv); + rtc::LogMessage::LogToDebug(rtc::LS_INFO); + + // Open wav input file and check properties. 
+  const std::string input_wav_file = absl::GetFlag(FLAGS_i);
+  WavReader wav_reader(input_wav_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files are supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() % 100 != 0) {
+    RTC_LOG(LS_ERROR) << "The sample rate must allow 10 ms frames.";
+    return 1;
+  }
+  RTC_LOG(LS_INFO) << "Input sample rate: " << wav_reader.sample_rate();
+
+  // Init output files.
+  const std::string output_vad_probs_file = absl::GetFlag(FLAGS_o);
+  FILE* vad_probs_file = fopen(output_vad_probs_file.c_str(), "wb");
+  FILE* features_file = nullptr;
+  const std::string output_feature_file = absl::GetFlag(FLAGS_f);
+  if (!output_feature_file.empty()) {
+    features_file = fopen(output_feature_file.c_str(), "wb");
+  }
+
+  // Initialize.
+  const int frame_size_10ms =
+      rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
+  std::vector<float> samples_10ms;
+  samples_10ms.resize(frame_size_10ms);
+  std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
+  PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
+  const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+
+  // Compute VAD probabilities.
+  while (true) {
+    // Read frame at the input sample rate.
+    const size_t read_samples =
+        wav_reader.ReadSamples(frame_size_10ms, samples_10ms.data());
+    if (rtc::SafeLt(read_samples, frame_size_10ms)) {
+      break;  // EOF.
+    }
+    // Resample input.
+    resampler.Resample(samples_10ms.data(), samples_10ms.size(),
+                       samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
+
+    // Extract features and feed the RNN.
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        samples_10ms_24kHz, feature_vector);
+    float vad_probability =
+        rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    // Write voice probability.
+    RTC_DCHECK_GE(vad_probability, 0.f);
+    RTC_DCHECK_GE(1.f, vad_probability);
+    fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
+    // Write features.
+    if (features_file) {
+      const float float_is_silence = is_silence ? 1.f : 0.f;
+      fwrite(&float_is_silence, sizeof(float), 1, features_file);
+      if (is_silence) {
+        // Do not write uninitialized values.
+        feature_vector.fill(0.f);
+      }
+      fwrite(feature_vector.data(), sizeof(float), kFeatureVectorSize,
+             features_file);
+    }
+  }
+
+  // Close output file(s).
+  fclose(vad_probs_file);
+  RTC_LOG(LS_INFO) << "VAD probabilities written to " << output_vad_probs_file;
+  if (features_file) {
+    fclose(features_file);
+    RTC_LOG(LS_INFO) << "features written to " << output_feature_file;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::rnn_vad::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
new file mode 100644
index 0000000000..f33cd14a8a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <array>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
+#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_activations.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kFrameSize10ms48kHz = 480;
+
+void DumpPerfStats(int num_samples,
+                   int sample_rate,
+                   double average_us,
+                   double standard_deviation) {
+  float audio_track_length_ms =
+      1e3f * static_cast<float>(num_samples) / static_cast<float>(sample_rate);
+  float average_ms = static_cast<float>(average_us) / 1e3f;
+  float speed = audio_track_length_ms / average_ms;
+  RTC_LOG(LS_INFO) << "track duration (ms): " << audio_track_length_ms;
+  RTC_LOG(LS_INFO) << "average processing time (ms): " << average_ms << " +/- "
+                   << (standard_deviation / 1e3);
+  RTC_LOG(LS_INFO) << "speed: " << speed << "x";
+}
+
+// When the RNN VAD model is updated and the expected output changes, set the
+// constant below to true in order to write new expected output binary files.
+constexpr bool kWriteComputedOutputToFile = false;
+
+// Guards against forgetting to set `kWriteComputedOutputToFile` back to false
+// when the expected output files are re-exported.
+TEST(RnnVadTest, CheckWriteComputedOutputIsFalse) {
+  ASSERT_FALSE(kWriteComputedOutputToFile)
+      << "Cannot land if kWriteComputedOutputToFile is true.";
+}
+
+class RnnVadProbabilityParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+// Checks that the computed VAD probability for a test input sequence sampled
+// at 48 kHz is within tolerance.
+TEST_P(RnnVadProbabilityParametrization, RnnVadProbabilityWithinTolerance) {
+  // Init resampler, feature extractor and RNN.
+  PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
+  const AvailableCpuFeatures cpu_features = GetParam();
+  FeaturesExtractor features_extractor(cpu_features);
+  RnnVad rnn_vad(cpu_features);
+
+  // Init input samples and expected output readers.
+  std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader();
+  std::unique_ptr<FileReader> expected_vad_prob_reader = CreateVadProbsReader();
+
+  // Input length. The last incomplete frame is ignored.
+  const int num_frames = samples_reader->size() / kFrameSize10ms48kHz;
+
+  // Init buffers.
+  std::vector<float> samples_48k(kFrameSize10ms48kHz);
+  std::vector<float> samples_24k(kFrameSize10ms24kHz);
+  std::vector<float> feature_vector(kFeatureVectorSize);
+  std::vector<float> computed_vad_prob(num_frames);
+  std::vector<float> expected_vad_prob(num_frames);
+
+  // Read expected output.
+  ASSERT_TRUE(expected_vad_prob_reader->ReadChunk(expected_vad_prob));
+
+  // Compute VAD probabilities on the downsampled input.
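+  // Each loop iteration below processes one 10 ms frame: read
+  // kFrameSize10ms48kHz samples, decimate them to kFrameSize10ms24kHz
+  // samples, extract the feature vector, feed it to the RNN and compare the
+  // returned probability against the expected one within a 1e-3 tolerance.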
+  float cumulative_error = 0.f;
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples_48k));
+    decimator.Resample(samples_48k.data(), samples_48k.size(),
+                       samples_24k.data(), samples_24k.size());
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        {samples_24k.data(), kFrameSize10ms24kHz},
+        {feature_vector.data(), kFeatureVectorSize});
+    computed_vad_prob[i] = rnn_vad.ComputeVadProbability(
+        {feature_vector.data(), kFeatureVectorSize}, is_silence);
+    EXPECT_NEAR(computed_vad_prob[i], expected_vad_prob[i], 1e-3f);
+    cumulative_error += std::abs(computed_vad_prob[i] - expected_vad_prob[i]);
+  }
+  // Check average error.
+  EXPECT_LT(cumulative_error / num_frames, 1e-4f);
+
+  if (kWriteComputedOutputToFile) {
+    FileWriter vad_prob_writer("new_vad_prob.dat");
+    vad_prob_writer.WriteChunk(computed_vad_prob);
+  }
+}
+
+// Performance test for the RNN VAD (pre-fetching and downsampling are
+// excluded). Keep disabled and only enable locally to measure performance as
+// follows:
+// - on desktop: run this unit test adding "--logs";
+// - on Android: run this unit test adding "--logcat-output-file".
+TEST_P(RnnVadProbabilityParametrization, DISABLED_RnnVadPerformance) {
+  // PCM samples reader and buffers.
+  std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader();
+  // The last incomplete frame is ignored.
+  const int num_frames = samples_reader->size() / kFrameSize10ms48kHz;
+  std::array<float, kFrameSize10ms48kHz> samples;
+  // Pre-fetch and decimate samples.
+  PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
+  std::vector<float> prefetched_decimated_samples;
+  prefetched_decimated_samples.resize(num_frames * kFrameSize10ms24kHz);
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples));
+    decimator.Resample(samples.data(), samples.size(),
+                       &prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+                       kFrameSize10ms24kHz);
+  }
+  // Initialize.
+  const AvailableCpuFeatures cpu_features = GetParam();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+  constexpr int number_of_tests = 100;
+  ::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
+  for (int k = 0; k < number_of_tests; ++k) {
+    features_extractor.Reset();
+    rnn_vad.Reset();
+    // Process frames.
+    perf_timer.StartTimer();
+    for (int i = 0; i < num_frames; ++i) {
+      bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+          {&prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+           kFrameSize10ms24kHz},
+          feature_vector);
+      rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    }
+    perf_timer.StopTimer();
+  }
+  DumpPerfStats(num_frames * kFrameSize10ms24kHz, kSampleRate24kHz,
+                perf_timer.GetDurationAverage(),
+                perf_timer.GetDurationStandardDeviation());
+}
+
+// Finds the relevant CPU features combinations to test.
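+// For example, on an x86-64 CPU with AVX2 this typically yields three
+// combinations (none, SSE2, SSE2+AVX2), whereas on an ARM CPU with NEON it
+// yields two (none, NEON).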
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back(NoAvailableCpuFeatures());
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.avx2 && available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  if (available.neon) {
+    v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    RnnVadProbabilityParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h
new file mode 100644
index 0000000000..a7402788c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_
+
+#include <algorithm>
+#include <cstring>
+#include <type_traits>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Linear buffer implementation to (i) push fixed size chunks of sequential
+// data and (ii) view contiguous parts of the buffer. The buffer and the
+// pushed chunks have size S and N respectively. For instance, when S = 2N
+// the first half of the sequence buffer is replaced with its second half,
+// and the new N values are written at the end of the buffer.
+// The class also provides a view on the most recent M values, where
+// 0 < M <= S and by default M = N.
+template <typename T, int S, int N, int M = N>
+class SequenceBuffer {
+  static_assert(N <= S,
+                "The new chunk size cannot be larger than the sequence buffer "
+                "size.");
+  static_assert(std::is_arithmetic<T>::value,
+                "Integral or floating point required.");
+
+ public:
+  SequenceBuffer() : buffer_(S) {
+    RTC_DCHECK_EQ(S, buffer_.size());
+    Reset();
+  }
+  SequenceBuffer(const SequenceBuffer&) = delete;
+  SequenceBuffer& operator=(const SequenceBuffer&) = delete;
+  ~SequenceBuffer() = default;
+  int size() const { return S; }
+  int chunks_size() const { return N; }
+  // Sets the sequence buffer values to zero.
+  void Reset() { std::fill(buffer_.begin(), buffer_.end(), 0); }
+  // Returns a view on the whole buffer.
+  rtc::ArrayView<const T, S> GetBufferView() const {
+    return {buffer_.data(), S};
+  }
+  // Returns a view on the M most recent values of the buffer.
+  rtc::ArrayView<const T, M> GetMostRecentValuesView() const {
+    static_assert(M <= S,
+                  "The number of most recent values cannot be larger than the "
+                  "sequence buffer size.");
+    return {buffer_.data() + S - M, M};
+  }
+  // Shifts the buffer left by N items and adds the N new items at the end.
+  void Push(rtc::ArrayView<const T, N> new_values) {
+    // Make space for the new values.
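+    // Illustrative trace, assuming S = 4 and N = 2: starting from buffer
+    // [a b c d], Push([e f]) first shifts the tail to the front, giving
+    // [c d c d], and then overwrites the last N values, leaving [c d e f].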
+    if (S > N)
+      std::memmove(buffer_.data(), buffer_.data() + N, (S - N) * sizeof(T));
+    // Copy the new values at the end of the buffer.
+    std::memcpy(buffer_.data() + S - N, new_values.data(), N * sizeof(T));
+  }
+
+ private:
+  std::vector<T> buffer_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc
new file mode 100644
index 0000000000..af005833c1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h"
+
+#include <algorithm>
+#include <cstdint>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+template <typename T, int S, int N>
+void TestSequenceBufferPushOp() {
+  SCOPED_TRACE(S);
+  SCOPED_TRACE(N);
+  SequenceBuffer<T, S, N> seq_buf;
+  auto seq_buf_view = seq_buf.GetBufferView();
+  std::array<T, N> chunk;
+
+  // Check that a chunk is fully gone after ceil(S / N) push ops.
+  chunk.fill(1);
+  seq_buf.Push(chunk);
+  chunk.fill(0);
+  constexpr int required_push_ops = (S % N) ? S / N + 1 : S / N;
+  for (int i = 0; i < required_push_ops - 1; ++i) {
+    SCOPED_TRACE(i);
+    seq_buf.Push(chunk);
+    // Still in the buffer.
+    const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
+    EXPECT_EQ(1, *m);
+  }
+  // Gone after another push.
+  seq_buf.Push(chunk);
+  const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
+  EXPECT_EQ(0, *m);
+
+  // Check that the last item moves left by N positions after a push op.
+  if (S > N) {
+    // Fill in with non-zero values.
+    for (int i = 0; i < N; ++i)
+      chunk[i] = static_cast<T>(i + 1);
+    seq_buf.Push(chunk);
+    // With the next Push(), `last` will be moved left by N positions.
+    const T last = chunk[N - 1];
+    for (int i = 0; i < N; ++i)
+      chunk[i] = static_cast<T>(last + i + 1);
+    seq_buf.Push(chunk);
+    EXPECT_EQ(last, seq_buf_view[S - N - 1]);
+  }
+}
+
+TEST(RnnVadTest, SequenceBufferGetters) {
+  constexpr int buffer_size = 8;
+  constexpr int chunk_size = 8;
+  SequenceBuffer<int, buffer_size, chunk_size> seq_buf;
+  EXPECT_EQ(buffer_size, seq_buf.size());
+  EXPECT_EQ(chunk_size, seq_buf.chunks_size());
+  // Test view.
+  auto seq_buf_view = seq_buf.GetBufferView();
+  EXPECT_EQ(0, seq_buf_view[0]);
+  EXPECT_EQ(0, seq_buf_view[seq_buf_view.size() - 1]);
+  constexpr std::array<int, chunk_size> chunk = {10, 20, 30, 40,
+                                                 50, 60, 70, 80};
+  seq_buf.Push(chunk);
+  EXPECT_EQ(10, *seq_buf_view.begin());
+  EXPECT_EQ(80, *(seq_buf_view.end() - 1));
+}
+
+TEST(RnnVadTest, SequenceBufferPushOpsUnsigned) {
+  TestSequenceBufferPushOp();  // Chunk size: 25%.
+  TestSequenceBufferPushOp();  // Chunk size: 50%.
+  TestSequenceBufferPushOp();  // Chunk size: 100%.
+  TestSequenceBufferPushOp();  // Non-integer ratio.
+}
+
+TEST(RnnVadTest, SequenceBufferPushOpsSigned) {
+  TestSequenceBufferPushOp();  // Chunk size: 25%.
+  TestSequenceBufferPushOp();  // Chunk size: 50%.
+  TestSequenceBufferPushOp();  // Chunk size: 100%.
+ TestSequenceBufferPushOp(); // Non-integer ratio. +} + +TEST(RnnVadTest, SequenceBufferPushOpsFloating) { + TestSequenceBufferPushOp(); // Chunk size: 25%. + TestSequenceBufferPushOp(); // Chunk size: 50%. + TestSequenceBufferPushOp(); // Chunk size: 100%. + TestSequenceBufferPushOp(); // Non-integer ratio. +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc new file mode 100644 index 0000000000..96086babb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +#include +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr float kSilenceThreshold = 0.04f; + +// Computes the new cepstral difference stats and pushes them into the passed +// symmetric matrix buffer. +void UpdateCepstralDifferenceStats( + rtc::ArrayView new_cepstral_coeffs, + const RingBuffer& ring_buf, + SymmetricMatrixBuffer* sym_matrix_buf) { + RTC_DCHECK(sym_matrix_buf); + // Compute the new cepstral distance stats. + std::array distances; + for (int i = 0; i < kCepstralCoeffsHistorySize - 1; ++i) { + const int delay = i + 1; + auto old_cepstral_coeffs = ring_buf.GetArrayView(delay); + distances[i] = 0.f; + for (int k = 0; k < kNumBands; ++k) { + const float c = new_cepstral_coeffs[k] - old_cepstral_coeffs[k]; + distances[i] += c * c; + } + } + // Push the new spectral distance stats into the symmetric matrix buffer. + sym_matrix_buf->Push(distances); +} + +// Computes the first half of the Vorbis window. +std::array ComputeScaledHalfVorbisWindow( + float scaling = 1.f) { + constexpr int kHalfSize = kFrameSize20ms24kHz / 2; + std::array half_window{}; + for (int i = 0; i < kHalfSize; ++i) { + half_window[i] = + scaling * + std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfSize) * + std::sin(0.5 * kPi * (i + 0.5) / kHalfSize)); + } + return half_window; +} + +// Computes the forward FFT on a 20 ms frame to which a given window function is +// applied. The Fourier coefficient corresponding to the Nyquist frequency is +// set to zero (it is never used and this allows to simplify the code). +void ComputeWindowedForwardFft( + rtc::ArrayView frame, + const std::array& half_window, + Pffft::FloatBuffer* fft_input_buffer, + Pffft::FloatBuffer* fft_output_buffer, + Pffft* fft) { + RTC_DCHECK_EQ(frame.size(), 2 * half_window.size()); + // Apply windowing. + auto in = fft_input_buffer->GetView(); + for (int i = 0, j = kFrameSize20ms24kHz - 1; + rtc::SafeLt(i, half_window.size()); ++i, --j) { + in[i] = frame[i] * half_window[i]; + in[j] = frame[j] * half_window[i]; + } + fft->ForwardTransform(*fft_input_buffer, fft_output_buffer, /*ordered=*/true); + // Set the Nyquist frequency coefficient to zero. 
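+  // Note: in the ordered pffft layout used here, the real forward transform
+  // packs the two purely real coefficients into the first complex bin, i.e.
+  // out[0] holds the DC term and out[1] the Nyquist term, so zeroing out[1]
+  // below drops the Nyquist coefficient only.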
+ auto out = fft_output_buffer->GetView(); + out[1] = 0.f; +} + +} // namespace + +SpectralFeaturesExtractor::SpectralFeaturesExtractor() + : half_window_(ComputeScaledHalfVorbisWindow( + 1.f / static_cast(kFrameSize20ms24kHz))), + fft_(kFrameSize20ms24kHz, Pffft::FftType::kReal), + fft_buffer_(fft_.CreateBuffer()), + reference_frame_fft_(fft_.CreateBuffer()), + lagged_frame_fft_(fft_.CreateBuffer()), + dct_table_(ComputeDctTable()) {} + +SpectralFeaturesExtractor::~SpectralFeaturesExtractor() = default; + +void SpectralFeaturesExtractor::Reset() { + cepstral_coeffs_ring_buf_.Reset(); + cepstral_diffs_buf_.Reset(); +} + +bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView reference_frame, + rtc::ArrayView lagged_frame, + rtc::ArrayView higher_bands_cepstrum, + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative, + rtc::ArrayView bands_cross_corr, + float* variability) { + // Compute the Opus band energies for the reference frame. + ComputeWindowedForwardFft(reference_frame, half_window_, fft_buffer_.get(), + reference_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation( + reference_frame_fft_->GetConstView(), reference_frame_bands_energy_); + // Check if the reference frame has silence. + const float tot_energy = + std::accumulate(reference_frame_bands_energy_.begin(), + reference_frame_bands_energy_.end(), 0.f); + if (tot_energy < kSilenceThreshold) { + return true; + } + // Compute the Opus band energies for the lagged frame. + ComputeWindowedForwardFft(lagged_frame, half_window_, fft_buffer_.get(), + lagged_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation(lagged_frame_fft_->GetConstView(), + lagged_frame_bands_energy_); + // Log of the band energies for the reference frame. + std::array log_bands_energy; + ComputeSmoothedLogMagnitudeSpectrum(reference_frame_bands_energy_, + log_bands_energy); + // Reference frame cepstrum. + std::array cepstrum; + ComputeDct(log_bands_energy, dct_table_, cepstrum); + // Ad-hoc correction terms for the first two cepstral coefficients. + cepstrum[0] -= 12.f; + cepstrum[1] -= 4.f; + // Update the ring buffer and the cepstral difference stats. + cepstral_coeffs_ring_buf_.Push(cepstrum); + UpdateCepstralDifferenceStats(cepstrum, cepstral_coeffs_ring_buf_, + &cepstral_diffs_buf_); + // Write the higher bands cepstral coefficients. + RTC_DCHECK_EQ(cepstrum.size() - kNumLowerBands, higher_bands_cepstrum.size()); + std::copy(cepstrum.begin() + kNumLowerBands, cepstrum.end(), + higher_bands_cepstrum.begin()); + // Compute and write remaining features. + ComputeAvgAndDerivatives(average, first_derivative, second_derivative); + ComputeNormalizedCepstralCorrelation(bands_cross_corr); + RTC_DCHECK(variability); + *variability = ComputeVariability(); + return false; +} + +void SpectralFeaturesExtractor::ComputeAvgAndDerivatives( + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative) const { + auto curr = cepstral_coeffs_ring_buf_.GetArrayView(0); + auto prev1 = cepstral_coeffs_ring_buf_.GetArrayView(1); + auto prev2 = cepstral_coeffs_ring_buf_.GetArrayView(2); + RTC_DCHECK_EQ(average.size(), first_derivative.size()); + RTC_DCHECK_EQ(first_derivative.size(), second_derivative.size()); + RTC_DCHECK_LE(average.size(), curr.size()); + for (int i = 0; rtc::SafeLt(i, average.size()); ++i) { + // Average, kernel: [1, 1, 1]. + average[i] = curr[i] + prev1[i] + prev2[i]; + // First derivative, kernel: [1, 0, - 1]. 
+ first_derivative[i] = curr[i] - prev2[i]; + // Second derivative, Laplacian kernel: [1, -2, 1]. + second_derivative[i] = curr[i] - 2 * prev1[i] + prev2[i]; + } +} + +void SpectralFeaturesExtractor::ComputeNormalizedCepstralCorrelation( + rtc::ArrayView bands_cross_corr) { + spectral_correlator_.ComputeCrossCorrelation( + reference_frame_fft_->GetConstView(), lagged_frame_fft_->GetConstView(), + bands_cross_corr_); + // Normalize. + for (int i = 0; rtc::SafeLt(i, bands_cross_corr_.size()); ++i) { + bands_cross_corr_[i] = + bands_cross_corr_[i] / + std::sqrt(0.001f + reference_frame_bands_energy_[i] * + lagged_frame_bands_energy_[i]); + } + // Cepstrum. + ComputeDct(bands_cross_corr_, dct_table_, bands_cross_corr); + // Ad-hoc correction terms for the first two cepstral coefficients. + bands_cross_corr[0] -= 1.3f; + bands_cross_corr[1] -= 0.9f; +} + +float SpectralFeaturesExtractor::ComputeVariability() const { + // Compute cepstral variability score. + float variability = 0.f; + for (int delay1 = 0; delay1 < kCepstralCoeffsHistorySize; ++delay1) { + float min_dist = std::numeric_limits::max(); + for (int delay2 = 0; delay2 < kCepstralCoeffsHistorySize; ++delay2) { + if (delay1 == delay2) // The distance would be 0. + continue; + min_dist = + std::min(min_dist, cepstral_diffs_buf_.GetValue(delay1, delay2)); + } + variability += min_dist; + } + // Normalize (based on training set stats). + // TODO(bugs.webrtc.org/10480): Isolate normalization from feature extraction. + return variability / kCepstralCoeffsHistorySize - 2.1f; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h new file mode 100644 index 0000000000..d327ef8e01 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h" +#include "modules/audio_processing/utility/pffft_wrapper.h" + +namespace webrtc { +namespace rnn_vad { + +// Class to compute spectral features. +class SpectralFeaturesExtractor { + public: + SpectralFeaturesExtractor(); + SpectralFeaturesExtractor(const SpectralFeaturesExtractor&) = delete; + SpectralFeaturesExtractor& operator=(const SpectralFeaturesExtractor&) = + delete; + ~SpectralFeaturesExtractor(); + // Resets the internal state of the feature extractor. + void Reset(); + // Analyzes a pair of reference and lagged frames from the pitch buffer, + // detects silence and computes features. If silence is detected, the output + // is neither computed nor written. 
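+  // Returns true if the frame is classified as silence, in which case the
+  // output arguments are left untouched; returns false otherwise.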
+ bool CheckSilenceComputeFeatures( + rtc::ArrayView reference_frame, + rtc::ArrayView lagged_frame, + rtc::ArrayView higher_bands_cepstrum, + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative, + rtc::ArrayView bands_cross_corr, + float* variability); + + private: + void ComputeAvgAndDerivatives( + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative) const; + void ComputeNormalizedCepstralCorrelation( + rtc::ArrayView bands_cross_corr); + float ComputeVariability() const; + + const std::array half_window_; + Pffft fft_; + std::unique_ptr fft_buffer_; + std::unique_ptr reference_frame_fft_; + std::unique_ptr lagged_frame_fft_; + SpectralCorrelator spectral_correlator_; + std::array reference_frame_bands_energy_; + std::array lagged_frame_bands_energy_; + std::array bands_cross_corr_; + const std::array dct_table_; + RingBuffer + cepstral_coeffs_ring_buf_; + SymmetricMatrixBuffer cepstral_diffs_buf_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc new file mode 100644 index 0000000000..a10b0f7ec9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Weights for each FFT coefficient for each Opus band (Nyquist frequency +// excluded). The size of each band is specified in +// `kOpusScaleNumBins24kHz20ms`. 
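+// Within a band of size n the weights grow linearly as j / n for
+// j = 0, ..., n - 1, which implements the triangular filters' linear
+// cross-fade between adjacent bands (see ComputeTriangularFiltersWeights()
+// in the unit tests).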
+constexpr std::array kOpusBandWeights24kHz20ms = + {{ + 0.f, 0.25f, 0.5f, 0.75f, // Band 0 + 0.f, 0.25f, 0.5f, 0.75f, // Band 1 + 0.f, 0.25f, 0.5f, 0.75f, // Band 2 + 0.f, 0.25f, 0.5f, 0.75f, // Band 3 + 0.f, 0.25f, 0.5f, 0.75f, // Band 4 + 0.f, 0.25f, 0.5f, 0.75f, // Band 5 + 0.f, 0.25f, 0.5f, 0.75f, // Band 6 + 0.f, 0.25f, 0.5f, 0.75f, // Band 7 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 8 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 9 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 10 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 11 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 12 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 13 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 14 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 15 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 16 + 0.f, 0.03125f, 0.0625f, 0.09375f, 0.125f, + 0.15625f, 0.1875f, 0.21875f, 0.25f, 0.28125f, + 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f, + 0.46875f, 0.5f, 0.53125f, 0.5625f, 0.59375f, + 0.625f, 0.65625f, 0.6875f, 0.71875f, 0.75f, + 0.78125f, 0.8125f, 0.84375f, 0.875f, 0.90625f, + 0.9375f, 0.96875f, // Band 17 + 0.f, 0.0208333f, 0.0416667f, 0.0625f, 0.0833333f, + 0.104167f, 0.125f, 0.145833f, 0.166667f, 0.1875f, + 0.208333f, 0.229167f, 0.25f, 0.270833f, 0.291667f, + 0.3125f, 0.333333f, 0.354167f, 0.375f, 0.395833f, + 0.416667f, 0.4375f, 0.458333f, 0.479167f, 0.5f, + 0.520833f, 0.541667f, 0.5625f, 0.583333f, 0.604167f, + 0.625f, 0.645833f, 0.666667f, 0.6875f, 0.708333f, + 0.729167f, 0.75f, 0.770833f, 0.791667f, 0.8125f, + 0.833333f, 0.854167f, 0.875f, 0.895833f, 0.916667f, + 0.9375f, 0.958333f, 0.979167f // Band 18 + }}; + +} // namespace + +SpectralCorrelator::SpectralCorrelator() + : weights_(kOpusBandWeights24kHz20ms.begin(), + kOpusBandWeights24kHz20ms.end()) {} + +SpectralCorrelator::~SpectralCorrelator() = default; + +void SpectralCorrelator::ComputeAutoCorrelation( + rtc::ArrayView x, + rtc::ArrayView auto_corr) const { + ComputeCrossCorrelation(x, x, auto_corr); +} + +void SpectralCorrelator::ComputeCrossCorrelation( + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView cross_corr) const { + RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz); + RTC_DCHECK_EQ(x.size(), y.size()); + RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed."; + RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed."; + constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); + int k = 0; // Next Fourier coefficient index. + cross_corr[0] = 0.f; + for (int i = 0; i < kOpusBands24kHz - 1; ++i) { + cross_corr[i + 1] = 0.f; + for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) { // Band size. 
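+      // `v` is the contribution of the k-th Fourier coefficient (real part
+      // of x[k] times the conjugate of y[k]); the triangular weight assigns
+      // the share `tmp` of it to band i + 1 and the remainder to band i.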
+ const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1]; + const float tmp = weights_[k] * v; + cross_corr[i] += v - tmp; + cross_corr[i + 1] += tmp; + k++; + } + } + cross_corr[0] *= 2.f; // The first band only gets half contribution. + RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2); // Nyquist coefficient never used. +} + +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView bands_energy, + rtc::ArrayView log_bands_energy) { + RTC_DCHECK_LE(bands_energy.size(), kNumBands); + constexpr float kOneByHundred = 1e-2f; + constexpr float kLogOneByHundred = -2.f; + // Init. + float log_max = kLogOneByHundred; + float follow = kLogOneByHundred; + const auto smooth = [&log_max, &follow](float x) { + x = std::max(log_max - 7.f, std::max(follow - 1.5f, x)); + log_max = std::max(log_max, x); + follow = std::max(follow - 1.5f, x); + return x; + }; + // Smoothing over the bands for which the band energy is defined. + for (int i = 0; rtc::SafeLt(i, bands_energy.size()); ++i) { + log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i])); + } + // Smoothing over the remaining bands (zero energy). + for (int i = bands_energy.size(); i < kNumBands; ++i) { + log_bands_energy[i] = smooth(kLogOneByHundred); + } +} + +std::array ComputeDctTable() { + std::array dct_table; + const double k = std::sqrt(0.5); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < kNumBands; ++j) + dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands); + dct_table[i * kNumBands] *= k; + } + return dct_table; +} + +void ComputeDct(rtc::ArrayView in, + rtc::ArrayView dct_table, + rtc::ArrayView out) { + // DCT scaling factor - i.e., sqrt(2 / kNumBands). + constexpr float kDctScalingFactor = 0.301511345f; + constexpr float kDctScalingFactorError = + kDctScalingFactor * kDctScalingFactor - + 2.f / static_cast(kNumBands); + static_assert( + (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) || + (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f), + "kNumBands changed and kDctScalingFactor has not been updated."); + RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported."; + RTC_DCHECK_LE(in.size(), kNumBands); + RTC_DCHECK_LE(1, out.size()); + RTC_DCHECK_LE(out.size(), in.size()); + for (int i = 0; rtc::SafeLt(i, out.size()); ++i) { + out[i] = 0.f; + for (int j = 0; rtc::SafeLt(j, in.size()); ++j) { + out[i] += in[j] * dct_table[j * kNumBands + i]; + } + // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table. + out[i] *= kDctScalingFactor; + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h new file mode 100644 index 0000000000..f4b293a567 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist +// frequency. However, band #19 gets the contributions from band #18 because +// of the symmetric triangular filter with peak response at 12 kHz. +constexpr int kOpusBands24kHz = 20; +static_assert(kOpusBands24kHz < kNumBands, + "The number of bands at 24 kHz must be less than those defined " + "in the Opus scale at 48 kHz."); + +// Number of FFT frequency bins covered by each band in the Opus scale at a +// sample rate of 24 kHz for 20 ms frames. +// Declared here for unit testing. +constexpr std::array GetOpusScaleNumBins24kHz20ms() { + return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48}; +} + +// TODO(bugs.webrtc.org/10480): Move to a separate file. +// Class to compute band-wise spectral features in the Opus perceptual scale +// for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular +// filters with peak response at the each band boundary. +class SpectralCorrelator { + public: + // Ctor. + SpectralCorrelator(); + SpectralCorrelator(const SpectralCorrelator&) = delete; + SpectralCorrelator& operator=(const SpectralCorrelator&) = delete; + ~SpectralCorrelator(); + + // Computes the band-wise spectral auto-correlations. + // `x` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients + // where x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeAutoCorrelation( + rtc::ArrayView x, + rtc::ArrayView auto_corr) const; + + // Computes the band-wise spectral cross-correlations. + // `x` and `y` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients where + // x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeCrossCorrelation( + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView cross_corr) const; + + private: + const std::vector weights_; // Weights for each Fourier coefficient. +}; + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Given a vector of Opus-bands energy coefficients, +// computes the log magnitude spectrum applying smoothing both over time and +// over frequency. Declared here for unit testing. +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView bands_energy, + rtc::ArrayView log_bands_energy); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Creates a DCT table for arrays having size equal to +// `kNumBands`. Declared here for unit testing. +std::array ComputeDctTable(); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Computes DCT for `in` given a pre-computed DCT table. +// In-place computation is not allowed and `out` can be smaller than `in` in +// order to only compute the first DCT coefficients. Declared here for unit +// testing. 
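+// Mathematically, out[i] = kDctScalingFactor * sum_j in[j] *
+// cos((j + 0.5) * i * pi / kNumBands), i.e. a DCT-II of `in`, with the first
+// output coefficient additionally scaled by sqrt(0.5) through the table
+// returned by ComputeDctTable().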
+void ComputeDct(rtc::ArrayView<const float> in,
+                rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
+                rtc::ArrayView<float> out);
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc
new file mode 100644
index 0000000000..ece4eb5024
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc
@@ -0,0 +1,160 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+#include "rtc_base/numerics/safe_compare.h"
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// Generates the values for the array named `kOpusBandWeights24kHz20ms` in the
+// anonymous namespace of the .cc file, which is the array of FFT coefficient
+// weights for the Opus scale triangular filters.
+std::vector<float> ComputeTriangularFiltersWeights() {
+  constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
+  const auto& v = kOpusScaleNumBins24kHz20ms;  // Alias.
+  const int num_weights = std::accumulate(kOpusScaleNumBins24kHz20ms.begin(),
+                                          kOpusScaleNumBins24kHz20ms.end(), 0);
+  std::vector<float> weights(num_weights);
+  int next_fft_coeff_index = 0;
+  for (int band = 0; rtc::SafeLt(band, v.size()); ++band) {
+    const int band_size = v[band];
+    for (int j = 0; rtc::SafeLt(j, band_size); ++j) {
+      weights[next_fft_coeff_index + j] = static_cast<float>(j) / band_size;
+    }
+    next_fft_coeff_index += band_size;
+  }
+  return weights;
+}
+
+// Checks that the values returned by GetOpusScaleNumBins24kHz20ms() match the
+// Opus scale frequency boundaries.
+TEST(RnnVadTest, TestOpusScaleBoundaries) {
+  constexpr int kBandFrequencyBoundariesHz[kNumBands - 1] = {
+      200,  400,  600,  800,  1000, 1200, 1400, 1600,  2000,  2400, 2800,
+      3200, 4000, 4800, 5600, 6800, 8000, 9600, 12000, 15600, 20000};
+  constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
+  int prev = 0;
+  for (int i = 0; rtc::SafeLt(i, kOpusScaleNumBins24kHz20ms.size()); ++i) {
+    int boundary =
+        kBandFrequencyBoundariesHz[i] * kFrameSize20ms24kHz / kSampleRate24kHz;
+    EXPECT_EQ(kOpusScaleNumBins24kHz20ms[i], boundary - prev);
+    prev = boundary;
+  }
+}
+
+// Checks that the computed triangular filter weights for the Opus scale are
+// monotonic within each Opus band. This test should only be enabled when
+// ComputeTriangularFiltersWeights() is changed and `kOpusBandWeights24kHz20ms`
+// is updated accordingly.
+TEST(RnnVadTest, DISABLED_TestOpusScaleWeights) { + auto weights = ComputeTriangularFiltersWeights(); + int i = 0; + for (int band_size : GetOpusScaleNumBins24kHz20ms()) { + SCOPED_TRACE(band_size); + rtc::ArrayView band_weights(weights.data() + i, band_size); + float prev = -1.f; + for (float weight : band_weights) { + EXPECT_LT(prev, weight); + prev = weight; + } + i += band_size; + } +} + +// Checks that the computed band-wise auto-correlation is non-negative for a +// simple input vector of FFT coefficients. +TEST(RnnVadTest, SpectralCorrelatorValidOutput) { + // Input: vector of (1, 1j) values. + Pffft fft(kFrameSize20ms24kHz, Pffft::FftType::kReal); + auto in = fft.CreateBuffer(); + std::array out; + auto in_view = in->GetView(); + std::fill(in_view.begin(), in_view.end(), 1.f); + in_view[1] = 0.f; // Nyquist frequency. + // Compute and check output. + SpectralCorrelator e; + e.ComputeAutoCorrelation(in_view, out); + for (int i = 0; i < kOpusBands24kHz; ++i) { + SCOPED_TRACE(i); + EXPECT_GT(out[i], 0.f); + } +} + +// Checks that the computed smoothed log magnitude spectrum is within tolerance +// given hard-coded test input data. +TEST(RnnVadTest, ComputeSmoothedLogMagnitudeSpectrumWithinTolerance) { + constexpr std::array input = { + {86.060539245605f, 275.668334960938f, 43.406528472900f, 6.541896820068f, + 17.964015960693f, 8.090919494629f, 1.261920094490f, 1.212702631950f, + 1.619154453278f, 0.508935272694f, 0.346316039562f, 0.237035423517f, + 0.172424271703f, 0.271657168865f, 0.126088857651f, 0.139967113733f, + 0.207200810313f, 0.155893072486f, 0.091090843081f, 0.033391401172f, + 0.013879744336f, 0.011973354965f}}; + constexpr std::array expected_output = { + {1.934854507446f, 2.440402746201f, 1.637655138969f, 0.816367030144f, + 1.254645109177f, 0.908534288406f, 0.104459829628f, 0.087320849299f, + 0.211962252855f, -0.284886807203f, -0.448164641857f, -0.607240796089f, + -0.738917350769f, -0.550279200077f, -0.866177439690f, -0.824003994465f, + -0.663138568401f, -0.780171751976f, -0.995288193226f, -1.362596273422f, + -1.621970295906f, -1.658103585243f}}; + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + ComputeSmoothedLogMagnitudeSpectrum(input, computed_output); + ExpectNearAbsolute(expected_output, computed_output, 1e-5f); + } +} + +// Checks that the computed DCT is within tolerance given hard-coded test input +// data. +TEST(RnnVadTest, ComputeDctWithinTolerance) { + constexpr std::array input = { + {0.232155621052f, 0.678957760334f, 0.220818966627f, -0.077363930643f, + -0.559227049351f, 0.432545185089f, 0.353900641203f, 0.398993015289f, + 0.409774333239f, 0.454977899790f, 0.300520688295f, -0.010286616161f, + 0.272525429726f, 0.098067551851f, 0.083649002016f, 0.046226885170f, + -0.033228103071f, 0.144773483276f, -0.117661058903f, -0.005628800020f, + -0.009547689930f, -0.045382082462f}}; + constexpr std::array expected_output = { + {0.697072803974f, 0.442710995674f, -0.293156713247f, -0.060711503029f, + 0.292050391436f, 0.489301353693f, 0.402255415916f, 0.134404733777f, + -0.086305990815f, -0.199605688453f, -0.234511867166f, -0.413774639368f, + -0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f, + -0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f, + -0.209233760834f, -0.128037497401f}}; + auto dct_table = ComputeDctTable(); + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+// Checks that the computed DCT is within tolerance given hard-coded test input
+// data.
+TEST(RnnVadTest, ComputeDctWithinTolerance) {
+  constexpr std::array<float, kNumBands> input = {
+      {0.232155621052f,  0.678957760334f,  0.220818966627f,  -0.077363930643f,
+       -0.559227049351f, 0.432545185089f,  0.353900641203f,  0.398993015289f,
+       0.409774333239f,  0.454977899790f,  0.300520688295f,  -0.010286616161f,
+       0.272525429726f,  0.098067551851f,  0.083649002016f,  0.046226885170f,
+       -0.033228103071f, 0.144773483276f,  -0.117661058903f, -0.005628800020f,
+       -0.009547689930f, -0.045382082462f}};
+  constexpr std::array<float, kNumBands> expected_output = {
+      {0.697072803974f,  0.442710995674f,  -0.293156713247f, -0.060711503029f,
+       0.292050391436f,  0.489301353693f,  0.402255415916f,  0.134404733777f,
+       -0.086305990815f, -0.199605688453f, -0.234511867166f, -0.413774639368f,
+       -0.388507157564f, -0.032798115164f, 0.044605545700f,  0.112466648221f,
+       -0.050096966326f, 0.045971218497f,  -0.029815061018f, -0.410366982222f,
+       -0.209233760834f, -0.128037497401f}};
+  auto dct_table = ComputeDctTable();
+  std::array<float, kNumBands> computed_output;
+  {
+    // TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+    // FloatingPointExceptionObserver fpe_observer;
+    ComputeDct(input, dct_table, computed_output);
+    ExpectNearAbsolute(expected_output, computed_output, 1e-5f);
+  }
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc
new file mode 100644
index 0000000000..324d694957
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc
@@ -0,0 +1,160 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kTestFeatureVectorSize = kNumBands + 3 * kNumLowerBands + 1;
+
+// Writes non-zero sample values.
+void WriteTestData(rtc::ArrayView<float> samples) {
+  for (int i = 0; rtc::SafeLt(i, samples.size()); ++i) {
+    samples[i] = i % 100;
+  }
+}
+
+rtc::ArrayView<float, kNumBands - kNumLowerBands> GetHigherBandsSpectrum(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return {feature_vector->data() + kNumLowerBands, kNumBands - kNumLowerBands};
+}
+
+rtc::ArrayView<float, kNumLowerBands> GetAverage(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return {feature_vector->data(), kNumLowerBands};
+}
+
+rtc::ArrayView<float, kNumLowerBands> GetFirstDerivative(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return {feature_vector->data() + kNumBands, kNumLowerBands};
+}
+
+rtc::ArrayView<float, kNumLowerBands> GetSecondDerivative(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return {feature_vector->data() + kNumBands + kNumLowerBands, kNumLowerBands};
+}
+
+rtc::ArrayView<float, kNumLowerBands> GetCepstralCrossCorrelation(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return {feature_vector->data() + kNumBands + 2 * kNumLowerBands,
+          kNumLowerBands};
+}
+
+float* GetCepstralVariability(
+    std::array<float, kTestFeatureVectorSize>* feature_vector) {
+  return feature_vector->data() + kNumBands + 3 * kNumLowerBands;
+}
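[Editor's note] The helper functions above fix the layout of the test feature vector. A compact restatement of the offsets they imply, with illustrative sizes (kNumBands = 22 is implied by the arrays in the previous file; kNumLowerBands = 6 is an assumption and should be read from common.h):

// Offsets implied by the helper functions above.
constexpr int kNumBands = 22;      // Assumed value.
constexpr int kNumLowerBands = 6;  // Assumed value.
constexpr int kAverageOffset = 0;                                   // 6 values.
constexpr int kHigherBandsOffset = kNumLowerBands;                  // 16 values.
constexpr int kFirstDerivativeOffset = kNumBands;                   // 6 values.
constexpr int kSecondDerivativeOffset = kNumBands + kNumLowerBands; // 6 values.
constexpr int kCrossCorrOffset = kNumBands + 2 * kNumLowerBands;    // 6 values.
constexpr int kVariabilityOffset = kNumBands + 3 * kNumLowerBands;  // 1 value.
constexpr int kTotalSize = kVariabilityOffset + 1;  // 41 under these values.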
+constexpr float kInitialFeatureVal = -9999.f;
+
+// Checks that silence is detected when the input signal is 0 and that the
+// feature vector is written only if the input signal is not tagged as
+// silence.
+TEST(RnnVadTest, SpectralFeaturesWithAndWithoutSilence) {
+  // Initialize.
+  SpectralFeaturesExtractor sfe;
+  std::array<float, kFrameSize20ms24kHz> samples;
+  rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples);
+  bool is_silence;
+  std::array<float, kTestFeatureVectorSize> feature_vector;
+
+  // Write an initial value in the feature vector to detect changes.
+  std::fill(feature_vector.begin(), feature_vector.end(), kInitialFeatureVal);
+
+  // TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+
+  // With silence.
+  std::fill(samples.begin(), samples.end(), 0.f);
+  is_silence = sfe.CheckSilenceComputeFeatures(
+      samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector),
+      GetAverage(&feature_vector), GetFirstDerivative(&feature_vector),
+      GetSecondDerivative(&feature_vector),
+      GetCepstralCrossCorrelation(&feature_vector),
+      GetCepstralVariability(&feature_vector));
+  // Silence is expected, the output won't be overwritten.
+  EXPECT_TRUE(is_silence);
+  EXPECT_TRUE(std::all_of(feature_vector.begin(), feature_vector.end(),
+                          [](float x) { return x == kInitialFeatureVal; }));
+
+  // With no silence.
+  WriteTestData(samples);
+  is_silence = sfe.CheckSilenceComputeFeatures(
+      samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector),
+      GetAverage(&feature_vector), GetFirstDerivative(&feature_vector),
+      GetSecondDerivative(&feature_vector),
+      GetCepstralCrossCorrelation(&feature_vector),
+      GetCepstralVariability(&feature_vector));
+  // Silence is not expected, the output will be overwritten.
+  EXPECT_FALSE(is_silence);
+  EXPECT_FALSE(std::all_of(feature_vector.begin(), feature_vector.end(),
+                           [](float x) { return x == kInitialFeatureVal; }));
+}
+
+// Feeds a constant input signal and checks that:
+// - the cepstral coefficients average does not change;
+// - the derivatives are zero;
+// - the cepstral variability score does not change.
+TEST(RnnVadTest, CepstralFeaturesConstantAverageZeroDerivative) {
+  // Initialize.
+  SpectralFeaturesExtractor sfe;
+  std::array<float, kFrameSize20ms24kHz> samples;
+  rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples);
+  WriteTestData(samples);
+
+  // Fill the spectral features with test data.
+  std::array<float, kTestFeatureVectorSize> feature_vector;
+  for (int i = 0; i < kCepstralCoeffsHistorySize; ++i) {
+    sfe.CheckSilenceComputeFeatures(
+        samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector),
+        GetAverage(&feature_vector), GetFirstDerivative(&feature_vector),
+        GetSecondDerivative(&feature_vector),
+        GetCepstralCrossCorrelation(&feature_vector),
+        GetCepstralVariability(&feature_vector));
+  }
+
+  // Feed the test data one last time but using a different output vector.
+  std::array<float, kTestFeatureVectorSize> feature_vector_last;
+  sfe.CheckSilenceComputeFeatures(
+      samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector_last),
+      GetAverage(&feature_vector_last),
+      GetFirstDerivative(&feature_vector_last),
+      GetSecondDerivative(&feature_vector_last),
+      GetCepstralCrossCorrelation(&feature_vector_last),
+      GetCepstralVariability(&feature_vector_last));
+
+  // Average is unchanged.
+  ExpectEqualFloatArray({feature_vector.data(), kNumLowerBands},
+                        {feature_vector_last.data(), kNumLowerBands});
+  // First and second derivatives are zero.
+  constexpr std::array<float, kNumLowerBands> zeros{};
+  ExpectEqualFloatArray(
+      {feature_vector_last.data() + kNumBands, kNumLowerBands}, zeros);
+  ExpectEqualFloatArray(
+      {feature_vector_last.data() + kNumBands + kNumLowerBands, kNumLowerBands},
+      zeros);
+  // Variability is unchanged.
+  EXPECT_FLOAT_EQ(feature_vector[kNumBands + 3 * kNumLowerBands],
+                  feature_vector_last[kNumBands + 3 * kNumLowerBands]);
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h
new file mode 100644
index 0000000000..d186479551
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <utility>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Data structure to buffer the results of pair-wise comparisons between items
+// stored in a ring buffer. Every time that the oldest item is replaced in the
+// ring buffer, the new one is compared to the remaining items in the ring
+// buffer. The results of such comparisons need to be buffered and
+// automatically removed when one of the two corresponding items that have
+// been compared is removed from the ring buffer. It is assumed that the
+// comparison is symmetric and that comparing an item with itself is not
+// needed.
+template <typename T, int S>
+class SymmetricMatrixBuffer {
+  static_assert(S > 2, "");
+
+ public:
+  SymmetricMatrixBuffer() = default;
+  SymmetricMatrixBuffer(const SymmetricMatrixBuffer&) = delete;
+  SymmetricMatrixBuffer& operator=(const SymmetricMatrixBuffer&) = delete;
+  ~SymmetricMatrixBuffer() = default;
+  // Sets the buffer values to zero.
+  void Reset() {
+    static_assert(std::is_arithmetic<T>::value,
+                  "Integral or floating point required.");
+    buf_.fill(0);
+  }
+  // Pushes the results from the comparison between the most recent item and
+  // those that are still in the ring buffer. The first element in `values`
+  // must correspond to the comparison between the most recent item and the
+  // second most recent one in the ring buffer, whereas the last element in
+  // `values` must correspond to the comparison between the most recent item
+  // and the oldest one in the ring buffer.
+  void Push(rtc::ArrayView<T, S - 1> values) {
+    // Move the lower-right sub-matrix of size (S-2) x (S-2) one row up and
+    // one column left.
+    std::memmove(buf_.data(), buf_.data() + S, (buf_.size() - S) * sizeof(T));
+    // Copy new values in the last column in the right order.
+    for (int i = 0; rtc::SafeLt(i, values.size()); ++i) {
+      const int index = (S - 1 - i) * (S - 1) - 1;
+      RTC_DCHECK_GE(index, 0);
+      RTC_DCHECK_LT(index, buf_.size());
+      buf_[index] = values[i];
+    }
+  }
+  // Reads the value that corresponds to comparison of two items in the ring
+  // buffer having delay `delay1` and `delay2`. The two arguments must not be
+  // equal and both must be in {0, ..., S - 1}.
+  T GetValue(int delay1, int delay2) const {
+    int row = S - 1 - delay1;
+    int col = S - 1 - delay2;
+    RTC_DCHECK_NE(row, col) << "The diagonal cannot be accessed.";
+    if (row > col)
+      std::swap(row, col);  // Swap to access the upper-right triangular part.
+    RTC_DCHECK_LE(0, row);
+    RTC_DCHECK_LT(row, S - 1) << "Not enforcing row < col and row != col.";
+    RTC_DCHECK_LE(1, col) << "Not enforcing row < col and row != col.";
+    RTC_DCHECK_LT(col, S);
+    const int index = row * (S - 1) + (col - 1);
+    RTC_DCHECK_LE(0, index);
+    RTC_DCHECK_LT(index, buf_.size());
+    return buf_[index];
+  }
+
+ private:
+  // Encode an upper-right triangular matrix (excluding its diagonal) using a
+  // square matrix. This allows moving the data in Push() with one single
+  // operation.
+  std::array<T, (S - 1) * (S - 1)> buf_{};
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_
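[Editor's note] The class above stores the strictly upper triangular part of an S x S comparison matrix in a (S-1) x (S-1) array so that Push() can age all entries with a single memmove. A small worked restatement of the GetValue() index arithmetic, using S = 4 as a hypothetical size for illustration:

#include <utility>

// Illustration for S = 4: buf_ has (S-1)*(S-1) = 9 slots.
// Push() writes the newest comparisons at indices (S-1-i)*(S-1)-1:
//   i=0 (vs. delay 1) -> 8,  i=1 (vs. delay 2) -> 5,  i=2 (vs. delay 3) -> 2.
// GetValue(delay1, delay2) recovers exactly those slots:
int GetValueIndex(int delay1, int delay2, int S) {
  int row = S - 1 - delay1;
  int col = S - 1 - delay2;
  if (row > col) std::swap(row, col);  // Upper-right triangle only.
  return row * (S - 1) + (col - 1);    // E.g. (0, 1) -> 8 for S = 4.
}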
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc
new file mode 100644
index 0000000000..1509ca5ac1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h"
+
+#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+template <typename T, int S>
+void CheckSymmetry(const SymmetricMatrixBuffer<T, S>* sym_matrix_buf) {
+  for (int row = 0; row < S - 1; ++row)
+    for (int col = row + 1; col < S; ++col)
+      EXPECT_EQ(sym_matrix_buf->GetValue(row, col),
+                sym_matrix_buf->GetValue(col, row));
+}
+
+using PairType = std::pair<int, int>;
+
+// Checks that the symmetric matrix buffer contains any pair with a value
+// equal to the given one.
+template <int S>
+bool CheckPairsWithValueExist(
+    const SymmetricMatrixBuffer<PairType, S>* sym_matrix_buf,
+    const int value) {
+  for (int row = 0; row < S - 1; ++row) {
+    for (int col = row + 1; col < S; ++col) {
+      auto p = sym_matrix_buf->GetValue(row, col);
+      if (p.first == value || p.second == value)
+        return true;
+    }
+  }
+  return false;
+}
+
+// Test that shows how to combine RingBuffer and SymmetricMatrixBuffer to
+// efficiently compute pair-wise scores. This test verifies that the evolution
+// of a SymmetricMatrixBuffer instance follows that of RingBuffer.
+TEST(RnnVadTest, SymmetricMatrixBufferUseCase) {
+  // Create a ring buffer which will be fed with a series of integer values.
+  constexpr int kRingBufSize = 10;
+  RingBuffer<int, 1, kRingBufSize> ring_buf;
+  // Create a symmetric matrix buffer for the ring buffer above. It stores
+  // pairs of integers with which this test can easily check that the
+  // evolution of RingBuffer and SymmetricMatrixBuffer match.
+  SymmetricMatrixBuffer<PairType, kRingBufSize> sym_matrix_buf;
+  for (int t = 1; t <= 100; ++t) {  // Evolution steps.
+    SCOPED_TRACE(t);
+    const int t_removed = ring_buf.GetArrayView(kRingBufSize - 1)[0];
+    ring_buf.Push({&t, 1});
+    // The head of the ring buffer is `t`.
+    ASSERT_EQ(t, ring_buf.GetArrayView(0)[0]);
+    // Create the comparisons between `t` and the older elements in the ring
+    // buffer.
+    std::array<PairType, kRingBufSize - 1> new_comparisons;
+    for (int i = 0; i < kRingBufSize - 1; ++i) {
+      // Start comparing `t` to the second newest element in the ring buffer.
+      const int delay = i + 1;
+      const auto t_prev = ring_buf.GetArrayView(delay)[0];
+      ASSERT_EQ(std::max(0, t - delay), t_prev);
+      // Compare the last element `t` with `t_prev`.
+      new_comparisons[i].first = t_prev;
+      new_comparisons[i].second = t;
+    }
+    // Push the new comparisons in the symmetric matrix buffer.
+    sym_matrix_buf.Push({new_comparisons.data(), new_comparisons.size()});
+    // Tests.
+    CheckSymmetry(&sym_matrix_buf);
+    // Check that the pairs resulting from the content in the ring buffer are
+    // in the right position.
+    for (int delay1 = 0; delay1 < kRingBufSize - 1; ++delay1) {
+      for (int delay2 = delay1 + 1; delay2 < kRingBufSize; ++delay2) {
+        const auto t1 = ring_buf.GetArrayView(delay1)[0];
+        const auto t2 = ring_buf.GetArrayView(delay2)[0];
+        ASSERT_LE(t2, t1);
+        const auto p = sym_matrix_buf.GetValue(delay1, delay2);
+        EXPECT_EQ(p.first, t2);
+        EXPECT_EQ(p.second, t1);
+      }
+    }
+    // Check that every older element in the ring buffer still has a
+    // corresponding pair in the symmetric matrix buffer.
+    for (int delay = 1; delay < kRingBufSize; ++delay) {
+      const auto t_prev = ring_buf.GetArrayView(delay)[0];
+      EXPECT_TRUE(CheckPairsWithValueExist(&sym_matrix_buf, t_prev));
+    }
+    // Check that the element removed from the ring buffer has no
+    // corresponding pairs in the symmetric matrix buffer.
+    if (t > kRingBufSize - 1) {
+      EXPECT_FALSE(CheckPairsWithValueExist(&sym_matrix_buf, t_removed));
+    }
+  }
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc
new file mode 100644
index 0000000000..857a9f2706
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc
@@ -0,0 +1,143 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+
+#include <algorithm>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// File reader for binary files that contain a sequence of values with
+// arithmetic type `T`. The values of type `T` that are read are cast to
+// float.
+template <typename T>
+class FloatFileReader : public FileReader {
+ public:
+  static_assert(std::is_arithmetic<T>::value, "");
+  explicit FloatFileReader(absl::string_view filename)
+      : is_(std::string(filename), std::ios::binary | std::ios::ate),
+        size_(is_.tellg() / sizeof(T)) {
+    RTC_CHECK(is_);
+    SeekBeginning();
+  }
+  FloatFileReader(const FloatFileReader&) = delete;
+  FloatFileReader& operator=(const FloatFileReader&) = delete;
+  ~FloatFileReader() = default;
+
+  int size() const override { return size_; }
+  bool ReadChunk(rtc::ArrayView<float> dst) override {
+    const std::streamsize bytes_to_read = dst.size() * sizeof(T);
+    if (std::is_same<T, float>::value) {
+      is_.read(reinterpret_cast<char*>(dst.data()), bytes_to_read);
+    } else {
+      buffer_.resize(dst.size());
+      is_.read(reinterpret_cast<char*>(buffer_.data()), bytes_to_read);
+      std::transform(buffer_.begin(), buffer_.end(), dst.begin(),
+                     [](const T& v) -> float { return static_cast<float>(v); });
+    }
+    return is_.gcount() == bytes_to_read;
+  }
+  bool ReadValue(float& dst) override { return ReadChunk({&dst, 1}); }
+  void SeekForward(int hop) override { is_.seekg(hop * sizeof(T), is_.cur); }
+  void SeekBeginning() override { is_.seekg(0, is_.beg); }
+
+ private:
+  std::ifstream is_;
+  const int size_;
+  std::vector<T> buffer_;
+};
+
+}  // namespace
+
+using webrtc::test::ResourcePath;
+
+void ExpectEqualFloatArray(rtc::ArrayView<const float> expected,
+                           rtc::ArrayView<const float> computed) {
+  ASSERT_EQ(expected.size(), computed.size());
+  for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_FLOAT_EQ(expected[i], computed[i]);
+  }
+}
+
+void ExpectNearAbsolute(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        float tolerance) {
+  ASSERT_EQ(expected.size(), computed.size());
+  for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_NEAR(expected[i], computed[i], tolerance);
+  }
+}
+
+std::unique_ptr<FileReader> CreatePcmSamplesReader() {
+  return std::make_unique<FloatFileReader<int16_t>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/samples",
+                                      "pcm"));
+}
+
+ChunksFileReader CreatePitchBuffer24kHzReader() {
+  auto reader = std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_buf_24k", "dat"));
+  const int num_chunks = rtc::CheckedDivExact(reader->size(), kBufSize24kHz);
+  return {/*chunk_size=*/kBufSize24kHz, num_chunks, std::move(reader)};
+}
+
+ChunksFileReader CreateLpResidualAndPitchInfoReader() {
+  constexpr int kPitchInfoSize = 2;  // Pitch period and strength.
+  constexpr int kChunkSize = kBufSize24kHz + kPitchInfoSize;
+  auto reader = std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_lp_res", "dat"));
+  const int num_chunks = rtc::CheckedDivExact(reader->size(), kChunkSize);
+  return {kChunkSize, num_chunks, std::move(reader)};
+}
+
+std::unique_ptr<FileReader> CreateGruInputReader() {
+  return std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/gru_in",
+                                      "dat"));
+}
+
+std::unique_ptr<FileReader> CreateVadProbsReader() {
+  return std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob",
+                                      "dat"));
+}
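[Editor's note] A minimal sketch of how a ChunksFileReader returned by one of the factories above can be consumed; the break-on-failure handling is illustrative:

#include <vector>

#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"

void ReadAllChunks() {
  webrtc::rnn_vad::ChunksFileReader chunks =
      webrtc::rnn_vad::CreatePitchBuffer24kHzReader();
  std::vector<float> chunk(chunks.chunk_size);
  for (int i = 0; i < chunks.num_chunks; ++i) {
    // ReadChunk() advances the file position and returns false on short reads.
    if (!chunks.reader->ReadChunk(chunk)) break;  // Illustrative handling.
    // ... use `chunk` ...
  }
}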
+PitchTestData::PitchTestData() {
+  FloatFileReader<float> reader(
+      /*filename=*/ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_search_int", "dat"));
+  reader.ReadChunk(pitch_buffer_24k_);
+  reader.ReadChunk(square_energies_24k_);
+  reader.ReadChunk(auto_correlation_12k_);
+  // Reverse the order of the squared energy values.
+  // Required after the WebRTC CL 191703 which switched to forward
+  // computation.
+  std::reverse(square_energies_24k_.begin(), square_energies_24k_.end());
+}
+
+PitchTestData::~PitchTestData() = default;
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h
new file mode 100644
index 0000000000..e64b7b7ecd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h
@@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
+
+#include <array>
+#include <fstream>
+#include <limits>
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+constexpr float kFloatMin = std::numeric_limits<float>::min();
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views
+// such that the values in the pair do not match.
+void ExpectEqualFloatArray(rtc::ArrayView<const float> expected,
+                           rtc::ArrayView<const float> computed);
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views
+// such that their absolute error is above a given threshold.
+void ExpectNearAbsolute(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        float tolerance);
+
+// File reader interface.
+class FileReader {
+ public:
+  virtual ~FileReader() = default;
+  // Number of values in the file.
+  virtual int size() const = 0;
+  // Reads `dst.size()` float values into `dst`, advances the internal file
+  // position according to the number of read bytes and returns true if the
+  // values are correctly read. If the number of remaining bytes in the file
+  // is not sufficient to read `dst.size()` float values, `dst` is partially
+  // modified and false is returned.
+  virtual bool ReadChunk(rtc::ArrayView<float> dst) = 0;
+  // Reads a single float value, advances the internal file position according
+  // to the number of read bytes and returns true if the value is correctly
+  // read. If the number of remaining bytes in the file is not sufficient to
+  // read one float, `dst` is not modified and false is returned.
+  virtual bool ReadValue(float& dst) = 0;
+  // Advances the internal file position by `hop` float values.
+  virtual void SeekForward(int hop) = 0;
+  // Resets the internal file position to BOF.
+  virtual void SeekBeginning() = 0;
+};
+
+// File reader for files that contain `num_chunks` chunks with size equal to
+// `chunk_size`.
+struct ChunksFileReader {
+  const int chunk_size;
+  const int num_chunks;
+  std::unique_ptr<FileReader> reader;
+};
+
+// Creates a reader for the PCM S16 samples file.
+std::unique_ptr<FileReader> CreatePcmSamplesReader();
+
+// Creates a reader for the 24 kHz pitch buffer test data.
+ChunksFileReader CreatePitchBuffer24kHzReader();
+
+// Creates a reader for the LP residual and pitch information test data.
+ChunksFileReader CreateLpResidualAndPitchInfoReader();
+
+// Creates a reader for the sequence of GRU input vectors.
+std::unique_ptr<FileReader> CreateGruInputReader();
+
+// Creates a reader for the VAD probabilities test data.
+std::unique_ptr<FileReader> CreateVadProbsReader();
+
+// Class to retrieve a test pitch buffer content and the expected output for
+// the analysis steps.
+class PitchTestData {
+ public:
+  PitchTestData();
+  ~PitchTestData();
+  rtc::ArrayView<const float, kBufSize24kHz> PitchBuffer24kHzView() const {
+    return pitch_buffer_24k_;
+  }
+  rtc::ArrayView<const float, kRefineNumLags24kHz> SquareEnergies24kHzView()
+      const {
+    return square_energies_24k_;
+  }
+  rtc::ArrayView<const float, kNumLags12kHz> AutoCorrelation12kHzView() const {
+    return auto_correlation_12k_;
+  }
+
+ private:
+  std::array<float, kBufSize24kHz> pitch_buffer_24k_;
+  std::array<float, kRefineNumLags24kHz> square_energies_24k_;
+  std::array<float, kNumLags12kHz> auto_correlation_12k_;
+};
+
+// Writer for binary files.
+class FileWriter {
+ public:
+  explicit FileWriter(absl::string_view file_path)
+      : os_(std::string(file_path), std::ios::binary) {}
+  FileWriter(const FileWriter&) = delete;
+  FileWriter& operator=(const FileWriter&) = delete;
+  ~FileWriter() = default;
+  void WriteChunk(rtc::ArrayView<const float> value) {
+    const std::streamsize bytes_to_write = value.size() * sizeof(float);
+    os_.write(reinterpret_cast<const char*>(value.data()), bytes_to_write);
+  }
+
+ private:
+  std::ofstream os_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
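[Editor's note] FileWriter pairs with the readers above when regenerating test data. A small usage sketch; the file path is hypothetical:

#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"

void WriteVector() {
  const float values[] = {0.5f, -0.25f, 1.0f};
  webrtc::rnn_vad::FileWriter writer("/tmp/example.dat");  // Hypothetical path.
  // Writes 3 * sizeof(float) raw bytes in the machine's native byte order.
  writer.WriteChunk(values);
}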
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h
new file mode 100644
index 0000000000..47f681196a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+#include <numeric>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Provides optimizations for mathematical operations having vectors as
+// operand(s).
+class VectorMath {
+ public:
+  explicit VectorMath(AvailableCpuFeatures cpu_features)
+      : cpu_features_(cpu_features) {}
+
+  // Computes the dot product between two equally sized vectors.
+  float DotProduct(rtc::ArrayView<const float> x,
+                   rtc::ArrayView<const float> y) const {
+    RTC_DCHECK_EQ(x.size(), y.size());
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    if (cpu_features_.avx2) {
+      return DotProductAvx2(x, y);
+    } else if (cpu_features_.sse2) {
+      __m128 accumulator = _mm_setzero_ps();
+      constexpr int kBlockSizeLog2 = 2;
+      constexpr int kBlockSize = 1 << kBlockSizeLog2;
+      const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                         << kBlockSizeLog2;
+      for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+        RTC_DCHECK_LE(i + kBlockSize, x.size());
+        const __m128 x_i = _mm_loadu_ps(&x[i]);
+        const __m128 y_i = _mm_loadu_ps(&y[i]);
+        // Multiply-add.
+        const __m128 z_j = _mm_mul_ps(x_i, y_i);
+        accumulator = _mm_add_ps(accumulator, z_j);
+      }
+      // Reduce `accumulator` by addition.
+      __m128 high = _mm_movehl_ps(accumulator, accumulator);
+      accumulator = _mm_add_ps(accumulator, high);
+      high = _mm_shuffle_ps(accumulator, accumulator, 1);
+      accumulator = _mm_add_ps(accumulator, high);
+      float dot_product = _mm_cvtss_f32(accumulator);
+      // Add the result for the last block if incomplete.
+      for (int i = incomplete_block_index;
+           i < rtc::dchecked_cast<int>(x.size()); ++i) {
+        dot_product += x[i] * y[i];
+      }
+      return dot_product;
+    }
+#elif defined(WEBRTC_HAS_NEON) && defined(WEBRTC_ARCH_ARM64)
+    if (cpu_features_.neon) {
+      float32x4_t accumulator = vdupq_n_f32(0.f);
+      constexpr int kBlockSizeLog2 = 2;
+      constexpr int kBlockSize = 1 << kBlockSizeLog2;
+      const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                         << kBlockSizeLog2;
+      for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+        RTC_DCHECK_LE(i + kBlockSize, x.size());
+        const float32x4_t x_i = vld1q_f32(&x[i]);
+        const float32x4_t y_i = vld1q_f32(&y[i]);
+        accumulator = vfmaq_f32(accumulator, x_i, y_i);
+      }
+      // Reduce `accumulator` by addition.
+      const float32x2_t tmp =
+          vpadd_f32(vget_low_f32(accumulator), vget_high_f32(accumulator));
+      float dot_product = vget_lane_f32(vpadd_f32(tmp, vrev64_f32(tmp)), 0);
+      // Add the result for the last block if incomplete.
+      for (int i = incomplete_block_index;
+           i < rtc::dchecked_cast<int>(x.size()); ++i) {
+        dot_product += x[i] * y[i];
+      }
+      return dot_product;
+    }
+#endif
+    return std::inner_product(x.begin(), x.end(), y.begin(), 0.f);
+  }
+
+ private:
+  float DotProductAvx2(rtc::ArrayView<const float> x,
+                       rtc::ArrayView<const float> y) const;
+
+  const AvailableCpuFeatures cpu_features_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
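[Editor's note] The SIMD branches above all follow the same pattern: process 4- or 8-wide blocks, then add a scalar tail for the remainder. A scalar restatement of that structure, equivalent to the std::inner_product fallback:

#include "api/array_view.h"

float BlockedDotProduct(rtc::ArrayView<const float> x,
                        rtc::ArrayView<const float> y) {
  constexpr int kBlockSize = 4;  // 1 << kBlockSizeLog2 with kBlockSizeLog2 = 2.
  // Largest multiple of the block size that fits in `x`.
  const int incomplete_block_index =
      (static_cast<int>(x.size()) / kBlockSize) * kBlockSize;
  float dot_product = 0.f;
  for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
    for (int j = 0; j < kBlockSize; ++j) {  // The SIMD lanes, unrolled.
      dot_product += x[i + j] * y[i + j];
    }
  }
  for (int i = incomplete_block_index; i < static_cast<int>(x.size()); ++i) {
    dot_product += x[i] * y[i];  // Scalar tail.
  }
  return dot_product;
}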
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc
new file mode 100644
index 0000000000..e4d246d9ab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
+
+#include <immintrin.h>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+float VectorMath::DotProductAvx2(rtc::ArrayView<const float> x,
+                                 rtc::ArrayView<const float> y) const {
+  RTC_DCHECK(cpu_features_.avx2);
+  RTC_DCHECK_EQ(x.size(), y.size());
+  __m256 accumulator = _mm256_setzero_ps();
+  constexpr int kBlockSizeLog2 = 3;
+  constexpr int kBlockSize = 1 << kBlockSizeLog2;
+  const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                     << kBlockSizeLog2;
+  for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+    RTC_DCHECK_LE(i + kBlockSize, x.size());
+    const __m256 x_i = _mm256_loadu_ps(&x[i]);
+    const __m256 y_i = _mm256_loadu_ps(&y[i]);
+    accumulator = _mm256_fmadd_ps(x_i, y_i, accumulator);
+  }
+  // Reduce `accumulator` by addition.
+  __m128 high = _mm256_extractf128_ps(accumulator, 1);
+  __m128 low = _mm256_extractf128_ps(accumulator, 0);
+  low = _mm_add_ps(high, low);
+  high = _mm_movehl_ps(high, low);
+  low = _mm_add_ps(high, low);
+  high = _mm_shuffle_ps(low, low, 1);
+  low = _mm_add_ss(high, low);
+  float dot_product = _mm_cvtss_f32(low);
+  // Add the result for the last block if incomplete.
+  for (int i = incomplete_block_index; i < rtc::dchecked_cast<int>(x.size());
+       ++i) {
+    dot_product += x[i] * y[i];
+  }
+  return dot_product;
+}
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build
new file mode 100644
index 0000000000..275c512cf6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build
@@ -0,0 +1,185 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +CXXFLAGS += [ + "-mavx2", + "-mfma" +] + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = 
"0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +Library("vector_math_avx2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build new file mode 100644 index 0000000000..263ec679e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = 
True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vector_math_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc
new file mode 100644
index 0000000000..45fd65d61e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
+
+#include <vector>
+
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kSizeOfX = 19;
+constexpr float kX[kSizeOfX] = {
+    0.31593041f, 0.9350786f,   -0.25252445f, -0.86956251f, -0.9673632f,
+    0.54571901f, -0.72504495f, -0.79509912f, -0.25525012f, -0.73340473f,
+    0.15747377f, -0.04370565f, 0.76135145f,  -0.57239645f, 0.68616848f,
+    0.3740298f,  0.34710799f,  -0.92207423f, 0.10738454f};
+constexpr int kSizeOfXSubSpan = 16;
+static_assert(kSizeOfXSubSpan < kSizeOfX, "");
+constexpr float kEnergyOfX = 7.315563958160327f;
+constexpr float kEnergyOfXSubspan = 6.333327669592963f;
+
+class VectorMathParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+TEST_P(VectorMathParametrization, TestDotProduct) {
+  VectorMath vector_math(/*cpu_features=*/GetParam());
+  EXPECT_FLOAT_EQ(vector_math.DotProduct(kX, kX), kEnergyOfX);
+  EXPECT_FLOAT_EQ(
+      vector_math.DotProduct({kX, kSizeOfXSubSpan}, {kX, kSizeOfXSubSpan}),
+      kEnergyOfXSubspan);
+}
+
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.avx2) {
+    v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  if (available.neon) {
+    v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    VectorMathParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
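[Editor's note] The hard-coded kEnergyOfX constants in the test above are simply DotProduct(x, x), i.e. the sum of squares. A sketch of how such constants can be regenerated; accumulating in double is an assumption made here to obtain the extra printable digits:

#include <cstdio>

double Energy(const float* x, int size) {
  double e = 0.0;  // Double accumulation (assumption, for precision).
  for (int i = 0; i < size; ++i) e += static_cast<double>(x[i]) * x[i];
  return e;
}
// Energy(kX, kSizeOfX) is approximately 7.315563958160327, matching
// kEnergyOfX; Energy(kX, kSizeOfXSubSpan) matches kEnergyOfXSubspan.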
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc
new file mode 100644
index 0000000000..961baf4cd3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector.h"
+
+#include <memory>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
+constexpr float kMinMarginDb = 12.0f;
+constexpr float kMaxMarginDb = 25.0f;
+constexpr float kAttack = 0.9988493699365052f;
+constexpr float kDecay = 0.9997697679981565f;
+
+// Saturation protector state. Defined outside of `SaturationProtectorImpl`
+// to implement check-point and restore ops.
+struct SaturationProtectorState {
+  bool operator==(const SaturationProtectorState& s) const {
+    return headroom_db == s.headroom_db &&
+           peak_delay_buffer == s.peak_delay_buffer &&
+           max_peaks_dbfs == s.max_peaks_dbfs &&
+           time_since_push_ms == s.time_since_push_ms;
+  }
+  inline bool operator!=(const SaturationProtectorState& s) const {
+    return !(*this == s);
+  }
+
+  float headroom_db;
+  SaturationProtectorBuffer peak_delay_buffer;
+  float max_peaks_dbfs;
+  int time_since_push_ms;  // Time since the last ring buffer push operation.
+};
+
+// Resets the saturation protector state.
+void ResetSaturationProtectorState(float initial_headroom_db,
+                                   SaturationProtectorState& state) {
+  state.headroom_db = initial_headroom_db;
+  state.peak_delay_buffer.Reset();
+  state.max_peaks_dbfs = kMinLevelDbfs;
+  state.time_since_push_ms = 0;
+}
+
+// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
+// and the peak level `peak_dbfs` for an observed frame. `state` must not be
+// modified without calling this function.
+void UpdateSaturationProtectorState(float peak_dbfs,
+                                    float speech_level_dbfs,
+                                    SaturationProtectorState& state) {
+  // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
+  state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
+  state.time_since_push_ms += kFrameDurationMs;
+  if (rtc::SafeGt(state.time_since_push_ms,
+                  kPeakEnveloperSuperFrameLengthMs)) {
+    // Push `max_peaks_dbfs` back into the ring buffer.
+    state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
+    // Reset.
+    state.max_peaks_dbfs = kMinLevelDbfs;
+    state.time_since_push_ms = 0;
+  }
+
+  // Update the headroom by comparing the estimated speech level and the
+  // delayed max speech peak.
+  const float delayed_peak_dbfs =
+      state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
+  const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
+  if (difference_db > state.headroom_db) {
+    // Attack.
+    state.headroom_db =
+        state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
+  } else {
+    // Decay.
+    state.headroom_db =
+        state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
+  }
+
+  state.headroom_db =
+      rtc::SafeClamp(state.headroom_db, kMinMarginDb, kMaxMarginDb);
+}
+
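[Editor's note] The headroom update above is an asymmetric one-pole smoother: kAttack tracks increases in the peak-to-level difference relatively quickly, while kDecay releases more slowly. A stripped-down restatement of that step:

// One-pole smoother with asymmetric time constants, mirroring the update in
// UpdateSaturationProtectorState() above.
float SmoothHeadroomDb(float headroom_db, float difference_db) {
  constexpr float kAttack = 0.9988493699365052f;  // Faster tracking upwards.
  constexpr float kDecay = 0.9997697679981565f;   // Slower release downwards.
  const float coeff = (difference_db > headroom_db) ? kAttack : kDecay;
  return headroom_db * coeff + difference_db * (1.0f - coeff);
}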
+// Saturation protector which recommends a headroom based on the recent peaks.
+class SaturationProtectorImpl : public SaturationProtector {
+ public:
+  explicit SaturationProtectorImpl(float initial_headroom_db,
+                                   int adjacent_speech_frames_threshold,
+                                   ApmDataDumper* apm_data_dumper)
+      : apm_data_dumper_(apm_data_dumper),
+        initial_headroom_db_(initial_headroom_db),
+        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
+    Reset();
+  }
+  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
+  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
+  ~SaturationProtectorImpl() = default;
+
+  float HeadroomDb() override { return headroom_db_; }
+
+  void Analyze(float speech_probability,
+               float peak_dbfs,
+               float speech_level_dbfs) override {
+    if (speech_probability < kVadConfidenceThreshold) {
+      // Not a speech frame.
+      if (adjacent_speech_frames_threshold_ > 1) {
+        // When two or more adjacent speech frames are required in order to
+        // update the state, we need to decide whether to discard or confirm
+        // the updates based on the speech sequence length.
+        if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+          // First non-speech frame after a long enough sequence of speech
+          // frames. Update the reliable state.
+          reliable_state_ = preliminary_state_;
+        } else if (num_adjacent_speech_frames_ > 0) {
+          // First non-speech frame after a too short sequence of speech
+          // frames. Reset to the last reliable state.
+          preliminary_state_ = reliable_state_;
+        }
+      }
+      num_adjacent_speech_frames_ = 0;
+    } else {
+      // Speech frame observed.
+      num_adjacent_speech_frames_++;
+
+      // Update preliminary level estimate.
+      UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
+                                     preliminary_state_);
+
+      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+        // `preliminary_state_` is now reliable. Update the headroom.
+        headroom_db_ = preliminary_state_.headroom_db;
+      }
+    }
+    DumpDebugData();
+  }
+
+  void Reset() override {
+    num_adjacent_speech_frames_ = 0;
+    headroom_db_ = initial_headroom_db_;
+    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
+    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
+  }
+
+ private:
+  void DumpDebugData() {
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_preliminary_max_peak_dbfs",
+        preliminary_state_.max_peaks_dbfs);
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_reliable_max_peak_dbfs",
+        reliable_state_.max_peaks_dbfs);
+  }
+
+  ApmDataDumper* const apm_data_dumper_;
+  const float initial_headroom_db_;
+  const int adjacent_speech_frames_threshold_;
+  int num_adjacent_speech_frames_;
+  float headroom_db_;
+  SaturationProtectorState preliminary_state_;
+  SaturationProtectorState reliable_state_;
+};
+
+}  // namespace
+
+std::unique_ptr<SaturationProtector> CreateSaturationProtector(
+    float initial_headroom_db,
+    int adjacent_speech_frames_threshold,
+    ApmDataDumper* apm_data_dumper) {
+  return std::make_unique<SaturationProtectorImpl>(
+      initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h
new file mode 100644
index 0000000000..ef22145d5f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+
+#include <memory>
+
+namespace webrtc {
+class ApmDataDumper;
+
+// Saturation protector. Analyzes peak levels and recommends a headroom to
+// reduce the chances of clipping.
+class SaturationProtector {
+ public:
+  virtual ~SaturationProtector() = default;
+
+  // Returns the recommended headroom in dB.
+  virtual float HeadroomDb() = 0;
+
+  // Analyzes the peak level of a 10 ms frame along with its speech
+  // probability and the current speech level estimate to update the
+  // recommended headroom.
+  virtual void Analyze(float speech_probability,
+                       float peak_dbfs,
+                       float speech_level_dbfs) = 0;
+
+  // Resets the internal state.
+  virtual void Reset() = 0;
+};
+
+// Creates a saturation protector that starts at `initial_headroom_db`.
+std::unique_ptr<SaturationProtector> CreateSaturationProtector(
+    float initial_headroom_db,
+    int adjacent_speech_frames_threshold,
+    ApmDataDumper* apm_data_dumper);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
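[Editor's note] A minimal usage sketch for the interface above; the constructor arguments are illustrative values, and `dumper` must outlive the protector:

#include <memory>

#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"

void Example(webrtc::ApmDataDumper* dumper) {
  // Illustrative parameters: 20 dB initial headroom; require 12 adjacent
  // speech frames before trusting the preliminary estimate.
  std::unique_ptr<webrtc::SaturationProtector> protector =
      webrtc::CreateSaturationProtector(
          /*initial_headroom_db=*/20.0f,
          /*adjacent_speech_frames_threshold=*/12, dumper);
  // Per 10 ms frame: feed the VAD probability, the frame peak and the
  // current speech level estimate, then read the recommended headroom.
  protector->Analyze(/*speech_probability=*/0.9f, /*peak_dbfs=*/-7.0f,
                     /*speech_level_dbfs=*/-24.0f);
  const float headroom_db = protector->HeadroomDb();
  (void)headroom_db;
}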
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc
new file mode 100644
index 0000000000..41efdad2c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+
+SaturationProtectorBuffer::SaturationProtectorBuffer() = default;
+
+SaturationProtectorBuffer::~SaturationProtectorBuffer() = default;
+
+bool SaturationProtectorBuffer::operator==(
+    const SaturationProtectorBuffer& b) const {
+  RTC_DCHECK_LE(size_, buffer_.size());
+  RTC_DCHECK_LE(b.size_, b.buffer_.size());
+  if (size_ != b.size_) {
+    return false;
+  }
+  for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_;
+       ++i, ++i0, ++i1) {
+    if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+int SaturationProtectorBuffer::Capacity() const {
+  return buffer_.size();
+}
+
+int SaturationProtectorBuffer::Size() const {
+  return size_;
+}
+
+void SaturationProtectorBuffer::Reset() {
+  next_ = 0;
+  size_ = 0;
+}
+
+void SaturationProtectorBuffer::PushBack(float v) {
+  RTC_DCHECK_GE(next_, 0);
+  RTC_DCHECK_GE(size_, 0);
+  RTC_DCHECK_LT(next_, buffer_.size());
+  RTC_DCHECK_LE(size_, buffer_.size());
+  buffer_[next_++] = v;
+  if (rtc::SafeEq(next_, buffer_.size())) {
+    next_ = 0;
+  }
+  if (rtc::SafeLt(size_, buffer_.size())) {
+    size_++;
+  }
+}
+
+absl::optional<float> SaturationProtectorBuffer::Front() const {
+  if (size_ == 0) {
+    return absl::nullopt;
+  }
+  RTC_DCHECK_LT(FrontIndex(), buffer_.size());
+  return buffer_[FrontIndex()];
+}
+
+int SaturationProtectorBuffer::FrontIndex() const {
+  return rtc::SafeEq(size_, buffer_.size()) ? next_ : 0;
+}
+
+}  // namespace webrtc
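[Editor's note] FrontIndex() above encodes the two phases of the ring buffer: while filling, the oldest element sits at index 0; once full, it is the slot that will be overwritten next. A commented trace, using a hypothetical capacity of 3 (the real capacity comes from kSaturationProtectorBufferSize):

// Hypothetical capacity 3. PushBack() sequence and the resulting Front():
//   Push(a): buffer = [a, _, _], next_ = 1, size_ = 1 -> Front() = a (index 0)
//   Push(b): buffer = [a, b, _], next_ = 2, size_ = 2 -> Front() = a (index 0)
//   Push(c): buffer = [a, b, c], next_ = 0, size_ = 3 -> Front() = a (index 0)
//   Push(d): buffer = [d, b, c], next_ = 1, size_ = 3 -> Front() = b
//            (size_ == capacity, so FrontIndex() == next_ == 1).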
+ absl::optional<float> Front() const; + + private: + int FrontIndex() const; + // `buffer_` has `size_` elements (up to the size of `buffer_`) and `next_` is + // the position where the next new value is written in `buffer_`. + std::array<float, kSaturationProtectorBufferSize> buffer_; + int next_ = 0; + int size_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc new file mode 100644 index 0000000000..22187bf027 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/saturation_protector_buffer.h" + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Eq; +using ::testing::Optional; + +TEST(GainController2SaturationProtectorBuffer, Init) { + SaturationProtectorBuffer b; + EXPECT_EQ(b.Size(), 0); + EXPECT_FALSE(b.Front().has_value()); +} + +TEST(GainController2SaturationProtectorBuffer, PushBack) { + SaturationProtectorBuffer b; + constexpr float kValue = 123.0f; + b.PushBack(kValue); + EXPECT_EQ(b.Size(), 1); + EXPECT_THAT(b.Front(), Optional(Eq(kValue))); +} + +TEST(GainController2SaturationProtectorBuffer, Reset) { + SaturationProtectorBuffer b; + b.PushBack(123.0f); + b.Reset(); + EXPECT_EQ(b.Size(), 0); + EXPECT_FALSE(b.Front().has_value()); +} + +// Checks that the front value does not change until the ring buffer gets full. +TEST(GainController2SaturationProtectorBuffer, FrontUntilBufferIsFull) { + SaturationProtectorBuffer b; + constexpr float kValue = 123.0f; + b.PushBack(kValue); + for (int i = 1; i < b.Capacity(); ++i) { + SCOPED_TRACE(i); + EXPECT_THAT(b.Front(), Optional(Eq(kValue))); + b.PushBack(kValue + i); + } +} + +// Checks that when the buffer is full it behaves as a shift register. +TEST(GainController2SaturationProtectorBuffer, FrontIsDelayed) { + SaturationProtectorBuffer b; + // Fill the buffer. + for (int i = 0; i < b.Capacity(); ++i) { + b.PushBack(i); + } + // The ring buffer should now behave as a shift register with a delay equal to + // its capacity. + for (int i = b.Capacity(); i < 2 * b.Capacity() + 1; ++i) { + SCOPED_TRACE(i); + EXPECT_THAT(b.Front(), Optional(Eq(i - b.Capacity()))); + b.PushBack(i); + } +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build new file mode 100644 index 0000000000..a1e6f309bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + 
DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("saturation_protector_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc new file mode 100644 index 0000000000..3b104be8cd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/saturation_protector.h" + +#include <algorithm> +#include <cmath> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr float kInitialHeadroomDb = 20.0f; +constexpr int kNoAdjacentSpeechFramesRequired = 1; +constexpr float kMaxSpeechProbability = 1.0f; + +// Calls `Analyze(speech_probability, peak_dbfs, speech_level_dbfs)` +// `num_iterations` times on `saturation_protector` and returns the largest +// headroom difference between two consecutive calls. +float RunOnConstantLevel(int num_iterations, + float speech_probability, + float peak_dbfs, + float speech_level_dbfs, + SaturationProtector& saturation_protector) { + float last_headroom = saturation_protector.HeadroomDb(); + float max_difference = 0.0f; + for (int i = 0; i < num_iterations; ++i) { + saturation_protector.Analyze(speech_probability, peak_dbfs, + speech_level_dbfs); + const float new_headroom = saturation_protector.HeadroomDb(); + max_difference = + std::max(max_difference, std::fabs(new_headroom - last_headroom)); + last_headroom = new_headroom; + } + return max_difference; +} + +// Checks that the returned headroom value is correctly reset. +TEST(GainController2SaturationProtector, Reset) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Make sure that there are side-effects. + ASSERT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); + saturation_protector->Reset(); + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +// Checks that the estimate converges to the ratio between peaks and level +// estimator values after a while. +TEST(GainController2SaturationProtector, EstimatesCrestRatio) { + constexpr int kNumIterations = 2000; + constexpr float kPeakLevelDbfs = -20.0f; + constexpr float kCrestFactorDb = kInitialHeadroomDb + 1.0f; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + const float kMaxDifferenceDb = + 0.5f * std::fabs(kInitialHeadroomDb - kCrestFactorDb); + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb, + kMaxDifferenceDb); +} + +// Checks that the headroom does not change too quickly.
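+// The bound used in the expectation below is `kMaxChangeSpeedDbPerSecond` +// rescaled to a single 10 ms frame: 0.5 dB/s * 0.01 s = 0.005 dB per frame +// (a restatement of the arithmetic in the test body, with the frame duration +// taken from `kFrameDurationMs`).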
+TEST(GainController2SaturationProtector, ChangeSlowly) { + constexpr int kNumIterations = 1000; + constexpr float kPeakLevelDbfs = -20.f; + constexpr float kCrestFactorDb = kInitialHeadroomDb - 5.f; + constexpr float kOtherCrestFactorDb = kInitialHeadroomDb; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + constexpr float kOtherSpeechLevelDbfs = kPeakLevelDbfs - kOtherCrestFactorDb; + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + float max_difference_db = + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + max_difference_db = std::max( + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kOtherSpeechLevelDbfs, *saturation_protector), + max_difference_db); + constexpr float kMaxChangeSpeedDbPerSecond = 0.5f; // 1 dB / 2 seconds. + EXPECT_LE(max_difference_db, + kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs); +} + +class SaturationProtectorParametrization + : public ::testing::TestWithParam<int> { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // No adaptation expected. + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Adaptation expected. + EXPECT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + SaturationProtectorParametrization, + ::testing::Values(2, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc new file mode 100644 index 0000000000..7bf3252116 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/agc2/speech_level_estimator.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +float ClampLevelEstimateDbfs(float level_estimate_dbfs) { + return rtc::SafeClamp(level_estimate_dbfs, -90.0f, 30.0f); +} + +// Returns the initial speech level estimate needed to apply the initial gain. +float GetInitialSpeechLevelEstimateDbfs( + const AudioProcessing::Config::GainController2::AdaptiveDigital& config) { + return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb - + config.initial_gain_db - config.headroom_db); +} + +} // namespace + +bool SpeechLevelEstimator::LevelEstimatorState::operator==( + const SpeechLevelEstimator::LevelEstimatorState& b) const { + return time_to_confidence_ms == b.time_to_confidence_ms && + level_dbfs.numerator == b.level_dbfs.numerator && + level_dbfs.denominator == b.level_dbfs.denominator; +} + +float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { + RTC_DCHECK_NE(denominator, 0.f); + return numerator / denominator; +} + +SpeechLevelEstimator::SpeechLevelEstimator( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold) + : apm_data_dumper_(apm_data_dumper), + initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), + level_dbfs_(initial_speech_level_dbfs_), + // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume + // controller temporal dependency removed. + is_confident_(false) { + RTC_DCHECK(apm_data_dumper_); + RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1); + Reset(); +} + +void SpeechLevelEstimator::Update(float rms_dbfs, + float peak_dbfs, + float speech_probability) { + RTC_DCHECK_GT(rms_dbfs, -150.0f); + RTC_DCHECK_LT(rms_dbfs, 50.0f); + RTC_DCHECK_GT(peak_dbfs, -150.0f); + RTC_DCHECK_LT(peak_dbfs, 50.0f); + RTC_DCHECK_GE(speech_probability, 0.0f); + RTC_DCHECK_LE(speech_probability, 1.0f); + if (speech_probability < kVadConfidenceThreshold) { + // Not a speech frame. + if (adjacent_speech_frames_threshold_ > 1) { + // When two or more adjacent speech frames are required in order to update + // the state, we need to decide whether to discard or confirm the updates + // based on the speech sequence length. + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // First non-speech frame after a long enough sequence of speech frames. + // Update the reliable state. + reliable_state_ = preliminary_state_; + } else if (num_adjacent_speech_frames_ > 0) { + // First non-speech frame after a too short sequence of speech frames. + // Reset to the last reliable state. + preliminary_state_ = reliable_state_; + } + } + num_adjacent_speech_frames_ = 0; + } else { + // Speech frame observed. + num_adjacent_speech_frames_++; + + // Update preliminary level estimate. + RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0); + const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0; + if (!buffer_is_full) { + preliminary_state_.time_to_confidence_ms -= kFrameDurationMs; + } + // Weighted average of levels with speech probability as weight. + RTC_DCHECK_GT(speech_probability, 0.0f); + const float leak_factor = buffer_is_full ? 
kLevelEstimatorLeakFactor : 1.0f; + preliminary_state_.level_dbfs.numerator = + preliminary_state_.level_dbfs.numerator * leak_factor + + rms_dbfs * speech_probability; + preliminary_state_.level_dbfs.denominator = + preliminary_state_.level_dbfs.denominator * leak_factor + + speech_probability; + + const float level_dbfs = preliminary_state_.level_dbfs.GetRatio(); + + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // `preliminary_state_` is now reliable. Update the last level estimation. + level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs); + } + } + UpdateIsConfident(); + DumpDebugData(); +} + +void SpeechLevelEstimator::UpdateIsConfident() { + if (adjacent_speech_frames_threshold_ == 1) { + // Ignore `reliable_state_` when a single frame is enough to update the + // level estimate (because it is not used). + is_confident_ = preliminary_state_.time_to_confidence_ms == 0; + return; + } + // Once confident, it remains confident. + RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 || + preliminary_state_.time_to_confidence_ms == 0); + // During the first long enough speech sequence, `reliable_state_` must be + // ignored since `preliminary_state_` is used. + is_confident_ = + reliable_state_.time_to_confidence_ms == 0 || + (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ && + preliminary_state_.time_to_confidence_ms == 0); +} + +void SpeechLevelEstimator::Reset() { + ResetLevelEstimatorState(preliminary_state_); + ResetLevelEstimatorState(reliable_state_); + level_dbfs_ = initial_speech_level_dbfs_; + num_adjacent_speech_frames_ = 0; +} + +void SpeechLevelEstimator::ResetLevelEstimatorState( + LevelEstimatorState& state) const { + state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs; + state.level_dbfs.numerator = initial_speech_level_dbfs_; + state.level_dbfs.denominator = 1.0f; +} + +void SpeechLevelEstimator::DumpDebugData() const { + if (!apm_data_dumper_) + return; + apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_); + apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_num_adjacent_speech_frames", + num_adjacent_speech_frames_); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_num", + preliminary_state_.level_dbfs.numerator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_den", + preliminary_state_.level_dbfs.denominator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms", + preliminary_state_.time_to_confidence_ms); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_reliable_time_to_confidence_ms", + reliable_state_.time_to_confidence_ms); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h new file mode 100644 index 0000000000..4d9f106ba9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ + +#include <stddef.h> + +#include <type_traits> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { +class ApmDataDumper; + +// Active speech level estimator based on the analysis of the following +// framewise properties: RMS level (dBFS), peak level (dBFS), speech +// probability. +class SpeechLevelEstimator { + public: + SpeechLevelEstimator( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold); + SpeechLevelEstimator(const SpeechLevelEstimator&) = delete; + SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete; + + // Updates the level estimation. + void Update(float rms_dbfs, float peak_dbfs, float speech_probability); + // Returns the estimated speech plus noise level. + float level_dbfs() const { return level_dbfs_; } + // Returns true if the estimator is confident about its current estimate. + bool is_confident() const { return is_confident_; } + + void Reset(); + + private: + // Part of the level estimator state used for check-pointing and restore ops. + struct LevelEstimatorState { + bool operator==(const LevelEstimatorState& s) const; + inline bool operator!=(const LevelEstimatorState& s) const { + return !(*this == s); + } + // TODO(bugs.webrtc.org/7494): Remove `time_to_confidence_ms` if redundant. + int time_to_confidence_ms; + struct Ratio { + float numerator; + float denominator; + float GetRatio() const; + } level_dbfs; + }; + static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, ""); + + void UpdateIsConfident(); + + void ResetLevelEstimatorState(LevelEstimatorState& state) const; + + void DumpDebugData() const; + + ApmDataDumper* const apm_data_dumper_; + + const float initial_speech_level_dbfs_; + const int adjacent_speech_frames_threshold_; + LevelEstimatorState preliminary_state_; + LevelEstimatorState reliable_state_; + float level_dbfs_; + bool is_confident_; + int num_adjacent_speech_frames_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build new file mode 100644 index 0000000000..bb1dbc67b8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS 
+= [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("speech_level_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc new file mode 100644 index 0000000000..e1c5f85434 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/speech_level_estimator.h" + +#include <memory> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +// Number of speech frames that the level estimator must observe in order to +// become confident about the estimated level.
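+// The estimator decrements its time-to-confidence counter by
+// `kFrameDurationMs` on each speech frame, so confidence depends only on the
+// number of speech frames observed (a summary of
+// `SpeechLevelEstimator::Update()` in speech_level_estimator.cc).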
+constexpr int kNumFramesToConfidence = + kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs; +static_assert(kNumFramesToConfidence > 0, ""); + +constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f; + +// Provides the level and speech probability values `num_iterations` times to +// `level_estimator`. +void RunOnConstantLevel(int num_iterations, + float rms_dbfs, + float peak_dbfs, + float speech_probability, + SpeechLevelEstimator& level_estimator) { + for (int i = 0; i < num_iterations; ++i) { + level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability); + } +} + +constexpr float kNoSpeechProbability = 0.0f; +constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f; +constexpr float kMaxSpeechProbability = 1.0f; + +// Level estimator with data dumper. +struct TestLevelEstimator { + explicit TestLevelEstimator(int adjacent_speech_frames_threshold) + : data_dumper(0), + estimator(std::make_unique<SpeechLevelEstimator>( + &data_dumper, + AdaptiveDigitalConfig{}, + adjacent_speech_frames_threshold)), + initial_speech_level_dbfs(estimator->level_dbfs()), + level_rms_dbfs(initial_speech_level_dbfs / 2.0f), + level_peak_dbfs(initial_speech_level_dbfs / 3.0f) { + RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs); + RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs); + RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f) + << "Adjust `level_rms_dbfs` so that the difference from the initial " + "level is wide enough for the tests"; + } + ApmDataDumper data_dumper; + std::unique_ptr<SpeechLevelEstimator> estimator; + const float initial_speech_level_dbfs; + const float level_rms_dbfs; + const float level_peak_dbfs; +}; + +// Checks that the level estimator converges to a constant input speech level. +TEST(GainController2SpeechLevelEstimator, LevelStabilizes) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); + RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs, + 0.1f); +} + +// Checks that the level estimator does not become confident when too few +// speech frames are observed. +TEST(GainController2SpeechLevelEstimator, IsNotConfident) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_FALSE(level_estimator.estimator->is_confident()); +} + +// Checks that the level estimator becomes confident when enough speech frames +// are observed. +TEST(GainController2SpeechLevelEstimator, IsConfident) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_TRUE(level_estimator.estimator->is_confident()); +} + +// Checks that the estimated level is not affected by the level of non-speech +// frames.
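+// Frames whose `speech_probability` is below `kVadConfidenceThreshold` never
+// touch the preliminary level state in `SpeechLevelEstimator::Update()`, which
+// is what the expectation below relies on.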
+TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Simulate speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); + // Simulate full-scale non-speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f, + kNoSpeechProbability, *level_estimator.estimator); + // No estimated level change is expected. + EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + estimated_level_dbfs); +} + +// Checks the convergence speed of the estimator before it becomes confident. +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +// Checks the convergence speed of the estimator after it becomes confident. +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Reach confidence using the initial level estimate. + RunOnConstantLevel( + /*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs, + /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f, + kMaxSpeechProbability, *level_estimator.estimator); + // No estimate change should occur, but confidence is achieved. + ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + level_estimator.initial_speech_level_dbfs); + ASSERT_TRUE(level_estimator.estimator->is_confident()); + // After confidence. + constexpr int kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds.
+ static_assert( + kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, ""); + RunOnConstantLevel( + /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames, + level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, + kMaxSpeechProbability, *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +class SpeechLevelEstimatorParametrization + : public ::testing::TestWithParam<int> { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) { + SCOPED_TRACE(i); + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); + } + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kLowSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); +} + +TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + } + EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + SpeechLevelEstimatorParametrization, + ::testing::Values(1, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc new file mode 100644 index 0000000000..7746f6c000 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/speech_probability_buffer.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr float kActivityThreshold = 0.9f; +constexpr int kNumAnalysisFrames = 100; +// We use 12 in AGC2 adaptive digital, but with a slightly different logic.
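+// A transient is a run of at most `kTransientWidthThreshold` frames above the
+// low-probability threshold that is immediately followed by a low-probability
+// frame; `RemoveTransient()` below retroactively zeroes out such runs.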
+constexpr int kTransientWidthThreshold = 7; + +} // namespace + +SpeechProbabilityBuffer::SpeechProbabilityBuffer( + float low_probability_threshold) + : low_probability_threshold_(low_probability_threshold), + probabilities_(kNumAnalysisFrames) { + RTC_DCHECK_GE(low_probability_threshold, 0.0f); + RTC_DCHECK_LE(low_probability_threshold, 1.0f); + RTC_DCHECK(!probabilities_.empty()); +} + +void SpeechProbabilityBuffer::Update(float probability) { + // Remove the oldest entry if the circular buffer is full. + if (buffer_is_full_) { + const float oldest_probability = probabilities_[buffer_index_]; + sum_probabilities_ -= oldest_probability; + } + + // Check for transients. + if (probability <= low_probability_threshold_) { + // Set a probability lower than the threshold to zero. + probability = 0.0f; + + // Check if this has been a transient. + if (num_high_probability_observations_ <= kTransientWidthThreshold) { + RemoveTransient(); + } + num_high_probability_observations_ = 0; + } else if (num_high_probability_observations_ <= kTransientWidthThreshold) { + ++num_high_probability_observations_; + } + + // Update the circular buffer and the current sum. + probabilities_[buffer_index_] = probability; + sum_probabilities_ += probability; + + // Increment the buffer index and check for wrap-around. + if (++buffer_index_ >= kNumAnalysisFrames) { + buffer_index_ = 0; + buffer_is_full_ = true; + } +} + +void SpeechProbabilityBuffer::RemoveTransient() { + // Don't expect to be here if high-activity region is longer than + // `kTransientWidthThreshold` or there has not been any transient. + RTC_DCHECK_LE(num_high_probability_observations_, kTransientWidthThreshold); + + // Replace previously added probabilities with zero. + int index = + (buffer_index_ > 0) ? (buffer_index_ - 1) : (kNumAnalysisFrames - 1); + + while (num_high_probability_observations_-- > 0) { + sum_probabilities_ -= probabilities_[index]; + probabilities_[index] = 0.0f; + + // Update the circular buffer index. + index = (index > 0) ? (index - 1) : (kNumAnalysisFrames - 1); + } +} + +bool SpeechProbabilityBuffer::IsActiveSegment() const { + if (!buffer_is_full_) { + return false; + } + if (sum_probabilities_ < kActivityThreshold * kNumAnalysisFrames) { + return false; + } + return true; +} + +void SpeechProbabilityBuffer::Reset() { + sum_probabilities_ = 0.0f; + + // Empty the circular buffer. + buffer_index_ = 0; + buffer_is_full_ = false; + num_high_probability_observations_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h new file mode 100644 index 0000000000..3056a3eeab --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_ + +#include <vector> + +#include "rtc_base/gtest_prod_util.h" + +namespace webrtc { + +// This class implements a circular buffer that stores speech probabilities +// for a speech segment and estimates speech activity for that segment. +class SpeechProbabilityBuffer { + public: + // Ctor. The value of `low_probability_threshold` is required to be in the + // range [0.0f, 1.0f]. + explicit SpeechProbabilityBuffer(float low_probability_threshold); + ~SpeechProbabilityBuffer() {} + SpeechProbabilityBuffer(const SpeechProbabilityBuffer&) = delete; + SpeechProbabilityBuffer& operator=(const SpeechProbabilityBuffer&) = delete; + + // Adds `probability` in the buffer and computes an updated sum of the buffer + // probabilities. The value of `probability` is required to be in the range + // [0.0f, 1.0f]. + void Update(float probability); + + // Resets the buffer, forgetting the past. + void Reset(); + + // Returns true if the segment is active (a long enough segment with an + // average speech probability above `low_probability_threshold`). + bool IsActiveSegment() const; + + private: + void RemoveTransient(); + + // Use only for testing. + float GetSumProbabilities() const { return sum_probabilities_; } + + FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, + CheckSumAfterInitialization); + FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, CheckSumAfterUpdate); + FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, CheckSumAfterReset); + FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, + CheckSumAfterTransientNotRemoved); + FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, + CheckSumAfterTransientRemoved); + + const float low_probability_threshold_; + + // Sum of probabilities stored in `probabilities_`. Must be updated if + // `probabilities_` is updated. + float sum_probabilities_ = 0.0f; + + // Circular buffer for probabilities. + std::vector<float> probabilities_; + + // Current index of the circular buffer, where the next value will be + // written; when the buffer is full, it therefore points to the oldest value. + int buffer_index_ = 0; + + // Indicates if the buffer is full and adding a new value removes the oldest + // value. + bool buffer_is_full_ = false; + + int num_high_probability_observations_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc new file mode 100644 index 0000000000..89cc209d9d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/agc2/speech_probability_buffer.h" + +#include + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr float kAbsError = 0.001f; +constexpr float kActivityThreshold = 0.9f; +constexpr float kLowProbabilityThreshold = 0.2f; +constexpr int kNumAnalysisFrames = 100; + +} // namespace + +TEST(SpeechProbabilityBufferTest, CheckSumAfterInitialization) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + EXPECT_EQ(buffer.GetSumProbabilities(), 0.0f); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterUpdate) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.7f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 0.7f, kAbsError); + + buffer.Update(0.6f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 1.3f, kAbsError); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_NEAR(buffer.GetSumProbabilities(), 99.6f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterReset) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.7f); + buffer.Update(0.6f); + buffer.Update(0.3f); + + EXPECT_GT(buffer.GetSumProbabilities(), 0.0f); + + buffer.Reset(); + + EXPECT_EQ(buffer.GetSumProbabilities(), 0.0f); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 9.0f, kAbsError); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 9.0f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 3.0f, kAbsError); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 0.0f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterNoUpdates) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveChangesFromFalseToTrue) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + // Add low probabilities until the buffer is full. That's not enough + // to make `IsActiveSegment()` to return true. + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + // Add high probabilities until `IsActiveSegment()` returns true. + for (int i = 0; i < kActivityThreshold * kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveChangesFromTrueToFalse) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + // Add high probabilities until the buffer is full. That's enough to + // make `IsActiveSegment()` to return true. 
+ for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + // Add low probabilities until `IsActiveSegment()` returns false. + for (int i = 0; i < (1.0f - kActivityThreshold) * kNumAnalysisFrames - 1; + ++i) { + buffer.Update(0.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsActiveAfterUpdatesWithHighProbabilities) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterUpdatesWithLowProbabilities) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(0.3f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.3f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveAfterBufferIsFull) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterBufferIsFull) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(0.29f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.29f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.29f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterReset) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Reset(); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientRemovedAfterFewUpdates) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.4f); + buffer.Update(0.4f); + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsActiveAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.1f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + 
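+// Illustrative usage sketch for the buffer under test (not part of the
+// upstream suite; it assumes the constants defined at the top of this file):
+//
+//   SpeechProbabilityBuffer buffer(kLowProbabilityThreshold);
+//   for (int i = 0; i < kNumAnalysisFrames; ++i) {
+//     buffer.Update(1.0f);  // Confident speech; the sum reaches 100.
+//   }
+//   bool active = buffer.IsActiveSegment();  // True: 100 >= 0.9 * 100.
+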
+TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.1f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc new file mode 100644 index 0000000000..af6325dea7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/vad_wrapper.h" + +#include <array> +#include <utility> + +#include "api/array_view.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr int kNumFramesPerSecond = 100; + +class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad { + public: + explicit MonoVadImpl(const AvailableCpuFeatures& cpu_features) + : features_extractor_(cpu_features), rnn_vad_(cpu_features) {} + MonoVadImpl(const MonoVadImpl&) = delete; + MonoVadImpl& operator=(const MonoVadImpl&) = delete; + ~MonoVadImpl() = default; + + int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; } + void Reset() override { rnn_vad_.Reset(); } + float Analyze(rtc::ArrayView<float> frame) override { + RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz); + std::array<float, rnn_vad::kFeatureVectorSize> feature_vector; + const bool is_silence = features_extractor_.CheckSilenceComputeFeatures( + /*samples=*/{frame.data(), rnn_vad::kFrameSize10ms24kHz}, + feature_vector); + return rnn_vad_.ComputeVadProbability(feature_vector, is_silence); + } + + private: + rnn_vad::FeaturesExtractor features_extractor_; + rnn_vad::RnnVad rnn_vad_; +}; + +} // namespace + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz) + : VoiceActivityDetectorWrapper(kVadResetPeriodMs, + cpu_features, + sample_rate_hz) {} + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + int vad_reset_period_ms, + const AvailableCpuFeatures& cpu_features,
+    int sample_rate_hz)
+    : VoiceActivityDetectorWrapper(vad_reset_period_ms,
+                                   std::make_unique<MonoVadImpl>(cpu_features),
+                                   sample_rate_hz) {}
+
+VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
+    int vad_reset_period_ms,
+    std::unique_ptr<MonoVad> vad,
+    int sample_rate_hz)
+    : vad_reset_period_frames_(
+          rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)),
+      time_to_vad_reset_(vad_reset_period_frames_),
+      vad_(std::move(vad)) {
+  RTC_DCHECK(vad_);
+  RTC_DCHECK_GT(vad_reset_period_frames_, 1);
+  resampled_buffer_.resize(
+      rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond));
+  Initialize(sample_rate_hz);
+}
+
+VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default;
+
+void VoiceActivityDetectorWrapper::Initialize(int sample_rate_hz) {
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  frame_size_ = rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond);
+  int status =
+      resampler_.InitializeIfNeeded(sample_rate_hz, vad_->SampleRateHz(),
+                                    /*num_channels=*/1);
+  constexpr int kStatusOk = 0;
+  RTC_DCHECK_EQ(status, kStatusOk);
+  vad_->Reset();
+}
+
+float VoiceActivityDetectorWrapper::Analyze(
+    AudioFrameView<const float> frame) {
+  // Periodically reset the VAD.
+  time_to_vad_reset_--;
+  if (time_to_vad_reset_ <= 0) {
+    vad_->Reset();
+    time_to_vad_reset_ = vad_reset_period_frames_;
+  }
+  // Resample the first channel of `frame`.
+  RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_);
+  resampler_.Resample(frame.channel(0).data(), frame_size_,
+                      resampled_buffer_.data(), resampled_buffer_.size());
+
+  return vad_->Analyze(resampled_buffer_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h
new file mode 100644
index 0000000000..459c471630
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/resampler/include/push_resampler.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+// Wraps a single-channel Voice Activity Detector (VAD) which is used to
+// analyze the first channel of the input audio frames. Takes care of
+// resampling the input frames to match the sample rate of the wrapped VAD and
+// periodically resets the VAD.
+class VoiceActivityDetectorWrapper {
+ public:
+  // Single channel VAD interface.
+  class MonoVad {
+   public:
+    virtual ~MonoVad() = default;
+    // Returns the sample rate (Hz) required for the input frames analyzed by
+    // `Analyze`.
+    virtual int SampleRateHz() const = 0;
+    // Resets the internal state.
+    virtual void Reset() = 0;
+    // Analyzes an audio frame and returns the speech probability.
+    virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
+  };
+
+  // Ctor. Uses `cpu_features` to instantiate the default VAD.
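+  // Resets the wrapped VAD every `kVadResetPeriodMs` milliseconds.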
+  VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
+                               int sample_rate_hz);
+
+  // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
+  // `MonoVad::Reset()`; it must be equal to or greater than the duration of
+  // two frames. Uses `cpu_features` to instantiate the default VAD.
+  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
+                               const AvailableCpuFeatures& cpu_features,
+                               int sample_rate_hz);
+  // Ctor. Uses a custom `vad`.
+  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
+                               std::unique_ptr<MonoVad> vad,
+                               int sample_rate_hz);
+
+  VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete;
+  VoiceActivityDetectorWrapper& operator=(
+      const VoiceActivityDetectorWrapper&) = delete;
+  ~VoiceActivityDetectorWrapper();
+
+  // Initializes the VAD wrapper.
+  void Initialize(int sample_rate_hz);
+
+  // Analyzes the first channel of `frame` and returns the speech probability.
+  // `frame` must be a 10 ms frame with the sample rate specified in the last
+  // `Initialize()` call.
+  float Analyze(AudioFrameView<const float> frame);
+
+ private:
+  const int vad_reset_period_frames_;
+  int frame_size_;
+  int time_to_vad_reset_;
+  PushResampler<float> resampler_;
+  std::unique_ptr<MonoVad> vad_;
+  std::vector<float> resampled_buffer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build
new file mode 100644
index 0000000000..dfa2765108
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + 
"winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vad_wrapper_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc new file mode 100644 index 0000000000..91efdb566e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/vad_wrapper.h"
+
+#include <limits>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/gunit.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::AnyNumber;
+using ::testing::Return;
+using ::testing::ReturnRoundRobin;
+using ::testing::Truly;
+
+constexpr int kNumFramesPerSecond = 100;
+
+constexpr int kNoVadPeriodicReset =
+    kFrameDurationMs * (std::numeric_limits<int>::max() / kFrameDurationMs);
+
+constexpr int kSampleRate8kHz = 8000;
+
+class MockVad : public VoiceActivityDetectorWrapper::MonoVad {
+ public:
+  MOCK_METHOD(int, SampleRateHz, (), (const, override));
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(float, Analyze, (rtc::ArrayView<const float> frame), (override));
+};
+
+// Checks that the ctor and `Initialize()` read the sample rate of the wrapped
+// VAD.
+TEST(GainController2VoiceActivityDetectorWrapper, CtorAndInitReadSampleRate) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(2)
+      .WillRepeatedly(Return(kSampleRate8kHz));
+  EXPECT_CALL(*vad, Reset).Times(AnyNumber());
+  auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>(
+      kNoVadPeriodicReset, std::move(vad), kSampleRate8kHz);
+}
+
+// Creates a `VoiceActivityDetectorWrapper` injecting a mock VAD that
+// repeatedly returns the next value from `speech_probabilities` and that
+// restarts from the beginning after the last element is returned.
+std::unique_ptr<VoiceActivityDetectorWrapper> CreateMockVadWrapper(
+    int vad_reset_period_ms,
+    int sample_rate_hz,
+    const std::vector<float>& speech_probabilities,
+    int expected_vad_reset_calls) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(AnyNumber())
+      .WillRepeatedly(Return(sample_rate_hz));
+  if (expected_vad_reset_calls >= 0) {
+    EXPECT_CALL(*vad, Reset).Times(expected_vad_reset_calls);
+  }
+  EXPECT_CALL(*vad, Analyze)
+      .Times(AnyNumber())
+      .WillRepeatedly(ReturnRoundRobin(speech_probabilities));
+  return std::make_unique<VoiceActivityDetectorWrapper>(
+      vad_reset_period_ms, std::move(vad), kSampleRate8kHz);
+}
+
+// 10 ms mono frame.
+struct FrameWithView {
+  // Ctor. Initializes the frame samples to zero.
+  explicit FrameWithView(int sample_rate_hz)
+      : samples(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond),
+                0.0f),
+        channel0(samples.data()),
+        view(&channel0, /*num_channels=*/1, samples.size()) {}
+  std::vector<float> samples;
+  const float* const channel0;
+  const AudioFrameView<const float> view;
+};
+
+// Checks that the expected speech probabilities are returned.
+TEST(GainController2VoiceActivityDetectorWrapper, CheckSpeechProbabilities) {
+  const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f,
+                                                0.44f,  0.525f, 0.858f, 0.314f,
+                                                0.653f, 0.965f, 0.413f, 0.0f};
+  auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz,
+                                          speech_probabilities,
+                                          /*expected_vad_reset_calls=*/1);
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_EQ(speech_probabilities[i], vad_wrapper->Analyze(frame.view));
+  }
+}
+
+// Checks that the VAD is not periodically reset.
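+// With `kNoVadPeriodicReset` the only expected `Reset()` call is the one made
+// when the wrapper is constructed.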
+TEST(GainController2VoiceActivityDetectorWrapper, VadNoPeriodicReset) {
+  constexpr int kNumFrames = 19;
+  auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz,
+                                          /*speech_probabilities=*/{1.0f},
+                                          /*expected_vad_reset_calls=*/1);
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; i < kNumFrames; ++i) {
+    vad_wrapper->Analyze(frame.view);
+  }
+}
+
+class VadPeriodResetParametrization
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int num_frames() const { return std::get<0>(GetParam()); }
+  int vad_reset_period_frames() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that the VAD is periodically reset with the expected period.
+TEST_P(VadPeriodResetParametrization, VadPeriodicReset) {
+  auto vad_wrapper = CreateMockVadWrapper(
+      /*vad_reset_period_ms=*/vad_reset_period_frames() * kFrameDurationMs,
+      kSampleRate8kHz,
+      /*speech_probabilities=*/{1.0f},
+      /*expected_vad_reset_calls=*/1 +
+          num_frames() / vad_reset_period_frames());
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; i < num_frames(); ++i) {
+    vad_wrapper->Analyze(frame.view);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController2VoiceActivityDetectorWrapper,
+                         VadPeriodResetParametrization,
+                         ::testing::Combine(::testing::Values(1, 19, 123),
+                                            ::testing::Values(2, 5, 20, 53)));
+
+class VadResamplingParametrization
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int input_sample_rate_hz() const { return std::get<0>(GetParam()); }
+  int vad_sample_rate_hz() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that regardless of the input audio sample rate, the wrapped VAD
+// analyzes frames having the expected size, that is, according to its
+// internal sample rate.
+TEST_P(VadResamplingParametrization, CheckResampledFrameSize) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(AnyNumber())
+      .WillRepeatedly(Return(vad_sample_rate_hz()));
+  EXPECT_CALL(*vad, Reset).Times(1);
+  EXPECT_CALL(*vad, Analyze(Truly([this](rtc::ArrayView<const float> frame) {
+    return rtc::SafeEq(frame.size(), rtc::CheckedDivExact(vad_sample_rate_hz(),
+                                                          kNumFramesPerSecond));
+  }))).Times(1);
+  auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>(
+      kNoVadPeriodicReset, std::move(vad), input_sample_rate_hz());
+  FrameWithView frame(input_sample_rate_hz());
+  vad_wrapper->Analyze(frame.view);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    GainController2VoiceActivityDetectorWrapper,
+    VadResamplingParametrization,
+    ::testing::Combine(::testing::Values(8000, 16000, 44100, 48000),
+                       ::testing::Values(6000, 8000, 12000, 16000, 24000)));
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc
new file mode 100644
index 0000000000..a70d815196
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+
+namespace webrtc {
+
+namespace {
+
+std::vector<float*> ConstructChannelPointers(
+    std::vector<std::vector<float>>* x) {
+  std::vector<float*> channel_ptrs;
+  for (auto& v : *x) {
+    channel_ptrs.push_back(v.data());
+  }
+  return channel_ptrs;
+}
+}  // namespace
+
+VectorFloatFrame::VectorFloatFrame(int num_channels,
+                                   int samples_per_channel,
+                                   float start_value)
+    : channels_(num_channels,
+                std::vector<float>(samples_per_channel, start_value)),
+      channel_ptrs_(ConstructChannelPointers(&channels_)),
+      float_frame_view_(channel_ptrs_.data(),
+                        channels_.size(),
+                        samples_per_channel) {}
+
+VectorFloatFrame::~VectorFloatFrame() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h
new file mode 100644
index 0000000000..b521f346f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
+
+#include <vector>
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+// A construct consisting of a multi-channel audio frame, and an
+// AudioFrameView of it.
+class VectorFloatFrame {
+ public:
+  VectorFloatFrame(int num_channels,
+                   int samples_per_channel,
+                   float start_value);
+  const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
+  AudioFrameView<const float> float_frame_view() const {
+    return float_frame_view_;
+  }
+
+  ~VectorFloatFrame();
+
+ private:
+  std::vector<std::vector<float>> channels_;
+  std::vector<float*> channel_ptrs_;
+  AudioFrameView<float> float_frame_view_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build
new file mode 100644
index 0000000000..7f1c48ac39
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("api_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build new file mode 100644 index 0000000000..b6d96414b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ 
+ "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("apm_logging_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc b/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc new file mode 100644 index 0000000000..3dbe1fe072 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/audio_buffer.h"
+
+#include <string.h>
+
+#include <cstdint>
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "modules/audio_processing/splitting_filter.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr size_t kSamplesPer32kHzChannel = 320;
+constexpr size_t kSamplesPer48kHzChannel = 480;
+constexpr size_t kMaxSamplesPerChannel = AudioBuffer::kMaxSampleRate / 100;
+
+size_t NumBandsFromFramesPerChannel(size_t num_frames) {
+  if (num_frames == kSamplesPer32kHzChannel) {
+    return 2;
+  }
+  if (num_frames == kSamplesPer48kHzChannel) {
+    return 3;
+  }
+  return 1;
+}
+
+}  // namespace
+
+AudioBuffer::AudioBuffer(size_t input_rate,
+                         size_t input_num_channels,
+                         size_t buffer_rate,
+                         size_t buffer_num_channels,
+                         size_t output_rate,
+                         size_t output_num_channels)
+    : input_num_frames_(static_cast<int>(input_rate) / 100),
+      input_num_channels_(input_num_channels),
+      buffer_num_frames_(static_cast<int>(buffer_rate) / 100),
+      buffer_num_channels_(buffer_num_channels),
+      output_num_frames_(static_cast<int>(output_rate) / 100),
+      output_num_channels_(0),
+      num_channels_(buffer_num_channels),
+      num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
+      num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
+      data_(new ChannelBuffer<float>(buffer_num_frames_,
+                                     buffer_num_channels_)) {
+  RTC_DCHECK_GT(input_num_frames_, 0);
+  RTC_DCHECK_GT(buffer_num_frames_, 0);
+  RTC_DCHECK_GT(output_num_frames_, 0);
+  RTC_DCHECK_GT(input_num_channels_, 0);
+  RTC_DCHECK_GT(buffer_num_channels_, 0);
+  RTC_DCHECK_LE(buffer_num_channels_, input_num_channels_);
+
+  const bool input_resampling_needed = input_num_frames_ != buffer_num_frames_;
+  const bool output_resampling_needed =
+      output_num_frames_ != buffer_num_frames_;
+  if (input_resampling_needed) {
+    for (size_t i = 0; i < buffer_num_channels_; ++i) {
+      input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+          new PushSincResampler(input_num_frames_, buffer_num_frames_)));
+    }
+  }
+
+  if (output_resampling_needed) {
+    for (size_t i = 0; i < buffer_num_channels_; ++i) {
+      output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+          new PushSincResampler(buffer_num_frames_, output_num_frames_)));
+    }
+  }
+
+  if (num_bands_ > 1) {
+    split_data_.reset(new ChannelBuffer<float>(
+        buffer_num_frames_, buffer_num_channels_, num_bands_));
+    splitting_filter_.reset(new SplittingFilter(
+        buffer_num_channels_, num_bands_, buffer_num_frames_));
+  }
+}
+
+AudioBuffer::~AudioBuffer() {}
+
+void AudioBuffer::set_downmixing_to_specific_channel(size_t channel) {
+  downmix_by_averaging_ = false;
+  RTC_DCHECK_GT(input_num_channels_, channel);
+  channel_for_downmixing_ = std::min(channel, input_num_channels_ - 1);
+}
+
+void AudioBuffer::set_downmixing_by_averaging() {
+  downmix_by_averaging_ = true;
+}
+
+void AudioBuffer::CopyFrom(const float* const* stacked_data,
+                           const StreamConfig& stream_config) {
+  RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
+  RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
+  RestoreNumChannels();
+  const bool downmix_needed = input_num_channels_ > 1 && num_channels_ == 1;
+
+  const bool resampling_needed = input_num_frames_ != buffer_num_frames_;
+
+  if (downmix_needed) {
+    RTC_DCHECK_GE(kMaxSamplesPerChannel, input_num_frames_);
+
+    std::array<float, kMaxSamplesPerChannel> downmix;
+    if (downmix_by_averaging_) {
+      const float kOneByNumChannels = 1.f / input_num_channels_;
+      for (size_t i = 0; i < input_num_frames_; ++i) {
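+        // Average sample i across all the input channels.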
+ float value = stacked_data[0][i]; + for (size_t j = 1; j < input_num_channels_; ++j) { + value += stacked_data[j][i]; + } + downmix[i] = value * kOneByNumChannels; + } + } + const float* downmixed_data = downmix_by_averaging_ + ? downmix.data() + : stacked_data[channel_for_downmixing_]; + + if (resampling_needed) { + input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, + data_->channels()[0], buffer_num_frames_); + } + const float* data_to_convert = + resampling_needed ? data_->channels()[0] : downmixed_data; + FloatToFloatS16(data_to_convert, buffer_num_frames_, data_->channels()[0]); + } else { + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + input_resamplers_[i]->Resample(stacked_data[i], input_num_frames_, + data_->channels()[i], + buffer_num_frames_); + FloatToFloatS16(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatToFloatS16(stacked_data[i], buffer_num_frames_, + data_->channels()[i]); + } + } + } +} + +void AudioBuffer::CopyTo(const StreamConfig& stream_config, + float* const* stacked_data) { + RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); + + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + stacked_data[i], output_num_frames_); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, + stacked_data[i]); + } + } + + for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { + memcpy(stacked_data[i], stacked_data[0], + output_num_frames_ * sizeof(**stacked_data)); + } +} + +void AudioBuffer::CopyTo(AudioBuffer* buffer) const { + RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_); + + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + buffer->channels()[i], + buffer->num_frames()); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + memcpy(buffer->channels()[i], data_->channels()[i], + buffer_num_frames_ * sizeof(**buffer->channels())); + } + } + + for (size_t i = num_channels_; i < buffer->num_channels(); ++i) { + memcpy(buffer->channels()[i], buffer->channels()[0], + output_num_frames_ * sizeof(**buffer->channels())); + } +} + +void AudioBuffer::RestoreNumChannels() { + num_channels_ = buffer_num_channels_; + data_->set_num_channels(buffer_num_channels_); + if (split_data_.get()) { + split_data_->set_num_channels(buffer_num_channels_); + } +} + +void AudioBuffer::set_num_channels(size_t num_channels) { + RTC_DCHECK_GE(buffer_num_channels_, num_channels); + num_channels_ = num_channels; + data_->set_num_channels(num_channels); + if (split_data_.get()) { + split_data_->set_num_channels(num_channels); + } +} + +// The resampler is only for supporting 48kHz to 16kHz in the reverse stream. 
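+// CopyFrom() below copies interleaved int16 input into the deinterleaved
+// floating-point S16 representation, downmixing and/or resampling as
+// configured. A minimal usage sketch (assuming a 10 ms interleaved stereo
+// frame at 48 kHz; the names below are illustrative only):
+//
+//   AudioBuffer buffer(48000, 2, 48000, 2, 48000, 2);
+//   int16_t interleaved[2 * 480] = {0};  // 480 samples per channel.
+//   buffer.CopyFrom(interleaved, StreamConfig(48000, 2));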
+void AudioBuffer::CopyFrom(const int16_t* const interleaved_data,
+                           const StreamConfig& stream_config) {
+  RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
+  RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
+  RestoreNumChannels();
+
+  const bool resampling_required = input_num_frames_ != buffer_num_frames_;
+
+  const int16_t* interleaved = interleaved_data;
+  if (num_channels_ == 1) {
+    if (input_num_channels_ == 1) {
+      if (resampling_required) {
+        std::array<float, kMaxSamplesPerChannel> float_buffer;
+        S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data());
+        input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_,
+                                       data_->channels()[0],
+                                       buffer_num_frames_);
+      } else {
+        S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]);
+      }
+    } else {
+      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      float* downmixed_data =
+          resampling_required ? float_buffer.data() : data_->channels()[0];
+      if (downmix_by_averaging_) {
+        for (size_t j = 0, k = 0; j < input_num_frames_; ++j) {
+          int32_t sum = 0;
+          for (size_t i = 0; i < input_num_channels_; ++i, ++k) {
+            sum += interleaved[k];
+          }
+          downmixed_data[j] = sum / static_cast<int16_t>(input_num_channels_);
+        }
+      } else {
+        for (size_t j = 0, k = channel_for_downmixing_; j < input_num_frames_;
+             ++j, k += input_num_channels_) {
+          downmixed_data[j] = interleaved[k];
+        }
+      }
+
+      if (resampling_required) {
+        input_resamplers_[0]->Resample(downmixed_data, input_num_frames_,
+                                       data_->channels()[0],
+                                       buffer_num_frames_);
+      }
+    }
+  } else {
+    auto deinterleave_channel = [](size_t channel, size_t num_channels,
+                                   size_t samples_per_channel,
+                                   const int16_t* x, float* y) {
+      for (size_t j = 0, k = channel; j < samples_per_channel;
+           ++j, k += num_channels) {
+        y[j] = x[k];
+      }
+    };
+
+    if (resampling_required) {
+      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      for (size_t i = 0; i < num_channels_; ++i) {
+        deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
+                             float_buffer.data());
+        input_resamplers_[i]->Resample(float_buffer.data(), input_num_frames_,
+                                       data_->channels()[i],
+                                       buffer_num_frames_);
+      }
+    } else {
+      for (size_t i = 0; i < num_channels_; ++i) {
+        deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
+                             data_->channels()[i]);
+      }
+    }
+  }
+}
+
+void AudioBuffer::CopyTo(const StreamConfig& stream_config,
+                         int16_t* const interleaved_data) {
+  const size_t config_num_channels = stream_config.num_channels();
+
+  RTC_DCHECK(config_num_channels == num_channels_ || num_channels_ == 1);
+  RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
+
+  const bool resampling_required = buffer_num_frames_ != output_num_frames_;
+
+  int16_t* interleaved = interleaved_data;
+  if (num_channels_ == 1) {
+    std::array<float, kMaxSamplesPerChannel> float_buffer;
+
+    if (resampling_required) {
+      output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_,
+                                      float_buffer.data(), output_num_frames_);
+    }
+    const float* deinterleaved =
+        resampling_required ? 
float_buffer.data() : data_->channels()[0]; + + if (config_num_channels == 1) { + for (size_t j = 0; j < output_num_frames_; ++j) { + interleaved[j] = FloatS16ToS16(deinterleaved[j]); + } + } else { + for (size_t i = 0, k = 0; i < output_num_frames_; ++i) { + float tmp = FloatS16ToS16(deinterleaved[i]); + for (size_t j = 0; j < config_num_channels; ++j, ++k) { + interleaved[k] = tmp; + } + } + } + } else { + auto interleave_channel = [](size_t channel, size_t num_channels, + size_t samples_per_channel, const float* x, + int16_t* y) { + for (size_t k = 0, j = channel; k < samples_per_channel; + ++k, j += num_channels) { + y[j] = FloatS16ToS16(x[k]); + } + }; + + if (resampling_required) { + for (size_t i = 0; i < num_channels_; ++i) { + std::array float_buffer; + output_resamplers_[i]->Resample(data_->channels()[i], + buffer_num_frames_, float_buffer.data(), + output_num_frames_); + interleave_channel(i, config_num_channels, output_num_frames_, + float_buffer.data(), interleaved); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + interleave_channel(i, config_num_channels, output_num_frames_, + data_->channels()[i], interleaved); + } + } + + for (size_t i = num_channels_; i < config_num_channels; ++i) { + for (size_t j = 0, k = i, n = num_channels_; j < output_num_frames_; + ++j, k += config_num_channels, n += config_num_channels) { + interleaved[k] = interleaved[n]; + } + } + } +} + +void AudioBuffer::SplitIntoFrequencyBands() { + splitting_filter_->Analysis(data_.get(), split_data_.get()); +} + +void AudioBuffer::MergeFrequencyBands() { + splitting_filter_->Synthesis(split_data_.get(), data_.get()); +} + +void AudioBuffer::ExportSplitChannelData( + size_t channel, + int16_t* const* split_band_data) const { + for (size_t k = 0; k < num_bands(); ++k) { + const float* band_data = split_bands_const(channel)[k]; + + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + split_band_data[k][i] = FloatS16ToS16(band_data[i]); + } + } +} + +void AudioBuffer::ImportSplitChannelData( + size_t channel, + const int16_t* const* split_band_data) { + for (size_t k = 0; k < num_bands(); ++k) { + float* band_data = split_bands(channel)[k]; + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + band_data[i] = split_band_data[k][i]; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer.h b/third_party/libwebrtc/modules/audio_processing/audio_buffer.h new file mode 100644 index 0000000000..b9ea3000a2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class PushSincResampler;
+class SplittingFilter;
+
+enum Band { kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 };
+
+// Stores any audio data in a way that allows the audio processing module to
+// operate on it in a controlled manner.
+class AudioBuffer {
+ public:
+  static const int kSplitBandSize = 160;
+  static const int kMaxSampleRate = 384000;
+  AudioBuffer(size_t input_rate,
+              size_t input_num_channels,
+              size_t buffer_rate,
+              size_t buffer_num_channels,
+              size_t output_rate,
+              size_t output_num_channels);
+
+  virtual ~AudioBuffer();
+
+  AudioBuffer(const AudioBuffer&) = delete;
+  AudioBuffer& operator=(const AudioBuffer&) = delete;
+
+  // Specify that downmixing should be done by selecting a single channel.
+  void set_downmixing_to_specific_channel(size_t channel);
+
+  // Specify that downmixing should be done by averaging all channels.
+  void set_downmixing_by_averaging();
+
+  // Set the number of channels in the buffer. The specified number of
+  // channels cannot be larger than the specified `buffer_num_channels`. The
+  // number is also reset at each call to CopyFrom.
+  void set_num_channels(size_t num_channels);
+
+  size_t num_channels() const { return num_channels_; }
+  size_t num_frames() const { return buffer_num_frames_; }
+  size_t num_frames_per_band() const { return num_split_frames_; }
+  size_t num_bands() const { return num_bands_; }
+
+  // Returns pointer arrays to the full-band channels.
+  // Usage:
+  //   channels()[channel][sample].
+  // Where:
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= sample < `buffer_num_frames_`
+  float* const* channels() { return data_->channels(); }
+  const float* const* channels_const() const { return data_->channels(); }
+
+  // Returns pointer arrays to the bands for a specific channel.
+  // Usage:
+  //   split_bands(channel)[band][sample].
+  // Where:
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= band < `num_bands_`
+  //   0 <= sample < `num_split_frames_`
+  const float* const* split_bands_const(size_t channel) const {
+    return split_data_.get() ? split_data_->bands(channel)
+                             : data_->bands(channel);
+  }
+  float* const* split_bands(size_t channel) {
+    return split_data_.get() ? split_data_->bands(channel)
+                             : data_->bands(channel);
+  }
+
+  // Returns a pointer array to the channels for a specific band.
+  // Usage:
+  //   split_channels(band)[channel][sample].
+  // Where:
+  //   0 <= band < `num_bands_`
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= sample < `num_split_frames_`
+  const float* const* split_channels_const(Band band) const {
+    if (split_data_.get()) {
+      return split_data_->channels(band);
+    } else {
+      return band == kBand0To8kHz ? data_->channels() : nullptr;
+    }
+  }
+
+  // Copies data into the buffer.
+  void CopyFrom(const int16_t* const interleaved_data,
+                const StreamConfig& stream_config);
+  void CopyFrom(const float* const* stacked_data,
+                const StreamConfig& stream_config);
+
+  // Copies data from the buffer.
+  void CopyTo(const StreamConfig& stream_config,
+              int16_t* const interleaved_data);
+  void CopyTo(const StreamConfig& stream_config, float* const* stacked_data);
+  void CopyTo(AudioBuffer* buffer) const;
+
+  // Splits the buffer data into frequency bands.
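+  // Only meaningful for multi-band buffers (32 kHz and 48 kHz), for which the
+  // splitting filter is allocated.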
+  void SplitIntoFrequencyBands();
+
+  // Recombines the frequency bands into a full-band signal.
+  void MergeFrequencyBands();
+
+  // Copies the split bands data into the integer two-dimensional array.
+  void ExportSplitChannelData(size_t channel,
+                              int16_t* const* split_band_data) const;
+
+  // Copies the data in the integer two-dimensional array into the split_bands
+  // data.
+  void ImportSplitChannelData(size_t channel,
+                              const int16_t* const* split_band_data);
+
+  static const size_t kMaxSplitFrameLength = 160;
+  static const size_t kMaxNumBands = 3;
+
+  // Deprecated methods, will be removed soon.
+  float* const* channels_f() { return channels(); }
+  const float* const* channels_const_f() const { return channels_const(); }
+  const float* const* split_bands_const_f(size_t channel) const {
+    return split_bands_const(channel);
+  }
+  float* const* split_bands_f(size_t channel) { return split_bands(channel); }
+  const float* const* split_channels_const_f(Band band) const {
+    return split_channels_const(band);
+  }
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(AudioBufferTest,
+                           SetNumChannelsSetsChannelBuffersNumChannels);
+  void RestoreNumChannels();
+
+  const size_t input_num_frames_;
+  const size_t input_num_channels_;
+  const size_t buffer_num_frames_;
+  const size_t buffer_num_channels_;
+  const size_t output_num_frames_;
+  const size_t output_num_channels_;
+
+  size_t num_channels_;
+  size_t num_bands_;
+  size_t num_split_frames_;
+
+  std::unique_ptr<ChannelBuffer<float>> data_;
+  std::unique_ptr<ChannelBuffer<float>> split_data_;
+  std::unique_ptr<SplittingFilter> splitting_filter_;
+  std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_;
+  std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_;
+  bool downmix_by_averaging_ = true;
+  size_t channel_for_downmixing_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build
new file mode 100644
index 0000000000..2291d2981d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build
@@ -0,0 +1,235 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = 
True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc new file mode 100644 index 0000000000..f3b2ddc689 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/audio_buffer.h"
+
+#include <cmath>
+
+#include "test/gtest.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+namespace webrtc {
+
+namespace {
+
+const size_t kSampleRateHz = 48000u;
+const size_t kStereo = 2u;
+const size_t kMono = 1u;
+
+void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) {
+  EXPECT_EQ(ab.num_channels(), num_channels);
+}
+
+}  // namespace
+
+TEST(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels) {
+  AudioBuffer ab(kSampleRateHz, kStereo, kSampleRateHz, kStereo, kSampleRateHz,
+                 kStereo);
+  ExpectNumChannels(ab, kStereo);
+  ab.set_num_channels(1);
+  ExpectNumChannels(ab, kMono);
+  ab.RestoreNumChannels();
+  ExpectNumChannels(ab, kStereo);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST(AudioBufferDeathTest, SetNumChannelsDeathTest) {
+  AudioBuffer ab(kSampleRateHz, kMono, kSampleRateHz, kMono, kSampleRateHz,
+                 kMono);
+  RTC_EXPECT_DEATH(ab.set_num_channels(kStereo), "num_channels");
+}
+#endif
+
+TEST(AudioBufferTest, CopyWithoutResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 32000, 2);
+  AudioBuffer ab2(32000, 2, 32000, 2, 32000, 2);
+  // Fill first buffer.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = i + ch;
+    }
+  }
+  // Copy to second buffer.
+  ab1.CopyTo(&ab2);
+  // Verify content of second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      EXPECT_EQ(ab2.channels()[ch][i], i + ch);
+    }
+  }
+}
+
+TEST(AudioBufferTest, CopyWithResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 48000, 2);
+  AudioBuffer ab2(48000, 2, 48000, 2, 48000, 2);
+  float energy_ab1 = 0.f;
+  float energy_ab2 = 0.f;
+  const float pi = std::acos(-1.f);
+  // Put a sine and compute energy of first buffer.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = std::sin(2 * pi * 100.f / 32000.f * i);
+      energy_ab1 += ab1.channels()[ch][i] * ab1.channels()[ch][i];
+    }
+  }
+  // Copy to second buffer.
+  ab1.CopyTo(&ab2);
+  // Compute energy of second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      energy_ab2 += ab2.channels()[ch][i] * ab2.channels()[ch][i];
+    }
+  }
+  // Verify that energies match.
+  EXPECT_NEAR(energy_ab1, energy_ab2 * 32000.f / 48000.f, .01f * energy_ab1);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build
new file mode 100644
index 0000000000..683d86e743
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_proxies_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build new file mode 100644 index 0000000000..9b90bca379 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_view_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc new file mode 100644 index 0000000000..fd25bc3b0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc @@ -0,0 +1,51 @@ +/* + * Copyright 2018 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/include/audio_frame_view.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "test/gtest.h" + +namespace webrtc { +TEST(AudioFrameTest, ConstructFromAudioBuffer) { + constexpr int kSampleRateHz = 48000; + constexpr int kNumChannels = 2; + constexpr float kFloatConstant = 1272.f; + constexpr float kIntConstant = 17252; + const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels); + webrtc::AudioBuffer buffer( + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels()); + + AudioFrameView non_const_view(buffer.channels(), buffer.num_channels(), + buffer.num_frames()); + // Modification is allowed. + non_const_view.channel(0)[0] = kFloatConstant; + EXPECT_EQ(buffer.channels()[0][0], kFloatConstant); + + AudioFrameView const_view( + buffer.channels(), buffer.num_channels(), buffer.num_frames()); + // Modification is not allowed. 
+  // const_view.channel(0)[0] = kFloatConstant;
+
+  // Assignment is allowed.
+  AudioFrameView<const float> other_const_view = non_const_view;
+  static_cast<void>(other_const_view);
+
+  // But not the other way. The following will fail:
+  // non_const_view = other_const_view;
+
+  AudioFrameView<float> non_const_float_view(
+      buffer.channels(), buffer.num_channels(), buffer.num_frames());
+  non_const_float_view.channel(0)[0] = kIntConstant;
+  EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc
new file mode 100644
index 0000000000..a246448c26
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "api/make_ref_counted.h"
+#include "modules/audio_processing/audio_processing_impl.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+AudioProcessingBuilder::AudioProcessingBuilder() = default;
+AudioProcessingBuilder::~AudioProcessingBuilder() = default;
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilder::Create() {
+#ifdef WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+  // Return a null pointer when the APM is excluded from the build.
+  return nullptr;
+#else  // WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+  return rtc::make_ref_counted<AudioProcessingImpl>(
+      config_, std::move(capture_post_processing_),
+      std::move(render_pre_processing_), std::move(echo_control_factory_),
+      std::move(echo_detector_), std::move(capture_analyzer_));
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build
new file mode 100644
index 0000000000..ab0ca7113f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build
@@ -0,0 +1,239 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] 
= "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_processing_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc new file mode 100644 index 0000000000..c304453388 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc @@ -0,0 +1,2649 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/audio_processing_impl.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/audio_frame.h" +#include "common_audio/audio_converter.h" +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/optionally_built_submodule_creators.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "rtc_base/trace_event.h" +#include "system_wrappers/include/denormal_disabler.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" + +#define RETURN_ON_ERR(expr) \ + do { \ + int err = (expr); \ + if (err != kNoError) { \ + return err; \ + } \ + } while (0) + +namespace webrtc { + +namespace { + +bool SampleRateSupportsMultiBand(int sample_rate_hz) { + return sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz; +} + +// Checks whether the high-pass filter should be done in the full-band. +bool EnforceSplitBandHpf() { + return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch"); +} + +// Checks whether AEC3 should be allowed to decide what the default +// configuration should be based on the render and capture channel configuration +// at hand. +bool UseSetupSpecificDefaultAec3Congfig() { + return !field_trial::IsEnabled( + "WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch"); +} + +// Identify the native processing rate that best handles a sample rate. +int SuitableProcessRate(int minimum_rate, + int max_splitting_rate, + bool band_splitting_required) { + const int uppermost_native_rate = + band_splitting_required ? max_splitting_rate : 48000; + for (auto rate : {16000, 32000, 48000}) { + if (rate >= uppermost_native_rate) { + return uppermost_native_rate; + } + if (rate >= minimum_rate) { + return rate; + } + } + RTC_DCHECK_NOTREACHED(); + return uppermost_native_rate; +} + +GainControl::Mode Agc1ConfigModeToInterfaceMode( + AudioProcessing::Config::GainController1::Mode mode) { + using Agc1Config = AudioProcessing::Config::GainController1; + switch (mode) { + case Agc1Config::kAdaptiveAnalog: + return GainControl::kAdaptiveAnalog; + case Agc1Config::kAdaptiveDigital: + return GainControl::kAdaptiveDigital; + case Agc1Config::kFixedDigital: + return GainControl::kFixedDigital; + } + RTC_CHECK_NOTREACHED(); +} + +bool MinimizeProcessingForUnusedOutput() { + return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch"); +} + +// Maximum lengths that frame of samples being passed from the render side to +// the capture side can have (does not apply to AEC3). +static const size_t kMaxAllowedValuesOfSamplesPerBand = 160; +static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480; + +// Maximum number of frames to buffer in the render queue. +// TODO(peah): Decrease this once we properly handle hugely unbalanced +// reverse and forward call numbers. 
+static const size_t kMaxNumFramesToBuffer = 100;
+
+void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio,
+                                          std::vector<float>& packed_buffer) {
+  packed_buffer.clear();
+  packed_buffer.insert(packed_buffer.end(), audio.channels_const()[0],
+                       audio.channels_const()[0] + audio.num_frames());
+}
+
+// Options for gracefully handling processing errors.
+enum class FormatErrorOutputOption {
+  kOutputExactCopyOfInput,
+  kOutputBroadcastCopyOfFirstInputChannel,
+  kOutputSilence,
+  kDoNothing
+};
+
+enum class AudioFormatValidity {
+  // Format is supported by APM.
+  kValidAndSupported,
+  // Format has a reasonable interpretation but is not supported.
+  kValidButUnsupportedSampleRate,
+  // The remaining enum values signal that the audio does not have a
+  // reasonable interpretation and cannot be used.
+  kInvalidSampleRate,
+  kInvalidChannelCount
+};
+
+AudioFormatValidity ValidateAudioFormat(const StreamConfig& config) {
+  if (config.sample_rate_hz() < 0)
+    return AudioFormatValidity::kInvalidSampleRate;
+  if (config.num_channels() == 0)
+    return AudioFormatValidity::kInvalidChannelCount;
+
+  // Format has a reasonable interpretation, but may still be unsupported.
+  if (config.sample_rate_hz() < 8000 ||
+      config.sample_rate_hz() > AudioBuffer::kMaxSampleRate)
+    return AudioFormatValidity::kValidButUnsupportedSampleRate;
+
+  // Format is fully supported.
+  return AudioFormatValidity::kValidAndSupported;
+}
+
+int AudioFormatValidityToErrorCode(AudioFormatValidity validity) {
+  switch (validity) {
+    case AudioFormatValidity::kValidAndSupported:
+      return AudioProcessing::kNoError;
+    case AudioFormatValidity::kValidButUnsupportedSampleRate:  // fall-through
+    case AudioFormatValidity::kInvalidSampleRate:
+      return AudioProcessing::kBadSampleRateError;
+    case AudioFormatValidity::kInvalidChannelCount:
+      return AudioProcessing::kBadNumberChannelsError;
+  }
+  RTC_DCHECK(false);
+}
+
+// Returns an AudioProcessing::Error together with the best possible option
+// for output audio content.
+std::pair<int, FormatErrorOutputOption> ChooseErrorOutputOption(
+    const StreamConfig& input_config,
+    const StreamConfig& output_config) {
+  AudioFormatValidity input_validity = ValidateAudioFormat(input_config);
+  AudioFormatValidity output_validity = ValidateAudioFormat(output_config);
+
+  if (input_validity == AudioFormatValidity::kValidAndSupported &&
+      output_validity == AudioFormatValidity::kValidAndSupported &&
+      (output_config.num_channels() == 1 ||
+       output_config.num_channels() == input_config.num_channels())) {
+    return {AudioProcessing::kNoError, FormatErrorOutputOption::kDoNothing};
+  }
+
+  int error_code = AudioFormatValidityToErrorCode(input_validity);
+  if (error_code == AudioProcessing::kNoError) {
+    error_code = AudioFormatValidityToErrorCode(output_validity);
+  }
+  if (error_code == AudioProcessing::kNoError) {
+    // The individual formats are valid but there is some error - must be
+    // channel mismatch.
+    error_code = AudioProcessing::kBadNumberChannelsError;
+  }
+
+  FormatErrorOutputOption output_option;
+  if (output_validity != AudioFormatValidity::kValidAndSupported &&
+      output_validity != AudioFormatValidity::kValidButUnsupportedSampleRate) {
+    // The output format is uninterpretable: cannot do anything.
+    output_option = FormatErrorOutputOption::kDoNothing;
+  } else if (input_validity != AudioFormatValidity::kValidAndSupported &&
+             input_validity !=
+                 AudioFormatValidity::kValidButUnsupportedSampleRate) {
+    // The input format is uninterpretable: cannot use it, must output silence.
+    output_option = FormatErrorOutputOption::kOutputSilence;
+  } else if (input_config.sample_rate_hz() != output_config.sample_rate_hz()) {
+    // Sample rates do not match: cannot copy input into output; output
+    // silence.
+    // Note: If the sample rates are in a supported range, we could resample.
+    // However, that would significantly increase complexity of this error
+    // handling code.
+    output_option = FormatErrorOutputOption::kOutputSilence;
+  } else if (input_config.num_channels() != output_config.num_channels()) {
+    // Channel counts do not match: we cannot easily map input channels to
+    // output channels.
+    output_option =
+        FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel;
+  } else {
+    // The formats match exactly.
+    RTC_DCHECK(input_config == output_config);
+    output_option = FormatErrorOutputOption::kOutputExactCopyOfInput;
+  }
+  return std::make_pair(error_code, output_option);
+}
+
+// Checks if the audio format is supported. If not, the output is populated
+// in a best-effort manner and an APM error code is returned.
+int HandleUnsupportedAudioFormats(const int16_t* const src,
+                                  const StreamConfig& input_config,
+                                  const StreamConfig& output_config,
+                                  int16_t* const dest) {
+  RTC_DCHECK(src);
+  RTC_DCHECK(dest);
+
+  auto [error_code, output_option] =
+      ChooseErrorOutputOption(input_config, output_config);
+  if (error_code == AudioProcessing::kNoError)
+    return AudioProcessing::kNoError;
+
+  const size_t num_output_channels = output_config.num_channels();
+  switch (output_option) {
+    case FormatErrorOutputOption::kOutputSilence:
+      memset(dest, 0, output_config.num_samples() * sizeof(int16_t));
+      break;
+    case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
+      for (size_t i = 0; i < output_config.num_frames(); ++i) {
+        int16_t sample = src[input_config.num_channels() * i];
+        for (size_t ch = 0; ch < num_output_channels; ++ch) {
+          dest[ch + num_output_channels * i] = sample;
+        }
+      }
+      break;
+    case FormatErrorOutputOption::kOutputExactCopyOfInput:
+      memcpy(dest, src, output_config.num_samples() * sizeof(int16_t));
+      break;
+    case FormatErrorOutputOption::kDoNothing:
+      break;
+  }
+  return error_code;
+}
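[Editor's aside: the broadcast fallback above relies on interleaved sample indexing, `dest[ch + num_output_channels * i]`. A standalone sketch of just that fan-out, with illustrative sizes rather than APM constants.]

```cpp
// Minimal sketch of the "broadcast first input channel" fallback for
// interleaved int16 audio: frame i's channel-0 sample is copied to every
// output channel of frame i.
#include <cassert>
#include <cstddef>
#include <cstdint>

void BroadcastFirstChannel(const int16_t* src, size_t in_channels,
                           size_t frames, int16_t* dest, size_t out_channels) {
  for (size_t i = 0; i < frames; ++i) {
    const int16_t sample = src[in_channels * i];  // channel 0 of frame i
    for (size_t ch = 0; ch < out_channels; ++ch) {
      dest[ch + out_channels * i] = sample;
    }
  }
}

int main() {
  // One stereo input frame {L=7, R=9} fanned out to three output channels.
  const int16_t src[] = {7, 9};
  int16_t dest[3] = {0, 0, 0};
  BroadcastFirstChannel(src, /*in_channels=*/2, /*frames=*/1, dest,
                        /*out_channels=*/3);
  assert(dest[0] == 7 && dest[1] == 7 && dest[2] == 7);
}
```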
+
+// Checks if the audio format is supported. If not, the output is populated
+// in a best-effort manner and an APM error code is returned.
+int HandleUnsupportedAudioFormats(const float* const* src,
+                                  const StreamConfig& input_config,
+                                  const StreamConfig& output_config,
+                                  float* const* dest) {
+  RTC_DCHECK(src);
+  RTC_DCHECK(dest);
+  for (size_t i = 0; i < input_config.num_channels(); ++i) {
+    RTC_DCHECK(src[i]);
+  }
+  for (size_t i = 0; i < output_config.num_channels(); ++i) {
+    RTC_DCHECK(dest[i]);
+  }
+
+  auto [error_code, output_option] =
+      ChooseErrorOutputOption(input_config, output_config);
+  if (error_code == AudioProcessing::kNoError)
+    return AudioProcessing::kNoError;
+
+  const size_t num_output_channels = output_config.num_channels();
+  switch (output_option) {
+    case FormatErrorOutputOption::kOutputSilence:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memset(dest[ch], 0, output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memcpy(dest[ch], src[0], output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kOutputExactCopyOfInput:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memcpy(dest[ch], src[ch], output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kDoNothing:
+      break;
+  }
+  return error_code;
+}
+
+using DownmixMethod = AudioProcessing::Config::Pipeline::DownmixMethod;
+
+void SetDownmixMethod(AudioBuffer& buffer, DownmixMethod method) {
+  switch (method) {
+    case DownmixMethod::kAverageChannels:
+      buffer.set_downmixing_by_averaging();
+      break;
+    case DownmixMethod::kUseFirstChannel:
+      buffer.set_downmixing_to_specific_channel(/*channel=*/0);
+      break;
+  }
+}
+
+constexpr int kUnspecifiedDataDumpInputVolume = -100;
+
+}  // namespace
+
+// Throughout webrtc, it's assumed that success is represented by zero.
+static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
+
+absl::optional<AudioProcessingImpl::GainController2ExperimentParams>
+AudioProcessingImpl::GetGainController2ExperimentParams() {
+  constexpr char kFieldTrialName[] = "WebRTC-Audio-GainController2";
+
+  if (!field_trial::IsEnabled(kFieldTrialName)) {
+    return absl::nullopt;
+  }
+
+  FieldTrialFlag enabled("Enabled", false);
+
+  // Whether the gain control should switch to AGC2. Enabled by default.
+  FieldTrialParameter<bool> switch_to_agc2("switch_to_agc2", true);
+
+  // AGC2 input volume controller configuration.
+  constexpr InputVolumeController::Config kDefaultInputVolumeControllerConfig;
+  FieldTrialConstrained<int> min_input_volume(
+      "min_input_volume", kDefaultInputVolumeControllerConfig.min_input_volume,
+      0, 255);
+  FieldTrialConstrained<int> clipped_level_min(
+      "clipped_level_min",
+      kDefaultInputVolumeControllerConfig.clipped_level_min, 0, 255);
+  FieldTrialConstrained<int> clipped_level_step(
+      "clipped_level_step",
+      kDefaultInputVolumeControllerConfig.clipped_level_step, 0, 255);
+  FieldTrialConstrained<double> clipped_ratio_threshold(
+      "clipped_ratio_threshold",
+      kDefaultInputVolumeControllerConfig.clipped_ratio_threshold, 0, 1);
+  FieldTrialConstrained<int> clipped_wait_frames(
+      "clipped_wait_frames",
+      kDefaultInputVolumeControllerConfig.clipped_wait_frames, 0,
+      absl::nullopt);
+  FieldTrialParameter<bool> enable_clipping_predictor(
+      "enable_clipping_predictor",
+      kDefaultInputVolumeControllerConfig.enable_clipping_predictor);
+  FieldTrialConstrained<int> target_range_max_dbfs(
+      "target_range_max_dbfs",
+      kDefaultInputVolumeControllerConfig.target_range_max_dbfs, -90, 30);
+  FieldTrialConstrained<int> target_range_min_dbfs(
+      "target_range_min_dbfs",
+      kDefaultInputVolumeControllerConfig.target_range_min_dbfs, -90, 30);
+  FieldTrialConstrained<int> update_input_volume_wait_frames(
+      "update_input_volume_wait_frames",
+      kDefaultInputVolumeControllerConfig.update_input_volume_wait_frames, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> speech_probability_threshold(
+      "speech_probability_threshold",
+      kDefaultInputVolumeControllerConfig.speech_probability_threshold, 0, 1);
+  FieldTrialConstrained<double> speech_ratio_threshold(
+      "speech_ratio_threshold",
+      kDefaultInputVolumeControllerConfig.speech_ratio_threshold, 0, 1);
+
+  // AGC2 adaptive digital controller configuration.
+  constexpr AudioProcessing::Config::GainController2::AdaptiveDigital
+      kDefaultAdaptiveDigitalConfig;
+  FieldTrialConstrained<double> headroom_db(
+      "headroom_db", kDefaultAdaptiveDigitalConfig.headroom_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_gain_db(
+      "max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> initial_gain_db(
+      "initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_gain_change_db_per_second(
+      "max_gain_change_db_per_second",
+      kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_output_noise_level_dbfs(
+      "max_output_noise_level_dbfs",
+      kDefaultAdaptiveDigitalConfig.max_output_noise_level_dbfs, absl::nullopt,
+      0);
+
+  // Transient suppressor.
+  FieldTrialParameter<bool> disallow_transient_suppressor_usage(
+      "disallow_transient_suppressor_usage", false);
+
+  // Field-trial based override for the input volume controller and adaptive
+  // digital configs.
+  ParseFieldTrial(
+      {&enabled, &switch_to_agc2, &min_input_volume, &clipped_level_min,
+       &clipped_level_step, &clipped_ratio_threshold, &clipped_wait_frames,
+       &enable_clipping_predictor, &target_range_max_dbfs,
+       &target_range_min_dbfs, &update_input_volume_wait_frames,
+       &speech_probability_threshold, &speech_ratio_threshold, &headroom_db,
+       &max_gain_db, &initial_gain_db, &max_gain_change_db_per_second,
+       &max_output_noise_level_dbfs, &disallow_transient_suppressor_usage},
+      field_trial::FindFullName(kFieldTrialName));
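[Editor's aside: the constrained parameters above accept a parsed value only when it falls inside their declared [min, max] interval; otherwise the default is kept. A minimal sketch of that contract; this is my own mock, not the WebRTC `FieldTrialConstrained` implementation, and the default value 70 is purely illustrative.]

```cpp
// Mock of the constrained-parameter idea: out-of-range candidates are
// rejected and the default value survives.
#include <cassert>
#include <optional>

template <typename T>
class ConstrainedParam {
 public:
  ConstrainedParam(T default_value, std::optional<T> min, std::optional<T> max)
      : value_(default_value), min_(min), max_(max) {}
  void TrySet(T candidate) {
    if ((min_ && candidate < *min_) || (max_ && candidate > *max_)) {
      return;  // Out of range: keep the current (default) value.
    }
    value_ = candidate;  // In range: accept.
  }
  T Get() const { return value_; }

 private:
  T value_;
  std::optional<T> min_, max_;
};

int main() {
  ConstrainedParam<int> clipped_level_min(/*default_value=*/70, 0, 255);
  clipped_level_min.TrySet(300);  // Outside [0, 255]: ignored.
  assert(clipped_level_min.Get() == 70);
  clipped_level_min.TrySet(20);   // Accepted.
  assert(clipped_level_min.Get() == 20);
}
```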
+  // Checked already by `IsEnabled()` before parsing, therefore always true.
+  RTC_DCHECK(enabled);
+
+  const bool do_not_change_agc_config = !switch_to_agc2.Get();
+  if (do_not_change_agc_config && !disallow_transient_suppressor_usage.Get()) {
+    // Return an unspecified value since, in this case, both the AGC2 and TS
+    // configurations won't be adjusted.
+    return absl::nullopt;
+  }
+  using Params = AudioProcessingImpl::GainController2ExperimentParams;
+  if (do_not_change_agc_config) {
+    // Return a value that leaves the AGC2 config unchanged and that always
+    // disables TS.
+    return Params{.agc2_config = absl::nullopt,
+                  .disallow_transient_suppressor_usage = true};
+  }
+  // Return a value that switches all the gain control to AGC2.
+  return Params{
+      .agc2_config =
+          Params::Agc2Config{
+              .input_volume_controller =
+                  {
+                      .min_input_volume = min_input_volume.Get(),
+                      .clipped_level_min = clipped_level_min.Get(),
+                      .clipped_level_step = clipped_level_step.Get(),
+                      .clipped_ratio_threshold =
+                          static_cast<float>(clipped_ratio_threshold.Get()),
+                      .clipped_wait_frames = clipped_wait_frames.Get(),
+                      .enable_clipping_predictor =
+                          enable_clipping_predictor.Get(),
+                      .target_range_max_dbfs = target_range_max_dbfs.Get(),
+                      .target_range_min_dbfs = target_range_min_dbfs.Get(),
+                      .update_input_volume_wait_frames =
+                          update_input_volume_wait_frames.Get(),
+                      .speech_probability_threshold = static_cast<float>(
+                          speech_probability_threshold.Get()),
+                      .speech_ratio_threshold =
+                          static_cast<float>(speech_ratio_threshold.Get()),
+                  },
+              .adaptive_digital_controller =
+                  {
+                      .enabled = false,
+                      .headroom_db = static_cast<float>(headroom_db.Get()),
+                      .max_gain_db = static_cast<float>(max_gain_db.Get()),
+                      .initial_gain_db =
+                          static_cast<float>(initial_gain_db.Get()),
+                      .max_gain_change_db_per_second = static_cast<float>(
+                          max_gain_change_db_per_second.Get()),
+                      .max_output_noise_level_dbfs =
+                          static_cast<float>(max_output_noise_level_dbfs.Get()),
+                  }},
+      .disallow_transient_suppressor_usage =
+          disallow_transient_suppressor_usage.Get()};
+}
+
+AudioProcessing::Config AudioProcessingImpl::AdjustConfig(
+    const AudioProcessing::Config& config,
+    const absl::optional<AudioProcessingImpl::GainController2ExperimentParams>&
+        experiment_params) {
+  if (!experiment_params.has_value() ||
+      (!experiment_params->agc2_config.has_value() &&
+       !experiment_params->disallow_transient_suppressor_usage)) {
+    // When the experiment parameters are unspecified or when the AGC and TS
+    // configurations are not overridden, return the unmodified configuration.
+    return config;
+  }
+
+  AudioProcessing::Config adjusted_config = config;
+
+  // Override the transient suppressor configuration.
+  if (experiment_params->disallow_transient_suppressor_usage) {
+    adjusted_config.transient_suppression.enabled = false;
+  }
+
+  // Override the auto gain control configuration if the AGC1 analog gain
+  // controller is active and `experiment_params->agc2_config` is specified.
+  const bool agc1_analog_enabled =
+      config.gain_controller1.enabled &&
+      (config.gain_controller1.mode ==
+           AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
+       config.gain_controller1.analog_gain_controller.enabled);
+  if (agc1_analog_enabled && experiment_params->agc2_config.has_value()) {
+    // Check that the unadjusted AGC config meets the preconditions.
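[Editor's aside: the precondition checked below boils down to an exclusive-or: exactly one of the two recognized AGC setups, hybrid AGC or full AGC1, may be detected. A tiny standalone illustration of the `!=`-as-XOR idiom used for that check.]

```cpp
// "Exactly one" of two booleans is true iff they differ: logical XOR via !=.
#include <cassert>

bool ExactlyOneVolumeController(bool hybrid_agc, bool full_agc1) {
  return hybrid_agc != full_agc1;
}

int main() {
  assert(ExactlyOneVolumeController(true, false));
  assert(ExactlyOneVolumeController(false, true));
  assert(!ExactlyOneVolumeController(false, false));  // None enabled.
  assert(!ExactlyOneVolumeController(true, true));    // Both claimed.
}
```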
+    const bool hybrid_agc_config_detected =
+        config.gain_controller1.enabled &&
+        config.gain_controller1.analog_gain_controller.enabled &&
+        !config.gain_controller1.analog_gain_controller
+             .enable_digital_adaptive &&
+        config.gain_controller2.enabled &&
+        config.gain_controller2.adaptive_digital.enabled;
+    const bool full_agc1_config_detected =
+        config.gain_controller1.enabled &&
+        config.gain_controller1.analog_gain_controller.enabled &&
+        config.gain_controller1.analog_gain_controller
+            .enable_digital_adaptive &&
+        !config.gain_controller2.enabled;
+    const bool one_and_only_one_input_volume_controller =
+        hybrid_agc_config_detected != full_agc1_config_detected;
+    const bool agc2_input_volume_controller_enabled =
+        config.gain_controller2.enabled &&
+        config.gain_controller2.input_volume_controller.enabled;
+    if (!one_and_only_one_input_volume_controller ||
+        agc2_input_volume_controller_enabled) {
+      RTC_LOG(LS_ERROR) << "Cannot adjust AGC config (precondition failed)";
+      if (!one_and_only_one_input_volume_controller)
+        RTC_LOG(LS_ERROR)
+            << "One and only one input volume controller must be enabled.";
+      if (agc2_input_volume_controller_enabled)
+        RTC_LOG(LS_ERROR)
+            << "The AGC2 input volume controller must be disabled.";
+    } else {
+      adjusted_config.gain_controller1.enabled = false;
+      adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
+
+      adjusted_config.gain_controller2.enabled = true;
+      adjusted_config.gain_controller2.input_volume_controller.enabled = true;
+      adjusted_config.gain_controller2.adaptive_digital =
+          experiment_params->agc2_config->adaptive_digital_controller;
+      adjusted_config.gain_controller2.adaptive_digital.enabled = true;
+    }
+  }
+
+  return adjusted_config;
+}
+
+bool AudioProcessingImpl::UseApmVadSubModule(
+    const AudioProcessing::Config& config,
+    const absl::optional<GainController2ExperimentParams>& experiment_params) {
+  // The VAD as an APM sub-module is needed only in one case: when TS and AGC2
+  // are both enabled and the AGC2 experiment is running with parameters that
+  // require fully switching the gain control to AGC2.
+ return config.transient_suppression.enabled && + config.gain_controller2.enabled && + (config.gain_controller2.input_volume_controller.enabled || + config.gain_controller2.adaptive_digital.enabled) && + experiment_params.has_value() && + experiment_params->agc2_config.has_value(); +} + +AudioProcessingImpl::SubmoduleStates::SubmoduleStates( + bool capture_post_processor_enabled, + bool render_pre_processor_enabled, + bool capture_analyzer_enabled) + : capture_post_processor_enabled_(capture_post_processor_enabled), + render_pre_processor_enabled_(render_pre_processor_enabled), + capture_analyzer_enabled_(capture_analyzer_enabled) {} + +bool AudioProcessingImpl::SubmoduleStates::Update( + bool high_pass_filter_enabled, + bool mobile_echo_controller_enabled, + bool noise_suppressor_enabled, + bool adaptive_gain_controller_enabled, + bool gain_controller2_enabled, + bool voice_activity_detector_enabled, + bool gain_adjustment_enabled, + bool echo_controller_enabled, + bool transient_suppressor_enabled) { + bool changed = false; + changed |= (high_pass_filter_enabled != high_pass_filter_enabled_); + changed |= + (mobile_echo_controller_enabled != mobile_echo_controller_enabled_); + changed |= (noise_suppressor_enabled != noise_suppressor_enabled_); + changed |= + (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); + changed |= (gain_controller2_enabled != gain_controller2_enabled_); + changed |= + (voice_activity_detector_enabled != voice_activity_detector_enabled_); + changed |= (gain_adjustment_enabled != gain_adjustment_enabled_); + changed |= (echo_controller_enabled != echo_controller_enabled_); + changed |= (transient_suppressor_enabled != transient_suppressor_enabled_); + if (changed) { + high_pass_filter_enabled_ = high_pass_filter_enabled; + mobile_echo_controller_enabled_ = mobile_echo_controller_enabled; + noise_suppressor_enabled_ = noise_suppressor_enabled; + adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; + gain_controller2_enabled_ = gain_controller2_enabled; + voice_activity_detector_enabled_ = voice_activity_detector_enabled; + gain_adjustment_enabled_ = gain_adjustment_enabled; + echo_controller_enabled_ = echo_controller_enabled; + transient_suppressor_enabled_ = transient_suppressor_enabled; + } + + changed |= first_update_; + first_update_ = false; + return changed; +} + +bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandSubModulesActive() + const { + return CaptureMultiBandProcessingPresent(); +} + +bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingPresent() + const { + // If echo controller is present, assume it performs active processing. 
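[Editor's aside: a small illustration of the Present/Active split used here. `...Present()` simply calls `...Active()` under the assumption that an injected echo controller is actively processing. The functions below are simplified stand-ins, not the real member functions.]

```cpp
// Simplified Present/Active pair: "present" treats the echo controller as
// active unconditionally; "active" takes the EC state as an argument.
#include <cassert>

bool MultiBandActive(bool hpf, bool echo_controller, bool ec_active) {
  return hpf || (echo_controller && ec_active);
}

bool MultiBandPresent(bool hpf, bool echo_controller) {
  // Presence assumes the echo controller performs active processing.
  return MultiBandActive(hpf, echo_controller, /*ec_active=*/true);
}

int main() {
  assert(MultiBandPresent(false, true));         // EC counted as active.
  assert(!MultiBandActive(false, true, false));  // Unless told otherwise.
}
```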
+  return CaptureMultiBandProcessingActive(/*ec_processing_active=*/true);
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingActive(
+    bool ec_processing_active) const {
+  return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
+         noise_suppressor_enabled_ || adaptive_gain_controller_enabled_ ||
+         (echo_controller_enabled_ && ec_processing_active);
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureFullBandProcessingActive()
+    const {
+  return gain_controller2_enabled_ || capture_post_processor_enabled_ ||
+         gain_adjustment_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureAnalyzerActive() const {
+  return capture_analyzer_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandSubModulesActive()
+    const {
+  return RenderMultiBandProcessingActive() ||
+         mobile_echo_controller_enabled_ ||
+         adaptive_gain_controller_enabled_ || echo_controller_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderFullBandProcessingActive()
+    const {
+  return render_pre_processor_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandProcessingActive()
+    const {
+  return false;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::HighPassFilteringRequired() const {
+  return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
+         noise_suppressor_enabled_;
+}
+
+AudioProcessingImpl::AudioProcessingImpl()
+    : AudioProcessingImpl(/*config=*/{},
+                          /*capture_post_processor=*/nullptr,
+                          /*render_pre_processor=*/nullptr,
+                          /*echo_control_factory=*/nullptr,
+                          /*echo_detector=*/nullptr,
+                          /*capture_analyzer=*/nullptr) {}
+
+std::atomic<int> AudioProcessingImpl::instance_count_(0);
+
+AudioProcessingImpl::AudioProcessingImpl(
+    const AudioProcessing::Config& config,
+    std::unique_ptr<CustomProcessing> capture_post_processor,
+    std::unique_ptr<CustomProcessing> render_pre_processor,
+    std::unique_ptr<EchoControlFactory> echo_control_factory,
+    rtc::scoped_refptr<EchoDetector> echo_detector,
+    std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      use_setup_specific_default_aec3_config_(
+          UseSetupSpecificDefaultAec3Congfig()),
+      gain_controller2_experiment_params_(
+          GetGainController2ExperimentParams()),
+      transient_suppressor_vad_mode_(TransientSuppressor::VadMode::kDefault),
+      capture_runtime_settings_(RuntimeSettingQueueSize()),
+      render_runtime_settings_(RuntimeSettingQueueSize()),
+      capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
+      render_runtime_settings_enqueuer_(&render_runtime_settings_),
+      echo_control_factory_(std::move(echo_control_factory)),
+      config_(AdjustConfig(config, gain_controller2_experiment_params_)),
+      submodule_states_(!!capture_post_processor,
+                        !!render_pre_processor,
+                        !!capture_analyzer),
+      submodules_(std::move(capture_post_processor),
+                  std::move(render_pre_processor),
+                  std::move(echo_detector),
+                  std::move(capture_analyzer)),
+      constants_(!field_trial::IsEnabled(
+                     "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
+                 !field_trial::IsEnabled(
+                     "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
+                 EnforceSplitBandHpf(),
+                 MinimizeProcessingForUnusedOutput(),
+                 field_trial::IsEnabled(
+                     "WebRTC-TransientSuppressorForcedOff")),
+      capture_(),
+      capture_nonlocked_(),
+      applied_input_volume_stats_reporter_(
+          InputVolumeStatsReporter::InputVolumeType::kApplied),
+      recommended_input_volume_stats_reporter_(
+          InputVolumeStatsReporter::InputVolumeType::kRecommended) {
+  RTC_LOG(LS_INFO) << "Injected APM submodules:"
+                      "\nEcho control factory: "
+                   << !!echo_control_factory_
detector: " << !!submodules_.echo_detector + << "\nCapture analyzer: " << !!submodules_.capture_analyzer + << "\nCapture post processor: " + << !!submodules_.capture_post_processor + << "\nRender pre processor: " + << !!submodules_.render_pre_processor; + if (!DenormalDisabler::IsSupported()) { + RTC_LOG(LS_INFO) << "Denormal disabler unsupported"; + } + + RTC_LOG(LS_INFO) << "AudioProcessing: " << config_.ToString(); + + // Mark Echo Controller enabled if a factory is injected. + capture_nonlocked_.echo_controller_enabled = + static_cast(echo_control_factory_); + + Initialize(); +} + +AudioProcessingImpl::~AudioProcessingImpl() = default; + +int AudioProcessingImpl::Initialize() { + // Run in a single-threaded manner during initialization. + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + InitializeLocked(); + return kNoError; +} + +int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) { + // Run in a single-threaded manner during initialization. + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + InitializeLocked(processing_config); + return kNoError; +} + +void AudioProcessingImpl::MaybeInitializeRender( + const StreamConfig& input_config, + const StreamConfig& output_config) { + ProcessingConfig processing_config = formats_.api_format; + processing_config.reverse_input_stream() = input_config; + processing_config.reverse_output_stream() = output_config; + + if (processing_config == formats_.api_format) { + return; + } + + MutexLock lock_capture(&mutex_capture_); + InitializeLocked(processing_config); +} + +void AudioProcessingImpl::InitializeLocked() { + UpdateActiveSubmoduleStates(); + + const int render_audiobuffer_sample_rate_hz = + formats_.api_format.reverse_output_stream().num_frames() == 0 + ? 
formats_.render_processing_format.sample_rate_hz() + : formats_.api_format.reverse_output_stream().sample_rate_hz(); + if (formats_.api_format.reverse_input_stream().num_channels() > 0) { + render_.render_audio.reset(new AudioBuffer( + formats_.api_format.reverse_input_stream().sample_rate_hz(), + formats_.api_format.reverse_input_stream().num_channels(), + formats_.render_processing_format.sample_rate_hz(), + formats_.render_processing_format.num_channels(), + render_audiobuffer_sample_rate_hz, + formats_.render_processing_format.num_channels())); + if (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()) { + render_.render_converter = AudioConverter::Create( + formats_.api_format.reverse_input_stream().num_channels(), + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_output_stream().num_channels(), + formats_.api_format.reverse_output_stream().num_frames()); + } else { + render_.render_converter.reset(nullptr); + } + } else { + render_.render_audio.reset(nullptr); + render_.render_converter.reset(nullptr); + } + + capture_.capture_audio.reset(new AudioBuffer( + formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + capture_nonlocked_.capture_processing_format.sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); + SetDownmixMethod(*capture_.capture_audio, + config_.pipeline.capture_downmix_method); + + if (capture_nonlocked_.capture_processing_format.sample_rate_hz() < + formats_.api_format.output_stream().sample_rate_hz() && + formats_.api_format.output_stream().sample_rate_hz() == 48000) { + capture_.capture_fullband_audio.reset( + new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); + SetDownmixMethod(*capture_.capture_fullband_audio, + config_.pipeline.capture_downmix_method); + } else { + capture_.capture_fullband_audio.reset(); + } + + AllocateRenderQueue(); + + InitializeGainController1(); + InitializeTransientSuppressor(); + InitializeHighPassFilter(true); + InitializeResidualEchoDetector(); + InitializeEchoController(); + InitializeGainController2(); + InitializeVoiceActivityDetector(); + InitializeNoiseSuppressor(); + InitializeAnalyzer(); + InitializePostProcessor(); + InitializePreProcessor(); + InitializeCaptureLevelsAdjuster(); + + if (aec_dump_) { + aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis()); + } +} + +void AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { + UpdateActiveSubmoduleStates(); + + formats_.api_format = config; + + // Choose maximum rate to use for the split filtering. 
+ RTC_DCHECK(config_.pipeline.maximum_internal_processing_rate == 48000 ||
+ config_.pipeline.maximum_internal_processing_rate == 32000);
+ int max_splitting_rate = 48000;
+ if (config_.pipeline.maximum_internal_processing_rate == 32000) {
+ max_splitting_rate = config_.pipeline.maximum_internal_processing_rate;
+ }
+
+ int capture_processing_rate = SuitableProcessRate(
+ std::min(formats_.api_format.input_stream().sample_rate_hz(),
+ formats_.api_format.output_stream().sample_rate_hz()),
+ max_splitting_rate,
+ submodule_states_.CaptureMultiBandSubModulesActive() ||
+ submodule_states_.RenderMultiBandSubModulesActive());
+ RTC_DCHECK_NE(8000, capture_processing_rate);
+
+ capture_nonlocked_.capture_processing_format =
+ StreamConfig(capture_processing_rate);
+
+ int render_processing_rate;
+ if (!capture_nonlocked_.echo_controller_enabled) {
+ render_processing_rate = SuitableProcessRate(
+ std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
+ formats_.api_format.reverse_output_stream().sample_rate_hz()),
+ max_splitting_rate,
+ submodule_states_.CaptureMultiBandSubModulesActive() ||
+ submodule_states_.RenderMultiBandSubModulesActive());
+ } else {
+ render_processing_rate = capture_processing_rate;
+ }
+
+ // If the forward sample rate is 8 kHz, the render stream is also processed
+ // at this rate.
+ if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+ kSampleRate8kHz) {
+ render_processing_rate = kSampleRate8kHz;
+ } else {
+ render_processing_rate =
+ std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
+ }
+
+ RTC_DCHECK_NE(8000, render_processing_rate);
+
+ if (submodule_states_.RenderMultiBandSubModulesActive()) {
+ // By default, downmix the render stream to mono for analysis. This has been
+ // demonstrated to work well for AEC in most practical scenarios.
+ const bool multi_channel_render = config_.pipeline.multi_channel_render &&
+ constants_.multi_channel_render_support;
+ int render_processing_num_channels =
+ multi_channel_render
+ ? formats_.api_format.reverse_input_stream().num_channels()
+ : 1;
+ formats_.render_processing_format =
+ StreamConfig(render_processing_rate, render_processing_num_channels);
+ } else {
+ formats_.render_processing_format = StreamConfig(
+ formats_.api_format.reverse_input_stream().sample_rate_hz(),
+ formats_.api_format.reverse_input_stream().num_channels());
+ }
+
+ if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+ kSampleRate32kHz ||
+ capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+ kSampleRate48kHz) {
+ capture_nonlocked_.split_rate = kSampleRate16kHz;
+ } else {
+ capture_nonlocked_.split_rate =
+ capture_nonlocked_.capture_processing_format.sample_rate_hz();
+ }
+
+ InitializeLocked();
+}
+
+void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
+ // Run in a single-threaded manner when applying the settings.
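+ // For orientation, a minimal caller-side sketch (`apm` stands for any
+ // AudioProcessing instance; only fields referenced in this file are used):
+ //
+ //   AudioProcessing::Config cfg = apm->GetConfig();
+ //   cfg.echo_canceller.enabled = true;
+ //   cfg.echo_canceller.mobile_mode = false;
+ //   cfg.noise_suppression.enabled = true;
+ //   cfg.noise_suppression.level =
+ //       AudioProcessing::Config::NoiseSuppression::kHigh;
+ //   apm->ApplyConfig(cfg);
+ //
+ // Only submodules whose configuration actually changed are reinitialized
+ // below.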
+ MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + + const auto adjusted_config = + AdjustConfig(config, gain_controller2_experiment_params_); + RTC_LOG(LS_INFO) << "AudioProcessing::ApplyConfig: " + << adjusted_config.ToString(); + + const bool pipeline_config_changed = + config_.pipeline.multi_channel_render != + adjusted_config.pipeline.multi_channel_render || + config_.pipeline.multi_channel_capture != + adjusted_config.pipeline.multi_channel_capture || + config_.pipeline.maximum_internal_processing_rate != + adjusted_config.pipeline.maximum_internal_processing_rate || + config_.pipeline.capture_downmix_method != + adjusted_config.pipeline.capture_downmix_method; + + const bool aec_config_changed = + config_.echo_canceller.enabled != + adjusted_config.echo_canceller.enabled || + config_.echo_canceller.mobile_mode != + adjusted_config.echo_canceller.mobile_mode; + + const bool agc1_config_changed = + config_.gain_controller1 != adjusted_config.gain_controller1; + + const bool agc2_config_changed = + config_.gain_controller2 != adjusted_config.gain_controller2; + + const bool ns_config_changed = + config_.noise_suppression.enabled != + adjusted_config.noise_suppression.enabled || + config_.noise_suppression.level != + adjusted_config.noise_suppression.level; + + const bool ts_config_changed = config_.transient_suppression.enabled != + adjusted_config.transient_suppression.enabled; + + const bool pre_amplifier_config_changed = + config_.pre_amplifier.enabled != adjusted_config.pre_amplifier.enabled || + config_.pre_amplifier.fixed_gain_factor != + adjusted_config.pre_amplifier.fixed_gain_factor; + + const bool gain_adjustment_config_changed = + config_.capture_level_adjustment != + adjusted_config.capture_level_adjustment; + + config_ = adjusted_config; + + if (aec_config_changed) { + InitializeEchoController(); + } + + if (ns_config_changed) { + InitializeNoiseSuppressor(); + } + + if (ts_config_changed) { + InitializeTransientSuppressor(); + } + + InitializeHighPassFilter(false); + + if (agc1_config_changed) { + InitializeGainController1(); + } + + const bool config_ok = GainController2::Validate(config_.gain_controller2); + if (!config_ok) { + RTC_LOG(LS_ERROR) + << "Invalid Gain Controller 2 config; using the default config."; + config_.gain_controller2 = AudioProcessing::Config::GainController2(); + } + + if (agc2_config_changed || ts_config_changed) { + // AGC2 also depends on TS because of the possible dependency on the APM VAD + // sub-module. + InitializeGainController2(); + InitializeVoiceActivityDetector(); + } + + if (pre_amplifier_config_changed || gain_adjustment_config_changed) { + InitializeCaptureLevelsAdjuster(); + } + + // Reinitialization must happen after all submodule configuration to avoid + // additional reinitializations on the next capture / render processing call. + if (pipeline_config_changed) { + InitializeLocked(formats_.api_format); + } +} + +void AudioProcessingImpl::OverrideSubmoduleCreationForTesting( + const ApmSubmoduleCreationOverrides& overrides) { + MutexLock lock(&mutex_capture_); + submodule_creation_overrides_ = overrides; +} + +int AudioProcessingImpl::proc_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + +int AudioProcessingImpl::proc_fullband_sample_rate_hz() const { + return capture_.capture_fullband_audio + ? 
capture_.capture_fullband_audio->num_frames() * 100 + : capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + +int AudioProcessingImpl::proc_split_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.split_rate; +} + +size_t AudioProcessingImpl::num_reverse_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.render_processing_format.num_channels(); +} + +size_t AudioProcessingImpl::num_input_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.input_stream().num_channels(); +} + +size_t AudioProcessingImpl::num_proc_channels() const { + // Used as callback from submodules, hence locking is not allowed. + const bool multi_channel_capture = config_.pipeline.multi_channel_capture && + constants_.multi_channel_capture_support; + if (capture_nonlocked_.echo_controller_enabled && !multi_channel_capture) { + return 1; + } + return num_output_channels(); +} + +size_t AudioProcessingImpl::num_output_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.output_stream().num_channels(); +} + +void AudioProcessingImpl::set_output_will_be_muted(bool muted) { + MutexLock lock(&mutex_capture_); + HandleCaptureOutputUsedSetting(!muted); +} + +void AudioProcessingImpl::HandleCaptureOutputUsedSetting( + bool capture_output_used) { + capture_.capture_output_used = + capture_output_used || !constants_.minimize_processing_for_unused_output; + + if (submodules_.agc_manager.get()) { + submodules_.agc_manager->HandleCaptureOutputUsedChange( + capture_.capture_output_used); + } + if (submodules_.echo_controller) { + submodules_.echo_controller->SetCaptureOutputUsage( + capture_.capture_output_used); + } + if (submodules_.noise_suppressor) { + submodules_.noise_suppressor->SetCaptureOutputUsage( + capture_.capture_output_used); + } + if (submodules_.gain_controller2) { + submodules_.gain_controller2->SetCaptureOutputUsed( + capture_.capture_output_used); + } +} + +void AudioProcessingImpl::SetRuntimeSetting(RuntimeSetting setting) { + PostRuntimeSetting(setting); +} + +bool AudioProcessingImpl::PostRuntimeSetting(RuntimeSetting setting) { + switch (setting.type()) { + case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting: + case RuntimeSetting::Type::kPlayoutAudioDeviceChange: + return render_runtime_settings_enqueuer_.Enqueue(setting); + case RuntimeSetting::Type::kCapturePreGain: + case RuntimeSetting::Type::kCapturePostGain: + case RuntimeSetting::Type::kCaptureCompressionGain: + case RuntimeSetting::Type::kCaptureFixedPostGain: + case RuntimeSetting::Type::kCaptureOutputUsed: + return capture_runtime_settings_enqueuer_.Enqueue(setting); + case RuntimeSetting::Type::kPlayoutVolumeChange: { + bool enqueueing_successful; + enqueueing_successful = + capture_runtime_settings_enqueuer_.Enqueue(setting); + enqueueing_successful = + render_runtime_settings_enqueuer_.Enqueue(setting) && + enqueueing_successful; + return enqueueing_successful; + } + case RuntimeSetting::Type::kNotSpecified: + RTC_DCHECK_NOTREACHED(); + return true; + } + // The language allows the enum to have a non-enumerator + // value. Check that this doesn't happen. 
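+ // (For example, `static_cast<RuntimeSetting::Type>(-1)` is a valid value of
+ // the enum type even though it names no enumerator, so control can leave a
+ // switch that covers every named case.)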
+ RTC_DCHECK_NOTREACHED();
+ return true;
+}
+
+AudioProcessingImpl::RuntimeSettingEnqueuer::RuntimeSettingEnqueuer(
+ SwapQueue<RuntimeSetting>* runtime_settings)
+ : runtime_settings_(*runtime_settings) {
+ RTC_DCHECK(runtime_settings);
+}
+
+AudioProcessingImpl::RuntimeSettingEnqueuer::~RuntimeSettingEnqueuer() =
+ default;
+
+bool AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue(
+ RuntimeSetting setting) {
+ const bool successful_insert = runtime_settings_.Insert(&setting);
+
+ if (!successful_insert) {
+ RTC_LOG(LS_ERROR) << "Cannot enqueue a new runtime setting.";
+ }
+ return successful_insert;
+}
+
+void AudioProcessingImpl::MaybeInitializeCapture(
+ const StreamConfig& input_config,
+ const StreamConfig& output_config) {
+ ProcessingConfig processing_config;
+ bool reinitialization_required = false;
+ {
+ // Acquire the capture lock in order to access api_format. The lock is
+ // released immediately, as we may need to acquire the render lock as part
+ // of the conditional reinitialization.
+ MutexLock lock_capture(&mutex_capture_);
+ processing_config = formats_.api_format;
+ reinitialization_required = UpdateActiveSubmoduleStates();
+ }
+
+ if (processing_config.input_stream() != input_config) {
+ reinitialization_required = true;
+ }
+
+ if (processing_config.output_stream() != output_config) {
+ reinitialization_required = true;
+ }
+
+ if (reinitialization_required) {
+ MutexLock lock_render(&mutex_render_);
+ MutexLock lock_capture(&mutex_capture_);
+ // Reread the API format since the render format may have changed.
+ processing_config = formats_.api_format;
+ processing_config.input_stream() = input_config;
+ processing_config.output_stream() = output_config;
+ InitializeLocked(processing_config);
+ }
+}
+
+int AudioProcessingImpl::ProcessStream(const float* const* src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config,
+ float* const* dest) {
+ TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
+ DenormalDisabler denormal_disabler;
+ RETURN_ON_ERR(
+ HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
+ MaybeInitializeCapture(input_config, output_config);
+
+ MutexLock lock_capture(&mutex_capture_);
+
+ if (aec_dump_) {
+ RecordUnprocessedCaptureStream(src);
+ }
+
+ capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
+ if (capture_.capture_fullband_audio) {
+ capture_.capture_fullband_audio->CopyFrom(
+ src, formats_.api_format.input_stream());
+ }
+ RETURN_ON_ERR(ProcessCaptureStreamLocked());
+ if (capture_.capture_fullband_audio) {
+ capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
+ dest);
+ } else {
+ capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
+ }
+
+ if (aec_dump_) {
+ RecordProcessedCaptureStream(dest);
+ }
+ return kNoError;
+}
+
+void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
+ RuntimeSetting setting;
+ int num_settings_processed = 0;
+ while (capture_runtime_settings_.Remove(&setting)) {
+ if (aec_dump_) {
+ aec_dump_->WriteRuntimeSetting(setting);
+ }
+ switch (setting.type()) {
+ case RuntimeSetting::Type::kCapturePreGain:
+ if (config_.pre_amplifier.enabled ||
+ config_.capture_level_adjustment.enabled) {
+ float value;
+ setting.GetFloat(&value);
+ // If the pre-amplifier is used, apply the new gain to the
+ // pre-amplifier regardless of whether the capture level adjustment is
+ // activated. This approach allows both functionalities to coexist
+ // until they have been properly merged.
+ if (config_.pre_amplifier.enabled) {
+ config_.pre_amplifier.fixed_gain_factor = value;
+ } else {
+ config_.capture_level_adjustment.pre_gain_factor = value;
+ }
+
+ // Use both the pre-amplifier and the capture level adjustment gains
+ // as pre-gains.
+ float gain = 1.f;
+ if (config_.pre_amplifier.enabled) {
+ gain *= config_.pre_amplifier.fixed_gain_factor;
+ }
+ if (config_.capture_level_adjustment.enabled) {
+ gain *= config_.capture_level_adjustment.pre_gain_factor;
+ }
+
+ submodules_.capture_levels_adjuster->SetPreGain(gain);
+ }
+ // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
+ break;
+ case RuntimeSetting::Type::kCapturePostGain:
+ if (config_.capture_level_adjustment.enabled) {
+ float value;
+ setting.GetFloat(&value);
+ config_.capture_level_adjustment.post_gain_factor = value;
+ submodules_.capture_levels_adjuster->SetPostGain(
+ config_.capture_level_adjustment.post_gain_factor);
+ }
+ // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
+ break;
+ case RuntimeSetting::Type::kCaptureCompressionGain: {
+ if (!submodules_.agc_manager &&
+ !(submodules_.gain_controller2 &&
+ config_.gain_controller2.input_volume_controller.enabled)) {
+ float value;
+ setting.GetFloat(&value);
+ int int_value = static_cast<int>(value + .5f);
+ config_.gain_controller1.compression_gain_db = int_value;
+ if (submodules_.gain_control) {
+ int error =
+ submodules_.gain_control->set_compression_gain_db(int_value);
+ RTC_DCHECK_EQ(kNoError, error);
+ }
+ }
+ break;
+ }
+ case RuntimeSetting::Type::kCaptureFixedPostGain: {
+ if (submodules_.gain_controller2) {
+ float value;
+ setting.GetFloat(&value);
+ config_.gain_controller2.fixed_digital.gain_db = value;
+ submodules_.gain_controller2->SetFixedGainDb(value);
+ }
+ break;
+ }
+ case RuntimeSetting::Type::kPlayoutVolumeChange: {
+ int value;
+ setting.GetInt(&value);
+ capture_.playout_volume = value;
+ break;
+ }
+ case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
+ RTC_DCHECK_NOTREACHED();
+ break;
+ case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
+ RTC_DCHECK_NOTREACHED();
+ break;
+ case RuntimeSetting::Type::kNotSpecified:
+ RTC_DCHECK_NOTREACHED();
+ break;
+ case RuntimeSetting::Type::kCaptureOutputUsed:
+ bool value;
+ setting.GetBool(&value);
+ HandleCaptureOutputUsedSetting(value);
+ break;
+ }
+ ++num_settings_processed;
+ }
+
+ if (num_settings_processed >= RuntimeSettingQueueSize()) {
+ // Handle overrun of the runtime settings queue, which likely will have
+ // caused settings to be discarded.
+ HandleOverrunInCaptureRuntimeSettingsQueue();
+ }
+}
+
+void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
+ // Fall back to a safe state for the case when a capture output usage
+ // setting has been missed.
+ HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
+}
+
+void AudioProcessingImpl::HandleRenderRuntimeSettings() {
+ RuntimeSetting setting;
+ while (render_runtime_settings_.Remove(&setting)) {
+ if (aec_dump_) {
+ aec_dump_->WriteRuntimeSetting(setting);
+ }
+ switch (setting.type()) {
+ case RuntimeSetting::Type::kPlayoutAudioDeviceChange: // fall-through
+ case RuntimeSetting::Type::kPlayoutVolumeChange: // fall-through
+ case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
+ if (submodules_.render_pre_processor) {
+ submodules_.render_pre_processor->SetRuntimeSetting(setting);
+ }
+ break;
+ case RuntimeSetting::Type::kCapturePreGain: // fall-through
+ case RuntimeSetting::Type::kCapturePostGain: // fall-through
+ case RuntimeSetting::Type::kCaptureCompressionGain: // fall-through
+ case RuntimeSetting::Type::kCaptureFixedPostGain: // fall-through
+ case RuntimeSetting::Type::kCaptureOutputUsed: // fall-through
+ case RuntimeSetting::Type::kNotSpecified:
+ RTC_DCHECK_NOTREACHED();
+ break;
+ }
+ }
+}
+
+void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
+ RTC_DCHECK_GE(160, audio->num_frames_per_band());
+
+ if (submodules_.echo_control_mobile) {
+ EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(),
+ num_reverse_channels(),
+ &aecm_render_queue_buffer_);
+ RTC_DCHECK(aecm_render_signal_queue_);
+ // Insert the samples into the queue.
+ if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) {
+ // The data queue is full and needs to be emptied.
+ EmptyQueuedRenderAudio();
+
+ // Retry the insert (should always work).
+ bool result =
+ aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_);
+ RTC_DCHECK(result);
+ }
+ }
+
+ if (!submodules_.agc_manager && submodules_.gain_control) {
+ GainControlImpl::PackRenderAudioBuffer(*audio, &agc_render_queue_buffer_);
+ // Insert the samples into the queue.
+ if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) {
+ // The data queue is full and needs to be emptied.
+ EmptyQueuedRenderAudio();
+
+ // Retry the insert (should always work).
+ bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_);
+ RTC_DCHECK(result);
+ }
+ }
+}
+
+void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
+ if (submodules_.echo_detector) {
+ PackRenderAudioBufferForEchoDetector(*audio, red_render_queue_buffer_);
+ RTC_DCHECK(red_render_signal_queue_);
+ // Insert the samples into the queue.
+ if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) {
+ // The data queue is full and needs to be emptied.
+ EmptyQueuedRenderAudio();
+
+ // Retry the insert (should always work).
+ bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_);
+ RTC_DCHECK(result);
+ }
+ }
+}
+
+void AudioProcessingImpl::AllocateRenderQueue() {
+ const size_t new_agc_render_queue_element_max_size =
+ std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
+
+ const size_t new_red_render_queue_element_max_size =
+ std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
+
+ // Reallocate the queues if the queue item sizes are too small to fit the
+ // data to put in the queues.
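+ // (Element sizes only ever grow; when the current size already fits, the
+ // existing queue is simply Clear()ed and reused, as in the else branches
+ // below.)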
+
+ if (agc_render_queue_element_max_size_ <
+ new_agc_render_queue_element_max_size) {
+ agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size;
+
+ std::vector<int16_t> template_queue_element(
+ agc_render_queue_element_max_size_);
+
+ agc_render_signal_queue_.reset(
+ new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
+ kMaxNumFramesToBuffer, template_queue_element,
+ RenderQueueItemVerifier<int16_t>(
+ agc_render_queue_element_max_size_)));
+
+ agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_);
+ agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_);
+ } else {
+ agc_render_signal_queue_->Clear();
+ }
+
+ if (submodules_.echo_detector) {
+ if (red_render_queue_element_max_size_ <
+ new_red_render_queue_element_max_size) {
+ red_render_queue_element_max_size_ =
+ new_red_render_queue_element_max_size;
+
+ std::vector<float> template_queue_element(
+ red_render_queue_element_max_size_);
+
+ red_render_signal_queue_.reset(
+ new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(
+ kMaxNumFramesToBuffer, template_queue_element,
+ RenderQueueItemVerifier<float>(
+ red_render_queue_element_max_size_)));
+
+ red_render_queue_buffer_.resize(red_render_queue_element_max_size_);
+ red_capture_queue_buffer_.resize(red_render_queue_element_max_size_);
+ } else {
+ red_render_signal_queue_->Clear();
+ }
+ }
+}
+
+void AudioProcessingImpl::EmptyQueuedRenderAudio() {
+ MutexLock lock_capture(&mutex_capture_);
+ EmptyQueuedRenderAudioLocked();
+}
+
+void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() {
+ if (submodules_.echo_control_mobile) {
+ RTC_DCHECK(aecm_render_signal_queue_);
+ while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) {
+ submodules_.echo_control_mobile->ProcessRenderAudio(
+ aecm_capture_queue_buffer_);
+ }
+ }
+
+ if (submodules_.gain_control) {
+ while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) {
+ submodules_.gain_control->ProcessRenderAudio(agc_capture_queue_buffer_);
+ }
+ }
+
+ if (submodules_.echo_detector) {
+ while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
+ submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_);
+ }
+ }
+}
+
+int AudioProcessingImpl::ProcessStream(const int16_t* const src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config,
+ int16_t* const dest) {
+ TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
+
+ RETURN_ON_ERR(
+ HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
+ MaybeInitializeCapture(input_config, output_config);
+
+ MutexLock lock_capture(&mutex_capture_);
+ DenormalDisabler denormal_disabler;
+
+ if (aec_dump_) {
+ RecordUnprocessedCaptureStream(src, input_config);
+ }
+
+ capture_.capture_audio->CopyFrom(src, input_config);
+ if (capture_.capture_fullband_audio) {
+ capture_.capture_fullband_audio->CopyFrom(src, input_config);
+ }
+ RETURN_ON_ERR(ProcessCaptureStreamLocked());
+ if (submodule_states_.CaptureMultiBandProcessingPresent() ||
+ submodule_states_.CaptureFullBandProcessingActive()) {
+ if (capture_.capture_fullband_audio) {
+ capture_.capture_fullband_audio->CopyTo(output_config, dest);
+ } else {
+ capture_.capture_audio->CopyTo(output_config, dest);
+ }
+ }
+
+ if (aec_dump_) {
+ RecordProcessedCaptureStream(dest, output_config);
+ }
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessCaptureStreamLocked() {
+ EmptyQueuedRenderAudioLocked();
+ HandleCaptureRuntimeSettings();
+ DenormalDisabler denormal_disabler;
+
+ // Ensure that the AEC and AECM are not both active at the same time.
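+ // (`!!ptr` collapses each submodule pointer to 0 or 1, so the DCHECK below
+ // asserts that at most one of the two echo cancellers is instantiated.)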
+ // TODO(peah): Simplify once the public API Enable functions for these
+ // are moved to APM.
+ RTC_DCHECK_LE(
+ !!submodules_.echo_controller + !!submodules_.echo_control_mobile, 1);
+
+ data_dumper_->DumpRaw(
+ "applied_input_volume",
+ capture_.applied_input_volume.value_or(kUnspecifiedDataDumpInputVolume));
+
+ AudioBuffer* capture_buffer = capture_.capture_audio.get(); // For brevity.
+ AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
+
+ if (submodules_.high_pass_filter &&
+ config_.high_pass_filter.apply_in_full_band &&
+ !constants_.enforce_split_band_hpf) {
+ submodules_.high_pass_filter->Process(capture_buffer,
+ /*use_split_band_data=*/false);
+ }
+
+ if (submodules_.capture_levels_adjuster) {
+ if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
+ // When the input volume is emulated, retrieve the volume applied to the
+ // input audio and notify that to APM so that the volume is passed to the
+ // active AGC.
+ set_stream_analog_level_locked(
+ submodules_.capture_levels_adjuster->GetAnalogMicGainLevel());
+ }
+ submodules_.capture_levels_adjuster->ApplyPreLevelAdjustment(
+ *capture_buffer);
+ }
+
+ capture_input_rms_.Analyze(rtc::ArrayView<const float>(
+ capture_buffer->channels_const()[0],
+ capture_nonlocked_.capture_processing_format.num_frames()));
+ const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
+ if (log_rms) {
+ capture_rms_interval_counter_ = 0;
+ RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak();
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms",
+ levels.average, 1, RmsLevel::kMinLevelDb, 64);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms",
+ levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+ }
+
+ if (capture_.applied_input_volume.has_value()) {
+ applied_input_volume_stats_reporter_.UpdateStatistics(
+ *capture_.applied_input_volume);
+ }
+
+ if (submodules_.echo_controller) {
+ // Determine if the echo path gain has changed by checking all the gains
+ // applied before AEC.
+ capture_.echo_path_gain_change = capture_.applied_input_volume_changed;
+
+ // Detect and flag any change in the capture level adjustment pre-gain.
+ if (submodules_.capture_levels_adjuster) {
+ float pre_adjustment_gain =
+ submodules_.capture_levels_adjuster->GetPreAdjustmentGain();
+ capture_.echo_path_gain_change =
+ capture_.echo_path_gain_change ||
+ (capture_.prev_pre_adjustment_gain != pre_adjustment_gain &&
+ capture_.prev_pre_adjustment_gain >= 0.0f);
+ capture_.prev_pre_adjustment_gain = pre_adjustment_gain;
+ }
+
+ // Detect volume change.
+ capture_.echo_path_gain_change =
+ capture_.echo_path_gain_change ||
+ (capture_.prev_playout_volume != capture_.playout_volume &&
+ capture_.prev_playout_volume >= 0);
+ capture_.prev_playout_volume = capture_.playout_volume;
+
+ submodules_.echo_controller->AnalyzeCapture(capture_buffer);
+ }
+
+ if (submodules_.agc_manager) {
+ submodules_.agc_manager->AnalyzePreProcess(*capture_buffer);
+ }
+
+ if (submodules_.gain_controller2 &&
+ config_.gain_controller2.input_volume_controller.enabled) {
+ // Expect the volume to be available if the input controller is enabled.
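+ // (The DCHECK below compiles out when DCHECKs are disabled, hence the
+ // additional has_value() guard before dereferencing.)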
+ RTC_DCHECK(capture_.applied_input_volume.has_value());
+ if (capture_.applied_input_volume.has_value()) {
+ submodules_.gain_controller2->Analyze(*capture_.applied_input_volume,
+ *capture_buffer);
+ }
+ }
+
+ if (submodule_states_.CaptureMultiBandSubModulesActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+ capture_buffer->SplitIntoFrequencyBands();
+ }
+
+ const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
+ constants_.multi_channel_capture_support;
+ if (submodules_.echo_controller && !multi_channel_capture) {
+ // Force down-mixing of the number of channels after the detection of
+ // capture signal saturation.
+ // TODO(peah): Look into ensuring that this kind of tampering with the
+ // AudioBuffer functionality should not be needed.
+ capture_buffer->set_num_channels(1);
+ }
+
+ if (submodules_.high_pass_filter &&
+ (!config_.high_pass_filter.apply_in_full_band ||
+ constants_.enforce_split_band_hpf)) {
+ submodules_.high_pass_filter->Process(capture_buffer,
+ /*use_split_band_data=*/true);
+ }
+
+ if (submodules_.gain_control) {
+ RETURN_ON_ERR(
+ submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
+ }
+
+ if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
+ !linear_aec_buffer || submodules_.echo_control_mobile) &&
+ submodules_.noise_suppressor) {
+ submodules_.noise_suppressor->Analyze(*capture_buffer);
+ }
+
+ if (submodules_.echo_control_mobile) {
+ // Ensure that the stream delay was set before the call to the
+ // AECM ProcessCaptureAudio function.
+ if (!capture_.was_stream_delay_set) {
+ return AudioProcessing::kStreamParameterNotSetError;
+ }
+
+ if (submodules_.noise_suppressor) {
+ submodules_.noise_suppressor->Process(capture_buffer);
+ }
+
+ RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
+ capture_buffer, stream_delay_ms()));
+ } else {
+ if (submodules_.echo_controller) {
+ data_dumper_->DumpRaw("stream_delay", stream_delay_ms());
+
+ if (capture_.was_stream_delay_set) {
+ submodules_.echo_controller->SetAudioBufferDelay(stream_delay_ms());
+ }
+
+ submodules_.echo_controller->ProcessCapture(
+ capture_buffer, linear_aec_buffer, capture_.echo_path_gain_change);
+ }
+
+ if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
+ linear_aec_buffer && submodules_.noise_suppressor) {
+ submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
+ }
+
+ if (submodules_.noise_suppressor) {
+ submodules_.noise_suppressor->Process(capture_buffer);
+ }
+ }
+
+ if (submodules_.agc_manager) {
+ submodules_.agc_manager->Process(*capture_buffer);
+
+ absl::optional<int> new_digital_gain =
+ submodules_.agc_manager->GetDigitalComressionGain();
+ if (new_digital_gain && submodules_.gain_control) {
+ submodules_.gain_control->set_compression_gain_db(*new_digital_gain);
+ }
+ }
+
+ if (submodules_.gain_control) {
+ // TODO(peah): Add reporting from AEC3 whether there is echo.
+ RETURN_ON_ERR(submodules_.gain_control->ProcessCaptureAudio(
+ capture_buffer, /*stream_has_echo*/ false));
+ }
+
+ if (submodule_states_.CaptureMultiBandProcessingPresent() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+ capture_buffer->MergeFrequencyBands();
+ }
+
+ if (capture_.capture_output_used) {
+ if (capture_.capture_fullband_audio) {
+ const auto& ec = submodules_.echo_controller;
+ bool ec_active = ec ? 
ec->ActiveProcessing() : false;
+ // Only update the fullband buffer if the multiband processing has changed
+ // the signal. Keep the original signal otherwise.
+ if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
+ capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+ }
+ capture_buffer = capture_.capture_fullband_audio.get();
+ }
+
+ if (submodules_.echo_detector) {
+ submodules_.echo_detector->AnalyzeCaptureAudio(
+ rtc::ArrayView<const float>(capture_buffer->channels()[0],
+ capture_buffer->num_frames()));
+ }
+
+ absl::optional<float> voice_probability;
+ if (!!submodules_.voice_activity_detector) {
+ voice_probability = submodules_.voice_activity_detector->Analyze(
+ AudioFrameView<const float>(capture_buffer->channels(),
+ capture_buffer->num_channels(),
+ capture_buffer->num_frames()));
+ }
+
+ if (submodules_.transient_suppressor) {
+ float transient_suppressor_voice_probability = 1.0f;
+ switch (transient_suppressor_vad_mode_) {
+ case TransientSuppressor::VadMode::kDefault:
+ if (submodules_.agc_manager) {
+ transient_suppressor_voice_probability =
+ submodules_.agc_manager->voice_probability();
+ }
+ break;
+ case TransientSuppressor::VadMode::kRnnVad:
+ RTC_DCHECK(voice_probability.has_value());
+ transient_suppressor_voice_probability = *voice_probability;
+ break;
+ case TransientSuppressor::VadMode::kNoVad:
+ // The transient suppressor will ignore `voice_probability`.
+ break;
+ }
+ float delayed_voice_probability =
+ submodules_.transient_suppressor->Suppress(
+ capture_buffer->channels()[0], capture_buffer->num_frames(),
+ capture_buffer->num_channels(),
+ capture_buffer->split_bands_const(0)[kBand0To8kHz],
+ capture_buffer->num_frames_per_band(),
+ /*reference_data=*/nullptr, /*reference_length=*/0,
+ transient_suppressor_voice_probability, capture_.key_pressed);
+ if (voice_probability.has_value()) {
+ *voice_probability = delayed_voice_probability;
+ }
+ }
+
+ // Experimental APM sub-module that analyzes `capture_buffer`.
+ if (submodules_.capture_analyzer) {
+ submodules_.capture_analyzer->Analyze(capture_buffer);
+ }
+
+ if (submodules_.gain_controller2) {
+ // TODO(bugs.webrtc.org/7494): Let AGC2 detect applied input volume
+ // changes.
+ submodules_.gain_controller2->Process(
+ voice_probability, capture_.applied_input_volume_changed,
+ capture_buffer);
+ }
+
+ if (submodules_.capture_post_processor) {
+ submodules_.capture_post_processor->Process(capture_buffer);
+ }
+
+ capture_output_rms_.Analyze(rtc::ArrayView<const float>(
+ capture_buffer->channels_const()[0],
+ capture_nonlocked_.capture_processing_format.num_frames()));
+ if (log_rms) {
+ RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
+ RTC_HISTOGRAM_COUNTS_LINEAR(
+ "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
+ RmsLevel::kMinLevelDb, 64);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
+ levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+ }
+
+ // Compute echo-detector stats.
+ if (submodules_.echo_detector) {
+ auto ed_metrics = submodules_.echo_detector->GetMetrics();
+ capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+ capture_.stats.residual_echo_likelihood_recent_max =
+ ed_metrics.echo_likelihood_recent_max;
+ }
+ }
+
+ // Compute echo-controller stats.
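+ // (Both the echo-detector and echo-controller stats reach clients through
+ // AudioProcessing::GetStatistics(); a typical read, assuming `apm` is the
+ // owning AudioProcessing instance:
+ //   AudioProcessingStats stats = apm->GetStatistics();
+ //   if (stats.echo_return_loss_enhancement) { /* ERLE in dB */ }
+ // The fields are forwarded via stats_reporter_ below.)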
+ if (submodules_.echo_controller) {
+ auto ec_metrics = submodules_.echo_controller->GetMetrics();
+ capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
+ capture_.stats.echo_return_loss_enhancement =
+ ec_metrics.echo_return_loss_enhancement;
+ capture_.stats.delay_ms = ec_metrics.delay_ms;
+ }
+
+ // Pass stats for reporting.
+ stats_reporter_.UpdateStatistics(capture_.stats);
+
+ UpdateRecommendedInputVolumeLocked();
+ if (capture_.recommended_input_volume.has_value()) {
+ recommended_input_volume_stats_reporter_.UpdateStatistics(
+ *capture_.recommended_input_volume);
+ }
+
+ if (submodules_.capture_levels_adjuster) {
+ submodules_.capture_levels_adjuster->ApplyPostLevelAdjustment(
+ *capture_buffer);
+
+ if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
+ // If the input volume emulation is used, retrieve the recommended input
+ // volume and set that to emulate the input volume on the next processed
+ // audio frame.
+ RTC_DCHECK(capture_.recommended_input_volume.has_value());
+ submodules_.capture_levels_adjuster->SetAnalogMicGainLevel(
+ *capture_.recommended_input_volume);
+ }
+ }
+
+ // Temporarily set the output to zero after the stream has been unmuted
+ // (capture output is again used). The purpose of this is to avoid the
+ // clicks and artefacts that result when processing is reactivated after
+ // unmuting.
+ if (!capture_.capture_output_used_last_frame &&
+ capture_.capture_output_used) {
+ for (size_t ch = 0; ch < capture_buffer->num_channels(); ++ch) {
+ rtc::ArrayView<float> channel_view(capture_buffer->channels()[ch],
+ capture_buffer->num_frames());
+ std::fill(channel_view.begin(), channel_view.end(), 0.f);
+ }
+ }
+ capture_.capture_output_used_last_frame = capture_.capture_output_used;
+
+ capture_.was_stream_delay_set = false;
+
+ data_dumper_->DumpRaw("recommended_input_volume",
+ capture_.recommended_input_volume.value_or(
+ kUnspecifiedDataDumpInputVolume));
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::AnalyzeReverseStream(
+ const float* const* data,
+ const StreamConfig& reverse_config) {
+ TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig");
+ MutexLock lock(&mutex_render_);
+ DenormalDisabler denormal_disabler;
+ RTC_DCHECK(data);
+ for (size_t i = 0; i < reverse_config.num_channels(); ++i) {
+ RTC_DCHECK(data[i]);
+ }
+ RETURN_ON_ERR(
+ AudioFormatValidityToErrorCode(ValidateAudioFormat(reverse_config)));
+
+ MaybeInitializeRender(reverse_config, reverse_config);
+ return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
+}
+
+int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config,
+ float* const* dest) {
+ TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
+ MutexLock lock(&mutex_render_);
+ DenormalDisabler denormal_disabler;
+ RETURN_ON_ERR(
+ HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
+
+ MaybeInitializeRender(input_config, output_config);
+
+ RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
+
+ if (submodule_states_.RenderMultiBandProcessingActive() ||
+ submodule_states_.RenderFullBandProcessingActive()) {
+ render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
+ dest);
+ } else if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
+ render_.render_converter->Convert(src, input_config.num_samples(), dest,
+ 
output_config.num_samples());
+ } else {
+ CopyAudioIfNeeded(src, input_config.num_frames(),
+ input_config.num_channels(), dest);
+ }
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::AnalyzeReverseStreamLocked(
+ const float* const* src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config) {
+ if (aec_dump_) {
+ const size_t channel_size =
+ formats_.api_format.reverse_input_stream().num_frames();
+ const size_t num_channels =
+ formats_.api_format.reverse_input_stream().num_channels();
+ aec_dump_->WriteRenderStreamMessage(
+ AudioFrameView<const float>(src, num_channels, channel_size));
+ }
+ render_.render_audio->CopyFrom(src,
+ formats_.api_format.reverse_input_stream());
+ return ProcessRenderStreamLocked();
+}
+
+int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config,
+ int16_t* const dest) {
+ TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
+
+ MutexLock lock(&mutex_render_);
+ DenormalDisabler denormal_disabler;
+
+ RETURN_ON_ERR(
+ HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
+ MaybeInitializeRender(input_config, output_config);
+
+ if (aec_dump_) {
+ aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),
+ input_config.num_channels());
+ }
+
+ render_.render_audio->CopyFrom(src, input_config);
+ RETURN_ON_ERR(ProcessRenderStreamLocked());
+ if (submodule_states_.RenderMultiBandProcessingActive() ||
+ submodule_states_.RenderFullBandProcessingActive()) {
+ render_.render_audio->CopyTo(output_config, dest);
+ }
+ return kNoError;
+}
+
+int AudioProcessingImpl::ProcessRenderStreamLocked() {
+ AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity.
+
+ HandleRenderRuntimeSettings();
+ DenormalDisabler denormal_disabler;
+
+ if (submodules_.render_pre_processor) {
+ submodules_.render_pre_processor->Process(render_buffer);
+ }
+
+ QueueNonbandedRenderAudio(render_buffer);
+
+ if (submodule_states_.RenderMultiBandSubModulesActive() &&
+ SampleRateSupportsMultiBand(
+ formats_.render_processing_format.sample_rate_hz())) {
+ render_buffer->SplitIntoFrequencyBands();
+ }
+
+ if (submodule_states_.RenderMultiBandSubModulesActive()) {
+ QueueBandedRenderAudio(render_buffer);
+ }
+
+ // TODO(peah): Perform the queuing inside QueueRenderAudio().
+ if (submodules_.echo_controller) {
+ submodules_.echo_controller->AnalyzeRender(render_buffer);
+ }
+
+ if (submodule_states_.RenderMultiBandProcessingActive() &&
+ SampleRateSupportsMultiBand(
+ formats_.render_processing_format.sample_rate_hz())) {
+ render_buffer->MergeFrequencyBands();
+ }
+
+ return kNoError;
+}
+
+int AudioProcessingImpl::set_stream_delay_ms(int delay) {
+ MutexLock lock(&mutex_capture_);
+ Error retval = kNoError;
+ capture_.was_stream_delay_set = true;
+
+ if (delay < 0) {
+ delay = 0;
+ retval = kBadStreamParameterWarning;
+ }
+
+ // TODO(ajm): the max is rather arbitrarily chosen; investigate.
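+ // (Out-of-range delays are clamped rather than rejected: negative values
+ // become 0 ms and values above 500 ms become 500 ms, with either case
+ // returning kBadStreamParameterWarning while processing continues.)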
+ if (delay > 500) {
+ delay = 500;
+ retval = kBadStreamParameterWarning;
+ }
+
+ capture_nonlocked_.stream_delay_ms = delay;
+ return retval;
+}
+
+bool AudioProcessingImpl::GetLinearAecOutput(
+ rtc::ArrayView<std::array<float, 160>> linear_output) const {
+ MutexLock lock(&mutex_capture_);
+ AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
+
+ RTC_DCHECK(linear_aec_buffer);
+ if (linear_aec_buffer) {
+ RTC_DCHECK_EQ(1, linear_aec_buffer->num_bands());
+ RTC_DCHECK_EQ(linear_output.size(), linear_aec_buffer->num_channels());
+
+ for (size_t ch = 0; ch < linear_aec_buffer->num_channels(); ++ch) {
+ RTC_DCHECK_EQ(linear_output[ch].size(), linear_aec_buffer->num_frames());
+ rtc::ArrayView<const float> channel_view =
+ rtc::ArrayView<const float>(linear_aec_buffer->channels_const()[ch],
+ linear_aec_buffer->num_frames());
+ FloatS16ToFloat(channel_view.data(), channel_view.size(),
+ linear_output[ch].data());
+ }
+ return true;
+ }
+ RTC_LOG(LS_ERROR) << "No linear AEC output available";
+ RTC_DCHECK_NOTREACHED();
+ return false;
+}
+
+int AudioProcessingImpl::stream_delay_ms() const {
+ // Used as callback from submodules, hence locking is not allowed.
+ return capture_nonlocked_.stream_delay_ms;
+}
+
+void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
+ MutexLock lock(&mutex_capture_);
+ capture_.key_pressed = key_pressed;
+}
+
+void AudioProcessingImpl::set_stream_analog_level(int level) {
+ MutexLock lock_capture(&mutex_capture_);
+ set_stream_analog_level_locked(level);
+}
+
+void AudioProcessingImpl::set_stream_analog_level_locked(int level) {
+ capture_.applied_input_volume_changed =
+ capture_.applied_input_volume.has_value() &&
+ *capture_.applied_input_volume != level;
+ capture_.applied_input_volume = level;
+
+ // Invalidate any previously recommended input volume; it will be updated by
+ // `ProcessStream()`.
+ capture_.recommended_input_volume = absl::nullopt;
+
+ if (submodules_.agc_manager) {
+ submodules_.agc_manager->set_stream_analog_level(level);
+ return;
+ }
+
+ if (submodules_.gain_control) {
+ int error = submodules_.gain_control->set_stream_analog_level(level);
+ RTC_DCHECK_EQ(kNoError, error);
+ return;
+ }
+}
+
+int AudioProcessingImpl::recommended_stream_analog_level() const {
+ MutexLock lock_capture(&mutex_capture_);
+ if (!capture_.applied_input_volume.has_value()) {
+ RTC_LOG(LS_ERROR) << "set_stream_analog_level has not been called";
+ }
+ // Input volume to recommend when `set_stream_analog_level()` is not called.
+ constexpr int kFallBackInputVolume = 255;
+ // When APM has no input volume to recommend, return the latest applied input
+ // volume that has been observed in order to possibly produce no input volume
+ // change. If no applied input volume has been observed, return a fall-back
+ // value.
+ return capture_.recommended_input_volume.value_or(
+ capture_.applied_input_volume.value_or(kFallBackInputVolume));
+}
+
+void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
+ if (!capture_.applied_input_volume.has_value()) {
+ // When `set_stream_analog_level()` is not called, no input level can be
+ // recommended.
+ capture_.recommended_input_volume = absl::nullopt;
+ return;
+ }
+
+ if (submodules_.agc_manager) {
+ capture_.recommended_input_volume =
+ submodules_.agc_manager->recommended_analog_level();
+ return;
+ }
+
+ if (submodules_.gain_control) {
+ capture_.recommended_input_volume =
+ submodules_.gain_control->stream_analog_level();
+ return;
+ }
+
+ if (submodules_.gain_controller2 &&
+ config_.gain_controller2.input_volume_controller.enabled) {
+ capture_.recommended_input_volume =
+ submodules_.gain_controller2->recommended_input_volume();
+ return;
+ }
+
+ capture_.recommended_input_volume = capture_.applied_input_volume;
+}
+
+bool AudioProcessingImpl::CreateAndAttachAecDump(absl::string_view file_name,
+ int64_t max_log_size_bytes,
+ rtc::TaskQueue* worker_queue) {
+ std::unique_ptr<AecDump> aec_dump =
+ AecDumpFactory::Create(file_name, max_log_size_bytes, worker_queue);
+ if (!aec_dump) {
+ return false;
+ }
+
+ AttachAecDump(std::move(aec_dump));
+ return true;
+}
+
+bool AudioProcessingImpl::CreateAndAttachAecDump(FILE* handle,
+ int64_t max_log_size_bytes,
+ rtc::TaskQueue* worker_queue) {
+ std::unique_ptr<AecDump> aec_dump =
+ AecDumpFactory::Create(handle, max_log_size_bytes, worker_queue);
+ if (!aec_dump) {
+ return false;
+ }
+
+ AttachAecDump(std::move(aec_dump));
+ return true;
+}
+
+void AudioProcessingImpl::AttachAecDump(std::unique_ptr<AecDump> aec_dump) {
+ RTC_DCHECK(aec_dump);
+ MutexLock lock_render(&mutex_render_);
+ MutexLock lock_capture(&mutex_capture_);
+
+ // The previously attached AecDump will be destroyed together with the
+ // 'aec_dump' parameter, i.e. after the locks have been released.
+ aec_dump_.swap(aec_dump);
+ WriteAecDumpConfigMessage(true);
+ aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
+}
+
+void AudioProcessingImpl::DetachAecDump() {
+ // The d-tor of a task-queue based AecDump blocks until all pending
+ // tasks are done. This construction avoids blocking while holding
+ // the render and capture locks.
+ std::unique_ptr<AecDump> aec_dump = nullptr;
+ {
+ MutexLock lock_render(&mutex_render_);
+ MutexLock lock_capture(&mutex_capture_);
+ aec_dump = std::move(aec_dump_);
+ }
+}
+
+AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
+ MutexLock lock_render(&mutex_render_);
+ MutexLock lock_capture(&mutex_capture_);
+ return config_;
+}
+
+bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
+ return submodule_states_.Update(
+ config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
+ !!submodules_.noise_suppressor, !!submodules_.gain_control,
+ !!submodules_.gain_controller2, !!submodules_.voice_activity_detector,
+ config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled,
+ capture_nonlocked_.echo_controller_enabled,
+ !!submodules_.transient_suppressor);
+}
+
+void AudioProcessingImpl::InitializeTransientSuppressor() {
+ // Choose the VAD mode for TS and detect a VAD mode change.
+ const TransientSuppressor::VadMode previous_vad_mode =
+ transient_suppressor_vad_mode_;
+ transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kDefault;
+ if (UseApmVadSubModule(config_, gain_controller2_experiment_params_)) {
+ transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kRnnVad;
+ }
+ const bool vad_mode_changed =
+ previous_vad_mode != transient_suppressor_vad_mode_;
+
+ if (config_.transient_suppression.enabled &&
+ !constants_.transient_suppressor_forced_off) {
+ // Attempt to create a transient suppressor, if one is not already created.
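+ // (A new instance is also created when the VAD mode changed above, since
+ // the suppressor is created for a specific VAD source.)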
+ if (!submodules_.transient_suppressor || vad_mode_changed) {
+ submodules_.transient_suppressor = CreateTransientSuppressor(
+ submodule_creation_overrides_, transient_suppressor_vad_mode_,
+ proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
+ num_proc_channels());
+ if (!submodules_.transient_suppressor) {
+ RTC_LOG(LS_WARNING)
+ << "No transient suppressor created (probably disabled)";
+ }
+ } else {
+ submodules_.transient_suppressor->Initialize(
+ proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
+ num_proc_channels());
+ }
+ } else {
+ submodules_.transient_suppressor.reset();
+ }
+}
+
+void AudioProcessingImpl::InitializeHighPassFilter(bool forced_reset) {
+ bool high_pass_filter_needed_by_aec =
+ config_.echo_canceller.enabled &&
+ config_.echo_canceller.enforce_high_pass_filtering &&
+ !config_.echo_canceller.mobile_mode;
+ if (submodule_states_.HighPassFilteringRequired() ||
+ high_pass_filter_needed_by_aec) {
+ bool use_full_band = config_.high_pass_filter.apply_in_full_band &&
+ !constants_.enforce_split_band_hpf;
+ int rate = use_full_band ? proc_fullband_sample_rate_hz()
+ : proc_split_sample_rate_hz();
+ size_t num_channels =
+ use_full_band ? num_output_channels() : num_proc_channels();
+
+ if (!submodules_.high_pass_filter ||
+ rate != submodules_.high_pass_filter->sample_rate_hz() ||
+ forced_reset ||
+ num_channels != submodules_.high_pass_filter->num_channels()) {
+ submodules_.high_pass_filter.reset(
+ new HighPassFilter(rate, num_channels));
+ }
+ } else {
+ submodules_.high_pass_filter.reset();
+ }
+}
+
+void AudioProcessingImpl::InitializeEchoController() {
+ bool use_echo_controller =
+ echo_control_factory_ ||
+ (config_.echo_canceller.enabled && !config_.echo_canceller.mobile_mode);
+
+ if (use_echo_controller) {
+ // Create and activate the echo controller.
+ if (echo_control_factory_) {
+ submodules_.echo_controller = echo_control_factory_->Create(
+ proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
+ RTC_DCHECK(submodules_.echo_controller);
+ } else {
+ EchoCanceller3Config config;
+ absl::optional<EchoCanceller3Config> multichannel_config;
+ if (use_setup_specific_default_aec3_config_) {
+ multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig();
+ }
+ submodules_.echo_controller = std::make_unique<EchoCanceller3>(
+ config, multichannel_config, proc_sample_rate_hz(),
+ num_reverse_channels(), num_proc_channels());
+ }
+
+ // Set up the storage for returning the linear AEC output.
+ if (config_.echo_canceller.export_linear_aec_output) {
+ constexpr int kLinearOutputRateHz = 16000;
+ capture_.linear_aec_output = std::make_unique<AudioBuffer>(
+ kLinearOutputRateHz, num_proc_channels(), kLinearOutputRateHz,
+ num_proc_channels(), kLinearOutputRateHz, num_proc_channels());
+ } else {
+ capture_.linear_aec_output.reset();
+ }
+
+ capture_nonlocked_.echo_controller_enabled = true;
+
+ submodules_.echo_control_mobile.reset();
+ aecm_render_signal_queue_.reset();
+ return;
+ }
+
+ submodules_.echo_controller.reset();
+ capture_nonlocked_.echo_controller_enabled = false;
+ capture_.linear_aec_output.reset();
+
+ if (!config_.echo_canceller.enabled) {
+ submodules_.echo_control_mobile.reset();
+ aecm_render_signal_queue_.reset();
+ return;
+ }
+
+ if (config_.echo_canceller.mobile_mode) {
+ // Create and activate AECM.
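+ // (Render-queue sizing sketch, mirroring the code below: each element must
+ // hold up to kMaxAllowedValuesOfSamplesPerBand samples for every required
+ // canceller, i.e. max_element_size =
+ //   max(1, samples_per_band * NumCancellersRequired(num_out, num_rev)).)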
+ size_t max_element_size =
+ std::max(static_cast<size_t>(1),
+ kMaxAllowedValuesOfSamplesPerBand *
+ EchoControlMobileImpl::NumCancellersRequired(
+ num_output_channels(), num_reverse_channels()));
+
+ std::vector<int16_t> template_queue_element(max_element_size);
+
+ aecm_render_signal_queue_.reset(
+ new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
+ kMaxNumFramesToBuffer, template_queue_element,
+ RenderQueueItemVerifier<int16_t>(max_element_size)));
+
+ aecm_render_queue_buffer_.resize(max_element_size);
+ aecm_capture_queue_buffer_.resize(max_element_size);
+
+ submodules_.echo_control_mobile.reset(new EchoControlMobileImpl());
+
+ submodules_.echo_control_mobile->Initialize(proc_split_sample_rate_hz(),
+ num_reverse_channels(),
+ num_output_channels());
+ return;
+ }
+
+ submodules_.echo_control_mobile.reset();
+ aecm_render_signal_queue_.reset();
+}
+
+void AudioProcessingImpl::InitializeGainController1() {
+ if (config_.gain_controller2.enabled &&
+ config_.gain_controller2.input_volume_controller.enabled &&
+ config_.gain_controller1.enabled &&
+ (config_.gain_controller1.mode ==
+ AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
+ config_.gain_controller1.analog_gain_controller.enabled)) {
+ RTC_LOG(LS_ERROR) << "APM configuration not valid: "
+ << "Multiple input volume controllers enabled.";
+ }
+
+ if (!config_.gain_controller1.enabled) {
+ submodules_.agc_manager.reset();
+ submodules_.gain_control.reset();
+ return;
+ }
+
+ RTC_HISTOGRAM_BOOLEAN(
+ "WebRTC.Audio.GainController.Analog.Enabled",
+ config_.gain_controller1.analog_gain_controller.enabled);
+
+ if (!submodules_.gain_control) {
+ submodules_.gain_control.reset(new GainControlImpl());
+ }
+
+ submodules_.gain_control->Initialize(num_proc_channels(),
+ proc_sample_rate_hz());
+ if (!config_.gain_controller1.analog_gain_controller.enabled) {
+ int error = submodules_.gain_control->set_mode(
+ Agc1ConfigModeToInterfaceMode(config_.gain_controller1.mode));
+ RTC_DCHECK_EQ(kNoError, error);
+ error = submodules_.gain_control->set_target_level_dbfs(
+ config_.gain_controller1.target_level_dbfs);
+ RTC_DCHECK_EQ(kNoError, error);
+ error = submodules_.gain_control->set_compression_gain_db(
+ config_.gain_controller1.compression_gain_db);
+ RTC_DCHECK_EQ(kNoError, error);
+ error = submodules_.gain_control->enable_limiter(
+ config_.gain_controller1.enable_limiter);
+ RTC_DCHECK_EQ(kNoError, error);
+ constexpr int kAnalogLevelMinimum = 0;
+ constexpr int kAnalogLevelMaximum = 255;
+ error = submodules_.gain_control->set_analog_level_limits(
+ kAnalogLevelMinimum, kAnalogLevelMaximum);
+ RTC_DCHECK_EQ(kNoError, error);
+
+ submodules_.agc_manager.reset();
+ return;
+ }
+
+ if (!submodules_.agc_manager.get() ||
+ submodules_.agc_manager->num_channels() !=
+ static_cast<int>(num_proc_channels())) {
+ int stream_analog_level = -1;
+ const bool re_creation = !!submodules_.agc_manager;
+ if (re_creation) {
+ stream_analog_level = submodules_.agc_manager->recommended_analog_level();
+ }
+ submodules_.agc_manager.reset(new AgcManagerDirect(
+ num_proc_channels(), config_.gain_controller1.analog_gain_controller));
+ if (re_creation) {
+ submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
+ }
+ }
+ submodules_.agc_manager->Initialize();
+ submodules_.agc_manager->SetupDigitalGainControl(*submodules_.gain_control);
+ submodules_.agc_manager->HandleCaptureOutputUsedChange(
+ capture_.capture_output_used);
+}
+
+void AudioProcessingImpl::InitializeGainController2() {
+ if (!config_.gain_controller2.enabled) {
+
submodules_.gain_controller2.reset();
+ return;
+ }
+ // Override the input volume controller configuration if the AGC2 experiment
+ // is running and its parameters require fully switching the gain control to
+ // AGC2.
+ const bool input_volume_controller_config_overridden =
+ gain_controller2_experiment_params_.has_value() &&
+ gain_controller2_experiment_params_->agc2_config.has_value();
+ const InputVolumeController::Config input_volume_controller_config =
+ input_volume_controller_config_overridden
+ ? gain_controller2_experiment_params_->agc2_config
+ ->input_volume_controller
+ : InputVolumeController::Config{};
+ // If the APM VAD sub-module is not used, let AGC2 use its internal VAD.
+ const bool use_internal_vad =
+ !UseApmVadSubModule(config_, gain_controller2_experiment_params_);
+ submodules_.gain_controller2 = std::make_unique<GainController2>(
+ config_.gain_controller2, input_volume_controller_config,
+ proc_fullband_sample_rate_hz(), num_proc_channels(), use_internal_vad);
+ submodules_.gain_controller2->SetCaptureOutputUsed(
+ capture_.capture_output_used);
+}
+
+void AudioProcessingImpl::InitializeVoiceActivityDetector() {
+ if (!UseApmVadSubModule(config_, gain_controller2_experiment_params_)) {
+ submodules_.voice_activity_detector.reset();
+ return;
+ }
+
+ if (!submodules_.voice_activity_detector) {
+ RTC_DCHECK(!!submodules_.gain_controller2);
+ // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
+ submodules_.voice_activity_detector =
+ std::make_unique<VoiceActivityDetectorWrapper>(
+ submodules_.gain_controller2->GetCpuFeatures(),
+ proc_fullband_sample_rate_hz());
+ } else {
+ submodules_.voice_activity_detector->Initialize(
+ proc_fullband_sample_rate_hz());
+ }
+}
+
+void AudioProcessingImpl::InitializeNoiseSuppressor() {
+ submodules_.noise_suppressor.reset();
+
+ if (config_.noise_suppression.enabled) {
+ auto map_level =
+ [](AudioProcessing::Config::NoiseSuppression::Level level) {
+ using NoiseSuppresionConfig =
+ AudioProcessing::Config::NoiseSuppression;
+ switch (level) {
+ case NoiseSuppresionConfig::kLow:
+ return NsConfig::SuppressionLevel::k6dB;
+ case NoiseSuppresionConfig::kModerate:
+ return NsConfig::SuppressionLevel::k12dB;
+ case NoiseSuppresionConfig::kHigh:
+ return NsConfig::SuppressionLevel::k18dB;
+ case NoiseSuppresionConfig::kVeryHigh:
+ return NsConfig::SuppressionLevel::k21dB;
+ }
+ RTC_CHECK_NOTREACHED();
+ };
+
+ NsConfig cfg;
+ cfg.target_level = map_level(config_.noise_suppression.level);
+ submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
+ cfg, proc_sample_rate_hz(), num_proc_channels());
+ }
+}
+
+void AudioProcessingImpl::InitializeCaptureLevelsAdjuster() {
+ if (config_.pre_amplifier.enabled ||
+ config_.capture_level_adjustment.enabled) {
+ // Use both the pre-amplifier and the capture level adjustment gains as
+ // pre-gains.
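+ // (The factors multiply: e.g. a fixed_gain_factor of 2.0 combined with a
+ // pre_gain_factor of 1.5 yields a total pre-gain of 3.0.)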
+ float pre_gain = 1.f;
+ if (config_.pre_amplifier.enabled) {
+ pre_gain *= config_.pre_amplifier.fixed_gain_factor;
+ }
+ if (config_.capture_level_adjustment.enabled) {
+ pre_gain *= config_.capture_level_adjustment.pre_gain_factor;
+ }
+
+ submodules_.capture_levels_adjuster =
+ std::make_unique<CaptureLevelsAdjuster>(
+ config_.capture_level_adjustment.analog_mic_gain_emulation.enabled,
+ config_.capture_level_adjustment.analog_mic_gain_emulation
+ .initial_level,
+ pre_gain, config_.capture_level_adjustment.post_gain_factor);
+ } else {
+ submodules_.capture_levels_adjuster.reset();
+ }
+}
+
+void AudioProcessingImpl::InitializeResidualEchoDetector() {
+ if (submodules_.echo_detector) {
+ submodules_.echo_detector->Initialize(
+ proc_fullband_sample_rate_hz(), 1,
+ formats_.render_processing_format.sample_rate_hz(), 1);
+ }
+}
+
+void AudioProcessingImpl::InitializeAnalyzer() {
+ if (submodules_.capture_analyzer) {
+ submodules_.capture_analyzer->Initialize(proc_fullband_sample_rate_hz(),
+ num_proc_channels());
+ }
+}
+
+void AudioProcessingImpl::InitializePostProcessor() {
+ if (submodules_.capture_post_processor) {
+ submodules_.capture_post_processor->Initialize(
+ proc_fullband_sample_rate_hz(), num_proc_channels());
+ }
+}
+
+void AudioProcessingImpl::InitializePreProcessor() {
+ if (submodules_.render_pre_processor) {
+ submodules_.render_pre_processor->Initialize(
+ formats_.render_processing_format.sample_rate_hz(),
+ formats_.render_processing_format.num_channels());
+ }
+}
+
+void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
+ if (!aec_dump_) {
+ return;
+ }
+
+ std::string experiments_description = "";
+ // TODO(peah): Add semicolon-separated concatenations of experiment
+ // descriptions for other submodules.
+ if (!!submodules_.capture_post_processor) {
+ experiments_description += "CapturePostProcessor;";
+ }
+ if (!!submodules_.render_pre_processor) {
+ experiments_description += "RenderPreProcessor;";
+ }
+ if (capture_nonlocked_.echo_controller_enabled) {
+ experiments_description += "EchoController;";
+ }
+ if (config_.gain_controller2.enabled) {
+ experiments_description += "GainController2;";
+ }
+
+ InternalAPMConfig apm_config;
+
+ apm_config.aec_enabled = config_.echo_canceller.enabled;
+ apm_config.aec_delay_agnostic_enabled = false;
+ apm_config.aec_extended_filter_enabled = false;
+ apm_config.aec_suppression_level = 0;
+
+ apm_config.aecm_enabled = !!submodules_.echo_control_mobile;
+ apm_config.aecm_comfort_noise_enabled =
+ submodules_.echo_control_mobile &&
+ submodules_.echo_control_mobile->is_comfort_noise_enabled();
+ apm_config.aecm_routing_mode =
+ submodules_.echo_control_mobile
+ ? static_cast<int>(submodules_.echo_control_mobile->routing_mode())
+ : 0;
+
+ apm_config.agc_enabled = !!submodules_.gain_control;
+
+ apm_config.agc_mode = submodules_.gain_control
+ ? static_cast<int>(submodules_.gain_control->mode())
+ : GainControl::kAdaptiveAnalog;
+ apm_config.agc_limiter_enabled =
+ submodules_.gain_control ? 
+  apm_config.agc_limiter_enabled =
+      submodules_.gain_control ? submodules_.gain_control->is_limiter_enabled()
+                               : false;
+  apm_config.noise_robust_agc_enabled = !!submodules_.agc_manager;
+
+  apm_config.hpf_enabled = config_.high_pass_filter.enabled;
+
+  apm_config.ns_enabled = config_.noise_suppression.enabled;
+  apm_config.ns_level = static_cast<int>(config_.noise_suppression.level);
+
+  apm_config.transient_suppression_enabled =
+      config_.transient_suppression.enabled;
+  apm_config.experiments_description = experiments_description;
+  apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
+  apm_config.pre_amplifier_fixed_gain_factor =
+      config_.pre_amplifier.fixed_gain_factor;
+
+  if (!forced && apm_config == apm_config_for_aec_dump_) {
+    return;
+  }
+  aec_dump_->WriteConfig(apm_config);
+  apm_config_for_aec_dump_ = apm_config;
+}
+
+void AudioProcessingImpl::RecordUnprocessedCaptureStream(
+    const float* const* src) {
+  RTC_DCHECK(aec_dump_);
+  WriteAecDumpConfigMessage(false);
+
+  const size_t channel_size = formats_.api_format.input_stream().num_frames();
+  const size_t num_channels = formats_.api_format.input_stream().num_channels();
+  aec_dump_->AddCaptureStreamInput(
+      AudioFrameView<const float>(src, num_channels, channel_size));
+  RecordAudioProcessingState();
+}
+
+void AudioProcessingImpl::RecordUnprocessedCaptureStream(
+    const int16_t* const data,
+    const StreamConfig& config) {
+  RTC_DCHECK(aec_dump_);
+  WriteAecDumpConfigMessage(false);
+
+  aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
+                                   config.num_frames());
+  RecordAudioProcessingState();
+}
+
+void AudioProcessingImpl::RecordProcessedCaptureStream(
+    const float* const* processed_capture_stream) {
+  RTC_DCHECK(aec_dump_);
+
+  const size_t channel_size = formats_.api_format.output_stream().num_frames();
+  const size_t num_channels =
+      formats_.api_format.output_stream().num_channels();
+  aec_dump_->AddCaptureStreamOutput(AudioFrameView<const float>(
+      processed_capture_stream, num_channels, channel_size));
+  aec_dump_->WriteCaptureStreamMessage();
+}
+
+void AudioProcessingImpl::RecordProcessedCaptureStream(
+    const int16_t* const data,
+    const StreamConfig& config) {
+  RTC_DCHECK(aec_dump_);
+
+  aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
+                                    config.num_frames());
+  aec_dump_->WriteCaptureStreamMessage();
+}
+
+void AudioProcessingImpl::RecordAudioProcessingState() {
+  RTC_DCHECK(aec_dump_);
+  AecDump::AudioProcessingState audio_proc_state;
+  audio_proc_state.delay = capture_nonlocked_.stream_delay_ms;
+  audio_proc_state.drift = 0;
+  audio_proc_state.applied_input_volume = capture_.applied_input_volume;
+  audio_proc_state.keypress = capture_.key_pressed;
+  aec_dump_->AddAudioProcessingState(audio_proc_state);
+}
+
+AudioProcessingImpl::ApmCaptureState::ApmCaptureState()
+    : was_stream_delay_set(false),
+      capture_output_used(true),
+      capture_output_used_last_frame(true),
+      key_pressed(false),
+      capture_processing_format(kSampleRate16kHz),
+      split_rate(kSampleRate16kHz),
+      echo_path_gain_change(false),
+      prev_pre_adjustment_gain(-1.0f),
+      playout_volume(-1),
+      prev_playout_volume(-1),
+      applied_input_volume_changed(false) {}
+
+AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;
+
+AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;
+
+AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;
+
+AudioProcessingImpl::ApmStatsReporter::ApmStatsReporter()
+    : stats_message_queue_(1) {}
+
+AudioProcessingImpl::ApmStatsReporter::~ApmStatsReporter() = default;
+
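+// Illustrative sketch of the single-slot queue pattern used by GetStatistics()
+// below: the producer posts at most one pending snapshot and the consumer
+// falls back to its cached copy when nothing new has arrived.
+//
+//   webrtc::SwapQueue<AudioProcessingStats> mailbox(1);
+//   AudioProcessingStats snapshot;
+//   mailbox.Insert(&snapshot);           // Producer (capture side).
+//   AudioProcessingStats cached;
+//   if (!mailbox.Remove(&cached)) {
+//     // Nothing new; keep the previously cached stats.
+//   }
+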
+AudioProcessingStats AudioProcessingImpl::ApmStatsReporter::GetStatistics() {
+  MutexLock lock_stats(&mutex_stats_);
+  bool new_stats_available = stats_message_queue_.Remove(&cached_stats_);
+  // If no new stats are available, return the cached stats.
+  static_cast<void>(new_stats_available);
+
+  return cached_stats_;
+}
+
+void AudioProcessingImpl::ApmStatsReporter::UpdateStatistics(
+    const AudioProcessingStats& new_stats) {
+  AudioProcessingStats stats_to_queue = new_stats;
+  bool stats_message_passed = stats_message_queue_.Insert(&stats_to_queue);
+  // If the message queue is full, discard the new stats.
+  static_cast<void>(stats_message_passed);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h
new file mode 100644
index 0000000000..fe80e0d912
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h
@@ -0,0 +1,603 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+
+#include <stdio.h>
+
+#include <array>
+#include <atomic>
+#include <list>
+#include <memory>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/function_view.h"
+#include "modules/audio_processing/aec3/echo_canceller3.h"
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/gain_control_impl.h"
+#include "modules/audio_processing/gain_controller2.h"
+#include "modules/audio_processing/high_pass_filter.h"
+#include "modules/audio_processing/include/aec_dump.h"
+#include "modules/audio_processing/include/audio_frame_proxies.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "modules/audio_processing/ns/noise_suppressor.h"
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+#include "modules/audio_processing/render_queue_item_verifier.h"
+#include "modules/audio_processing/rms_level.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/swap_queue.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioConverter;
+
+constexpr int RuntimeSettingQueueSize() {
+  return 100;
+}
+
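+// Illustrative use of the queue size above (hypothetical caller code): runtime
+// settings are posted through a bounded, non-blocking SwapQueue so that the
+// real-time audio threads never have to wait.
+//
+//   webrtc::SwapQueue<webrtc::AudioProcessing::RuntimeSetting> q(
+//       webrtc::RuntimeSettingQueueSize());
+//   auto setting =
+//       webrtc::AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f);
+//   bool posted = q.Insert(&setting);  // false -> queue full, setting dropped.
+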
+class AudioProcessingImpl : public AudioProcessing {
+ public:
+  // Methods forcing APM to run in a single-threaded manner.
+  // Acquires both the render and capture locks.
+  AudioProcessingImpl();
+  AudioProcessingImpl(const AudioProcessing::Config& config,
+                      std::unique_ptr<CustomProcessing> capture_post_processor,
+                      std::unique_ptr<CustomProcessing> render_pre_processor,
+                      std::unique_ptr<EchoControlFactory> echo_control_factory,
+                      rtc::scoped_refptr<EchoDetector> echo_detector,
+                      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer);
+  ~AudioProcessingImpl() override;
+  int Initialize() override;
+  int Initialize(const ProcessingConfig& processing_config) override;
+  void ApplyConfig(const AudioProcessing::Config& config) override;
+  bool CreateAndAttachAecDump(absl::string_view file_name,
+                              int64_t max_log_size_bytes,
+                              rtc::TaskQueue* worker_queue) override;
+  bool CreateAndAttachAecDump(FILE* handle,
+                              int64_t max_log_size_bytes,
+                              rtc::TaskQueue* worker_queue) override;
+  // TODO(webrtc:5298) Deprecated variant.
+  void AttachAecDump(std::unique_ptr<AecDump> aec_dump) override;
+  void DetachAecDump() override;
+  void SetRuntimeSetting(RuntimeSetting setting) override;
+  bool PostRuntimeSetting(RuntimeSetting setting) override;
+
+  // Capture-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the capture lock.
+  int ProcessStream(const int16_t* const src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    int16_t* const dest) override;
+  int ProcessStream(const float* const* src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    float* const* dest) override;
+  bool GetLinearAecOutput(
+      rtc::ArrayView<std::array<float, 160>> linear_output) const override;
+  void set_output_will_be_muted(bool muted) override;
+  void HandleCaptureOutputUsedSetting(bool capture_output_used)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  int set_stream_delay_ms(int delay) override;
+  void set_stream_key_pressed(bool key_pressed) override;
+  void set_stream_analog_level(int level) override;
+  int recommended_stream_analog_level() const
+      RTC_LOCKS_EXCLUDED(mutex_capture_) override;
+
+  // Render-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the render lock.
+  int ProcessReverseStream(const int16_t* const src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           int16_t* const dest) override;
+  int AnalyzeReverseStream(const float* const* data,
+                           const StreamConfig& reverse_config) override;
+  int ProcessReverseStream(const float* const* src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           float* const* dest) override;
+
+  // Methods only accessed from APM submodules or
+  // from AudioProcessing tests in a single-threaded manner.
+  // Hence there is no need for locks in these.
+  int proc_sample_rate_hz() const override;
+  int proc_split_sample_rate_hz() const override;
+  size_t num_input_channels() const override;
+  size_t num_proc_channels() const override;
+  size_t num_output_channels() const override;
+  size_t num_reverse_channels() const override;
+  int stream_delay_ms() const override;
+
+  AudioProcessingStats GetStatistics(bool has_remote_tracks) override {
+    return GetStatistics();
+  }
+  AudioProcessingStats GetStatistics() override {
+    return stats_reporter_.GetStatistics();
+  }
+
+  AudioProcessing::Config GetConfig() const override;
+
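+  // Illustrative call pattern for the capture-side overloads above
+  // (hypothetical buffers; `apm` is an assumed AudioProcessing pointer):
+  // 10 ms of mono 48 kHz int16 audio processed in place.
+  //
+  //   webrtc::StreamConfig cfg(/*sample_rate_hz=*/48000, /*num_channels=*/1);
+  //   int16_t buffer[480] = {0};  // 10 ms at 48 kHz.
+  //   int err = apm->ProcessStream(buffer, cfg, cfg, buffer);
+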
+ protected:
+  // Overridden in a mock.
+  virtual void InitializeLocked()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
+  void AssertLockedForTest()
+      RTC_ASSERT_EXCLUSIVE_LOCK(mutex_render_, mutex_capture_) {
+    mutex_render_.AssertHeld();
+    mutex_capture_.AssertHeld();
+  }
+
+ private:
+  // TODO(peah): These friend classes should be removed as soon as the new
+  // parameter setting scheme allows.
+  FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, DefaultBehavior);
+  FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, ValidConfigBehavior);
+  FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, InValidConfigBehavior);
+  FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest,
+                           ToggleTransientSuppressor);
+  FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest,
+                           ReinitializeTransientSuppressor);
+  FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest,
+                           BitexactWithDisabledModules);
+  FRIEND_TEST_ALL_PREFIXES(
+      AudioProcessingImplGainController2FieldTrialParametrizedTest,
+      ConfigAdjustedWhenExperimentEnabled);
+
+  void set_stream_analog_level_locked(int level)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void UpdateRecommendedInputVolumeLocked()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  void OverrideSubmoduleCreationForTesting(
+      const ApmSubmoduleCreationOverrides& overrides);
+
+  // Class providing thread-safe message pipe functionality for
+  // `runtime_settings_`.
+  class RuntimeSettingEnqueuer {
+   public:
+    explicit RuntimeSettingEnqueuer(
+        SwapQueue<RuntimeSetting>* runtime_settings);
+    ~RuntimeSettingEnqueuer();
+
+    // Enqueue setting and return whether the setting was successfully
+    // enqueued.
+    bool Enqueue(RuntimeSetting setting);
+
+   private:
+    SwapQueue<RuntimeSetting>& runtime_settings_;
+  };
+
+  const std::unique_ptr<ApmDataDumper> data_dumper_;
+  static std::atomic<int> instance_count_;
+  const bool use_setup_specific_default_aec3_config_;
+
+  // Parameters for the "GainController2" experiment which determines whether
+  // the following APM sub-modules are created and, if so, their
+  // configurations: AGC2 (`gain_controller2`), AGC1 (`gain_control`,
+  // `agc_manager`) and TS (`transient_suppressor`).
+  // TODO(bugs.webrtc.org/7494): Remove when the "WebRTC-Audio-GainController2"
+  // field trial is removed.
+  struct GainController2ExperimentParams {
+    struct Agc2Config {
+      InputVolumeController::Config input_volume_controller;
+      AudioProcessing::Config::GainController2::AdaptiveDigital
+          adaptive_digital_controller;
+    };
+    // When `agc2_config` is specified, all gain control switches to AGC2 and
+    // the configuration is overridden.
+    absl::optional<Agc2Config> agc2_config;
+    // When true, the transient suppressor submodule is never created
+    // regardless of the APM configuration.
+    bool disallow_transient_suppressor_usage;
+  };
+  // Specified when the "WebRTC-Audio-GainController2" field trial is
+  // specified.
+  // TODO(bugs.webrtc.org/7494): Remove when the "WebRTC-Audio-GainController2"
+  // field trial is removed.
+  const absl::optional<GainController2ExperimentParams>
+      gain_controller2_experiment_params_;
+
+  // Parses the "WebRTC-Audio-GainController2" field trial. If disabled,
+  // returns an unspecified value.
+  static absl::optional<GainController2ExperimentParams>
+  GetGainController2ExperimentParams();
+
+  // When `experiment_params` is specified, returns an APM configuration
+  // modified according to the experiment parameters. Otherwise returns
+  // `config`.
+  static AudioProcessing::Config AdjustConfig(
+      const AudioProcessing::Config& config,
+      const absl::optional<GainController2ExperimentParams>&
+          experiment_params);
+
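+  // Illustrative only (test-style usage, not part of this header): enabling
+  // the field trial makes AdjustConfig() return a copy of `config` rewritten
+  // so that all gain control runs through AGC2.
+  //
+  //   webrtc::test::ScopedFieldTrials trials(
+  //       "WebRTC-Audio-GainController2/Enabled/");
+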
+  // Returns true if the APM VAD sub-module should be used.
+  static bool UseApmVadSubModule(
+      const AudioProcessing::Config& config,
+      const absl::optional<GainController2ExperimentParams>&
+          experiment_params);
+
+  TransientSuppressor::VadMode transient_suppressor_vad_mode_;
+
+  SwapQueue<RuntimeSetting> capture_runtime_settings_;
+  SwapQueue<RuntimeSetting> render_runtime_settings_;
+
+  RuntimeSettingEnqueuer capture_runtime_settings_enqueuer_;
+  RuntimeSettingEnqueuer render_runtime_settings_enqueuer_;
+
+  // EchoControl factory.
+  const std::unique_ptr<EchoControlFactory> echo_control_factory_;
+
+  class SubmoduleStates {
+   public:
+    SubmoduleStates(bool capture_post_processor_enabled,
+                    bool render_pre_processor_enabled,
+                    bool capture_analyzer_enabled);
+    // Updates the submodule state and returns true if it has changed.
+    bool Update(bool high_pass_filter_enabled,
+                bool mobile_echo_controller_enabled,
+                bool noise_suppressor_enabled,
+                bool adaptive_gain_controller_enabled,
+                bool gain_controller2_enabled,
+                bool voice_activity_detector_enabled,
+                bool gain_adjustment_enabled,
+                bool echo_controller_enabled,
+                bool transient_suppressor_enabled);
+    bool CaptureMultiBandSubModulesActive() const;
+    bool CaptureMultiBandProcessingPresent() const;
+    bool CaptureMultiBandProcessingActive(bool ec_processing_active) const;
+    bool CaptureFullBandProcessingActive() const;
+    bool CaptureAnalyzerActive() const;
+    bool RenderMultiBandSubModulesActive() const;
+    bool RenderFullBandProcessingActive() const;
+    bool RenderMultiBandProcessingActive() const;
+    bool HighPassFilteringRequired() const;
+
+   private:
+    const bool capture_post_processor_enabled_ = false;
+    const bool render_pre_processor_enabled_ = false;
+    const bool capture_analyzer_enabled_ = false;
+    bool high_pass_filter_enabled_ = false;
+    bool mobile_echo_controller_enabled_ = false;
+    bool noise_suppressor_enabled_ = false;
+    bool adaptive_gain_controller_enabled_ = false;
+    bool voice_activity_detector_enabled_ = false;
+    bool gain_controller2_enabled_ = false;
+    bool gain_adjustment_enabled_ = false;
+    bool echo_controller_enabled_ = false;
+    bool transient_suppressor_enabled_ = false;
+    bool first_update_ = true;
+  };
+
+  // Methods for modifying the formats struct that is used by both
+  // the render and capture threads. The check for whether modifications are
+  // needed is done while holding a single lock only, thereby avoiding that the
+  // capture thread blocks the render thread.
+  // Called by render: Holds the render lock when reading the format struct and
+  // acquires both locks if reinitialization is required.
+  void MaybeInitializeRender(const StreamConfig& input_config,
+                             const StreamConfig& output_config)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+  // Called by capture: Acquires and releases the capture lock to read the
+  // format struct and acquires both locks if reinitialization is needed.
+  void MaybeInitializeCapture(const StreamConfig& input_config,
+                              const StreamConfig& output_config);
+
+  // Method for updating the state keeping track of the active submodules.
+  // Returns a bool indicating whether the state has changed.
+  bool UpdateActiveSubmoduleStates()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Methods requiring APM running in a single-threaded manner, requiring both
+  // the render and capture lock to be acquired.
+  void InitializeLocked(const ProcessingConfig& config)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
+  void InitializeResidualEchoDetector()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
+  void InitializeEchoController()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
+
+  // Initializations of capture-only sub-modules, requiring the capture lock
+  // already acquired.
+  void InitializeHighPassFilter(bool forced_reset)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializeGainController1() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializeTransientSuppressor()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  // Initializes the `GainController2` sub-module. If the sub-module is
+  // enabled, recreates it.
+  void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  // Initializes the `VoiceActivityDetectorWrapper` sub-module. If the
+  // sub-module is enabled, recreates it. Call `InitializeGainController2()`
+  // first.
+  // TODO(bugs.webrtc.org/13663): Remove if TS is removed; otherwise, remove
+  // the call order requirement - i.e., decouple from
+  // `InitializeGainController2()`.
+  void InitializeVoiceActivityDetector()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializeNoiseSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializeCaptureLevelsAdjuster()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Initializations of render-only submodules, requiring the render lock
+  // already acquired.
+  void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+
+  // Sample rate used for the fullband processing.
+  int proc_fullband_sample_rate_hz() const
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Empties and handles the respective RuntimeSetting queues.
+  void HandleCaptureRuntimeSettings()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void HandleRenderRuntimeSettings()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+
+  void EmptyQueuedRenderAudio() RTC_LOCKS_EXCLUDED(mutex_capture_);
+  void EmptyQueuedRenderAudioLocked()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  void AllocateRenderQueue()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
+  void QueueBandedRenderAudio(AudioBuffer* audio)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+  void QueueNonbandedRenderAudio(AudioBuffer* audio)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+
+  // Capture-side exclusive methods possibly running APM in a multi-threaded
+  // manner that are called with the capture lock already acquired.
+  int ProcessCaptureStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Render-side exclusive methods possibly running APM in a multi-threaded
+  // manner that are called with the render lock already acquired.
+  int AnalyzeReverseStreamLocked(const float* const* src,
+                                 const StreamConfig& input_config,
+                                 const StreamConfig& output_config)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+  int ProcessRenderStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
+
+  // Collects configuration settings from public and private
+  // submodules to be saved as an audioproc::Config message on the
+  // AecDump if it is attached. If not `forced`, only writes the current
+  // config if it is different from the last saved one; if `forced`,
+  // writes the config regardless of the last saved.
+  void WriteAecDumpConfigMessage(bool forced)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Notifies attached AecDump of current configuration and capture data.
+  void RecordUnprocessedCaptureStream(const float* const* capture_stream)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  void RecordUnprocessedCaptureStream(const int16_t* const data,
+                                      const StreamConfig& config)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Notifies attached AecDump of current configuration and
+  // processed capture data and issues a capture stream recording
+  // request.
+  void RecordProcessedCaptureStream(
+      const float* const* processed_capture_stream)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  void RecordProcessedCaptureStream(const int16_t* const data,
+                                    const StreamConfig& config)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Notifies attached AecDump about current state (delay, drift, etc).
+  void RecordAudioProcessingState()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // Ensures that overruns in the capture runtime settings queue are properly
+  // handled by the code, providing safe fallbacks to mitigate the implications
+  // of any settings being missed.
+  void HandleOverrunInCaptureRuntimeSettingsQueue()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+
+  // AecDump instance used for optionally logging APM config, input
+  // and output to file in the AEC-dump format defined in debug.proto.
+  std::unique_ptr<AecDump> aec_dump_;
+
+  // Hold the last config written with AecDump for avoiding writing
+  // the same config twice.
+  InternalAPMConfig apm_config_for_aec_dump_ RTC_GUARDED_BY(mutex_capture_);
+
+  // Critical sections.
+  mutable Mutex mutex_render_ RTC_ACQUIRED_BEFORE(mutex_capture_);
+  mutable Mutex mutex_capture_;
+
+  // Struct containing the Config specifying the behavior of APM.
+  AudioProcessing::Config config_;
+
+  // Overrides for testing the exclusion of some submodules from the build.
+  ApmSubmoduleCreationOverrides submodule_creation_overrides_
+      RTC_GUARDED_BY(mutex_capture_);
+
+  // Class containing information about what submodules are active.
+  SubmoduleStates submodule_states_;
+
+  // Struct containing the pointers to the submodules.
+  struct Submodules {
+    Submodules(std::unique_ptr<CustomProcessing> capture_post_processor,
+               std::unique_ptr<CustomProcessing> render_pre_processor,
+               rtc::scoped_refptr<EchoDetector> echo_detector,
+               std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
+        : echo_detector(std::move(echo_detector)),
+          capture_post_processor(std::move(capture_post_processor)),
+          render_pre_processor(std::move(render_pre_processor)),
+          capture_analyzer(std::move(capture_analyzer)) {}
+    // Accessed internally from capture or during initialization.
+    const rtc::scoped_refptr<EchoDetector> echo_detector;
+    const std::unique_ptr<CustomProcessing> capture_post_processor;
+    const std::unique_ptr<CustomProcessing> render_pre_processor;
+    const std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
+    std::unique_ptr<AgcManagerDirect> agc_manager;
+    std::unique_ptr<GainControlImpl> gain_control;
+    std::unique_ptr<GainController2> gain_controller2;
+    std::unique_ptr<VoiceActivityDetectorWrapper> voice_activity_detector;
+    std::unique_ptr<HighPassFilter> high_pass_filter;
+    std::unique_ptr<EchoControl> echo_controller;
+    std::unique_ptr<EchoControlMobileImpl> echo_control_mobile;
+    std::unique_ptr<NoiseSuppressor> noise_suppressor;
+    std::unique_ptr<TransientSuppressor> transient_suppressor;
+    std::unique_ptr<CaptureLevelsAdjuster> capture_levels_adjuster;
+  } submodules_;
+
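+  // Illustrative construction (hypothetical caller code mirroring the public
+  // constructor): the const submodules are injected; everything else is
+  // (re)created on demand by the Initialize*() methods.
+  //
+  //   auto apm = rtc::make_ref_counted<webrtc::AudioProcessingImpl>(
+  //       webrtc::AudioProcessing::Config(),
+  //       /*capture_post_processor=*/nullptr,
+  //       /*render_pre_processor=*/nullptr,
+  //       /*echo_control_factory=*/nullptr,
+  //       /*echo_detector=*/nullptr,
+  //       /*capture_analyzer=*/nullptr);
+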
+  // State that is written to while holding both the render and capture locks
+  // but can be read without any lock being held.
+  // As this is only accessed internally in APM, and all internal methods in
+  // APM hold either the render or the capture lock, this construct is safe as
+  // it is not possible to read the variables while writing them.
+  struct ApmFormatState {
+    ApmFormatState()
+        :  // Format of processing streams at input/output call sites.
+          api_format({{{kSampleRate16kHz, 1},
+                       {kSampleRate16kHz, 1},
+                       {kSampleRate16kHz, 1},
+                       {kSampleRate16kHz, 1}}}),
+          render_processing_format(kSampleRate16kHz, 1) {}
+    ProcessingConfig api_format;
+    StreamConfig render_processing_format;
+  } formats_;
+
+  // APM constants.
+  const struct ApmConstants {
+    ApmConstants(bool multi_channel_render_support,
+                 bool multi_channel_capture_support,
+                 bool enforce_split_band_hpf,
+                 bool minimize_processing_for_unused_output,
+                 bool transient_suppressor_forced_off)
+        : multi_channel_render_support(multi_channel_render_support),
+          multi_channel_capture_support(multi_channel_capture_support),
+          enforce_split_band_hpf(enforce_split_band_hpf),
+          minimize_processing_for_unused_output(
+              minimize_processing_for_unused_output),
+          transient_suppressor_forced_off(transient_suppressor_forced_off) {}
+    bool multi_channel_render_support;
+    bool multi_channel_capture_support;
+    bool enforce_split_band_hpf;
+    bool minimize_processing_for_unused_output;
+    bool transient_suppressor_forced_off;
+  } constants_;
+
+  struct ApmCaptureState {
+    ApmCaptureState();
+    ~ApmCaptureState();
+    bool was_stream_delay_set;
+    bool capture_output_used;
+    bool capture_output_used_last_frame;
+    bool key_pressed;
+    std::unique_ptr<AudioBuffer> capture_audio;
+    std::unique_ptr<AudioBuffer> capture_fullband_audio;
+    std::unique_ptr<AudioBuffer> linear_aec_output;
+    // Only the rate and samples fields of capture_processing_format_ are used
+    // because the capture processing number of channels is mutable and is
+    // tracked by the capture_audio_.
+    StreamConfig capture_processing_format;
+    int split_rate;
+    bool echo_path_gain_change;
+    float prev_pre_adjustment_gain;
+    int playout_volume;
+    int prev_playout_volume;
+    AudioProcessingStats stats;
+    // Input volume applied on the audio input device when the audio is
+    // acquired. Unspecified when unknown.
+    absl::optional<int> applied_input_volume;
+    bool applied_input_volume_changed;
+    // Recommended input volume to apply on the audio input device the next
+    // time that audio is acquired. Unspecified when no input volume can be
+    // recommended.
+    absl::optional<int> recommended_input_volume;
+  } capture_ RTC_GUARDED_BY(mutex_capture_);
+
+  struct ApmCaptureNonLockedState {
+    ApmCaptureNonLockedState()
+        : capture_processing_format(kSampleRate16kHz),
+          split_rate(kSampleRate16kHz),
+          stream_delay_ms(0) {}
+    // Only the rate and samples fields of capture_processing_format_ are used
+    // because the forward processing number of channels is mutable and is
+    // tracked by the capture_audio_.
+    StreamConfig capture_processing_format;
+    int split_rate;
+    int stream_delay_ms;
+    bool echo_controller_enabled = false;
+  } capture_nonlocked_;
+
+  struct ApmRenderState {
+    ApmRenderState();
+    ~ApmRenderState();
+    std::unique_ptr<AudioConverter> render_converter;
+    std::unique_ptr<AudioBuffer> render_audio;
+  } render_ RTC_GUARDED_BY(mutex_render_);
+
+  // Class for statistics reporting. The class is thread-safe and no lock is
+  // needed when accessing it.
+  class ApmStatsReporter {
+   public:
+    ApmStatsReporter();
+    ~ApmStatsReporter();
+
+    // Returns the most recently reported statistics.
+    AudioProcessingStats GetStatistics();
+
+    // Update the cached statistics.
+    void UpdateStatistics(const AudioProcessingStats& new_stats);
+
+   private:
+    Mutex mutex_stats_;
+    AudioProcessingStats cached_stats_ RTC_GUARDED_BY(mutex_stats_);
+    SwapQueue<AudioProcessingStats> stats_message_queue_;
+  } stats_reporter_;
+
+  std::vector<int16_t> aecm_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_);
+  std::vector<int16_t> aecm_capture_queue_buffer_
+      RTC_GUARDED_BY(mutex_capture_);
+
+  size_t agc_render_queue_element_max_size_ RTC_GUARDED_BY(mutex_render_)
+      RTC_GUARDED_BY(mutex_capture_) = 0;
+  std::vector<int16_t> agc_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_);
+  std::vector<int16_t> agc_capture_queue_buffer_ RTC_GUARDED_BY(mutex_capture_);
+
+  size_t red_render_queue_element_max_size_ RTC_GUARDED_BY(mutex_render_)
+      RTC_GUARDED_BY(mutex_capture_) = 0;
+  std::vector<float> red_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_);
+  std::vector<float> red_capture_queue_buffer_ RTC_GUARDED_BY(mutex_capture_);
+
+  RmsLevel capture_input_rms_ RTC_GUARDED_BY(mutex_capture_);
+  RmsLevel capture_output_rms_ RTC_GUARDED_BY(mutex_capture_);
+  int capture_rms_interval_counter_ RTC_GUARDED_BY(mutex_capture_) = 0;
+
+  InputVolumeStatsReporter applied_input_volume_stats_reporter_
+      RTC_GUARDED_BY(mutex_capture_);
+  InputVolumeStatsReporter recommended_input_volume_stats_reporter_
+      RTC_GUARDED_BY(mutex_capture_);
+
+  // Lock protection not needed.
+  std::unique_ptr<
+      SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>>
+      aecm_render_signal_queue_;
+  std::unique_ptr<
+      SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>>
+      agc_render_signal_queue_;
+  std::unique_ptr<
+      SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>>
+      red_render_signal_queue_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
new file mode 100644
index 0000000000..3614b574df
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -0,0 +1,1012 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_processing_impl.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/event.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/random.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "system_wrappers/include/sleep.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMaxFrameSize = 480;
+constexpr TimeDelta kTestTimeOutLimit = TimeDelta::Minutes(10);
+
+class AudioProcessingImplLockTest;
+
+// Type of the render thread APM API call to use in the test.
+enum class RenderApiImpl {
+  ProcessReverseStreamImplInteger,
+  ProcessReverseStreamImplFloat,
+  AnalyzeReverseStreamImplFloat,
+};
+
+// Type of the capture thread APM API call to use in the test.
+enum class CaptureApiImpl { ProcessStreamImplInteger, ProcessStreamImplFloat };
+
+// The runtime parameter setting scheme to use in the test.
+enum class RuntimeParameterSettingScheme {
+  SparseStreamMetadataChangeScheme,
+  ExtremeStreamMetadataChangeScheme,
+  FixedMonoStreamMetadataScheme,
+  FixedStereoStreamMetadataScheme
+};
+
+// Variant of echo canceller settings to use in the test.
+enum class AecType {
+  BasicWebRtcAecSettings,
+  AecTurnedOff,
+  BasicWebRtcAecSettingsWithExtendedFilter,
+  BasicWebRtcAecSettingsWithDelayAgnosticAec,
+  BasicWebRtcAecSettingsWithAecMobile
+};
+
+// Thread-safe random number generator wrapper.
+class RandomGenerator {
+ public:
+  RandomGenerator() : rand_gen_(42U) {}
+
+  int RandInt(int min, int max) {
+    MutexLock lock(&mutex_);
+    return rand_gen_.Rand(min, max);
+  }
+
+  int RandInt(int max) {
+    MutexLock lock(&mutex_);
+    return rand_gen_.Rand(max);
+  }
+
+  float RandFloat() {
+    MutexLock lock(&mutex_);
+    return rand_gen_.Rand<float>();
+  }
+
+ private:
+  Mutex mutex_;
+  Random rand_gen_ RTC_GUARDED_BY(mutex_);
+};
+
+// Variables related to the audio data and formats.
+struct AudioFrameData {
+  explicit AudioFrameData(int max_frame_size) {
+    // Set up the two-dimensional arrays needed for the APM API calls.
+    input_frame_channels.resize(2 * max_frame_size);
+    input_frame.resize(2);
+    input_frame[0] = &input_frame_channels[0];
+    input_frame[1] = &input_frame_channels[max_frame_size];
+
+    output_frame_channels.resize(2 * max_frame_size);
+    output_frame.resize(2);
+    output_frame[0] = &output_frame_channels[0];
+    output_frame[1] = &output_frame_channels[max_frame_size];
+
+    frame.resize(2 * max_frame_size);
+  }
+
+  std::vector<int16_t> frame;
+
+  std::vector<float*> output_frame;
+  std::vector<float> output_frame_channels;
+  std::vector<float*> input_frame;
+  std::vector<float> input_frame_channels;
+
+  int input_sample_rate_hz = 16000;
+  int input_number_of_channels = 1;
+  int output_sample_rate_hz = 16000;
+  int output_number_of_channels = 1;
+};
+
+// The configuration for the test.
+struct TestConfig {
+  // Test case generator for the test configurations to use in the brief
+  // tests.
+  static std::vector<TestConfig> GenerateBriefTestConfigs() {
+    std::vector<TestConfig> test_configs;
+    AecType aec_types[] = {AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec,
+                           AecType::BasicWebRtcAecSettingsWithAecMobile};
+    for (auto aec_type : aec_types) {
+      TestConfig test_config;
+      test_config.aec_type = aec_type;
+
+      test_config.min_number_of_calls = 300;
+
+      // Perform tests only with the extreme runtime parameter setting scheme.
+      test_config.runtime_parameter_setting_scheme =
+          RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme;
+
+      // Only test 16 kHz for this test suite.
+      test_config.initial_sample_rate_hz = 16000;
+
+      // Create test config for the Int16 processing API function set.
+      test_config.render_api_function =
+          RenderApiImpl::ProcessReverseStreamImplInteger;
+      test_config.capture_api_function =
+          CaptureApiImpl::ProcessStreamImplInteger;
+      test_configs.push_back(test_config);
+
+      // Create test config for the StreamConfig processing API function set.
+      test_config.render_api_function =
+          RenderApiImpl::ProcessReverseStreamImplFloat;
+      test_config.capture_api_function = CaptureApiImpl::ProcessStreamImplFloat;
+      test_configs.push_back(test_config);
+    }
+
+    // Return the created test configurations.
+    return test_configs;
+  }
+
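+  // Illustrative check (hypothetical gtest code): the brief generator yields
+  // 2 AEC types x 2 API function sets = 4 configurations.
+  //
+  //   std::vector<TestConfig> brief = TestConfig::GenerateBriefTestConfigs();
+  //   EXPECT_EQ(4u, brief.size());
+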
+  // Test case generator for the test configurations to use in the extensive
+  // tests.
+  static std::vector<TestConfig> GenerateExtensiveTestConfigs() {
+    // Lambda functions for the test config generation.
+    auto add_processing_apis = [](TestConfig test_config) {
+      struct AllowedApiCallCombinations {
+        RenderApiImpl render_api;
+        CaptureApiImpl capture_api;
+      };
+
+      const AllowedApiCallCombinations api_calls[] = {
+          {RenderApiImpl::ProcessReverseStreamImplInteger,
+           CaptureApiImpl::ProcessStreamImplInteger},
+          {RenderApiImpl::ProcessReverseStreamImplFloat,
+           CaptureApiImpl::ProcessStreamImplFloat},
+          {RenderApiImpl::AnalyzeReverseStreamImplFloat,
+           CaptureApiImpl::ProcessStreamImplFloat},
+          {RenderApiImpl::ProcessReverseStreamImplInteger,
+           CaptureApiImpl::ProcessStreamImplFloat},
+          {RenderApiImpl::ProcessReverseStreamImplFloat,
+           CaptureApiImpl::ProcessStreamImplInteger}};
+      std::vector<TestConfig> out;
+      for (auto api_call : api_calls) {
+        test_config.render_api_function = api_call.render_api;
+        test_config.capture_api_function = api_call.capture_api;
+        out.push_back(test_config);
+      }
+      return out;
+    };
+
+    auto add_aec_settings = [](const std::vector<TestConfig>& in) {
+      std::vector<TestConfig> out;
+      AecType aec_types[] = {
+          AecType::BasicWebRtcAecSettings, AecType::AecTurnedOff,
+          AecType::BasicWebRtcAecSettingsWithExtendedFilter,
+          AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec,
+          AecType::BasicWebRtcAecSettingsWithAecMobile};
+      for (auto test_config : in) {
+        // Due to a VisualStudio 2015 compiler issue, the internal loop
+        // variable here cannot override a previously defined name.
+        // In other words "type" cannot be named "aec_type" here.
+        // https://connect.microsoft.com/VisualStudio/feedback/details/2291755
+        for (auto type : aec_types) {
+          test_config.aec_type = type;
+          out.push_back(test_config);
+        }
+      }
+      return out;
+    };
+
+    auto add_settings_scheme = [](const std::vector<TestConfig>& in) {
+      std::vector<TestConfig> out;
+      RuntimeParameterSettingScheme schemes[] = {
+          RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme,
+          RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme,
+          RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme,
+          RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme};
+
+      for (auto test_config : in) {
+        for (auto scheme : schemes) {
+          test_config.runtime_parameter_setting_scheme = scheme;
+          out.push_back(test_config);
+        }
+      }
+      return out;
+    };
+
+    auto add_sample_rates = [](const std::vector<TestConfig>& in) {
+      const int sample_rates[] = {8000, 16000, 32000, 48000};
+
+      std::vector<TestConfig> out;
+      for (auto test_config : in) {
+        auto available_rates =
+            (test_config.aec_type ==
+                     AecType::BasicWebRtcAecSettingsWithAecMobile
+                 ? rtc::ArrayView<const int>(sample_rates, 2)
+                 : rtc::ArrayView<const int>(sample_rates));
+
+        for (auto rate : available_rates) {
+          test_config.initial_sample_rate_hz = rate;
+          out.push_back(test_config);
+        }
+      }
+      return out;
+    };
+
+    // Generate test configurations for the relevant combinations of the
+    // parameters to test.
+    TestConfig test_config;
+    test_config.min_number_of_calls = 10000;
+    return add_sample_rates(add_settings_scheme(
+        add_aec_settings(add_processing_apis(test_config))));
+  }
+
+  RenderApiImpl render_api_function =
+      RenderApiImpl::ProcessReverseStreamImplFloat;
+  CaptureApiImpl capture_api_function = CaptureApiImpl::ProcessStreamImplFloat;
+  RuntimeParameterSettingScheme runtime_parameter_setting_scheme =
+      RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme;
+  int initial_sample_rate_hz = 16000;
+  AecType aec_type = AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec;
+  int min_number_of_calls = 300;
+};
+
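+// Size of the extensive set above (illustrative arithmetic): 5 API
+// combinations x 5 AEC types x 4 setting schemes = 100 configurations, then
+// expanding sample rates (4 rates, but only 2 for AECM) gives
+// 80 * 4 + 20 * 2 = 360 configurations in total.
+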
+// Handler for the frame counters.
+class FrameCounters {
+ public:
+  void IncreaseRenderCounter() {
+    MutexLock lock(&mutex_);
+    render_count++;
+  }
+
+  void IncreaseCaptureCounter() {
+    MutexLock lock(&mutex_);
+    capture_count++;
+  }
+
+  int GetCaptureCounter() const {
+    MutexLock lock(&mutex_);
+    return capture_count;
+  }
+
+  int GetRenderCounter() const {
+    MutexLock lock(&mutex_);
+    return render_count;
+  }
+
+  int CaptureMinusRenderCounters() const {
+    MutexLock lock(&mutex_);
+    return capture_count - render_count;
+  }
+
+  int RenderMinusCaptureCounters() const {
+    return -CaptureMinusRenderCounters();
+  }
+
+  bool BothCountersExceedThreshold(int threshold) {
+    MutexLock lock(&mutex_);
+    return (render_count > threshold && capture_count > threshold);
+  }
+
+ private:
+  mutable Mutex mutex_;
+  int render_count RTC_GUARDED_BY(mutex_) = 0;
+  int capture_count RTC_GUARDED_BY(mutex_) = 0;
+};
+
+// Class for handling the capture side processing.
+class CaptureProcessor {
+ public:
+  CaptureProcessor(int max_frame_size,
+                   RandomGenerator* rand_gen,
+                   rtc::Event* render_call_event,
+                   rtc::Event* capture_call_event,
+                   FrameCounters* shared_counters_state,
+                   const TestConfig* test_config,
+                   AudioProcessing* apm);
+  void Process();
+
+ private:
+  static constexpr int kMaxCallDifference = 10;
+  static constexpr float kCaptureInputFloatLevel = 0.03125f;
+  static constexpr int kCaptureInputFixLevel = 1024;
+
+  void PrepareFrame();
+  void CallApmCaptureSide();
+  void ApplyRuntimeSettingScheme();
+
+  RandomGenerator* const rand_gen_ = nullptr;
+  rtc::Event* const render_call_event_ = nullptr;
+  rtc::Event* const capture_call_event_ = nullptr;
+  FrameCounters* const frame_counters_ = nullptr;
+  const TestConfig* const test_config_ = nullptr;
+  AudioProcessing* const apm_ = nullptr;
+  AudioFrameData frame_data_;
+};
+
+// Class for handling the stats processing.
+class StatsProcessor {
+ public:
+  StatsProcessor(RandomGenerator* rand_gen,
+                 const TestConfig* test_config,
+                 AudioProcessing* apm);
+  void Process();
+
+ private:
+  RandomGenerator* rand_gen_ = nullptr;
+  const TestConfig* const test_config_ = nullptr;
+  AudioProcessing* apm_ = nullptr;
+};
+
+// Class for handling the render side processing.
+class RenderProcessor {
+ public:
+  RenderProcessor(int max_frame_size,
+                  RandomGenerator* rand_gen,
+                  rtc::Event* render_call_event,
+                  rtc::Event* capture_call_event,
+                  FrameCounters* shared_counters_state,
+                  const TestConfig* test_config,
+                  AudioProcessing* apm);
+  void Process();
+
+ private:
+  static constexpr int kMaxCallDifference = 10;
+  static constexpr int kRenderInputFixLevel = 16384;
+  static constexpr float kRenderInputFloatLevel = 0.5f;
+
+  void PrepareFrame();
+  void CallApmRenderSide();
+  void ApplyRuntimeSettingScheme();
+
+  RandomGenerator* const rand_gen_ = nullptr;
+  rtc::Event* const render_call_event_ = nullptr;
+  rtc::Event* const capture_call_event_ = nullptr;
+  FrameCounters* const frame_counters_ = nullptr;
+  const TestConfig* const test_config_ = nullptr;
+  AudioProcessing* const apm_ = nullptr;
+  AudioFrameData frame_data_;
+  bool first_render_call_ = true;
+};
+
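+// Illustrative sketch of the throttling shared by both processors above: each
+// side waits on the other side's event once it runs more than
+// kMaxCallDifference calls ahead, keeping the two real-time threads loosely
+// in lock-step.
+//
+//   if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference) {
+//     render_call_event_->Wait(rtc::Event::kForever);
+//   }
+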
+class AudioProcessingImplLockTest
+    : public ::testing::TestWithParam<TestConfig> {
+ public:
+  AudioProcessingImplLockTest();
+  bool RunTest();
+  bool MaybeEndTest();
+
+ private:
+  void SetUp() override;
+  void TearDown() override;
+
+  // Tests whether all the required render and capture side calls have been
+  // done.
+  bool TestDone() {
+    return frame_counters_.BothCountersExceedThreshold(
+        test_config_.min_number_of_calls);
+  }
+
+  // Start the threads used in the test.
+  void StartThreads() {
+    const auto attributes =
+        rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime);
+    render_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!MaybeEndTest())
+            render_thread_state_.Process();
+        },
+        "render", attributes);
+    capture_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!MaybeEndTest()) {
+            capture_thread_state_.Process();
+          }
+        },
+        "capture", attributes);
+
+    stats_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (!MaybeEndTest())
+            stats_thread_state_.Process();
+        },
+        "stats", attributes);
+  }
+
+  // Event handlers for the test.
+  rtc::Event test_complete_;
+  rtc::Event render_call_event_;
+  rtc::Event capture_call_event_;
+
+  // Thread related variables.
+  mutable RandomGenerator rand_gen_;
+
+  const TestConfig test_config_;
+  rtc::scoped_refptr<AudioProcessing> apm_;
+  FrameCounters frame_counters_;
+  RenderProcessor render_thread_state_;
+  CaptureProcessor capture_thread_state_;
+  StatsProcessor stats_thread_state_;
+  rtc::PlatformThread render_thread_;
+  rtc::PlatformThread capture_thread_;
+  rtc::PlatformThread stats_thread_;
+};
+
+// Sleeps a random time between 0 and max_sleep milliseconds.
+void SleepRandomMs(int max_sleep, RandomGenerator* rand_gen) {
+  int sleeptime = rand_gen->RandInt(0, max_sleep);
+  SleepMs(sleeptime);
+}
+
+// Populates a float audio frame with random data.
+void PopulateAudioFrame(float** frame,
+                        float amplitude,
+                        size_t num_channels,
+                        size_t samples_per_channel,
+                        RandomGenerator* rand_gen) {
+  for (size_t ch = 0; ch < num_channels; ch++) {
+    for (size_t k = 0; k < samples_per_channel; k++) {
+      // Store a random 16-bit-quantized float number in
+      // [-amplitude, amplitude].
+      frame[ch][k] = amplitude * (2 * rand_gen->RandFloat() - 1);
+    }
+  }
+}
+
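+// Worked example for the integer variant below (illustrative): with
+// amplitude = 1024, RandInt(2 * 1024 + 1) returns a value in [0, 2049], so
+// the stored sample, value - 1025, lies in [-1025, 1024].
+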
+// Populates an integer audio frame with random data.
+void PopulateAudioFrame(float amplitude,
+                        size_t num_channels,
+                        size_t samples_per_channel,
+                        rtc::ArrayView<int16_t> frame,
+                        RandomGenerator* rand_gen) {
+  ASSERT_GT(amplitude, 0);
+  ASSERT_LE(amplitude, 32767);
+  for (size_t ch = 0; ch < num_channels; ch++) {
+    for (size_t k = 0; k < samples_per_channel; k++) {
+      // Store random 16 bit number between -(amplitude+1) and
+      // amplitude.
+      frame[k * num_channels + ch] =
+          rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
+    }
+  }
+}
+
+AudioProcessing::Config GetApmTestConfig(AecType aec_type) {
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = aec_type != AecType::AecTurnedOff;
+  apm_config.echo_canceller.mobile_mode =
+      aec_type == AecType::BasicWebRtcAecSettingsWithAecMobile;
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::kAdaptiveDigital;
+  apm_config.noise_suppression.enabled = true;
+  return apm_config;
+}
+
+AudioProcessingImplLockTest::AudioProcessingImplLockTest()
+    : test_config_(GetParam()),
+      apm_(AudioProcessingBuilderForTesting()
+               .SetConfig(GetApmTestConfig(test_config_.aec_type))
+               .Create()),
+      render_thread_state_(kMaxFrameSize,
+                           &rand_gen_,
+                           &render_call_event_,
+                           &capture_call_event_,
+                           &frame_counters_,
+                           &test_config_,
+                           apm_.get()),
+      capture_thread_state_(kMaxFrameSize,
+                            &rand_gen_,
+                            &render_call_event_,
+                            &capture_call_event_,
+                            &frame_counters_,
+                            &test_config_,
+                            apm_.get()),
+      stats_thread_state_(&rand_gen_, &test_config_, apm_.get()) {}
+
+// Run the test with a timeout.
+bool AudioProcessingImplLockTest::RunTest() {
+  StartThreads();
+  return test_complete_.Wait(kTestTimeOutLimit);
+}
+
+bool AudioProcessingImplLockTest::MaybeEndTest() {
+  if (HasFatalFailure() || TestDone()) {
+    test_complete_.Set();
+    return true;
+  }
+  return false;
+}
+
+void AudioProcessingImplLockTest::SetUp() {}
+
+void AudioProcessingImplLockTest::TearDown() {
+  render_call_event_.Set();
+  capture_call_event_.Set();
+}
+
+StatsProcessor::StatsProcessor(RandomGenerator* rand_gen,
+                               const TestConfig* test_config,
+                               AudioProcessing* apm)
+    : rand_gen_(rand_gen), test_config_(test_config), apm_(apm) {}
+
+// Implements the callback functionality for the statistics
+// collection thread.
+void StatsProcessor::Process() {
+  SleepRandomMs(100, rand_gen_);
+
+  AudioProcessing::Config apm_config = apm_->GetConfig();
+  if (test_config_->aec_type != AecType::AecTurnedOff) {
+    EXPECT_TRUE(apm_config.echo_canceller.enabled);
+    EXPECT_EQ(apm_config.echo_canceller.mobile_mode,
+              (test_config_->aec_type ==
+               AecType::BasicWebRtcAecSettingsWithAecMobile));
+  } else {
+    EXPECT_FALSE(apm_config.echo_canceller.enabled);
+  }
+  EXPECT_TRUE(apm_config.gain_controller1.enabled);
+  EXPECT_TRUE(apm_config.noise_suppression.enabled);
+
+  // The below return value is not testable.
+  apm_->GetStatistics();
+}
+
+CaptureProcessor::CaptureProcessor(int max_frame_size,
+                                   RandomGenerator* rand_gen,
+                                   rtc::Event* render_call_event,
+                                   rtc::Event* capture_call_event,
+                                   FrameCounters* shared_counters_state,
+                                   const TestConfig* test_config,
+                                   AudioProcessing* apm)
+    : rand_gen_(rand_gen),
+      render_call_event_(render_call_event),
+      capture_call_event_(capture_call_event),
+      frame_counters_(shared_counters_state),
+      test_config_(test_config),
+      apm_(apm),
+      frame_data_(max_frame_size) {}
+
+// Implements the callback functionality for the capture thread.
+void CaptureProcessor::Process() {
+  // Sleep a random time to simulate thread jitter.
+  SleepRandomMs(3, rand_gen_);
+
+  // Ensure that the number of render and capture calls do not
+  // differ too much.
+  if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference) {
+    render_call_event_->Wait(rtc::Event::kForever);
+  }
+
+  // Apply any specified capture side APM non-processing runtime calls.
+  ApplyRuntimeSettingScheme();
+
+  // Apply the capture side processing call.
+  CallApmCaptureSide();
+
+  // Increase the number of capture-side calls.
+  frame_counters_->IncreaseCaptureCounter();
+
+  // Flag to the render thread that another capture API call has occurred
+  // by triggering this thread's call event.
+  capture_call_event_->Set();
+}
+
+// Prepares a frame with relevant audio data and metadata.
+void CaptureProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the integer
+  // interface is used.
+  if (test_config_->capture_api_function ==
+      CaptureApiImpl::ProcessStreamImplInteger) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the audio data.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+
+  PopulateAudioFrame(kCaptureInputFixLevel, input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), frame_data_.frame,
+                     rand_gen_);
+
+  PopulateAudioFrame(&frame_data_.input_frame[0], kCaptureInputFloatLevel,
+                     input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), rand_gen_);
+}
+
+// Applies the capture side processing API call.
+void CaptureProcessor::CallApmCaptureSide() {
+  // Prepare a proper capture side processing API call input.
+  PrepareFrame();
+
+  // Set the stream delay.
+  apm_->set_stream_delay_ms(30);
+
+  // Set the analog level.
+  apm_->set_stream_analog_level(80);
+
+  // Call the specified capture side API processing method.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+  StreamConfig output_stream_config(frame_data_.output_sample_rate_hz,
+                                    frame_data_.output_number_of_channels);
+  int result = AudioProcessing::kNoError;
+  switch (test_config_->capture_api_function) {
+    case CaptureApiImpl::ProcessStreamImplInteger:
+      result =
+          apm_->ProcessStream(frame_data_.frame.data(), input_stream_config,
+                              output_stream_config, frame_data_.frame.data());
+      break;
+    case CaptureApiImpl::ProcessStreamImplFloat:
+      result = apm_->ProcessStream(&frame_data_.input_frame[0],
+                                   input_stream_config, output_stream_config,
+                                   &frame_data_.output_frame[0]);
+      break;
+    default:
+      FAIL();
+  }
+
+  // Retrieve the new analog level.
+  apm_->recommended_stream_analog_level();
+
+  // Check the return code for error.
+  ASSERT_EQ(AudioProcessing::kNoError, result);
+}
+
+// Applies any runtime capture APM API calls and audio stream characteristics
+// specified by the scheme for the test.
+void CaptureProcessor::ApplyRuntimeSettingScheme() {
+  const int capture_count_local = frame_counters_->GetCaptureCounter();
+
+  // Update the number of channels and sample rates for the input and output.
+  // Note that the frequencies at which the parameters are changed are chosen
+  // to be prime numbers, to ensure that the permutation scheme of the
+  // parameter settings keeps changing.
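+  // For example (illustrative): on capture call 44, which is divisible by
+  // both 4 and 11, the input sample rate switches to 32 kHz and the input
+  // channel count toggles, while the output format is left unchanged.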
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+      if (capture_count_local == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (capture_count_local % 11 == 0)
+        frame_data_.input_sample_rate_hz = 32000;
+      else if (capture_count_local % 73 == 0)
+        frame_data_.input_sample_rate_hz = 48000;
+      else if (capture_count_local % 89 == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (capture_count_local % 97 == 0)
+        frame_data_.input_sample_rate_hz = 8000;
+
+      if (capture_count_local == 0)
+        frame_data_.input_number_of_channels = 1;
+      else if (capture_count_local % 4 == 0)
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+
+      if (capture_count_local == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (capture_count_local % 5 == 0)
+        frame_data_.output_sample_rate_hz = 32000;
+      else if (capture_count_local % 47 == 0)
+        frame_data_.output_sample_rate_hz = 48000;
+      else if (capture_count_local % 53 == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (capture_count_local % 71 == 0)
+        frame_data_.output_sample_rate_hz = 8000;
+
+      if (capture_count_local == 0)
+        frame_data_.output_number_of_channels = 1;
+      else if (capture_count_local % 8 == 0)
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+      if (capture_count_local % 2 == 0) {
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+      } else {
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.input_sample_rate_hz == 8000)
+          frame_data_.input_sample_rate_hz = 16000;
+        else if (frame_data_.input_sample_rate_hz == 16000)
+          frame_data_.input_sample_rate_hz = 32000;
+        else if (frame_data_.input_sample_rate_hz == 32000)
+          frame_data_.input_sample_rate_hz = 48000;
+        else if (frame_data_.input_sample_rate_hz == 48000)
+          frame_data_.input_sample_rate_hz = 8000;
+
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.output_sample_rate_hz == 8000)
+          frame_data_.output_sample_rate_hz = 16000;
+        else if (frame_data_.output_sample_rate_hz == 16000)
+          frame_data_.output_sample_rate_hz = 32000;
+        else if (frame_data_.output_sample_rate_hz == 32000)
+          frame_data_.output_sample_rate_hz = 48000;
+        else if (frame_data_.output_sample_rate_hz == 48000)
+          frame_data_.output_sample_rate_hz = 8000;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      if (capture_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (capture_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 2;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 2;
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Call any specified runtime APM setter and getter calls.
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (capture_count_local % 2 == 0) {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(30));
+        apm_->set_stream_key_pressed(true);
+      } else {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(50));
+        apm_->set_stream_key_pressed(false);
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Restrict the number of output channels so that it does not exceed
+  // the number of input channels.
+  frame_data_.output_number_of_channels =
+      std::min(frame_data_.output_number_of_channels,
+               frame_data_.input_number_of_channels);
+}
+
+RenderProcessor::RenderProcessor(int max_frame_size,
+                                 RandomGenerator* rand_gen,
+                                 rtc::Event* render_call_event,
+                                 rtc::Event* capture_call_event,
+                                 FrameCounters* shared_counters_state,
+                                 const TestConfig* test_config,
+                                 AudioProcessing* apm)
+    : rand_gen_(rand_gen),
+      render_call_event_(render_call_event),
+      capture_call_event_(capture_call_event),
+      frame_counters_(shared_counters_state),
+      test_config_(test_config),
+      apm_(apm),
+      frame_data_(max_frame_size) {}
+
+// Implements the callback functionality for the render thread.
+void RenderProcessor::Process() {
+  // Conditional wait to ensure that a capture call has been done
+  // before the first render call is performed (implicitly
+  // required by the APM API).
+  if (first_render_call_) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+    first_render_call_ = false;
+  }
+
+  // Sleep a random time to simulate thread jitter.
+  SleepRandomMs(3, rand_gen_);
+
+  // Ensure that the number of render and capture calls do not
+  // differ too much.
+  if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+  }
+
+  // Apply any specified render side APM non-processing runtime calls.
+  ApplyRuntimeSettingScheme();
+
+  // Apply the render side processing call.
+  CallApmRenderSide();
+
+  // Increase the number of render-side calls.
+  frame_counters_->IncreaseRenderCounter();
+
+  // Flag to the capture thread that another render API call has occurred
+  // by triggering this thread's call event.
+  render_call_event_->Set();
+}
+
+// Prepares the render side frame and the accompanying metadata
+// with the appropriate information.
+void RenderProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the integer interface is
+  // used.
+  if ((test_config_->render_api_function ==
+       RenderApiImpl::ProcessReverseStreamImplInteger) ||
+      (test_config_->aec_type !=
+       AecType::BasicWebRtcAecSettingsWithAecMobile)) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the audio data.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+
+  PopulateAudioFrame(kRenderInputFixLevel, input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), frame_data_.frame,
+                     rand_gen_);
+
+  PopulateAudioFrame(&frame_data_.input_frame[0], kRenderInputFloatLevel,
+                     input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), rand_gen_);
+}
+
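+// Note on PrepareFrame() above (illustrative reading of the condition): the
+// render rates are left free to vary only when a float API function is used
+// together with the AECM settings; every other combination keeps the initial
+// sample rate for the whole test.
+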
+// Makes the render side processing API call.
+void RenderProcessor::CallApmRenderSide() {
+  // Prepare a proper render side processing API call input.
+  PrepareFrame();
+
+  // Call the specified render side API processing method.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+  StreamConfig output_stream_config(frame_data_.output_sample_rate_hz,
+                                    frame_data_.output_number_of_channels);
+  int result = AudioProcessing::kNoError;
+  switch (test_config_->render_api_function) {
+    case RenderApiImpl::ProcessReverseStreamImplInteger:
+      result = apm_->ProcessReverseStream(
+          frame_data_.frame.data(), input_stream_config, output_stream_config,
+          frame_data_.frame.data());
+      break;
+    case RenderApiImpl::ProcessReverseStreamImplFloat:
+      result = apm_->ProcessReverseStream(
+          &frame_data_.input_frame[0], input_stream_config,
+          output_stream_config, &frame_data_.output_frame[0]);
+      break;
+    case RenderApiImpl::AnalyzeReverseStreamImplFloat:
+      result = apm_->AnalyzeReverseStream(&frame_data_.input_frame[0],
+                                          input_stream_config);
+      break;
+    default:
+      FAIL();
+  }
+
+  // Check the return code for error.
+  ASSERT_EQ(AudioProcessing::kNoError, result);
+}
+
+// Applies any render side APM API calls and audio stream characteristics
+// specified by the scheme for the test.
+void RenderProcessor::ApplyRuntimeSettingScheme() {
+  const int render_count_local = frame_counters_->GetRenderCounter();
+
+  // Update the number of channels and sample rates for the input and output.
+  // Note that the frequencies at which the parameters are changed are chosen
+  // to be prime numbers, to ensure that the permutation scheme of the
+  // parameter settings keeps changing.
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+      if (render_count_local == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (render_count_local % 47 == 0)
+        frame_data_.input_sample_rate_hz = 32000;
+      else if (render_count_local % 71 == 0)
+        frame_data_.input_sample_rate_hz = 48000;
+      else if (render_count_local % 79 == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (render_count_local % 83 == 0)
+        frame_data_.input_sample_rate_hz = 8000;
+
+      if (render_count_local == 0)
+        frame_data_.input_number_of_channels = 1;
+      else if (render_count_local % 4 == 0)
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+
+      if (render_count_local == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (render_count_local % 17 == 0)
+        frame_data_.output_sample_rate_hz = 32000;
+      else if (render_count_local % 19 == 0)
+        frame_data_.output_sample_rate_hz = 48000;
+      else if (render_count_local % 29 == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (render_count_local % 61 == 0)
+        frame_data_.output_sample_rate_hz = 8000;
+
+      if (render_count_local == 0)
+        frame_data_.output_number_of_channels = 1;
+      else if (render_count_local % 8 == 0)
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+      } else {
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.input_sample_rate_hz == 8000)
+          frame_data_.input_sample_rate_hz = 16000;
+        else if (frame_data_.input_sample_rate_hz == 16000)
+          frame_data_.input_sample_rate_hz = 32000;
+        else if (frame_data_.input_sample_rate_hz == 32000)
+          frame_data_.input_sample_rate_hz = 48000;
+        else if (frame_data_.input_sample_rate_hz == 48000)
+          frame_data_.input_sample_rate_hz = 8000;
+
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.output_sample_rate_hz == 8000)
+          frame_data_.output_sample_rate_hz = 16000;
+        else if (frame_data_.output_sample_rate_hz == 16000)
+          frame_data_.output_sample_rate_hz = 32000;
+        else if (frame_data_.output_sample_rate_hz == 32000)
+          frame_data_.output_sample_rate_hz = 48000;
+        else if (frame_data_.output_sample_rate_hz == 48000)
+          frame_data_.output_sample_rate_hz = 8000;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 2;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 2;
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Restrict the number of output channels so that it does not exceed the
+  // number of input channels.
+  frame_data_.output_number_of_channels =
+      std::min(frame_data_.output_number_of_channels,
+               frame_data_.input_number_of_channels);
+}
+
+}  // namespace
+
+TEST_P(AudioProcessingImplLockTest, LockTest) {
+  // Run the test and verify that it did not time out.
+  ASSERT_TRUE(RunTest());
+}
+
+// Instantiate tests from the extreme test configuration set.
+INSTANTIATE_TEST_SUITE_P(
+    DISABLED_AudioProcessingImplLockExtensive,
+    AudioProcessingImplLockTest,
+    ::testing::ValuesIn(TestConfig::GenerateExtensiveTestConfigs()));
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingImplLockBrief,
+    AudioProcessingImplLockTest,
+    ::testing::ValuesIn(TestConfig::GenerateBriefTestConfigs()));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc
new file mode 100644
index 0000000000..7c12a07ed9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -0,0 +1,1569 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/audio_processing_impl.h"
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <tuple>
+
+#include "absl/types/optional.h"
+#include "api/make_ref_counted.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "modules/audio_processing/test/echo_control_mock.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Invoke;
+using ::testing::NotNull;
+
+class MockInitialize : public AudioProcessingImpl {
+ public:
+  MockInitialize() : AudioProcessingImpl() {}
+
+  MOCK_METHOD(void, InitializeLocked, (), (override));
+  void RealInitializeLocked() {
+    AssertLockedForTest();
+    AudioProcessingImpl::InitializeLocked();
+  }
+
+  MOCK_METHOD(void, AddRef, (), (const, override));
+  MOCK_METHOD(rtc::RefCountReleaseStatus, Release, (), (const, override));
+};
+
+// Creates MockEchoControl instances and provides raw pointer access to the
+// next instance that will be created. The raw pointer is meant to be used
+// with gmock. Returning a pointer to the next created EchoControl instance
+// is necessary for the following reasons: (i) gmock expectations must be set
+// before any call occurs, (ii) APM is initialized the first time that
+// AudioProcessingImpl::ProcessStream() is called and the initialization
+// leads to the creation of a new EchoControl object.
+class MockEchoControlFactory : public EchoControlFactory {
+ public:
+  MockEchoControlFactory() : next_mock_(std::make_unique<MockEchoControl>()) {}
+  // Returns a pointer to the next MockEchoControl that this factory creates.
+  MockEchoControl* GetNext() const { return next_mock_.get(); }
+  std::unique_ptr<EchoControl> Create(int sample_rate_hz,
+                                      int num_render_channels,
+                                      int num_capture_channels) override {
+    std::unique_ptr<EchoControl> mock = std::move(next_mock_);
+    next_mock_ = std::make_unique<MockEchoControl>();
+    return mock;
+  }
+
+ private:
+  std::unique_ptr<MockEchoControl> next_mock_;
+};
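+// Editorial sketch, not part of the upstream test: typical use of the factory
+// above is to fetch the future mock before APM first processes a stream, so
+// that gmock expectations exist by the time Create() is invoked. The names
+// below are the ones defined in this file:
+//
+//   auto factory = std::make_unique<MockEchoControlFactory>();
+//   MockEchoControl* mock = factory->GetNext();  // The instance APM will get.
+//   auto apm = AudioProcessingBuilderForTesting()
+//                  .SetEchoControlFactory(std::move(factory))
+//                  .Create();
+//   EXPECT_CALL(*mock, AnalyzeCapture(testing::_));  // Set before first use.
+//   // The first ProcessStream() call triggers Create() and then the mock.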
+// Mocks EchoDetector and records the first samples of the last analyzed
+// render stream frame. Used to check what data is read by an EchoDetector
+// implementation injected into an APM.
+class TestEchoDetector : public EchoDetector {
+ public:
+  TestEchoDetector()
+      : analyze_render_audio_called_(false),
+        last_render_audio_first_sample_(0.f) {}
+  ~TestEchoDetector() override = default;
+  void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) override {
+    last_render_audio_first_sample_ = render_audio[0];
+    analyze_render_audio_called_ = true;
+  }
+  void AnalyzeCaptureAudio(
+      rtc::ArrayView<const float> capture_audio) override {}
+  void Initialize(int capture_sample_rate_hz,
+                  int num_capture_channels,
+                  int render_sample_rate_hz,
+                  int num_render_channels) override {}
+  EchoDetector::Metrics GetMetrics() const override { return {}; }
+  // Returns true if AnalyzeRenderAudio() has been called at least once.
+  bool analyze_render_audio_called() const {
+    return analyze_render_audio_called_;
+  }
+  // Returns the first sample of the last analyzed render frame.
+  float last_render_audio_first_sample() const {
+    return last_render_audio_first_sample_;
+  }
+
+ private:
+  bool analyze_render_audio_called_;
+  float last_render_audio_first_sample_;
+};
+
+// Mocks CustomProcessing and applies ProcessSample() to all the samples.
+// Meant to be injected into an APM to modify samples in a known and
+// detectable way.
+class TestRenderPreProcessor : public CustomProcessing {
+ public:
+  TestRenderPreProcessor() = default;
+  ~TestRenderPreProcessor() = default;
+  void Initialize(int sample_rate_hz, int num_channels) override {}
+  void Process(AudioBuffer* audio) override {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_view(audio->channels()[k],
+                                         audio->num_frames());
+      std::transform(channel_view.begin(), channel_view.end(),
+                     channel_view.begin(), ProcessSample);
+    }
+  }
+  std::string ToString() const override { return "TestRenderPreProcessor"; }
+  void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting) override {}
+  // Modifies a sample. This member is used in Process() to modify a frame and
+  // it is publicly visible to enable tests.
+  static constexpr float ProcessSample(float x) { return 2.f * x; }
+};
+
+// Runs `apm` input processing for volume adjustments for `num_frames` random
+// frames starting from the volume `initial_volume`. This includes three
+// steps: 1) Set the input volume, 2) Process the stream, 3) Set the new
+// recommended input volume. Returns the new recommended input volume.
+int ProcessInputVolume(AudioProcessing& apm,
+                       int num_frames,
+                       int initial_volume) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  int recommended_input_volume = initial_volume;
+  for (int i = 0; i < num_frames; ++i) {
+    Random random_generator(2341U);
+    RandomizeSampleVector(&random_generator, buffer);
+
+    apm.set_stream_analog_level(recommended_input_volume);
+    apm.ProcessStream(channel_pointers, stream_config, stream_config,
+                      channel_pointers);
+    recommended_input_volume = apm.recommended_stream_analog_level();
+  }
+  return recommended_input_volume;
+}
+
+}  // namespace
+
+TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
+  MockInitialize mock;
+  ON_CALL(mock, InitializeLocked)
+      .WillByDefault(Invoke(&mock, &MockInitialize::RealInitializeLocked));
+
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  mock.Initialize();
+
+  constexpr size_t kMaxSampleRateHz = 32000;
+  constexpr size_t kMaxNumChannels = 2;
+  std::array<int16_t, kMaxNumChannels * kMaxSampleRateHz / 100> frame;
+  frame.fill(0);
+  StreamConfig config(16000, 1);
+  // Call with the default parameters; there should be no init.
+  EXPECT_CALL(mock, InitializeLocked).Times(0);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+
+  // New sample rate. (Only impacts ProcessStream).
+  config = StreamConfig(32000, 1);
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+
+  // New number of channels.
+  config = StreamConfig(32000, 2);
+  EXPECT_CALL(mock, InitializeLocked).Times(2);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+
+  // A new sample rate passed to ProcessReverseStream should cause an init.
+  config = StreamConfig(16000, 2);
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+}
+
+TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.pre_amplifier.enabled = true;
+  apm_config.pre_amplifier.fixed_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(kGainFactor));
+
+  // Process for two frames to have time to ramp up gain.
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest,
+     LevelAdjustmentUpdateCapturePreGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.pre_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(kGainFactor));
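+// Editorial note, not part of the upstream test: at 48 kHz each
+// ProcessStream() call consumes one 10 ms frame (48000 / 100 = 480 samples
+// per channel), so processing two frames gives the gain smoothing roughly
+// 20 ms to settle before the amplified level is asserted below.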
+  // Process for two frames to have time to ramp up gain.
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest,
+     LevelAdjustmentUpdateCapturePostGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.post_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePostGain(kGainFactor));
+
+  // Process for two frames to have time to ramp up gain.
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest, EchoControllerObservesSetCaptureUsageChange) {
+  // Tests that the echo controller observes that the capture usage has been
+  // updated.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const MockEchoControlFactory* echo_control_factory_ptr =
+      echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  // Ensure that SetCaptureOutputUsage is not called when no runtime settings
+  // are passed.
+  EXPECT_CALL(*echo_control_mock, SetCaptureOutputUsage(testing::_)).Times(0);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // Ensure that SetCaptureOutputUsage is called with the right information
+  // when a runtime setting is passed.
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(1);
+  EXPECT_TRUE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/true))
+      .Times(1);
+  EXPECT_TRUE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/true)));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // The number of positions to place items in the queue is equal to the queue
+  // size minus 1.
+  constexpr int kNumSlotsInQueue = RuntimeSettingQueueSize();
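+// Editorial note, not part of the upstream test: the runtime-setting queue is
+// a fixed-capacity buffer that is drained on each ProcessStream() call. Posts
+// into a full queue are rejected, which is what the EXPECT_FALSE checks below
+// rely on. A condensed sketch of the contract exercised here:
+//
+//   while (!queue_full) EXPECT_TRUE(apm->PostRuntimeSetting(setting));
+//   EXPECT_FALSE(apm->PostRuntimeSetting(setting));  // Queue full.
+//   apm->ProcessStream(...);  // Drains the queue and applies the settings.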
+  // Ensure that SetCaptureOutputUsage is called with the right information
+  // when many runtime settings are passed.
+  for (int k = 0; k < kNumSlotsInQueue - 1; ++k) {
+    EXPECT_TRUE(apm->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            /*capture_output_used=*/false)));
+  }
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(kNumSlotsInQueue - 1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // Ensure that SetCaptureOutputUsage is properly called with the fallback
+  // value when the runtime settings queue becomes full.
+  for (int k = 0; k < kNumSlotsInQueue; ++k) {
+    EXPECT_TRUE(apm->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            /*capture_output_used=*/false)));
+  }
+  EXPECT_FALSE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  EXPECT_FALSE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(kNumSlotsInQueue);
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/true))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesPreAmplifierEchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // pre-amplifier submodule changes the gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.pre_amplifier.enabled = true;
+  apm_config.pre_amplifier.fixed_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesLevelAdjustmentPreGainEchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // pre-amplifier submodule changes the gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.pre_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesAnalogAgc1EchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // AGC1 analog adaptive submodule changes the analog gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  webrtc::AudioProcessing::Config apm_config;
+  // Enable AGC1.
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.analog_gain_controller.enabled = true;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.pre_amplifier.enabled = false;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 1000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  constexpr int kInitialStreamAnalogLevel = 123;
+  apm->set_stream_analog_level(kInitialStreamAnalogLevel);
+
+  // When the first frame is processed, no echo path gain change must be
+  // detected.
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  // Simulate the application of the recommended analog level.
+  int recommended_analog_level = apm->recommended_stream_analog_level();
+  if (recommended_analog_level == kInitialStreamAnalogLevel) {
+    // Force an analog gain change if it did not happen.
+    recommended_analog_level++;
+  }
+  apm->set_stream_analog_level(recommended_analog_level);
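+// Editorial note, not part of the upstream test: a change of the analog input
+// level rescales the capture signal ahead of the echo canceller, which from
+// the canceller's perspective is indistinguishable from a change of the echo
+// path gain; hence the `echo_path_change=true` expectation below.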
+  // After the first frame and with a stream analog level change, the echo
+  // path gain change must be detected.
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+}
+
+// Tests that a stream is successfully processed when AGC2 adaptive digital is
+// used and when the field trial
+// `WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default/` is set.
+TEST(AudioProcessingImplTest,
+     ProcessWithAgc2AndTransientSuppressorVadModeDefault) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default/");
+  auto apm = AudioProcessingBuilder()
+                 .SetConfig({.gain_controller1{.enabled = false}})
+                 .Create();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = true;
+  apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+// Tests that a stream is successfully processed when AGC2 adaptive digital is
+// used and when the field trial
+// `WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/` is set.
+TEST(AudioProcessingImplTest,
+     ProcessWithAgc2AndTransientSuppressorVadModeRnnVad) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/");
+  rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = true;
+  apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
+  // Tests that the echo controller observes an echo path gain change when a
+  // playout volume change is reported.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(100));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+}
+
+TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
+  // Make sure that signal changes caused by a render pre-processing
+  // sub-module take place before any echo detector analysis.
+  auto test_echo_detector = rtc::make_ref_counted<TestEchoDetector>();
+  std::unique_ptr<CustomProcessing> test_render_pre_processor(
+      new TestRenderPreProcessor());
+  // Create APM injecting the test echo detector and render pre-processor.
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(test_echo_detector)
+          .SetRenderPreProcessing(std::move(test_render_pre_processor))
+          .Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.pre_amplifier.enabled = true;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 1000;
+  constexpr int kSampleRateHz = 16000;
+  constexpr size_t kNumChannels = 1;
+  // Explicitly initialize APM to ensure no render frames are discarded.
+  const ProcessingConfig processing_config = {{
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+  }};
+  apm->Initialize(processing_config);
+
+  std::array<float, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+
+  constexpr float kAudioLevelFloat = static_cast<float>(kAudioLevel);
+  constexpr float kExpectedPreprocessedAudioLevel =
+      TestRenderPreProcessor::ProcessSample(kAudioLevelFloat);
+  ASSERT_NE(kAudioLevelFloat, kExpectedPreprocessedAudioLevel);
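+// Editorial note, not part of the upstream test:
+// TestRenderPreProcessor::ProcessSample() doubles every sample, so with
+// kAudioLevel = 1000 the echo detector is expected to observe
+// 2.f * 1000.f = 2000.f as the first render sample if pre-processing indeed
+// runs before echo detection.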
+  // Analyze a render stream frame.
+  frame.fill(kAudioLevel);
+  ASSERT_EQ(AudioProcessing::Error::kNoError,
+            apm->ProcessReverseStream(frame.data(), stream_config,
+                                      stream_config, frame.data()));
+  // Trigger a call to EchoDetector::AnalyzeRenderAudio() via
+  // ProcessStream().
+  frame.fill(kAudioLevel);
+  ASSERT_EQ(AudioProcessing::Error::kNoError,
+            apm->ProcessStream(frame.data(), stream_config, stream_config,
+                               frame.data()));
+  // Regardless of how the call to EchoDetector::AnalyzeRenderAudio() is
+  // triggered, the line below checks that the call has occurred. If not, the
+  // APM implementation may have changed and this test might need to be
+  // adapted.
+  ASSERT_TRUE(test_echo_detector->analyze_render_audio_called());
+  // Check that the data read in EchoDetector::AnalyzeRenderAudio() is that
+  // produced by the render pre-processor.
+  EXPECT_EQ(kExpectedPreprocessedAudioLevel,
+            test_echo_detector->last_render_audio_first_sample());
+}
+
+// Disabling build-optional submodules and trying to enable them via the APM
+// config should be bit-exact with running APM with said submodules disabled.
+// This mainly tests that SetCreateOptionalSubmodulesForTesting has an effect.
+TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  AudioProcessing::Config apm_config = apm->GetConfig();
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+
+  rtc::scoped_refptr<AudioProcessing> apm_reference =
+      AudioProcessingBuilder().Create();
+  apm_config = apm_reference->GetConfig();
+  apm_config.transient_suppression.enabled = false;
+  apm_reference->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  std::array<float, kSampleRateHz / 100> buffer_reference;
+  float* channel_pointers[] = {buffer.data()};
+  float* channel_pointers_reference[] = {buffer_reference.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 10;
+
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    std::copy(buffer.begin(), buffer.end(), buffer_reference.begin());
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    ASSERT_EQ(
+        apm_reference->ProcessStream(channel_pointers_reference, stream_config,
+                                     stream_config,
+                                     channel_pointers_reference),
+        kNoErr);
+    for (int j = 0; j < kSampleRateHz / 100; ++j) {
+      EXPECT_EQ(buffer[j], buffer_reference[j]);
+    }
+  }
+}
+
+// Disable transient suppressor creation and run APM in ways that should
+// trigger calls to the transient suppressor API.
+TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), kNoErr);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  AudioProcessing::Config config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
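+// Editorial note, not part of the upstream test: the 960-sample buffer below
+// follows from the worst case handled here, namely 10 ms at 48 kHz with two
+// channels: 48000 / 100 = 480 samples per channel, times 2 channels = 960.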
+  // 960 samples per frame: 10 ms of <= 48 kHz audio with <= 2 channels.
+  float buffer[960];
+  float* channel_pointers[] = {&buffer[0], &buffer[480]};
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 3;
+
+  StreamConfig initial_stream_config(/*sample_rate_hz=*/16000,
+                                     /*num_channels=*/1);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, initial_stream_config,
+                                 initial_stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  StreamConfig stereo_stream_config(/*sample_rate_hz=*/16000,
+                                    /*num_channels=*/2);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stereo_stream_config,
+                                 stereo_stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  StreamConfig high_sample_rate_stream_config(/*sample_rate_hz=*/48000,
+                                              /*num_channels=*/2);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(
+        apm->ProcessStream(channel_pointers, high_sample_rate_stream_config,
+                           high_sample_rate_stream_config, channel_pointers),
+        kNoErr);
+  }
+}
+
+// Disable transient suppressor creation and run APM in ways that should
+// trigger calls to the transient suppressor API.
+TEST(ApmWithSubmodulesExcludedTest, ToggleTransientSuppressor) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  // 960 samples per frame: 10 ms of <= 48 kHz audio with <= 2 channels.
+  float buffer[960];
+  float* channel_pointers[] = {&buffer[0], &buffer[480]};
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 3;
+  StreamConfig stream_config(/*sample_rate_hz=*/16000,
+                             /*num_channels=*/1);
+
+  AudioProcessing::Config config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  config = apm->GetConfig();
+  config.transient_suppression.enabled = false;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+class StartupInputVolumeParameterizedTest
+    : public ::testing::TestWithParam<int> {};
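+// Editorial sketch, not part of the upstream test: the tests below rely on
+// the ProcessInputVolume() helper defined earlier in this file. One frame is
+// enough to observe whether APM rewrites the applied volume, e.g.:
+//
+//   int out = ProcessInputVolume(*apm, /*num_frames=*/1, /*initial_volume=*/v);
+//   // With every input volume controller disabled, out == v is expected.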
+// Tests that, when no input volume controller is used, the startup input
+// volume is never modified.
+TEST_P(StartupInputVolumeParameterizedTest,
+       WithNoInputVolumeControllerStartupVolumeNotModified) {
+  webrtc::AudioProcessing::Config config;
+  config.gain_controller1.enabled = false;
+  config.gain_controller2.enabled = false;
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  int startup_volume = GetParam();
+  int recommended_volume = ProcessInputVolume(
+      *apm, /*num_frames=*/1, /*initial_volume=*/startup_volume);
+  EXPECT_EQ(recommended_volume, startup_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(AudioProcessingImplTest,
+                         StartupInputVolumeParameterizedTest,
+                         ::testing::Values(0, 5, 15, 50, 100));
+
+// Tests that, when no input volume controller is used, the recommended input
+// volume always matches the applied one.
+TEST(AudioProcessingImplTest,
+     WithNoInputVolumeControllerAppliedAndRecommendedVolumesMatch) {
+  webrtc::AudioProcessing::Config config;
+  config.gain_controller1.enabled = false;
+  config.gain_controller2.enabled = false;
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  Random rand_gen(42);
+  for (int i = 0; i < 32; ++i) {
+    SCOPED_TRACE(i);
+    int32_t applied_volume = rand_gen.Rand(/*low=*/0, /*high=*/255);
+    int recommended_volume =
+        ProcessInputVolume(*apm, /*num_frames=*/1, applied_volume);
+    EXPECT_EQ(recommended_volume, applied_volume);
+  }
+}
+
+class ApmInputVolumeControllerParametrizedTest
+    : public ::testing::TestWithParam<
+          std::tuple<int, int, AudioProcessing::Config>> {
+ protected:
+  ApmInputVolumeControllerParametrizedTest()
+      : sample_rate_hz_(std::get<0>(GetParam())),
+        num_channels_(std::get<1>(GetParam())),
+        channels_(num_channels_),
+        channel_pointers_(num_channels_) {
+    const int frame_size = sample_rate_hz_ / 100;
+    for (int c = 0; c < num_channels_; ++c) {
+      channels_[c].resize(frame_size);
+      channel_pointers_[c] = channels_[c].data();
+      std::fill(channels_[c].begin(), channels_[c].end(), 0.0f);
+    }
+  }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+  int num_channels() const { return num_channels_; }
+  AudioProcessing::Config GetConfig() const { return std::get<2>(GetParam()); }
+
+  float* const* channel_pointers() { return channel_pointers_.data(); }
+
+ private:
+  const int sample_rate_hz_;
+  const int num_channels_;
+  std::vector<std::vector<float>> channels_;
+  std::vector<float*> channel_pointers_;
+};
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAtStartupWithZeroVolume) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  apm->set_stream_analog_level(0);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_GT(apm->recommended_stream_analog_level(), 0);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAtStartupWithNonZeroVolume) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  constexpr int kStartupVolume = 3;
+  apm->set_stream_analog_level(kStartupVolume);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_GT(apm->recommended_stream_analog_level(), kStartupVolume);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAfterManualVolumeAdjustment) {
+  const auto config = GetConfig();
+  if (config.gain_controller1.enabled) {
+    // After a downward manual adjustment, AGC1 slowly converges to the
+    // minimum input volume.
+ GTEST_SKIP() << "Does not apply to AGC1"; + } + const StreamConfig stream_config(sample_rate_hz(), num_channels()); + auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create(); + + apm->set_stream_analog_level(20); + apm->ProcessStream(channel_pointers(), stream_config, stream_config, + channel_pointers()); + constexpr int kManuallyAdjustedVolume = 3; + apm->set_stream_analog_level(kManuallyAdjustedVolume); + apm->ProcessStream(channel_pointers(), stream_config, stream_config, + channel_pointers()); + EXPECT_GT(apm->recommended_stream_analog_level(), kManuallyAdjustedVolume); +} + +TEST_P(ApmInputVolumeControllerParametrizedTest, + DoNotEnforceMinInputVolumeAtStartupWithHighVolume) { + const StreamConfig stream_config(sample_rate_hz(), num_channels()); + auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create(); + + constexpr int kStartupVolume = 200; + apm->set_stream_analog_level(kStartupVolume); + apm->ProcessStream(channel_pointers(), stream_config, stream_config, + channel_pointers()); + EXPECT_EQ(apm->recommended_stream_analog_level(), kStartupVolume); +} + +TEST_P(ApmInputVolumeControllerParametrizedTest, + DoNotEnforceMinInputVolumeAfterManualVolumeAdjustmentToZero) { + const StreamConfig stream_config(sample_rate_hz(), num_channels()); + auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create(); + + apm->set_stream_analog_level(100); + apm->ProcessStream(channel_pointers(), stream_config, stream_config, + channel_pointers()); + apm->set_stream_analog_level(0); + apm->ProcessStream(channel_pointers(), stream_config, stream_config, + channel_pointers()); + EXPECT_EQ(apm->recommended_stream_analog_level(), 0); +} + +INSTANTIATE_TEST_SUITE_P( + AudioProcessingImplTest, + ApmInputVolumeControllerParametrizedTest, + ::testing::Combine( + ::testing::Values(8000, 16000, 32000, 48000), // Sample rates. + ::testing::Values(1, 2), // Number of channels. + ::testing::Values( + // Full AGC1. + AudioProcessing::Config{ + .gain_controller1 = {.enabled = true, + .analog_gain_controller = + {.enabled = true, + .enable_digital_adaptive = true}}, + .gain_controller2 = {.enabled = false}}, + // Hybrid AGC. + AudioProcessing::Config{ + .gain_controller1 = {.enabled = true, + .analog_gain_controller = + {.enabled = true, + .enable_digital_adaptive = false}}, + .gain_controller2 = {.enabled = true, + .adaptive_digital = {.enabled = true}}}))); + +// When the input volume is not emulated and no input volume controller is +// active, the recommended volume must always be the applied volume. +TEST(AudioProcessingImplTest, + RecommendAppliedInputVolumeWithNoAgcWithNoEmulation) { + auto apm = AudioProcessingBuilder() + .SetConfig({.capture_level_adjustment = {.enabled = false}, + .gain_controller1 = {.enabled = false}}) + .Create(); + + constexpr int kOneFrame = 1; + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); +} + +// When the input volume is emulated, the recommended volume must always be the +// applied volume and at any time it must not be that set in the input volume +// emulator. +// TODO(bugs.webrtc.org/14581): Enable when APM fixed to let this test pass. 
+TEST(AudioProcessingImplTest, + DISABLED_RecommendAppliedInputVolumeWithNoAgcWithEmulation) { + auto apm = + AudioProcessingBuilder() + .SetConfig({.capture_level_adjustment = {.enabled = true, + .analog_mic_gain_emulation{ + .enabled = true, + .initial_level = 255}}, + .gain_controller1 = {.enabled = false}}) + .Create(); + + constexpr int kOneFrame = 1; + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); +} + +// Even if there is an enabled input volume controller, when the input volume is +// emulated, the recommended volume is always the applied volume because the +// active controller must only adjust the internally emulated volume and leave +// the externally applied volume unchanged. +// TODO(bugs.webrtc.org/14581): Enable when APM fixed to let this test pass. +TEST(AudioProcessingImplTest, + DISABLED_RecommendAppliedInputVolumeWithAgcWithEmulation) { + auto apm = + AudioProcessingBuilder() + .SetConfig({.capture_level_adjustment = {.enabled = true, + .analog_mic_gain_emulation{ + .enabled = true}}, + .gain_controller1 = {.enabled = true, + .analog_gain_controller{ + .enabled = true, + }}}) + .Create(); + + constexpr int kOneFrame = 1; + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); +} + +TEST(AudioProcessingImplTest, + Agc2FieldTrialDoNotSwitchToFullAgc2WhenNoAgcIsActive) { + constexpr AudioProcessing::Config kOriginal{ + .gain_controller1{.enabled = false}, + .gain_controller2{.enabled = false}, + }; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); +} + +TEST(AudioProcessingImplTest, + Agc2FieldTrialDoNotSwitchToFullAgc2WithAgc1Agc2InputVolumeControllers) { + constexpr AudioProcessing::Config kOriginal{ + .gain_controller1{.enabled = true, + .analog_gain_controller{.enabled = true}}, + .gain_controller2{.enabled = true, + .input_volume_controller{.enabled = true}}, + }; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); + + // Test config application via `AudioProcessing::ApplyConfig()`. 
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2);
+}
+
+class Agc2FieldTrialParametrizedTest
+    : public ::testing::TestWithParam<AudioProcessing::Config> {};
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotChangeConfigIfDisabled) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotChangeConfigIfNoOverride) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:false,"
+      "disallow_transient_suppressor_usage:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotSwitchToFullAgc2) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, SwitchToFullAgc2) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.gain_controller1.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+ auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(original); + adjusted = apm->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); +} + +TEST_P(Agc2FieldTrialParametrizedTest, + SwitchToFullAgc2AndOverrideInputVolumeControllerParameters) { + const AudioProcessing::Config original = GetParam(); + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true," + "min_input_volume:123," + "clipped_level_min:20," + "clipped_level_step:30," + "clipped_ratio_threshold:0.4," + "clipped_wait_frames:50," + "enable_clipping_predictor:true," + "target_range_max_dbfs:-6," + "target_range_min_dbfs:-70," + "update_input_volume_wait_frames:80," + "speech_probability_threshold:0.9," + "speech_ratio_threshold:1.0/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(original).Create()->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(original); + adjusted = apm->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); +} + +TEST_P(Agc2FieldTrialParametrizedTest, + SwitchToFullAgc2AndOverrideAdaptiveDigitalControllerParameters) { + const AudioProcessing::Config original = GetParam(); + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true," + "headroom_db:10," + "max_gain_db:20," + "initial_gain_db:7," + "max_gain_change_db_per_second:5," + "max_output_noise_level_dbfs:-40/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(original).Create()->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); + ASSERT_NE(adjusted.gain_controller2.adaptive_digital, + original.gain_controller2.adaptive_digital); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.headroom_db, 10); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.max_gain_db, 20); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.initial_gain_db, 7); + EXPECT_EQ( + adjusted.gain_controller2.adaptive_digital.max_gain_change_db_per_second, + 5); + EXPECT_EQ( + adjusted.gain_controller2.adaptive_digital.max_output_noise_level_dbfs, + -40); + + // Test config application via `AudioProcessing::ApplyConfig()`. 
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.gain_controller1.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled);
+  ASSERT_NE(adjusted.gain_controller2.adaptive_digital,
+            original.gain_controller2.adaptive_digital);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.headroom_db, 10);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.max_gain_db, 20);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.initial_gain_db, 7);
+  EXPECT_EQ(
+      adjusted.gain_controller2.adaptive_digital.max_gain_change_db_per_second,
+      5);
+  EXPECT_EQ(
+      adjusted.gain_controller2.adaptive_digital.max_output_noise_level_dbfs,
+      -40);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithTs) {
+  AudioProcessing::Config config = GetParam();
+  if (!config.transient_suppression.enabled) {
+    GTEST_SKIP() << "TS is disabled, skip.";
+  }
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithoutTs) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:false,"
+      "disallow_transient_suppressor_usage:true/");
+  auto apm = AudioProcessingBuilder().SetConfig(GetParam()).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest,
+       ProcessSucceedsWhenSwitchToFullAgc2WithTs) {
+  AudioProcessing::Config config = GetParam();
+  if (!config.transient_suppression.enabled) {
+    GTEST_SKIP() << "TS is disabled, skip.";
+  }
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:true,"
+      "disallow_transient_suppressor_usage:false/");
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest,
+       ProcessSucceedsWhenSwitchToFullAgc2WithoutTs) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:true,"
+      "disallow_transient_suppressor_usage:true/");
+  auto apm = AudioProcessingBuilder().SetConfig(GetParam()).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingImplTest,
+    Agc2FieldTrialParametrizedTest,
+    ::testing::Values(
+        // Full AGC1, TS disabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = false},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = true}},
+            .gain_controller2 = {.enabled = false}},
+        // Full AGC1, TS enabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = true},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = true}},
+            .gain_controller2 = {.enabled = false}},
+        // Hybrid AGC, TS disabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = false},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = false}},
+            .gain_controller2 = {.enabled = true,
+                                 .adaptive_digital = {.enabled = true}}},
+        // Hybrid AGC, TS enabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = true},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = false}},
+            .gain_controller2 = {.enabled = true,
+                                 .adaptive_digital = {.enabled = true}}}));
+
+TEST(AudioProcessingImplTest, CanDisableTransientSuppressor) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = false}};
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest, CanEnableTs) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = true}};
+
+  // Test config application via `AudioProcessing` ctor.
+ auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); +} + +TEST(AudioProcessingImplTest, CanDisableTsWithAgc2FieldTrialDisabled) { + constexpr AudioProcessing::Config kOriginal = { + .transient_suppression = {.enabled = false}}; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Disabled/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_FALSE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_FALSE(apm->GetConfig().transient_suppression.enabled); +} + +TEST(AudioProcessingImplTest, CanEnableTsWithAgc2FieldTrialDisabled) { + constexpr AudioProcessing::Config kOriginal = { + .transient_suppression = {.enabled = true}}; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Disabled/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); +} + +TEST(AudioProcessingImplTest, + CanDisableTsWithAgc2FieldTrialEnabledAndUsageAllowed) { + constexpr AudioProcessing::Config kOriginal = { + .transient_suppression = {.enabled = false}}; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled," + "disallow_transient_suppressor_usage:false/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_FALSE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_FALSE(adjusted.transient_suppression.enabled); +} + +TEST(AudioProcessingImplTest, + CanEnableTsWithAgc2FieldTrialEnabledAndUsageAllowed) { + constexpr AudioProcessing::Config kOriginal = { + .transient_suppression = {.enabled = true}}; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled," + "disallow_transient_suppressor_usage:false/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. 
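+  // (These TS tests deliberately exercise both configuration paths: passing
+  // the config to the builder applies it at construction time, while
+  // ApplyConfig() reconfigures an already created instance. Both paths are
+  // expected to yield the same adjusted config when read back via
+  // GetConfig().)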
+ auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_TRUE(adjusted.transient_suppression.enabled); +} + +TEST(AudioProcessingImplTest, + CannotEnableTsWithAgc2FieldTrialEnabledAndUsageDisallowed) { + constexpr AudioProcessing::Config kOriginal = { + .transient_suppression = {.enabled = true}}; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled," + "disallow_transient_suppressor_usage:true/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_FALSE(adjusted.transient_suppression.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_FALSE(apm->GetConfig().transient_suppression.enabled); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc new file mode 100644 index 0000000000..10d3d84951 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include + +#include +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/numerics/samples_stats_counter.h" +#include "api/test/metrics/global_metrics_logger_and_exporter.h" +#include "api/test/metrics/metric.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/event.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/platform_thread.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::webrtc::test::GetGlobalMetricsLogger; +using ::webrtc::test::ImprovementDirection; +using ::webrtc::test::Metric; +using ::webrtc::test::Unit; + +class CallSimulator; + +// Type of the render thread APM API call to use in the test. +enum class ProcessorType { kRender, kCapture }; + +// Variant of APM processing settings to use in the test. +enum class SettingsType { + kDefaultApmDesktop, + kDefaultApmMobile, + kAllSubmodulesTurnedOff, + kDefaultApmDesktopWithoutDelayAgnostic, + kDefaultApmDesktopWithoutExtendedFilter +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(size_t max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. 
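+    // For illustration: with a max_frame_size of 480, the backing vector is
+    // laid out as [ ch0: samples 0..479 | ch1: samples 480..959 ] and
+    // input_frame/output_frame each hold one float* per channel. That
+    // pointer-per-channel shape is what the float* const* overloads of
+    // ProcessStream()/ProcessReverseStream() consume.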
+    input_framechannels.resize(2 * max_frame_size);
+    input_frame.resize(2);
+    input_frame[0] = &input_framechannels[0];
+    input_frame[1] = &input_framechannels[max_frame_size];
+
+    output_frame_channels.resize(2 * max_frame_size);
+    output_frame.resize(2);
+    output_frame[0] = &output_frame_channels[0];
+    output_frame[1] = &output_frame_channels[max_frame_size];
+  }
+
+  std::vector<float> output_frame_channels;
+  std::vector<float*> output_frame;
+  std::vector<float> input_framechannels;
+  std::vector<float*> input_frame;
+  StreamConfig input_stream_config;
+  StreamConfig output_stream_config;
+};
+
+// The configuration for the test.
+struct SimulationConfig {
+  SimulationConfig(int sample_rate_hz, SettingsType simulation_settings)
+      : sample_rate_hz(sample_rate_hz),
+        simulation_settings(simulation_settings) {}
+
+  static std::vector<SimulationConfig> GenerateSimulationConfigs() {
+    std::vector<SimulationConfig> simulation_configs;
+#ifndef WEBRTC_ANDROID
+    const SettingsType desktop_settings[] = {
+        SettingsType::kDefaultApmDesktop, SettingsType::kAllSubmodulesTurnedOff,
+        SettingsType::kDefaultApmDesktopWithoutDelayAgnostic,
+        SettingsType::kDefaultApmDesktopWithoutExtendedFilter};
+
+    const int desktop_sample_rates[] = {8000, 16000, 32000, 48000};
+
+    for (auto sample_rate : desktop_sample_rates) {
+      for (auto settings : desktop_settings) {
+        simulation_configs.push_back(SimulationConfig(sample_rate, settings));
+      }
+    }
+#endif
+
+    const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile};
+
+    const int mobile_sample_rates[] = {8000, 16000};
+
+    for (auto sample_rate : mobile_sample_rates) {
+      for (auto settings : mobile_settings) {
+        simulation_configs.push_back(SimulationConfig(sample_rate, settings));
+      }
+    }
+
+    return simulation_configs;
+  }
+
+  std::string SettingsDescription() const {
+    std::string description;
+    switch (simulation_settings) {
+      case SettingsType::kDefaultApmMobile:
+        description = "DefaultApmMobile";
+        break;
+      case SettingsType::kDefaultApmDesktop:
+        description = "DefaultApmDesktop";
+        break;
+      case SettingsType::kAllSubmodulesTurnedOff:
+        description = "AllSubmodulesOff";
+        break;
+      case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic:
+        description = "DefaultApmDesktopWithoutDelayAgnostic";
+        break;
+      case SettingsType::kDefaultApmDesktopWithoutExtendedFilter:
+        description = "DefaultApmDesktopWithoutExtendedFilter";
+        break;
+    }
+    return description;
+  }
+
+  int sample_rate_hz = 16000;
+  SettingsType simulation_settings = SettingsType::kDefaultApmDesktop;
+};
+
+// Handler for the frame counters.
+class FrameCounters {
+ public:
+  void IncreaseRenderCounter() { render_count_.fetch_add(1); }
+
+  void IncreaseCaptureCounter() { capture_count_.fetch_add(1); }
+
+  int CaptureMinusRenderCounters() const {
+    // The return value will be approximate, but that's good enough since
+    // by the time we return the value, it's not guaranteed to be correct
+    // anyway.
+    return capture_count_.load(std::memory_order_acquire) -
+           render_count_.load(std::memory_order_acquire);
+  }
+
+  int RenderMinusCaptureCounters() const {
+    return -CaptureMinusRenderCounters();
+  }
+
+  bool BothCountersExceedeThreshold(int threshold) const {
+    // TODO(tommi): We could use an event to signal this so that we don't need
+    // to be polling from the main thread and possibly steal cycles.
+    const int capture_count = capture_count_.load(std::memory_order_acquire);
+    const int render_count = render_count_.load(std::memory_order_acquire);
+    return (render_count > threshold && capture_count > threshold);
+  }
+
+ private:
+  std::atomic<int> render_count_{0};
+  std::atomic<int> capture_count_{0};
+};
+
+// Class that represents a flag that can only be raised.
+class LockedFlag {
+ public:
+  bool get_flag() const { return flag_.load(std::memory_order_acquire); }
+
+  void set_flag() {
+    if (!get_flag()) {
+      // read-only operation to avoid affecting the cache-line.
+      int zero = 0;
+      flag_.compare_exchange_strong(zero, 1);
+    }
+  }
+
+ private:
+  std::atomic<int> flag_{0};
+};
+
+// Parent class for the thread processors.
+class TimedThreadApiProcessor {
+ public:
+  TimedThreadApiProcessor(ProcessorType processor_type,
+                          Random* rand_gen,
+                          FrameCounters* shared_counters_state,
+                          LockedFlag* capture_call_checker,
+                          CallSimulator* test_framework,
+                          const SimulationConfig* simulation_config,
+                          AudioProcessing* apm,
+                          int num_durations_to_store,
+                          float input_level,
+                          int num_channels)
+      : rand_gen_(rand_gen),
+        frame_counters_(shared_counters_state),
+        capture_call_checker_(capture_call_checker),
+        test_(test_framework),
+        simulation_config_(simulation_config),
+        apm_(apm),
+        frame_data_(kMaxFrameSize),
+        clock_(webrtc::Clock::GetRealTimeClock()),
+        num_durations_to_store_(num_durations_to_store),
+        api_call_durations_(num_durations_to_store_ - kNumInitializationFrames),
+        samples_count_(0),
+        input_level_(input_level),
+        processor_type_(processor_type),
+        num_channels_(num_channels) {}
+
+  // Implements the callback functionality for the threads.
+  bool Process();
+
+  // Method for printing out the simulation statistics.
+  void print_processor_statistics(absl::string_view processor_name) const {
+    const std::string modifier = "_api_call_duration";
+
+    const std::string sample_rate_name =
+        "_" + std::to_string(simulation_config_->sample_rate_hz) + "Hz";
+
+    GetGlobalMetricsLogger()->LogMetric(
+        "apm_timing" + sample_rate_name, processor_name, api_call_durations_,
+        Unit::kMilliseconds, ImprovementDirection::kNeitherIsBetter);
+  }
+
+  void AddDuration(int64_t duration) {
+    if (samples_count_ >= kNumInitializationFrames &&
+        samples_count_ < num_durations_to_store_) {
+      api_call_durations_.AddSample(duration);
+    }
+    samples_count_++;
+  }
+
+ private:
+  static const int kMaxCallDifference = 10;
+  static const int kMaxFrameSize = 480;
+  static const int kNumInitializationFrames = 5;
+
+  int ProcessCapture() {
+    // Set the stream delay.
+    apm_->set_stream_delay_ms(30);
+
+    // Call and time the specified capture side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+
+    frame_counters_->IncreaseCaptureCounter();
+
+    AddDuration(end_time - start_time);
+
+    if (first_process_call_) {
+      // Flag that the capture side has been called at least once
+      // (needed to ensure that a capture call has been done
+      // before the first render call is performed (implicitly
+      // required by the APM API).
+      capture_call_checker_->set_flag();
+      first_process_call_ = false;
+    }
+    return result;
+  }
+
+  bool ReadyToProcessCapture() {
+    return (frame_counters_->CaptureMinusRenderCounters() <=
+            kMaxCallDifference);
+  }
+
+  int ProcessRender() {
+    // Call and time the specified render side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessReverseStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+    frame_counters_->IncreaseRenderCounter();
+
+    AddDuration(end_time - start_time);
+
+    return result;
+  }
+
+  bool ReadyToProcessRender() {
+    // Do not process until at least one capture call has been done.
+    // (implicitly required by the APM API).
+    if (first_process_call_ && !capture_call_checker_->get_flag()) {
+      return false;
+    }
+
+    // Ensure that the number of render and capture calls do not differ too
+    // much.
+    if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+      return false;
+    }
+
+    first_process_call_ = false;
+    return true;
+  }
+
+  void PrepareFrame() {
+    // Lambda function for populating a float multichannel audio frame
+    // with random data.
+    auto populate_audio_frame = [](float amplitude, size_t num_channels,
+                                   size_t samples_per_channel, Random* rand_gen,
+                                   float** frame) {
+      for (size_t ch = 0; ch < num_channels; ch++) {
+        for (size_t k = 0; k < samples_per_channel; k++) {
+          // Store random float number with a value between +-amplitude.
+          frame[ch][k] = amplitude * (2 * rand_gen->Rand<float>() - 1);
+        }
+      }
+    };
+
+    // Prepare the audio input data and metadata.
+    frame_data_.input_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.input_stream_config.set_num_channels(num_channels_);
+    populate_audio_frame(input_level_, num_channels_,
+                         (simulation_config_->sample_rate_hz *
+                          AudioProcessing::kChunkSizeMs / 1000),
+                         rand_gen_, &frame_data_.input_frame[0]);
+
+    // Prepare the float audio output data and metadata.
+    frame_data_.output_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.output_stream_config.set_num_channels(1);
+  }
+
+  bool ReadyToProcess() {
+    switch (processor_type_) {
+      case ProcessorType::kRender:
+        return ReadyToProcessRender();
+
+      case ProcessorType::kCapture:
+        return ReadyToProcessCapture();
+    }
+
+    // Should not be reached, but the return statement is needed for the code
+    // to build successfully on Android.
+    RTC_DCHECK_NOTREACHED();
+    return false;
+  }
+
+  Random* rand_gen_ = nullptr;
+  FrameCounters* frame_counters_ = nullptr;
+  LockedFlag* capture_call_checker_ = nullptr;
+  CallSimulator* test_ = nullptr;
+  const SimulationConfig* const simulation_config_ = nullptr;
+  AudioProcessing* apm_ = nullptr;
+  AudioFrameData frame_data_;
+  webrtc::Clock* clock_;
+  const size_t num_durations_to_store_;
+  SamplesStatsCounter api_call_durations_;
+  size_t samples_count_ = 0;
+  const float input_level_;
+  bool first_process_call_ = true;
+  const ProcessorType processor_type_;
+  const int num_channels_ = 1;
+};
+
+// Class for managing the test simulation.
+class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
+ public:
+  CallSimulator()
+      : rand_gen_(42U),
+        simulation_config_(static_cast<SimulationConfig>(GetParam())) {}
+
+  // Run the call simulation with a timeout.
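+  // (kTestTimeout below is sized against the workload: kMinNumFramesToProcess
+  // frames of 10 ms audio correspond to 1.5 s of real-time audio, and the
+  // 3 * 10 * kMinNumFramesToProcess ms budget leaves a 3x margin before
+  // Wait() gives up and Run() reports failure.)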
+ bool Run() { + StartThreads(); + + bool result = test_complete_.Wait(kTestTimeout); + + StopThreads(); + + render_thread_state_->print_processor_statistics( + simulation_config_.SettingsDescription() + "_render"); + capture_thread_state_->print_processor_statistics( + simulation_config_.SettingsDescription() + "_capture"); + + return result; + } + + // Tests whether all the required render and capture side calls have been + // done. + bool MaybeEndTest() { + if (frame_counters_.BothCountersExceedeThreshold(kMinNumFramesToProcess)) { + test_complete_.Set(); + return true; + } + return false; + } + + private: + static const float kCaptureInputFloatLevel; + static const float kRenderInputFloatLevel; + static const int kMinNumFramesToProcess = 150; + static constexpr TimeDelta kTestTimeout = + TimeDelta::Millis(3 * 10 * kMinNumFramesToProcess); + + // Stop all running threads. + void StopThreads() { + render_thread_.Finalize(); + capture_thread_.Finalize(); + } + + // Simulator and APM setup. + void SetUp() override { + // Lambda function for setting the default APM runtime settings for desktop. + auto set_default_desktop_apm_runtime_settings = [](AudioProcessing* apm) { + AudioProcessing::Config apm_config = apm->GetConfig(); + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = false; + apm_config.noise_suppression.enabled = true; + apm_config.gain_controller1.enabled = true; + apm_config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveDigital; + apm->ApplyConfig(apm_config); + }; + + // Lambda function for setting the default APM runtime settings for mobile. + auto set_default_mobile_apm_runtime_settings = [](AudioProcessing* apm) { + AudioProcessing::Config apm_config = apm->GetConfig(); + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = true; + apm_config.noise_suppression.enabled = true; + apm_config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveDigital; + apm->ApplyConfig(apm_config); + }; + + // Lambda function for turning off all of the APM runtime settings + // submodules. 
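+    // (Like the two helpers above, this follows the standard read-modify-write
+    // configuration pattern: fetch the current AudioProcessing::Config with
+    // GetConfig(), toggle the relevant submodule flags, and commit the result
+    // with ApplyConfig().)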
+ auto turn_off_default_apm_runtime_settings = [](AudioProcessing* apm) { + AudioProcessing::Config apm_config = apm->GetConfig(); + apm_config.echo_canceller.enabled = false; + apm_config.gain_controller1.enabled = false; + apm_config.noise_suppression.enabled = false; + apm->ApplyConfig(apm_config); + }; + + int num_capture_channels = 1; + switch (simulation_config_.simulation_settings) { + case SettingsType::kDefaultApmMobile: { + apm_ = AudioProcessingBuilderForTesting().Create(); + ASSERT_TRUE(!!apm_); + set_default_mobile_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktop: { + apm_ = AudioProcessingBuilderForTesting().Create(); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kAllSubmodulesTurnedOff: { + apm_ = AudioProcessingBuilderForTesting().Create(); + ASSERT_TRUE(!!apm_); + turn_off_default_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic: { + apm_ = AudioProcessingBuilderForTesting().Create(); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktopWithoutExtendedFilter: { + apm_ = AudioProcessingBuilderForTesting().Create(); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + break; + } + } + + render_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kRender, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kRenderInputFloatLevel, 1)); + capture_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kCapture, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels)); + } + + // Start the threads used in the test. + void StartThreads() { + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + render_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (render_thread_state_->Process()) { + } + }, + "render", attributes); + capture_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (capture_thread_state_->Process()) { + } + }, + "capture", attributes); + } + + // Event handler for the test. + rtc::Event test_complete_; + + // Thread related variables. + Random rand_gen_; + + rtc::scoped_refptr apm_; + const SimulationConfig simulation_config_; + FrameCounters frame_counters_; + LockedFlag capture_call_checker_; + std::unique_ptr render_thread_state_; + std::unique_ptr capture_thread_state_; + rtc::PlatformThread render_thread_; + rtc::PlatformThread capture_thread_; +}; + +// Implements the callback functionality for the threads. +bool TimedThreadApiProcessor::Process() { + PrepareFrame(); + + // Wait in a spinlock manner until it is ok to start processing. + // Note that SleepMs is not applicable since it only allows sleeping + // on a millisecond basis which is too long. + // TODO(tommi): This loop may affect the performance of the test that it's + // meant to measure. See if we could use events instead to signal readiness. 
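+  // A sketch of that event-based alternative, for illustration only (not part
+  // of this patch; `ready_event_` would be a new rtc::Event member that the
+  // peer thread Set()s whenever its counter advances):
+  //   if (!ready_event_.Wait(TimeDelta::Seconds(1))) {
+  //     return false;  // Peer thread never became ready.
+  //   }
+  // This trades the busy-wait for per-frame signalling overhead.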
+ while (!ReadyToProcess()) { + } + + int result = AudioProcessing::kNoError; + switch (processor_type_) { + case ProcessorType::kRender: + result = ProcessRender(); + break; + case ProcessorType::kCapture: + result = ProcessCapture(); + break; + } + + EXPECT_EQ(result, AudioProcessing::kNoError); + + return !test_->MaybeEndTest(); +} + +const float CallSimulator::kRenderInputFloatLevel = 0.5f; +const float CallSimulator::kCaptureInputFloatLevel = 0.03125f; +} // anonymous namespace + +// TODO(peah): Reactivate once issue 7712 has been resolved. +TEST_P(CallSimulator, DISABLED_ApiCallDurationTest) { + // Run test and verify that it did not time out. + EXPECT_TRUE(Run()); +} + +INSTANTIATE_TEST_SUITE_P( + AudioProcessingPerformanceTest, + CallSimulator, + ::testing::ValuesIn(SimulationConfig::GenerateSimulationConfigs())); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build new file mode 100644 index 0000000000..f0af20c335 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] 
= "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_processing_statistics_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc new file mode 100644 index 0000000000..e320e71405 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc @@ -0,0 +1,3441 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/include/audio_processing.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "api/audio/echo_detector_creator.h" +#include "api/make_ref_counted.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/protobuf_utils.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/fake_clock.h" +#include "rtc_base/gtest_prod_util.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/protobuf_utils.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/swap_queue.h" +#include "rtc_base/system/arch.h" +#include "rtc_base/task_queue_for_test.h" +#include "rtc_base/thread.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +RTC_PUSH_IGNORING_WUNDEF() +#include "modules/audio_processing/debug.pb.h" +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h" +#else +#include "modules/audio_processing/test/unittest.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +ABSL_FLAG(bool, + write_apm_ref_data, + false, + "Write ApmTest.Process results to file, instead of comparing results " + "to the existing reference data file."); + +namespace webrtc { +namespace { + +// All sample rates used by APM internally during processing. Other input / +// output rates are resampled to / from one of these. 
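+// (For example, 44.1 kHz capture input is resampled to one of the rates below
+// for processing and then back for output, so the processing pipeline itself
+// only ever has to run at these three rates.)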
+const int kProcessSampleRates[] = {16000, 32000, 48000};
+
+enum StreamDirection { kForward = 0, kReverse };
+
+void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
+  ChannelBuffer<int16_t> cb_int(cb->num_frames(), cb->num_channels());
+  Deinterleave(int_data, cb->num_frames(), cb->num_channels(),
+               cb_int.channels());
+  for (size_t i = 0; i < cb->num_channels(); ++i) {
+    S16ToFloat(cb_int.channels()[i], cb->num_frames(), cb->channels()[i]);
+  }
+}
+
+void ConvertToFloat(const Int16FrameData& frame, ChannelBuffer<float>* cb) {
+  ConvertToFloat(frame.data.data(), cb);
+}
+
+void MixStereoToMono(const float* stereo,
+                     float* mono,
+                     size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; ++i)
+    mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
+}
+
+void MixStereoToMono(const int16_t* stereo,
+                     int16_t* mono,
+                     size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; ++i)
+    mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
+}
+
+void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    stereo[i * 2 + 1] = stereo[i * 2];
+  }
+}
+
+void VerifyChannelsAreEqual(const int16_t* stereo,
+                            size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]);
+  }
+}
+
+void SetFrameTo(Int16FrameData* frame, int16_t value) {
+  for (size_t i = 0; i < frame->samples_per_channel * frame->num_channels;
+       ++i) {
+    frame->data[i] = value;
+  }
+}
+
+void SetFrameTo(Int16FrameData* frame, int16_t left, int16_t right) {
+  ASSERT_EQ(2u, frame->num_channels);
+  for (size_t i = 0; i < frame->samples_per_channel * 2; i += 2) {
+    frame->data[i] = left;
+    frame->data[i + 1] = right;
+  }
+}
+
+void ScaleFrame(Int16FrameData* frame, float scale) {
+  for (size_t i = 0; i < frame->samples_per_channel * frame->num_channels;
+       ++i) {
+    frame->data[i] = FloatS16ToS16(frame->data[i] * scale);
+  }
+}
+
+bool FrameDataAreEqual(const Int16FrameData& frame1,
+                       const Int16FrameData& frame2) {
+  if (frame1.samples_per_channel != frame2.samples_per_channel) {
+    return false;
+  }
+  if (frame1.num_channels != frame2.num_channels) {
+    return false;
+  }
+  if (memcmp(
+          frame1.data.data(), frame2.data.data(),
+          frame1.samples_per_channel * frame1.num_channels * sizeof(int16_t))) {
+    return false;
+  }
+  return true;
+}
+
+rtc::ArrayView<int16_t> GetMutableFrameData(Int16FrameData* frame) {
+  int16_t* ptr = frame->data.data();
+  const size_t len = frame->samples_per_channel * frame->num_channels;
+  return rtc::ArrayView<int16_t>(ptr, len);
+}
+
+rtc::ArrayView<const int16_t> GetFrameData(const Int16FrameData& frame) {
+  const int16_t* ptr = frame.data.data();
+  const size_t len = frame.samples_per_channel * frame.num_channels;
+  return rtc::ArrayView<const int16_t>(ptr, len);
+}
+
+void EnableAllAPComponents(AudioProcessing* ap) {
+  AudioProcessing::Config apm_config = ap->GetConfig();
+  apm_config.echo_canceller.enabled = true;
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  apm_config.echo_canceller.mobile_mode = true;
+
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::kAdaptiveDigital;
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  apm_config.echo_canceller.mobile_mode = false;
+
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::kAdaptiveAnalog;
+#endif
+
+  apm_config.noise_suppression.enabled = true;
+
+  apm_config.high_pass_filter.enabled = true;
+  apm_config.pipeline.maximum_internal_processing_rate = 48000;
+  ap->ApplyConfig(apm_config);
+}
+
+// These functions are only used by ApmTest.Process.
+template <typename T>
+T AbsValue(T a) {
+  return a > 0 ? a : -a;
+}
+
+int16_t MaxAudioFrame(const Int16FrameData& frame) {
+  const size_t length = frame.samples_per_channel * frame.num_channels;
+  int16_t max_data = AbsValue(frame.data[0]);
+  for (size_t i = 1; i < length; i++) {
+    max_data = std::max(max_data, AbsValue(frame.data[i]));
+  }
+
+  return max_data;
+}
+
+void OpenFileAndWriteMessage(absl::string_view filename,
+                             const MessageLite& msg) {
+  FILE* file = fopen(std::string(filename).c_str(), "wb");
+  ASSERT_TRUE(file != NULL);
+
+  int32_t size = rtc::checked_cast<int32_t>(msg.ByteSizeLong());
+  ASSERT_GT(size, 0);
+  std::unique_ptr<uint8_t[]> array(new uint8_t[size]);
+  ASSERT_TRUE(msg.SerializeToArray(array.get(), size));
+
+  ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
+  ASSERT_EQ(static_cast<size_t>(size),
+            fwrite(array.get(), sizeof(array[0]), size, file));
+  fclose(file);
+}
+
+std::string ResourceFilePath(absl::string_view name, int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  // Resource files are all stereo.
+  ss << name << sample_rate_hz / 1000 << "_stereo";
+  return test::ResourcePath(ss.str(), "pcm");
+}
+
+// Temporary filenames unique to this process. Used to be able to run these
+// tests in parallel: since each process needs to run in isolation, they
+// can't have competing filenames.
+std::map<std::string, std::string> temp_filenames;
+
+std::string OutputFilePath(absl::string_view name,
+                           int input_rate,
+                           int output_rate,
+                           int reverse_input_rate,
+                           int reverse_output_rate,
+                           size_t num_input_channels,
+                           size_t num_output_channels,
+                           size_t num_reverse_input_channels,
+                           size_t num_reverse_output_channels,
+                           StreamDirection file_direction) {
+  rtc::StringBuilder ss;
+  ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir"
+     << num_reverse_input_channels << "_" << reverse_input_rate / 1000 << "_";
+  if (num_output_channels == 1) {
+    ss << "mono";
+  } else if (num_output_channels == 2) {
+    ss << "stereo";
+  } else {
+    RTC_DCHECK_NOTREACHED();
+  }
+  ss << output_rate / 1000;
+  if (num_reverse_output_channels == 1) {
+    ss << "_rmono";
+  } else if (num_reverse_output_channels == 2) {
+    ss << "_rstereo";
+  } else {
+    RTC_DCHECK_NOTREACHED();
+  }
+  ss << reverse_output_rate / 1000;
+  ss << "_d" << file_direction << "_pcm";
+
+  std::string filename = ss.str();
+  if (temp_filenames[filename].empty())
+    temp_filenames[filename] = test::TempFilename(test::OutputPath(), filename);
+  return temp_filenames[filename];
+}
+
+void ClearTempFiles() {
+  for (auto& kv : temp_filenames)
+    remove(kv.second.c_str());
+}
+
+// Only remove "out" files. Keep "ref" files.
+void ClearTempOutFiles() {
+  for (auto it = temp_filenames.begin(); it != temp_filenames.end();) {
+    const std::string& filename = it->first;
+    if (filename.substr(0, 3).compare("out") == 0) {
+      remove(it->second.c_str());
+      temp_filenames.erase(it++);
+    } else {
+      it++;
+    }
+  }
+}
+
+void OpenFileAndReadMessage(absl::string_view filename, MessageLite* msg) {
+  FILE* file = fopen(std::string(filename).c_str(), "rb");
+  ASSERT_TRUE(file != NULL);
+  ReadMessageFromFile(file, msg);
+  fclose(file);
+}
+
+// Reads a 10 ms chunk (actually AudioProcessing::GetFrameSize() samples per
+// channel) of int16 interleaved audio from the given (assumed stereo) file,
+// converts to deinterleaved float (optionally downmixing) and returns the
+// result in `cb`. Returns false if the file ended (or on error) and true
+// otherwise.
+//
+// `int_data` and `float_data` are just temporary space that must be
+// sufficiently large to hold the 10 ms chunk.
+bool ReadChunk(FILE* file,
+               int16_t* int_data,
+               float* float_data,
+               ChannelBuffer<float>* cb) {
+  // The files always contain stereo audio.
+  size_t frame_size = cb->num_frames() * 2;
+  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
+  if (read_count != frame_size) {
+    // Check that the file really ended.
+    RTC_DCHECK(feof(file));
+    return false;  // This is expected.
+  }
+
+  S16ToFloat(int_data, frame_size, float_data);
+  if (cb->num_channels() == 1) {
+    MixStereoToMono(float_data, cb->channels()[0], cb->num_frames());
+  } else {
+    Deinterleave(float_data, cb->num_frames(), 2, cb->channels());
+  }
+
+  return true;
+}
+
+// Returns the reference file name that matches the current CPU
+// architecture/optimizations.
+std::string GetReferenceFilename() {
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  return test::ResourcePath("audio_processing/output_data_fixed", "pb");
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  if (GetCPUInfo(kAVX2) != 0) {
+    return test::ResourcePath("audio_processing/output_data_float_avx2", "pb");
+  }
+  return test::ResourcePath("audio_processing/output_data_float", "pb");
+#endif
+}
+
+// Flag that can temporarily be enabled for local debugging to inspect
+// `ApmTest.VerifyDebugDump(Int|Float)` failures. Do not upload code changes
+// with this flag set to true.
+constexpr bool kDumpWhenExpectMessageEqFails = false;
+
+// Checks the debug constant values used in this file so that no code change
+// is submitted with values temporarily used for local debugging.
+TEST(ApmUnitTests, CheckDebugConstants) {
+  ASSERT_FALSE(kDumpWhenExpectMessageEqFails);
+}
+
+// Expects the equality of `actual` and `expected` by inspecting a hard-coded
+// subset of `audioproc::Stream` fields.
+void ExpectStreamFieldsEq(const audioproc::Stream& actual,
+                          const audioproc::Stream& expected) {
+  EXPECT_EQ(actual.input_data(), expected.input_data());
+  EXPECT_EQ(actual.output_data(), expected.output_data());
+  EXPECT_EQ(actual.delay(), expected.delay());
+  EXPECT_EQ(actual.drift(), expected.drift());
+  EXPECT_EQ(actual.applied_input_volume(), expected.applied_input_volume());
+  EXPECT_EQ(actual.keypress(), expected.keypress());
+}
+
+// Expects the equality of `actual` and `expected` by inspecting a hard-coded
+// subset of `audioproc::Event` fields.
+void ExpectEventFieldsEq(const audioproc::Event& actual,
+                         const audioproc::Event& expected) {
+  EXPECT_EQ(actual.type(), expected.type());
+  if (actual.type() != expected.type()) {
+    return;
+  }
+  switch (actual.type()) {
+    case audioproc::Event::STREAM:
+      ExpectStreamFieldsEq(actual.stream(), expected.stream());
+      break;
+    default:
+      // Not implemented.
+      break;
+  }
+}
+
+// Returns true if the `actual` and `expected` byte streams share the same
+// size and contain the same data. If they differ and
+// `kDumpWhenExpectMessageEqFails` is true, checks the equality of a subset of
+// `audioproc::Event` (nested) fields.
+bool ExpectMessageEq(rtc::ArrayView<const uint8_t> actual,
+                     rtc::ArrayView<const uint8_t> expected) {
+  EXPECT_EQ(actual.size(), expected.size());
+  if (actual.size() != expected.size()) {
+    return false;
+  }
+  if (memcmp(actual.data(), expected.data(), actual.size()) == 0) {
+    // Same message. No need to parse.
+    return true;
+  }
+  if (kDumpWhenExpectMessageEqFails) {
+    // Parse differing messages and expect equality to produce detailed error
+    // messages.
+    audioproc::Event event_actual, event_expected;
+    RTC_DCHECK(event_actual.ParseFromArray(actual.data(), actual.size()));
+    RTC_DCHECK(
+        event_expected.ParseFromArray(expected.data(), expected.size()));
+    ExpectEventFieldsEq(event_actual, event_expected);
+  }
+  return false;
+}
+
+class ApmTest : public ::testing::Test {
+ protected:
+  ApmTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+  static void SetUpTestSuite() {}
+
+  static void TearDownTestSuite() { ClearTempFiles(); }
+
+  // Used to select between int and float interface tests.
+  enum Format { kIntFormat, kFloatFormat };
+
+  void Init(int sample_rate_hz,
+            int output_sample_rate_hz,
+            int reverse_sample_rate_hz,
+            size_t num_input_channels,
+            size_t num_output_channels,
+            size_t num_reverse_channels,
+            bool open_output_file);
+  void Init(AudioProcessing* ap);
+  void EnableAllComponents();
+  bool ReadFrame(FILE* file, Int16FrameData* frame);
+  bool ReadFrame(FILE* file, Int16FrameData* frame, ChannelBuffer<float>* cb);
+  void ReadFrameWithRewind(FILE* file, Int16FrameData* frame);
+  void ReadFrameWithRewind(FILE* file,
+                           Int16FrameData* frame,
+                           ChannelBuffer<float>* cb);
+  void ProcessDelayVerificationTest(int delay_ms,
+                                    int system_delay_ms,
+                                    int delay_min,
+                                    int delay_max);
+  void TestChangingChannelsInt16Interface(
+      size_t num_channels,
+      AudioProcessing::Error expected_return);
+  void TestChangingForwardChannels(size_t num_in_channels,
+                                   size_t num_out_channels,
+                                   AudioProcessing::Error expected_return);
+  void TestChangingReverseChannels(size_t num_rev_channels,
+                                   AudioProcessing::Error expected_return);
+  void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
+  void RunManualVolumeChangeIsPossibleTest(int sample_rate);
+  void StreamParametersTest(Format format);
+  int ProcessStreamChooser(Format format);
+  int AnalyzeReverseStreamChooser(Format format);
+  void ProcessDebugDump(absl::string_view in_filename,
+                        absl::string_view out_filename,
+                        Format format,
+                        int max_size_bytes);
+  void VerifyDebugDumpTest(Format format);
+
+  const std::string output_path_;
+  const std::string ref_filename_;
+  rtc::scoped_refptr<AudioProcessing> apm_;
+  Int16FrameData frame_;
+  Int16FrameData revframe_;
+  std::unique_ptr<ChannelBuffer<float>> float_cb_;
+  std::unique_ptr<ChannelBuffer<float>> revfloat_cb_;
+  int output_sample_rate_hz_;
+  size_t num_output_channels_;
+  FILE* far_file_;
+  FILE* near_file_;
+  FILE* out_file_;
+};
+
+ApmTest::ApmTest()
+    : output_path_(test::OutputPath()),
+      ref_filename_(GetReferenceFilename()),
+      output_sample_rate_hz_(0),
+      num_output_channels_(0),
+      far_file_(NULL),
+      near_file_(NULL),
+      out_file_(NULL) {
+  apm_ = AudioProcessingBuilderForTesting().Create();
+  AudioProcessing::Config apm_config = apm_->GetConfig();
+  apm_config.gain_controller1.analog_gain_controller.enabled = false;
+  apm_config.pipeline.maximum_internal_processing_rate = 48000;
+  apm_->ApplyConfig(apm_config);
+}
+
+void ApmTest::SetUp() {
+  ASSERT_TRUE(apm_.get() != NULL);
+
+  Init(32000, 32000, 32000, 2, 2, 2, false);
+}
+
+void ApmTest::TearDown() {
+  if (far_file_) {
+    ASSERT_EQ(0, fclose(far_file_));
+  }
+  far_file_ = NULL;
+
+  if (near_file_) {
+    ASSERT_EQ(0, fclose(near_file_));
+  }
+  near_file_ = NULL;
+
+  if (out_file_) {
+    ASSERT_EQ(0, fclose(out_file_));
+  }
+  out_file_ = NULL;
+}
+
+void ApmTest::Init(AudioProcessing* ap) {
+  ASSERT_EQ(
+      kNoErr,
+      ap->Initialize({{{frame_.sample_rate_hz, frame_.num_channels},
{output_sample_rate_hz_, num_output_channels_}, + {revframe_.sample_rate_hz, revframe_.num_channels}, + {revframe_.sample_rate_hz, revframe_.num_channels}}})); +} + +void ApmTest::Init(int sample_rate_hz, + int output_sample_rate_hz, + int reverse_sample_rate_hz, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_channels, + bool open_output_file) { + SetContainerFormat(sample_rate_hz, num_input_channels, &frame_, &float_cb_); + output_sample_rate_hz_ = output_sample_rate_hz; + num_output_channels_ = num_output_channels; + + SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, &revframe_, + &revfloat_cb_); + Init(apm_.get()); + + if (far_file_) { + ASSERT_EQ(0, fclose(far_file_)); + } + std::string filename = ResourceFilePath("far", sample_rate_hz); + far_file_ = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(far_file_ != NULL) << "Could not open file " << filename << "\n"; + + if (near_file_) { + ASSERT_EQ(0, fclose(near_file_)); + } + filename = ResourceFilePath("near", sample_rate_hz); + near_file_ = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(near_file_ != NULL) << "Could not open file " << filename << "\n"; + + if (open_output_file) { + if (out_file_) { + ASSERT_EQ(0, fclose(out_file_)); + } + filename = OutputFilePath( + "out", sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz, + reverse_sample_rate_hz, num_input_channels, num_output_channels, + num_reverse_channels, num_reverse_channels, kForward); + out_file_ = fopen(filename.c_str(), "wb"); + ASSERT_TRUE(out_file_ != NULL) + << "Could not open file " << filename << "\n"; + } +} + +void ApmTest::EnableAllComponents() { + EnableAllAPComponents(apm_.get()); +} + +bool ApmTest::ReadFrame(FILE* file, + Int16FrameData* frame, + ChannelBuffer* cb) { + // The files always contain stereo audio. + size_t frame_size = frame->samples_per_channel * 2; + size_t read_count = + fread(frame->data.data(), sizeof(int16_t), frame_size, file); + if (read_count != frame_size) { + // Check that the file really ended. + EXPECT_NE(0, feof(file)); + return false; // This is expected. + } + + if (frame->num_channels == 1) { + MixStereoToMono(frame->data.data(), frame->data.data(), + frame->samples_per_channel); + } + + if (cb) { + ConvertToFloat(*frame, cb); + } + return true; +} + +bool ApmTest::ReadFrame(FILE* file, Int16FrameData* frame) { + return ReadFrame(file, frame, NULL); +} + +// If the end of the file has been reached, rewind it and attempt to read the +// frame again. 
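+// (This effectively loops the input recording, letting long-running tests
+// consume more frames than the resource file actually contains.)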
+void ApmTest::ReadFrameWithRewind(FILE* file, + Int16FrameData* frame, + ChannelBuffer* cb) { + if (!ReadFrame(near_file_, &frame_, cb)) { + rewind(near_file_); + ASSERT_TRUE(ReadFrame(near_file_, &frame_, cb)); + } +} + +void ApmTest::ReadFrameWithRewind(FILE* file, Int16FrameData* frame) { + ReadFrameWithRewind(file, frame, NULL); +} + +int ApmTest::ProcessStreamChooser(Format format) { + if (format == kIntFormat) { + return apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data()); + } + return apm_->ProcessStream( + float_cb_->channels(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(output_sample_rate_hz_, num_output_channels_), + float_cb_->channels()); +} + +int ApmTest::AnalyzeReverseStreamChooser(Format format) { + if (format == kIntFormat) { + return apm_->ProcessReverseStream( + revframe_.data.data(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + revframe_.data.data()); + } + return apm_->AnalyzeReverseStream( + revfloat_cb_->channels(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels)); +} + +void ApmTest::ProcessDelayVerificationTest(int delay_ms, + int system_delay_ms, + int delay_min, + int delay_max) { + // The `revframe_` and `frame_` should include the proper frame information, + // hence can be used for extracting information. + Int16FrameData tmp_frame; + std::queue frame_queue; + bool causal = true; + + tmp_frame.CopyFrom(revframe_); + SetFrameTo(&tmp_frame, 0); + + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + // Initialize the `frame_queue` with empty frames. + int frame_delay = delay_ms / 10; + while (frame_delay < 0) { + Int16FrameData* frame = new Int16FrameData(); + frame->CopyFrom(tmp_frame); + frame_queue.push(frame); + frame_delay++; + causal = false; + } + while (frame_delay > 0) { + Int16FrameData* frame = new Int16FrameData(); + frame->CopyFrom(tmp_frame); + frame_queue.push(frame); + frame_delay--; + } + // Run for 4.5 seconds, skipping statistics from the first 2.5 seconds. We + // need enough frames with audio to have reliable estimates, but as few as + // possible to keep processing time down. 4.5 seconds seemed to be a good + // compromise for this recording. + for (int frame_count = 0; frame_count < 450; ++frame_count) { + Int16FrameData* frame = new Int16FrameData(); + frame->CopyFrom(tmp_frame); + // Use the near end recording, since that has more speech in it. + ASSERT_TRUE(ReadFrame(near_file_, frame)); + frame_queue.push(frame); + Int16FrameData* reverse_frame = frame; + Int16FrameData* process_frame = frame_queue.front(); + if (!causal) { + reverse_frame = frame_queue.front(); + // When we call ProcessStream() the frame is modified, so we can't use the + // pointer directly when things are non-causal. Use an intermediate frame + // and copy the data. 
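+      // (ProcessStream() writes its output over its input buffer, and the
+      // newly read frame has already been pushed onto `frame_queue` for later
+      // rounds, so the non-causal path processes a copy in `tmp_frame` instead
+      // of mutating the queued data in place.)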
+ process_frame = &tmp_frame; + process_frame->CopyFrom(*frame); + } + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream( + reverse_frame->data.data(), + StreamConfig(reverse_frame->sample_rate_hz, + reverse_frame->num_channels), + StreamConfig(reverse_frame->sample_rate_hz, + reverse_frame->num_channels), + reverse_frame->data.data())); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(system_delay_ms)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream(process_frame->data.data(), + StreamConfig(process_frame->sample_rate_hz, + process_frame->num_channels), + StreamConfig(process_frame->sample_rate_hz, + process_frame->num_channels), + process_frame->data.data())); + frame = frame_queue.front(); + frame_queue.pop(); + delete frame; + + if (frame_count == 250) { + // Discard the first delay metrics to avoid convergence effects. + static_cast(apm_->GetStatistics()); + } + } + + rewind(near_file_); + while (!frame_queue.empty()) { + Int16FrameData* frame = frame_queue.front(); + frame_queue.pop(); + delete frame; + } + // Calculate expected delay estimate and acceptable regions. Further, + // limit them w.r.t. AEC delay estimation support. + const size_t samples_per_ms = + rtc::SafeMin(16u, frame_.samples_per_channel / 10); + const int expected_median = + rtc::SafeClamp(delay_ms - system_delay_ms, delay_min, delay_max); + const int expected_median_high = rtc::SafeClamp( + expected_median + rtc::dchecked_cast(96 / samples_per_ms), delay_min, + delay_max); + const int expected_median_low = rtc::SafeClamp( + expected_median - rtc::dchecked_cast(96 / samples_per_ms), delay_min, + delay_max); + // Verify delay metrics. + AudioProcessingStats stats = apm_->GetStatistics(); + ASSERT_TRUE(stats.delay_median_ms.has_value()); + int32_t median = *stats.delay_median_ms; + EXPECT_GE(expected_median_high, median); + EXPECT_LE(expected_median_low, median); +} + +void ApmTest::StreamParametersTest(Format format) { + // No errors when the components are disabled. + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + + // -- Missing AGC level -- + AudioProcessing::Config apm_config = apm_->GetConfig(); + apm_config.gain_controller1.enabled = true; + apm_->ApplyConfig(apm_config); + EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format)); + + // Resets after successful ProcessStream(). + apm_->set_stream_analog_level(127); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format)); + + // Other stream parameters set correctly. + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = false; + apm_->ApplyConfig(apm_config); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format)); + apm_config.gain_controller1.enabled = false; + apm_->ApplyConfig(apm_config); + + // -- Missing delay -- + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + + // Resets after successful ProcessStream(). + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + + // Other stream parameters set correctly. 
+ apm_config.gain_controller1.enabled = true; + apm_->ApplyConfig(apm_config); + apm_->set_stream_analog_level(127); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + apm_config.gain_controller1.enabled = false; + apm_->ApplyConfig(apm_config); + + // -- No stream parameters -- + EXPECT_EQ(apm_->kNoError, AnalyzeReverseStreamChooser(format)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + + // -- All there -- + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + apm_->set_stream_analog_level(127); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); +} + +TEST_F(ApmTest, StreamParametersInt) { + StreamParametersTest(kIntFormat); +} + +TEST_F(ApmTest, StreamParametersFloat) { + StreamParametersTest(kFloatFormat); +} + +void ApmTest::TestChangingChannelsInt16Interface( + size_t num_channels, + AudioProcessing::Error expected_return) { + frame_.num_channels = num_channels; + + EXPECT_EQ(expected_return, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_EQ(expected_return, + apm_->ProcessReverseStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); +} + +void ApmTest::TestChangingForwardChannels( + size_t num_in_channels, + size_t num_out_channels, + AudioProcessing::Error expected_return) { + const StreamConfig input_stream = {frame_.sample_rate_hz, num_in_channels}; + const StreamConfig output_stream = {output_sample_rate_hz_, num_out_channels}; + + EXPECT_EQ(expected_return, + apm_->ProcessStream(float_cb_->channels(), input_stream, + output_stream, float_cb_->channels())); +} + +void ApmTest::TestChangingReverseChannels( + size_t num_rev_channels, + AudioProcessing::Error expected_return) { + const ProcessingConfig processing_config = { + {{frame_.sample_rate_hz, apm_->num_input_channels()}, + {output_sample_rate_hz_, apm_->num_output_channels()}, + {frame_.sample_rate_hz, num_rev_channels}, + {frame_.sample_rate_hz, num_rev_channels}}}; + + EXPECT_EQ( + expected_return, + apm_->ProcessReverseStream( + float_cb_->channels(), processing_config.reverse_input_stream(), + processing_config.reverse_output_stream(), float_cb_->channels())); +} + +TEST_F(ApmTest, ChannelsInt16Interface) { + // Testing number of invalid and valid channels. + Init(16000, 16000, 16000, 4, 4, 4, false); + + TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError); + + for (size_t i = 1; i < 4; i++) { + TestChangingChannelsInt16Interface(i, kNoErr); + EXPECT_EQ(i, apm_->num_input_channels()); + } +} + +TEST_F(ApmTest, Channels) { + // Testing number of invalid and valid channels. + Init(16000, 16000, 16000, 4, 4, 4, false); + + TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError); + TestChangingReverseChannels(0, apm_->kBadNumberChannelsError); + + for (size_t i = 1; i < 4; ++i) { + for (size_t j = 0; j < 1; ++j) { + // Output channels much be one or match input channels. + if (j == 1 || i == j) { + TestChangingForwardChannels(i, j, kNoErr); + TestChangingReverseChannels(i, kNoErr); + + EXPECT_EQ(i, apm_->num_input_channels()); + EXPECT_EQ(j, apm_->num_output_channels()); + // The number of reverse channels used for processing to is always 1. 
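+        // (The reverse stream is only analyzed, not rendered back, and APM
+        // downmixes multi-channel render input before that analysis, so
+        // num_reverse_channels() reports 1 regardless of the configured
+        // count.)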
+ EXPECT_EQ(1u, apm_->num_reverse_channels());
+ } else {
+ TestChangingForwardChannels(i, j,
+ AudioProcessing::kBadNumberChannelsError);
+ }
+ }
+ }
+}
+
+TEST_F(ApmTest, SampleRatesInt) {
+ // Testing some valid sample rates.
+ for (int sample_rate : {8000, 12000, 16000, 32000, 44100, 48000, 96000}) {
+ SetContainerFormat(sample_rate, 2, &frame_, &float_cb_);
+ EXPECT_NOERR(ProcessStreamChooser(kIntFormat));
+ }
+}
+
+// This test repeatedly reconfigures the pre-amplifier in APM, processes a
+// number of frames, and checks that the output signal has the right level.
+TEST_F(ApmTest, PreAmplifier) {
+ // Fill the audio frame with a sawtooth pattern.
+ rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+ const size_t samples_per_channel = frame_.samples_per_channel;
+ for (size_t i = 0; i < samples_per_channel; i++) {
+ for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+ frame_data[i + ch * samples_per_channel] = 10000 * ((i % 3) - 1);
+ }
+ }
+ // Cache the frame in tmp_frame.
+ Int16FrameData tmp_frame;
+ tmp_frame.CopyFrom(frame_);
+
+ auto compute_power = [](const Int16FrameData& frame) {
+ rtc::ArrayView<const int16_t> data = GetFrameData(frame);
+ return std::accumulate(data.begin(), data.end(), 0.0f,
+ [](float a, float b) { return a + b * b; }) /
+ data.size() / 32768 / 32768;
+ };
+
+ const float input_power = compute_power(tmp_frame);
+ // Double-check that the input data is large compared to the error kEpsilon.
+ constexpr float kEpsilon = 1e-4f;
+ RTC_DCHECK_GE(input_power, 10 * kEpsilon);
+
+ // 1. Enable pre-amp with 0 dB gain.
+ AudioProcessing::Config config = apm_->GetConfig();
+ config.pre_amplifier.enabled = true;
+ config.pre_amplifier.fixed_gain_factor = 1.0f;
+ apm_->ApplyConfig(config);
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ float output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, input_power, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 1.0f);
+
+ // 2. Change pre-amp gain via ApplyConfig.
+ config.pre_amplifier.fixed_gain_factor = 2.0f;
+ apm_->ApplyConfig(config);
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, 4 * input_power, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 2.0f);
+
+ // 3. Change pre-amp gain via a RuntimeSetting.
+ apm_->SetRuntimeSetting(
+ AudioProcessing::RuntimeSetting::CreateCapturePreGain(1.5f));
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, 2.25 * input_power, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 1.5f);
+}
+
+// Ensures that the emulated analog mic gain functionality runs without
+// crashing.
+TEST_F(ApmTest, AnalogMicGainEmulation) {
+ // Fill the audio frame with a sawtooth pattern.
+ rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+ const size_t samples_per_channel = frame_.samples_per_channel;
+ for (size_t i = 0; i < samples_per_channel; i++) {
+ for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+ frame_data[i + ch * samples_per_channel] = 100 * ((i % 3) - 1);
+ }
+ }
+ // Cache the frame in tmp_frame.
+ Int16FrameData tmp_frame;
+ tmp_frame.CopyFrom(frame_);
+
+ // Enable the analog gain emulation.
+ AudioProcessing::Config config = apm_->GetConfig();
+ config.capture_level_adjustment.enabled = true;
+ config.capture_level_adjustment.analog_mic_gain_emulation.enabled = true;
+ config.capture_level_adjustment.analog_mic_gain_emulation.initial_level = 21;
+ config.gain_controller1.enabled = true;
+ config.gain_controller1.mode =
+ AudioProcessing::Config::GainController1::Mode::kAdaptiveAnalog;
+ config.gain_controller1.analog_gain_controller.enabled = true;
+ apm_->ApplyConfig(config);
+
+ // Process a number of frames to ensure that the code runs without crashes.
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+}
+
+// This test repeatedly reconfigures the capture level adjustment functionality
+// in APM, processes a number of frames, and checks that the output signal has
+// the right level.
+TEST_F(ApmTest, CaptureLevelAdjustment) {
+ // Fill the audio frame with a sawtooth pattern.
+ rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+ const size_t samples_per_channel = frame_.samples_per_channel;
+ for (size_t i = 0; i < samples_per_channel; i++) {
+ for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+ frame_data[i + ch * samples_per_channel] = 100 * ((i % 3) - 1);
+ }
+ }
+ // Cache the frame in tmp_frame.
+ Int16FrameData tmp_frame;
+ tmp_frame.CopyFrom(frame_);
+
+ auto compute_power = [](const Int16FrameData& frame) {
+ rtc::ArrayView<const int16_t> data = GetFrameData(frame);
+ return std::accumulate(data.begin(), data.end(), 0.0f,
+ [](float a, float b) { return a + b * b; }) /
+ data.size() / 32768 / 32768;
+ };
+
+ const float input_power = compute_power(tmp_frame);
+ // Double-check that the input data is large compared to the error kEpsilon.
+ constexpr float kEpsilon = 1e-20f;
+ RTC_DCHECK_GE(input_power, 10 * kEpsilon);
+
+ // 1. Enable capture level adjustment with a 0.5x pre-gain and a 4x
+ // post-gain.
+ AudioProcessing::Config config = apm_->GetConfig();
+ config.capture_level_adjustment.enabled = true;
+ config.capture_level_adjustment.pre_gain_factor = 0.5f;
+ config.capture_level_adjustment.post_gain_factor = 4.f;
+ const float expected_output_power1 =
+ config.capture_level_adjustment.pre_gain_factor *
+ config.capture_level_adjustment.pre_gain_factor *
+ config.capture_level_adjustment.post_gain_factor *
+ config.capture_level_adjustment.post_gain_factor * input_power;
+ apm_->ApplyConfig(config);
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ float output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, expected_output_power1, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 0.5f);
+ EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 4.f);
+
+ // 2. Change the pre and post gains via ApplyConfig.
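+ // The gains scale sample amplitudes, and power goes with the squared
+ // amplitude, so the expectations here follow
+ //   expected_power = pre_gain^2 * post_gain^2 * input_power.
+ // Worked example for this step (gain values from the config below):
+ //   pre = 1.0, post = 2.0  =>  expected_power = 1.0 * 4.0 * input_power.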
+ config.capture_level_adjustment.pre_gain_factor = 1.0f;
+ config.capture_level_adjustment.post_gain_factor = 2.f;
+ const float expected_output_power2 =
+ config.capture_level_adjustment.pre_gain_factor *
+ config.capture_level_adjustment.pre_gain_factor *
+ config.capture_level_adjustment.post_gain_factor *
+ config.capture_level_adjustment.post_gain_factor * input_power;
+ apm_->ApplyConfig(config);
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, expected_output_power2, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 1.0f);
+ EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 2.f);
+
+ // 3. Change the pre and post gains via RuntimeSettings.
+ constexpr float kPreGain3 = 0.5f;
+ constexpr float kPostGain3 = 3.f;
+ const float expected_output_power3 =
+ kPreGain3 * kPreGain3 * kPostGain3 * kPostGain3 * input_power;
+
+ apm_->SetRuntimeSetting(
+ AudioProcessing::RuntimeSetting::CreateCapturePreGain(kPreGain3));
+ apm_->SetRuntimeSetting(
+ AudioProcessing::RuntimeSetting::CreateCapturePostGain(kPostGain3));
+
+ for (int i = 0; i < 20; ++i) {
+ frame_.CopyFrom(tmp_frame);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+ }
+ output_power = compute_power(frame_);
+ EXPECT_NEAR(output_power, expected_output_power3, kEpsilon);
+ config = apm_->GetConfig();
+ EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 0.5f);
+ EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 3.f);
+}
+
+TEST_F(ApmTest, GainControl) {
+ AudioProcessing::Config config = apm_->GetConfig();
+ config.gain_controller1.enabled = false;
+ apm_->ApplyConfig(config);
+ config.gain_controller1.enabled = true;
+ apm_->ApplyConfig(config);
+
+ // Testing gain modes.
+ for (auto mode :
+ {AudioProcessing::Config::GainController1::kAdaptiveDigital,
+ AudioProcessing::Config::GainController1::kFixedDigital,
+ AudioProcessing::Config::GainController1::kAdaptiveAnalog}) {
+ config.gain_controller1.mode = mode;
+ apm_->ApplyConfig(config);
+ apm_->set_stream_analog_level(100);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+ }
+
+ // Testing target levels.
+ for (int target_level_dbfs : {0, 15, 31}) {
+ config.gain_controller1.target_level_dbfs = target_level_dbfs;
+ apm_->ApplyConfig(config);
+ apm_->set_stream_analog_level(100);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+ }
+
+ // Testing compression gains.
+ for (int compression_gain_db : {0, 10, 90}) {
+ config.gain_controller1.compression_gain_db = compression_gain_db;
+ apm_->ApplyConfig(config);
+ apm_->set_stream_analog_level(100);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+ }
+
+ // Testing limiter off/on.
+ for (bool enable : {false, true}) {
+ config.gain_controller1.enable_limiter = enable;
+ apm_->ApplyConfig(config);
+ apm_->set_stream_analog_level(100);
+ EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+ }
+
+ // Testing level limits.
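+ // The applied analog level must lie in [0, 255], matching a typical OS
+ // mic-volume range; the valid ranges implied by the death tests below are
+ // target_level_dbfs in [0, 31] and compression_gain_db in [0, 90].
+ // For example (value inferred from the death tests):
+ //   apm_->set_stream_analog_level(256);  // Hits a DCHECK.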
+ constexpr int kMinLevel = 0; + constexpr int kMaxLevel = 255; + apm_->set_stream_analog_level(kMinLevel); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); + apm_->set_stream_analog_level((kMinLevel + kMaxLevel) / 2); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); + apm_->set_stream_analog_level(kMaxLevel); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +using ApmDeathTest = ApmTest; + +TEST_F(ApmDeathTest, GainControlDiesOnTooLowTargetLevelDbfs) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.target_level_dbfs = -1; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooHighTargetLevelDbfs) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.target_level_dbfs = 32; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooLowCompressionGainDb) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.compression_gain_db = -1; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooHighCompressionGainDb) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.compression_gain_db = 91; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, ApmDiesOnTooLowAnalogLevel) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + apm_->ApplyConfig(config); + EXPECT_DEATH(apm_->set_stream_analog_level(-1), ""); +} + +TEST_F(ApmDeathTest, ApmDiesOnTooHighAnalogLevel) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + apm_->ApplyConfig(config); + EXPECT_DEATH(apm_->set_stream_analog_level(256), ""); +} +#endif + +void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveAnalog; + apm_->ApplyConfig(config); + + int out_analog_level = 0; + for (int i = 0; i < 2000; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(&frame_, 0.25); + + // Always pass in the same volume. + apm_->set_stream_analog_level(100); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + } + + // Ensure the AGC is still able to reach the maximum. + EXPECT_EQ(255, out_analog_level); +} + +// Verifies that despite volume slider quantization, the AGC can continue to +// increase its volume. 
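+// A real client would feed the recommendation back through a coarse volume
+// slider, e.g. (illustrative sketch only):
+//   int applied = (apm->recommended_stream_analog_level() / 10) * 10;
+//   apm->set_stream_analog_level(applied);
+// The test below models the extreme case where the applied level never moves.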
+TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunQuantizedVolumeDoesNotGetStuckTest(sample_rate_hz); + } +} + +void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveAnalog; + apm_->ApplyConfig(config); + + int out_analog_level = 100; + for (int i = 0; i < 1000; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(&frame_, 0.25); + + apm_->set_stream_analog_level(out_analog_level); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + } + + // Ensure the volume was raised. + EXPECT_GT(out_analog_level, 100); + int highest_level_reached = out_analog_level; + // Simulate a user manual volume change. + out_analog_level = 100; + + for (int i = 0; i < 300; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + ScaleFrame(&frame_, 0.25); + + apm_->set_stream_analog_level(out_analog_level); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + // Check that AGC respected the manually adjusted volume. + EXPECT_LT(out_analog_level, highest_level_reached); + } + // Check that the volume was still raised. + EXPECT_GT(out_analog_level, 100); +} + +TEST_F(ApmTest, ManualVolumeChangeIsPossible) { + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunManualVolumeChangeIsPossibleTest(sample_rate_hz); + } +} + +TEST_F(ApmTest, HighPassFilter) { + // Turn HP filter on/off + AudioProcessing::Config apm_config; + apm_config.high_pass_filter.enabled = true; + apm_->ApplyConfig(apm_config); + apm_config.high_pass_filter.enabled = false; + apm_->ApplyConfig(apm_config); +} + +TEST_F(ApmTest, AllProcessingDisabledByDefault) { + AudioProcessing::Config config = apm_->GetConfig(); + EXPECT_FALSE(config.echo_canceller.enabled); + EXPECT_FALSE(config.high_pass_filter.enabled); + EXPECT_FALSE(config.gain_controller1.enabled); + EXPECT_FALSE(config.noise_suppression.enabled); +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledInt) { + // Test that ProcessStream simply copies input to output when all components + // are disabled. + // Runs over all processing rates, and some particularly common or special + // rates. + // - 8000 Hz: lowest sample rate seen in Chrome metrics, + // - 22050 Hz: APM input/output frames are not exactly 10 ms, + // - 44100 Hz: very common desktop sample rate. 
+ constexpr int kSampleRatesHz[] = {8000, 16000, 22050, 32000, 44100, 48000}; + for (size_t sample_rate_hz : kSampleRatesHz) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + Init(sample_rate_hz, sample_rate_hz, sample_rate_hz, 2, 2, 2, false); + SetFrameTo(&frame_, 1000, 2000); + Int16FrameData frame_copy; + frame_copy.CopyFrom(frame_); + for (int j = 0; j < 1000; j++) { + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessReverseStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + } + } +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { + // Test that ProcessStream simply copies input to output when all components + // are disabled. + const size_t kSamples = 160; + const int sample_rate = 16000; + const float src[kSamples] = {-1.0f, 0.0f, 1.0f}; + float dest[kSamples] = {}; + + auto src_channels = &src[0]; + auto dest_channels = &dest[0]; + + apm_ = AudioProcessingBuilderForTesting().Create(); + EXPECT_NOERR(apm_->ProcessStream(&src_channels, StreamConfig(sample_rate, 1), + StreamConfig(sample_rate, 1), + &dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], dest[i]); + } + + // Same for ProcessReverseStream. + float rev_dest[kSamples] = {}; + auto rev_dest_channels = &rev_dest[0]; + + StreamConfig input_stream = {sample_rate, 1}; + StreamConfig output_stream = {sample_rate, 1}; + EXPECT_NOERR(apm_->ProcessReverseStream(&src_channels, input_stream, + output_stream, &rev_dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], rev_dest[i]); + } +} + +TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { + EnableAllComponents(); + + for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { + Init(kProcessSampleRates[i], kProcessSampleRates[i], kProcessSampleRates[i], + 2, 2, 2, false); + int analog_level = 127; + ASSERT_EQ(0, feof(far_file_)); + ASSERT_EQ(0, feof(near_file_)); + while (ReadFrame(far_file_, &revframe_) && ReadFrame(near_file_, &frame_)) { + CopyLeftToRightChannel(revframe_.data.data(), + revframe_.samples_per_channel); + + ASSERT_EQ( + kNoErr, + apm_->ProcessReverseStream( + revframe_.data.data(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + revframe_.data.data())); + + CopyLeftToRightChannel(frame_.data.data(), frame_.samples_per_channel); + + ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); + apm_->set_stream_analog_level(analog_level); + ASSERT_EQ(kNoErr, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + analog_level = apm_->recommended_stream_analog_level(); + + VerifyChannelsAreEqual(frame_.data.data(), frame_.samples_per_channel); + } + rewind(far_file_); + rewind(near_file_); + } +} + +TEST_F(ApmTest, SplittingFilter) { + // Verify the filter is not active through undistorted audio when: + // 1. No components are enabled... 
+ SetFrameTo(&frame_, 1000);
+ Int16FrameData frame_copy;
+ frame_copy.CopyFrom(frame_);
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
+
+ // 2. The config is re-applied without enabling any components...
+ auto apm_config = apm_->GetConfig();
+ SetFrameTo(&frame_, 1000);
+ frame_copy.CopyFrom(frame_);
+ apm_->ApplyConfig(apm_config);
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
+ apm_->ApplyConfig(apm_config);
+
+ // Check that the test is valid. We should get distortion from the splitting
+ // filter when the AEC is enabled (the AEC itself won't affect the audio).
+ apm_config.echo_canceller.enabled = true;
+ apm_config.echo_canceller.mobile_mode = false;
+ apm_->ApplyConfig(apm_config);
+ frame_.samples_per_channel = 320;
+ frame_.num_channels = 2;
+ frame_.sample_rate_hz = 32000;
+ SetFrameTo(&frame_, 1000);
+ frame_copy.CopyFrom(frame_);
+ EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_FALSE(FrameDataAreEqual(frame_, frame_copy));
+}
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+void ApmTest::ProcessDebugDump(absl::string_view in_filename,
+ absl::string_view out_filename,
+ Format format,
+ int max_size_bytes) {
+ TaskQueueForTest worker_queue("ApmTest_worker_queue");
+ FILE* in_file = fopen(std::string(in_filename).c_str(), "rb");
+ ASSERT_TRUE(in_file != NULL);
+ audioproc::Event event_msg;
+ bool first_init = true;
+
+ while (ReadMessageFromFile(in_file, &event_msg)) {
+ if (event_msg.type() == audioproc::Event::INIT) {
+ const audioproc::Init msg = event_msg.init();
+ int reverse_sample_rate = msg.sample_rate();
+ if (msg.has_reverse_sample_rate()) {
+ reverse_sample_rate = msg.reverse_sample_rate();
+ }
+ int output_sample_rate = msg.sample_rate();
+ if (msg.has_output_sample_rate()) {
+ output_sample_rate = msg.output_sample_rate();
+ }
+
+ Init(msg.sample_rate(), output_sample_rate, reverse_sample_rate,
+ msg.num_input_channels(), msg.num_output_channels(),
+ msg.num_reverse_channels(), false);
+ if (first_init) {
+ // AttachAecDump() writes an additional init message. Don't start
+ // recording until after the first init to avoid the extra message.
+ auto aec_dump =
+ AecDumpFactory::Create(out_filename, max_size_bytes, &worker_queue);
+ EXPECT_TRUE(aec_dump);
+ apm_->AttachAecDump(std::move(aec_dump));
+ first_init = false;
+ }
+
+ } else if (event_msg.type() == audioproc::Event::REVERSE_STREAM) {
+ const audioproc::ReverseStream msg = event_msg.reverse_stream();
+
+ if (msg.channel_size() > 0) {
+ ASSERT_EQ(revframe_.num_channels,
+ static_cast<size_t>(msg.channel_size()));
+ for (int i = 0; i < msg.channel_size(); ++i) {
+ memcpy(revfloat_cb_->channels()[i], msg.channel(i).data(),
+ msg.channel(i).size());
+ }
+ } else {
+ memcpy(revframe_.data.data(), msg.data().data(), msg.data().size());
+ if (format == kFloatFormat) {
+ // We're using an int16 input file; convert to float.
+ ConvertToFloat(revframe_, revfloat_cb_.get());
+ }
+ }
+ AnalyzeReverseStreamChooser(format);
+
+ } else if (event_msg.type() == audioproc::Event::STREAM) {
+ const audioproc::Stream msg = event_msg.stream();
+ // ProcessStream could have changed this for the output frame.
+ frame_.num_channels = apm_->num_input_channels();
+
+ apm_->set_stream_analog_level(msg.applied_input_volume());
+ EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay()));
+ if (msg.has_keypress()) {
+ apm_->set_stream_key_pressed(msg.keypress());
+ } else {
+ apm_->set_stream_key_pressed(true);
+ }
+
+ if (msg.input_channel_size() > 0) {
+ ASSERT_EQ(frame_.num_channels,
+ static_cast<size_t>(msg.input_channel_size()));
+ for (int i = 0; i < msg.input_channel_size(); ++i) {
+ memcpy(float_cb_->channels()[i], msg.input_channel(i).data(),
+ msg.input_channel(i).size());
+ }
+ } else {
+ memcpy(frame_.data.data(), msg.input_data().data(),
+ msg.input_data().size());
+ if (format == kFloatFormat) {
+ // We're using an int16 input file; convert to float.
+ ConvertToFloat(frame_, float_cb_.get());
+ }
+ }
+ ProcessStreamChooser(format);
+ }
+ }
+ apm_->DetachAecDump();
+ fclose(in_file);
+}
+
+void ApmTest::VerifyDebugDumpTest(Format format) {
+ rtc::ScopedFakeClock fake_clock;
+ const std::string in_filename = test::ResourcePath("ref03", "aecdump");
+ std::string format_string;
+ switch (format) {
+ case kIntFormat:
+ format_string = "_int";
+ break;
+ case kFloatFormat:
+ format_string = "_float";
+ break;
+ }
+ const std::string ref_filename = test::TempFilename(
+ test::OutputPath(), std::string("ref") + format_string + "_aecdump");
+ const std::string out_filename = test::TempFilename(
+ test::OutputPath(), std::string("out") + format_string + "_aecdump");
+ const std::string limited_filename = test::TempFilename(
+ test::OutputPath(), std::string("limited") + format_string + "_aecdump");
+ const size_t logging_limit_bytes = 100000;
+ // We expect at least this many bytes in the created logfile.
+ const size_t logging_expected_bytes = 95000;
+ EnableAllComponents();
+ ProcessDebugDump(in_filename, ref_filename, format, -1);
+ ProcessDebugDump(ref_filename, out_filename, format, -1);
+ ProcessDebugDump(ref_filename, limited_filename, format, logging_limit_bytes);
+
+ FILE* ref_file = fopen(ref_filename.c_str(), "rb");
+ FILE* out_file = fopen(out_filename.c_str(), "rb");
+ FILE* limited_file = fopen(limited_filename.c_str(), "rb");
+ ASSERT_TRUE(ref_file != NULL);
+ ASSERT_TRUE(out_file != NULL);
+ ASSERT_TRUE(limited_file != NULL);
+ std::unique_ptr<uint8_t[]> ref_bytes;
+ std::unique_ptr<uint8_t[]> out_bytes;
+ std::unique_ptr<uint8_t[]> limited_bytes;
+
+ size_t ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
+ size_t out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
+ size_t limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes);
+ size_t bytes_read = 0;
+ size_t bytes_read_limited = 0;
+ while (ref_size > 0 && out_size > 0) {
+ bytes_read += ref_size;
+ bytes_read_limited += limited_size;
+ EXPECT_EQ(ref_size, out_size);
+ EXPECT_GE(ref_size, limited_size);
+ EXPECT_TRUE(ExpectMessageEq(/*actual=*/{out_bytes.get(), out_size},
+ /*expected=*/{ref_bytes.get(), ref_size}));
+ if (limited_size > 0) {
+ EXPECT_TRUE(
+ ExpectMessageEq(/*actual=*/{limited_bytes.get(), limited_size},
+ /*expected=*/{ref_bytes.get(), ref_size}));
+ }
+ ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
+ out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
+ limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes);
+ }
+ EXPECT_GT(bytes_read, 0u);
+ EXPECT_GT(bytes_read_limited, logging_expected_bytes);
+ EXPECT_LE(bytes_read_limited, logging_limit_bytes);
+ EXPECT_NE(0, feof(ref_file));
+ EXPECT_NE(0, feof(out_file));
+ EXPECT_NE(0, feof(limited_file));
+ ASSERT_EQ(0, fclose(ref_file));
+ ASSERT_EQ(0, fclose(out_file));
+ ASSERT_EQ(0, fclose(limited_file));
+ remove(ref_filename.c_str());
+ remove(out_filename.c_str());
+ remove(limited_filename.c_str());
+}
+
+TEST_F(ApmTest, VerifyDebugDumpInt) {
+ VerifyDebugDumpTest(kIntFormat);
+}
+
+TEST_F(ApmTest, VerifyDebugDumpFloat) {
+ VerifyDebugDumpTest(kFloatFormat);
+}
+#endif
+
+// TODO(andrew): expand test to verify output.
+TEST_F(ApmTest, DebugDump) {
+ TaskQueueForTest worker_queue("ApmTest_worker_queue");
+ const std::string filename =
+ test::TempFilename(test::OutputPath(), "debug_aec");
+ {
+ auto aec_dump = AecDumpFactory::Create("", -1, &worker_queue);
+ EXPECT_FALSE(aec_dump);
+ }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+ // Stopping without having started should be OK.
+ apm_->DetachAecDump();
+
+ auto aec_dump = AecDumpFactory::Create(filename, -1, &worker_queue);
+ EXPECT_TRUE(aec_dump);
+ apm_->AttachAecDump(std::move(aec_dump));
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessReverseStream(
+ revframe_.data.data(),
+ StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+ StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+ revframe_.data.data()));
+ apm_->DetachAecDump();
+
+ // Verify the file has been written.
+ FILE* fid = fopen(filename.c_str(), "r");
+ ASSERT_TRUE(fid != NULL);
+
+ // Clean it up.
+ ASSERT_EQ(0, fclose(fid));
+ ASSERT_EQ(0, remove(filename.c_str()));
+#else
+ // Verify the file has NOT been written.
+ ASSERT_TRUE(fopen(filename.c_str(), "r") == NULL); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +// TODO(andrew): expand test to verify output. +TEST_F(ApmTest, DebugDumpFromFileHandle) { + TaskQueueForTest worker_queue("ApmTest_worker_queue"); + + const std::string filename = + test::TempFilename(test::OutputPath(), "debug_aec"); + FileWrapper f = FileWrapper::OpenWriteOnly(filename); + ASSERT_TRUE(f.is_open()); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // Stopping without having started should be OK. + apm_->DetachAecDump(); + + auto aec_dump = AecDumpFactory::Create(std::move(f), -1, &worker_queue); + EXPECT_TRUE(aec_dump); + apm_->AttachAecDump(std::move(aec_dump)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessReverseStream( + revframe_.data.data(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + revframe_.data.data())); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + apm_->DetachAecDump(); + + // Verify the file has been written. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +// TODO(andrew): Add a test to process a few frames with different combinations +// of enabled components. + +TEST_F(ApmTest, Process) { + GOOGLE_PROTOBUF_VERIFY_VERSION; + audioproc::OutputData ref_data; + + if (!absl::GetFlag(FLAGS_write_apm_ref_data)) { + OpenFileAndReadMessage(ref_filename_, &ref_data); + } else { + const int kChannels[] = {1, 2}; + // Write the desired tests to the protobuf reference file. + for (size_t i = 0; i < arraysize(kChannels); i++) { + for (size_t j = 0; j < arraysize(kChannels); j++) { + for (int sample_rate_hz : AudioProcessing::kNativeSampleRatesHz) { + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(kChannels[i]); + test->set_num_input_channels(kChannels[j]); + test->set_num_output_channels(kChannels[j]); + test->set_sample_rate(sample_rate_hz); + test->set_use_aec_extended_filter(false); + } + } + } +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + // To test the extended filter mode. + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(2); + test->set_num_input_channels(2); + test->set_num_output_channels(2); + test->set_sample_rate(AudioProcessing::kSampleRate32kHz); + test->set_use_aec_extended_filter(true); +#endif + } + + for (int i = 0; i < ref_data.test_size(); i++) { + printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); + + audioproc::Test* test = ref_data.mutable_test(i); + // TODO(ajm): We no longer allow different input and output channels. Skip + // these tests for now, but they should be removed from the set. 
+ if (test->num_input_channels() != test->num_output_channels())
+ continue;
+
+ apm_ = AudioProcessingBuilderForTesting()
+ .SetEchoDetector(CreateEchoDetector())
+ .Create();
+ AudioProcessing::Config apm_config = apm_->GetConfig();
+ apm_config.gain_controller1.analog_gain_controller.enabled = false;
+ apm_->ApplyConfig(apm_config);
+
+ EnableAllComponents();
+
+ Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
+ static_cast<size_t>(test->num_input_channels()),
+ static_cast<size_t>(test->num_output_channels()),
+ static_cast<size_t>(test->num_reverse_channels()), true);
+
+ int frame_count = 0;
+ int analog_level = 127;
+ int analog_level_average = 0;
+ int max_output_average = 0;
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+ int stats_index = 0;
+#endif
+
+ while (ReadFrame(far_file_, &revframe_) && ReadFrame(near_file_, &frame_)) {
+ EXPECT_EQ(
+ apm_->kNoError,
+ apm_->ProcessReverseStream(
+ revframe_.data.data(),
+ StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+ StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+ revframe_.data.data()));
+
+ EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+ apm_->set_stream_analog_level(analog_level);
+
+ EXPECT_EQ(apm_->kNoError,
+ apm_->ProcessStream(
+ frame_.data.data(),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+ frame_.data.data()));
+
+ // Ensure the frame was downmixed properly.
+ EXPECT_EQ(static_cast<size_t>(test->num_output_channels()),
+ frame_.num_channels);
+
+ max_output_average += MaxAudioFrame(frame_);
+
+ analog_level = apm_->recommended_stream_analog_level();
+ analog_level_average += analog_level;
+ AudioProcessingStats stats = apm_->GetStatistics();
+
+ size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
+ size_t write_count =
+ fwrite(frame_.data.data(), sizeof(int16_t), frame_size, out_file_);
+ ASSERT_EQ(frame_size, write_count);
+
+ // Reset in case of downmixing.
+ frame_.num_channels = static_cast<size_t>(test->num_input_channels());
+ frame_count++;
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+ const int kStatsAggregationFrameNum = 100; // 1 second.
+ if (frame_count % kStatsAggregationFrameNum == 0) {
+ // Get echo and delay metrics.
+ AudioProcessingStats stats2 = apm_->GetStatistics();
+
+ // Echo metrics.
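+ // ERL (echo return loss) is the attenuation of the far-end signal in the
+ // capture path and ERLE (echo return loss enhancement) is the additional
+ // suppression achieved by the canceller, both in dB; the residual echo
+ // likelihoods are probabilities in [0, 1]. value_or(-1.0f) below marks a
+ // metric that was not reported.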
+ const float echo_return_loss = stats2.echo_return_loss.value_or(-1.0f);
+ const float echo_return_loss_enhancement =
+ stats2.echo_return_loss_enhancement.value_or(-1.0f);
+ const float residual_echo_likelihood =
+ stats2.residual_echo_likelihood.value_or(-1.0f);
+ const float residual_echo_likelihood_recent_max =
+ stats2.residual_echo_likelihood_recent_max.value_or(-1.0f);
+
+ if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
+ const audioproc::Test::EchoMetrics& reference =
+ test->echo_metrics(stats_index);
+ constexpr float kEpsilon = 0.01f;
+ EXPECT_NEAR(echo_return_loss, reference.echo_return_loss(), kEpsilon);
+ EXPECT_NEAR(echo_return_loss_enhancement,
+ reference.echo_return_loss_enhancement(), kEpsilon);
+ EXPECT_NEAR(residual_echo_likelihood,
+ reference.residual_echo_likelihood(), kEpsilon);
+ EXPECT_NEAR(residual_echo_likelihood_recent_max,
+ reference.residual_echo_likelihood_recent_max(),
+ kEpsilon);
+ ++stats_index;
+ } else {
+ audioproc::Test::EchoMetrics* message_echo = test->add_echo_metrics();
+ message_echo->set_echo_return_loss(echo_return_loss);
+ message_echo->set_echo_return_loss_enhancement(
+ echo_return_loss_enhancement);
+ message_echo->set_residual_echo_likelihood(residual_echo_likelihood);
+ message_echo->set_residual_echo_likelihood_recent_max(
+ residual_echo_likelihood_recent_max);
+ }
+ }
+#endif // defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE).
+ }
+ max_output_average /= frame_count;
+ analog_level_average /= frame_count;
+
+ if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
+ const int kIntNear = 1;
+ // All numbers are consistently higher on N7 compared to the reference
+ // data.
+ // TODO(bjornv): If we start getting more of these offsets on Android we
+ // should consider a different approach. Either using one slack for all,
+ // or generating a separate Android reference.
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+ const int kMaxOutputAverageOffset = 9;
+ const int kMaxOutputAverageNear = 26;
+#else
+ const int kMaxOutputAverageOffset = 0;
+ const int kMaxOutputAverageNear = kIntNear;
+#endif
+ EXPECT_NEAR(test->analog_level_average(), analog_level_average, kIntNear);
+ EXPECT_NEAR(test->max_output_average(),
+ max_output_average - kMaxOutputAverageOffset,
+ kMaxOutputAverageNear);
+ } else {
+ test->set_analog_level_average(analog_level_average);
+ test->set_max_output_average(max_output_average);
+ }
+
+ rewind(far_file_);
+ rewind(near_file_);
+ }
+
+ if (absl::GetFlag(FLAGS_write_apm_ref_data)) {
+ OpenFileAndWriteMessage(ref_filename_, ref_data);
+ }
+}
+
+// Compares the reference and test arrays over a region around the expected
+// delay. Finds the highest SNR in that region and adds the variance and squared
+// error results to the supplied accumulators.
+void UpdateBestSNR(const float* ref,
+ const float* test,
+ size_t length,
+ int expected_delay,
+ double* variance_acc,
+ double* sq_error_acc) {
+ RTC_CHECK_LT(expected_delay, length)
+ << "delay greater than signal length, cannot compute SNR";
+ double best_snr = std::numeric_limits<double>::min();
+ double best_variance = 0;
+ double best_sq_error = 0;
+ // Search over a region of nine samples around the expected delay.
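+ // Here "SNR" is accumulated as the plain ratio variance / sq_error; the
+ // caller later converts it to dB as 10 * log10(variance / sq_error). For
+ // example, a signal variance of 1.0 against a squared error of 0.01 maps
+ // to 10 * log10(100) = 20 dB.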
+ for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4;
+ ++delay) {
+ double sq_error = 0;
+ double variance = 0;
+ for (size_t i = 0; i < length - delay; ++i) {
+ double error = test[i + delay] - ref[i];
+ sq_error += error * error;
+ variance += ref[i] * ref[i];
+ }
+
+ if (sq_error == 0) {
+ *variance_acc += variance;
+ return;
+ }
+ double snr = variance / sq_error;
+ if (snr > best_snr) {
+ best_snr = snr;
+ best_variance = variance;
+ best_sq_error = sq_error;
+ }
+ }
+
+ *variance_acc += best_variance;
+ *sq_error_acc += best_sq_error;
+}
+
+// Used to test a multitude of sample rate and channel combinations. It works
+// by first producing a set of reference files (in SetUpTestSuite) that are
+// assumed to be correct, as the used parameters are verified by other tests
+// in this collection. Importantly, the reference files are all produced at
+// "native" rates which do not involve any resampling.
+
+// Each test pass produces an output file with a particular format. The output
+// is matched against the reference file closest to its internal processing
+// format. If necessary, the output is resampled back to its internal
+// processing rate before the comparison.
+// Due to the resampling distortion, we don't expect identical results, but
+// enforce SNR thresholds which vary depending on the format. 0 is a special
+// case SNR which corresponds to inf, or zero error.
+typedef std::tuple<int, int, int, int, double, double> AudioProcessingTestData;
+class AudioProcessingTest
+ : public ::testing::TestWithParam<AudioProcessingTestData> {
+ public:
+ AudioProcessingTest()
+ : input_rate_(std::get<0>(GetParam())),
+ output_rate_(std::get<1>(GetParam())),
+ reverse_input_rate_(std::get<2>(GetParam())),
+ reverse_output_rate_(std::get<3>(GetParam())),
+ expected_snr_(std::get<4>(GetParam())),
+ expected_reverse_snr_(std::get<5>(GetParam())) {}
+
+ virtual ~AudioProcessingTest() {}
+
+ static void SetUpTestSuite() {
+ // Create all needed output reference files.
+ const size_t kNumChannels[] = {1, 2};
+ for (size_t i = 0; i < arraysize(kProcessSampleRates); ++i) {
+ for (size_t j = 0; j < arraysize(kNumChannels); ++j) {
+ for (size_t k = 0; k < arraysize(kNumChannels); ++k) {
+ // The reference files always have matching input and output channels.
+ ProcessFormat(kProcessSampleRates[i], kProcessSampleRates[i],
+ kProcessSampleRates[i], kProcessSampleRates[i],
+ kNumChannels[j], kNumChannels[j], kNumChannels[k],
+ kNumChannels[k], "ref");
+ }
+ }
+ }
+ }
+
+ void TearDown() override {
+ // Remove "out" files after each test.
+ ClearTempOutFiles();
+ }
+
+ static void TearDownTestSuite() { ClearTempFiles(); }
+
+ // Runs a process pass on files with the given parameters and dumps the output
+ // to a file specified with `output_file_prefix`. Both forward and reverse
+ // output streams are dumped.
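+ // For instance, the reference files created in SetUpTestSuite come from
+ // calls of the form:
+ //   ProcessFormat(48000, 48000, 48000, 48000, /*num_input_channels=*/2,
+ //                 /*num_output_channels=*/2,
+ //                 /*num_reverse_input_channels=*/1,
+ //                 /*num_reverse_output_channels=*/1, "ref");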
+ static void ProcessFormat(int input_rate,
+ int output_rate,
+ int reverse_input_rate,
+ int reverse_output_rate,
+ size_t num_input_channels,
+ size_t num_output_channels,
+ size_t num_reverse_input_channels,
+ size_t num_reverse_output_channels,
+ absl::string_view output_file_prefix) {
+ AudioProcessing::Config apm_config;
+ apm_config.gain_controller1.analog_gain_controller.enabled = false;
+ rtc::scoped_refptr<AudioProcessing> ap =
+ AudioProcessingBuilderForTesting().SetConfig(apm_config).Create();
+
+ EnableAllAPComponents(ap.get());
+
+ ProcessingConfig processing_config = {
+ {{input_rate, num_input_channels},
+ {output_rate, num_output_channels},
+ {reverse_input_rate, num_reverse_input_channels},
+ {reverse_output_rate, num_reverse_output_channels}}};
+ ap->Initialize(processing_config);
+
+ FILE* far_file =
+ fopen(ResourceFilePath("far", reverse_input_rate).c_str(), "rb");
+ FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
+ FILE* out_file = fopen(
+ OutputFilePath(
+ output_file_prefix, input_rate, output_rate, reverse_input_rate,
+ reverse_output_rate, num_input_channels, num_output_channels,
+ num_reverse_input_channels, num_reverse_output_channels, kForward)
+ .c_str(),
+ "wb");
+ FILE* rev_out_file = fopen(
+ OutputFilePath(
+ output_file_prefix, input_rate, output_rate, reverse_input_rate,
+ reverse_output_rate, num_input_channels, num_output_channels,
+ num_reverse_input_channels, num_reverse_output_channels, kReverse)
+ .c_str(),
+ "wb");
+ ASSERT_TRUE(far_file != NULL);
+ ASSERT_TRUE(near_file != NULL);
+ ASSERT_TRUE(out_file != NULL);
+ ASSERT_TRUE(rev_out_file != NULL);
+
+ ChannelBuffer<float> fwd_cb(AudioProcessing::GetFrameSize(input_rate),
+ num_input_channels);
+ ChannelBuffer<float> rev_cb(
+ AudioProcessing::GetFrameSize(reverse_input_rate),
+ num_reverse_input_channels);
+ ChannelBuffer<float> out_cb(AudioProcessing::GetFrameSize(output_rate),
+ num_output_channels);
+ ChannelBuffer<float> rev_out_cb(
+ AudioProcessing::GetFrameSize(reverse_output_rate),
+ num_reverse_output_channels);
+
+ // Temporary buffers.
+ const int max_length =
+ 2 * std::max(std::max(out_cb.num_frames(), rev_out_cb.num_frames()),
+ std::max(fwd_cb.num_frames(), rev_cb.num_frames()));
+ std::unique_ptr<float[]> float_data(new float[max_length]);
+ std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
+
+ int analog_level = 127;
+ while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) &&
+ ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) {
+ EXPECT_NOERR(ap->ProcessReverseStream(
+ rev_cb.channels(), processing_config.reverse_input_stream(),
+ processing_config.reverse_output_stream(), rev_out_cb.channels()));
+
+ EXPECT_NOERR(ap->set_stream_delay_ms(0));
+ ap->set_stream_analog_level(analog_level);
+
+ EXPECT_NOERR(ap->ProcessStream(
+ fwd_cb.channels(), StreamConfig(input_rate, num_input_channels),
+ StreamConfig(output_rate, num_output_channels), out_cb.channels()));
+
+ // Dump forward output to file.
+ Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(),
+ float_data.get());
+ size_t out_length = out_cb.num_channels() * out_cb.num_frames();
+
+ ASSERT_EQ(out_length, fwrite(float_data.get(), sizeof(float_data[0]),
+ out_length, out_file));
+
+ // Dump reverse output to file.
+ Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(),
+ rev_out_cb.num_channels(), float_data.get());
+ size_t rev_out_length =
+ rev_out_cb.num_channels() * rev_out_cb.num_frames();
+
+ ASSERT_EQ(rev_out_length, fwrite(float_data.get(), sizeof(float_data[0]),
+ rev_out_length, rev_out_file));
+
+ analog_level = ap->recommended_stream_analog_level();
+ }
+ fclose(far_file);
+ fclose(near_file);
+ fclose(out_file);
+ fclose(rev_out_file);
+ }
+
+ protected:
+ int input_rate_;
+ int output_rate_;
+ int reverse_input_rate_;
+ int reverse_output_rate_;
+ double expected_snr_;
+ double expected_reverse_snr_;
+};
+
+TEST_P(AudioProcessingTest, Formats) {
+ struct ChannelFormat {
+ int num_input;
+ int num_output;
+ int num_reverse_input;
+ int num_reverse_output;
+ };
+ ChannelFormat cf[] = {
+ {1, 1, 1, 1}, {1, 1, 2, 1}, {2, 1, 1, 1},
+ {2, 1, 2, 1}, {2, 2, 1, 1}, {2, 2, 2, 2},
+ };
+
+ for (size_t i = 0; i < arraysize(cf); ++i) {
+ ProcessFormat(input_rate_, output_rate_, reverse_input_rate_,
+ reverse_output_rate_, cf[i].num_input, cf[i].num_output,
+ cf[i].num_reverse_input, cf[i].num_reverse_output, "out");
+
+ // Verify output for both directions.
+ std::vector<StreamDirection> stream_directions;
+ stream_directions.push_back(kForward);
+ stream_directions.push_back(kReverse);
+ for (StreamDirection file_direction : stream_directions) {
+ const int in_rate = file_direction ? reverse_input_rate_ : input_rate_;
+ const int out_rate = file_direction ? reverse_output_rate_ : output_rate_;
+ const int out_num =
+ file_direction ? cf[i].num_reverse_output : cf[i].num_output;
+ const double expected_snr =
+ file_direction ? expected_reverse_snr_ : expected_snr_;
+
+ const int min_ref_rate = std::min(in_rate, out_rate);
+ int ref_rate;
+ if (min_ref_rate > 32000) {
+ ref_rate = 48000;
+ } else if (min_ref_rate > 16000) {
+ ref_rate = 32000;
+ } else {
+ ref_rate = 16000;
+ }
+
+ FILE* out_file = fopen(
+ OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
+ reverse_output_rate_, cf[i].num_input,
+ cf[i].num_output, cf[i].num_reverse_input,
+ cf[i].num_reverse_output, file_direction)
+ .c_str(),
+ "rb");
+ // The reference files always have matching input and output channels.
+ FILE* ref_file =
+ fopen(OutputFilePath("ref", ref_rate, ref_rate, ref_rate, ref_rate,
+ cf[i].num_output, cf[i].num_output,
+ cf[i].num_reverse_output,
+ cf[i].num_reverse_output, file_direction)
+ .c_str(),
+ "rb");
+ ASSERT_TRUE(out_file != NULL);
+ ASSERT_TRUE(ref_file != NULL);
+
+ const size_t ref_length =
+ AudioProcessing::GetFrameSize(ref_rate) * out_num;
+ const size_t out_length =
+ AudioProcessing::GetFrameSize(out_rate) * out_num;
+ // Data from the reference file.
+ std::unique_ptr<float[]> ref_data(new float[ref_length]);
+ // Data from the output file.
+ std::unique_ptr<float[]> out_data(new float[out_length]);
+ // Data from the resampled output, in case the reference and output rates
+ // don't match.
+ std::unique_ptr<float[]> cmp_data(new float[ref_length]);
+
+ PushResampler<float> resampler;
+ resampler.InitializeIfNeeded(out_rate, ref_rate, out_num);
+
+ // Compute the resampling delay of the output relative to the reference,
+ // to find the region over which we should search for the best SNR.
+ float expected_delay_sec = 0;
+ if (in_rate != ref_rate) {
+ // Input resampling delay.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(in_rate);
+ }
+ if (out_rate != ref_rate) {
+ // Output resampling delay.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
+ // Delay of converting the output back to its processing rate for
+ // testing.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(out_rate);
+ }
+ // The delay is multiplied by the number of channels because
+ // UpdateBestSNR() computes the SNR over interleaved data without taking
+ // channels into account.
+ int expected_delay =
+ std::floor(expected_delay_sec * ref_rate + 0.5f) * out_num;
+
+ double variance = 0;
+ double sq_error = 0;
+ while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
+ fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
+ float* out_ptr = out_data.get();
+ if (out_rate != ref_rate) {
+ // Resample the output back to its internal processing rate if
+ // necessary.
+ ASSERT_EQ(ref_length,
+ static_cast<size_t>(resampler.Resample(
+ out_ptr, out_length, cmp_data.get(), ref_length)));
+ out_ptr = cmp_data.get();
+ }
+
+ // Update the `sq_error` and `variance` accumulators with the highest
+ // SNR of reference vs output.
+ UpdateBestSNR(ref_data.get(), out_ptr, ref_length, expected_delay,
+ &variance, &sq_error);
+ }
+
+ std::cout << "(" << input_rate_ << ", " << output_rate_ << ", "
+ << reverse_input_rate_ << ", " << reverse_output_rate_ << ", "
+ << cf[i].num_input << ", " << cf[i].num_output << ", "
+ << cf[i].num_reverse_input << ", " << cf[i].num_reverse_output
+ << ", " << file_direction << "): ";
+ if (sq_error > 0) {
+ double snr = 10 * log10(variance / sq_error);
+ EXPECT_GE(snr, expected_snr);
+ EXPECT_NE(0, expected_snr);
+ std::cout << "SNR=" << snr << " dB" << std::endl;
+ } else {
+ std::cout << "SNR=inf dB" << std::endl;
+ }
+
+ fclose(out_file);
+ fclose(ref_file);
+ }
+ }
+}
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+INSTANTIATE_TEST_SUITE_P(
+ CommonFormats,
+ AudioProcessingTest,
+ // Internal processing rates and the particularly common sample rate 44100
+ // Hz are tested in a grid of combinations (capture in, render in, out).
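+ // Each tuple reads (capture input rate, capture output rate, render input
+ // rate, render output rate, min forward SNR in dB, min reverse SNR in dB);
+ // an expected SNR of 0 is the special case described above (inf SNR, i.e.
+ // zero error). For example, (48000, 48000, 32000, 48000, 40, 30) requires
+ // at least 40 dB on the forward stream and 30 dB on the reverse stream.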
+ ::testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 0, 0), + std::make_tuple(48000, 48000, 32000, 48000, 40, 30), + std::make_tuple(48000, 48000, 16000, 48000, 40, 20), + std::make_tuple(48000, 44100, 48000, 44100, 20, 20), + std::make_tuple(48000, 44100, 32000, 44100, 20, 15), + std::make_tuple(48000, 44100, 16000, 44100, 20, 15), + std::make_tuple(48000, 32000, 48000, 32000, 30, 35), + std::make_tuple(48000, 32000, 32000, 32000, 30, 0), + std::make_tuple(48000, 32000, 16000, 32000, 30, 20), + std::make_tuple(48000, 16000, 48000, 16000, 25, 20), + std::make_tuple(48000, 16000, 32000, 16000, 25, 20), + std::make_tuple(48000, 16000, 16000, 16000, 25, 0), + + std::make_tuple(44100, 48000, 48000, 48000, 30, 0), + std::make_tuple(44100, 48000, 32000, 48000, 30, 30), + std::make_tuple(44100, 48000, 16000, 48000, 30, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20), + std::make_tuple(44100, 44100, 32000, 44100, 20, 15), + std::make_tuple(44100, 44100, 16000, 44100, 20, 15), + std::make_tuple(44100, 32000, 48000, 32000, 30, 35), + std::make_tuple(44100, 32000, 32000, 32000, 30, 0), + std::make_tuple(44100, 32000, 16000, 32000, 30, 20), + std::make_tuple(44100, 16000, 48000, 16000, 25, 20), + std::make_tuple(44100, 16000, 32000, 16000, 25, 20), + std::make_tuple(44100, 16000, 16000, 16000, 25, 0), + + std::make_tuple(32000, 48000, 48000, 48000, 15, 0), + std::make_tuple(32000, 48000, 32000, 48000, 15, 30), + std::make_tuple(32000, 48000, 16000, 48000, 15, 20), + std::make_tuple(32000, 44100, 48000, 44100, 19, 20), + std::make_tuple(32000, 44100, 32000, 44100, 19, 15), + std::make_tuple(32000, 44100, 16000, 44100, 19, 15), + std::make_tuple(32000, 32000, 48000, 32000, 40, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 39, 20), + std::make_tuple(32000, 16000, 48000, 16000, 25, 20), + std::make_tuple(32000, 16000, 32000, 16000, 25, 20), + std::make_tuple(32000, 16000, 16000, 16000, 25, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 9, 0), + std::make_tuple(16000, 48000, 32000, 48000, 9, 30), + std::make_tuple(16000, 48000, 16000, 48000, 9, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 25, 35), + std::make_tuple(16000, 32000, 32000, 32000, 25, 0), + std::make_tuple(16000, 32000, 16000, 32000, 25, 20), + std::make_tuple(16000, 16000, 48000, 16000, 39, 20), + std::make_tuple(16000, 16000, 32000, 16000, 39, 20), + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + // Other sample rates are not tested exhaustively, to keep + // the test runtime manageable. + // + // Testing most other sample rates logged by Chrome UMA: + // - WebRTC.AudioInputSampleRate + // - WebRTC.AudioOutputSampleRate + // ApmConfiguration.HandlingOfRateCombinations covers + // remaining sample rates. 
+ std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); + +#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) +INSTANTIATE_TEST_SUITE_P( + CommonFormats, + AudioProcessingTest, + ::testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 19, 0), + std::make_tuple(48000, 48000, 32000, 48000, 19, 30), + std::make_tuple(48000, 48000, 16000, 48000, 19, 20), + std::make_tuple(48000, 44100, 48000, 44100, 15, 20), + std::make_tuple(48000, 44100, 32000, 44100, 15, 15), + std::make_tuple(48000, 44100, 16000, 44100, 15, 15), + std::make_tuple(48000, 32000, 48000, 32000, 19, 35), + std::make_tuple(48000, 32000, 32000, 32000, 19, 0), + std::make_tuple(48000, 32000, 16000, 32000, 19, 20), + std::make_tuple(48000, 16000, 48000, 16000, 20, 20), + std::make_tuple(48000, 16000, 32000, 16000, 20, 20), + std::make_tuple(48000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(44100, 48000, 48000, 48000, 15, 0), + std::make_tuple(44100, 48000, 32000, 48000, 15, 30), + std::make_tuple(44100, 48000, 16000, 48000, 15, 20), + std::make_tuple(44100, 44100, 48000, 44100, 15, 20), + std::make_tuple(44100, 44100, 32000, 44100, 15, 15), + std::make_tuple(44100, 44100, 16000, 44100, 15, 15), + std::make_tuple(44100, 32000, 48000, 32000, 18, 35), + std::make_tuple(44100, 32000, 32000, 32000, 18, 0), + std::make_tuple(44100, 32000, 16000, 32000, 18, 20), + std::make_tuple(44100, 16000, 48000, 16000, 19, 20), + std::make_tuple(44100, 16000, 32000, 16000, 19, 20), + std::make_tuple(44100, 16000, 16000, 16000, 19, 0), + + std::make_tuple(32000, 48000, 48000, 48000, 17, 0), + std::make_tuple(32000, 48000, 32000, 48000, 17, 30), + std::make_tuple(32000, 48000, 16000, 48000, 17, 20), + std::make_tuple(32000, 44100, 48000, 44100, 20, 20), + std::make_tuple(32000, 44100, 32000, 44100, 20, 15), + std::make_tuple(32000, 44100, 16000, 44100, 20, 15), + std::make_tuple(32000, 32000, 48000, 32000, 27, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 30, 20), + std::make_tuple(32000, 16000, 48000, 16000, 20, 20), + std::make_tuple(32000, 16000, 32000, 16000, 20, 20), + std::make_tuple(32000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 11, 0), + std::make_tuple(16000, 48000, 32000, 48000, 11, 30), + std::make_tuple(16000, 48000, 16000, 48000, 11, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 24, 35), + std::make_tuple(16000, 32000, 32000, 32000, 24, 0), + std::make_tuple(16000, 32000, 16000, 32000, 25, 20), + std::make_tuple(16000, 16000, 48000, 16000, 28, 20), + std::make_tuple(16000, 16000, 32000, 16000, 28, 20), + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); +#endif + +// Produces a scoped trace debug output. 
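+// For example, for rates (48000, 48000, 16000, 16000) and channel counts
+// (2, 2, 1, 1), the returned string reads roughly:
+//   Sample rates:
+//    Render input: 48000 Hz
+//    Render output: 48000 Hz
+//    Capture input: 16000 Hz
+//    Capture output: 16000 Hz
+//   Number of channels:
+//    Render input: 2
+//    ...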
+std::string ProduceDebugText(int render_input_sample_rate_hz,
+ int render_output_sample_rate_hz,
+ int capture_input_sample_rate_hz,
+ int capture_output_sample_rate_hz,
+ size_t render_input_num_channels,
+ size_t render_output_num_channels,
+ size_t capture_input_num_channels,
+ size_t capture_output_num_channels) {
+ rtc::StringBuilder ss;
+ ss << "Sample rates:"
+ "\n Render input: "
+ << render_input_sample_rate_hz
+ << " Hz"
+ "\n Render output: "
+ << render_output_sample_rate_hz
+ << " Hz"
+ "\n Capture input: "
+ << capture_input_sample_rate_hz
+ << " Hz"
+ "\n Capture output: "
+ << capture_output_sample_rate_hz
+ << " Hz"
+ "\nNumber of channels:"
+ "\n Render input: "
+ << render_input_num_channels
+ << "\n Render output: " << render_output_num_channels
+ << "\n Capture input: " << capture_input_num_channels
+ << "\n Capture output: " << capture_output_num_channels;
+ return ss.Release();
+}
+
+// Validates that running the audio processing module using various
+// combinations of sample rates and channel counts works as intended.
+void RunApmRateAndChannelTest(
+ rtc::ArrayView<const int> sample_rates_hz,
+ rtc::ArrayView<const int> render_channel_counts,
+ rtc::ArrayView<const int> capture_channel_counts) {
+ webrtc::AudioProcessing::Config apm_config;
+ apm_config.pipeline.multi_channel_render = true;
+ apm_config.pipeline.multi_channel_capture = true;
+ apm_config.echo_canceller.enabled = true;
+ rtc::scoped_refptr<AudioProcessing> apm =
+ AudioProcessingBuilderForTesting().SetConfig(apm_config).Create();
+
+ StreamConfig render_input_stream_config;
+ StreamConfig render_output_stream_config;
+ StreamConfig capture_input_stream_config;
+ StreamConfig capture_output_stream_config;
+
+ std::vector<float> render_input_frame_channels;
+ std::vector<float*> render_input_frame;
+ std::vector<float> render_output_frame_channels;
+ std::vector<float*> render_output_frame;
+ std::vector<float> capture_input_frame_channels;
+ std::vector<float*> capture_input_frame;
+ std::vector<float> capture_output_frame_channels;
+ std::vector<float*> capture_output_frame;
+
+ for (auto render_input_sample_rate_hz : sample_rates_hz) {
+ for (auto render_output_sample_rate_hz : sample_rates_hz) {
+ for (auto capture_input_sample_rate_hz : sample_rates_hz) {
+ for (auto capture_output_sample_rate_hz : sample_rates_hz) {
+ for (size_t render_input_num_channels : render_channel_counts) {
+ for (size_t capture_input_num_channels : capture_channel_counts) {
+ size_t render_output_num_channels = render_input_num_channels;
+ size_t capture_output_num_channels = capture_input_num_channels;
+ auto populate_audio_frame = [](int sample_rate_hz,
+ size_t num_channels,
+ StreamConfig* cfg,
+ std::vector<float>* channels_data,
+ std::vector<float*>* frame_data) {
+ cfg->set_sample_rate_hz(sample_rate_hz);
+ cfg->set_num_channels(num_channels);
+
+ size_t max_frame_size =
+ AudioProcessing::GetFrameSize(sample_rate_hz);
+ channels_data->resize(num_channels * max_frame_size);
+ std::fill(channels_data->begin(), channels_data->end(), 0.5f);
+ frame_data->resize(num_channels);
+ for (size_t channel = 0; channel < num_channels; ++channel) {
+ (*frame_data)[channel] =
+ &(*channels_data)[channel * max_frame_size];
+ }
+ };
+
+ populate_audio_frame(
+ render_input_sample_rate_hz, render_input_num_channels,
+ &render_input_stream_config, &render_input_frame_channels,
+ &render_input_frame);
+ populate_audio_frame(
+ render_output_sample_rate_hz, render_output_num_channels,
+ &render_output_stream_config, &render_output_frame_channels,
+ &render_output_frame);
+ populate_audio_frame(
+ capture_input_sample_rate_hz, capture_input_num_channels,
+                  &capture_input_stream_config, &capture_input_frame_channels,
+                  &capture_input_frame);
+              populate_audio_frame(
+                  capture_output_sample_rate_hz, capture_output_num_channels,
+                  &capture_output_stream_config, &capture_output_frame_channels,
+                  &capture_output_frame);
+
+              for (size_t frame = 0; frame < 2; ++frame) {
+                SCOPED_TRACE(ProduceDebugText(
+                    render_input_sample_rate_hz, render_output_sample_rate_hz,
+                    capture_input_sample_rate_hz, capture_output_sample_rate_hz,
+                    render_input_num_channels, render_output_num_channels,
+                    capture_input_num_channels, capture_output_num_channels));
+
+                int result = apm->ProcessReverseStream(
+                    &render_input_frame[0], render_input_stream_config,
+                    render_output_stream_config, &render_output_frame[0]);
+                EXPECT_EQ(result, AudioProcessing::kNoError);
+                result = apm->ProcessStream(
+                    &capture_input_frame[0], capture_input_stream_config,
+                    capture_output_stream_config, &capture_output_frame[0]);
+                EXPECT_EQ(result, AudioProcessing::kNoError);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+constexpr void Toggle(bool& b) {
+  b ^= true;
+}
+
+}  // namespace
+
+TEST(RuntimeSettingTest, TestDefaultCtor) {
+  auto s = AudioProcessing::RuntimeSetting();
+  EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type());
+}
+
+TEST(RuntimeSettingTest, TestUsageWithSwapQueue) {
+  SwapQueue<AudioProcessing::RuntimeSetting> q(1);
+  auto s = AudioProcessing::RuntimeSetting();
+  ASSERT_TRUE(q.Insert(&s));
+  ASSERT_TRUE(q.Remove(&s));
+  EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type());
+}
+
+TEST(ApmConfiguration, EnablePostProcessing) {
+  // Verify that apm uses a capture post processing module if one is provided.
+  auto mock_post_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_post_processor =
+      std::unique_ptr<CustomProcessing>(mock_post_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetCapturePostProcessing(std::move(mock_post_processor))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_post_processor_ptr, Process(::testing::_)).Times(1);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, EnablePreProcessing) {
+  // Verify that apm uses a render pre processing module if one is provided.
+  auto mock_pre_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_pre_processor =
+      std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetRenderPreProcessing(std::move(mock_pre_processor))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_pre_processor_ptr, Process(::testing::_)).Times(1);
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+}
+
+TEST(ApmConfiguration, EnableCaptureAnalyzer) {
+  // Verify that apm uses a capture analyzer if one is provided.
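+  // The analyzer observes the capture path only, so the single
+  // ProcessStream() call below is expected to trigger exactly one Analyze()
+  // call on the mock.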
+  auto mock_capture_analyzer_ptr =
+      new ::testing::NiceMock<test::MockCustomAudioAnalyzer>();
+  auto mock_capture_analyzer =
+      std::unique_ptr<CustomAudioAnalyzer>(mock_capture_analyzer_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetCaptureAnalyzer(std::move(mock_capture_analyzer))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_capture_analyzer_ptr, Analyze(::testing::_)).Times(1);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, PreProcessingReceivesRuntimeSettings) {
+  auto mock_pre_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_pre_processor =
+      std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetRenderPreProcessing(std::move(mock_pre_processor))
+          .Create();
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCustomRenderSetting(0));
+
+  // RuntimeSettings are forwarded during 'Process*Stream' calls, so we have
+  // to make one such call.
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_pre_processor_ptr, SetRuntimeSetting(::testing::_))
+      .Times(1);
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+}
+
+class MyEchoControlFactory : public EchoControlFactory {
+ public:
+  std::unique_ptr<EchoControl> Create(int sample_rate_hz) {
+    auto ec = new test::MockEchoControl();
+    EXPECT_CALL(*ec, AnalyzeRender(::testing::_)).Times(1);
+    EXPECT_CALL(*ec, AnalyzeCapture(::testing::_)).Times(2);
+    EXPECT_CALL(*ec, ProcessCapture(::testing::_, ::testing::_, ::testing::_))
+        .Times(2);
+    return std::unique_ptr<EchoControl>(ec);
+  }
+
+  std::unique_ptr<EchoControl> Create(int sample_rate_hz,
+                                      int num_render_channels,
+                                      int num_capture_channels) {
+    return Create(sample_rate_hz);
+  }
+};
+
+TEST(ApmConfiguration, EchoControlInjection) {
+  // Verify that apm uses an injected echo controller if one is provided.
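+  // MyEchoControlFactory above already arms the created mock with call-count
+  // expectations (one AnalyzeRender(), two AnalyzeCapture() and two
+  // ProcessCapture() calls), so the three Process*Stream() calls below double
+  // as the verification.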
+  std::unique_ptr<EchoControlFactory> echo_control_factory(
+      new MyEchoControlFactory());
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, EchoDetectorInjection) {
+  using ::testing::_;
+  rtc::scoped_refptr<test::MockEchoDetector> mock_echo_detector =
+      rtc::make_ref_counted<::testing::StrictMock<test::MockEchoDetector>>();
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/16000, _,
+                         /*render_sample_rate_hz=*/16000, _))
+      .Times(1);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(mock_echo_detector)
+          .Create();
+
+  // The echo detector is included in processing when enabled.
+  EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> render_audio) {
+        EXPECT_EQ(render_audio.size(), 160u);
+      });
+  EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> capture_audio) {
+        EXPECT_EQ(capture_audio.size(), 160u);
+      });
+  EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(1);
+
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, 16000);
+
+  apm->ProcessReverseStream(frame.data.data(), StreamConfig(16000, 1),
+                            StreamConfig(16000, 1), frame.data.data());
+  apm->ProcessStream(frame.data.data(), StreamConfig(16000, 1),
+                     StreamConfig(16000, 1), frame.data.data());
+
+  // When processing rates change, the echo detector is also reinitialized to
+  // match those.
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/48000, _,
+                         /*render_sample_rate_hz=*/16000, _))
+      .Times(1);
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/48000, _,
+                         /*render_sample_rate_hz=*/48000, _))
+      .Times(1);
+  EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> render_audio) {
+        EXPECT_EQ(render_audio.size(), 480u);
+      });
+  EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_))
+      .Times(2)
+      .WillRepeatedly([](rtc::ArrayView<const float> capture_audio) {
+        EXPECT_EQ(capture_audio.size(), 480u);
+      });
+  EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(2);
+
+  SetFrameSampleRate(&frame, 48000);
+  apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1),
+                     StreamConfig(48000, 1), frame.data.data());
+  apm->ProcessReverseStream(frame.data.data(), StreamConfig(48000, 1),
+                            StreamConfig(48000, 1), frame.data.data());
+  apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1),
+                     StreamConfig(48000, 1), frame.data.data());
+}
+
+rtc::scoped_refptr<AudioProcessing> CreateApm(bool mobile_aec) {
+  // Enable residual echo detection, for stats.
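+  // The injected detector is what populates the residual echo likelihood
+  // statistics asserted on in the MAYBE_ApmStatistics tests below.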
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(CreateEchoDetector())
+          .Create();
+  if (!apm) {
+    return apm;
+  }
+
+  ProcessingConfig processing_config = {
+      {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
+
+  if (apm->Initialize(processing_config) != 0) {
+    return nullptr;
+  }
+
+  // Disable all components except for an AEC.
+  AudioProcessing::Config apm_config;
+  apm_config.high_pass_filter.enabled = false;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.echo_canceller.enabled = true;
+  apm_config.echo_canceller.mobile_mode = mobile_aec;
+  apm_config.noise_suppression.enabled = false;
+  apm->ApplyConfig(apm_config);
+  return apm;
+}
+
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_MAC)
+#define MAYBE_ApmStatistics DISABLED_ApmStatistics
+#else
+#define MAYBE_ApmStatistics ApmStatistics
+#endif
+
+TEST(MAYBE_ApmStatistics, AECEnabledTest) {
+  // Set up APM with AEC3 and process some audio.
+  rtc::scoped_refptr<AudioProcessing> apm = CreateApm(false);
+  ASSERT_TRUE(apm);
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = true;
+  apm->ApplyConfig(apm_config);
+
+  // Set up an audioframe.
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
+
+  // Fill the audio frame with a sawtooth pattern.
+  int16_t* ptr = frame.data.data();
+  for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) {
+    ptr[i] = 10000 * ((i % 3) - 1);
+  }
+
+  // Do some processing.
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(apm->ProcessReverseStream(
+                  frame.data.data(),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  frame.data.data()),
+              0);
+    EXPECT_EQ(apm->set_stream_delay_ms(0), 0);
+    EXPECT_EQ(apm->ProcessStream(
+                  frame.data.data(),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  frame.data.data()),
+              0);
+  }
+
+  // Test statistics interface.
+  AudioProcessingStats stats = apm->GetStatistics();
+  // We expect all statistics to be set and have a sensible value.
+  ASSERT_TRUE(stats.residual_echo_likelihood.has_value());
+  EXPECT_GE(*stats.residual_echo_likelihood, 0.0);
+  EXPECT_LE(*stats.residual_echo_likelihood, 1.0);
+  ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value());
+  EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0);
+  EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0);
+  ASSERT_TRUE(stats.echo_return_loss.has_value());
+  EXPECT_NE(*stats.echo_return_loss, -100.0);
+  ASSERT_TRUE(stats.echo_return_loss_enhancement.has_value());
+  EXPECT_NE(*stats.echo_return_loss_enhancement, -100.0);
+}
+
+TEST(MAYBE_ApmStatistics, AECMEnabledTest) {
+  // Set up APM with AECM and process some audio.
+  rtc::scoped_refptr<AudioProcessing> apm = CreateApm(true);
+  ASSERT_TRUE(apm);
+
+  // Set up an audioframe.
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
+
+  // Fill the audio frame with a sawtooth pattern.
+  int16_t* ptr = frame.data.data();
+  for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) {
+    ptr[i] = 10000 * ((i % 3) - 1);
+  }
+
+  // Do some processing.
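+  // (Interleaved render/capture calls, so that the echo-related statistics
+  // read out below reflect actual processing.)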
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(apm->ProcessReverseStream(
+                  frame.data.data(),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  frame.data.data()),
+              0);
+    EXPECT_EQ(apm->set_stream_delay_ms(0), 0);
+    EXPECT_EQ(apm->ProcessStream(
+                  frame.data.data(),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                  frame.data.data()),
+              0);
+  }
+
+  // Test statistics interface.
+  AudioProcessingStats stats = apm->GetStatistics();
+  // We expect only the residual echo detector statistics to be set and have a
+  // sensible value.
+  ASSERT_TRUE(stats.residual_echo_likelihood.has_value());
+  EXPECT_GE(*stats.residual_echo_likelihood, 0.0);
+  EXPECT_LE(*stats.residual_echo_likelihood, 1.0);
+  ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value());
+  EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0);
+  EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0);
+  EXPECT_FALSE(stats.echo_return_loss.has_value());
+  EXPECT_FALSE(stats.echo_return_loss_enhancement.has_value());
+}
+
+TEST(ApmStatistics, DoNotReportVoiceDetectedStat) {
+  ProcessingConfig processing_config = {
+      {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
+
+  // Set up an audioframe.
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
+
+  // Fill the audio frame with a sawtooth pattern.
+  int16_t* ptr = frame.data.data();
+  for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) {
+    ptr[i] = 10000 * ((i % 3) - 1);
+  }
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  apm->Initialize(processing_config);
+
+  // No metric should be reported.
+  EXPECT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  EXPECT_FALSE(apm->GetStatistics().voice_detected.has_value());
+}
+
+TEST(ApmStatistics, GetStatisticsReportsNoEchoDetectorStatsWhenDisabled) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
+  ASSERT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  // Echo detector is disabled by default, no stats reported.
+  AudioProcessingStats stats = apm->GetStatistics();
+  EXPECT_FALSE(stats.residual_echo_likelihood.has_value());
+  EXPECT_FALSE(stats.residual_echo_likelihood_recent_max.has_value());
+}
+
+TEST(ApmStatistics, GetStatisticsReportsEchoDetectorStatsWhenEnabled) {
+  // Create APM with an echo detector injected.
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(CreateEchoDetector())
+          .Create();
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
+  // Echo detector enabled: Report stats.
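+  // (With the detector from CreateEchoDetector() injected, GetStatistics()
+  // fills in the residual echo likelihood fields checked below.)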
+  ASSERT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  AudioProcessingStats stats = apm->GetStatistics();
+  EXPECT_TRUE(stats.residual_echo_likelihood.has_value());
+  EXPECT_TRUE(stats.residual_echo_likelihood_recent_max.has_value());
+}
+
+TEST(ApmConfiguration, HandlingOfRateAndChannelCombinations) {
+  std::array<int, 3> sample_rates_hz = {16000, 32000, 48000};
+  std::array<int, 2> render_channel_counts = {1, 7};
+  std::array<int, 2> capture_channel_counts = {1, 7};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, HandlingOfChannelCombinations) {
+  std::array<int, 1> sample_rates_hz = {48000};
+  std::array<int, 8> render_channel_counts = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::array<int, 8> capture_channel_counts = {1, 2, 3, 4, 5, 6, 7, 8};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, HandlingOfRateCombinations) {
+  // Test rates <= 96000 logged by Chrome UMA:
+  //  - WebRTC.AudioInputSampleRate
+  //  - WebRTC.AudioOutputSampleRate
+  // Higher rates are tested in AudioProcessingTest.Format, to keep the number
+  // of combinations in this test manageable.
+  std::array<int, 9> sample_rates_hz = {8000,  11025, 16000, 22050, 32000,
+                                        44100, 48000, 88200, 96000};
+  std::array<int, 1> render_channel_counts = {2};
+  std::array<int, 1> capture_channel_counts = {2};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, SelfAssignment) {
+  // At some point memory sanitizer was complaining about self-assignment.
+  // Make sure we don't regress.
+  AudioProcessing::Config config;
+  AudioProcessing::Config* config2 = &config;
+  *config2 = *config2;  // Workaround -Wself-assign-overloaded
+  SUCCEED();  // Real success is absence of defects from asan/msan/ubsan.
+}
+
+TEST(AudioProcessing, GainController1ConfigEqual) {
+  AudioProcessing::Config::GainController1 a;
+  AudioProcessing::Config::GainController1 b;
+  EXPECT_EQ(a, b);
+
+  Toggle(a.enabled);
+  b.enabled = a.enabled;
+  EXPECT_EQ(a, b);
+
+  a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
+  b.mode = a.mode;
+  EXPECT_EQ(a, b);
+
+  a.target_level_dbfs++;
+  b.target_level_dbfs = a.target_level_dbfs;
+  EXPECT_EQ(a, b);
+
+  a.compression_gain_db++;
+  b.compression_gain_db = a.compression_gain_db;
+  EXPECT_EQ(a, b);
+
+  Toggle(a.enable_limiter);
+  b.enable_limiter = a.enable_limiter;
+  EXPECT_EQ(a, b);
+
+  auto& a_analog = a.analog_gain_controller;
+  auto& b_analog = b.analog_gain_controller;
+
+  Toggle(a_analog.enabled);
+  b_analog.enabled = a_analog.enabled;
+  EXPECT_EQ(a, b);
+
+  a_analog.startup_min_volume++;
+  b_analog.startup_min_volume = a_analog.startup_min_volume;
+  EXPECT_EQ(a, b);
+
+  a_analog.clipped_level_min++;
+  b_analog.clipped_level_min = a_analog.clipped_level_min;
+  EXPECT_EQ(a, b);
+
+  Toggle(a_analog.enable_digital_adaptive);
+  b_analog.enable_digital_adaptive = a_analog.enable_digital_adaptive;
+  EXPECT_EQ(a, b);
+}
+
+// Checks that one differing parameter is sufficient to make two configs
+// different.
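+// The pattern is: mutate one field of `a`, expect inequality, then restore
+// `a` from the default-constructed `b` before testing the next field.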
+TEST(AudioProcessing, GainController1ConfigNotEqual) { + AudioProcessing::Config::GainController1 a; + const AudioProcessing::Config::GainController1 b; + + Toggle(a.enabled); + EXPECT_NE(a, b); + a = b; + + a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital; + EXPECT_NE(a, b); + a = b; + + a.target_level_dbfs++; + EXPECT_NE(a, b); + a = b; + + a.compression_gain_db++; + EXPECT_NE(a, b); + a = b; + + Toggle(a.enable_limiter); + EXPECT_NE(a, b); + a = b; + + auto& a_analog = a.analog_gain_controller; + const auto& b_analog = b.analog_gain_controller; + + Toggle(a_analog.enabled); + EXPECT_NE(a, b); + a_analog = b_analog; + + a_analog.startup_min_volume++; + EXPECT_NE(a, b); + a_analog = b_analog; + + a_analog.clipped_level_min++; + EXPECT_NE(a, b); + a_analog = b_analog; + + Toggle(a_analog.enable_digital_adaptive); + EXPECT_NE(a, b); + a_analog = b_analog; +} + +TEST(AudioProcessing, GainController2ConfigEqual) { + AudioProcessing::Config::GainController2 a; + AudioProcessing::Config::GainController2 b; + EXPECT_EQ(a, b); + + Toggle(a.enabled); + b.enabled = a.enabled; + EXPECT_EQ(a, b); + + a.fixed_digital.gain_db += 1.0f; + b.fixed_digital.gain_db = a.fixed_digital.gain_db; + EXPECT_EQ(a, b); + + auto& a_adaptive = a.adaptive_digital; + auto& b_adaptive = b.adaptive_digital; + + Toggle(a_adaptive.enabled); + b_adaptive.enabled = a_adaptive.enabled; + EXPECT_EQ(a, b); + + a_adaptive.headroom_db += 1.0f; + b_adaptive.headroom_db = a_adaptive.headroom_db; + EXPECT_EQ(a, b); + + a_adaptive.max_gain_db += 1.0f; + b_adaptive.max_gain_db = a_adaptive.max_gain_db; + EXPECT_EQ(a, b); + + a_adaptive.initial_gain_db += 1.0f; + b_adaptive.initial_gain_db = a_adaptive.initial_gain_db; + EXPECT_EQ(a, b); + + a_adaptive.max_gain_change_db_per_second += 1.0f; + b_adaptive.max_gain_change_db_per_second = + a_adaptive.max_gain_change_db_per_second; + EXPECT_EQ(a, b); + + a_adaptive.max_output_noise_level_dbfs += 1.0f; + b_adaptive.max_output_noise_level_dbfs = + a_adaptive.max_output_noise_level_dbfs; + EXPECT_EQ(a, b); +} + +// Checks that one differing parameter is sufficient to make two configs +// different. 
+TEST(AudioProcessing, GainController2ConfigNotEqual) {
+  AudioProcessing::Config::GainController2 a;
+  const AudioProcessing::Config::GainController2 b;
+
+  Toggle(a.enabled);
+  EXPECT_NE(a, b);
+  a = b;
+
+  a.fixed_digital.gain_db += 1.0f;
+  EXPECT_NE(a, b);
+  a.fixed_digital = b.fixed_digital;
+
+  auto& a_adaptive = a.adaptive_digital;
+  const auto& b_adaptive = b.adaptive_digital;
+
+  Toggle(a_adaptive.enabled);
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.headroom_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.max_gain_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.initial_gain_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.max_gain_change_db_per_second += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.max_output_noise_level_dbfs += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+}
+
+struct ApmFormatHandlingTestParams {
+  enum class ExpectedOutput {
+    kErrorAndUnmodified,
+    kErrorAndSilence,
+    kErrorAndCopyOfFirstChannel,
+    kErrorAndExactCopy,
+    kNoError
+  };
+
+  StreamConfig input_config;
+  StreamConfig output_config;
+  ExpectedOutput expected_output;
+};
+
+class ApmFormatHandlingTest
+    : public ::testing::TestWithParam<
+          std::tuple<StreamDirection, ApmFormatHandlingTestParams>> {
+ public:
+  ApmFormatHandlingTest()
+      : stream_direction_(std::get<0>(GetParam())),
+        test_params_(std::get<1>(GetParam())) {}
+
+ protected:
+  ::testing::Message ProduceDebugMessage() {
+    return ::testing::Message()
+           << "input sample_rate_hz="
+           << test_params_.input_config.sample_rate_hz()
+           << " num_channels=" << test_params_.input_config.num_channels()
+           << ", output sample_rate_hz="
+           << test_params_.output_config.sample_rate_hz()
+           << " num_channels=" << test_params_.output_config.num_channels()
+           << ", stream_direction=" << stream_direction_ << ", expected_output="
+           << static_cast<int>(test_params_.expected_output);
+  }
+
+  StreamDirection stream_direction_;
+  ApmFormatHandlingTestParams test_params_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    FormatValidation,
+    ApmFormatHandlingTest,
+    testing::Combine(
+        ::testing::Values(kForward, kReverse),
+        ::testing::Values(
+            // Test cases with values on the boundary of legal ranges.
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 1), StreamConfig(8000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+            ApmFormatHandlingTestParams{
+                StreamConfig(8000, 1), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+            ApmFormatHandlingTestParams{
+                StreamConfig(384000, 1), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 1), StreamConfig(384000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 2), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 3), StreamConfig(16000, 3),
+                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
+
+            // Supported but incompatible formats.
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 3), StreamConfig(16000, 2),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndCopyOfFirstChannel},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 3), StreamConfig(16000, 4),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndCopyOfFirstChannel},
+
+            // Unsupported format and input / output mismatch.
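+            // (Per the boundary cases above, supported rates lie in
+            // [8000, 384000] Hz; 7900, 390000 and negative rates fall
+            // outside.)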
+            ApmFormatHandlingTestParams{
+                StreamConfig(7900, 1), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 1), StreamConfig(7900, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
+            ApmFormatHandlingTestParams{
+                StreamConfig(390000, 1), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 1), StreamConfig(390000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
+            ApmFormatHandlingTestParams{
+                StreamConfig(-16000, 1), StreamConfig(16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
+
+            // Unsupported format but input / output formats match.
+            ApmFormatHandlingTestParams{StreamConfig(7900, 1),
+                                        StreamConfig(7900, 1),
+                                        ApmFormatHandlingTestParams::
+                                            ExpectedOutput::kErrorAndExactCopy},
+            ApmFormatHandlingTestParams{StreamConfig(390000, 1),
+                                        StreamConfig(390000, 1),
+                                        ApmFormatHandlingTestParams::
+                                            ExpectedOutput::kErrorAndExactCopy},
+
+            // Unsupported but identical sample rate, channel mismatch.
+            ApmFormatHandlingTestParams{
+                StreamConfig(7900, 1), StreamConfig(7900, 2),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndCopyOfFirstChannel},
+            ApmFormatHandlingTestParams{
+                StreamConfig(7900, 2), StreamConfig(7900, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndCopyOfFirstChannel},
+
+            // Test cases with meaningless output format.
+            ApmFormatHandlingTestParams{
+                StreamConfig(16000, 1), StreamConfig(-16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndUnmodified},
+            ApmFormatHandlingTestParams{
+                StreamConfig(-16000, 1), StreamConfig(-16000, 1),
+                ApmFormatHandlingTestParams::ExpectedOutput::
+                    kErrorAndUnmodified})));
+
+TEST_P(ApmFormatHandlingTest, IntApi) {
+  SCOPED_TRACE(ProduceDebugMessage());
+
+  // Set up input and output data.
+  const size_t num_input_samples =
+      test_params_.input_config.num_channels() *
+      std::abs(test_params_.input_config.sample_rate_hz() / 100);
+  const size_t num_output_samples =
+      test_params_.output_config.num_channels() *
+      std::abs(test_params_.output_config.sample_rate_hz() / 100);
+  std::vector<int16_t> input_block(num_input_samples);
+  for (int i = 0; i < static_cast<int>(input_block.size()); ++i) {
+    input_block[i] = i;
+  }
+  std::vector<int16_t> output_block(num_output_samples);
+  constexpr int kUnlikelyOffset = 37;
+  for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
+    output_block[i] = i - kUnlikelyOffset;
+  }
+
+  // Call APM.
+  rtc::scoped_refptr<AudioProcessing> ap =
+      AudioProcessingBuilderForTesting().Create();
+  int error;
+  if (stream_direction_ == kForward) {
+    error = ap->ProcessStream(input_block.data(), test_params_.input_config,
+                              test_params_.output_config, output_block.data());
+  } else {
+    error = ap->ProcessReverseStream(
+        input_block.data(), test_params_.input_config,
+        test_params_.output_config, output_block.data());
+  }
+
+  // Check output.
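+  // Each case below verifies the fallback behavior promised by the
+  // corresponding ApmFormatHandlingTestParams::ExpectedOutput value.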
+  switch (test_params_.expected_output) {
+    case ApmFormatHandlingTestParams::ExpectedOutput::kNoError:
+      EXPECT_EQ(error, AudioProcessing::kNoError);
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
+        EXPECT_EQ(output_block[i], i - kUnlikelyOffset);
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
+        EXPECT_EQ(output_block[i], 0);
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::
+        kErrorAndCopyOfFirstChannel:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (size_t ch = 0; ch < test_params_.output_config.num_channels();
+           ++ch) {
+        for (size_t i = 0; i < test_params_.output_config.num_frames(); ++i) {
+          EXPECT_EQ(
+              output_block[ch + i * test_params_.output_config.num_channels()],
+              static_cast<int16_t>(i *
+                                   test_params_.input_config.num_channels()));
+        }
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
+        EXPECT_EQ(output_block[i], i);
+      }
+      break;
+  }
+}
+
+TEST_P(ApmFormatHandlingTest, FloatApi) {
+  SCOPED_TRACE(ProduceDebugMessage());
+
+  // Set up input and output data.
+  const size_t input_samples_per_channel =
+      std::abs(test_params_.input_config.sample_rate_hz()) / 100;
+  const size_t output_samples_per_channel =
+      std::abs(test_params_.output_config.sample_rate_hz()) / 100;
+  const size_t input_num_channels = test_params_.input_config.num_channels();
+  const size_t output_num_channels = test_params_.output_config.num_channels();
+  ChannelBuffer<float> input_block(input_samples_per_channel,
+                                   input_num_channels);
+  ChannelBuffer<float> output_block(output_samples_per_channel,
+                                    output_num_channels);
+  for (size_t ch = 0; ch < input_num_channels; ++ch) {
+    for (size_t i = 0; i < input_samples_per_channel; ++i) {
+      input_block.channels()[ch][i] = ch + i * input_num_channels;
+    }
+  }
+  constexpr int kUnlikelyOffset = 37;
+  for (size_t ch = 0; ch < output_num_channels; ++ch) {
+    for (size_t i = 0; i < output_samples_per_channel; ++i) {
+      output_block.channels()[ch][i] =
+          ch + i * output_num_channels - kUnlikelyOffset;
+    }
+  }
+
+  // Call APM.
+  rtc::scoped_refptr<AudioProcessing> ap =
+      AudioProcessingBuilderForTesting().Create();
+  int error;
+  if (stream_direction_ == kForward) {
+    error =
+        ap->ProcessStream(input_block.channels(), test_params_.input_config,
+                          test_params_.output_config, output_block.channels());
+  } else {
+    error = ap->ProcessReverseStream(
+        input_block.channels(), test_params_.input_config,
+        test_params_.output_config, output_block.channels());
+  }
+
+  // Check output.
+  switch (test_params_.expected_output) {
+    case ApmFormatHandlingTestParams::ExpectedOutput::kNoError:
+      EXPECT_EQ(error, AudioProcessing::kNoError);
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (size_t ch = 0; ch < output_num_channels; ++ch) {
+        for (size_t i = 0; i < output_samples_per_channel; ++i) {
+          EXPECT_EQ(output_block.channels()[ch][i],
+                    ch + i * output_num_channels - kUnlikelyOffset);
+        }
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (size_t ch = 0; ch < output_num_channels; ++ch) {
+        for (size_t i = 0; i < output_samples_per_channel; ++i) {
+          EXPECT_EQ(output_block.channels()[ch][i], 0);
+        }
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::
+        kErrorAndCopyOfFirstChannel:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (size_t ch = 0; ch < output_num_channels; ++ch) {
+        for (size_t i = 0; i < output_samples_per_channel; ++i) {
+          EXPECT_EQ(output_block.channels()[ch][i],
+                    input_block.channels()[0][i]);
+        }
+      }
+      break;
+    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy:
+      EXPECT_NE(error, AudioProcessing::kNoError);
+      for (size_t ch = 0; ch < output_num_channels; ++ch) {
+        for (size_t i = 0; i < output_samples_per_channel; ++i) {
+          EXPECT_EQ(output_block.channels()[ch][i],
+                    input_block.channels()[ch][i]);
+        }
+      }
+      break;
+  }
+}
+
+TEST(ApmAnalyzeReverseStreamFormatTest, AnalyzeReverseStream) {
+  for (auto&& [input_config, expect_error] :
+       {std::tuple(StreamConfig(16000, 2), /*expect_error=*/false),
+        std::tuple(StreamConfig(8000, 1), /*expect_error=*/false),
+        std::tuple(StreamConfig(384000, 1), /*expect_error=*/false),
+        std::tuple(StreamConfig(7900, 1), /*expect_error=*/true),
+        std::tuple(StreamConfig(390000, 1), /*expect_error=*/true),
+        std::tuple(StreamConfig(16000, 0), /*expect_error=*/true),
+        std::tuple(StreamConfig(-16000, 0), /*expect_error=*/true)}) {
+    SCOPED_TRACE(::testing::Message()
+                 << "sample_rate_hz=" << input_config.sample_rate_hz()
+                 << " num_channels=" << input_config.num_channels());
+
+    // Set up input data.
+    ChannelBuffer<float> input_block(
+        std::abs(input_config.sample_rate_hz()) / 100,
+        input_config.num_channels());
+
+    // Call APM.
+    rtc::scoped_refptr<AudioProcessing> ap =
+        AudioProcessingBuilderForTesting().Create();
+    int error = ap->AnalyzeReverseStream(input_block.channels(), input_config);
+
+    // Check output.
+    if (expect_error) {
+      EXPECT_NE(error, AudioProcessing::kNoError);
+    } else {
+      EXPECT_EQ(error, AudioProcessing::kNoError);
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn
new file mode 100644
index 0000000000..e7ff8482f6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn
@@ -0,0 +1,45 @@
+# Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_library("capture_levels_adjuster") {
+  visibility = [ "*" ]
+
+  sources = [
+    "audio_samples_scaler.cc",
+    "audio_samples_scaler.h",
+    "capture_levels_adjuster.cc",
+    "capture_levels_adjuster.h",
+  ]
+
+  defines = []
+
+  deps = [
+    "..:audio_buffer",
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:safe_minmax",
+  ]
+}
+
+rtc_library("capture_levels_adjuster_unittests") {
+  testonly = true
+
+  sources = [
+    "audio_samples_scaler_unittest.cc",
+    "capture_levels_adjuster_unittest.cc",
+  ]
+  deps = [
+    ":capture_levels_adjuster",
+    "..:audioproc_test_utils",
+    "../../../rtc_base:gunit_helpers",
+    "../../../rtc_base:stringutils",
+    "../../../test:test_support",
+  ]
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc
new file mode 100644
index 0000000000..cb2336b87d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
+
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+AudioSamplesScaler::AudioSamplesScaler(float initial_gain)
+    : previous_gain_(initial_gain), target_gain_(initial_gain) {}
+
+void AudioSamplesScaler::Process(AudioBuffer& audio_buffer) {
+  if (static_cast<int>(audio_buffer.num_frames()) != samples_per_channel_) {
+    // Update the members depending on audio-buffer length if needed.
+    RTC_DCHECK_GT(audio_buffer.num_frames(), 0);
+    samples_per_channel_ = static_cast<int>(audio_buffer.num_frames());
+    one_by_samples_per_channel_ = 1.f / samples_per_channel_;
+  }
+
+  if (target_gain_ == 1.f && previous_gain_ == target_gain_) {
+    // If only a gain of 1 is to be applied, do an early return without applying
+    // any gain.
+    return;
+  }
+
+  float gain = previous_gain_;
+  if (previous_gain_ == target_gain_) {
+    // Apply a non-changing gain.
+    for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) {
+      rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
+                                         samples_per_channel_);
+      for (float& sample : channel_view) {
+        sample *= gain;
+      }
+    }
+  } else {
+    const float increment =
+        (target_gain_ - previous_gain_) * one_by_samples_per_channel_;
+
+    if (increment > 0.f) {
+      // Apply an increasing gain.
+      for (size_t channel = 0; channel < audio_buffer.num_channels();
+           ++channel) {
+        gain = previous_gain_;
+        rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
+                                           samples_per_channel_);
+        for (float& sample : channel_view) {
+          gain = std::min(gain + increment, target_gain_);
+          sample *= gain;
+        }
+      }
+    } else {
+      // Apply a decreasing gain.
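+      // Mirror image of the increasing case: `increment` is negative here, so
+      // the per-sample gain ramps down and is clamped from below at
+      // target_gain_.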
+      for (size_t channel = 0; channel < audio_buffer.num_channels();
+           ++channel) {
+        gain = previous_gain_;
+        rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
+                                           samples_per_channel_);
+        for (float& sample : channel_view) {
+          gain = std::max(gain + increment, target_gain_);
+          sample *= gain;
+        }
+      }
+    }
+  }
+  previous_gain_ = target_gain_;
+
+  // Saturate the samples to be in the S16 range.
+  for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) {
+    rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
+                                       samples_per_channel_);
+    for (float& sample : channel_view) {
+      constexpr float kMinFloatS16Value = -32768.f;
+      constexpr float kMaxFloatS16Value = 32767.f;
+      sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h
new file mode 100644
index 0000000000..2ae8533940
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
+#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
+
+#include <stddef.h>
+
+#include "modules/audio_processing/audio_buffer.h"
+
+namespace webrtc {
+
+// Handles and applies a gain to the samples in an audio buffer.
+// The gain is applied for each sample and any changes in the gain take effect
+// gradually (in a linear manner) over one frame.
+class AudioSamplesScaler {
+ public:
+  // C-tor. The supplied `initial_gain` is used immediately at the first call to
+  // Process(), i.e., in contrast to the gain supplied by SetGain(...) there is
+  // no gradual change to the `initial_gain`.
+  explicit AudioSamplesScaler(float initial_gain);
+  AudioSamplesScaler(const AudioSamplesScaler&) = delete;
+  AudioSamplesScaler& operator=(const AudioSamplesScaler&) = delete;
+
+  // Applies the specified gain to the audio in `audio_buffer`.
+  void Process(AudioBuffer& audio_buffer);
+
+  // Sets the gain to apply to each sample.
+  void SetGain(float gain) { target_gain_ = gain; }
+
+ private:
+  float previous_gain_ = 1.f;
+  float target_gain_ = 1.f;
+  int samples_per_channel_ = -1;
+  float one_by_samples_per_channel_ = -1.f;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc
new file mode 100644
index 0000000000..6e5fc2cbe3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
+
+#include <tuple>
+
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+float SampleValueForChannel(int channel) {
+  constexpr float kSampleBaseValue = 100.f;
+  constexpr float kSampleChannelOffset = 1.f;
+  return kSampleBaseValue + channel * kSampleChannelOffset;
+}
+
+void PopulateBuffer(AudioBuffer& audio_buffer) {
+  for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+    test::FillBufferChannel(SampleValueForChannel(ch), ch, audio_buffer);
+  }
+}
+
+constexpr int kNumFramesToProcess = 10;
+
+class AudioSamplesScalerTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<int, int, float>> {
+ protected:
+  int sample_rate_hz() const { return std::get<0>(GetParam()); }
+  int num_channels() const { return std::get<1>(GetParam()); }
+  float initial_gain() const { return std::get<2>(GetParam()); }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioSamplesScalerTestSuite,
+    AudioSamplesScalerTest,
+    ::testing::Combine(::testing::Values(16000, 32000, 48000),
+                       ::testing::Values(1, 2, 4),
+                       ::testing::Values(0.1f, 1.f, 2.f, 4.f)));
+
+TEST_P(AudioSamplesScalerTest, InitialGainIsRespected) {
+  AudioSamplesScaler scaler(initial_gain());
+
+  AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(),
+                           num_channels(), sample_rate_hz(), num_channels());
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        initial_gain() * SampleValueForChannel(ch));
+      }
+    }
+  }
+}
+
+TEST_P(AudioSamplesScalerTest, VerifyGainAdjustment) {
+  const float higher_gain = initial_gain();
+  const float lower_gain = higher_gain / 2.f;
+
+  AudioSamplesScaler scaler(lower_gain);
+
+  AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(),
+                           num_channels(), sample_rate_hz(), num_channels());
+
+  // Allow the initial, lower, gain to take effect.
+  PopulateBuffer(audio_buffer);
+
+  scaler.Process(audio_buffer);
+
+  // Set the new, higher, gain.
+  scaler.SetGain(higher_gain);
+
+  // Ensure that the new, higher, gain is achieved gradually over one frame.
+  PopulateBuffer(audio_buffer);
+
+  scaler.Process(audio_buffer);
+  for (int ch = 0; ch < num_channels(); ++ch) {
+    for (size_t i = 0; i < audio_buffer.num_frames() - 1; ++i) {
+      EXPECT_LT(audio_buffer.channels_const()[ch][i],
+                higher_gain * SampleValueForChannel(ch));
+      EXPECT_LE(audio_buffer.channels_const()[ch][i],
+                audio_buffer.channels_const()[ch][i + 1]);
+    }
+    EXPECT_LE(audio_buffer.channels_const()[ch][audio_buffer.num_frames() - 1],
+              higher_gain * SampleValueForChannel(ch));
+  }
+
+  // Ensure that the new, higher, gain is achieved and stays unchanged.
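+  // (The one-frame ramp is over; from here on Process() applies the constant
+  // target gain to every sample.)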
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        higher_gain * SampleValueForChannel(ch));
+      }
+    }
+  }
+
+  // Set the new, lower, gain.
+  scaler.SetGain(lower_gain);
+
+  // Ensure that the new, lower, gain is achieved gradually over one frame.
+  PopulateBuffer(audio_buffer);
+  scaler.Process(audio_buffer);
+  for (int ch = 0; ch < num_channels(); ++ch) {
+    for (size_t i = 0; i < audio_buffer.num_frames() - 1; ++i) {
+      EXPECT_GT(audio_buffer.channels_const()[ch][i],
+                lower_gain * SampleValueForChannel(ch));
+      EXPECT_GE(audio_buffer.channels_const()[ch][i],
+                audio_buffer.channels_const()[ch][i + 1]);
+    }
+    EXPECT_GE(audio_buffer.channels_const()[ch][audio_buffer.num_frames() - 1],
+              lower_gain * SampleValueForChannel(ch));
+  }
+
+  // Ensure that the new, lower, gain is achieved and stays unchanged.
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        lower_gain * SampleValueForChannel(ch));
+      }
+    }
+  }
+}
+
+TEST(AudioSamplesScaler, UpwardsClamping) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  constexpr float kGain = 10.f;
+  constexpr float kMaxClampedSampleValue = 32767.f;
+  static_assert(kGain > 1.f, "");
+
+  AudioSamplesScaler scaler(kGain);
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      test::FillBufferChannel(
+          kMaxClampedSampleValue - audio_buffer.num_channels() + 1.f + ch, ch,
+          audio_buffer);
+    }
+
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < kNumChannels; ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        kMaxClampedSampleValue);
+      }
+    }
+  }
+}
+
+TEST(AudioSamplesScaler, DownwardsClamping) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  constexpr float kGain = 10.f;
+  constexpr float kMinClampedSampleValue = -32768.f;
+  static_assert(kGain > 1.f, "");
+
+  AudioSamplesScaler scaler(kGain);
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      test::FillBufferChannel(
+          kMinClampedSampleValue + audio_buffer.num_channels() - 1.f + ch, ch,
+          audio_buffer);
+    }
+
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < kNumChannels; ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        kMinClampedSampleValue);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc
new file mode 100644
index 0000000000..dfda582915
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h"
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kMinAnalogMicGainLevel = 0;
+constexpr int kMaxAnalogMicGainLevel = 255;
+
+float ComputeLevelBasedGain(int emulated_analog_mic_gain_level) {
+  static_assert(
+      kMinAnalogMicGainLevel == 0,
+      "The minimum gain level must be 0 for the maths below to work.");
+  static_assert(kMaxAnalogMicGainLevel > 0,
+                "The minimum gain level must be larger than 0 for the maths "
+                "below to work.");
+  constexpr float kGainToLevelMultiplier = 1.f / kMaxAnalogMicGainLevel;
+
+  RTC_DCHECK_GE(emulated_analog_mic_gain_level, kMinAnalogMicGainLevel);
+  RTC_DCHECK_LE(emulated_analog_mic_gain_level, kMaxAnalogMicGainLevel);
+  return kGainToLevelMultiplier * emulated_analog_mic_gain_level;
+}
+
+float ComputePreGain(float pre_gain,
+                     int emulated_analog_mic_gain_level,
+                     bool emulated_analog_mic_gain_enabled) {
+  return emulated_analog_mic_gain_enabled
+             ? pre_gain * ComputeLevelBasedGain(emulated_analog_mic_gain_level)
+             : pre_gain;
+}
+
+}  // namespace
+
+CaptureLevelsAdjuster::CaptureLevelsAdjuster(
+    bool emulated_analog_mic_gain_enabled,
+    int emulated_analog_mic_gain_level,
+    float pre_gain,
+    float post_gain)
+    : emulated_analog_mic_gain_enabled_(emulated_analog_mic_gain_enabled),
+      emulated_analog_mic_gain_level_(emulated_analog_mic_gain_level),
+      pre_gain_(pre_gain),
+      pre_adjustment_gain_(ComputePreGain(pre_gain_,
+                                          emulated_analog_mic_gain_level_,
+                                          emulated_analog_mic_gain_enabled_)),
+      pre_scaler_(pre_adjustment_gain_),
+      post_scaler_(post_gain) {}
+
+void CaptureLevelsAdjuster::ApplyPreLevelAdjustment(AudioBuffer& audio_buffer) {
+  pre_scaler_.Process(audio_buffer);
+}
+
+void CaptureLevelsAdjuster::ApplyPostLevelAdjustment(
+    AudioBuffer& audio_buffer) {
+  post_scaler_.Process(audio_buffer);
+}
+
+void CaptureLevelsAdjuster::SetPreGain(float pre_gain) {
+  pre_gain_ = pre_gain;
+  UpdatePreAdjustmentGain();
+}
+
+void CaptureLevelsAdjuster::SetPostGain(float post_gain) {
+  post_scaler_.SetGain(post_gain);
+}
+
+void CaptureLevelsAdjuster::SetAnalogMicGainLevel(int level) {
+  RTC_DCHECK_GE(level, kMinAnalogMicGainLevel);
+  RTC_DCHECK_LE(level, kMaxAnalogMicGainLevel);
+  int clamped_level =
+      rtc::SafeClamp(level, kMinAnalogMicGainLevel, kMaxAnalogMicGainLevel);
+
+  emulated_analog_mic_gain_level_ = clamped_level;
+  UpdatePreAdjustmentGain();
+}
+
+void CaptureLevelsAdjuster::UpdatePreAdjustmentGain() {
+  pre_adjustment_gain_ =
+      ComputePreGain(pre_gain_, emulated_analog_mic_gain_level_,
+                     emulated_analog_mic_gain_enabled_);
+  pre_scaler_.SetGain(pre_adjustment_gain_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h
new file mode 100644
index 0000000000..38b68ad06c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
+#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
+
+#include <stddef.h>
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
+
+namespace webrtc {
+
+// Adjusts the level of the capture signal before and after all capture-side
+// processing is done using a combination of explicitly specified gains
+// and an emulated analog gain functionality where a specified analog level
+// results in an additional gain. The pre-adjustment is achieved by combining
+// the gain value `pre_gain` and the level `emulated_analog_mic_gain_level` to
+// form a combined gain of `pre_gain`*`emulated_analog_mic_gain_level`/255
+// which is applied to each sample. The intention of the
+// `emulated_analog_mic_gain_level` is to be controlled by the analog AGC
+// functionality and to produce an emulated analog mic gain equal to
+// `emulated_analog_mic_gain_level`/255. The post level adjustment is achieved
+// by multiplying each sample by the value of `post_gain`. Any changes in the
+// gains take effect smoothly over one frame and the scaled samples are
+// clamped to fit into the allowed S16 sample range.
+class CaptureLevelsAdjuster {
+ public:
+  // C-tor. The values for the level and the gains must fulfill
+  // 0 <= emulated_analog_mic_gain_level <= 255.
+  // 0.f <= pre_gain.
+  // 0.f <= post_gain.
+  CaptureLevelsAdjuster(bool emulated_analog_mic_gain_enabled,
+                        int emulated_analog_mic_gain_level,
+                        float pre_gain,
+                        float post_gain);
+  CaptureLevelsAdjuster(const CaptureLevelsAdjuster&) = delete;
+  CaptureLevelsAdjuster& operator=(const CaptureLevelsAdjuster&) = delete;
+
+  // Adjusts the level of the signal. This should be called before any of the
+  // other processing is performed.
+  void ApplyPreLevelAdjustment(AudioBuffer& audio_buffer);
+
+  // Adjusts the level of the signal. This should be called after all of the
+  // other processing has been performed.
+  void ApplyPostLevelAdjustment(AudioBuffer& audio_buffer);
+
+  // Sets the gain to apply to each sample before any of the other processing
+  // is performed.
+  void SetPreGain(float pre_gain);
+
+  // Returns the total pre-adjustment gain applied, comprising both the
+  // pre_gain as well as the gain from the emulated analog mic, to each sample
+  // before any of the other processing is performed.
+  float GetPreAdjustmentGain() const { return pre_adjustment_gain_; }
+
+  // Sets the gain to apply to each sample after all of the other processing
+  // has been performed.
+  void SetPostGain(float post_gain);
+
+  // Sets the analog gain level to use for the emulated analog gain.
+  // `level` must be in the range [0...255].
+  void SetAnalogMicGainLevel(int level);
+
+  // Returns the current analog gain level used for the emulated analog gain.
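+  // This is the raw level in [0, 255] set via SetAnalogMicGainLevel() or the
+  // constructor, i.e. the input to the level-to-gain mapping rather than the
+  // resulting gain (for that, see GetPreAdjustmentGain()).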
+ int GetAnalogMicGainLevel() const { return emulated_analog_mic_gain_level_; } + + private: + // Updates the value of `pre_adjustment_gain_` based on the supplied values + // for `pre_gain` and `emulated_analog_mic_gain_level_`. + void UpdatePreAdjustmentGain(); + + const bool emulated_analog_mic_gain_enabled_; + int emulated_analog_mic_gain_level_; + float pre_gain_; + float pre_adjustment_gain_; + AudioSamplesScaler pre_scaler_; + AudioSamplesScaler post_scaler_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build new file mode 100644 index 0000000000..3e57c48b6f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc", + "/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + 
DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("capture_levels_adjuster_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc new file mode 100644 index 0000000000..1183441a14 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h" + +#include <algorithm> +#include <tuple> + +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +float SampleValueForChannel(int channel) { + constexpr float kSampleBaseValue = 100.f; + constexpr float kSampleChannelOffset = 1.f; + return kSampleBaseValue + channel * kSampleChannelOffset; +} + +void PopulateBuffer(AudioBuffer& audio_buffer) { + for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) { + test::FillBufferChannel(SampleValueForChannel(ch), ch, audio_buffer); + } +} + +float ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + bool emulated_analog_mic_gain_enabled, + int emulated_analog_mic_gain_level, + float pre_gain) { + if (!emulated_analog_mic_gain_enabled) { + return pre_gain; + } + return pre_gain * std::min(emulated_analog_mic_gain_level, 255) / 255.f; +} + +float ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + bool emulated_analog_mic_gain_enabled, + int emulated_analog_mic_gain_level, + float pre_gain, + float post_gain) { + return post_gain * ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + emulated_analog_mic_gain_enabled, + emulated_analog_mic_gain_level, pre_gain); +} + +constexpr int kNumFramesToProcess = 10; + +class CaptureLevelsAdjusterTest + : public ::testing::Test, + public ::testing::WithParamInterface< + std::tuple<int, int, bool, int, float, float>> { + protected: + int sample_rate_hz() const { return std::get<0>(GetParam()); } + int num_channels() const { return std::get<1>(GetParam()); } + bool emulated_analog_mic_gain_enabled() const { + return std::get<2>(GetParam()); + } + int emulated_analog_mic_gain_level() const { return std::get<3>(GetParam()); } + float pre_gain() const { return std::get<4>(GetParam()); } + float post_gain() const { return std::get<5>(GetParam()); } +}; + +INSTANTIATE_TEST_SUITE_P( + CaptureLevelsAdjusterTestSuite, + CaptureLevelsAdjusterTest, + ::testing::Combine(::testing::Values(16000, 32000, 48000), + ::testing::Values(1, 2, 4), + ::testing::Values(false, true), + ::testing::Values(21, 255), + ::testing::Values(0.1f, 1.f, 4.f), + ::testing::Values(0.1f, 1.f, 4.f))); + +TEST_P(CaptureLevelsAdjusterTest, InitialGainIsInstantlyAchieved) { + CaptureLevelsAdjuster adjuster(emulated_analog_mic_gain_enabled(), + emulated_analog_mic_gain_level(), pre_gain(), + post_gain()); + + AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(), + num_channels(), sample_rate_hz(), num_channels()); + + const float expected_signal_gain_after_pre_gain = + ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( +
emulated_analog_mic_gain_enabled(), emulated_analog_mic_gain_level(), + pre_gain()); + const float expected_signal_gain_after_post_level_adjustment = + ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + emulated_analog_mic_gain_enabled(), emulated_analog_mic_gain_level(), + pre_gain(), post_gain()); + + for (int frame = 0; frame < kNumFramesToProcess; ++frame) { + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + EXPECT_FLOAT_EQ(adjuster.GetPreAdjustmentGain(), + expected_signal_gain_after_pre_gain); + + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ( + audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_pre_gain * SampleValueForChannel(ch)); + } + } + adjuster.ApplyPostLevelAdjustment(audio_buffer); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_post_level_adjustment * + SampleValueForChannel(ch)); + } + } + } +} + +TEST_P(CaptureLevelsAdjusterTest, NewGainsAreAchieved) { + const int lower_emulated_analog_mic_gain_level = + emulated_analog_mic_gain_level(); + const float lower_pre_gain = pre_gain(); + const float lower_post_gain = post_gain(); + const int higher_emulated_analog_mic_gain_level = + std::min(lower_emulated_analog_mic_gain_level * 2, 255); + const float higher_pre_gain = lower_pre_gain * 2.f; + const float higher_post_gain = lower_post_gain * 2.f; + + CaptureLevelsAdjuster adjuster(emulated_analog_mic_gain_enabled(), + lower_emulated_analog_mic_gain_level, + lower_pre_gain, lower_post_gain); + + AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(), + num_channels(), sample_rate_hz(), num_channels()); + + const float expected_signal_gain_after_pre_gain = + ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + emulated_analog_mic_gain_enabled(), + higher_emulated_analog_mic_gain_level, higher_pre_gain); + const float expected_signal_gain_after_post_level_adjustment = + ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + emulated_analog_mic_gain_enabled(), + higher_emulated_analog_mic_gain_level, higher_pre_gain, + higher_post_gain); + + adjuster.SetPreGain(higher_pre_gain); + adjuster.SetPostGain(higher_post_gain); + adjuster.SetAnalogMicGainLevel(higher_emulated_analog_mic_gain_level); + + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + adjuster.ApplyPostLevelAdjustment(audio_buffer); + EXPECT_EQ(adjuster.GetAnalogMicGainLevel(), + higher_emulated_analog_mic_gain_level); + + for (int frame = 1; frame < kNumFramesToProcess; ++frame) { + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + EXPECT_FLOAT_EQ(adjuster.GetPreAdjustmentGain(), + expected_signal_gain_after_pre_gain); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ( + audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_pre_gain * SampleValueForChannel(ch)); + } + } + + adjuster.ApplyPostLevelAdjustment(audio_buffer); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_post_level_adjustment * + SampleValueForChannel(ch)); + } + } + + EXPECT_EQ(adjuster.GetAnalogMicGainLevel(), + higher_emulated_analog_mic_gain_level); + } +} + +} 
// namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/debug.proto b/third_party/libwebrtc/modules/audio_processing/debug.proto new file mode 100644 index 0000000000..cc5efbc73c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/debug.proto @@ -0,0 +1,115 @@ +syntax = "proto2"; +option optimize_for = LITE_RUNTIME; +package webrtc.audioproc; + +// Contains the format of input/output/reverse audio. An Init message is added +// when any of the fields are changed. +message Init { + optional int32 sample_rate = 1; + optional int32 device_sample_rate = 2 [deprecated=true]; + optional int32 num_input_channels = 3; + optional int32 num_output_channels = 4; + optional int32 num_reverse_channels = 5; + optional int32 reverse_sample_rate = 6; + optional int32 output_sample_rate = 7; + optional int32 reverse_output_sample_rate = 8; + optional int32 num_reverse_output_channels = 9; + optional int64 timestamp_ms = 10; +} + +// May contain interleaved or deinterleaved data, but don't store both formats. +message ReverseStream { + // int16 interleaved data. + optional bytes data = 1; + + // float deinterleaved data, where each repeated element points to a single + // channel buffer of data. + repeated bytes channel = 2; +} + +// May contain interleaved or deinterleaved data, but don't store both formats. +message Stream { + // int16 interleaved data. + optional bytes input_data = 1; + optional bytes output_data = 2; + + optional int32 delay = 3; + optional sint32 drift = 4; + optional int32 applied_input_volume = 5; + optional bool keypress = 6; + + // float deinterleaved data, where each repeated element points to a single + // channel buffer of data. + repeated bytes input_channel = 7; + repeated bytes output_channel = 8; +} + +// Contains the configurations of various APM component. A Config message is +// added when any of the fields are changed. +message Config { + // Acoustic echo canceler. + optional bool aec_enabled = 1; + optional bool aec_delay_agnostic_enabled = 2; + optional bool aec_drift_compensation_enabled = 3; + optional bool aec_extended_filter_enabled = 4; + optional int32 aec_suppression_level = 5; + // Mobile AEC. + optional bool aecm_enabled = 6; + optional bool aecm_comfort_noise_enabled = 7 [deprecated = true]; + optional int32 aecm_routing_mode = 8 [deprecated = true]; + // Automatic gain controller. + optional bool agc_enabled = 9; + optional int32 agc_mode = 10; + optional bool agc_limiter_enabled = 11; + optional bool noise_robust_agc_enabled = 12; + // High pass filter. + optional bool hpf_enabled = 13; + // Noise suppression. + optional bool ns_enabled = 14; + optional int32 ns_level = 15; + // Transient suppression. + optional bool transient_suppression_enabled = 16; + // Semicolon-separated string containing experimental feature + // descriptions. + optional string experiments_description = 17; + reserved 18; // Intelligibility enhancer enabled (deprecated). + // Pre amplifier. + optional bool pre_amplifier_enabled = 19; + optional float pre_amplifier_fixed_gain_factor = 20; + + // Next field number 21. 
+} + +message PlayoutAudioDeviceInfo { + optional int32 id = 1; + optional int32 max_volume = 2; +} + +message RuntimeSetting { + optional float capture_pre_gain = 1; + optional float custom_render_processing_setting = 2; + optional float capture_fixed_post_gain = 3; + optional int32 playout_volume_change = 4; + optional PlayoutAudioDeviceInfo playout_audio_device_change = 5; + optional bool capture_output_used = 6; + optional float capture_post_gain = 7; +} + +message Event { + enum Type { + INIT = 0; + REVERSE_STREAM = 1; + STREAM = 2; + CONFIG = 3; + UNKNOWN_EVENT = 4; + RUNTIME_SETTING = 5; + } + + required Type type = 1; + + optional Init init = 2; + optional ReverseStream reverse_stream = 3; + optional Stream stream = 4; + optional Config config = 5; + optional RuntimeSetting runtime_setting = 6; +} diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc new file mode 100644 index 0000000000..f351811e08 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <memory> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// TODO(peah): Increase the number of frames to process once the issue of +// non-repeatable test results has been resolved. +const int kNumFramesToProcess = 200; + +void SetupComponent(int sample_rate_hz, + EchoControlMobileImpl::RoutingMode routing_mode, + bool comfort_noise_enabled, + EchoControlMobileImpl* echo_control_mobile) { + echo_control_mobile->Initialize( + sample_rate_hz > 16000 ?
16000 : sample_rate_hz, 1, 1); + echo_control_mobile->set_routing_mode(routing_mode); + echo_control_mobile->enable_comfort_noise(comfort_noise_enabled); +} + +void ProcessOneFrame(int sample_rate_hz, + int stream_delay_ms, + AudioBuffer* render_audio_buffer, + AudioBuffer* capture_audio_buffer, + EchoControlMobileImpl* echo_control_mobile) { + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + render_audio_buffer->SplitIntoFrequencyBands(); + capture_audio_buffer->SplitIntoFrequencyBands(); + } + + std::vector<int16_t> render_audio; + EchoControlMobileImpl::PackRenderAudioBuffer( + render_audio_buffer, 1, render_audio_buffer->num_channels(), + &render_audio); + echo_control_mobile->ProcessRenderAudio(render_audio); + + echo_control_mobile->ProcessCaptureAudio(capture_audio_buffer, + stream_delay_ms); + + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_audio_buffer->MergeFrequencyBands(); + } +} + +void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + int stream_delay_ms, + EchoControlMobileImpl::RoutingMode routing_mode, + bool comfort_noise_enabled, + const rtc::ArrayView<const float>& output_reference) { + EchoControlMobileImpl echo_control_mobile; + SetupComponent(sample_rate_hz, routing_mode, comfort_noise_enabled, + &echo_control_mobile); + + const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig render_config(sample_rate_hz, num_channels); + AudioBuffer render_buffer( + render_config.sample_rate_hz(), render_config.num_channels(), + render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1); + test::InputAudioFile render_file( + test::GetApmRenderTestVectorFileName(sample_rate_hz)); + std::vector<float> render_input(samples_per_channel * num_channels); + + const StreamConfig capture_config(sample_rate_hz, num_channels); + AudioBuffer capture_buffer( + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(samples_per_channel * num_channels); + + for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &render_file, render_input); + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &capture_file, capture_input); + + test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + ProcessOneFrame(sample_rate_hz, stream_delay_ms, &render_buffer, + &capture_buffer, &echo_control_mobile); + } + + // Extract and verify the test results. + std::vector<float> capture_output; + test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, + &capture_output); + + // Compare the output with the reference. Only the first values of the output + // from the last processed frame are compared, to avoid having to specify all + // preceding frames as test vectors. As the algorithm being tested has a + // memory, testing only the last frame implicitly also tests the preceding + // frames. + const float kElementErrorBound = 1.0f / 32768.0f; + EXPECT_TRUE(test::VerifyDeinterleavedArray( + capture_config.num_frames(), capture_config.num_channels(), + output_reference, capture_output, kElementErrorBound)); +} + +} // namespace + +// TODO(peah): Re-enable once the integer overflow issue in aecm_core.c:932:69 +// has been solved.
+TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono8kHz_LoudSpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.005280f, 0.002380f, -0.000427f}; + + RunBitexactnessTest(8000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.003601f, 0.002991f, 0.001923f}; + RunBitexactnessTest(16000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono32kHz_LoudSpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.002258f, 0.002899f, 0.003906f}; + + RunBitexactnessTest(32000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono48kHz_LoudSpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {-0.000046f, 0.000041f, 0.000249f}; + + RunBitexactnessTest(48000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudSpeakerPhone_CngOff_StreamDelay0) { + const float kOutputReference[] = {0.000000f, 0.000000f, 0.000000f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + false, kOutputReference); +} + +// TODO(peah): Renable once the integer overflow issue in aecm_core.c:932:69 +// has been solved. +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay5) { + const float kOutputReference[] = {0.003693f, 0.002930f, 0.001801f}; + + RunBitexactnessTest(16000, 1, 5, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay10) { + const float kOutputReference[] = {-0.002380f, -0.002533f, -0.002563f}; + + RunBitexactnessTest(16000, 1, 10, + EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_QuietEarpieceOrHeadset_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.000397f, 0.000000f, -0.000305f}; + + RunBitexactnessTest( + 16000, 1, 0, EchoControlMobileImpl::RoutingMode::kQuietEarpieceOrHeadset, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_Earpiece_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.002167f, 0.001617f, 0.001038f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobileImpl::RoutingMode::kEarpiece, true, + kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudEarpiece_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.003540f, 0.002899f, 0.001862f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobileImpl::RoutingMode::kLoudEarpiece, true, + kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_SpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.003632f, 0.003052f, 0.001984f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobileImpl::RoutingMode::kSpeakerphone, true, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc new file mode 100644 
index 0000000000..fa5cb8ffec --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_control_mobile_impl.h" + +#include <string.h> + +#include <cstdint> + +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { +int16_t MapSetting(EchoControlMobileImpl::RoutingMode mode) { + switch (mode) { + case EchoControlMobileImpl::kQuietEarpieceOrHeadset: + return 0; + case EchoControlMobileImpl::kEarpiece: + return 1; + case EchoControlMobileImpl::kLoudEarpiece: + return 2; + case EchoControlMobileImpl::kSpeakerphone: + return 3; + case EchoControlMobileImpl::kLoudSpeakerphone: + return 4; + } + RTC_DCHECK_NOTREACHED(); + return -1; +} + +AudioProcessing::Error MapError(int err) { + switch (err) { + case AECM_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + case AECM_NULL_POINTER_ERROR: + return AudioProcessing::kNullPointerError; + case AECM_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + case AECM_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + default: + // AECM_UNSPECIFIED_ERROR + // AECM_UNINITIALIZED_ERROR + return AudioProcessing::kUnspecifiedError; + } +} + +} // namespace + +struct EchoControlMobileImpl::StreamProperties { + StreamProperties() = delete; + StreamProperties(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) + : sample_rate_hz(sample_rate_hz), + num_reverse_channels(num_reverse_channels), + num_output_channels(num_output_channels) {} + + int sample_rate_hz; + size_t num_reverse_channels; + size_t num_output_channels; +}; + +class EchoControlMobileImpl::Canceller { + public: + Canceller() { + state_ = WebRtcAecm_Create(); + RTC_CHECK(state_); + } + + ~Canceller() { + RTC_DCHECK(state_); + WebRtcAecm_Free(state_); + } + + Canceller(const Canceller&) = delete; + Canceller& operator=(const Canceller&) = delete; + + void* state() { + RTC_DCHECK(state_); + return state_; + } + + void Initialize(int sample_rate_hz) { + RTC_DCHECK(state_); + int error = WebRtcAecm_Init(state_, sample_rate_hz); + RTC_DCHECK_EQ(AudioProcessing::kNoError, error); + } + + private: + void* state_; +}; + +EchoControlMobileImpl::EchoControlMobileImpl() + : routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {} + +EchoControlMobileImpl::~EchoControlMobileImpl() {} + +void EchoControlMobileImpl::ProcessRenderAudio( + rtc::ArrayView<const int16_t> packed_render_audio) { + RTC_DCHECK(stream_properties_); + + size_t buffer_index = 0; + size_t num_frames_per_band = + packed_render_audio.size() / (stream_properties_->num_output_channels * + stream_properties_->num_reverse_channels); + + for (auto& canceller : cancellers_) { + WebRtcAecm_BufferFarend(canceller->state(), + &packed_render_audio[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + } +} + +void EchoControlMobileImpl::PackRenderAudioBuffer( +
const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector<int16_t>* packed_buffer) { + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + RTC_DCHECK_EQ(num_channels, audio->num_channels()); + + // The ordering convention must be followed to pass to the correct AECM. + packed_buffer->clear(); + int render_channel = 0; + for (size_t i = 0; i < num_output_channels; i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { + std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> data_to_buffer; + FloatS16ToS16(audio->split_bands_const(render_channel)[kBand0To8kHz], + audio->num_frames_per_band(), data_to_buffer.data()); + + // Buffer the samples in the render queue. + packed_buffer->insert( + packed_buffer->end(), data_to_buffer.data(), + data_to_buffer.data() + audio->num_frames_per_band()); + render_channel = (render_channel + 1) % audio->num_channels(); + } + } +} + +size_t EchoControlMobileImpl::NumCancellersRequired( + size_t num_output_channels, + size_t num_reverse_channels) { + return num_output_channels * num_reverse_channels; +} + +int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, + int stream_delay_ms) { + RTC_DCHECK(stream_properties_); + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_output_channels); + RTC_DCHECK_GE(cancellers_.size(), stream_properties_->num_reverse_channels * + audio->num_channels()); + + int err = AudioProcessing::kNoError; + + // The ordering convention must be followed to pass to the correct AECM. + size_t handle_index = 0; + for (size_t capture = 0; capture < audio->num_channels(); ++capture) { + // TODO(ajm): improve how this works, possibly inside AECM. + // This is kind of hacked up. + RTC_DCHECK_LT(capture, low_pass_reference_.size()); + const int16_t* noisy = + reference_copied_ ?
low_pass_reference_[capture].data() : nullptr; + + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + + std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> split_bands_data; + int16_t* split_bands = split_bands_data.data(); + const int16_t* clean = split_bands_data.data(); + if (audio->split_bands(capture)[kBand0To8kHz]) { + FloatS16ToS16(audio->split_bands(capture)[kBand0To8kHz], + audio->num_frames_per_band(), split_bands_data.data()); + } else { + clean = nullptr; + split_bands = nullptr; + } + + if (noisy == NULL) { + noisy = clean; + clean = NULL; + } + for (size_t render = 0; render < stream_properties_->num_reverse_channels; + ++render) { + err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean, + split_bands, audio->num_frames_per_band(), + stream_delay_ms); + + if (split_bands) { + S16ToFloatS16(split_bands, audio->num_frames_per_band(), + audio->split_bands(capture)[kBand0To8kHz]); + } + + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + ++handle_index; + } + for (size_t band = 1u; band < audio->num_bands(); ++band) { + memset(audio->split_bands_f(capture)[band], 0, + audio->num_frames_per_band() * + sizeof(audio->split_bands_f(capture)[band][0])); + } + } + return AudioProcessing::kNoError; +} + +int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) { + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + routing_mode_ = mode; + return Configure(); +} + +EchoControlMobileImpl::RoutingMode EchoControlMobileImpl::routing_mode() const { + return routing_mode_; +} + +int EchoControlMobileImpl::enable_comfort_noise(bool enable) { + comfort_noise_enabled_ = enable; + return Configure(); +} + +bool EchoControlMobileImpl::is_comfort_noise_enabled() const { + return comfort_noise_enabled_; +} + +void EchoControlMobileImpl::Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) { + low_pass_reference_.resize(num_output_channels); + for (auto& reference : low_pass_reference_) { + reference.fill(0); + } + + stream_properties_.reset(new StreamProperties( + sample_rate_hz, num_reverse_channels, num_output_channels)); + + // AECM only supports 16 kHz or lower sample rates. + RTC_DCHECK_LE(stream_properties_->sample_rate_hz, + AudioProcessing::kSampleRate16kHz); + + cancellers_.resize( + NumCancellersRequired(stream_properties_->num_output_channels, + stream_properties_->num_reverse_channels)); + + for (auto& canceller : cancellers_) { + if (!canceller) { + canceller.reset(new Canceller()); + } + canceller->Initialize(sample_rate_hz); + } + Configure(); +} + +int EchoControlMobileImpl::Configure() { + AecmConfig config; + config.cngMode = comfort_noise_enabled_; + config.echoMode = MapSetting(routing_mode_); + int error = AudioProcessing::kNoError; + for (auto& canceller : cancellers_) { + int handle_error = WebRtcAecm_set_config(canceller->state(), config); + if (handle_error != AudioProcessing::kNoError) { + error = handle_error; + } + } + return error; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h new file mode 100644 index 0000000000..f7f2626a0e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <vector> + +#include "api/array_view.h" + +namespace webrtc { + +class AudioBuffer; + +// The acoustic echo control for mobile (AECM) component is a low-complexity, +// robust option intended for use on mobile devices. +class EchoControlMobileImpl { + public: + EchoControlMobileImpl(); + + ~EchoControlMobileImpl(); + + // Recommended settings for particular audio routes. In general, the louder + // the echo is expected to be, the higher this value should be set. The + // preferred setting may vary from device to device. + enum RoutingMode { + kQuietEarpieceOrHeadset, + kEarpiece, + kLoudEarpiece, + kSpeakerphone, + kLoudSpeakerphone + }; + + // Sets echo control appropriate for the audio routing `mode` on the device. + // It can and should be updated during a call if the audio routing changes. + int set_routing_mode(RoutingMode mode); + RoutingMode routing_mode() const; + + // Comfort noise replaces suppressed background noise to maintain a + // consistent signal level. + int enable_comfort_noise(bool enable); + bool is_comfort_noise_enabled() const; + + void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio); + int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms); + + void Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels); + + static void PackRenderAudioBuffer(const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector<int16_t>* packed_buffer); + + static size_t NumCancellersRequired(size_t num_output_channels, + size_t num_reverse_channels); + + private: + class Canceller; + struct StreamProperties; + + int Configure(); + + RoutingMode routing_mode_; + bool comfort_noise_enabled_; + + std::vector<std::unique_ptr<Canceller>> cancellers_; + std::unique_ptr<StreamProperties> stream_properties_; + std::vector<std::array<int16_t, 160>> low_pass_reference_; + bool reference_copied_ = false; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc new file mode 100644 index 0000000000..ed0393043c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include <array> +#include <memory> + +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "test/gtest.h" + +namespace webrtc { +TEST(EchoControlMobileTest, InterfaceConfiguration) { + EchoControlMobileImpl aecm; + aecm.Initialize(AudioProcessing::kSampleRate16kHz, 2, 2); + + // Toggle routing modes + std::array<EchoControlMobileImpl::RoutingMode, 5> routing_modes = { + EchoControlMobileImpl::kQuietEarpieceOrHeadset, + EchoControlMobileImpl::kEarpiece, + EchoControlMobileImpl::kLoudEarpiece, + EchoControlMobileImpl::kSpeakerphone, + EchoControlMobileImpl::kLoudSpeakerphone, + }; + for (auto mode : routing_modes) { + EXPECT_EQ(0, aecm.set_routing_mode(mode)); + EXPECT_EQ(mode, aecm.routing_mode()); + } + + // Turn comfort noise off/on + EXPECT_EQ(0, aecm.enable_comfort_noise(false)); + EXPECT_FALSE(aecm.is_comfort_noise_enabled()); + EXPECT_EQ(0, aecm.enable_comfort_noise(true)); + EXPECT_TRUE(aecm.is_comfort_noise_enabled()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc new file mode 100644 index 0000000000..a6d10edfe2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +CircularBuffer::CircularBuffer(size_t size) : buffer_(size) {} +CircularBuffer::~CircularBuffer() = default; + +void CircularBuffer::Push(float value) { + buffer_[next_insertion_index_] = value; + ++next_insertion_index_; + next_insertion_index_ %= buffer_.size(); + RTC_DCHECK_LT(next_insertion_index_, buffer_.size()); + nr_elements_in_buffer_ = std::min(nr_elements_in_buffer_ + 1, buffer_.size()); + RTC_DCHECK_LE(nr_elements_in_buffer_, buffer_.size()); +} + +absl::optional<float> CircularBuffer::Pop() { + if (nr_elements_in_buffer_ == 0) { + return absl::nullopt; + } + const size_t index = + (buffer_.size() + next_insertion_index_ - nr_elements_in_buffer_) % + buffer_.size(); + RTC_DCHECK_LT(index, buffer_.size()); + --nr_elements_in_buffer_; + return buffer_[index]; +} + +void CircularBuffer::Clear() { + std::fill(buffer_.begin(), buffer_.end(), 0.f); + next_insertion_index_ = 0; + nr_elements_in_buffer_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h new file mode 100644 index 0000000000..db1aeaebf6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ + +#include <stddef.h> + +#include <vector> + +#include "absl/types/optional.h" + +namespace webrtc { + +// Ring buffer containing floating point values. +struct CircularBuffer { + public: + explicit CircularBuffer(size_t size); + ~CircularBuffer(); + + void Push(float value); + absl::optional<float> Pop(); + size_t Size() const { return nr_elements_in_buffer_; } + // This function fills the buffer with zeros, but does not change its size. + void Clear(); + + private: + std::vector<float> buffer_; + size_t next_insertion_index_ = 0; + // This is the number of elements that have been pushed into the circular + // buffer, not the allocated buffer size. + size_t nr_elements_in_buffer_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc new file mode 100644 index 0000000000..7a234d4a55 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(CircularBufferTests, LessThanMaxTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, FillTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, OverflowTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + test_buffer.Push(4.f); + // Because the circular buffer has a size of 3, the first insert should have + // been forgotten. + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); + EXPECT_EQ(4.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, ReadFromEmpty) { + CircularBuffer test_buffer(3); + EXPECT_EQ(absl::nullopt, test_buffer.Pop()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc new file mode 100644 index 0000000000..a9ebb8cd92 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" + +#include <math.h> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. +constexpr float kAlpha = 0.001f; + +} // namespace + +void MeanVarianceEstimator::Update(float value) { + mean_ = (1.f - kAlpha) * mean_ + kAlpha * value; + variance_ = + (1.f - kAlpha) * variance_ + kAlpha * (value - mean_) * (value - mean_); + RTC_DCHECK(std::isfinite(mean_)); + RTC_DCHECK(std::isfinite(variance_)); +} + +float MeanVarianceEstimator::std_deviation() const { + RTC_DCHECK_GE(variance_, 0.f); + return sqrtf(variance_); +} + +float MeanVarianceEstimator::mean() const { + return mean_; +} + +void MeanVarianceEstimator::Clear() { + mean_ = 0.f; + variance_ = 0.f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h new file mode 100644 index 0000000000..7f793df1e8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ + +namespace webrtc { + +// This class iteratively estimates the mean and variance of a signal. +class MeanVarianceEstimator { + public: + void Update(float value); + float std_deviation() const; + float mean() const; + void Clear(); + + private: + // Estimate of the expected value of the input values. + float mean_ = 0.f; + // Estimate of the variance of the input values. + float variance_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc new file mode 100644 index 0000000000..8327d23e8a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc @@ -0,0 +1,65 @@ + +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(MeanVarianceEstimatorTests, InsertTwoValues) { + MeanVarianceEstimator test_estimator; + // Insert two values.
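+ // (Illustrative note, not part of the upstream file: with kAlpha = 0.001, + // each Update() moves the running mean 0.1% toward the new sample, so after + // Update(3.f) and Update(5.f) the mean is roughly 0.008 and the variance + // estimate is likewise small but strictly positive, which is what the + // checks below assert.)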
+ test_estimator.Update(3.f); + test_estimator.Update(5.f); + + EXPECT_GT(test_estimator.mean(), 0.f); + EXPECT_GT(test_estimator.std_deviation(), 0.f); + // Test Clear method + test_estimator.Clear(); + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, InsertZeroes) { + MeanVarianceEstimator test_estimator; + // Insert the same value many times. + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(0.f); + } + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, ConstantValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(3.f); + } + // The mean should be close to three, and the standard deviation should be + // close to zero. + EXPECT_NEAR(3.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(0.0f, test_estimator.std_deviation(), 0.01f); +} + +TEST(MeanVarianceEstimatorTests, AlternatingValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(1.f); + test_estimator.Update(-1.f); + } + // The mean should be close to zero, and the standard deviation should be + // close to one. + EXPECT_NEAR(0.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(1.0f, test_estimator.std_deviation(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc new file mode 100644 index 0000000000..3054e98bd3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter for controlling how fast the estimated maximum decays after the +// previous maximum is no longer valid. With a value of 0.99, the maximum will +// decay to 1% of its former value after 460 updates. +constexpr float kDecayFactor = 0.99f; + +} // namespace + +MovingMax::MovingMax(size_t window_size) : window_size_(window_size) { + RTC_DCHECK_GT(window_size, 0); +} + +MovingMax::~MovingMax() {} + +void MovingMax::Update(float value) { + if (counter_ >= window_size_ - 1) { + max_value_ *= kDecayFactor; + } else { + ++counter_; + } + if (value > max_value_) { + max_value_ = value; + counter_ = 0; + } +} + +float MovingMax::max() const { + return max_value_; +} + +void MovingMax::Clear() { + max_value_ = 0.f; + counter_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h new file mode 100644 index 0000000000..f7d8ee8137 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ + +#include <stddef.h> + +namespace webrtc { + +class MovingMax { + public: + explicit MovingMax(size_t window_size); + ~MovingMax(); + + void Update(float value); + float max() const; + // Reset all of the state in this class. + void Clear(); + + private: + float max_value_ = 0.f; + size_t counter_ = 0; + size_t window_size_ = 1; +}; + +} // namespace webrtc + +#endif  // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc new file mode 100644 index 0000000000..9429127a2b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" + +#include "test/gtest.h" + +namespace webrtc { + +// Test if the maximum is correctly found. +TEST(MovingMaxTests, SimpleTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); +} + +// Test if values fall out of the window when expected. +TEST(MovingMaxTests, SlidingWindowTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.7f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + test_moving_max.Update(1.3f); + test_moving_max.Update(1.2f); + EXPECT_LT(test_moving_max.max(), 1.9f); +} + +// Test if Clear() works as expected. +TEST(MovingMaxTests, ClearTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); + test_moving_max.Clear(); + EXPECT_EQ(0.f, test_moving_max.max()); +} + +// Test the decay of the estimated maximum.
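+// (Illustrative note, not part of the upstream file: with a window size of 1, +// every zero-valued update decays the maximum by kDecayFactor = 0.99, so +// after 500 updates it has dropped to roughly 0.99^500, about 0.007, which is +// why 0.01 is a safe upper bound below.)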
+TEST(MovingMaxTests, DecayTest) { + MovingMax test_moving_max(1); + test_moving_max.Update(1.0f); + float previous_value = 1.0f; + for (int i = 0; i < 500; i++) { + test_moving_max.Update(0.0f); + EXPECT_LT(test_moving_max.max(), previous_value); + EXPECT_GT(test_moving_max.max(), 0.0f); + previous_value = test_moving_max.max(); + } + EXPECT_LT(test_moving_max.max(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc new file mode 100644 index 0000000000..8ec9fe9f0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" + +#include <math.h> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. +constexpr float kAlpha = 0.001f; + +} // namespace + +void NormalizedCovarianceEstimator::Update(float x, + float x_mean, + float x_sigma, + float y, + float y_mean, + float y_sigma) { + covariance_ = + (1.f - kAlpha) * covariance_ + kAlpha * (x - x_mean) * (y - y_mean); + normalized_cross_correlation_ = covariance_ / (x_sigma * y_sigma + .0001f); + RTC_DCHECK(isfinite(covariance_)); + RTC_DCHECK(isfinite(normalized_cross_correlation_)); +} + +void NormalizedCovarianceEstimator::Clear() { + covariance_ = 0.f; + normalized_cross_correlation_ = 0.f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h new file mode 100644 index 0000000000..e3c36d88ba --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_ + +namespace webrtc { + +// This class iteratively estimates the normalized covariance between two +// signals. +class NormalizedCovarianceEstimator { + public: + void Update(float x, + float x_mean, + float x_sigma, + float y, + float y_mean, + float y_sigma); + // This function returns an estimate of the Pearson product-moment correlation + // coefficient of the two signals. + float normalized_cross_correlation() const { + return normalized_cross_correlation_; + } + float covariance() const { return covariance_; } + // This function resets the estimated values to zero.
+ void Clear(); + + private: + float normalized_cross_correlation_ = 0.f; + // Estimate of the covariance value. + float covariance_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc new file mode 100644 index 0000000000..89fb9383f6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc @@ -0,0 +1,41 @@ + +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(NormalizedCovarianceEstimatorTests, IdenticalSignalTest) { + NormalizedCovarianceEstimator test_estimator; + for (size_t i = 0; i < 10000; i++) { + test_estimator.Update(1.f, 0.f, 1.f, 1.f, 0.f, 1.f); + test_estimator.Update(-1.f, 0.f, 1.f, -1.f, 0.f, 1.f); + } + // A normalized covariance value close to 1 is expected. + EXPECT_NEAR(1.f, test_estimator.normalized_cross_correlation(), 0.01f); + test_estimator.Clear(); + EXPECT_EQ(0.f, test_estimator.normalized_cross_correlation()); +} + +TEST(NormalizedCovarianceEstimatorTests, OppositeSignalTest) { + NormalizedCovarianceEstimator test_estimator; + // Insert the same value many times. + for (size_t i = 0; i < 10000; i++) { + test_estimator.Update(1.f, 0.f, 1.f, -1.f, 0.f, 1.f); + test_estimator.Update(-1.f, 0.f, 1.f, 1.f, 0.f, 1.f); + } + // A normalized covariance value close to -1 is expected. + EXPECT_NEAR(-1.f, test_estimator.normalized_cross_correlation(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md b/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md new file mode 100644 index 0000000000..a77f62fbaf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md @@ -0,0 +1,26 @@ + + + +# Audio Processing Module (APM) + +## Overview + +The APM is responsible for applying speech enhancement effects to the +microphone signal. These effects are required for VoIP calling; examples +include echo cancellation (AEC), noise suppression (NS) and automatic gain +control (AGC). + +The API for APM resides in [`/modules/audio_processing/include`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_processing/include). +APM is created using the [`AudioProcessingBuilder`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_processing/include/audio_processing.h) +builder, which allows it to be customized and configured. + +Some specific aspects of APM include: +* APM is fully thread-safe in that it can be accessed concurrently from + different threads. +* APM handles any input sample rate below 384 kHz and achieves this by + automatic reconfiguration whenever a new sample format is observed.
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc
new file mode 100644
index 0000000000..edc49d1401
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc
@@ -0,0 +1,373 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/gain_control_impl.h"
+
+#include <cstdint>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/agc/legacy/gain_control.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+typedef void Handle;
+
+namespace {
+int16_t MapSetting(GainControl::Mode mode) {
+  switch (mode) {
+    case GainControl::kAdaptiveAnalog:
+      return kAgcModeAdaptiveAnalog;
+    case GainControl::kAdaptiveDigital:
+      return kAgcModeAdaptiveDigital;
+    case GainControl::kFixedDigital:
+      return kAgcModeFixedDigital;
+  }
+  RTC_DCHECK_NOTREACHED();
+  return -1;
+}
+
+// Applies the sub-frame `gains` to all the bands in `out` and clamps the
+// output in the signed 16-bit range.
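+// The 11 `gains` values are Q16 fixed-point multipliers (hence the 1/65536
+// scaling below), one per sub-frame boundary; within each of the 10
+// sub-frames the gain is linearly interpolated over kNumSubSections samples.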
+void ApplyDigitalGain(const int32_t gains[11],
+                      size_t num_bands,
+                      float* const* out) {
+  constexpr float kScaling = 1.f / 65536.f;
+  constexpr int kNumSubSections = 16;
+  constexpr float kOneByNumSubSections = 1.f / kNumSubSections;
+
+  float gains_scaled[11];
+  for (int k = 0; k < 11; ++k) {
+    gains_scaled[k] = gains[k] * kScaling;
+  }
+
+  for (size_t b = 0; b < num_bands; ++b) {
+    float* out_band = out[b];
+    for (int k = 0, sample = 0; k < 10; ++k) {
+      const float delta =
+          (gains_scaled[k + 1] - gains_scaled[k]) * kOneByNumSubSections;
+      float gain = gains_scaled[k];
+      for (int n = 0; n < kNumSubSections; ++n, ++sample) {
+        RTC_DCHECK_EQ(k * kNumSubSections + n, sample);
+        out_band[sample] *= gain;
+        out_band[sample] =
+            std::min(32767.f, std::max(-32768.f, out_band[sample]));
+        gain += delta;
+      }
+    }
+  }
+}
+
+}  // namespace
+
+struct GainControlImpl::MonoAgcState {
+  MonoAgcState() {
+    state = WebRtcAgc_Create();
+    RTC_CHECK(state);
+  }
+
+  ~MonoAgcState() {
+    RTC_DCHECK(state);
+    WebRtcAgc_Free(state);
+  }
+
+  MonoAgcState(const MonoAgcState&) = delete;
+  MonoAgcState& operator=(const MonoAgcState&) = delete;
+  int32_t gains[11];
+  Handle* state;
+};
+
+int GainControlImpl::instance_counter_ = 0;
+
+GainControlImpl::GainControlImpl()
+    : data_dumper_(new ApmDataDumper(instance_counter_)),
+      mode_(kAdaptiveAnalog),
+      minimum_capture_level_(0),
+      maximum_capture_level_(255),
+      limiter_enabled_(true),
+      target_level_dbfs_(3),
+      compression_gain_db_(9),
+      analog_capture_level_(0),
+      was_analog_level_set_(false),
+      stream_is_saturated_(false) {}
+
+GainControlImpl::~GainControlImpl() = default;
+
+void GainControlImpl::ProcessRenderAudio(
+    rtc::ArrayView<const int16_t> packed_render_audio) {
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    WebRtcAgc_AddFarend(mono_agcs_[ch]->state, packed_render_audio.data(),
+                        packed_render_audio.size());
+  }
+}
+
+void GainControlImpl::PackRenderAudioBuffer(
+    const AudioBuffer& audio,
+    std::vector<int16_t>* packed_buffer) {
+  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
+                audio.num_frames_per_band());
+  std::array<int16_t, AudioBuffer::kMaxSplitFrameLength>
+      mixed_16_kHz_render_data;
+  rtc::ArrayView<int16_t> mixed_16_kHz_render(mixed_16_kHz_render_data.data(),
+                                              audio.num_frames_per_band());
+  if (audio.num_channels() == 1) {
+    FloatS16ToS16(audio.split_bands_const(0)[kBand0To8kHz],
+                  audio.num_frames_per_band(),
+                  mixed_16_kHz_render_data.data());
+  } else {
+    const int num_channels = static_cast<int>(audio.num_channels());
+    for (size_t i = 0; i < audio.num_frames_per_band(); ++i) {
+      int32_t sum = 0;
+      for (int ch = 0; ch < num_channels; ++ch) {
+        sum += FloatS16ToS16(audio.split_channels_const(kBand0To8kHz)[ch][i]);
+      }
+      mixed_16_kHz_render_data[i] = sum / num_channels;
+    }
+  }
+
+  packed_buffer->clear();
+  packed_buffer->insert(
+      packed_buffer->end(), mixed_16_kHz_render.data(),
+      (mixed_16_kHz_render.data() + audio.num_frames_per_band()));
+}
+
+int GainControlImpl::AnalyzeCaptureAudio(const AudioBuffer& audio) {
+  RTC_DCHECK(num_proc_channels_);
+  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
+                audio.num_frames_per_band());
+  RTC_DCHECK_EQ(audio.num_channels(), *num_proc_channels_);
+  RTC_DCHECK_LE(*num_proc_channels_, mono_agcs_.size());
+
+  int16_t split_band_data[AudioBuffer::kMaxNumBands]
+                         [AudioBuffer::kMaxSplitFrameLength];
+  int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
+      split_band_data[0], split_band_data[1], split_band_data[2]};
+
+  if (mode_ == kAdaptiveAnalog) {
+    for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+      capture_levels_[ch] = analog_capture_level_;
+
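+      // Convert the split-band float data into the 16-bit integer layout
+      // expected by the legacy AGC API.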
+      audio.ExportSplitChannelData(ch, split_bands);
+
+      int err =
+          WebRtcAgc_AddMic(mono_agcs_[ch]->state, split_bands,
+                           audio.num_bands(), audio.num_frames_per_band());
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+    }
+  } else if (mode_ == kAdaptiveDigital) {
+    for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+      int32_t capture_level_out = 0;
+
+      audio.ExportSplitChannelData(ch, split_bands);
+
+      int err =
+          WebRtcAgc_VirtualMic(mono_agcs_[ch]->state, split_bands,
+                               audio.num_bands(), audio.num_frames_per_band(),
+                               analog_capture_level_, &capture_level_out);
+
+      capture_levels_[ch] = capture_level_out;
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+    }
+  }
+
+  return AudioProcessing::kNoError;
+}
+
+int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
+                                         bool stream_has_echo) {
+  if (mode_ == kAdaptiveAnalog && !was_analog_level_set_) {
+    return AudioProcessing::kStreamParameterNotSetError;
+  }
+
+  RTC_DCHECK(num_proc_channels_);
+  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
+                audio->num_frames_per_band());
+  RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
+
+  stream_is_saturated_ = false;
+  bool error_reported = false;
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    int16_t split_band_data[AudioBuffer::kMaxNumBands]
+                           [AudioBuffer::kMaxSplitFrameLength];
+    int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
+        split_band_data[0], split_band_data[1], split_band_data[2]};
+    audio->ExportSplitChannelData(ch, split_bands);
+
+    // The call to stream_has_echo() is ok from a deadlock perspective
+    // as the capture lock is already held.
+    int32_t new_capture_level = 0;
+    uint8_t saturation_warning = 0;
+    int err_analyze = WebRtcAgc_Analyze(
+        mono_agcs_[ch]->state, split_bands, audio->num_bands(),
+        audio->num_frames_per_band(), capture_levels_[ch], &new_capture_level,
+        stream_has_echo, &saturation_warning, mono_agcs_[ch]->gains);
+    capture_levels_[ch] = new_capture_level;
+
+    error_reported =
+        error_reported || err_analyze != AudioProcessing::kNoError;
+
+    stream_is_saturated_ = stream_is_saturated_ || saturation_warning == 1;
+  }
+
+  // Choose the minimum gain for application.
+  size_t index_to_apply = 0;
+  for (size_t ch = 1; ch < mono_agcs_.size(); ++ch) {
+    if (mono_agcs_[index_to_apply]->gains[10] < mono_agcs_[ch]->gains[10]) {
+      index_to_apply = ch;
+    }
+  }
+
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    ApplyDigitalGain(mono_agcs_[index_to_apply]->gains, audio->num_bands(),
+                     audio->split_bands(ch));
+  }
+
+  RTC_DCHECK_LT(0ul, *num_proc_channels_);
+  if (mode_ == kAdaptiveAnalog) {
+    // Take the analog level to be the minimum across all channels.
+    analog_capture_level_ = capture_levels_[0];
+    for (size_t ch = 1; ch < mono_agcs_.size(); ++ch) {
+      analog_capture_level_ =
+          std::min(analog_capture_level_, capture_levels_[ch]);
+    }
+  }
+
+  if (error_reported) {
+    return AudioProcessing::kUnspecifiedError;
+  }
+
+  was_analog_level_set_ = false;
+
+  return AudioProcessing::kNoError;
+}
+
+
+// TODO(ajm): ensure this is called under kAdaptiveAnalog.
+int GainControlImpl::set_stream_analog_level(int level) { + data_dumper_->DumpRaw("gain_control_set_stream_analog_level", 1, &level); + + was_analog_level_set_ = true; + if (level < minimum_capture_level_ || level > maximum_capture_level_) { + return AudioProcessing::kBadParameterError; + } + analog_capture_level_ = level; + + return AudioProcessing::kNoError; +} + +int GainControlImpl::stream_analog_level() const { + data_dumper_->DumpRaw("gain_control_stream_analog_level", 1, + &analog_capture_level_); + return analog_capture_level_; +} + +int GainControlImpl::set_mode(Mode mode) { + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + + mode_ = mode; + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + return AudioProcessing::kNoError; +} + + +int GainControlImpl::set_analog_level_limits(int minimum, int maximum) { + if (minimum < 0 || maximum > 65535 || maximum < minimum) { + return AudioProcessing::kBadParameterError; + } + + minimum_capture_level_ = minimum; + maximum_capture_level_ = maximum; + + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + return AudioProcessing::kNoError; +} + + +int GainControlImpl::set_target_level_dbfs(int level) { + if (level > 31 || level < 0) { + return AudioProcessing::kBadParameterError; + } + target_level_dbfs_ = level; + return Configure(); +} + +int GainControlImpl::set_compression_gain_db(int gain) { + if (gain < 0 || gain > 90) { + RTC_LOG(LS_ERROR) << "set_compression_gain_db(" << gain << ") failed."; + return AudioProcessing::kBadParameterError; + } + compression_gain_db_ = gain; + return Configure(); +} + +int GainControlImpl::enable_limiter(bool enable) { + limiter_enabled_ = enable; + return Configure(); +} + +void GainControlImpl::Initialize(size_t num_proc_channels, int sample_rate_hz) { + data_dumper_->InitiateNewSetOfRecordings(); + + RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 || + sample_rate_hz == 48000); + + num_proc_channels_ = num_proc_channels; + sample_rate_hz_ = sample_rate_hz; + + mono_agcs_.resize(*num_proc_channels_); + capture_levels_.resize(*num_proc_channels_); + for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) { + if (!mono_agcs_[ch]) { + mono_agcs_[ch].reset(new MonoAgcState()); + } + + int error = WebRtcAgc_Init(mono_agcs_[ch]->state, minimum_capture_level_, + maximum_capture_level_, MapSetting(mode_), + *sample_rate_hz_); + RTC_DCHECK_EQ(error, 0); + capture_levels_[ch] = analog_capture_level_; + } + + Configure(); +} + +int GainControlImpl::Configure() { + WebRtcAgcConfig config; + // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we + // change the interface. 
+  // RTC_DCHECK_LE(target_level_dbfs_, 0);
+  // config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_);
+  config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_);
+  config.compressionGaindB = static_cast<int16_t>(compression_gain_db_);
+  config.limiterEnable = limiter_enabled_;
+
+  int error = AudioProcessing::kNoError;
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    int error_ch = WebRtcAgc_set_config(mono_agcs_[ch]->state, config);
+    if (error_ch != AudioProcessing::kNoError) {
+      error = error_ch;
+    }
+  }
+  return error;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h
new file mode 100644
index 0000000000..8aea8f2e95
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/gain_control.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+class GainControlImpl : public GainControl {
+ public:
+  GainControlImpl();
+  GainControlImpl(const GainControlImpl&) = delete;
+  GainControlImpl& operator=(const GainControlImpl&) = delete;
+
+  ~GainControlImpl() override;
+
+  void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
+  int AnalyzeCaptureAudio(const AudioBuffer& audio);
+  int ProcessCaptureAudio(AudioBuffer* audio, bool stream_has_echo);
+
+  void Initialize(size_t num_proc_channels, int sample_rate_hz);
+
+  static void PackRenderAudioBuffer(const AudioBuffer& audio,
+                                    std::vector<int16_t>* packed_buffer);
+
+  // GainControl implementation.
+  int stream_analog_level() const override;
+  bool is_limiter_enabled() const override { return limiter_enabled_; }
+  Mode mode() const override { return mode_; }
+  int set_mode(Mode mode) override;
+  int compression_gain_db() const override { return compression_gain_db_; }
+  int set_analog_level_limits(int minimum, int maximum) override;
+  int set_compression_gain_db(int gain) override;
+  int set_target_level_dbfs(int level) override;
+  int enable_limiter(bool enable) override;
+  int set_stream_analog_level(int level) override;
+
+ private:
+  struct MonoAgcState;
+
+  // GainControl implementation.
+  int target_level_dbfs() const override { return target_level_dbfs_; }
+  int analog_level_minimum() const override { return minimum_capture_level_; }
+  int analog_level_maximum() const override { return maximum_capture_level_; }
+  bool stream_is_saturated() const override { return stream_is_saturated_; }
+
+  int Configure();
+
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+
+  Mode mode_;
+  int minimum_capture_level_;
+  int maximum_capture_level_;
+  bool limiter_enabled_;
+  int target_level_dbfs_;
+  int compression_gain_db_;
+  int analog_capture_level_ = 0;
+  bool was_analog_level_set_;
+  bool stream_is_saturated_;
+
+  std::vector<std::unique_ptr<MonoAgcState>> mono_agcs_;
+  std::vector<int> capture_levels_;
+
+  absl::optional<size_t> num_proc_channels_;
+  absl::optional<int> sample_rate_hz_;
+
+  static int instance_counter_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc b/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc
new file mode 100644
index 0000000000..1662dc506f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc
@@ -0,0 +1,393 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/gain_control_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kNumFramesToProcess = 100;
+
+void ProcessOneFrame(int sample_rate_hz,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     GainControlImpl* gain_controller) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  std::vector<int16_t> render_audio;
+  GainControlImpl::PackRenderAudioBuffer(*render_audio_buffer, &render_audio);
+  gain_controller->ProcessRenderAudio(render_audio);
+  gain_controller->AnalyzeCaptureAudio(*capture_audio_buffer);
+  gain_controller->ProcessCaptureAudio(capture_audio_buffer, false);
+
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    capture_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+void SetupComponent(int sample_rate_hz,
+                    GainControl::Mode mode,
+                    int target_level_dbfs,
+                    int stream_analog_level,
+                    int compression_gain_db,
+                    bool enable_limiter,
+                    int analog_level_min,
+                    int analog_level_max,
+                    GainControlImpl* gain_controller) {
+  gain_controller->Initialize(1, sample_rate_hz);
+  GainControl* gc = static_cast<GainControl*>(gain_controller);
+  gc->set_mode(mode);
+  gc->set_stream_analog_level(stream_analog_level);
+  gc->set_target_level_dbfs(target_level_dbfs);
+  gc->set_compression_gain_db(compression_gain_db);
+  gc->enable_limiter(enable_limiter);
+  gc->set_analog_level_limits(analog_level_min, analog_level_max);
+}
+
+void RunBitExactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         GainControl::Mode mode,
+                         int target_level_dbfs,
+                         int stream_analog_level,
+                         int compression_gain_db,
+                         bool enable_limiter,
+                         int analog_level_min,
+                         int analog_level_max,
+                         int achieved_stream_analog_level_reference,
+                         rtc::ArrayView<const float> output_reference) {
+  GainControlImpl gain_controller;
+  SetupComponent(sample_rate_hz, mode, target_level_dbfs, stream_analog_level,
+                 compression_gain_db, enable_limiter, analog_level_min,
+                 analog_level_max, &gain_controller);
+
+  const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig render_config(sample_rate_hz, num_channels);
+  AudioBuffer render_buffer(
+      render_config.sample_rate_hz(), render_config.num_channels(),
+      render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1);
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(samples_per_channel * num_channels);
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels);
+  AudioBuffer capture_buffer(
+      capture_config.sample_rate_hz(), capture_config.num_channels(),
+      capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1);
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(samples_per_channel * num_channels);
+
+  for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &render_file, render_input);
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &gain_controller);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  EXPECT_EQ(achieved_stream_analog_level_reference,
+            gain_controller.stream_analog_level());
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, in order to avoid
+  // having to specify all preceding frames as test vectors. As the algorithm
+  // being tested has a memory, testing only the last frame implicitly also
+  // tests the preceding frames.
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+}  // namespace
+
+// TODO(peah): Activate all these tests for ARM and ARM64 once the issue on
+// the Chromium ARM and ARM64 bots has been identified. This is tracked in
+// issue https://bugs.chromium.org/p/webrtc/issues/detail?id=5711.
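+// Decoding of the test names below (inferred from the arguments passed to
+// RunBitExactnessTest): TlX = target level X dBFS, SLX = initial stream
+// analog level X, CGX = compression gain X dB, Lim/NoLim = limiter
+// enabled/disabled, ALmin_max = analog level limits [min, max].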
+ +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.027313f, -0.015900f, -0.028107f, + -0.027313f, -0.015900f, -0.028107f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.003967f, -0.002777f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.015411f, -0.008972f, -0.015839f, + -0.015411f, -0.008972f, -0.015839f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || 
defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.006134f, -0.005524f, -0.005005f};
+  RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.006134f, -0.005524f, -0.005005f};
+  RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.011749f, -0.008270f, -0.005219f};
+  RunBitExactnessTest(16000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.048896f, -0.028479f, -0.050345f,
+                                    -0.048896f, -0.028479f, -0.050345f};
+  RunBitExactnessTest(16000, 2, GainControl::Mode::kFixedDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.018158f, -0.016357f, -0.014832f};
+  RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.018158f, -0.016357f, -0.014832f};
+  RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 12; + const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 10, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.003998f, -0.002808f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 100, 5, + true, 70, 80, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.004028f, -0.002838f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, 5, + false, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.008728f, -0.006134f, -0.003845f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 40, 100, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.005859f, -0.004120f, -0.002594f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, + 30, true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc b/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc new file mode 100644 index 0000000000..dd3521268d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/gain_controller2.h"
+
+#include <memory>
+#include <utility>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+using Agc2Config = AudioProcessing::Config::GainController2;
+using InputVolumeControllerConfig = InputVolumeController::Config;
+
+constexpr int kLogLimiterStatsPeriodMs = 30'000;
+constexpr int kFrameLengthMs = 10;
+constexpr int kLogLimiterStatsPeriodNumFrames =
+    kLogLimiterStatsPeriodMs / kFrameLengthMs;
+
+// Detects the available CPU features and applies any kill-switches.
+AvailableCpuFeatures GetAllowedCpuFeatures() {
+  AvailableCpuFeatures features = GetAvailableCpuFeatures();
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
+    features.sse2 = false;
+  }
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
+    features.avx2 = false;
+  }
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
+    features.neon = false;
+  }
+  return features;
+}
+
+// Peak and RMS audio levels in dBFS.
+struct AudioLevels {
+  float peak_dbfs;
+  float rms_dbfs;
+};
+
+// Speech level info.
+struct SpeechLevel {
+  bool is_confident;
+  float rms_dbfs;
+};
+
+// Computes the audio levels for the first channel in `frame`.
+AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
+                               ApmDataDumper& data_dumper) {
+  float peak = 0.0f;
+  float rms = 0.0f;
+  for (const auto& x : frame.channel(0)) {
+    peak = std::max(std::fabs(x), peak);
+    rms += x * x;
+  }
+  AudioLevels levels{
+      FloatS16ToDbfs(peak),
+      FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
+  data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
+  data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
+  return levels;
+}
+
+}  // namespace
+
+std::atomic<int> GainController2::instance_count_(0);
+
+GainController2::GainController2(
+    const Agc2Config& config,
+    const InputVolumeControllerConfig& input_volume_controller_config,
+    int sample_rate_hz,
+    int num_channels,
+    bool use_internal_vad)
+    : cpu_features_(GetAllowedCpuFeatures()),
+      data_dumper_(instance_count_.fetch_add(1) + 1),
+      fixed_gain_applier_(
+          /*hard_clip_samples=*/false,
+          /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
+      limiter_(sample_rate_hz, &data_dumper_,
+               /*histogram_name_prefix=*/"Agc2"),
+      calls_since_last_limiter_log_(0) {
+  RTC_DCHECK(Validate(config));
+  data_dumper_.InitiateNewSetOfRecordings();
+
+  if (config.input_volume_controller.enabled ||
+      config.adaptive_digital.enabled) {
+    // Create dependencies.
+    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
+        &data_dumper_, config.adaptive_digital,
+        kAdjacentSpeechFramesThreshold);
+    if (use_internal_vad)
+      vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
+          kVadResetPeriodMs, cpu_features_, sample_rate_hz);
+  }
+
+  if (config.input_volume_controller.enabled) {
+    // Create controller.
+    input_volume_controller_ = std::make_unique<InputVolumeController>(
+        num_channels, input_volume_controller_config);
+    // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
+    input_volume_controller_->Initialize();
+  }
+
+  if (config.adaptive_digital.enabled) {
+    // Create dependencies.
+    noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
+    saturation_protector_ = CreateSaturationProtector(
+        kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
+        &data_dumper_);
+    // Create controller.
+    adaptive_digital_controller_ =
+        std::make_unique<AdaptiveDigitalGainController>(
+            &data_dumper_, config.adaptive_digital,
+            kAdjacentSpeechFramesThreshold);
+  }
+}
+
+GainController2::~GainController2() = default;
+
+// TODO(webrtc:7494): Pass the flag also to the other components.
+void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
+  if (input_volume_controller_) {
+    input_volume_controller_->HandleCaptureOutputUsedChange(
+        capture_output_used);
+  }
+}
+
+void GainController2::SetFixedGainDb(float gain_db) {
+  const float gain_factor = DbToRatio(gain_db);
+  if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
+    // Reset the limiter to quickly react on abrupt level changes caused by
+    // large changes of the fixed gain.
+    limiter_.Reset();
+  }
+  fixed_gain_applier_.SetGainFactor(gain_factor);
+}
+
+void GainController2::Analyze(int applied_input_volume,
+                              const AudioBuffer& audio_buffer) {
+  recommended_input_volume_ = absl::nullopt;
+
+  RTC_DCHECK_GE(applied_input_volume, 0);
+  RTC_DCHECK_LE(applied_input_volume, 255);
+
+  if (input_volume_controller_) {
+    input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
+                                                audio_buffer);
+  }
+}
+
+void GainController2::Process(absl::optional<float> speech_probability,
+                              bool input_volume_changed,
+                              AudioBuffer* audio) {
+  recommended_input_volume_ = absl::nullopt;
+
+  data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
+                       input_volume_changed);
+  if (input_volume_changed) {
+    // Handle input volume changes.
+    if (speech_level_estimator_)
+      speech_level_estimator_->Reset();
+    if (saturation_protector_)
+      saturation_protector_->Reset();
+  }
+
+  AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
+                                    audio->num_frames());
+  // Compute speech probability.
+  if (vad_) {
+    // When the VAD component runs, `speech_probability` should not be
+    // specified because APM should not run the same VAD twice (as an APM
+    // sub-module and internally in AGC2).
+    RTC_DCHECK(!speech_probability.has_value());
+    speech_probability = vad_->Analyze(float_frame);
+  }
+  if (speech_probability.has_value()) {
+    RTC_DCHECK_GE(*speech_probability, 0.0f);
+    RTC_DCHECK_LE(*speech_probability, 1.0f);
+  }
+  // The speech probability may not be defined at this step (e.g., when the
+  // fixed digital controller alone is enabled).
+  if (speech_probability.has_value())
+    data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
+
+  // Compute audio, noise and speech levels.
+  AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
+  absl::optional<float> noise_rms_dbfs;
+  if (noise_level_estimator_) {
+    // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+    // computation in `noise_level_estimator_`.
+    noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
+  }
+  absl::optional<SpeechLevel> speech_level;
+  if (speech_level_estimator_) {
+    RTC_DCHECK(speech_probability.has_value());
+    speech_level_estimator_->Update(
+        audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
+    speech_level =
+        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
+                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
+  }
+
+  // Update the recommended input volume.
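+  // The recommendation is only computed when the input volume controller is
+  // enabled; the estimated speech level is forwarded only when it is marked
+  // as confident.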
+  if (input_volume_controller_) {
+    RTC_DCHECK(speech_level.has_value());
+    RTC_DCHECK(speech_probability.has_value());
+    if (speech_probability.has_value()) {
+      recommended_input_volume_ =
+          input_volume_controller_->RecommendInputVolume(
+              *speech_probability,
+              speech_level->is_confident
+                  ? absl::optional<float>(speech_level->rms_dbfs)
+                  : absl::nullopt);
+    }
+  }
+
+  if (adaptive_digital_controller_) {
+    RTC_DCHECK(saturation_protector_);
+    RTC_DCHECK(speech_probability.has_value());
+    RTC_DCHECK(speech_level.has_value());
+    saturation_protector_->Analyze(*speech_probability,
+                                   audio_levels.peak_dbfs,
+                                   speech_level->rms_dbfs);
+    float headroom_db = saturation_protector_->HeadroomDb();
+    data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
+    float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
+    data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
+    RTC_DCHECK(noise_rms_dbfs.has_value());
+    adaptive_digital_controller_->Process(
+        /*info=*/{.speech_probability = *speech_probability,
+                  .speech_level_dbfs = speech_level->rms_dbfs,
+                  .speech_level_reliable = speech_level->is_confident,
+                  .noise_rms_dbfs = *noise_rms_dbfs,
+                  .headroom_db = headroom_db,
+                  .limiter_envelope_dbfs = limiter_envelope_dbfs},
+        float_frame);
+  }
+
+  // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+  // computation in `limiter_`.
+  fixed_gain_applier_.ApplyGain(float_frame);
+
+  limiter_.Process(float_frame);
+
+  // Periodically log limiter stats.
+  if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
+    calls_since_last_limiter_log_ = 0;
+    InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
+    RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
+                     << " | identity: " << stats.look_ups_identity_region
+                     << " | knee: " << stats.look_ups_knee_region
+                     << " | limiter: " << stats.look_ups_limiter_region
+                     << " | saturation: " << stats.look_ups_saturation_region;
+  }
+}
+
+bool GainController2::Validate(
+    const AudioProcessing::Config::GainController2& config) {
+  const auto& fixed = config.fixed_digital;
+  const auto& adaptive = config.adaptive_digital;
+  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
+         adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
+         adaptive.initial_gain_db >= 0.0f &&
+         adaptive.max_gain_change_db_per_second > 0.0f &&
+         adaptive.max_output_noise_level_dbfs <= 0.0f;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2.h b/third_party/libwebrtc/modules/audio_processing/gain_controller2.h
new file mode 100644
index 0000000000..43b5828d35
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2.h
@@ -0,0 +1,110 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_
+#define MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_
+
+#include <atomic>
+#include <memory>
+#include <string>
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/gain_applier.h"
+#include "modules/audio_processing/agc2/input_volume_controller.h"
+#include "modules/audio_processing/agc2/limiter.h"
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+#include "modules/audio_processing/agc2/saturation_protector.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"
+#include "modules/audio_processing/agc2/vad_wrapper.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+// Gain Controller 2 aims to automatically adjust levels by acting on the
+// microphone gain and/or applying digital gain.
+class GainController2 {
+ public:
+  // Ctor. If `use_internal_vad` is true, an internal voice activity
+  // detector is used for digital adaptive gain.
+  GainController2(
+      const AudioProcessing::Config::GainController2& config,
+      const InputVolumeController::Config& input_volume_controller_config,
+      int sample_rate_hz,
+      int num_channels,
+      bool use_internal_vad);
+  GainController2(const GainController2&) = delete;
+  GainController2& operator=(const GainController2&) = delete;
+  ~GainController2();
+
+  // Sets the fixed digital gain.
+  void SetFixedGainDb(float gain_db);
+
+  // Updates the input volume controller about whether the capture output is
+  // used or not.
+  void SetCaptureOutputUsed(bool capture_output_used);
+
+  // Analyzes `audio_buffer` before `Process()` is called so that the
+  // analysis can be performed before digital processing operations take
+  // place (e.g., echo cancellation). The analysis consists of input clipping
+  // detection and prediction (if enabled). The value of
+  // `applied_input_volume` is limited to [0, 255].
+  void Analyze(int applied_input_volume, const AudioBuffer& audio_buffer);
+
+  // Updates the recommended input volume, applies the adaptive digital and
+  // the fixed digital gains and runs a limiter on `audio`.
+  // When the internal VAD is not used, `speech_probability` should be
+  // specified and in the [0, 1] range. Otherwise ignores `speech_probability`
+  // and computes the speech probability via `vad_`.
+  // Handles input volume changes; if the caller cannot determine whether an
+  // input volume change occurred, set `input_volume_changed` to false.
+  void Process(absl::optional<float> speech_probability,
+               bool input_volume_changed,
+               AudioBuffer* audio);
+
+  static bool Validate(const AudioProcessing::Config::GainController2& config);
+
+  AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; }
+
+  absl::optional<int> recommended_input_volume() const {
+    return recommended_input_volume_;
+  }
+
+ private:
+  static std::atomic<int> instance_count_;
+  const AvailableCpuFeatures cpu_features_;
+  ApmDataDumper data_dumper_;
+
+  GainApplier fixed_gain_applier_;
+  std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
+  std::unique_ptr<VoiceActivityDetectorWrapper> vad_;
+  std::unique_ptr<SpeechLevelEstimator> speech_level_estimator_;
+  std::unique_ptr<InputVolumeController> input_volume_controller_;
+  // TODO(bugs.webrtc.org/7494): Rename to `CrestFactorEstimator`.
+  std::unique_ptr<SaturationProtector> saturation_protector_;
+  std::unique_ptr<AdaptiveDigitalGainController> adaptive_digital_controller_;
+  Limiter limiter_;
+
+  int calls_since_last_limiter_log_;
+
+  // TODO(bugs.webrtc.org/7494): Remove intermediate storing at this level
+  // once APM refactoring is completed.
+  // Recommended input volume from `InputVolumeController`. Non-empty after
+  // `Process()` if input volume controller is enabled and
+  // `InputVolumeController::Process()` has returned a non-empty value.
+  absl::optional<int> recommended_input_volume_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build
new file mode 100644
index 0000000000..fcbe873d27
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0"
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "rt"
+    ]
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+ DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_controller2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc b/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc new file mode 100644 index 0000000000..5023bab617 --- /dev/null 
+++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc
@@ -0,0 +1,615 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/gain_controller2.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <tuple>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "rtc_base/checks.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+using Agc2Config = AudioProcessing::Config::GainController2;
+using InputVolumeControllerConfig = InputVolumeController::Config;
+
+// Sets all the samples in `ab` to `value`.
+void SetAudioBufferSamples(float value, AudioBuffer& ab) {
+  for (size_t k = 0; k < ab.num_channels(); ++k) {
+    std::fill(ab.channels()[k], ab.channels()[k] + ab.num_frames(), value);
+  }
+}
+
+float RunAgc2WithConstantInput(GainController2& agc2,
+                               float input_level,
+                               int num_frames,
+                               int sample_rate_hz,
+                               int num_channels = 1,
+                               int applied_initial_volume = 0) {
+  const int num_samples = rtc::CheckedDivExact(sample_rate_hz, 100);
+  AudioBuffer ab(sample_rate_hz, num_channels, sample_rate_hz, num_channels,
+                 sample_rate_hz, num_channels);
+
+  // Give time to the level estimator to converge.
+  for (int i = 0; i < num_frames + 1; ++i) {
+    SetAudioBufferSamples(input_level, ab);
+    const auto applied_volume = agc2.recommended_input_volume();
+    agc2.Analyze(applied_volume.value_or(applied_initial_volume), ab);
+
+    agc2.Process(/*speech_probability=*/absl::nullopt,
+                 /*input_volume_changed=*/false, &ab);
+  }
+
+  // Return the last sample from the last processed frame.
+  return ab.channels()[0][num_samples - 1];
+}
+
+std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode(
+    float fixed_gain_db,
+    int sample_rate_hz) {
+  Agc2Config config;
+  config.adaptive_digital.enabled = false;
+  config.fixed_digital.gain_db = fixed_gain_db;
+  EXPECT_TRUE(GainController2::Validate(config));
+  return std::make_unique<GainController2>(
+      config, InputVolumeControllerConfig{}, sample_rate_hz,
+      /*num_channels=*/1,
+      /*use_internal_vad=*/true);
+}
+
+constexpr InputVolumeControllerConfig kTestInputVolumeControllerConfig{
+    .clipped_level_min = 20,
+    .clipped_level_step = 30,
+    .clipped_ratio_threshold = 0.4,
+    .clipped_wait_frames = 50,
+    .enable_clipping_predictor = true,
+    .target_range_max_dbfs = -6,
+    .target_range_min_dbfs = -70,
+    .update_input_volume_wait_frames = 100,
+    .speech_probability_threshold = 0.9,
+    .speech_ratio_threshold = 1,
+};
+
+}  // namespace
+
+TEST(GainController2, CheckDefaultConfig) {
+  Agc2Config config;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckFixedDigitalConfig) {
+  Agc2Config config;
+  // Attenuation is not allowed.
+  config.fixed_digital.gain_db = -5.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  // Zero gain is allowed.
+  config.fixed_digital.gain_db = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  // Positive gain is allowed.
+  config.fixed_digital.gain_db = 15.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckHeadroomDb) {
+  Agc2Config config;
+  config.adaptive_digital.headroom_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.headroom_db = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  config.adaptive_digital.headroom_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckMaxGainDb) {
+  Agc2Config config;
+  config.adaptive_digital.max_gain_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_db = 0.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckInitialGainDb) {
+  Agc2Config config;
+  config.adaptive_digital.initial_gain_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.initial_gain_db = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  config.adaptive_digital.initial_gain_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckAdaptiveDigitalMaxGainChangeSpeedConfig) {
+  Agc2Config config;
+  config.adaptive_digital.max_gain_change_db_per_second = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_change_db_per_second = 0.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_change_db_per_second = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckAdaptiveDigitalMaxOutputNoiseLevelConfig) {
+  Agc2Config config;
+  config.adaptive_digital.max_output_noise_level_dbfs = 5.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_output_noise_level_dbfs = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  config.adaptive_digital.max_output_noise_level_dbfs = -5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2,
+     CheckGetRecommendedInputVolumeWhenInputVolumeControllerNotEnabled) {
+  constexpr float kHighInputLevel = 32767.0f;
+  constexpr float kLowInputLevel = 1000.0f;
+  constexpr int kInitialInputVolume = 100;
+  constexpr int kNumChannels = 2;
+  constexpr int kNumFrames = 5;
+  constexpr int kSampleRateHz = 16000;
+
+  Agc2Config config;
+  config.input_volume_controller.enabled = false;
+
+  auto gain_controller = std::make_unique<GainController2>(
+      config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
+      /*use_internal_vad=*/true);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with no clipping or detected speech.
+  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with clipping.
+  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+}
+
+TEST(
+    GainController2,
+    CheckGetRecommendedInputVolumeWhenInputVolumeControllerNotEnabledAndSpecificConfigUsed) {
+  constexpr float kHighInputLevel = 32767.0f;
+  constexpr float kLowInputLevel = 1000.0f;
+  constexpr int kInitialInputVolume = 100;
+  constexpr int kNumChannels = 2;
+  constexpr int kNumFrames = 5;
+  constexpr int kSampleRateHz = 16000;
+
+  Agc2Config config;
+  config.input_volume_controller.enabled = false;
+
+  auto gain_controller = std::make_unique<GainController2>(
+      config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
+      /*use_internal_vad=*/true);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with no clipping or detected speech.
+  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with clipping.
+  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+}
+
+TEST(GainController2,
+     CheckGetRecommendedInputVolumeWhenInputVolumeControllerEnabled) {
+  constexpr float kHighInputLevel = 32767.0f;
+  constexpr float kLowInputLevel = 1000.0f;
+  constexpr int kInitialInputVolume = 100;
+  constexpr int kNumChannels = 2;
+  constexpr int kNumFrames = 5;
+  constexpr int kSampleRateHz = 16000;
+
+  Agc2Config config;
+  config.input_volume_controller.enabled = true;
+  config.adaptive_digital.enabled = true;
+
+  auto gain_controller = std::make_unique<GainController2>(
+      config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
+      /*use_internal_vad=*/true);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with no clipping or detected speech.
+  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with clipping.
+  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
+}
+
+TEST(
+    GainController2,
+    CheckGetRecommendedInputVolumeWhenInputVolumeControllerEnabledAndSpecificConfigUsed) {
+  constexpr float kHighInputLevel = 32767.0f;
+  constexpr float kLowInputLevel = 1000.0f;
+  constexpr int kInitialInputVolume = 100;
+  constexpr int kNumChannels = 2;
+  constexpr int kNumFrames = 5;
+  constexpr int kSampleRateHz = 16000;
+
+  Agc2Config config;
+  config.input_volume_controller.enabled = true;
+  config.adaptive_digital.enabled = true;
+
+  auto gain_controller = std::make_unique<GainController2>(
+      config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
+      /*use_internal_vad=*/true);
+
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with no clipping or detected speech.
+  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
+
+  // Run AGC for a signal with clipping.
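+  // (With the input volume controller enabled, a recommendation stays
+  // available even when the input clips.)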
+  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
+                           kSampleRateHz, kNumChannels, kInitialInputVolume);
+
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
+}
+
+// Checks that the default config is applied.
+TEST(GainController2, ApplyDefaultConfig) {
+  auto gain_controller2 = std::make_unique<GainController2>(
+      Agc2Config{}, InputVolumeControllerConfig{},
+      /*sample_rate_hz=*/16000, /*num_channels=*/2,
+      /*use_internal_vad=*/true);
+  EXPECT_TRUE(gain_controller2.get());
+}
+
+TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) {
+  constexpr float kInputLevel = 1000.0f;
+  constexpr size_t kNumFrames = 5;
+  constexpr size_t kSampleRateHz = 8000;
+  constexpr float kGain0Db = 0.0f;
+  constexpr float kGain20Db = 20.0f;
+
+  auto agc2_fixed = CreateAgc2FixedDigitalMode(kGain0Db, kSampleRateHz);
+
+  // Signal level is unchanged with 0 dB gain.
+  EXPECT_FLOAT_EQ(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel, kNumFrames,
+                                           kSampleRateHz),
+                  kInputLevel);
+
+  // +20 dB should increase the signal by a factor of 10.
+  agc2_fixed->SetFixedGainDb(kGain20Db);
+  EXPECT_FLOAT_EQ(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel, kNumFrames,
+                                           kSampleRateHz),
+                  kInputLevel * 10);
+}
+
+TEST(GainController2FixedDigital, ChangeFixedGainShouldBeFastAndTimeInvariant) {
+  // Number of frames required for the fixed gain controller to adapt on the
+  // input signal when the gain changes.
+  constexpr size_t kNumFrames = 5;
+
+  constexpr float kInputLevel = 1000.0f;
+  constexpr size_t kSampleRateHz = 8000;
+  constexpr float kGainDbLow = 0.0f;
+  constexpr float kGainDbHigh = 25.0f;
+  static_assert(kGainDbLow < kGainDbHigh, "");
+
+  auto agc2_fixed = CreateAgc2FixedDigitalMode(kGainDbLow, kSampleRateHz);
+
+  // Start with a lower gain.
+  const float output_level_pre = RunAgc2WithConstantInput(
+      *agc2_fixed, kInputLevel, kNumFrames, kSampleRateHz);
+
+  // Increase gain.
+  agc2_fixed->SetFixedGainDb(kGainDbHigh);
+  static_cast<void>(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel,
+                                             kNumFrames, kSampleRateHz));
+
+  // Back to the lower gain.
+  agc2_fixed->SetFixedGainDb(kGainDbLow);
+  const float output_level_post = RunAgc2WithConstantInput(
+      *agc2_fixed, kInputLevel, kNumFrames, kSampleRateHz);
+
+  EXPECT_EQ(output_level_pre, output_level_post);
+}
+
+class FixedDigitalTest
+    : public ::testing::TestWithParam<std::tuple<float, float, int, bool>> {
+ protected:
+  float gain_db_min() const { return std::get<0>(GetParam()); }
+  float gain_db_max() const { return std::get<1>(GetParam()); }
+  int sample_rate_hz() const { return std::get<2>(GetParam()); }
+  bool saturation_expected() const { return std::get<3>(GetParam()); }
+};
+
+TEST_P(FixedDigitalTest, CheckSaturationBehaviorWithLimiter) {
+  for (const float gain_db :
+       test::LinSpace(gain_db_min(), gain_db_max(), 10)) {
+    SCOPED_TRACE(gain_db);
+    auto agc2_fixed = CreateAgc2FixedDigitalMode(gain_db, sample_rate_hz());
+    const float processed_sample =
+        RunAgc2WithConstantInput(*agc2_fixed, /*input_level=*/32767.0f,
+                                 /*num_frames=*/5, sample_rate_hz());
+    if (saturation_expected()) {
+      EXPECT_FLOAT_EQ(processed_sample, 32767.0f);
+    } else {
+      EXPECT_LT(processed_sample, 32767.0f);
+    }
+  }
+}
+
+static_assert(test::kLimiterMaxInputLevelDbFs < 10, "");
+INSTANTIATE_TEST_SUITE_P(
+    GainController2,
+    FixedDigitalTest,
+    ::testing::Values(
+        // When gain < `test::kLimiterMaxInputLevelDbFs`, the limiter will not
+        // saturate the signal (at any sample rate).
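+        // Tuple fields: (gain_db_min, gain_db_max, sample_rate_hz,
+        // saturation_expected); see FixedDigitalTest above.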
+        std::make_tuple(0.1f,
+                        test::kLimiterMaxInputLevelDbFs - 0.01f,
+                        8000,
+                        false),
+        std::make_tuple(0.1,
+                        test::kLimiterMaxInputLevelDbFs - 0.01f,
+                        48000,
+                        false),
+        // When gain > `test::kLimiterMaxInputLevelDbFs`, the limiter will
+        // saturate the signal (at any sample rate).
+        std::make_tuple(test::kLimiterMaxInputLevelDbFs + 0.01f,
+                        10.0f,
+                        8000,
+                        true),
+        std::make_tuple(test::kLimiterMaxInputLevelDbFs + 0.01f,
+                        10.0f,
+                        48000,
+                        true)));
+
+// Processes a test audio file and checks that the gain applied at the end of
+// the recording is close to the expected value.
+TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+
+  // Create AGC2 enabling only the adaptive digital controller.
+  Agc2Config config;
+  config.fixed_digital.gain_db = 0.0f;
+  config.adaptive_digital.enabled = true;
+  GainController2 agc2(config, /*input_volume_controller_config=*/{},
+                       kSampleRateHz, kStereo,
+                       /*use_internal_vad=*/true);
+
+  test::InputAudioFile input_file(
+      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
+      /*loop_at_end=*/true);
+  const StreamConfig stream_config(kSampleRateHz, kStereo);
+
+  // Init buffers.
+  constexpr int kFrameDurationMs = 10;
+  std::vector<float> frame(kStereo * stream_config.num_frames());
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+
+  // Simulate.
+  constexpr float kGainDb = -6.0f;
+  const float gain = std::pow(10.0f, kGainDb / 20.0f);
+  constexpr int kDurationMs = 10000;
+  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
+  for (int i = 0; i < kNumFramesToProcess; ++i) {
+    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
+                                   stream_config.num_channels(), &input_file,
+                                   frame);
+    // Apply a fixed gain to the input audio.
+    for (float& x : frame) {
+      x *= gain;
+    }
+    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
+    agc2.Process(/*speech_probability=*/absl::nullopt,
+                 /*input_volume_changed=*/false, &audio_buffer);
+  }
+
+  // Estimate the applied gain by processing a probing frame.
+  SetAudioBufferSamples(/*value=*/1.0f, audio_buffer);
+  agc2.Process(/*speech_probability=*/absl::nullopt,
+               /*input_volume_changed=*/false, &audio_buffer);
+  const float applied_gain_db =
+      20.0f * std::log10(audio_buffer.channels_const()[0][0]);
+
+  constexpr float kExpectedGainDb = 5.6f;
+  constexpr float kToleranceDb = 0.3f;
+  EXPECT_NEAR(applied_gain_db, kExpectedGainDb, kToleranceDb);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Checks that `GainController2` crashes in debug mode if it runs its internal
+// VAD and the speech probability values are provided by the caller.
+TEST(GainController2DeathTest,
+     DebugCrashIfUseInternalVadAndSpeechProbabilityGiven) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  // Create AGC2 so that the internal VAD is also created.
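+  // Providing an external speech probability to Process() below therefore
+  // violates its precondition and should hit an RTC_DCHECK.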
+  GainController2 agc2(/*config=*/{.adaptive_digital = {.enabled = true}},
+                       /*input_volume_controller_config=*/{}, kSampleRateHz,
+                       kStereo,
+                       /*use_internal_vad=*/true);
+
+  EXPECT_DEATH(agc2.Process(/*speech_probability=*/0.123f,
+                            /*input_volume_changed=*/false, &audio_buffer),
+               "");
+}
+#endif
+
+// Processes a test audio file and checks that the injected speech probability
+// is not ignored when the internal VAD is not used.
+TEST(GainController2,
+     CheckInjectedVadProbabilityUsedWithAdaptiveDigitalController) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+
+  // Create AGC2 enabling only the adaptive digital controller.
+  Agc2Config config;
+  config.fixed_digital.gain_db = 0.0f;
+  config.adaptive_digital.enabled = true;
+  GainController2 agc2(config, /*input_volume_controller_config=*/{},
+                       kSampleRateHz, kStereo,
+                       /*use_internal_vad=*/false);
+  GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
+                                 kSampleRateHz, kStereo,
+                                 /*use_internal_vad=*/true);
+
+  test::InputAudioFile input_file(
+      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
+      /*loop_at_end=*/true);
+  const StreamConfig stream_config(kSampleRateHz, kStereo);
+
+  // Init buffers.
+  constexpr int kFrameDurationMs = 10;
+  std::vector<float> frame(kStereo * stream_config.num_frames());
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
+                                     kStereo, kSampleRateHz, kStereo);
+  // Simulate.
+  constexpr float kGainDb = -6.0f;
+  const float gain = std::pow(10.0f, kGainDb / 20.0f);
+  constexpr int kDurationMs = 10000;
+  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
+  constexpr float kSpeechProbabilities[] = {1.0f, 0.3f};
+  constexpr float kEpsilon = 0.0001f;
+  bool all_samples_zero = true;
+  bool all_samples_equal = true;
+  for (int i = 0, j = 0; i < kNumFramesToProcess; ++i, j = 1 - j) {
+    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
+                                   stream_config.num_channels(), &input_file,
+                                   frame);
+    // Apply a fixed gain to the input audio.
+    for (float& x : frame) {
+      x *= gain;
+    }
+    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
+    agc2.Process(kSpeechProbabilities[j], /*input_volume_changed=*/false,
+                 &audio_buffer);
+    test::CopyVectorToAudioBuffer(stream_config, frame,
+                                  &audio_buffer_reference);
+    agc2_reference.Process(/*speech_probability=*/absl::nullopt,
+                           /*input_volume_changed=*/false,
+                           &audio_buffer_reference);
+    // Check the output buffers.
+    for (int i = 0; i < kStereo; ++i) {
+      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
+        all_samples_zero &=
+            fabs(audio_buffer.channels_const()[i][j]) < kEpsilon;
+        all_samples_equal &=
+            fabs(audio_buffer.channels_const()[i][j] -
+                 audio_buffer_reference.channels_const()[i][j]) < kEpsilon;
+      }
+    }
+  }
+  EXPECT_FALSE(all_samples_zero);
+  EXPECT_FALSE(all_samples_equal);
+}
+
+// Processes a test audio file and checks that the output is equal when
+// an injected speech probability from `VoiceActivityDetectorWrapper` and
+// the speech probability computed by the internal VAD are the same.
+TEST(GainController2,
+     CheckEqualResultFromInjectedVadProbabilityWithAdaptiveDigitalController) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+
+  // Create AGC2 enabling only the adaptive digital controller.
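+  // Two controllers are compared below: `agc2` consumes the injected speech
+  // probabilities while `agc2_reference` recomputes them with its internal
+  // VAD; identical probabilities should yield identical output.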
+  Agc2Config config;
+  config.fixed_digital.gain_db = 0.0f;
+  config.adaptive_digital.enabled = true;
+  GainController2 agc2(config, /*input_volume_controller_config=*/{},
+                       kSampleRateHz, kStereo,
+                       /*use_internal_vad=*/false);
+  GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
+                                 kSampleRateHz, kStereo,
+                                 /*use_internal_vad=*/true);
+  VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz);
+  test::InputAudioFile input_file(
+      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
+      /*loop_at_end=*/true);
+  const StreamConfig stream_config(kSampleRateHz, kStereo);
+
+  // Init buffers.
+  constexpr int kFrameDurationMs = 10;
+  std::vector<float> frame(kStereo * stream_config.num_frames());
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
+                                     kStereo, kSampleRateHz, kStereo);
+
+  // Simulate.
+  constexpr float kGainDb = -6.0f;
+  const float gain = std::pow(10.0f, kGainDb / 20.0f);
+  constexpr int kDurationMs = 10000;
+  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
+  for (int i = 0; i < kNumFramesToProcess; ++i) {
+    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
+                                   stream_config.num_channels(), &input_file,
+                                   frame);
+    // Apply a fixed gain to the input audio.
+    for (float& x : frame) {
+      x *= gain;
+    }
+    test::CopyVectorToAudioBuffer(stream_config, frame,
+                                  &audio_buffer_reference);
+    agc2_reference.Process(absl::nullopt, /*input_volume_changed=*/false,
+                           &audio_buffer_reference);
+    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
+    float speech_probability = vad.Analyze(AudioFrameView<float>(
+        audio_buffer.channels(), audio_buffer.num_channels(),
+        audio_buffer.num_frames()));
+    agc2.Process(speech_probability, /*input_volume_changed=*/false,
+                 &audio_buffer);
+    // Check the output buffer.
+    for (int i = 0; i < kStereo; ++i) {
+      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[i][j],
+                        audio_buffer_reference.channels_const()[i][j]);
+      }
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc
new file mode 100644
index 0000000000..3b4740f6a5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/high_pass_filter.h"
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+// [B,A] = butter(2,100/8000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients16kHz = {{0.97261f, -1.94523f, 0.97261f},
+                                        {-1.94448f, 0.94598f}};
+
+// [B,A] = butter(2,100/16000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients32kHz = {{0.98621f, -1.97242f, 0.98621f},
+                                        {-1.97223f, 0.97261f}};
+
+// [B,A] = butter(2,100/24000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients48kHz = {{0.99079f, -1.98157f, 0.99079f},
+                                        {-1.98149f, 0.98166f}};
+
+constexpr size_t kNumberOfHighPassBiQuads = 1;
+
+const CascadedBiQuadFilter::BiQuadCoefficients& ChooseCoefficients(
+    int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 16000:
+      return kHighPassFilterCoefficients16kHz;
+    case 32000:
+      return kHighPassFilterCoefficients32kHz;
+    case 48000:
+      return kHighPassFilterCoefficients48kHz;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  RTC_DCHECK_NOTREACHED();
+  return kHighPassFilterCoefficients16kHz;
+}
+
+}  // namespace
+
+HighPassFilter::HighPassFilter(int sample_rate_hz, size_t num_channels)
+    : sample_rate_hz_(sample_rate_hz) {
+  filters_.resize(num_channels);
+  const auto& coefficients = ChooseCoefficients(sample_rate_hz_);
+  for (size_t k = 0; k < filters_.size(); ++k) {
+    filters_[k].reset(
+        new CascadedBiQuadFilter(coefficients, kNumberOfHighPassBiQuads));
+  }
+}
+
+HighPassFilter::~HighPassFilter() = default;
+
+void HighPassFilter::Process(AudioBuffer* audio, bool use_split_band_data) {
+  RTC_DCHECK(audio);
+  RTC_DCHECK_EQ(filters_.size(), audio->num_channels());
+  if (use_split_band_data) {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_data = rtc::ArrayView<float>(
+          audio->split_bands(k)[0], audio->num_frames_per_band());
+      filters_[k]->Process(channel_data);
+    }
+  } else {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_data =
+          rtc::ArrayView<float>(&audio->channels()[k][0], audio->num_frames());
+      filters_[k]->Process(channel_data);
+    }
+  }
+}
+
+void HighPassFilter::Process(std::vector<std::vector<float>>* audio) {
+  RTC_DCHECK_EQ(filters_.size(), audio->size());
+  for (size_t k = 0; k < audio->size(); ++k) {
+    filters_[k]->Process((*audio)[k]);
+  }
+}
+
+void HighPassFilter::Reset() {
+  for (size_t k = 0; k < filters_.size(); ++k) {
+    filters_[k]->Reset();
+  }
+}
+
+void HighPassFilter::Reset(size_t num_channels) {
+  const size_t old_num_channels = filters_.size();
+  filters_.resize(num_channels);
+  if (filters_.size() < old_num_channels) {
+    Reset();
+  } else {
+    for (size_t k = 0; k < old_num_channels; ++k) {
+      filters_[k]->Reset();
+    }
+    const auto& coefficients = ChooseCoefficients(sample_rate_hz_);
+    for (size_t k = old_num_channels; k < filters_.size(); ++k) {
+      filters_[k].reset(
+          new CascadedBiQuadFilter(coefficients, kNumberOfHighPassBiQuads));
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h
new file mode 100644
index 0000000000..7e7c370cd1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+class HighPassFilter {
+ public:
+  HighPassFilter(int sample_rate_hz, size_t num_channels);
+  ~HighPassFilter();
+  HighPassFilter(const HighPassFilter&) = delete;
+  HighPassFilter& operator=(const HighPassFilter&) = delete;
+
+  void Process(AudioBuffer* audio, bool use_split_band_data);
+  void Process(std::vector<std::vector<float>>* audio);
+  void Reset();
+  void Reset(size_t num_channels);
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+  size_t num_channels() const { return filters_.size(); }
+
+ private:
+  const int sample_rate_hz_;
+  std::vector<std::unique_ptr<CascadedBiQuadFilter>> filters_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build
new file mode 100644
index 0000000000..4c8aac9c3d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + 
"winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("high_pass_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc new file mode 100644 index 0000000000..9f3c8fe595 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/high_pass_filter.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Process one frame of data via the AudioBuffer interface and produce the +// output. 
+std::vector<float> ProcessOneFrameAsAudioBuffer(
+    const std::vector<float>& frame_input,
+    const StreamConfig& stream_config,
+    HighPassFilter* high_pass_filter) {
+  AudioBuffer audio_buffer(
+      stream_config.sample_rate_hz(), stream_config.num_channels(),
+      stream_config.sample_rate_hz(), stream_config.num_channels(),
+      stream_config.sample_rate_hz(), stream_config.num_channels());
+
+  test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);
+  high_pass_filter->Process(&audio_buffer, /*use_split_band_data=*/false);
+  std::vector<float> frame_output;
+  test::ExtractVectorFromAudioBuffer(stream_config, &audio_buffer,
+                                     &frame_output);
+  return frame_output;
+}
+
+// Process one frame of data via the vector interface and produce the output.
+std::vector<float> ProcessOneFrameAsVector(
+    const std::vector<float>& frame_input,
+    const StreamConfig& stream_config,
+    HighPassFilter* high_pass_filter) {
+  std::vector<std::vector<float>> process_vector(
+      stream_config.num_channels(),
+      std::vector<float>(stream_config.num_frames()));
+
+  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
+    for (size_t channel = 0; channel < stream_config.num_channels();
+         ++channel) {
+      process_vector[channel][k] =
+          frame_input[k * stream_config.num_channels() + channel];
+    }
+  }
+
+  high_pass_filter->Process(&process_vector);
+
+  std::vector<float> output;
+  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
+    for (size_t channel = 0; channel < stream_config.num_channels();
+         ++channel) {
+      output.push_back(process_vector[channel][k]);
+    }
+  }
+
+  return output;
+}
+
+// Processes a specified amount of frames, verifies the results and reports
+// any errors.
+void RunBitexactnessTest(int num_channels,
+                         bool use_audio_buffer_interface,
+                         const std::vector<float>& input,
+                         const std::vector<float>& reference) {
+  const StreamConfig stream_config(16000, num_channels);
+  HighPassFilter high_pass_filter(16000, num_channels);
+
+  std::vector<float> output;
+  const size_t num_frames_to_process =
+      input.size() /
+      (stream_config.num_frames() * stream_config.num_channels());
+  for (size_t frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {
+    std::vector<float> frame_input(
+        input.begin() + stream_config.num_frames() *
+                            stream_config.num_channels() * frame_no,
+        input.begin() + stream_config.num_frames() *
+                            stream_config.num_channels() * (frame_no + 1));
+    if (use_audio_buffer_interface) {
+      output = ProcessOneFrameAsAudioBuffer(frame_input, stream_config,
+                                            &high_pass_filter);
+    } else {
+      output = ProcessOneFrameAsVector(frame_input, stream_config,
+                                       &high_pass_filter);
+    }
+  }
+
+  // Form vector to compare the reference to. Only the last frame processed
+  // is compared, to avoid having to specify all preceding frames as inputs.
+  // As the algorithm being tested has a memory, testing only the last frame
+  // implicitly also tests the preceding frames.
+  const size_t reference_frame_length =
+      reference.size() / stream_config.num_channels();
+  std::vector<float> output_to_verify;
+  for (size_t channel_no = 0; channel_no < stream_config.num_channels();
+       ++channel_no) {
+    output_to_verify.insert(
+        output_to_verify.end(),
+        output.begin() + channel_no * stream_config.num_frames(),
+        output.begin() + channel_no * stream_config.num_frames() +
+            reference_frame_length);
+  }
+
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      reference_frame_length, num_channels, reference, output_to_verify,
+      kElementErrorBound));
+}
+
+// Method for forming a vector out of an array.
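+// (rtc::ArrayView is a non-owning view over contiguous data; copying its
+// elements yields an owning std::vector for the test inputs below.)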
+// TODO(peah): Remove once braced initialization is allowed.
+std::vector<float> CreateVector(
+    const rtc::ArrayView<const float>& array_view) {
+  std::vector<float> v;
+  for (auto value : array_view) {
+    v.push_back(value);
+  }
+  return v;
+}
+}  // namespace
+
+TEST(HighPassFilterAccuracyTest, ResetWithAudioBufferInterface) {
+  const StreamConfig stream_config_stereo(16000, 2);
+  const StreamConfig stream_config_mono(16000, 1);
+  std::vector<float> x_mono(160, 1.f);
+  std::vector<float> x_stereo(320, 1.f);
+  HighPassFilter hpf(16000, 1);
+  std::vector<float> y =
+      ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+  hpf.Reset(2);
+  y = ProcessOneFrameAsAudioBuffer(x_stereo, stream_config_stereo, &hpf);
+  hpf.Reset(1);
+  y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+  hpf.Reset();
+  y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+}
+
+TEST(HighPassFilterAccuracyTest, ResetWithVectorInterface) {
+  const StreamConfig stream_config_stereo(16000, 2);
+  const StreamConfig stream_config_mono(16000, 1);
+  std::vector<float> x_mono(160, 1.f);
+  std::vector<float> x_stereo(320, 1.f);
+  HighPassFilter hpf(16000, 1);
+  std::vector<float> y =
+      ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+  hpf.Reset(2);
+  y = ProcessOneFrameAsVector(x_stereo, stream_config_stereo, &hpf);
+  hpf.Reset(1);
+  y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+  hpf.Reset();
+  y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+}
+
+TEST(HighPassFilterAccuracyTest, MonoInitial) {
+  const float kReferenceInput[] = {
+      0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f,
+      -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f,
+      0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f,
+      0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f,
+      0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f,
+      0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f,
+      0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f,
+      0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f,
+      0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f,
+      -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f,
+      -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f,
+      -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f,
+      -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f,
+      -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f,
+      -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, -0.325801f,
+      -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f,
+      -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f,
+      0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f,
+      0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f,
+      0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f,
+      0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f,
+      0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f,
+      -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f,
+      0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f,
+      -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f,
+      0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f,
+      -0.199679f, 0.377651f, -0.062813f, -0.164607f};
+  const float kReference[] = {0.146139f, 0.490336f, -0.649520f, 0.233881f,
+                              0.073214f, -0.373256f, -0.115394f, 0.102109f,
+                              0.976217f, 0.702270f, -0.457697f, 0.757116f};
+
+  for (bool use_audio_buffer_interface : {true, false}) {
+    RunBitexactnessTest(
+        1, use_audio_buffer_interface,
+        CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
+        CreateVector(rtc::ArrayView<const float>(kReference)));
+  }
+}
+
+TEST(HighPassFilterAccuracyTest, MonoConverged) {
+  const float kReferenceInput[] = {
+      0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f,
+      -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f,
+      0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f,
+      0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f,
+      0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f,
+      0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f,
+      0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f,
+      0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f,
+      0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f,
+      -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f,
+      -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f,
+      -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f,
+      -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f,
+      -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f,
+      -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, -0.325801f,
+      -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f,
+      -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f,
+      0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f,
+      0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f,
+      0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f,
+      0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f,
+      0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f,
+      -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f,
+      0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f,
+      -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f,
+      0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f,
+      -0.199679f, 0.377651f, -0.062813f, -0.164607f, -0.082091f, -0.236957f,
+      -0.313025f, 0.705903f, 0.462637f, 0.085942f, -0.351308f, -0.241859f,
+      -0.049333f, 0.221165f, -0.372235f, -0.651092f, -0.404957f, 0.093201f,
+      0.109366f, 0.126224f, -0.036409f, 0.051333f, -0.133063f, 0.240896f,
+      -0.380532f, 0.127160f, -0.237176f, -0.093586f, 0.154478f, 0.290379f,
+      -0.312329f, 0.352297f, 0.184480f, -0.018965f, -0.054555f, -0.060811f,
+      -0.084705f, 0.006440f, 0.014333f, 0.230847f, 0.426721f, 0.130481f,
+      -0.058605f, 0.174712f, 0.051204f, -0.287773f, 0.265265f, 0.085810f,
+      0.037775f, 0.143988f, 0.073051f, -0.263103f, -0.045366f, -0.040816f,
+      -0.148673f, 0.470072f, -0.244727f, -0.135204f, -0.198973f, -0.328139f,
+      -0.053722f, -0.076590f, 0.427586f, -0.069591f, -0.297399f, 0.448094f,
+      0.345037f, -0.064170f, -0.420903f, -0.124253f, -0.043578f, 0.077149f,
+      -0.072983f, 0.123916f, 0.109517f, -0.349508f, -0.264912f, -0.207106f,
+      -0.141912f, -0.089586f, 0.003485f, -0.846518f, -0.127715f, 0.347208f,
+      -0.298095f, 0.260935f, 0.097899f, -0.008106f, 0.050987f, -0.437362f,
+      -0.023625f, 0.448230f, 0.027484f, 0.011562f, -0.205167f, -0.008611f,
+      0.064930f, 0.119156f, -0.104183f, -0.066078f, 0.565530f, -0.631108f,
+      0.623029f, 0.094334f, 0.279472f, -0.465059f, -0.164888f, -0.077706f,
+      0.118130f, -0.466746f, 0.131800f, -0.338936f, 0.018497f, 0.182304f,
+      0.091398f, 0.302547f, 0.281153f, -0.181899f, 0.071836f, -0.263911f,
+      -0.369380f, 0.258447f, 0.000014f, -0.015347f, 0.254619f, 0.166159f,
+      0.097865f, 0.349389f, 0.259834f, 0.067003f, -0.192925f, -0.182080f,
+      0.333139f, -0.450434f, -0.006836f, -0.544615f, 0.285183f, 0.240811f,
+      0.000325f, -0.019796f, -0.694804f, 0.162411f, -0.612686f, -0.648134f,
+      0.022338f, -0.265058f, 0.114993f, 0.189185f, 0.239697f, -0.193148f,
+      0.125581f, 0.028122f, 0.230849f, 0.149832f, 0.250919f, -0.036871f,
+      -0.041136f, 0.281627f, -0.593466f, -0.141009f, -0.355074f, -0.106915f,
+      0.181276f, 0.230753f, -0.283631f, -0.131643f, 0.038292f, -0.081563f,
+      0.084345f, 0.111763f, -0.259882f, -0.049416f, -0.595824f, 0.320077f,
+      -0.175802f, -0.336422f, -0.070966f, -0.399242f, -0.005829f, -0.156680f,
+      0.608591f, 0.318150f, -0.697767f, 0.123331f, -0.390716f, -0.071276f,
+      0.045943f, 0.208958f, -0.076304f, 0.440505f, -0.134400f, 0.091525f,
+      0.185763f, 0.023806f, 0.246186f, 0.090323f, -0.219133f, -0.504520f,
+      0.519393f, -0.168939f, 0.028884f, 0.157380f, 0.031745f, -0.252830f,
+      -0.130705f, -0.034901f, 0.413302f, -0.240559f, 0.219279f, 0.086246f,
+      -0.065353f, -0.295376f, -0.079405f, -0.024226f, -0.410629f, 0.053706f,
+      -0.229794f, -0.026336f, 0.093956f, -0.252810f, -0.080555f, 0.097827f,
+      -0.513040f, 0.289508f, 0.677527f, 0.268109f, -0.088244f, 0.119781f,
+      -0.289511f, 0.524778f, 0.262884f, 0.220028f, -0.244767f, 0.089411f,
+      -0.156018f, -0.087030f, -0.159292f, -0.286646f, -0.253953f, -0.058657f,
+      -0.474756f, 0.169797f, -0.032919f, 0.195384f, 0.075355f, 0.138131f,
+      -0.414465f, -0.285118f, -0.124915f, 0.030645f, 0.315431f, -0.081032f,
+      0.352546f, 0.132860f, 0.328112f, 0.035476f, -0.183550f, -0.413984f,
+      0.043452f, 0.228748f, -0.081765f, -0.151125f, -0.086251f, -0.306448f,
+      -0.137774f, -0.050508f, 0.012811f, -0.017824f, 0.170841f, 0.030549f,
+      0.506935f, 0.087197f, 0.504274f, -0.202080f, 0.147146f, -0.072728f,
+      0.167713f, 0.165977f, -0.610894f, -0.370849f, -0.402698f, 0.112297f,
+      0.410855f, -0.091330f, 0.227008f, 0.152454f, -0.293884f, 0.111074f,
+      -0.210121f, 0.423728f, -0.009101f, 0.457188f, -0.118785f, 0.164720f,
+      -0.017547f, -0.565046f, -0.274461f, 0.171169f, -0.015338f, -0.312635f,
+      -0.175044f, 0.069729f, -0.277504f, 0.272454f, -0.179049f, 0.505495f,
+      -0.301774f, 0.055664f, -0.425058f, -0.202222f, -0.165787f, 0.112155f,
+      0.263284f, 0.083972f, -0.104256f, 0.227892f, 0.223253f, 0.033592f,
+      0.159638f, 0.115358f, -0.275811f, 0.212265f, -0.183658f, -0.168768f};
+
+  const float kReference[] = {-0.248836f, -0.086982f, 0.083715f, -0.036787f,
+                              0.127212f, 0.147464f, -0.221733f, -0.004484f,
+                              -0.535107f, 0.385999f, -0.116346f, -0.265302f};
+
+  for (bool use_audio_buffer_interface : {true, false}) {
+    RunBitexactnessTest(
+        1, use_audio_buffer_interface,
+        CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
+        CreateVector(rtc::ArrayView<const float>(kReference)));
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc
new file mode 100644
index 0000000000..8f788cb802
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/aec_dump.h"
+
+namespace webrtc {
+InternalAPMConfig::InternalAPMConfig() = default;
+InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default;
+InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
+InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
+    default;
+
+bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
+  return aec_enabled == other.aec_enabled &&
+         aec_delay_agnostic_enabled == other.aec_delay_agnostic_enabled &&
+         aec_drift_compensation_enabled ==
+             other.aec_drift_compensation_enabled &&
+         aec_extended_filter_enabled == other.aec_extended_filter_enabled &&
+         aec_suppression_level == other.aec_suppression_level &&
+         aecm_enabled == other.aecm_enabled &&
+         aecm_comfort_noise_enabled == other.aecm_comfort_noise_enabled &&
+         aecm_routing_mode == other.aecm_routing_mode &&
+         agc_enabled == other.agc_enabled && agc_mode == other.agc_mode &&
+         agc_limiter_enabled == other.agc_limiter_enabled &&
+         hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
+         ns_level == other.ns_level &&
+         transient_suppression_enabled == other.transient_suppression_enabled &&
+         noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
+         pre_amplifier_enabled == other.pre_amplifier_enabled &&
+         pre_amplifier_fixed_gain_factor ==
+             other.pre_amplifier_fixed_gain_factor &&
+         experiments_description == other.experiments_description;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h
new file mode 100644
index 0000000000..6f2eb64f3a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Struct for passing current config from APM without having to
+// include protobuf headers.
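+// The plain fields below mirror the settings serialized into the CONFIG
+// event of debug.proto.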
+struct InternalAPMConfig {
+  InternalAPMConfig();
+  InternalAPMConfig(const InternalAPMConfig&);
+  InternalAPMConfig(InternalAPMConfig&&);
+
+  InternalAPMConfig& operator=(const InternalAPMConfig&);
+  InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;
+
+  bool operator==(const InternalAPMConfig& other) const;
+
+  bool aec_enabled = false;
+  bool aec_delay_agnostic_enabled = false;
+  bool aec_drift_compensation_enabled = false;
+  bool aec_extended_filter_enabled = false;
+  int aec_suppression_level = 0;
+  bool aecm_enabled = false;
+  bool aecm_comfort_noise_enabled = false;
+  int aecm_routing_mode = 0;
+  bool agc_enabled = false;
+  int agc_mode = 0;
+  bool agc_limiter_enabled = false;
+  bool hpf_enabled = false;
+  bool ns_enabled = false;
+  int ns_level = 0;
+  bool transient_suppression_enabled = false;
+  bool noise_robust_agc_enabled = false;
+  bool pre_amplifier_enabled = false;
+  float pre_amplifier_fixed_gain_factor = 1.f;
+  std::string experiments_description = "";
+};
+
+// An interface for recording configuration and input/output streams
+// of the Audio Processing Module. The recordings are called
+// 'aec-dumps' and are stored in a protobuf format defined in
+// debug.proto.
+// The Write* methods are always safe to call concurrently or
+// otherwise for all implementing subclasses. The intended mode of
+// operation is to create a protobuf object from the input, and send
+// it away to be written to file asynchronously.
+class AecDump {
+ public:
+  struct AudioProcessingState {
+    int delay;
+    int drift;
+    absl::optional<int> applied_input_volume;
+    bool keypress;
+  };
+
+  virtual ~AecDump() = default;
+
+  // Logs Event::Type INIT message.
+  virtual void WriteInitMessage(const ProcessingConfig& api_format,
+                                int64_t time_now_ms) = 0;
+  ABSL_DEPRECATED("")
+  void WriteInitMessage(const ProcessingConfig& api_format) {
+    WriteInitMessage(api_format, 0);
+  }
+
+  // Logs Event::Type STREAM message. To log an input/output pair,
+  // call the AddCapture* and AddAudioProcessingState methods followed
+  // by a WriteCaptureStreamMessage call.
+  virtual void AddCaptureStreamInput(
+      const AudioFrameView<const float>& src) = 0;
+  virtual void AddCaptureStreamOutput(
+      const AudioFrameView<const float>& src) = 0;
+  virtual void AddCaptureStreamInput(const int16_t* const data,
+                                     int num_channels,
+                                     int samples_per_channel) = 0;
+  virtual void AddCaptureStreamOutput(const int16_t* const data,
+                                      int num_channels,
+                                      int samples_per_channel) = 0;
+  virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
+  virtual void WriteCaptureStreamMessage() = 0;
+
+  // Logs Event::Type REVERSE_STREAM message.
+  virtual void WriteRenderStreamMessage(const int16_t* const data,
+                                        int num_channels,
+                                        int samples_per_channel) = 0;
+  virtual void WriteRenderStreamMessage(
+      const AudioFrameView<const float>& src) = 0;
+
+  virtual void WriteRuntimeSetting(
+      const AudioProcessing::RuntimeSetting& runtime_setting) = 0;
+
+  // Logs Event::Type CONFIG message.
+  virtual void WriteConfig(const InternalAPMConfig& config) = 0;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc
new file mode 100644
index 0000000000..7cc4fb75e4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/include/audio_frame_proxies.h" + +#include "api/audio/audio_frame.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) { + if (!frame || !ap) { + return AudioProcessing::Error::kNullPointerError; + } + + StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_); + StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_); + RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames()); + + int result = ap->ProcessStream(frame->data(), input_config, output_config, + frame->mutable_data()); + + AudioProcessingStats stats = ap->GetStatistics(); + + if (stats.voice_detected) { + frame->vad_activity_ = *stats.voice_detected + ? AudioFrame::VADActivity::kVadActive + : AudioFrame::VADActivity::kVadPassive; + } + + return result; +} + +int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) { + if (!frame || !ap) { + return AudioProcessing::Error::kNullPointerError; + } + + // Must be a native rate. + if (frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate8kHz && + frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate16kHz && + frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate32kHz && + frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate48kHz) { + return AudioProcessing::Error::kBadSampleRateError; + } + + if (frame->num_channels_ <= 0) { + return AudioProcessing::Error::kBadNumberChannelsError; + } + + StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_); + StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_); + + int result = ap->ProcessReverseStream(frame->data(), input_config, + output_config, frame->mutable_data()); + return result; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h new file mode 100644 index 0000000000..5dd111ca2b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_ + +namespace webrtc { + +class AudioFrame; +class AudioProcessing; + +// Processes a 10 ms `frame` of the primary audio stream using the provided +// AudioProcessing object. On the client-side, this is the near-end (or +// captured) audio. The `sample_rate_hz_`, `num_channels_`, and +// `samples_per_channel_` members of `frame` must be valid. 
If changed from the
+// previous call to this function, it will trigger an initialization of the
+// provided AudioProcessing object.
+// The function returns any error codes passed from the AudioProcessing
+// ProcessStream method.
+int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
+
+// Processes a 10 ms `frame` of the reverse direction audio stream using the
+// provided AudioProcessing object. The frame may be modified. On the
+// client-side, this is the far-end (or to be rendered) audio. The
+// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
+// `frame` must be valid. If changed from the previous call to this function,
+// it will trigger an initialization of the provided AudioProcessing object.
+// The function returns any error codes passed from the AudioProcessing
+// ProcessReverseStream method.
+int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h
new file mode 100644
index 0000000000..164784a7cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Class to pass audio data in T** format, where T is a numeric type.
+template <class T>
+class AudioFrameView {
+ public:
+  // `num_channels` and `channel_size` describe the T**
+  // `audio_samples`. `audio_samples` is assumed to point to a
+  // two-dimensional |num_channels * channel_size| array of floats.
+  AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
+      : audio_samples_(audio_samples),
+        num_channels_(num_channels),
+        channel_size_(channel_size) {
+    RTC_DCHECK_GE(num_channels_, 0);
+    RTC_DCHECK_GE(channel_size_, 0);
+  }
+
+  // Implicit cast to allow converting Frame<float> to
+  // Frame<const float>.
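+  // For example (illustrative only):
+  //   AudioFrameView<float> mutable_view(channels, 2, 480);
+  //   AudioFrameView<const float> read_only_view = mutable_view;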
+  template <class U>
+  AudioFrameView(AudioFrameView<U> other)
+      : audio_samples_(other.data()),
+        num_channels_(other.num_channels()),
+        channel_size_(other.samples_per_channel()) {}
+
+  AudioFrameView() = delete;
+
+  int num_channels() const { return num_channels_; }
+
+  int samples_per_channel() const { return channel_size_; }
+
+  rtc::ArrayView<T> channel(int idx) {
+    RTC_DCHECK_LE(0, idx);
+    RTC_DCHECK_LE(idx, num_channels_);
+    return rtc::ArrayView<T>(audio_samples_[idx], channel_size_);
+  }
+
+  rtc::ArrayView<const T> channel(int idx) const {
+    RTC_DCHECK_LE(0, idx);
+    RTC_DCHECK_LE(idx, num_channels_);
+    return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
+  }
+
+  T* const* data() { return audio_samples_; }
+
+ private:
+  T* const* audio_samples_;
+  int num_channels_;
+  int channel_size_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc
new file mode 100644
index 0000000000..13ddcc588a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc
@@ -0,0 +1,210 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace {
+
+using Agc1Config = AudioProcessing::Config::GainController1;
+using Agc2Config = AudioProcessing::Config::GainController2;
+
+std::string NoiseSuppressionLevelToString(
+    const AudioProcessing::Config::NoiseSuppression::Level& level) {
+  switch (level) {
+    case AudioProcessing::Config::NoiseSuppression::Level::kLow:
+      return "Low";
+    case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
+      return "Moderate";
+    case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
+      return "High";
+    case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
+      return "VeryHigh";
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
+std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
+  switch (mode) {
+    case Agc1Config::Mode::kAdaptiveAnalog:
+      return "AdaptiveAnalog";
+    case Agc1Config::Mode::kAdaptiveDigital:
+      return "AdaptiveDigital";
+    case Agc1Config::Mode::kFixedDigital:
+      return "FixedDigital";
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
+}  // namespace
+
+constexpr int AudioProcessing::kNativeSampleRatesHz[];
+
+void CustomProcessing::SetRuntimeSetting(
+    AudioProcessing::RuntimeSetting setting) {}
+
+bool Agc1Config::operator==(const Agc1Config& rhs) const {
+  const auto& analog_lhs = analog_gain_controller;
+  const auto& analog_rhs = rhs.analog_gain_controller;
+  return enabled == rhs.enabled && mode == rhs.mode &&
+         target_level_dbfs == rhs.target_level_dbfs &&
+         compression_gain_db == rhs.compression_gain_db &&
+         enable_limiter == rhs.enable_limiter &&
+         analog_lhs.enabled == analog_rhs.enabled &&
+         analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
+         analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
+         analog_lhs.enable_digital_adaptive ==
+             analog_rhs.enable_digital_adaptive
&& + analog_lhs.clipped_level_step == analog_rhs.clipped_level_step && + analog_lhs.clipped_ratio_threshold == + analog_rhs.clipped_ratio_threshold && + analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames && + analog_lhs.clipping_predictor.mode == + analog_rhs.clipping_predictor.mode && + analog_lhs.clipping_predictor.window_length == + analog_rhs.clipping_predictor.window_length && + analog_lhs.clipping_predictor.reference_window_length == + analog_rhs.clipping_predictor.reference_window_length && + analog_lhs.clipping_predictor.reference_window_delay == + analog_rhs.clipping_predictor.reference_window_delay && + analog_lhs.clipping_predictor.clipping_threshold == + analog_rhs.clipping_predictor.clipping_threshold && + analog_lhs.clipping_predictor.crest_factor_margin == + analog_rhs.clipping_predictor.crest_factor_margin && + analog_lhs.clipping_predictor.use_predicted_step == + analog_rhs.clipping_predictor.use_predicted_step; +} + +bool Agc2Config::AdaptiveDigital::operator==( + const Agc2Config::AdaptiveDigital& rhs) const { + return enabled == rhs.enabled && headroom_db == rhs.headroom_db && + max_gain_db == rhs.max_gain_db && + initial_gain_db == rhs.initial_gain_db && + max_gain_change_db_per_second == rhs.max_gain_change_db_per_second && + max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs; +} + +bool Agc2Config::InputVolumeController::operator==( + const Agc2Config::InputVolumeController& rhs) const { + return enabled == rhs.enabled; +} + +bool Agc2Config::operator==(const Agc2Config& rhs) const { + return enabled == rhs.enabled && + fixed_digital.gain_db == rhs.fixed_digital.gain_db && + adaptive_digital == rhs.adaptive_digital && + input_volume_controller == rhs.input_volume_controller; +} + +bool AudioProcessing::Config::CaptureLevelAdjustment::operator==( + const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const { + return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor && + post_gain_factor == rhs.post_gain_factor && + analog_mic_gain_emulation == rhs.analog_mic_gain_emulation; +} + +bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation:: +operator==(const AudioProcessing::Config::CaptureLevelAdjustment:: + AnalogMicGainEmulation& rhs) const { + return enabled == rhs.enabled && initial_level == rhs.initial_level; +} + +std::string AudioProcessing::Config::ToString() const { + char buf[2048]; + rtc::SimpleStringBuilder builder(buf); + builder << "AudioProcessing::Config{ " + "pipeline: { " + "maximum_internal_processing_rate: " + << pipeline.maximum_internal_processing_rate + << ", multi_channel_render: " << pipeline.multi_channel_render + << ", multi_channel_capture: " << pipeline.multi_channel_capture + << " }, pre_amplifier: { enabled: " << pre_amplifier.enabled + << ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor + << " },capture_level_adjustment: { enabled: " + << capture_level_adjustment.enabled + << ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor + << ", post_gain_factor: " << capture_level_adjustment.post_gain_factor + << ", analog_mic_gain_emulation: { enabled: " + << capture_level_adjustment.analog_mic_gain_emulation.enabled + << ", initial_level: " + << capture_level_adjustment.analog_mic_gain_emulation.initial_level + << " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled + << " }, echo_canceller: { enabled: " << echo_canceller.enabled + << ", mobile_mode: " << echo_canceller.mobile_mode + << ", enforce_high_pass_filtering: " + << 
echo_canceller.enforce_high_pass_filtering + << " }, noise_suppression: { enabled: " << noise_suppression.enabled + << ", level: " + << NoiseSuppressionLevelToString(noise_suppression.level) + << " }, transient_suppression: { enabled: " + << transient_suppression.enabled + << " }, gain_controller1: { enabled: " << gain_controller1.enabled + << ", mode: " << GainController1ModeToString(gain_controller1.mode) + << ", target_level_dbfs: " << gain_controller1.target_level_dbfs + << ", compression_gain_db: " << gain_controller1.compression_gain_db + << ", enable_limiter: " << gain_controller1.enable_limiter + << ", analog_gain_controller { enabled: " + << gain_controller1.analog_gain_controller.enabled + << ", startup_min_volume: " + << gain_controller1.analog_gain_controller.startup_min_volume + << ", clipped_level_min: " + << gain_controller1.analog_gain_controller.clipped_level_min + << ", enable_digital_adaptive: " + << gain_controller1.analog_gain_controller.enable_digital_adaptive + << ", clipped_level_step: " + << gain_controller1.analog_gain_controller.clipped_level_step + << ", clipped_ratio_threshold: " + << gain_controller1.analog_gain_controller.clipped_ratio_threshold + << ", clipped_wait_frames: " + << gain_controller1.analog_gain_controller.clipped_wait_frames + << ", clipping_predictor: { enabled: " + << gain_controller1.analog_gain_controller.clipping_predictor.enabled + << ", mode: " + << gain_controller1.analog_gain_controller.clipping_predictor.mode + << ", window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .window_length + << ", reference_window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_length + << ", reference_window_delay: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_delay + << ", clipping_threshold: " + << gain_controller1.analog_gain_controller.clipping_predictor + .clipping_threshold + << ", crest_factor_margin: " + << gain_controller1.analog_gain_controller.clipping_predictor + .crest_factor_margin + << ", use_predicted_step: " + << gain_controller1.analog_gain_controller.clipping_predictor + .use_predicted_step + << " }}}, gain_controller2: { enabled: " << gain_controller2.enabled + << ", fixed_digital: { gain_db: " + << gain_controller2.fixed_digital.gain_db + << " }, adaptive_digital: { enabled: " + << gain_controller2.adaptive_digital.enabled + << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db + << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db + << ", initial_gain_db: " + << gain_controller2.adaptive_digital.initial_gain_db + << ", max_gain_change_db_per_second: " + << gain_controller2.adaptive_digital.max_gain_change_db_per_second + << ", max_output_noise_level_dbfs: " + << gain_controller2.adaptive_digital.max_output_noise_level_dbfs + << " }, input_volume_control : { enabled " + << gain_controller2.input_volume_controller.enabled << "}}"; + return builder.str(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h new file mode 100644 index 0000000000..f613a38de1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h @@ -0,0 +1,941 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+
+#include <math.h>
+#include <stddef.h>  // size_t
+#include <stdio.h>   // FILE
+#include <string.h>
+
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/ref_count.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace rtc {
+class TaskQueue;
+}  // namespace rtc
+
+namespace webrtc {
+
+class AecDump;
+class AudioBuffer;
+
+class StreamConfig;
+class ProcessingConfig;
+
+class EchoDetector;
+class CustomAudioAnalyzer;
+class CustomProcessing;
+
+// The Audio Processing Module (APM) provides a collection of voice processing
+// components designed for real-time communications software.
+//
+// APM operates on two audio streams on a frame-by-frame basis. Frames of the
+// primary stream, on which all processing is applied, are passed to
+// `ProcessStream()`. Frames of the reverse direction stream are passed to
+// `ProcessReverseStream()`. On the client-side, this will typically be the
+// near-end (capture) and far-end (render) streams, respectively. APM should be
+// placed in the signal chain as close to the audio hardware abstraction layer
+// (HAL) as possible.
+//
+// On the server-side, the reverse stream will normally not be used, with
+// processing occurring on each incoming stream.
+//
+// Component interfaces follow a similar pattern and are accessed through
+// corresponding getters in APM. All components are disabled at create-time,
+// with default settings that are recommended for most situations. New settings
+// can be applied without enabling a component. Enabling a component triggers
+// memory allocation and initialization to allow it to start processing the
+// streams.
+//
+// Thread safety is provided with the following assumptions to reduce locking
+// overhead:
+//   1. The stream getters and setters are called from the same thread as
+//      ProcessStream(). More precisely, stream functions are never called
+//      concurrently with ProcessStream().
+//   2. Parameter getters are never called concurrently with the corresponding
+//      setter.
+//
+// APM accepts only linear PCM audio data in chunks of ~10 ms (see
+// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
+// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
+// float interfaces use deinterleaved data.
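+//
+// For example (an editor's sketch, not part of the upstream comment), a
+// stereo 48 kHz frame on the float interface is passed as two deinterleaved
+// channel buffers of GetFrameSize(48000) == 480 samples each:
+//   webrtc::StreamConfig stream_config(48000, /*num_channels=*/2);
+//   float left[480];
+//   float right[480];
+//   float* channels[] = {left, right};
+//   apm->ProcessStream(channels, stream_config, stream_config, channels);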
+//
+// Usage example, omitting error checking:
+//   rtc::scoped_refptr<AudioProcessing> apm =
+//       AudioProcessingBuilder().Create();
+//
+//   AudioProcessing::Config config;
+//   config.echo_canceller.enabled = true;
+//   config.echo_canceller.mobile_mode = false;
+//
+//   config.gain_controller1.enabled = true;
+//   config.gain_controller1.mode =
+//       AudioProcessing::Config::GainController1::kAdaptiveAnalog;
+//   config.gain_controller1.analog_level_minimum = 0;
+//   config.gain_controller1.analog_level_maximum = 255;
+//
+//   config.gain_controller2.enabled = true;
+//
+//   config.high_pass_filter.enabled = true;
+//
+//   apm->ApplyConfig(config);
+//
+//   // Start a voice call...
+//
+//   // ... Render frame arrives bound for the audio HAL ...
+//   apm->ProcessReverseStream(render_frame);
+//
+//   // ... Capture frame arrives from the audio HAL ...
+//   // Call required set_stream_ functions.
+//   apm->set_stream_delay_ms(delay_ms);
+//   apm->set_stream_analog_level(analog_level);
+//
+//   apm->ProcessStream(capture_frame);
+//
+//   // Call required stream_ functions.
+//   analog_level = apm->recommended_stream_analog_level();
+//   has_voice = apm->stream_has_voice();
+//
+//   // Repeat render and capture processing for the duration of the call...
+//   // Start a new call...
+//   apm->Initialize();
+//
+//   // Close the application...
+//   apm.reset();
+//
+class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
+ public:
+  // The struct below constitutes the new parameter scheme for the audio
+  // processing. It is being introduced gradually and until it is fully
+  // introduced, it is prone to change.
+  // TODO(peah): Remove this comment once the new config scheme is fully rolled
+  // out.
+  //
+  // The parameters and behavior of the audio processing module are controlled
+  // by changing the default values in the AudioProcessing::Config struct.
+  // The config is applied by passing the struct to the ApplyConfig method.
+  //
+  // This config is intended to be used during setup, and to enable/disable
+  // top-level processing effects. Use during processing may cause undesired
+  // submodule resets, affecting the audio quality. Use the RuntimeSetting
+  // construct for runtime configuration.
+  struct RTC_EXPORT Config {
+    // Sets the properties of the audio processing pipeline.
+    struct RTC_EXPORT Pipeline {
+      // Ways to downmix a multi-channel track to mono.
+      enum class DownmixMethod {
+        kAverageChannels,  // Average across channels.
+        kUseFirstChannel   // Use the first channel.
+      };
+
+      // Maximum allowed processing rate used internally. May only be set to
+      // 32000 or 48000 and any differing values will be treated as 48000.
+      int maximum_internal_processing_rate = 48000;
+      // Allow multi-channel processing of render audio.
+      bool multi_channel_render = false;
+      // Allow multi-channel processing of capture audio when AEC3 is active
+      // or a custom AEC is injected.
+      bool multi_channel_capture = false;
+      // Indicates how to downmix multi-channel capture audio to mono (when
+      // needed).
+      DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
+    } pipeline;
+
+    // Enables the pre-amplifier. It amplifies the capture signal
+    // before any other processing is done.
+    // TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
+    // capture_level_adjustment instead.
+    struct PreAmplifier {
+      bool enabled = false;
+      float fixed_gain_factor = 1.0f;
+    } pre_amplifier;
+
+    // Functionality for general level adjustment in the capture pipeline.
This + // should not be used together with the legacy PreAmplifier functionality. + struct CaptureLevelAdjustment { + bool operator==(const CaptureLevelAdjustment& rhs) const; + bool operator!=(const CaptureLevelAdjustment& rhs) const { + return !(*this == rhs); + } + bool enabled = false; + // The `pre_gain_factor` scales the signal before any processing is done. + float pre_gain_factor = 1.0f; + // The `post_gain_factor` scales the signal after all processing is done. + float post_gain_factor = 1.0f; + struct AnalogMicGainEmulation { + bool operator==(const AnalogMicGainEmulation& rhs) const; + bool operator!=(const AnalogMicGainEmulation& rhs) const { + return !(*this == rhs); + } + bool enabled = false; + // Initial analog gain level to use for the emulated analog gain. Must + // be in the range [0...255]. + int initial_level = 255; + } analog_mic_gain_emulation; + } capture_level_adjustment; + + struct HighPassFilter { + bool enabled = false; + bool apply_in_full_band = true; + } high_pass_filter; + + struct EchoCanceller { + bool enabled = false; + bool mobile_mode = false; + bool export_linear_aec_output = false; + // Enforce the highpass filter to be on (has no effect for the mobile + // mode). + bool enforce_high_pass_filtering = true; + } echo_canceller; + + // Enables background noise suppression. + struct NoiseSuppression { + bool enabled = false; + enum Level { kLow, kModerate, kHigh, kVeryHigh }; + Level level = kModerate; + bool analyze_linear_aec_output_when_available = false; + } noise_suppression; + + // Enables transient suppression. + struct TransientSuppression { + bool enabled = false; + } transient_suppression; + + // Enables automatic gain control (AGC) functionality. + // The automatic gain control (AGC) component brings the signal to an + // appropriate range. This is done by applying a digital gain directly and, + // in the analog mode, prescribing an analog gain to be applied at the audio + // HAL. + // Recommended to be enabled on the client-side. + struct RTC_EXPORT GainController1 { + bool operator==(const GainController1& rhs) const; + bool operator!=(const GainController1& rhs) const { + return !(*this == rhs); + } + + bool enabled = false; + enum Mode { + // Adaptive mode intended for use if an analog volume control is + // available on the capture device. It will require the user to provide + // coupling between the OS mixer controls and AGC through the + // stream_analog_level() functions. + // It consists of an analog gain prescription for the audio device and a + // digital compression stage. + kAdaptiveAnalog, + // Adaptive mode intended for situations in which an analog volume + // control is unavailable. It operates in a similar fashion to the + // adaptive analog mode, but with scaling instead applied in the digital + // domain. As with the analog mode, it additionally uses a digital + // compression stage. + kAdaptiveDigital, + // Fixed mode which enables only the digital compression stage also used + // by the two adaptive modes. + // It is distinguished from the adaptive modes by considering only a + // short time-window of the input signal. It applies a fixed gain + // through most of the input level range, and compresses (gradually + // reduces gain with increasing level) the input signal at higher + // levels. This mode is preferred on embedded devices where the capture + // signal level is predictable, so that a known gain can be applied. 
+        kFixedDigital
+      };
+      Mode mode = kAdaptiveAnalog;
+      // Sets the target peak level (or envelope) of the AGC in dBFs (decibels
+      // from digital full-scale). The convention is to use positive values. For
+      // instance, passing in a value of 3 corresponds to -3 dBFs, or a target
+      // level 3 dB below full-scale. Limited to [0, 31].
+      int target_level_dbfs = 3;
+      // Sets the maximum gain the digital compression stage may apply, in dB. A
+      // higher number corresponds to greater compression, while a value of 0
+      // will leave the signal uncompressed. Limited to [0, 90].
+      // For updates after APM setup, use a RuntimeSetting instead.
+      int compression_gain_db = 9;
+      // When enabled, the compression stage will hard limit the signal to the
+      // target level. Otherwise, the signal will be compressed but not limited
+      // above the target level.
+      bool enable_limiter = true;
+
+      // Enables the analog gain controller functionality.
+      struct AnalogGainController {
+        bool enabled = true;
+        // TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
+        int startup_min_volume = 0;
+        // Lowest analog microphone level that will be applied in response to
+        // clipping.
+        int clipped_level_min = 70;
+        // If true, an adaptive digital gain is applied.
+        bool enable_digital_adaptive = true;
+        // Amount the microphone level is lowered with every clipping event.
+        // Limited to (0, 255].
+        int clipped_level_step = 15;
+        // Proportion of clipped samples required to declare a clipping event.
+        // Limited to (0.f, 1.f).
+        float clipped_ratio_threshold = 0.1f;
+        // Time in frames to wait after a clipping event before checking again.
+        // Limited to values higher than 0.
+        int clipped_wait_frames = 300;
+
+        // Enables clipping prediction functionality.
+        struct ClippingPredictor {
+          bool enabled = false;
+          enum Mode {
+            // Clipping event prediction mode with fixed step estimation.
+            kClippingEventPrediction,
+            // Clipped peak estimation mode with adaptive step estimation.
+            kAdaptiveStepClippingPeakPrediction,
+            // Clipped peak estimation mode with fixed step estimation.
+            kFixedStepClippingPeakPrediction,
+          };
+          Mode mode = kClippingEventPrediction;
+          // Number of frames in the sliding analysis window.
+          int window_length = 5;
+          // Number of frames in the sliding reference window.
+          int reference_window_length = 5;
+          // Reference window delay (unit: number of frames).
+          int reference_window_delay = 5;
+          // Clipping prediction threshold (dBFS).
+          float clipping_threshold = -1.0f;
+          // Crest factor drop threshold (dB).
+          float crest_factor_margin = 3.0f;
+          // If true, the recommended clipped level step is used to modify the
+          // analog gain. Otherwise, the predictor runs without affecting the
+          // analog gain.
+          bool use_predicted_step = true;
+        } clipping_predictor;
+      } analog_gain_controller;
+    } gain_controller1;
+
+    // Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
+    // replaces the AGC sub-module parametrized by `gain_controller1`.
+    // AGC2 brings the captured audio signal to the desired level by combining
+    // three different controllers (namely, input volume controller, adaptive
+    // digital controller and fixed digital controller) and a limiter.
+    // TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
+    struct RTC_EXPORT GainController2 {
+      bool operator==(const GainController2& rhs) const;
+      bool operator!=(const GainController2& rhs) const {
+        return !(*this == rhs);
+      }
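+
+      // Example (an editor's sketch, not part of the upstream header):
+      // enabling AGC2 together with its input volume controller:
+      //   AudioProcessing::Config config;
+      //   config.gain_controller2.enabled = true;
+      //   config.gain_controller2.input_volume_controller.enabled = true;
+      //   apm->ApplyConfig(config);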
+
+      // AGC2 must be created if and only if `enabled` is true.
+      bool enabled = false;
+
+      // Parameters for the input volume controller, which adjusts the input
+      // volume applied when the audio is captured (e.g., microphone volume on
+      // a soundcard, input volume on HAL).
+      struct InputVolumeController {
+        bool operator==(const InputVolumeController& rhs) const;
+        bool operator!=(const InputVolumeController& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+      } input_volume_controller;
+
+      // Parameters for the adaptive digital controller, which adjusts and
+      // applies a digital gain after echo cancellation and after noise
+      // suppression.
+      struct RTC_EXPORT AdaptiveDigital {
+        bool operator==(const AdaptiveDigital& rhs) const;
+        bool operator!=(const AdaptiveDigital& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+        float headroom_db = 6.0f;
+        float max_gain_db = 30.0f;
+        float initial_gain_db = 8.0f;
+        float max_gain_change_db_per_second = 3.0f;
+        float max_output_noise_level_dbfs = -50.0f;
+      } adaptive_digital;
+
+      // Parameters for the fixed digital controller, which applies a fixed
+      // digital gain after the adaptive digital controller and before the
+      // limiter.
+      struct FixedDigital {
+        // By setting `gain_db` to a value greater than zero, the limiter can be
+        // turned into a compressor that first applies a fixed gain.
+        float gain_db = 0.0f;
+      } fixed_digital;
+    } gain_controller2;
+
+    std::string ToString() const;
+  };
+
+  // Specifies the properties of a setting to be passed to AudioProcessing at
+  // runtime.
+  class RuntimeSetting {
+   public:
+    enum class Type {
+      kNotSpecified,
+      kCapturePreGain,
+      kCaptureCompressionGain,
+      kCaptureFixedPostGain,
+      kPlayoutVolumeChange,
+      kCustomRenderProcessingRuntimeSetting,
+      kPlayoutAudioDeviceChange,
+      kCapturePostGain,
+      kCaptureOutputUsed
+    };
+
+    // Play-out audio device properties.
+    struct PlayoutAudioDeviceInfo {
+      int id;          // Identifies the audio device.
+      int max_volume;  // Maximum play-out volume.
+    };
+
+    RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
+    ~RuntimeSetting() = default;
+
+    static RuntimeSetting CreateCapturePreGain(float gain) {
+      return {Type::kCapturePreGain, gain};
+    }
+
+    static RuntimeSetting CreateCapturePostGain(float gain) {
+      return {Type::kCapturePostGain, gain};
+    }
+
+    // Corresponds to Config::GainController1::compression_gain_db, but for
+    // runtime configuration.
+    static RuntimeSetting CreateCompressionGainDb(int gain_db) {
+      RTC_DCHECK_GE(gain_db, 0);
+      RTC_DCHECK_LE(gain_db, 90);
+      return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
+    }
+
+    // Corresponds to Config::GainController2::fixed_digital::gain_db, but for
+    // runtime configuration.
+    static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
+      RTC_DCHECK_GE(gain_db, 0.0f);
+      RTC_DCHECK_LE(gain_db, 90.0f);
+      return {Type::kCaptureFixedPostGain, gain_db};
+    }
+
+    // Creates a runtime setting to notify play-out (aka render) audio device
+    // changes.
+    static RuntimeSetting CreatePlayoutAudioDeviceChange(
+        PlayoutAudioDeviceInfo audio_device) {
+      return {Type::kPlayoutAudioDeviceChange, audio_device};
+    }
+
+    // Creates a runtime setting to notify play-out (aka render) volume changes.
+ // `volume` is the unnormalized volume, the maximum of which + static RuntimeSetting CreatePlayoutVolumeChange(int volume) { + return {Type::kPlayoutVolumeChange, volume}; + } + + static RuntimeSetting CreateCustomRenderSetting(float payload) { + return {Type::kCustomRenderProcessingRuntimeSetting, payload}; + } + + static RuntimeSetting CreateCaptureOutputUsedSetting( + bool capture_output_used) { + return {Type::kCaptureOutputUsed, capture_output_used}; + } + + Type type() const { return type_; } + // Getters do not return a value but instead modify the argument to protect + // from implicit casting. + void GetFloat(float* value) const { + RTC_DCHECK(value); + *value = value_.float_value; + } + void GetInt(int* value) const { + RTC_DCHECK(value); + *value = value_.int_value; + } + void GetBool(bool* value) const { + RTC_DCHECK(value); + *value = value_.bool_value; + } + void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const { + RTC_DCHECK(value); + *value = value_.playout_audio_device_info; + } + + private: + RuntimeSetting(Type id, float value) : type_(id), value_(value) {} + RuntimeSetting(Type id, int value) : type_(id), value_(value) {} + RuntimeSetting(Type id, PlayoutAudioDeviceInfo value) + : type_(id), value_(value) {} + Type type_; + union U { + U() {} + U(int value) : int_value(value) {} + U(float value) : float_value(value) {} + U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {} + float float_value; + int int_value; + bool bool_value; + PlayoutAudioDeviceInfo playout_audio_device_info; + } value_; + }; + + ~AudioProcessing() override {} + + // Initializes internal states, while retaining all user settings. This + // should be called before beginning to process a new audio stream. However, + // it is not necessary to call before processing the first stream after + // creation. + // + // It is also not necessary to call if the audio parameters (sample + // rate and number of channels) have changed. Passing updated parameters + // directly to `ProcessStream()` and `ProcessReverseStream()` is permissible. + // If the parameters are known at init-time though, they may be provided. + // TODO(webrtc:5298): Change to return void. + virtual int Initialize() = 0; + + // The int16 interfaces require: + // - only `NativeRate`s be used + // - that the input, output and reverse rates must match + // - that `processing_config.output_stream()` matches + // `processing_config.input_stream()`. + // + // The float interfaces accept arbitrary rates and support differing input and + // output layouts, but the output must have either one channel or the same + // number of channels as the input. + virtual int Initialize(const ProcessingConfig& processing_config) = 0; + + // TODO(peah): This method is a temporary solution used to take control + // over the parameters in the audio processing module and is likely to change. + virtual void ApplyConfig(const Config& config) = 0; + + // TODO(ajm): Only intended for internal use. Make private and friend the + // necessary classes? + virtual int proc_sample_rate_hz() const = 0; + virtual int proc_split_sample_rate_hz() const = 0; + virtual size_t num_input_channels() const = 0; + virtual size_t num_proc_channels() const = 0; + virtual size_t num_output_channels() const = 0; + virtual size_t num_reverse_channels() const = 0; + + // Set to true when the output of AudioProcessing will be muted or in some + // other way not used. 
Ideally, the captured audio would still be processed,
+  // but some components may change behavior based on this information.
+  // Default false. This method takes a lock. To achieve this in a lock-less
+  // manner, PostRuntimeSetting can be used instead.
+  virtual void set_output_will_be_muted(bool muted) = 0;
+
+  // Enqueues a runtime setting.
+  virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
+
+  // Enqueues a runtime setting. Returns a bool indicating whether the
+  // enqueueing was successful.
+  virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
+
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
+  // specified in `input_config` and `output_config`. `src` and `dest` may use
+  // the same memory, if desired.
+  virtual int ProcessStream(const int16_t* const src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            int16_t* const dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // `src` points to a channel buffer, arranged according to `input_stream`. At
+  // output, the channels will be arranged according to `output_stream` in
+  // `dest`.
+  //
+  // The output must have one channel or as many channels as the input. `src`
+  // and `dest` may use the same memory, if desired.
+  virtual int ProcessStream(const float* const* src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            float* const* dest) = 0;
+
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
+  // the reverse direction audio stream as specified in `input_config` and
+  // `output_config`. `src` and `dest` may use the same memory, if desired.
+  virtual int ProcessReverseStream(const int16_t* const src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   int16_t* const dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // `data` points to a channel buffer, arranged according to `reverse_config`.
+  virtual int ProcessReverseStream(const float* const* src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   float* const* dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+  // of `data` points to a channel buffer, arranged according to
+  // `reverse_config`.
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   const StreamConfig& reverse_config) = 0;
+
+  // Returns the most recently produced ~10 ms of the linear AEC output at a
+  // rate of 16 kHz. If there is more than one capture channel, a mono
+  // representation of the input is returned. Returns true/false to indicate
+  // whether an output was returned.
+  virtual bool GetLinearAecOutput(
+      rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
+
+  // This must be called prior to ProcessStream() if and only if adaptive analog
+  // gain control is enabled, to pass the current analog level from the audio
+  // HAL. Must be within the range [0, 255].
+  virtual void set_stream_analog_level(int level) = 0;
+
+  // When an analog mode is set, this should be called after
+  // `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
+  // new analog level for the audio HAL. It is the user's responsibility to
+  // apply this level.
+  virtual int recommended_stream_analog_level() const = 0;
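+
+  // Example (an editor's sketch, not part of the upstream header) of the
+  // analog AGC coupling described above, assuming a platform
+  // microphone-volume API:
+  //   apm->set_stream_analog_level(mic_volume);
+  //   apm->ProcessStream(capture, stream_config, stream_config, capture);
+  //   mic_volume = apm->recommended_stream_analog_level();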
+
+  // This must be called if and only if echo processing is enabled.
+  //
+  // Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
+  // frame and ProcessStream() receiving a near-end frame containing the
+  // corresponding echo. On the client-side this can be expressed as
+  //   delay = (t_render - t_analyze) + (t_process - t_capture)
+  // where,
+  //   - t_analyze is the time a frame is passed to ProcessReverseStream() and
+  //     t_render is the time the first sample of the same frame is rendered by
+  //     the audio hardware.
+  //   - t_capture is the time the first sample of a frame is captured by the
+  //     audio hardware and t_process is the time the same frame is passed to
+  //     ProcessStream().
+  virtual int set_stream_delay_ms(int delay) = 0;
+  virtual int stream_delay_ms() const = 0;
+
+  // Call to signal that a key press occurred (true) or did not occur (false)
+  // with this chunk of audio.
+  virtual void set_stream_key_pressed(bool key_pressed) = 0;
+
+  // Creates and attaches a webrtc::AecDump for recording debugging
+  // information.
+  // The `worker_queue` may not be null and must outlive the created
+  // AecDump instance. |max_log_size_bytes == -1| means the log size
+  // will be unlimited. `handle` may not be null. The AecDump takes
+  // responsibility for `handle` and closes it in the destructor. A
+  // return value of true indicates that the file has been
+  // successfully opened, while a value of false indicates that
+  // opening the file failed.
+  virtual bool CreateAndAttachAecDump(absl::string_view file_name,
+                                      int64_t max_log_size_bytes,
+                                      rtc::TaskQueue* worker_queue) = 0;
+  virtual bool CreateAndAttachAecDump(FILE* handle,
+                                      int64_t max_log_size_bytes,
+                                      rtc::TaskQueue* worker_queue) = 0;
+
+  // TODO(webrtc:5298) Deprecated variant.
+  // Attaches the provided webrtc::AecDump for recording debugging
+  // information. Log file and maximum file size logic is supposed to
+  // be handled by the implementing instance of AecDump. Calling this
+  // method when another AecDump is attached replaces the active AecDump
+  // with the new one. This causes the d-tor of the earlier AecDump to
+  // be called. The d-tor call may block until all pending logging
+  // tasks are completed.
+  virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
+
+  // If no AecDump is attached, this has no effect. If an AecDump is
+  // attached, its destructor is called. The d-tor may block until
+  // all pending logging tasks are completed.
+  virtual void DetachAecDump() = 0;
+
+  // Get audio processing statistics.
+  virtual AudioProcessingStats GetStatistics() = 0;
+  // TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
+  // should be set if there are active remote tracks (this would usually be true
+  // during a call). If there are no remote tracks some of the stats will not be
+  // set by AudioProcessing, because they only make sense if there is at least
+  // one remote track.
+  virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
+
+  // Returns the last applied configuration.
+  virtual AudioProcessing::Config GetConfig() const = 0;
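+
+  // Example (an editor's sketch, not part of the upstream header): recording
+  // an AEC dump with the methods above, assuming a rtc::TaskQueue
+  // `worker_queue` that outlives the dump; the file name is hypothetical:
+  //   apm->CreateAndAttachAecDump("/tmp/aec.dump", -1, &worker_queue);
+  //   // ... process audio ...
+  //   apm->DetachAecDump();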
+
+  enum Error {
+    // Fatal errors.
+    kNoError = 0,
+    kUnspecifiedError = -1,
+    kCreationFailedError = -2,
+    kUnsupportedComponentError = -3,
+    kUnsupportedFunctionError = -4,
+    kNullPointerError = -5,
+    kBadParameterError = -6,
+    kBadSampleRateError = -7,
+    kBadDataLengthError = -8,
+    kBadNumberChannelsError = -9,
+    kFileError = -10,
+    kStreamParameterNotSetError = -11,
+    kNotEnabledError = -12,
+
+    // Warnings are non-fatal.
+    // This results when a set_stream_ parameter is out of range. Processing
+    // will continue, but the parameter may have been truncated.
+    kBadStreamParameterWarning = -13
+  };
+
+  // Native rates supported by the integer interfaces.
+  enum NativeRate {
+    kSampleRate8kHz = 8000,
+    kSampleRate16kHz = 16000,
+    kSampleRate32kHz = 32000,
+    kSampleRate48kHz = 48000
+  };
+
+  // TODO(kwiberg): We currently need to support a compiler (Visual C++) that
+  // complains if we don't explicitly state the size of the array here. Remove
+  // the size when that's no longer the case.
+  static constexpr int kNativeSampleRatesHz[4] = {
+      kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
+  static constexpr size_t kNumNativeSampleRates =
+      arraysize(kNativeSampleRatesHz);
+  static constexpr int kMaxNativeSampleRateHz =
+      kNativeSampleRatesHz[kNumNativeSampleRates - 1];
+
+  // APM processes audio in chunks of about 10 ms. See GetFrameSize() for
+  // details.
+  static constexpr int kChunkSizeMs = 10;
+
+  // Returns floor(sample_rate_hz / 100): the number of samples per channel used
+  // as input and output to the audio processing module in calls to
+  // ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
+  // GetLinearAecOutput.
+  //
+  // This is exactly 10 ms for sample rates divisible by 100. For example:
+  //  - 48000 Hz (480 samples per channel),
+  //  - 44100 Hz (441 samples per channel),
+  //  - 16000 Hz (160 samples per channel).
+  //
+  // Sample rates not divisible by 100 are received/produced in frames of
+  // approximately 10 ms. For example:
+  //  - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
+  //  - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
+  // These nondivisible sample rates yield lower audio quality compared to
+  // multiples of 100. Internal resampling to 10 ms frames causes a simulated
+  // clock drift effect which impacts the performance of (for example) echo
+  // cancellation.
+  static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
+};
+
+class RTC_EXPORT AudioProcessingBuilder {
+ public:
+  AudioProcessingBuilder();
+  AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
+  AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
+  ~AudioProcessingBuilder();
+
+  // Sets the APM configuration.
+  AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
+    config_ = config;
+    return *this;
+  }
+
+  // Sets the echo controller factory to inject when APM is created.
+  AudioProcessingBuilder& SetEchoControlFactory(
+      std::unique_ptr<EchoControlFactory> echo_control_factory) {
+    echo_control_factory_ = std::move(echo_control_factory);
+    return *this;
+  }
+
+  // Sets the capture post-processing sub-module to inject when APM is created.
+  AudioProcessingBuilder& SetCapturePostProcessing(
+      std::unique_ptr<CustomProcessing> capture_post_processing) {
+    capture_post_processing_ = std::move(capture_post_processing);
+    return *this;
+  }
+
+  // Sets the render pre-processing sub-module to inject when APM is created.
+  AudioProcessingBuilder& SetRenderPreProcessing(
+      std::unique_ptr<CustomProcessing> render_pre_processing) {
+    render_pre_processing_ = std::move(render_pre_processing);
+    return *this;
+  }
+
+  // Sets the echo detector to inject when APM is created.
+  AudioProcessingBuilder& SetEchoDetector(
+      rtc::scoped_refptr<EchoDetector> echo_detector) {
+    echo_detector_ = std::move(echo_detector);
+    return *this;
+  }
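+
+  // Example (an editor's sketch, not part of the upstream header): chaining
+  // the setters above, assuming `config` and `my_render_processor` exist:
+  //   rtc::scoped_refptr<AudioProcessing> apm =
+  //       AudioProcessingBuilder()
+  //           .SetConfig(config)
+  //           .SetRenderPreProcessing(std::move(my_render_processor))
+  //           .Create();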
+
+  // Sets the capture analyzer sub-module to inject when APM is created.
+  AudioProcessingBuilder& SetCaptureAnalyzer(
+      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
+    capture_analyzer_ = std::move(capture_analyzer);
+    return *this;
+  }
+
+  // Creates an APM instance with the specified config or the default one if
+  // unspecified. Injects the specified components transferring the ownership
+  // to the newly created APM instance - i.e., except for the config, the
+  // builder is reset to its initial state.
+  rtc::scoped_refptr<AudioProcessing> Create();
+
+ private:
+  AudioProcessing::Config config_;
+  std::unique_ptr<EchoControlFactory> echo_control_factory_;
+  std::unique_ptr<CustomProcessing> capture_post_processing_;
+  std::unique_ptr<CustomProcessing> render_pre_processing_;
+  rtc::scoped_refptr<EchoDetector> echo_detector_;
+  std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
+};
+
+class StreamConfig {
+ public:
+  // sample_rate_hz: The sampling rate of the stream.
+  // num_channels: The number of audio channels in the stream.
+  StreamConfig(int sample_rate_hz = 0, size_t num_channels = 0)
+      : sample_rate_hz_(sample_rate_hz),
+        num_channels_(num_channels),
+        num_frames_(calculate_frames(sample_rate_hz)) {}
+
+  void set_sample_rate_hz(int value) {
+    sample_rate_hz_ = value;
+    num_frames_ = calculate_frames(value);
+  }
+  void set_num_channels(size_t value) { num_channels_ = value; }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+
+  // The number of channels in the stream.
+  size_t num_channels() const { return num_channels_; }
+
+  size_t num_frames() const { return num_frames_; }
+  size_t num_samples() const { return num_channels_ * num_frames_; }
+
+  bool operator==(const StreamConfig& other) const {
+    return sample_rate_hz_ == other.sample_rate_hz_ &&
+           num_channels_ == other.num_channels_;
+  }
+
+  bool operator!=(const StreamConfig& other) const { return !(*this == other); }
+
+ private:
+  static size_t calculate_frames(int sample_rate_hz) {
+    return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
+  }
+
+  int sample_rate_hz_;
+  size_t num_channels_;
+  size_t num_frames_;
+};
+
+class ProcessingConfig {
+ public:
+  enum StreamName {
+    kInputStream,
+    kOutputStream,
+    kReverseInputStream,
+    kReverseOutputStream,
+    kNumStreamNames,
+  };
+
+  const StreamConfig& input_stream() const {
+    return streams[StreamName::kInputStream];
+  }
+  const StreamConfig& output_stream() const {
+    return streams[StreamName::kOutputStream];
+  }
+  const StreamConfig& reverse_input_stream() const {
+    return streams[StreamName::kReverseInputStream];
+  }
+  const StreamConfig& reverse_output_stream() const {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
+  StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
+  StreamConfig& reverse_input_stream() {
+    return streams[StreamName::kReverseInputStream];
+  }
+  StreamConfig& reverse_output_stream() {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  bool operator==(const ProcessingConfig& other) const {
+    for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
+      if (this->streams[i] != other.streams[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const ProcessingConfig& other) const {
+    return !(*this == other);
+  }
+
+  StreamConfig streams[StreamName::kNumStreamNames];
+};
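+
+// Example (an editor's sketch, not part of the upstream header): describing a
+// stereo 48 kHz processing pipeline before initializing APM:
+//   ProcessingConfig processing_config;
+//   processing_config.input_stream() = StreamConfig(48000, 2);
+//   processing_config.output_stream() = StreamConfig(48000, 2);
+//   processing_config.reverse_input_stream() = StreamConfig(48000, 2);
+//   processing_config.reverse_output_stream() = StreamConfig(48000, 2);
+//   apm->Initialize(processing_config);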
+
+// Experimental interface for a custom analysis submodule.
+class CustomAudioAnalyzer {
+ public:
+  // (Re-) Initializes the submodule.
+  virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
+  // Analyzes the given capture or render signal.
+  virtual void Analyze(const AudioBuffer* audio) = 0;
+  // Returns a string representation of the module state.
+  virtual std::string ToString() const = 0;
+
+  virtual ~CustomAudioAnalyzer() {}
+};
+
+// Interface for a custom processing submodule.
+class CustomProcessing {
+ public:
+  // (Re-)Initializes the submodule.
+  virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
+  // Processes the given capture or render signal.
+  virtual void Process(AudioBuffer* audio) = 0;
+  // Returns a string representation of the module state.
+  virtual std::string ToString() const = 0;
+  // Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
+  // after updating dependencies.
+  virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
+
+  virtual ~CustomProcessing() {}
+};
+
+// Interface for an echo detector submodule.
+class EchoDetector : public rtc::RefCountInterface {
+ public:
+  // (Re-)Initializes the submodule.
+  virtual void Initialize(int capture_sample_rate_hz,
+                          int num_capture_channels,
+                          int render_sample_rate_hz,
+                          int num_render_channels) = 0;
+
+  // Analysis (not changing) of the first channel of the render signal.
+  virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
+
+  // Analysis (not changing) of the capture signal.
+  virtual void AnalyzeCaptureAudio(
+      rtc::ArrayView<const float> capture_audio) = 0;
+
+  struct Metrics {
+    absl::optional<double> echo_likelihood;
+    absl::optional<double> echo_likelihood_recent_max;
+  };
+
+  // Collect current metrics from the echo detector.
+  virtual Metrics GetMetrics() const = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
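To make the CustomProcessing contract above concrete, here is a minimal sketch of an implementation (an editor's illustration, not part of the upstream sources; it assumes AudioBuffer's num_channels(), num_frames() and channels() accessors from the wider APM API):

    #include <string>

    #include "modules/audio_processing/audio_buffer.h"
    #include "modules/audio_processing/include/audio_processing.h"

    namespace {

    // Attenuates every processed channel by 6 dB; Initialize() needs no
    // state here because the gain is independent of rate and layout.
    class HalfGainProcessor : public webrtc::CustomProcessing {
     public:
      void Initialize(int sample_rate_hz, int num_channels) override {}
      void Process(webrtc::AudioBuffer* audio) override {
        for (size_t ch = 0; ch < audio->num_channels(); ++ch) {
          float* channel = audio->channels()[ch];
          for (size_t i = 0; i < audio->num_frames(); ++i) {
            channel[i] *= 0.5f;
          }
        }
      }
      std::string ToString() const override { return "HalfGainProcessor"; }
    };

    }  // namespace

An instance would typically be handed to AudioProcessingBuilder::SetCapturePostProcessing() or SetRenderPreProcessing(), which transfers ownership to the created APM.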
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ + +#include + +#include "absl/types/optional.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { +// This version of the stats uses Optionals, it will replace the regular +// AudioProcessingStatistics struct. +struct RTC_EXPORT AudioProcessingStats { + AudioProcessingStats(); + AudioProcessingStats(const AudioProcessingStats& other); + ~AudioProcessingStats(); + + // Deprecated. + // TODO(bugs.webrtc.org/11226): Remove. + // True if voice is detected in the last capture frame, after processing. + // It is conservative in flagging audio as speech, with low likelihood of + // incorrectly flagging a frame as voice. + // Only reported if voice detection is enabled in AudioProcessing::Config. + absl::optional voice_detected; + + // AEC Statistics. + // ERL = 10log_10(P_far / P_echo) + absl::optional echo_return_loss; + // ERLE = 10log_10(P_echo / P_out) + absl::optional echo_return_loss_enhancement; + // Fraction of time that the AEC linear filter is divergent, in a 1-second + // non-overlapped aggregation window. + absl::optional divergent_filter_fraction; + + // The delay metrics consists of the delay median and standard deviation. It + // also consists of the fraction of delay estimates that can make the echo + // cancellation perform poorly. The values are aggregated until the first + // call to `GetStatistics()` and afterwards aggregated and updated every + // second. Note that if there are several clients pulling metrics from + // `GetStatistics()` during a session the first call from any of them will + // change to one second aggregation window for all. + absl::optional delay_median_ms; + absl::optional delay_standard_deviation_ms; + + // Residual echo detector likelihood. + absl::optional residual_echo_likelihood; + // Maximum residual echo likelihood from the last time period. + absl::optional residual_echo_likelihood_recent_max; + + // The instantaneous delay estimate produced in the AEC. The unit is in + // milliseconds and the value is the instantaneous value at the time of the + // call to `GetStatistics()`. + absl::optional delay_ms; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h b/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h new file mode 100644 index 0000000000..2ea1a865c3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/include/audio_processing_statistics.h" +#include "test/gmock.h" + +namespace webrtc { + +namespace test { +class MockCustomProcessing : public CustomProcessing { + public: + virtual ~MockCustomProcessing() {} + MOCK_METHOD(void, + Initialize, + (int sample_rate_hz, int num_channels), + (override)); + MOCK_METHOD(void, Process, (AudioBuffer * audio), (override)); + MOCK_METHOD(void, + SetRuntimeSetting, + (AudioProcessing::RuntimeSetting setting), + (override)); + MOCK_METHOD(std::string, ToString, (), (const, override)); +}; + +class MockCustomAudioAnalyzer : public CustomAudioAnalyzer { + public: + virtual ~MockCustomAudioAnalyzer() {} + MOCK_METHOD(void, + Initialize, + (int sample_rate_hz, int num_channels), + (override)); + MOCK_METHOD(void, Analyze, (const AudioBuffer* audio), (override)); + MOCK_METHOD(std::string, ToString, (), (const, override)); +}; + +class MockEchoControl : public EchoControl { + public: + virtual ~MockEchoControl() {} + MOCK_METHOD(void, AnalyzeRender, (AudioBuffer * render), (override)); + MOCK_METHOD(void, AnalyzeCapture, (AudioBuffer * capture), (override)); + MOCK_METHOD(void, + ProcessCapture, + (AudioBuffer * capture, bool echo_path_change), + (override)); + MOCK_METHOD(void, + ProcessCapture, + (AudioBuffer * capture, + AudioBuffer* linear_output, + bool echo_path_change), + (override)); + MOCK_METHOD(Metrics, GetMetrics, (), (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, ActiveProcessing, (), (const, override)); +}; + +class MockEchoDetector : public EchoDetector { + public: + virtual ~MockEchoDetector() {} + MOCK_METHOD(void, + Initialize, + (int capture_sample_rate_hz, + int num_capture_channels, + int render_sample_rate_hz, + int num_render_channels), + (override)); + MOCK_METHOD(void, + AnalyzeRenderAudio, + (rtc::ArrayView render_audio), + (override)); + MOCK_METHOD(void, + AnalyzeCaptureAudio, + (rtc::ArrayView capture_audio), + (override)); + MOCK_METHOD(Metrics, GetMetrics, (), (const, override)); +}; + +class MockAudioProcessing : public AudioProcessing { + public: + MockAudioProcessing() {} + + virtual ~MockAudioProcessing() {} + + MOCK_METHOD(int, Initialize, (), (override)); + MOCK_METHOD(int, + Initialize, + (const ProcessingConfig& processing_config), + (override)); + MOCK_METHOD(void, ApplyConfig, (const Config& config), (override)); + MOCK_METHOD(int, proc_sample_rate_hz, (), (const, override)); + MOCK_METHOD(int, proc_split_sample_rate_hz, (), (const, override)); + MOCK_METHOD(size_t, num_input_channels, (), (const, override)); + MOCK_METHOD(size_t, num_proc_channels, (), (const, override)); + MOCK_METHOD(size_t, num_output_channels, (), (const, override)); + MOCK_METHOD(size_t, num_reverse_channels, (), (const, override)); + MOCK_METHOD(void, set_output_will_be_muted, (bool muted), (override)); + MOCK_METHOD(void, SetRuntimeSetting, (RuntimeSetting setting), (override)); + MOCK_METHOD(bool, PostRuntimeSetting, (RuntimeSetting setting), (override)); + MOCK_METHOD(int, + ProcessStream, + (const int16_t* const src, + const StreamConfig& input_config, + const StreamConfig& output_config, + int16_t* const dest), + (override)); + 
+  MOCK_METHOD(int,
+              ProcessStream,
+              (const float* const* src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               float* const* dest),
+              (override));
+  MOCK_METHOD(int,
+              ProcessReverseStream,
+              (const int16_t* const src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               int16_t* const dest),
+              (override));
+  MOCK_METHOD(int,
+              AnalyzeReverseStream,
+              (const float* const* data, const StreamConfig& reverse_config),
+              (override));
+  MOCK_METHOD(int,
+              ProcessReverseStream,
+              (const float* const* src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               float* const* dest),
+              (override));
+  MOCK_METHOD(bool,
+              GetLinearAecOutput,
+              ((rtc::ArrayView<std::array<float, 160>> linear_output)),
+              (const, override));
+  MOCK_METHOD(int, set_stream_delay_ms, (int delay), (override));
+  MOCK_METHOD(int, stream_delay_ms, (), (const, override));
+  MOCK_METHOD(void, set_stream_key_pressed, (bool key_pressed), (override));
+  MOCK_METHOD(void, set_stream_analog_level, (int), (override));
+  MOCK_METHOD(int, recommended_stream_analog_level, (), (const, override));
+  MOCK_METHOD(bool,
+              CreateAndAttachAecDump,
+              (absl::string_view file_name,
+               int64_t max_log_size_bytes,
+               rtc::TaskQueue* worker_queue),
+              (override));
+  MOCK_METHOD(bool,
+              CreateAndAttachAecDump,
+              (FILE * handle,
+               int64_t max_log_size_bytes,
+               rtc::TaskQueue* worker_queue),
+              (override));
+  MOCK_METHOD(void, AttachAecDump, (std::unique_ptr<AecDump>), (override));
+  MOCK_METHOD(void, DetachAecDump, (), (override));
+
+  MOCK_METHOD(AudioProcessingStats, GetStatistics, (), (override));
+  MOCK_METHOD(AudioProcessingStats, GetStatistics, (bool), (override));
+
+  MOCK_METHOD(AudioProcessing::Config, GetConfig, (), (const, override));
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_
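The mocks above follow standard gMock conventions; a brief sketch of how a test might use one (an editor's illustration only; the test name and scenario are invented):

    #include "modules/audio_processing/include/mock_audio_processing.h"
    #include "test/gmock.h"
    #include "test/gtest.h"

    TEST(ApmClientTest, AppliesConfigOnSetup) {
      using ::testing::_;
      webrtc::test::MockAudioProcessing apm;
      // The code under test is expected to configure APM exactly once.
      EXPECT_CALL(apm, ApplyConfig(_));
      webrtc::AudioProcessing::Config config;
      config.echo_canceller.enabled = true;
      apm.ApplyConfig(config);
    }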
diff --git a/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc
new file mode 100644
index 0000000000..a15321ad48
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+#include <sstream>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+
+// Check to verify that the define is properly set.
+#if !defined(WEBRTC_APM_DEBUG_DUMP) || \
+    (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1)
+#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1"
+#endif
+
+namespace webrtc {
+namespace {
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+
+#if defined(WEBRTC_WIN)
+constexpr char kPathDelimiter = '\\';
+#else
+constexpr char kPathDelimiter = '/';
+#endif
+
+std::string FormFileName(absl::string_view output_dir,
+                         absl::string_view name,
+                         int instance_index,
+                         int reinit_index,
+                         absl::string_view suffix) {
+  std::stringstream ss;
+  std::string base = rtc::LogMessage::aec_debug_filename();
+  ss << base;
+
+  // Append the platform path delimiter only when the base is non-empty and
+  // does not already end with one.
+  if (base.length() && base.back() != kPathDelimiter) {
+    ss << kPathDelimiter;
+  }
+
+  ss << name << "_" << instance_index << "-" << reinit_index << suffix;
+  return ss.str();
+}
+#endif
+
+}  // namespace
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+ApmDataDumper::ApmDataDumper(int instance_index)
+    : instance_index_(instance_index), debug_written_(0) {}
+#else
+ApmDataDumper::ApmDataDumper(int instance_index) {}
+#endif
+
+ApmDataDumper::~ApmDataDumper() = default;
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+bool ApmDataDumper::recording_activated_ = false;
+absl::optional<int> ApmDataDumper::dump_set_to_use_;
+char ApmDataDumper::output_dir_[] = "";
+
+FILE* ApmDataDumper::GetRawFile(absl::string_view name) {
+  std::string filename = FormFileName(output_dir_, name, instance_index_,
+                                      recording_set_index_, ".dat");
+  auto& f = raw_files_[filename];
+  if (!f) {
+    f.reset(fopen(filename.c_str(), "wb"));
+    RTC_CHECK(f.get()) << "Cannot write to " << filename << ".";
+  }
+  return f.get();
+}
+
+WavWriter* ApmDataDumper::GetWavFile(absl::string_view name,
+                                     int sample_rate_hz,
+                                     int num_channels,
+                                     WavFile::SampleFormat format) {
+  std::string filename = FormFileName(output_dir_, name, instance_index_,
+                                      recording_set_index_, ".wav");
+  auto& f = wav_files_[filename];
+  if (!f) {
+    f.reset(
+        new WavWriter(filename.c_str(), sample_rate_hz, num_channels, format));
+  }
+  return f.get();
+}
+#endif
+
+}  // namespace webrtc
+#if !defined(WEBRTC_APM_DEBUG_DUMP) || \
+    (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1)
+#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1"
+#endif
+
+namespace webrtc {
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+// Functor used as a custom deleter in the map of file pointers to raw files.
+struct RawFileCloseFunctor {
+  void operator()(FILE* f) const {
+    if (f) fclose(f);
+  }
+};
+#endif
+
+// Class that handles dumping of variables into files.
+class ApmDataDumper {
+ public:
+  // Constructor that takes an instance index that may
+  // be used to distinguish data dumped from different
+  // instances of the code.
+  explicit ApmDataDumper(int instance_index);
+
+  ApmDataDumper() = delete;
+  ApmDataDumper(const ApmDataDumper&) = delete;
+  ApmDataDumper& operator=(const ApmDataDumper&) = delete;
+
+  ~ApmDataDumper();
+
+  // Activates or deactivates the dumping functionality.
+  static void SetActivated(bool activated) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    recording_activated_ = activated;
+#endif
+  }
+
+  // Returns whether the dumping functionality is enabled/available.
+  static bool IsAvailable() {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    return true;
+#else
+    return false;
+#endif
+  }
+
+  // Default dump set.
+  static constexpr size_t kDefaultDumpSet = 0;
+
+  // Specifies what dump set to use. All dump commands with a different dump
+  // set than the one specified will be discarded. If not specified, all dump
+  // sets will be used.
+  static void SetDumpSetToUse(int dump_set_to_use) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    dump_set_to_use_ = dump_set_to_use;
+#endif
+  }
+
+  // Sets an optional output directory.
+  static void SetOutputDirectory(absl::string_view output_dir) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    RTC_CHECK_LT(output_dir.size(), kOutputDirMaxLength);
+    rtc::strcpyn(output_dir_, output_dir.size(), output_dir);
+#endif
+  }
+
+  // Reinitializes the data dumping such that new versions
+  // of all files being dumped to are created.
+  void InitiateNewSetOfRecordings() {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    ++recording_set_index_;
+    debug_written_ = 0;
+#endif
+  }
+
+  // Methods for performing dumping of data of various types into
+  // various formats.
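+  //
+  // A minimal usage sketch (illustrative only; assumes a build with
+  // WEBRTC_APM_DEBUG_DUMP == 1 and a configured output path):
+  //
+  //   ApmDataDumper dumper(/*instance_index=*/0);
+  //   ApmDataDumper::SetActivated(true);
+  //   dumper.DumpRaw("gain", 0.5f);         // appends to "gain_0-0.dat"
+  //   dumper.InitiateNewSetOfRecordings();  // later dumps go to "gain_0-1.dat"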
+  void DumpRaw(absl::string_view name,
+               double v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const double* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const double> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               float v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const float* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const float> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name, bool v, int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, static_cast<int16_t>(v));
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const bool* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        for (size_t k = 0; k < v_length; ++k) {
+          int16_t value = static_cast<int16_t>(v[k]);
+          fwrite(&value, sizeof(value), 1, file);
+        }
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const bool> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               int16_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const int16_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const int16_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               int32_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const int32_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const size_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const int32_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const size_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    DumpRaw(name, v.size(), v.data());
+#endif
+  }
+
+  void DumpWav(absl::string_view name,
+               size_t v_length,
+               const float* v,
+               int sample_rate_hz,
+               int num_channels,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      WavWriter* file = GetWavFile(name, sample_rate_hz, num_channels,
+                                   WavFile::SampleFormat::kFloat);
+      file->WriteSamples(v, v_length);
+      // Cheat and use aec_near as a stand-in for "size of the largest file"
+      // in the dump. We're looking to limit the total time, and that's a
+      // reasonable stand-in.
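+      // The byte budget itself is enforced by updateDebugWritten() (declared
+      // further down), which deactivates all dumping once the accumulated
+      // count reaches the configured limit.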
+      if (name == "aec_near") {
+        updateDebugWritten(v_length * sizeof(float));
+      }
+    }
+#endif
+  }
+
+  void DumpWav(absl::string_view name,
+               rtc::ArrayView<const float> v,
+               int sample_rate_hz,
+               int num_channels,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpWav(name, v.size(), v.data(), sample_rate_hz, num_channels);
+    }
+#endif
+  }
+
+ private:
+#if WEBRTC_APM_DEBUG_DUMP == 1
+  static bool recording_activated_;
+  static absl::optional<int> dump_set_to_use_;
+  static constexpr size_t kOutputDirMaxLength = 1024;
+  static char output_dir_[kOutputDirMaxLength];
+  const int instance_index_;
+  int recording_set_index_ = 0;
+  std::unordered_map<std::string, std::unique_ptr<FILE, RawFileCloseFunctor>>
+      raw_files_;
+  std::unordered_map<std::string, std::unique_ptr<WavWriter>> wav_files_;
+
+  FILE* GetRawFile(absl::string_view name);
+  WavWriter* GetWavFile(absl::string_view name,
+                        int sample_rate_hz,
+                        int num_channels,
+                        WavFile::SampleFormat format);
+
+  uint32_t debug_written_ = 0;
+
+  void updateDebugWritten(uint32_t amount) {
+    debug_written_ += amount;
+    if (debug_written_ >= webrtc::Trace::aec_debug_size()) {
+      SetActivated(false);
+    }
+  }
+
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn
new file mode 100644
index 0000000000..8c2e9dba84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn
@@ -0,0 +1,104 @@
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") + +rtc_static_library("ns") { + visibility = [ "*" ] + configs += [ "..:apm_debug_dump" ] + sources = [ + "fast_math.cc", + "fast_math.h", + "histograms.cc", + "histograms.h", + "noise_estimator.cc", + "noise_estimator.h", + "noise_suppressor.cc", + "noise_suppressor.h", + "ns_common.h", + "ns_config.h", + "ns_fft.cc", + "ns_fft.h", + "prior_signal_model.cc", + "prior_signal_model.h", + "prior_signal_model_estimator.cc", + "prior_signal_model_estimator.h", + "quantile_noise_estimator.cc", + "quantile_noise_estimator.h", + "signal_model.cc", + "signal_model.h", + "signal_model_estimator.cc", + "signal_model_estimator.h", + "speech_probability_estimator.cc", + "speech_probability_estimator.h", + "suppression_params.cc", + "suppression_params.h", + "wiener_filter.cc", + "wiener_filter.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + "..:apm_logging", + "..:audio_buffer", + "..:high_pass_filter", + "../../../api:array_view", + "../../../common_audio:common_audio_c", + "../../../common_audio/third_party/ooura:fft_size_128", + "../../../common_audio/third_party/ooura:fft_size_256", + "../../../rtc_base:checks", + "../../../rtc_base:safe_minmax", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +if (rtc_include_tests) { + rtc_source_set("ns_unittests") { + testonly = true + + configs += [ "..:apm_debug_dump" ] + sources = [ "noise_suppressor_unittest.cc" ] + + deps = [ + ":ns", + "..:apm_logging", + "..:audio_buffer", + "..:audio_processing", + "..:high_pass_filter", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../test:test_support", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + + defines = [] + + if (rtc_enable_protobuf) { + sources += [] + } + + if (!build_with_chromium) { + deps += [ "..:audio_processing_unittests" ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc new file mode 100644 index 0000000000..d13110c43f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/fast_math.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +float FastLog2f(float in) { + RTC_DCHECK_GT(in, .0f); + // Read and interpret float as uint32_t and then cast to float. + // This is done to extract the exponent (bits 30 - 23). + // "Right shift" of the exponent is then performed by multiplying + // with the constant (1/2^23). 
Finally, we subtract a constant to + // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias). + union { + float dummy; + uint32_t a; + } x = {in}; + float out = x.a; + out *= 1.1920929e-7f; // 1/2^23 + out -= 126.942695f; // Remove bias. + return out; +} + +} // namespace + +float SqrtFastApproximation(float f) { + // TODO(peah): Add fast approximate implementation. + return sqrtf(f); +} + +float Pow2Approximation(float p) { + // TODO(peah): Add fast approximate implementation. + return powf(2.f, p); +} + +float PowApproximation(float x, float p) { + return Pow2Approximation(p * FastLog2f(x)); +} + +float LogApproximation(float x) { + constexpr float kLogOf2 = 0.69314718056f; + return FastLog2f(x) * kLogOf2; +} + +void LogApproximation(rtc::ArrayView x, rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = LogApproximation(x[k]); + } +} + +float ExpApproximation(float x) { + constexpr float kLog10Ofe = 0.4342944819f; + return PowApproximation(10.f, x * kLog10Ofe); +} + +void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = ExpApproximation(x[k]); + } +} + +void ExpApproximationSignFlip(rtc::ArrayView x, + rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = ExpApproximation(-x[k]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h new file mode 100644 index 0000000000..0aefee940b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ +#define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ + +#include "api/array_view.h" + +namespace webrtc { + +// Sqrt approximation. +float SqrtFastApproximation(float f); + +// Log base conversion log(x) = log2(x)/log2(e). +float LogApproximation(float x); +void LogApproximation(rtc::ArrayView x, rtc::ArrayView y); + +// 2^x approximation. +float Pow2Approximation(float p); + +// x^p approximation. +float PowApproximation(float x, float p); + +// e^x approximation. +float ExpApproximation(float x); +void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y); +void ExpApproximationSignFlip(rtc::ArrayView x, + rtc::ArrayView y); +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc b/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc new file mode 100644 index 0000000000..1d4f4590d2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/histograms.h" + +namespace webrtc { + +Histograms::Histograms() { + Clear(); +} + +void Histograms::Clear() { + lrt_.fill(0); + spectral_flatness_.fill(0); + spectral_diff_.fill(0); +} + +void Histograms::Update(const SignalModel& features_) { + // Update the histogram for the LRT. + constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt; + if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) { + ++lrt_[kOneByBinSizeLrt * features_.lrt]; + } + + // Update histogram for the spectral flatness. + constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat; + if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat && + features_.spectral_flatness >= 0.f) { + ++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat]; + } + + // Update histogram for the spectral difference. + constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff; + if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff && + features_.spectral_diff >= 0.f) { + ++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff]; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/histograms.h b/third_party/libwebrtc/modules/audio_processing/ns/histograms.h new file mode 100644 index 0000000000..9640e743cf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/histograms.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ +#define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +constexpr int kHistogramSize = 1000; + +// Class for handling the updating of histograms. +class Histograms { + public: + Histograms(); + Histograms(const Histograms&) = delete; + Histograms& operator=(const Histograms&) = delete; + + // Clears the histograms. + void Clear(); + + // Extracts thresholds for feature parameters and updates the corresponding + // histogram. + void Update(const SignalModel& features_); + + // Methods for accessing the histograms. + rtc::ArrayView get_lrt() const { return lrt_; } + rtc::ArrayView get_spectral_flatness() const { + return spectral_flatness_; + } + rtc::ArrayView get_spectral_diff() const { + return spectral_diff_; + } + + private: + std::array lrt_; + std::array spectral_flatness_; + std::array spectral_diff_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc new file mode 100644 index 0000000000..5367545f25 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/noise_estimator.h" + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Log(i). +constexpr std::array log_table = { + 0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f, + 1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f, + 2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f, + 3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f, + 3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f, + 3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f, + 3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f, + 3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f, + 4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f, + 4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f, + 4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f, + 4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f, + 4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f, + 4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f, + 4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f, + 4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f, + 4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f, + 4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f, + 4.836282f, 4.844187f, 4.852030f}; + +} // namespace + +NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params) + : suppression_params_(suppression_params) { + noise_spectrum_.fill(0.f); + prev_noise_spectrum_.fill(0.f); + conservative_noise_spectrum_.fill(0.f); + parametric_noise_spectrum_.fill(0.f); +} + +void NoiseEstimator::PrepareAnalysis() { + std::copy(noise_spectrum_.begin(), noise_spectrum_.end(), + prev_noise_spectrum_.begin()); +} + +void NoiseEstimator::PreUpdate( + int32_t num_analyzed_frames, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum) { + quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_); + + if (num_analyzed_frames < kShortStartupPhaseBlocks) { + // Compute simplified noise model during startup. + const size_t kStartBand = 5; + float sum_log_i_log_magn = 0.f; + float sum_log_i = 0.f; + float sum_log_i_square = 0.f; + float sum_log_magn = 0.f; + for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) { + float log_i = log_table[i]; + sum_log_i += log_i; + sum_log_i_square += log_i * log_i; + float log_signal = LogApproximation(signal_spectrum[i]); + sum_log_magn += log_signal; + sum_log_i_log_magn += log_i * log_signal; + } + + // Estimate the parameter for the level of the white noise. + constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; + white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 * + suppression_params_.over_subtraction_factor; + + // Estimate pink noise parameters. 
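+      // The model fitted here is log|X(i)| ~= a - b * log(i) for
+      // i >= kStartBand, i.e. a straight line over (log(i), log-magnitude).
+      // The sums accumulated above are the terms of the least-squares normal
+      // equations: the first num / denom solve below yields the intercept
+      // (accumulated in pink_noise_numerator_), and the second yields the
+      // negated slope, i.e. the spectral decay exponent (pink_noise_exp_).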
+      float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
+                    sum_log_i * sum_log_i;
+      float num =
+          sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
+      RTC_DCHECK_NE(denom, 0.f);
+      float pink_noise_adjustment = num / denom;
+
+      // Constrain the estimated spectrum to be positive.
+      pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
+      pink_noise_numerator_ += pink_noise_adjustment;
+      num = sum_log_i * sum_log_magn -
+            (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
+      RTC_DCHECK_NE(denom, 0.f);
+      pink_noise_adjustment = num / denom;
+
+      // Constrain the pink noise power to be in the interval [0, 1].
+      pink_noise_adjustment =
+          std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
+
+      pink_noise_exp_ += pink_noise_adjustment;
+
+    const float one_by_num_analyzed_frames_plus_1 =
+        1.f / (num_analyzed_frames + 1.f);
+
+    // Calculate the frequency-independent parts of the parametric noise
+    // estimate.
+    float parametric_exp = 0.f;
+    float parametric_num = 0.f;
+    if (pink_noise_exp_ > 0.f) {
+      // Use the pink noise estimate.
+      parametric_num = ExpApproximation(pink_noise_numerator_ *
+                                        one_by_num_analyzed_frames_plus_1);
+      parametric_num *= num_analyzed_frames + 1.f;
+      parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
+    }
+
+    constexpr float kOneByShortStartupPhaseBlocks =
+        1.f / kShortStartupPhaseBlocks;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      // Estimate the background noise using the white and pink noise
+      // parameters.
+      if (pink_noise_exp_ == 0.f) {
+        // Use the white noise estimate.
+        parametric_noise_spectrum_[i] = white_noise_level_;
+      } else {
+        // Use the pink noise estimate.
+        float use_band = i < kStartBand ? kStartBand : i;
+        float denom = PowApproximation(use_band, parametric_exp);
+        RTC_DCHECK_NE(denom, 0.f);
+        parametric_noise_spectrum_[i] = parametric_num / denom;
+      }
+    }
+
+    // Weight the quantile noise with the modeled noise.
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      noise_spectrum_[i] *= num_analyzed_frames;
+      float tmp = parametric_noise_spectrum_[i] *
+                  (kShortStartupPhaseBlocks - num_analyzed_frames);
+      noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
+      noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
+    }
+  }
+}
+
+void NoiseEstimator::PostUpdate(
+    rtc::ArrayView<const float> speech_probability,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
+  // Time-avg parameter for the noise_spectrum update.
+  constexpr float kNoiseUpdate = 0.9f;
+
+  float gamma = kNoiseUpdate;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    const float prob_speech = speech_probability[i];
+    const float prob_non_speech = 1.f - prob_speech;
+
+    // Temporary noise update, used for speech frames if the update value is
+    // lower than the previous one.
+    float noise_update_tmp =
+        gamma * prev_noise_spectrum_[i] +
+        (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
+                         prob_speech * prev_noise_spectrum_[i]);
+
+    // Time-constant based on the speech/noise_spectrum state.
+    float gamma_old = gamma;
+
+    // Increase gamma for frames likely to be speech.
+    constexpr float kProbRange = .2f;
+    gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;
+
+    // Conservative noise_spectrum update.
+    if (prob_speech < kProbRange) {
+      conservative_noise_spectrum_[i] +=
+          0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
+    }
+
+    // Noise_spectrum update.
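+    // When gamma is unchanged the pre-computed update can be reused directly;
+    // otherwise the smoothing constant switched (likely-speech frame), so the
+    // update is recomputed with the slower time constant and clamped so that
+    // a likely-speech frame may lower, but never raise, the noise estimate.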
+ if (gamma == gamma_old) { + noise_spectrum_[i] = noise_update_tmp; + } else { + noise_spectrum_[i] = + gamma * prev_noise_spectrum_[i] + + (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + + prob_speech * prev_noise_spectrum_[i]); + // Allow for noise_spectrum update downwards: If noise_spectrum update + // decreases the noise_spectrum, it is safe, so allow it to happen. + noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h new file mode 100644 index 0000000000..0c0466a679 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/quantile_noise_estimator.h" +#include "modules/audio_processing/ns/suppression_params.h" + +namespace webrtc { + +// Class for estimating the spectral characteristics of the noise in an incoming +// signal. +class NoiseEstimator { + public: + explicit NoiseEstimator(const SuppressionParams& suppression_params); + + // Prepare the estimator for analysis of a new frame. + void PrepareAnalysis(); + + // Performs the first step of the estimator update. + void PreUpdate(int32_t num_analyzed_frames, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum); + + // Performs the second step of the estimator update. + void PostUpdate( + rtc::ArrayView speech_probability, + rtc::ArrayView signal_spectrum); + + // Returns the noise spectral estimate. + rtc::ArrayView get_noise_spectrum() const { + return noise_spectrum_; + } + + // Returns the noise from the previous frame. + rtc::ArrayView get_prev_noise_spectrum() + const { + return prev_noise_spectrum_; + } + + // Returns a noise spectral estimate based on white and pink noise parameters. + rtc::ArrayView get_parametric_noise_spectrum() + const { + return parametric_noise_spectrum_; + } + rtc::ArrayView + get_conservative_noise_spectrum() const { + return conservative_noise_spectrum_; + } + + private: + const SuppressionParams& suppression_params_; + float white_noise_level_ = 0.f; + float pink_noise_numerator_ = 0.f; + float pink_noise_exp_ = 0.f; + std::array prev_noise_spectrum_; + std::array conservative_noise_spectrum_; + std::array parametric_noise_spectrum_; + std::array noise_spectrum_; + QuantileNoiseEstimator quantile_noise_estimator_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc new file mode 100644 index 0000000000..d66faa6ed4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc @@ -0,0 +1,555 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. 
All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/noise_suppressor.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <array>
+#include <memory>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Maps sample rate to number of bands.
+size_t NumBandsForRate(size_t sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
+             sample_rate_hz == 48000);
+  return sample_rate_hz / 16000;
+}
+
+// Maximum number of channels for which the channel data is stored on the
+// stack. If the number of channels is larger than this, the data is stored
+// using scratch memory that is pre-allocated on the heap. This partitioning
+// avoids wasting heap space for the common channel counts while still
+// supporting channel counts beyond the fixed stack capacity.
+constexpr size_t kMaxNumChannelsOnStack = 2;
+
+// Returns the number of channels to allocate on the heap; this is nonzero
+// only when the channel count exceeds the predefined stack capacity.
+size_t NumChannelsOnHeap(size_t num_channels) {
+  return num_channels > kMaxNumChannelsOnStack ? num_channels : 0;
+}
+
+// Hybrid Hanning and flat window for the filterbank.
+constexpr std::array<float, 96> kBlocks160w256FirstHalf = {
+    0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
+    0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
+    0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
+    0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
+    0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
+    0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
+    0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
+    0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
+    0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
+    0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
+    0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
+    0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
+    0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
+    0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
+    0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
+    0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
+    0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
+    0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
+    0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
+    0.99986614f};
+
+// Applies the filterbank window to a buffer.
+void ApplyFilterBankWindow(rtc::ArrayView<float, kFftSize> x) {
+  for (size_t i = 0; i < 96; ++i) {
+    x[i] = kBlocks160w256FirstHalf[i] * x[i];
+  }
+
+  for (size_t i = 161, k = 95; i < kFftSize; ++i, --k) {
+    RTC_DCHECK_NE(0, k);
+    x[i] = kBlocks160w256FirstHalf[k] * x[i];
+  }
+}
+
+// Extends a frame with previous data.
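+// The extended frame is the kFftSize-long analysis buffer: the kOverlapSize
+// samples carried over from the previous call followed by the current
+// kNsFrameSize samples (96 + 160 = 256). `old_data` is rewritten with the
+// tail of the extended frame so that the next call sees the correct history.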
+void FormExtendedFrame(rtc::ArrayView<const float> frame,
+                       rtc::ArrayView<float> old_data,
+                       rtc::ArrayView<float> extended_frame) {
+  std::copy(old_data.begin(), old_data.end(), extended_frame.begin());
+  std::copy(frame.begin(), frame.end(),
+            extended_frame.begin() + old_data.size());
+  std::copy(extended_frame.end() - old_data.size(), extended_frame.end(),
+            old_data.begin());
+}
+
+// Uses overlap-and-add to produce an output frame.
+void OverlapAndAdd(rtc::ArrayView<const float> extended_frame,
+                   rtc::ArrayView<float> overlap_memory,
+                   rtc::ArrayView<float> output_frame) {
+  for (size_t i = 0; i < kOverlapSize; ++i) {
+    output_frame[i] = overlap_memory[i] + extended_frame[i];
+  }
+  std::copy(extended_frame.begin() + kOverlapSize,
+            extended_frame.begin() + kNsFrameSize,
+            output_frame.begin() + kOverlapSize);
+  std::copy(extended_frame.begin() + kNsFrameSize, extended_frame.end(),
+            overlap_memory.begin());
+}
+
+// Produces a delayed frame.
+void DelaySignal(rtc::ArrayView<const float> frame,
+                 rtc::ArrayView<float> delay_buffer,
+                 rtc::ArrayView<float> delayed_frame) {
+  constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize);
+  std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin());
+  std::copy(frame.begin(), frame.begin() + kSamplesFromFrame,
+            delayed_frame.begin() + delay_buffer.size());
+
+  std::copy(frame.begin() + kSamplesFromFrame, frame.end(),
+            delay_buffer.begin());
+}
+
+// Computes the energy of an extended frame.
+float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float> x) {
+  float energy = 0.f;
+  for (float x_k : x) {
+    energy += x_k * x_k;
+  }
+
+  return energy;
+}
+
+// Computes the energy of an extended frame based on its subcomponents.
+float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float> frame,
+                                   rtc::ArrayView<const float> old_data) {
+  float energy = 0.f;
+  for (float v : old_data) {
+    energy += v * v;
+  }
+  for (float v : frame) {
+    energy += v * v;
+  }
+
+  return energy;
+}
+
+// Computes the magnitude spectrum based on an FFT output.
+void ComputeMagnitudeSpectrum(
+    rtc::ArrayView<const float> real,
+    rtc::ArrayView<const float> imag,
+    rtc::ArrayView<float, kFftSizeBy2Plus1> signal_spectrum) {
+  signal_spectrum[0] = fabsf(real[0]) + 1.f;
+  signal_spectrum[kFftSizeBy2Plus1 - 1] =
+      fabsf(real[kFftSizeBy2Plus1 - 1]) + 1.f;
+
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    signal_spectrum[i] =
+        SqrtFastApproximation(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
+  }
+}
+
+// Computes the prior and post SNRs.
+void ComputeSnr(rtc::ArrayView<const float> filter,
+                rtc::ArrayView<const float> prev_signal_spectrum,
+                rtc::ArrayView<const float> signal_spectrum,
+                rtc::ArrayView<const float> prev_noise_spectrum,
+                rtc::ArrayView<const float> noise_spectrum,
+                rtc::ArrayView<float> prior_snr,
+                rtc::ArrayView<float> post_snr) {
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    // Previous post SNR.
+    // Previous estimate: based on the previous frame with the gain filter.
+    float prev_estimate = prev_signal_spectrum[i] /
+                          (prev_noise_spectrum[i] + 0.0001f) * filter[i];
+    // Post SNR.
+    if (signal_spectrum[i] > noise_spectrum[i]) {
+      post_snr[i] = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
+    } else {
+      post_snr[i] = 0.f;
+    }
+    // The decision-directed estimate of the prior SNR is a weighted sum of
+    // the current and previous estimates.
+    prior_snr[i] = 0.98f * prev_estimate + (1.f - 0.98f) * post_snr[i];
+  }
+}
+
+// Computes the attenuating gain for the noise suppression of the upper bands.
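+// The upper bands are attenuated with a single time-domain gain per frame
+// rather than per-bin filtering: the gain blends the average low-band filter
+// gain with a sigmoid of the average speech probability near the top of the
+// low band, and is floored at the configured minimum attenuation.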
+float ComputeUpperBandsGain(
+    float minimum_attenuating_gain,
+    rtc::ArrayView<const float> filter,
+    rtc::ArrayView<const float> speech_probability,
+    rtc::ArrayView<const float> prev_analysis_signal_spectrum,
+    rtc::ArrayView<const float> signal_spectrum) {
+  // Average the speech probability and filter gain for the end of the lowest
+  // band.
+  constexpr int kNumAvgBins = 32;
+  constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins;
+
+  float avg_prob_speech = 0.f;
+  float avg_filter_gain = 0.f;
+  for (size_t i = kFftSizeBy2Plus1 - kNumAvgBins - 1; i < kFftSizeBy2Plus1 - 1;
+       i++) {
+    avg_prob_speech += speech_probability[i];
+    avg_filter_gain += filter[i];
+  }
+  avg_prob_speech = avg_prob_speech * kOneByNumAvgBins;
+  avg_filter_gain = avg_filter_gain * kOneByNumAvgBins;
+
+  // If the speech was suppressed by a component between Analyze and Process
+  // (e.g., by an AEC), it should not be considered speech for the purpose of
+  // high band suppression. To that end, the speech probability is scaled
+  // accordingly.
+  float sum_analysis_spectrum = 0.f;
+  float sum_processing_spectrum = 0.f;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    sum_analysis_spectrum += prev_analysis_signal_spectrum[i];
+    sum_processing_spectrum += signal_spectrum[i];
+  }
+
+  // The magnitude spectrum computation enforces the spectrum to be strictly
+  // positive.
+  RTC_DCHECK_GT(sum_analysis_spectrum, 0.f);
+  avg_prob_speech *= sum_processing_spectrum / sum_analysis_spectrum;
+
+  // Compute the gain based on the speech probability.
+  float gain =
+      0.5f * (1.f + static_cast<float>(tanh(2.f * avg_prob_speech - 1.f)));
+
+  // Combine the gain with the low band gain.
+  if (avg_prob_speech >= 0.5f) {
+    gain = 0.25f * gain + 0.75f * avg_filter_gain;
+  } else {
+    gain = 0.5f * gain + 0.5f * avg_filter_gain;
+  }
+
+  // Make sure the gain is within the flooring range.
+  return std::min(std::max(gain, minimum_attenuating_gain), 1.f);
+}
+
+}  // namespace
+
+NoiseSuppressor::ChannelState::ChannelState(
+    const SuppressionParams& suppression_params,
+    size_t num_bands)
+    : wiener_filter(suppression_params),
+      noise_estimator(suppression_params),
+      process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) {
+  analyze_analysis_memory.fill(0.f);
+  prev_analysis_signal_spectrum.fill(1.f);
+  process_analysis_memory.fill(0.f);
+  process_synthesis_memory.fill(0.f);
+  for (auto& d : process_delay_memory) {
+    d.fill(0.f);
+  }
+}
+
+NoiseSuppressor::NoiseSuppressor(const NsConfig& config,
+                                 size_t sample_rate_hz,
+                                 size_t num_channels)
+    : num_bands_(NumBandsForRate(sample_rate_hz)),
+      num_channels_(num_channels),
+      suppression_params_(config.target_level),
+      filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)),
+      upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)),
+      energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)),
+      gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)),
+      channels_(num_channels_) {
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    channels_[ch] =
+        std::make_unique<ChannelState>(suppression_params_, num_bands_);
+  }
+}
+
+void NoiseSuppressor::AggregateWienerFilters(
+    rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const {
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter0 =
+      channels_[0]->wiener_filter.get_filter();
+  std::copy(filter0.begin(), filter0.end(), filter.begin());
+
+  for (size_t ch = 1; ch < num_channels_; ++ch) {
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter_ch =
+        channels_[ch]->wiener_filter.get_filter();
+
+    for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) {
+      filter[k] = std::min(filter[k], filter_ch[k]);
+    }
+  }
+}
+
+void NoiseSuppressor::Analyze(const AudioBuffer& audio) {
+  // Prepare the noise estimator for the analysis stage.
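+  // PrepareAnalysis() snapshots each channel's noise spectrum from the
+  // previous frame so that the pre- and post-updates below can compare
+  // against it.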
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    channels_[ch]->noise_estimator.PrepareAnalysis();
+  }
+
+  // Check for zero frames.
+  bool zero_frame = true;
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    rtc::ArrayView<const float> y_band0(&audio.split_bands_const(ch)[0][0],
+                                        kNsFrameSize);
+    float energy = ComputeEnergyOfExtendedFrame(
+        y_band0, channels_[ch]->analyze_analysis_memory);
+    if (energy > 0.f) {
+      zero_frame = false;
+      break;
+    }
+  }
+
+  if (zero_frame) {
+    // We want to avoid updating statistics in this case:
+    // Updating feature statistics when we have zeros only will cause
+    // thresholds to move towards zero signal situations. This in turn has the
+    // effect that once the signal is "turned on" (non-zero values) everything
+    // will be treated as speech and there is no noise suppression effect.
+    // Depending on the duration of the inactive signal it takes a
+    // considerable amount of time for the system to learn what is noise and
+    // what is speech.
+    return;
+  }
+
+  // Only update the analysis counter for frames that are properly analyzed.
+  if (++num_analyzed_frames_ < 0) {
+    num_analyzed_frames_ = 0;
+  }
+
+  // Analyze all channels.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    std::unique_ptr<ChannelState>& ch_p = channels_[ch];
+    rtc::ArrayView<const float> y_band0(&audio.split_bands_const(ch)[0][0],
+                                        kNsFrameSize);
+
+    // Form an extended frame and apply analysis filter bank windowing.
+    std::array<float, kFftSize> extended_frame;
+    FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame);
+    ApplyFilterBankWindow(extended_frame);
+
+    // Compute the magnitude spectrum.
+    std::array<float, kFftSizeBy2Plus1> real;
+    std::array<float, kFftSizeBy2Plus1> imag;
+    fft_.Fft(extended_frame, real, imag);
+
+    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
+    ComputeMagnitudeSpectrum(real, imag, signal_spectrum);
+
+    // Compute energies.
+    float signal_energy = 0.f;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      signal_energy += real[i] * real[i] + imag[i] * imag[i];
+    }
+    signal_energy /= kFftSizeBy2Plus1;
+
+    float signal_spectral_sum = 0.f;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      signal_spectral_sum += signal_spectrum[i];
+    }
+
+    // Estimate the noise spectra and the probability estimates of speech
+    // presence.
+    ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum,
+                                    signal_spectral_sum);
+
+    std::array<float, kFftSizeBy2Plus1> post_snr;
+    std::array<float, kFftSizeBy2Plus1> prior_snr;
+    ComputeSnr(ch_p->wiener_filter.get_filter(),
+               ch_p->prev_analysis_signal_spectrum, signal_spectrum,
+               ch_p->noise_estimator.get_prev_noise_spectrum(),
+               ch_p->noise_estimator.get_noise_spectrum(), prior_snr,
+               post_snr);
+
+    ch_p->speech_probability_estimator.Update(
+        num_analyzed_frames_, prior_snr, post_snr,
+        ch_p->noise_estimator.get_conservative_noise_spectrum(),
+        signal_spectrum, signal_spectral_sum, signal_energy);
+
+    ch_p->noise_estimator.PostUpdate(
+        ch_p->speech_probability_estimator.get_probability(), signal_spectrum);
+
+    // Store the magnitude spectrum to make it available for the process
+    // method.
+    std::copy(signal_spectrum.begin(), signal_spectrum.end(),
+              ch_p->prev_analysis_signal_spectrum.begin());
+  }
+}
+
+void NoiseSuppressor::Process(AudioBuffer* audio) {
+  // Select the space for storing data during the processing.
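+  // Scratch buffers are chosen per call: the fixed stack arrays cover up to
+  // kMaxNumChannelsOnStack channels, and for larger channel counts the views
+  // are re-pointed at the heap vectors pre-allocated in the constructor, so
+  // no allocation happens on the real-time audio path.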
+  std::array<FilterBankState, kMaxNumChannelsOnStack> filter_bank_states_stack;
+  rtc::ArrayView<FilterBankState> filter_bank_states(
+      filter_bank_states_stack.data(), num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> upper_band_gains_stack;
+  rtc::ArrayView<float> upper_band_gains(upper_band_gains_stack.data(),
+                                         num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> energies_before_filtering_stack;
+  rtc::ArrayView<float> energies_before_filtering(
+      energies_before_filtering_stack.data(), num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> gain_adjustments_stack;
+  rtc::ArrayView<float> gain_adjustments(gain_adjustments_stack.data(),
+                                         num_channels_);
+  if (NumChannelsOnHeap(num_channels_) > 0) {
+    // If the stack-allocated space is too small, use the heap for storing the
+    // data.
+    filter_bank_states = rtc::ArrayView<FilterBankState>(
+        filter_bank_states_heap_.data(), num_channels_);
+    upper_band_gains =
+        rtc::ArrayView<float>(upper_band_gains_heap_.data(), num_channels_);
+    energies_before_filtering = rtc::ArrayView<float>(
+        energies_before_filtering_heap_.data(), num_channels_);
+    gain_adjustments =
+        rtc::ArrayView<float>(gain_adjustments_heap_.data(), num_channels_);
+  }
+
+  // Compute the suppression filters for all channels.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    // Form an extended frame and apply analysis filter bank windowing.
+    rtc::ArrayView<float> y_band0(&audio->split_bands(ch)[0][0], kNsFrameSize);
+
+    FormExtendedFrame(y_band0, channels_[ch]->process_analysis_memory,
+                      filter_bank_states[ch].extended_frame);
+
+    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
+
+    energies_before_filtering[ch] =
+        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
+
+    // Perform filter bank analysis and compute the magnitude spectrum.
+    fft_.Fft(filter_bank_states[ch].extended_frame,
+             filter_bank_states[ch].real, filter_bank_states[ch].imag);
+
+    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
+    ComputeMagnitudeSpectrum(filter_bank_states[ch].real,
+                             filter_bank_states[ch].imag, signal_spectrum);
+
+    // Compute the frequency domain gain filter for noise attenuation.
+    channels_[ch]->wiener_filter.Update(
+        num_analyzed_frames_,
+        channels_[ch]->noise_estimator.get_noise_spectrum(),
+        channels_[ch]->noise_estimator.get_prev_noise_spectrum(),
+        channels_[ch]->noise_estimator.get_parametric_noise_spectrum(),
+        signal_spectrum);
+
+    if (num_bands_ > 1) {
+      // Compute the time-domain gain for attenuating the noise in the upper
+      // bands.
+      upper_band_gains[ch] = ComputeUpperBandsGain(
+          suppression_params_.minimum_attenuating_gain,
+          channels_[ch]->wiener_filter.get_filter(),
+          channels_[ch]->speech_probability_estimator.get_probability(),
+          channels_[ch]->prev_analysis_signal_spectrum, signal_spectrum);
+    }
+  }
+
+  // Only do the below processing if the output of the audio processing module
+  // is used.
+  if (!capture_output_used_) {
+    return;
+  }
+
+  // Aggregate the Wiener filters for all channels.
+  std::array<float, kFftSizeBy2Plus1> filter_data;
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter = filter_data;
+  if (num_channels_ == 1) {
+    filter = channels_[0]->wiener_filter.get_filter();
+  } else {
+    AggregateWienerFilters(filter_data);
+  }
+
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    // Apply the filter to the lower band.
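+    // The same real-valued gain is applied to the real and imaginary parts
+    // of each bin, i.e. a zero-phase spectral attenuation.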
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      filter_bank_states[ch].real[i] *= filter[i];
+      filter_bank_states[ch].imag[i] *= filter[i];
+    }
+  }
+
+  // Perform filter bank synthesis.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    fft_.Ifft(filter_bank_states[ch].real, filter_bank_states[ch].imag,
+              filter_bank_states[ch].extended_frame);
+  }
+
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    const float energy_after_filtering =
+        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
+
+    // Apply the synthesis window.
+    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
+
+    // Compute the adjustment of the noise attenuation filter based on the
+    // effect of the attenuation.
+    gain_adjustments[ch] =
+        channels_[ch]->wiener_filter.ComputeOverallScalingFactor(
+            num_analyzed_frames_,
+            channels_[ch]->speech_probability_estimator.get_prior_probability(),
+            energies_before_filtering[ch], energy_after_filtering);
+  }
+
+  // Select and apply the adjustment of the noise attenuation filter based on
+  // the effect of the attenuation.
+  float gain_adjustment = gain_adjustments[0];
+  for (size_t ch = 1; ch < num_channels_; ++ch) {
+    gain_adjustment = std::min(gain_adjustment, gain_adjustments[ch]);
+  }
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    for (size_t i = 0; i < kFftSize; ++i) {
+      filter_bank_states[ch].extended_frame[i] =
+          gain_adjustment * filter_bank_states[ch].extended_frame[i];
+    }
+  }
+
+  // Use overlap-and-add to form the output frame of the lowest band.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    rtc::ArrayView<float> y_band0(&audio->split_bands(ch)[0][0], kNsFrameSize);
+    OverlapAndAdd(filter_bank_states[ch].extended_frame,
+                  channels_[ch]->process_synthesis_memory, y_band0);
+  }
+
+  if (num_bands_ > 1) {
+    // Select the noise attenuating gain to apply to the upper bands.
+    float upper_band_gain = upper_band_gains[0];
+    for (size_t ch = 1; ch < num_channels_; ++ch) {
+      upper_band_gain = std::min(upper_band_gain, upper_band_gains[ch]);
+    }
+
+    // Process the upper bands.
+    for (size_t ch = 0; ch < num_channels_; ++ch) {
+      for (size_t b = 1; b < num_bands_; ++b) {
+        // Delay the upper bands to match the delay of the filterbank applied
+        // to the lowest band.
+        rtc::ArrayView<float> y_band(&audio->split_bands(ch)[b][0],
+                                     kNsFrameSize);
+        std::array<float, kNsFrameSize> delayed_frame;
+        DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1],
+                    delayed_frame);
+
+        // Apply the time-domain noise-attenuating gain.
+        for (size_t j = 0; j < kNsFrameSize; j++) {
+          y_band[j] = upper_band_gain * delayed_frame[j];
+        }
+      }
+    }
+  }
+
+  // Limit the output to the allowed range.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    for (size_t b = 0; b < num_bands_; ++b) {
+      rtc::ArrayView<float> y_band(&audio->split_bands(ch)[b][0],
+                                   kNsFrameSize);
+      for (size_t j = 0; j < kNsFrameSize; j++) {
+        y_band[j] = std::min(std::max(y_band[j], -32768.f), 32767.f);
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h
new file mode 100644
index 0000000000..1e321cf4a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h
@@ -0,0 +1,92 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/ns/noise_estimator.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/ns_config.h" +#include "modules/audio_processing/ns/ns_fft.h" +#include "modules/audio_processing/ns/speech_probability_estimator.h" +#include "modules/audio_processing/ns/wiener_filter.h" + +namespace webrtc { + +// Class for suppressing noise in a signal. +class NoiseSuppressor { + public: + NoiseSuppressor(const NsConfig& config, + size_t sample_rate_hz, + size_t num_channels); + NoiseSuppressor(const NoiseSuppressor&) = delete; + NoiseSuppressor& operator=(const NoiseSuppressor&) = delete; + + // Analyses the signal (typically applied before the AEC to avoid analyzing + // any comfort noise signal). + void Analyze(const AudioBuffer& audio); + + // Applies noise suppression. + void Process(AudioBuffer* audio); + + // Specifies whether the capture output will be used. The purpose of this is + // to allow the noise suppressor to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + void SetCaptureOutputUsage(bool capture_output_used) { + capture_output_used_ = capture_output_used; + } + + private: + const size_t num_bands_; + const size_t num_channels_; + const SuppressionParams suppression_params_; + int32_t num_analyzed_frames_ = -1; + NrFft fft_; + bool capture_output_used_ = true; + + struct ChannelState { + ChannelState(const SuppressionParams& suppression_params, size_t num_bands); + + SpeechProbabilityEstimator speech_probability_estimator; + WienerFilter wiener_filter; + NoiseEstimator noise_estimator; + std::array prev_analysis_signal_spectrum; + std::array analyze_analysis_memory; + std::array process_analysis_memory; + std::array process_synthesis_memory; + std::vector> process_delay_memory; + }; + + struct FilterBankState { + std::array real; + std::array imag; + std::array extended_frame; + }; + + std::vector filter_bank_states_heap_; + std::vector upper_band_gains_heap_; + std::vector energies_before_filtering_heap_; + std::vector gain_adjustments_heap_; + std::vector> channels_; + + // Aggregates the Wiener filters into a single filter to use. + void AggregateWienerFilters( + rtc::ArrayView filter) const; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc new file mode 100644 index 0000000000..28ea63ae40 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/noise_suppressor.h" + +#include +#include +#include +#include +#include + +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::string ProduceDebugText(int sample_rate_hz, + size_t num_channels, + NsConfig::SuppressionLevel level) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz << ", num_channels: " << num_channels + << ", level: " << static_cast(level); + return ss.Release(); +} + +void PopulateInputFrameWithIdenticalChannels(size_t num_channels, + size_t num_bands, + size_t frame_index, + AudioBuffer* audio) { + for (size_t ch = 0; ch < num_channels; ++ch) { + for (size_t b = 0; b < num_bands; ++b) { + for (size_t i = 0; i < 160; ++i) { + float value = static_cast(frame_index * 160 + i); + audio->split_bands(ch)[b][i] = (value > 0 ? 5000 * b + value : 0); + } + } + } +} + +void VerifyIdenticalChannels(size_t num_channels, + size_t num_bands, + size_t frame_index, + const AudioBuffer& audio) { + EXPECT_GT(num_channels, 1u); + for (size_t ch = 1; ch < num_channels; ++ch) { + for (size_t b = 0; b < num_bands; ++b) { + for (size_t i = 0; i < 160; ++i) { + EXPECT_EQ(audio.split_bands_const(ch)[b][i], + audio.split_bands_const(0)[b][i]); + } + } + } +} + +} // namespace + +// Verifies that the same noise reduction effect is applied to all channels. +TEST(NoiseSuppressor, IdenticalChannelEffects) { + for (auto rate : {16000, 32000, 48000}) { + for (auto num_channels : {1, 4, 8}) { + for (auto level : + {NsConfig::SuppressionLevel::k6dB, NsConfig::SuppressionLevel::k12dB, + NsConfig::SuppressionLevel::k18dB, + NsConfig::SuppressionLevel::k21dB}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels, level)); + + const size_t num_bands = rate / 16000; + // const int frame_length = rtc::CheckedDivExact(rate, 100); + AudioBuffer audio(rate, num_channels, rate, num_channels, rate, + num_channels); + NsConfig cfg; + NoiseSuppressor ns(cfg, rate, num_channels); + for (size_t frame_index = 0; frame_index < 1000; ++frame_index) { + if (rate > 16000) { + audio.SplitIntoFrequencyBands(); + } + + PopulateInputFrameWithIdenticalChannels(num_channels, num_bands, + frame_index, &audio); + + ns.Analyze(audio); + ns.Process(&audio); + if (num_channels > 1) { + VerifyIdenticalChannels(num_channels, num_bands, frame_index, + audio); + } + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h new file mode 100644 index 0000000000..d6149f72a7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ + +#include + +namespace webrtc { + +constexpr size_t kFftSize = 256; +constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1; +constexpr size_t kNsFrameSize = 160; +constexpr size_t kOverlapSize = kFftSize - kNsFrameSize; + +constexpr int kShortStartupPhaseBlocks = 50; +constexpr int kLongStartupPhaseBlocks = 200; +constexpr int kFeatureUpdateWindowSize = 500; + +constexpr float kLtrFeatureThr = 0.5f; +constexpr float kBinSizeLrt = 0.1f; +constexpr float kBinSizeSpecFlat = 0.05f; +constexpr float kBinSizeSpecDiff = 0.1f; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h new file mode 100644 index 0000000000..0a285e9cea --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ + +namespace webrtc { + +// Config struct for the noise suppressor +struct NsConfig { + enum class SuppressionLevel { k6dB, k12dB, k18dB, k21dB }; + SuppressionLevel target_level = SuppressionLevel::k12dB; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc new file mode 100644 index 0000000000..264c46972c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/ns/ns_fft.h"
+
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+
+namespace webrtc {
+
+NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) {
+  // Initialize WebRtc_rdft (setting bit_reversal_state_[0] to 0 triggers
+  // initialization).
+  bit_reversal_state_[0] = 0;
+  std::array<float, kFftSize> tmp_buffer;
+  tmp_buffer.fill(0.f);
+  WebRtc_rdft(kFftSize, 1, tmp_buffer.data(), bit_reversal_state_.data(),
+              tables_.data());
+}
+
+void NrFft::Fft(rtc::ArrayView<float, kFftSize> time_data,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> real,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> imag) {
+  WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(),
+              tables_.data());
+
+  imag[0] = 0;
+  real[0] = time_data[0];
+
+  imag[kFftSizeBy2Plus1 - 1] = 0;
+  real[kFftSizeBy2Plus1 - 1] = time_data[1];
+
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    real[i] = time_data[2 * i];
+    imag[i] = time_data[2 * i + 1];
+  }
+}
+
+void NrFft::Ifft(rtc::ArrayView<const float, kFftSizeBy2Plus1> real,
+                 rtc::ArrayView<const float, kFftSizeBy2Plus1> imag,
+                 rtc::ArrayView<float, kFftSize> time_data) {
+  time_data[0] = real[0];
+  time_data[1] = real[kFftSizeBy2Plus1 - 1];
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    time_data[2 * i] = real[i];
+    time_data[2 * i + 1] = imag[i];
+  }
+  WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(),
+              tables_.data());
+
+  // Scale the output.
+  constexpr float kScaling = 2.f / kFftSize;
+  for (float& d : time_data) {
+    d *= kScaling;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h
new file mode 100644
index 0000000000..539251eef2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
+#define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+
+namespace webrtc {
+
+// Wrapper class providing 256 point FFT functionality.
+class NrFft {
+ public:
+  NrFft();
+  NrFft(const NrFft&) = delete;
+  NrFft& operator=(const NrFft&) = delete;
+
+  // Transforms the signal from time to frequency domain.
+  void Fft(rtc::ArrayView<float, kFftSize> time_data,
+           rtc::ArrayView<float, kFftSizeBy2Plus1> real,
+           rtc::ArrayView<float, kFftSizeBy2Plus1> imag);
+
+  // Transforms the signal from frequency to time domain.
+  void Ifft(rtc::ArrayView<const float, kFftSizeBy2Plus1> real,
+            rtc::ArrayView<const float, kFftSizeBy2Plus1> imag,
+            rtc::ArrayView<float, kFftSize> time_data);
+
+ private:
+  std::vector<size_t> bit_reversal_state_;
+  std::vector<float> tables_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build
new file mode 100644
index 0000000000..12076eedcb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build
@@ -0,0 +1,245 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + 
DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("ns_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc new file mode 100644 index 0000000000..f25a1e2060 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/prior_signal_model.h" + +namespace webrtc { + +PriorSignalModel::PriorSignalModel(float lrt_initial_value) + : lrt(lrt_initial_value) {} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h new file mode 100644 index 0000000000..dcfa7ea709 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ +#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ + +namespace webrtc { + +// Struct for storing the prior signal model parameters. +struct PriorSignalModel { + explicit PriorSignalModel(float lrt_initial_value); + PriorSignalModel(const PriorSignalModel&) = delete; + PriorSignalModel& operator=(const PriorSignalModel&) = delete; + + float lrt; + float flatness_threshold = .5f; + float template_diff_threshold = .5f; + float lrt_weighting = 1.f; + float flatness_weighting = 0.f; + float difference_weighting = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc new file mode 100644 index 0000000000..c814658e57 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/prior_signal_model_estimator.h" + +#include +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Identifies the first of the two largest peaks in the histogram. +void FindFirstOfTwoLargestPeaks( + float bin_size, + rtc::ArrayView spectral_flatness, + float* peak_position, + int* peak_weight) { + RTC_DCHECK(peak_position); + RTC_DCHECK(peak_weight); + + int peak_value = 0; + int secondary_peak_value = 0; + *peak_position = 0.f; + float secondary_peak_position = 0.f; + *peak_weight = 0; + int secondary_peak_weight = 0; + + // Identify the two largest peaks. 
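+  // Note on the mapping assumed below: histogram bin i covers feature values
+  // [i * bin_size, (i + 1) * bin_size), so the bin midpoint
+  // (i + 0.5f) * bin_size is used as the peak position when bin i wins.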
+ for (int i = 0; i < kHistogramSize; ++i) { + const float bin_mid = (i + 0.5f) * bin_size; + if (spectral_flatness[i] > peak_value) { + // Found new "first" peak candidate. + secondary_peak_value = peak_value; + secondary_peak_weight = *peak_weight; + secondary_peak_position = *peak_position; + + peak_value = spectral_flatness[i]; + *peak_weight = spectral_flatness[i]; + *peak_position = bin_mid; + } else if (spectral_flatness[i] > secondary_peak_value) { + // Found new "second" peak candidate. + secondary_peak_value = spectral_flatness[i]; + secondary_peak_weight = spectral_flatness[i]; + secondary_peak_position = bin_mid; + } + } + + // Merge the peaks if they are close. + if ((fabs(secondary_peak_position - *peak_position) < 2 * bin_size) && + (secondary_peak_weight > 0.5f * (*peak_weight))) { + *peak_weight += secondary_peak_weight; + *peak_position = 0.5f * (*peak_position + secondary_peak_position); + } +} + +void UpdateLrt(rtc::ArrayView lrt_histogram, + float* prior_model_lrt, + bool* low_lrt_fluctuations) { + RTC_DCHECK(prior_model_lrt); + RTC_DCHECK(low_lrt_fluctuations); + + float average = 0.f; + float average_compl = 0.f; + float average_squared = 0.f; + int count = 0; + + for (int i = 0; i < 10; ++i) { + float bin_mid = (i + 0.5f) * kBinSizeLrt; + average += lrt_histogram[i] * bin_mid; + count += lrt_histogram[i]; + } + if (count > 0) { + average = average / count; + } + + for (int i = 0; i < kHistogramSize; ++i) { + float bin_mid = (i + 0.5f) * kBinSizeLrt; + average_squared += lrt_histogram[i] * bin_mid * bin_mid; + average_compl += lrt_histogram[i] * bin_mid; + } + constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize; + average_squared = average_squared * kOneFeatureUpdateWindowSize; + average_compl = average_compl * kOneFeatureUpdateWindowSize; + + // Fluctuation limit of LRT feature. + *low_lrt_fluctuations = average_squared - average * average_compl < 0.05f; + + // Get threshold for LRT feature. + constexpr float kMaxLrt = 1.f; + constexpr float kMinLrt = .2f; + if (*low_lrt_fluctuations) { + // Very low fluctuation, so likely noise. + *prior_model_lrt = kMaxLrt; + } else { + *prior_model_lrt = std::min(kMaxLrt, std::max(kMinLrt, 1.2f * average)); + } +} + +} // namespace + +PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value) + : prior_model_(lrt_initial_value) {} + +// Extract thresholds for feature parameters and computes the threshold/weights. +void PriorSignalModelEstimator::Update(const Histograms& histograms) { + bool low_lrt_fluctuations; + UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations); + + // For spectral flatness and spectral difference: compute the main peaks of + // the histograms. + float spectral_flatness_peak_position; + int spectral_flatness_peak_weight; + FindFirstOfTwoLargestPeaks( + kBinSizeSpecFlat, histograms.get_spectral_flatness(), + &spectral_flatness_peak_position, &spectral_flatness_peak_weight); + + float spectral_diff_peak_position = 0.f; + int spectral_diff_peak_weight = 0; + FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(), + &spectral_diff_peak_position, + &spectral_diff_peak_weight); + + // Reject if weight of peaks is not large enough, or peak value too small. + // Peak limit for spectral flatness (varies between 0 and 1). + const int use_spec_flat = spectral_flatness_peak_weight < 0.3f * 500 || + spectral_flatness_peak_position < 0.6f + ? 
0 + : 1; + + // Reject if weight of peaks is not large enough or if fluctuation of the LRT + // feature are very low, indicating a noise state. + const int use_spec_diff = + spectral_diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations ? 0 : 1; + + // Update the model. + prior_model_.template_diff_threshold = 1.2f * spectral_diff_peak_position; + prior_model_.template_diff_threshold = + std::min(1.f, std::max(0.16f, prior_model_.template_diff_threshold)); + + float one_by_feature_sum = 1.f / (1.f + use_spec_flat + use_spec_diff); + prior_model_.lrt_weighting = one_by_feature_sum; + + if (use_spec_flat == 1) { + prior_model_.flatness_threshold = 0.9f * spectral_flatness_peak_position; + prior_model_.flatness_threshold = + std::min(.95f, std::max(0.1f, prior_model_.flatness_threshold)); + prior_model_.flatness_weighting = one_by_feature_sum; + } else { + prior_model_.flatness_weighting = 0.f; + } + + if (use_spec_diff == 1) { + prior_model_.difference_weighting = one_by_feature_sum; + } else { + prior_model_.difference_weighting = 0.f; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h new file mode 100644 index 0000000000..d178323dba --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ + +#include "modules/audio_processing/ns/histograms.h" +#include "modules/audio_processing/ns/prior_signal_model.h" + +namespace webrtc { + +// Estimator of the prior signal model parameters. +class PriorSignalModelEstimator { + public: + explicit PriorSignalModelEstimator(float lrt_initial_value); + PriorSignalModelEstimator(const PriorSignalModelEstimator&) = delete; + PriorSignalModelEstimator& operator=(const PriorSignalModelEstimator&) = + delete; + + // Updates the model estimate. + void Update(const Histograms& h); + + // Returns the estimated model. + const PriorSignalModel& get_prior_model() const { return prior_model_; } + + private: + PriorSignalModel prior_model_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc new file mode 100644 index 0000000000..bab494ff21 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/quantile_noise_estimator.h" + +#include + +#include "modules/audio_processing/ns/fast_math.h" + +namespace webrtc { + +QuantileNoiseEstimator::QuantileNoiseEstimator() { + quantile_.fill(0.f); + density_.fill(0.3f); + log_quantile_.fill(8.f); + + constexpr float kOneBySimult = 1.f / kSimult; + for (size_t i = 0; i < kSimult; ++i) { + counter_[i] = floor(kLongStartupPhaseBlocks * (i + 1.f) * kOneBySimult); + } +} + +void QuantileNoiseEstimator::Estimate( + rtc::ArrayView signal_spectrum, + rtc::ArrayView noise_spectrum) { + std::array log_spectrum; + LogApproximation(signal_spectrum, log_spectrum); + + int quantile_index_to_return = -1; + // Loop over simultaneous estimates. + for (int s = 0, k = 0; s < kSimult; + ++s, k += static_cast(kFftSizeBy2Plus1)) { + const float one_by_counter_plus_1 = 1.f / (counter_[s] + 1.f); + for (int i = 0, j = k; i < static_cast(kFftSizeBy2Plus1); ++i, ++j) { + // Update log quantile estimate. + const float delta = density_[j] > 1.f ? 40.f / density_[j] : 40.f; + + const float multiplier = delta * one_by_counter_plus_1; + if (log_spectrum[i] > log_quantile_[j]) { + log_quantile_[j] += 0.25f * multiplier; + } else { + log_quantile_[j] -= 0.75f * multiplier; + } + + // Update density estimate. + constexpr float kWidth = 0.01f; + constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth); + if (fabs(log_spectrum[i] - log_quantile_[j]) < kWidth) { + density_[j] = (counter_[s] * density_[j] + kOneByWidthPlus2) * + one_by_counter_plus_1; + } + } + + if (counter_[s] >= kLongStartupPhaseBlocks) { + counter_[s] = 0; + if (num_updates_ >= kLongStartupPhaseBlocks) { + quantile_index_to_return = k; + } + } + + ++counter_[s]; + } + + // Sequentially update the noise during startup. + if (num_updates_ < kLongStartupPhaseBlocks) { + // Use the last "s" to get noise during startup that differ from zero. + quantile_index_to_return = kFftSizeBy2Plus1 * (kSimult - 1); + ++num_updates_; + } + + if (quantile_index_to_return >= 0) { + ExpApproximation( + rtc::ArrayView(&log_quantile_[quantile_index_to_return], + kFftSizeBy2Plus1), + quantile_); + } + + std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h new file mode 100644 index 0000000000..67d1512209 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" + +namespace webrtc { + +constexpr int kSimult = 3; + +// For quantile noise estimation. +class QuantileNoiseEstimator { + public: + QuantileNoiseEstimator(); + QuantileNoiseEstimator(const QuantileNoiseEstimator&) = delete; + QuantileNoiseEstimator& operator=(const QuantileNoiseEstimator&) = delete; + + // Estimate noise. 
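+  // Expects one magnitude-spectrum frame per call and writes the running
+  // quantile estimate into `noise_spectrum`. A rough caller-side sketch
+  // (the variable names are illustrative only):
+  //   std::array<float, kFftSizeBy2Plus1> signal_spectrum, noise_spectrum;
+  //   estimator.Estimate(signal_spectrum, noise_spectrum);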
+ void Estimate(rtc::ArrayView signal_spectrum, + rtc::ArrayView noise_spectrum); + + private: + std::array density_; + std::array log_quantile_; + std::array quantile_; + std::array counter_; + int num_updates_ = 1; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc new file mode 100644 index 0000000000..364bfd00d8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +SignalModel::SignalModel() { + constexpr float kSfFeatureThr = 0.5f; + + lrt = kLtrFeatureThr; + spectral_flatness = kSfFeatureThr; + spectral_diff = kSfFeatureThr; + avg_log_lrt.fill(kLtrFeatureThr); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h new file mode 100644 index 0000000000..6614d38a38 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ +#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ + +#include + +#include "modules/audio_processing/ns/ns_common.h" + +namespace webrtc { + +struct SignalModel { + SignalModel(); + SignalModel(const SignalModel&) = delete; + SignalModel& operator=(const SignalModel&) = delete; + + float lrt; + float spectral_diff; + float spectral_flatness; + // Log LRT factor with time-smoothing. + std::array avg_log_lrt; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc new file mode 100644 index 0000000000..67dd3bb687 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/signal_model_estimator.h" + +#include "modules/audio_processing/ns/fast_math.h" + +namespace webrtc { + +namespace { + +constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; + +// Computes the difference measure between input spectrum and a template/learned +// noise spectrum. +float ComputeSpectralDiff( + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float diff_normalization) { + // spectral_diff = var(signal_spectrum) - cov(signal_spectrum, magnAvgPause)^2 + // / var(magnAvgPause) + + // Compute average quantities. + float noise_average = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Conservative smooth noise spectrum from pause frames. + noise_average += conservative_noise_spectrum[i]; + } + noise_average = noise_average * kOneByFftSizeBy2Plus1; + float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1; + + // Compute variance and covariance quantities. + float covariance = 0.f; + float noise_variance = 0.f; + float signal_variance = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + float signal_diff = signal_spectrum[i] - signal_average; + float noise_diff = conservative_noise_spectrum[i] - noise_average; + covariance += signal_diff * noise_diff; + noise_variance += noise_diff * noise_diff; + signal_variance += signal_diff * signal_diff; + } + covariance *= kOneByFftSizeBy2Plus1; + noise_variance *= kOneByFftSizeBy2Plus1; + signal_variance *= kOneByFftSizeBy2Plus1; + + // Update of average magnitude spectrum. + float spectral_diff = + signal_variance - (covariance * covariance) / (noise_variance + 0.0001f); + // Normalize. + return spectral_diff / (diff_normalization + 0.0001f); +} + +// Updates the spectral flatness based on the input spectrum. +void UpdateSpectralFlatness( + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float* spectral_flatness) { + RTC_DCHECK(spectral_flatness); + + // Compute log of ratio of the geometric to arithmetic mean (handle the log(0) + // separately). + constexpr float kAveraging = 0.3f; + float avg_spect_flatness_num = 0.f; + for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { + if (signal_spectrum[i] == 0.f) { + *spectral_flatness -= kAveraging * (*spectral_flatness); + return; + } + } + + for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { + avg_spect_flatness_num += LogApproximation(signal_spectrum[i]); + } + + float avg_spect_flatness_denom = signal_spectral_sum - signal_spectrum[0]; + + avg_spect_flatness_denom = avg_spect_flatness_denom * kOneByFftSizeBy2Plus1; + avg_spect_flatness_num = avg_spect_flatness_num * kOneByFftSizeBy2Plus1; + + float spectral_tmp = + ExpApproximation(avg_spect_flatness_num) / avg_spect_flatness_denom; + + // Time-avg update of spectral flatness feature. + *spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness); +} + +// Updates the log LRT measures. 
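+// Up to the small regularizer and the log approximation used below, the
+// per-bin update amounts to
+//   avg_log_lrt[i] += 0.5f * ((post_snr[i] + 1.f) * 2.f * prior_snr[i] /
+//                                 (1.f + 2.f * prior_snr[i])
+//                             - log(1.f + 2.f * prior_snr[i]) - avg_log_lrt[i])
+// with the scalar `lrt` feature set to the mean of avg_log_lrt over all bins.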
+void UpdateSpectralLrt(rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView avg_log_lrt, + float* lrt) { + RTC_DCHECK(lrt); + + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + float tmp1 = 1.f + 2.f * prior_snr[i]; + float tmp2 = 2.f * prior_snr[i] / (tmp1 + 0.0001f); + float bessel_tmp = (post_snr[i] + 1.f) * tmp2; + avg_log_lrt[i] += + .5f * (bessel_tmp - LogApproximation(tmp1) - avg_log_lrt[i]); + } + + float log_lrt_time_avg_k_sum = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + log_lrt_time_avg_k_sum += avg_log_lrt[i]; + } + *lrt = log_lrt_time_avg_k_sum * kOneByFftSizeBy2Plus1; +} + +} // namespace + +SignalModelEstimator::SignalModelEstimator() + : prior_model_estimator_(kLtrFeatureThr) {} + +void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames, + float signal_energy) { + diff_normalization_ *= num_analyzed_frames; + diff_normalization_ += signal_energy; + diff_normalization_ /= (num_analyzed_frames + 1); +} + +// Update the noise features. +void SignalModelEstimator::Update( + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy) { + // Compute spectral flatness on input spectrum. + UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum, + &features_.spectral_flatness); + + // Compute difference of input spectrum with learned/estimated noise spectrum. + float spectral_diff = + ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum, + signal_spectral_sum, diff_normalization_); + // Compute time-avg update of difference feature. + features_.spectral_diff += 0.3f * (spectral_diff - features_.spectral_diff); + + signal_energy_sum_ += signal_energy; + + // Compute histograms for parameter decisions (thresholds and weights for + // features). Parameters are extracted periodically. + if (--histogram_analysis_counter_ > 0) { + histograms_.Update(features_); + } else { + // Compute model parameters. + prior_model_estimator_.Update(histograms_); + + // Clear histograms for next update. + histograms_.Clear(); + + histogram_analysis_counter_ = kFeatureUpdateWindowSize; + + // Update every window: + // Compute normalization for the spectral difference for next estimation. + signal_energy_sum_ = signal_energy_sum_ / kFeatureUpdateWindowSize; + diff_normalization_ = 0.5f * (signal_energy_sum_ + diff_normalization_); + signal_energy_sum_ = 0.f; + } + + // Compute the LRT. + UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h new file mode 100644 index 0000000000..58ce00acbf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/histograms.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/prior_signal_model.h" +#include "modules/audio_processing/ns/prior_signal_model_estimator.h" +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +class SignalModelEstimator { + public: + SignalModelEstimator(); + SignalModelEstimator(const SignalModelEstimator&) = delete; + SignalModelEstimator& operator=(const SignalModelEstimator&) = delete; + + // Compute signal normalization during the initial startup phase. + void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy); + + void Update( + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy); + + const PriorSignalModel& get_prior_model() const { + return prior_model_estimator_.get_prior_model(); + } + const SignalModel& get_model() { return features_; } + + private: + float diff_normalization_ = 0.f; + float signal_energy_sum_ = 0.f; + Histograms histograms_; + int histogram_analysis_counter_ = 500; + PriorSignalModelEstimator prior_model_estimator_; + SignalModel features_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc new file mode 100644 index 0000000000..fce9bc8e07 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/speech_probability_estimator.h" + +#include +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +SpeechProbabilityEstimator::SpeechProbabilityEstimator() { + speech_probability_.fill(0.f); +} + +void SpeechProbabilityEstimator::Update( + int32_t num_analyzed_frames, + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy) { + // Update models. + if (num_analyzed_frames < kLongStartupPhaseBlocks) { + signal_model_estimator_.AdjustNormalization(num_analyzed_frames, + signal_energy); + } + signal_model_estimator_.Update(prior_snr, post_snr, + conservative_noise_spectrum, signal_spectrum, + signal_spectral_sum, signal_energy); + + const SignalModel& model = signal_model_estimator_.get_model(); + const PriorSignalModel& prior_model = + signal_model_estimator_.get_prior_model(); + + // Width parameter in sigmoid map for prior model. + constexpr float kWidthPrior0 = 4.f; + // Width for pause region: lower range, so increase width in tanh map. 
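+  // Each indicator computed below follows the same sigmoid shape,
+  //   indicator = 0.5f * (tanh(width * distance) + 1.f),
+  // where `distance` is how far the feature lies from its threshold; a larger
+  // width gives a sharper transition, used for the narrower pause range.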
+ constexpr float kWidthPrior1 = 2.f * kWidthPrior0; + + // Average LRT feature: use larger width in tanh map for pause regions. + float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator0 = + 0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f); + + // Spectral flatness feature: use larger width in tanh map for pause regions. + width_prior = model.spectral_flatness > prior_model.flatness_threshold + ? kWidthPrior1 + : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator1 = + 0.5f * (tanh(1.f * width_prior * + (prior_model.flatness_threshold - model.spectral_flatness)) + + 1.f); + + // For template spectrum-difference : use larger width in tanh map for pause + // regions. + width_prior = model.spectral_diff < prior_model.template_diff_threshold + ? kWidthPrior1 + : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator2 = + 0.5f * (tanh(width_prior * (model.spectral_diff - + prior_model.template_diff_threshold)) + + 1.f); + + // Combine the indicator function with the feature weights. + float ind_prior = prior_model.lrt_weighting * indicator0 + + prior_model.flatness_weighting * indicator1 + + prior_model.difference_weighting * indicator2; + + // Compute the prior probability. + prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_); + + // Make sure probabilities are within range: keep floor to 0.01. + prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f); + + // Final speech probability: combine prior model with LR factor:. + float gain_prior = + (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f); + + std::array inv_lrt; + ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt); + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h new file mode 100644 index 0000000000..259c3b6776 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/signal_model_estimator.h" + +namespace webrtc { + +// Class for estimating the probability of speech. +class SpeechProbabilityEstimator { + public: + SpeechProbabilityEstimator(); + SpeechProbabilityEstimator(const SpeechProbabilityEstimator&) = delete; + SpeechProbabilityEstimator& operator=(const SpeechProbabilityEstimator&) = + delete; + + // Compute speech probability. 
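+  // Intended to be called once per frame; afterwards the per-bin result is
+  // available via get_probability() and the scalar prior via
+  // get_prior_probability().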
+ void Update( + int32_t num_analyzed_frames, + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy); + + float get_prior_probability() const { return prior_speech_prob_; } + rtc::ArrayView get_probability() { return speech_probability_; } + + private: + SignalModelEstimator signal_model_estimator_; + float prior_speech_prob_ = .5f; + std::array speech_probability_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc new file mode 100644 index 0000000000..7bf18346f9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/suppression_params.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +SuppressionParams::SuppressionParams( + NsConfig::SuppressionLevel suppression_level) { + switch (suppression_level) { + case NsConfig::SuppressionLevel::k6dB: + over_subtraction_factor = 1.f; + // 6 dB attenuation. + minimum_attenuating_gain = 0.5f; + use_attenuation_adjustment = false; + break; + case NsConfig::SuppressionLevel::k12dB: + over_subtraction_factor = 1.f; + // 12 dB attenuation. + minimum_attenuating_gain = 0.25f; + use_attenuation_adjustment = true; + break; + case NsConfig::SuppressionLevel::k18dB: + over_subtraction_factor = 1.1f; + // 18 dB attenuation. + minimum_attenuating_gain = 0.125f; + use_attenuation_adjustment = true; + break; + case NsConfig::SuppressionLevel::k21dB: + over_subtraction_factor = 1.25f; + // 20.9 dB attenuation. + minimum_attenuating_gain = 0.09f; + use_attenuation_adjustment = true; + break; + default: + RTC_DCHECK_NOTREACHED(); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h new file mode 100644 index 0000000000..ad11977d81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ +#define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ + +#include "modules/audio_processing/ns/ns_config.h" + +namespace webrtc { + +struct SuppressionParams { + explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level); + SuppressionParams(const SuppressionParams&) = delete; + SuppressionParams& operator=(const SuppressionParams&) = delete; + + float over_subtraction_factor; + float minimum_attenuating_gain; + bool use_attenuation_adjustment; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc new file mode 100644 index 0000000000..e14b7970d9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/wiener_filter.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +WienerFilter::WienerFilter(const SuppressionParams& suppression_params) + : suppression_params_(suppression_params) { + filter_.fill(1.f); + initial_spectral_estimate_.fill(0.f); + spectrum_prev_process_.fill(0.f); +} + +void WienerFilter::Update( + int32_t num_analyzed_frames, + rtc::ArrayView noise_spectrum, + rtc::ArrayView prev_noise_spectrum, + rtc::ArrayView parametric_noise_spectrum, + rtc::ArrayView signal_spectrum) { + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Previous estimate based on previous frame with gain filter. + float prev_tsa = spectrum_prev_process_[i] / + (prev_noise_spectrum[i] + 0.0001f) * filter_[i]; + + // Current estimate. + float current_tsa; + if (signal_spectrum[i] > noise_spectrum[i]) { + current_tsa = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f; + } else { + current_tsa = 0.f; + } + + // Directed decision estimate is sum of two terms: current estimate and + // previous estimate. + float snr_prior = 0.98f * prev_tsa + (1.f - 0.98f) * current_tsa; + filter_[i] = + snr_prior / (suppression_params_.over_subtraction_factor + snr_prior); + filter_[i] = std::max(std::min(filter_[i], 1.f), + suppression_params_.minimum_attenuating_gain); + } + + if (num_analyzed_frames < kShortStartupPhaseBlocks) { + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + initial_spectral_estimate_[i] += signal_spectrum[i]; + float filter_initial = initial_spectral_estimate_[i] - + suppression_params_.over_subtraction_factor * + parametric_noise_spectrum[i]; + filter_initial /= initial_spectral_estimate_[i] + 0.0001f; + + filter_initial = std::max(std::min(filter_initial, 1.f), + suppression_params_.minimum_attenuating_gain); + + // Weight the two suppression filters. 
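+      // This forms a linear cross-fade over the first kShortStartupPhaseBlocks
+      // frames: with n = num_analyzed_frames,
+      //   filter = ((kShortStartupPhaseBlocks - n) * filter_initial +
+      //             n * filter) / kShortStartupPhaseBlocks,
+      // so the parametric startup estimate is gradually replaced by the
+      // decision-directed estimate.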
+      constexpr float kOneByShortStartupPhaseBlocks =
+          1.f / kShortStartupPhaseBlocks;
+      filter_initial *= kShortStartupPhaseBlocks - num_analyzed_frames;
+      filter_[i] *= num_analyzed_frames;
+      filter_[i] += filter_initial;
+      filter_[i] *= kOneByShortStartupPhaseBlocks;
+    }
+  }
+
+  std::copy(signal_spectrum.begin(), signal_spectrum.end(),
+            spectrum_prev_process_.begin());
+}
+
+float WienerFilter::ComputeOverallScalingFactor(
+    int32_t num_analyzed_frames,
+    float prior_speech_probability,
+    float energy_before_filtering,
+    float energy_after_filtering) const {
+  if (!suppression_params_.use_attenuation_adjustment ||
+      num_analyzed_frames <= kLongStartupPhaseBlocks) {
+    return 1.f;
+  }
+
+  float gain = SqrtFastApproximation(energy_after_filtering /
+                                     (energy_before_filtering + 1.f));
+
+  // Scaling for the new version: threshold in the final energy gain factor
+  // calculation.
+  constexpr float kBLim = 0.5f;
+  float scale_factor1 = 1.f;
+  if (gain > kBLim) {
+    scale_factor1 = 1.f + 1.3f * (gain - kBLim);
+    if (gain * scale_factor1 > 1.f) {
+      scale_factor1 = 1.f / gain;
+    }
+  }
+
+  float scale_factor2 = 1.f;
+  if (gain < kBLim) {
+    // Do not reduce the scale too much for pause regions: attenuation here
+    // should be controlled by the flooring.
+    gain = std::max(gain, suppression_params_.minimum_attenuating_gain);
+    scale_factor2 = 1.f - 0.3f * (kBLim - gain);
+  }
+
+  // Combine both scales with the speech/noise probability: note that the
+  // prior (prior_speech_probability) is not frequency dependent.
+  return prior_speech_probability * scale_factor1 +
+         (1.f - prior_speech_probability) * scale_factor2;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h
new file mode 100644
index 0000000000..b55c5dc59d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/suppression_params.h"
+
+namespace webrtc {
+
+// Estimates a Wiener-filter based frequency domain noise reduction filter.
+class WienerFilter {
+ public:
+  explicit WienerFilter(const SuppressionParams& suppression_params);
+  WienerFilter(const WienerFilter&) = delete;
+  WienerFilter& operator=(const WienerFilter&) = delete;
+
+  // Updates the filter estimate.
+  void Update(
+      int32_t num_analyzed_frames,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
+
+  // Computes an overall gain scaling factor.
+  float ComputeOverallScalingFactor(int32_t num_analyzed_frames,
+                                    float prior_speech_probability,
+                                    float energy_before_filtering,
+                                    float energy_after_filtering) const;
+
+  // Returns the filter.
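+  // The returned view is backed by member storage, so it stays valid for the
+  // lifetime of the WienerFilter; after Update() the gains lie in
+  // [minimum_attenuating_gain, 1.f].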
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_filter() const {
+    return filter_;
+  }
+
+ private:
+  const SuppressionParams& suppression_params_;
+  std::array<float, kFftSizeBy2Plus1> spectrum_prev_process_;
+  std::array<float, kFftSizeBy2Plus1> initial_spectral_estimate_;
+  std::array<float, kFftSizeBy2Plus1> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc
new file mode 100644
index 0000000000..cea5c837dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+
+namespace webrtc {
+
+std::unique_ptr<TransientSuppressor> CreateTransientSuppressor(
+    const ApmSubmoduleCreationOverrides& overrides,
+    TransientSuppressor::VadMode vad_mode,
+    int sample_rate_hz,
+    int detection_rate_hz,
+    int num_channels) {
+#ifdef WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR
+  return nullptr;
+#else
+  if (overrides.transient_suppression) {
+    return nullptr;
+  }
+  return std::make_unique<TransientSuppressorImpl>(
+      vad_mode, sample_rate_hz, detection_rate_hz, num_channels);
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h
new file mode 100644
index 0000000000..1be2743986
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_
+#define MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+namespace webrtc {
+
+// These overrides are only to be used for testing purposes.
+// Each flag emulates a preprocessor macro to exclude a submodule of APM from
+// the build, e.g. WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR. If the corresponding
+// flag `transient_suppression` is enabled, then the creators will return
+// nullptr instead of a submodule instance, as if the macro had been defined.
+struct ApmSubmoduleCreationOverrides {
+  bool transient_suppression = false;
+};
+
+// Creates a transient suppressor.
+// Will instead return nullptr if one of the following is true:
+// * WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR is defined
+// * The corresponding override in `overrides` is enabled.
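+//
+// Example for test code (the VAD mode and rates below are illustrative only):
+//   ApmSubmoduleCreationOverrides overrides;
+//   overrides.transient_suppression = true;  // Emulates the exclude macro.
+//   std::unique_ptr<TransientSuppressor> ts = CreateTransientSuppressor(
+//       overrides, TransientSuppressor::VadMode::kDefault,
+//       /*sample_rate_hz=*/48000, /*detection_rate_hz=*/48000,
+//       /*num_channels=*/2);
+//   // `ts` is nullptr here because the override is enabled.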
+std::unique_ptr CreateTransientSuppressor( + const ApmSubmoduleCreationOverrides& overrides, + TransientSuppressor::VadMode vad_mode, + int sample_rate_hz, + int detection_rate_hz, + int num_channels); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build new file mode 100644 index 0000000000..3e4fa4ca51 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = 
True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("optionally_built_submodule_creators_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h b/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h new file mode 100644 index 0000000000..b8aff4a107 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
+#define MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
+
+#include <vector>
+
+namespace webrtc {
+
+// Functor to use when supplying a verifier function for the queue item
+// verification.
+template <typename T>
+class RenderQueueItemVerifier {
+ public:
+  explicit RenderQueueItemVerifier(size_t minimum_capacity)
+      : minimum_capacity_(minimum_capacity) {}
+
+  bool operator()(const std::vector<T>& v) const {
+    return v.capacity() >= minimum_capacity_;
+  }
+
+ private:
+  size_t minimum_capacity_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc
new file mode 100644
index 0000000000..2a564fc233
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/residual_echo_detector.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace {
+
+float Power(rtc::ArrayView<const float> input) {
+  if (input.empty()) {
+    return 0.f;
+  }
+  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
+         input.size();
+}
+
+constexpr size_t kLookbackFrames = 650;
+// TODO(ivoc): Verify the size of this buffer.
+constexpr size_t kRenderBufferSize = 30;
+constexpr float kAlpha = 0.001f;
+// 10 seconds of data, updated every 10 ms.
+constexpr size_t kAggregationBufferSize = 10 * 100;
+
+}  // namespace
+
+namespace webrtc {
+
+std::atomic<int> ResidualEchoDetector::instance_count_(0);
+
+ResidualEchoDetector::ResidualEchoDetector()
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      render_buffer_(kRenderBufferSize),
+      render_power_(kLookbackFrames),
+      render_power_mean_(kLookbackFrames),
+      render_power_std_dev_(kLookbackFrames),
+      covariances_(kLookbackFrames),
+      recent_likelihood_max_(kAggregationBufferSize) {}
+
+ResidualEchoDetector::~ResidualEchoDetector() = default;
+
+void ResidualEchoDetector::AnalyzeRenderAudio(
+    rtc::ArrayView<const float> render_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
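+  // (ApmDataDumper calls like this one compile to no-ops unless APM debug
+  // dumping is enabled at build time, so the dump is free in production.)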
+  data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
+                        48000, 1);
+
+  if (render_buffer_.Size() == 0) {
+    frames_since_zero_buffer_size_ = 0;
+  } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
+    // This can happen in a few cases: at the start of a call, due to a glitch
+    // or due to clock drift. The excess capture value will be ignored.
+    // TODO(ivoc): Include how often this happens in APM stats.
+    render_buffer_.Pop();
+    frames_since_zero_buffer_size_ = 0;
+  }
+  ++frames_since_zero_buffer_size_;
+  float power = Power(render_audio);
+  render_buffer_.Push(power);
+}
+
+void ResidualEchoDetector::AnalyzeCaptureAudio(
+    rtc::ArrayView<const float> capture_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+  data_dumper_->DumpWav("ed_capture", capture_audio.size(),
+                        capture_audio.data(), 48000, 1);
+
+  if (first_process_call_) {
+    // On the first process call (so the start of a call), we must flush the
+    // render buffer, otherwise the render data will be delayed.
+    render_buffer_.Clear();
+    first_process_call_ = false;
+  }
+
+  // Get the next render value.
+  const absl::optional<float> buffered_render_power = render_buffer_.Pop();
+  if (!buffered_render_power) {
+    // This can happen in a few cases: at the start of a call, due to a glitch
+    // or due to clock drift. The excess capture value will be ignored.
+    // TODO(ivoc): Include how often this happens in APM stats.
+    return;
+  }
+  // Update the render statistics, and store the statistics in circular
+  // buffers.
+  render_statistics_.Update(*buffered_render_power);
+  RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames);
+  render_power_[next_insertion_index_] = *buffered_render_power;
+  render_power_mean_[next_insertion_index_] = render_statistics_.mean();
+  render_power_std_dev_[next_insertion_index_] =
+      render_statistics_.std_deviation();
+
+  // Get the next capture value, update capture statistics and add the relevant
+  // values to the buffers.
+  const float capture_power = Power(capture_audio);
+  capture_statistics_.Update(capture_power);
+  const float capture_mean = capture_statistics_.mean();
+  const float capture_std_deviation = capture_statistics_.std_deviation();
+
+  // Update the covariance values and determine the new echo likelihood.
+  echo_likelihood_ = 0.f;
+  size_t read_index = next_insertion_index_;
+
+  int best_delay = -1;
+  for (size_t delay = 0; delay < covariances_.size(); ++delay) {
+    RTC_DCHECK_LT(read_index, render_power_.size());
+    covariances_[delay].Update(capture_power, capture_mean,
+                               capture_std_deviation, render_power_[read_index],
+                               render_power_mean_[read_index],
+                               render_power_std_dev_[read_index]);
+    read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1;
+
+    if (covariances_[delay].normalized_cross_correlation() >
+        echo_likelihood_) {
+      echo_likelihood_ = covariances_[delay].normalized_cross_correlation();
+      best_delay = static_cast<int>(delay);
+    }
+  }
+  // This is a temporary log message to help find the underlying cause for echo
+  // likelihoods > 1.0.
+  // TODO(ivoc): Remove once the issue is resolved.
+  if (echo_likelihood_ > 1.1f) {
+    // Make sure we don't spam the log.
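+    // (log_counter_ caps this at five reports per detector instance.)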
+    if (log_counter_ < 5 && best_delay != -1) {
+      size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay;
+      if (read_index >= kLookbackFrames) {
+        read_index -= kLookbackFrames;
+      }
+      RTC_DCHECK_LT(read_index, render_power_.size());
+      RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {"
+                             "Echo likelihood: "
+                          << echo_likelihood_ << ", Best Delay: " << best_delay
+                          << ", Covariance: "
+                          << covariances_[best_delay].covariance()
+                          << ", Last capture power: " << capture_power
+                          << ", Capture mean: " << capture_mean
+                          << ", Capture_standard deviation: "
+                          << capture_std_deviation << ", Last render power: "
+                          << render_power_[read_index]
+                          << ", Render mean: " << render_power_mean_[read_index]
+                          << ", Render standard deviation: "
+                          << render_power_std_dev_[read_index]
+                          << ", Reliability: " << reliability_ << "}";
+      log_counter_++;
+    }
+  }
+  RTC_DCHECK_LT(echo_likelihood_, 1.1f);
+
+  reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f;
+  echo_likelihood_ *= reliability_;
+  // This is a temporary fix to prevent echo likelihood values > 1.0.
+  // TODO(ivoc): Find the root cause of this issue and fix it.
+  echo_likelihood_ = std::min(echo_likelihood_, 1.0f);
+  int echo_percentage = static_cast<int>(echo_likelihood_ * 100);
+  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood",
+                       echo_percentage, 0, 100, 100 /* number of bins */);
+
+  // Update the buffer of recent likelihood values.
+  recent_likelihood_max_.Update(echo_likelihood_);
+
+  // Update the next insertion index.
+  next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1)
+                              ? next_insertion_index_ + 1
+                              : 0;
+}
+
+void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/,
+                                      int /*num_capture_channels*/,
+                                      int /*render_sample_rate_hz*/,
+                                      int /*num_render_channels*/) {
+  render_buffer_.Clear();
+  std::fill(render_power_.begin(), render_power_.end(), 0.f);
+  std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f);
+  std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f);
+  render_statistics_.Clear();
+  capture_statistics_.Clear();
+  recent_likelihood_max_.Clear();
+  for (auto& cov : covariances_) {
+    cov.Clear();
+  }
+  echo_likelihood_ = 0.f;
+  next_insertion_index_ = 0;
+  reliability_ = 0.f;
+}
+
+EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const {
+  EchoDetector::Metrics metrics;
+  metrics.echo_likelihood = echo_likelihood_;
+  metrics.echo_likelihood_recent_max = recent_likelihood_max_.max();
+  return metrics;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h
new file mode 100644
index 0000000000..ac554b17c4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_
+
+#include <atomic>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/echo_detector/circular_buffer.h"
+#include "modules/audio_processing/echo_detector/mean_variance_estimator.h"
+#include "modules/audio_processing/echo_detector/moving_max.h"
+#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+class ResidualEchoDetector : public EchoDetector {
+ public:
+  ResidualEchoDetector();
+  ~ResidualEchoDetector() override;
+
+  // This function should be called while holding the render lock.
+  void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) override;
+
+  // This function should be called while holding the capture lock.
+  void AnalyzeCaptureAudio(
+      rtc::ArrayView<const float> capture_audio) override;
+
+  // This function should be called while holding the capture lock.
+  void Initialize(int capture_sample_rate_hz,
+                  int num_capture_channels,
+                  int render_sample_rate_hz,
+                  int num_render_channels) override;
+
+  // This function is for testing purposes only.
+  void SetReliabilityForTest(float value) { reliability_ = value; }
+
+  // This function should be called while holding the capture lock.
+  EchoDetector::Metrics GetMetrics() const override;
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  // Keep track if the `Process` function has been previously called.
+  bool first_process_call_ = true;
+  // Buffer for storing the power of incoming farend buffers. This is needed
+  // for cases where calls to BufferFarend and Process are jittery.
+  CircularBuffer render_buffer_;
+  // Count how long ago it was that the size of `render_buffer_` was zero. This
+  // value is also reset to zero when clock drift is detected and a value from
+  // the renderbuffer is discarded, even though the buffer is not actually zero
+  // at that point. This is done to avoid repeatedly removing elements in this
+  // situation.
+  size_t frames_since_zero_buffer_size_ = 0;
+
+  // Circular buffers containing delayed versions of the power, mean and
+  // standard deviation, for calculating the delayed covariance values.
+  std::vector<float> render_power_;
+  std::vector<float> render_power_mean_;
+  std::vector<float> render_power_std_dev_;
+  // Covariance estimates for different delay values.
+  std::vector<NormalizedCovarianceEstimator> covariances_;
+  // Index where next element should be inserted in all of the above circular
+  // buffers.
+  size_t next_insertion_index_ = 0;
+
+  MeanVarianceEstimator render_statistics_;
+  MeanVarianceEstimator capture_statistics_;
+  // Current echo likelihood.
+  float echo_likelihood_ = 0.f;
+  // Reliability of the current likelihood.
+  float reliability_ = 0.f;
+  MovingMax recent_likelihood_max_;
+
+  int log_counter_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc
new file mode 100644
index 0000000000..d8c227a443
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/residual_echo_detector.h"
+
+#include <vector>
+
+#include "api/make_ref_counted.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(ResidualEchoDetectorTests, Echo) {
+  auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
+  echo_detector->SetReliabilityForTest(1.0f);
+  std::vector<float> ones(160, 1.f);
+  std::vector<float> zeros(160, 0.f);
+
+  // In this test the capture signal has a delay of 10 frames w.r.t. the render
+  // signal, but is otherwise identical. Both signals are periodic with a 20
+  // frame interval.
+  for (int i = 0; i < 1000; i++) {
+    if (i % 20 == 0) {
+      echo_detector->AnalyzeRenderAudio(ones);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    } else if (i % 20 == 10) {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(ones);
+    } else {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+  }
+  // We expect to detect echo with near certain likelihood.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_NEAR(1.f, ed_metrics.echo_likelihood.value(), 0.01f);
+}
+
+TEST(ResidualEchoDetectorTests, NoEcho) {
+  auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
+  echo_detector->SetReliabilityForTest(1.0f);
+  std::vector<float> ones(160, 1.f);
+  std::vector<float> zeros(160, 0.f);
+
+  // In this test the capture signal is always zero, so no echo should be
+  // detected.
+  for (int i = 0; i < 1000; i++) {
+    if (i % 20 == 0) {
+      echo_detector->AnalyzeRenderAudio(ones);
+    } else {
+      echo_detector->AnalyzeRenderAudio(zeros);
+    }
+    echo_detector->AnalyzeCaptureAudio(zeros);
+  }
+  // We expect to not detect any echo.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_NEAR(0.f, ed_metrics.echo_likelihood.value(), 0.01f);
+}
+
+TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) {
+  auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
+  echo_detector->SetReliabilityForTest(1.0f);
+  std::vector<float> ones(160, 1.f);
+  std::vector<float> zeros(160, 0.f);
+
+  // In this test the capture signal has a delay of 10 frames w.r.t. the render
+  // signal, but is otherwise identical. Both signals are periodic with a 20
+  // frame interval. There is a simulated clock drift of 1% in this test, with
+  // the render side producing data slightly faster.
+  for (int i = 0; i < 1000; i++) {
+    if (i % 20 == 0) {
+      echo_detector->AnalyzeRenderAudio(ones);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    } else if (i % 20 == 10) {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(ones);
+    } else {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+    if (i % 100 == 0) {
+      // This is causing the simulated clock drift.
+      echo_detector->AnalyzeRenderAudio(zeros);
+    }
+  }
+  // We expect to detect echo with high likelihood. Clock drift is harder to
+  // correct on the render side than on the capture side. This is due to the
+  // render buffer, clock drift can only be discovered after a certain delay.
+  // A growing buffer can be caused by jitter or clock drift and it's not
+  // possible to make this decision right away. For this reason we only expect
+  // an echo likelihood of 75% in this test.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_GT(ed_metrics.echo_likelihood.value(), 0.75f);
+}
+
+TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) {
+  auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
+  echo_detector->SetReliabilityForTest(1.0f);
+  std::vector<float> ones(160, 1.f);
+  std::vector<float> zeros(160, 0.f);
+
+  // In this test the capture signal has a delay of 10 frames w.r.t. the render
+  // signal, but is otherwise identical. Both signals are periodic with a 20
+  // frame interval. There is a simulated clock drift of 1% in this test, with
+  // the capture side producing data slightly faster.
+  for (int i = 0; i < 1000; i++) {
+    if (i % 20 == 0) {
+      echo_detector->AnalyzeRenderAudio(ones);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    } else if (i % 20 == 10) {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(ones);
+    } else {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+    if (i % 100 == 0) {
+      // This is causing the simulated clock drift.
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+  }
+  // We expect to detect echo with near certain likelihood.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_NEAR(1.f, ed_metrics.echo_likelihood.value(), 0.01f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level.cc b/third_party/libwebrtc/modules/audio_processing/rms_level.cc
new file mode 100644
index 0000000000..b0a45cb403
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/rms_level.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/rms_level.h"
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+static constexpr float kMaxSquaredLevel = 32768 * 32768;
+// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10).
+static constexpr float kMinLevel = 1.995262314968883e-13f;
+
+// Calculates the normalized RMS value from a mean square value. The input
+// should be the sum of squared samples divided by the number of samples. The
+// value will be normalized to full range before computing the RMS, which is
+// returned as a negated dBFS. That is, 0 is full amplitude while 127 is very
+// faint.
+int ComputeRms(float mean_square) {
+  if (mean_square <= kMinLevel * kMaxSquaredLevel) {
+    // Very faint; simply return the minimum value.
+    return RmsLevel::kMinLevelDb;
+  }
+  // Normalize by the max level.
+  const float mean_square_norm = mean_square / kMaxSquaredLevel;
+  RTC_DCHECK_GT(mean_square_norm, kMinLevel);
+  // 20log_10(x^0.5) = 10log_10(x)
+  const float rms = 10.f * std::log10(mean_square_norm);
+  RTC_DCHECK_LE(rms, 0.f);
+  RTC_DCHECK_GT(rms, -RmsLevel::kMinLevelDb);
+  // Return the negated value.
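+  // Worked example: a full-scale sine has mean square 32768^2 / 2, so
+  // mean_square_norm = 0.5 and rms = 10 * log10(0.5) ~= -3.01. Negating and
+  // rounding yields 3, i.e. -3 dBFS, matching the expectations in
+  // rms_level_unittest.cc.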
+  return static_cast<int>(-rms + 0.5f);
+}
+}  // namespace
+
+RmsLevel::RmsLevel() {
+  Reset();
+}
+
+RmsLevel::~RmsLevel() = default;
+
+void RmsLevel::Reset() {
+  sum_square_ = 0.f;
+  sample_count_ = 0;
+  max_sum_square_ = 0.f;
+  block_size_ = absl::nullopt;
+}
+
+void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
+  if (data.empty()) {
+    return;
+  }
+
+  CheckBlockSize(data.size());
+
+  const float sum_square =
+      std::accumulate(data.begin(), data.end(), 0.f,
+                      [](float a, int16_t b) { return a + b * b; });
+  RTC_DCHECK_GE(sum_square, 0.f);
+  sum_square_ += sum_square;
+  sample_count_ += data.size();
+
+  max_sum_square_ = std::max(max_sum_square_, sum_square);
+}
+
+void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
+  if (data.empty()) {
+    return;
+  }
+
+  CheckBlockSize(data.size());
+
+  float sum_square = 0.f;
+
+  for (float data_k : data) {
+    int16_t tmp =
+        static_cast<int16_t>(std::min(std::max(data_k, -32768.f), 32767.f));
+    sum_square += tmp * tmp;
+  }
+  RTC_DCHECK_GE(sum_square, 0.f);
+  sum_square_ += sum_square;
+  sample_count_ += data.size();
+
+  max_sum_square_ = std::max(max_sum_square_, sum_square);
+}
+
+void RmsLevel::AnalyzeMuted(size_t length) {
+  CheckBlockSize(length);
+  sample_count_ += length;
+}
+
+int RmsLevel::Average() {
+  const bool have_samples = (sample_count_ != 0);
+  int rms = have_samples ? ComputeRms(sum_square_ / sample_count_)
+                         : RmsLevel::kMinLevelDb;
+
+  // To ensure that kMinLevelDb represents digital silence (muted audio
+  // sources) we'll check here if the sum_square is actually 0. If it's not
+  // we'll bump up the return value to `kInaudibleButNotMuted`.
+  // https://datatracker.ietf.org/doc/html/rfc6464
+  if (have_samples && rms == RmsLevel::kMinLevelDb && sum_square_ != 0.0f) {
+    rms = kInaudibleButNotMuted;
+  }
+
+  Reset();
+  return rms;
+}
+
+RmsLevel::Levels RmsLevel::AverageAndPeak() {
+  // Note that block_size_ should by design always be non-empty when
+  // sample_count_ != 0. Also, the * operator of absl::optional enforces this
+  // with a DCHECK.
+  Levels levels = (sample_count_ == 0)
+                      ? Levels{RmsLevel::kMinLevelDb, RmsLevel::kMinLevelDb}
+                      : Levels{ComputeRms(sum_square_ / sample_count_),
+                               ComputeRms(max_sum_square_ / *block_size_)};
+  Reset();
+  return levels;
+}
+
+void RmsLevel::CheckBlockSize(size_t block_size) {
+  if (block_size_ != block_size) {
+    Reset();
+    block_size_ = block_size;
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level.h b/third_party/libwebrtc/modules/audio_processing/rms_level.h
new file mode 100644
index 0000000000..fbece19ecd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/rms_level.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+#define MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Computes the root mean square (RMS) level in dBFS (decibels from digital
+// full-scale) of audio data. The computation follows RFC 6465:
+// https://tools.ietf.org/html/rfc6465
+// with the intent that it can provide the RTP audio level indication.
+//
+// The expected approach is to provide constant-sized chunks of audio to
+// Analyze(). When enough chunks have been accumulated to form a packet, call
+// Average() to get the audio level indicator for the RTP header.
class RmsLevel {
+ public:
+  struct Levels {
+    int average;
+    int peak;
+  };
+
+  enum : int { kMinLevelDb = 127, kInaudibleButNotMuted = 126 };
+
+  RmsLevel();
+  ~RmsLevel();
+
+  // Can be called to reset internal states, but is not required during normal
+  // operation.
+  void Reset();
+
+  // Pass each chunk of audio to Analyze() to accumulate the level.
+  void Analyze(rtc::ArrayView<const int16_t> data);
+  void Analyze(rtc::ArrayView<const float> data);
+
+  // If all samples with the given `length` have a magnitude of zero, this is
+  // a shortcut to avoid some computation.
+  void AnalyzeMuted(size_t length);
+
+  // Computes the RMS level over all data passed to Analyze() since the last
+  // call to Average(). The returned value is positive but should be
+  // interpreted as negative as per the RFC. It is constrained to [0, 127].
+  // Resets the internal state to start a new measurement period.
+  int Average();
+
+  // Like Average() above, but also returns the RMS peak value. Resets the
+  // internal state to start a new measurement period.
+  Levels AverageAndPeak();
+
+ private:
+  // Compares `block_size` with `block_size_`. If they are different, calls
+  // Reset() and stores the new size.
+  void CheckBlockSize(size_t block_size);
+
+  float sum_square_;
+  size_t sample_count_;
+  float max_sum_square_;
+  absl::optional<size_t> block_size_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build
new file mode 100644
index 0000000000..ec35734b6d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/rms_level.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rms_level_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc b/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc new file mode 100644 index 0000000000..4cbad461e7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +// MSVC++ requires this to be set before any other includes to get M_PI. 
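+// (M_PI is used below by the sinusoid generators that synthesize the test
+// signals.)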
+#define _USE_MATH_DEFINES
+#include "modules/audio_processing/rms_level.h"
+
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+constexpr int kSampleRateHz = 48000;
+constexpr size_t kBlockSizeSamples = kSampleRateHz / 100;
+
+std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const int16_t> input) {
+  std::unique_ptr<RmsLevel> level(new RmsLevel);
+  for (size_t n = 0; n + kBlockSizeSamples <= input.size();
+       n += kBlockSizeSamples) {
+    level->Analyze(input.subview(n, kBlockSizeSamples));
+  }
+  return level;
+}
+
+std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const float> input) {
+  std::unique_ptr<RmsLevel> level(new RmsLevel);
+  for (size_t n = 0; n + kBlockSizeSamples <= input.size();
+       n += kBlockSizeSamples) {
+    level->Analyze(input.subview(n, kBlockSizeSamples));
+  }
+  return level;
+}
+
+std::vector<int16_t> CreateInt16Sinusoid(int frequency_hz,
+                                         int amplitude,
+                                         size_t num_samples) {
+  std::vector<int16_t> x(num_samples);
+  for (size_t n = 0; n < num_samples; ++n) {
+    x[n] = rtc::saturated_cast<int16_t>(
+        amplitude * std::sin(2 * M_PI * n * frequency_hz / kSampleRateHz));
+  }
+  return x;
+}
+
+std::vector<float> CreateFloatSinusoid(int frequency_hz,
+                                       int amplitude,
+                                       size_t num_samples) {
+  std::vector<int16_t> x16 =
+      CreateInt16Sinusoid(frequency_hz, amplitude, num_samples);
+  std::vector<float> x(x16.size());
+  for (size_t n = 0; n < x.size(); ++n) {
+    x[n] = x16[n];
+  }
+  return x;
+}
+
+}  // namespace
+
+TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) {
+  auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto x_i = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level_f = RunTest(x_f);
+  auto level_i = RunTest(x_i);
+  int avg_i = level_i->Average();
+  int avg_f = level_f->Average();
+  EXPECT_EQ(3, avg_i);  // -3 dBFS
+  EXPECT_EQ(avg_f, avg_i);
+}
+
+TEST(RmsLevelTest, Run1000HzFullScale) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  EXPECT_EQ(3, level->Average());  // -3 dBFS
+}
+
+TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(3, stats.average);  // -3 dBFS
+  EXPECT_EQ(3, stats.peak);
+}
+
+TEST(RmsLevelTest, Run1000HzHalfScale) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
+  auto level = RunTest(x);
+  EXPECT_EQ(9, level->Average());  // -9 dBFS
+}
+
+TEST(RmsLevelTest, RunZeros) {
+  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
+  auto level = RunTest(x);
+  EXPECT_EQ(127, level->Average());
+}
+
+TEST(RmsLevelTest, RunZerosAverageAndPeak) {
+  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
+}
+
+TEST(RmsLevelTest, NoSamples) {
+  RmsLevel level;
+  EXPECT_EQ(127, level.Average());  // Return minimum if no samples are given.
+}
+
+TEST(RmsLevelTest, NoSamplesAverageAndPeak) {
+  RmsLevel level;
+  auto stats = level.AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
+}
+
+TEST(RmsLevelTest, PollTwice) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  level->Average();
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
+}
+
+TEST(RmsLevelTest, Reset) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  level->Reset();
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
+}
+
+// Inserts 1 second of full-scale sinusoid, followed by 1 second of muted.
+TEST(RmsLevelTest, ProcessMuted) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  const size_t kBlocksPerSecond = rtc::CheckedDivExact(
+      static_cast<size_t>(kSampleRateHz), kBlockSizeSamples);
+  for (size_t i = 0; i < kBlocksPerSecond; ++i) {
+    level->AnalyzeMuted(kBlockSizeSamples);
+  }
+  EXPECT_EQ(6, level->Average());  // Average RMS halved due to the silence.
+}
+
+// Digital silence must yield 127 and anything else should yield 126 or lower.
+TEST(RmsLevelTest, OnlyDigitalSilenceIs127) {
+  std::vector<int16_t> test_buffer(kSampleRateHz, 0);
+  auto level = RunTest(test_buffer);
+  EXPECT_EQ(127, level->Average());
+  // Change one sample to something other than 0 to make the buffer not
+  // strictly represent digital silence.
+  test_buffer[0] = 1;
+  level = RunTest(test_buffer);
+  EXPECT_LT(level->Average(), 127);
+}
+
+// Inserts 1 second of half-scale sinusoid, followed by 10 ms of full-scale,
+// and finally 1 second of half-scale again. Expect the average to be -9 dBFS
+// due to the vast majority of the signal being half-scale, and the peak to be
+// -3 dBFS.
+TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
+  auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
+  auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100);
+  auto x = half_scale;
+  x.insert(x.end(), full_scale.begin(), full_scale.end());
+  x.insert(x.end(), half_scale.begin(), half_scale.end());
+  ASSERT_EQ(static_cast<size_t>(2 * kSampleRateHz + kSampleRateHz / 100),
+            x.size());
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(9, stats.average);
+  EXPECT_EQ(3, stats.peak);
+}
+
+TEST(RmsLevelTest, ResetOnBlockSizeChange) {
+  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  // Create a new signal with half amplitude, but double block length.
+  auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
+  level->Analyze(y);
+  auto stats = level->AverageAndPeak();
+  // Expect all stats to only be influenced by the last signal (y), since the
+  // changed block size should reset the stats.
+  EXPECT_EQ(9, stats.average);
+  EXPECT_EQ(9, stats.peak);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc b/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc
new file mode 100644
index 0000000000..d47090bc03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/splitting_filter.h"
+
+#include <array>
+
+#include "api/array_view.h"
+#include "common_audio/channel_buffer.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr size_t kSamplesPerBand = 160;
+constexpr size_t kTwoBandFilterSamplesPerFrame = 320;
+
+}  // namespace
+
+SplittingFilter::SplittingFilter(size_t num_channels,
+                                 size_t num_bands,
+                                 size_t num_frames)
+    : num_bands_(num_bands),
+      two_bands_states_(num_bands_ == 2 ? num_channels : 0),
+      three_band_filter_banks_(num_bands_ == 3 ? num_channels : 0) {
+  RTC_CHECK(num_bands_ == 2 || num_bands_ == 3);
+}
+
+SplittingFilter::~SplittingFilter() = default;
+
+void SplittingFilter::Analysis(const ChannelBuffer<float>* data,
+                               ChannelBuffer<float>* bands) {
+  RTC_DCHECK_EQ(num_bands_, bands->num_bands());
+  RTC_DCHECK_EQ(data->num_channels(), bands->num_channels());
+  RTC_DCHECK_EQ(data->num_frames(),
+                bands->num_frames_per_band() * bands->num_bands());
+  if (bands->num_bands() == 2) {
+    TwoBandsAnalysis(data, bands);
+  } else if (bands->num_bands() == 3) {
+    ThreeBandsAnalysis(data, bands);
+  }
+}
+
+void SplittingFilter::Synthesis(const ChannelBuffer<float>* bands,
+                                ChannelBuffer<float>* data) {
+  RTC_DCHECK_EQ(num_bands_, bands->num_bands());
+  RTC_DCHECK_EQ(data->num_channels(), bands->num_channels());
+  RTC_DCHECK_EQ(data->num_frames(),
+                bands->num_frames_per_band() * bands->num_bands());
+  if (bands->num_bands() == 2) {
+    TwoBandsSynthesis(bands, data);
+  } else if (bands->num_bands() == 3) {
+    ThreeBandsSynthesis(bands, data);
+  }
+}
+
+void SplittingFilter::TwoBandsAnalysis(const ChannelBuffer<float>* data,
+                                       ChannelBuffer<float>* bands) {
+  RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels());
+  RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame);
+
+  for (size_t i = 0; i < two_bands_states_.size(); ++i) {
+    std::array<std::array<int16_t, kSamplesPerBand>, 2> bands16;
+    std::array<int16_t, kTwoBandFilterSamplesPerFrame> full_band16;
+    FloatS16ToS16(data->channels(0)[i], full_band16.size(), full_band16.data());
+    WebRtcSpl_AnalysisQMF(full_band16.data(), data->num_frames(),
+                          bands16[0].data(), bands16[1].data(),
+                          two_bands_states_[i].analysis_state1,
+                          two_bands_states_[i].analysis_state2);
+    S16ToFloatS16(bands16[0].data(), bands16[0].size(), bands->channels(0)[i]);
+    S16ToFloatS16(bands16[1].data(), bands16[1].size(), bands->channels(1)[i]);
+  }
+}
+
+void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer<float>* bands,
+                                        ChannelBuffer<float>* data) {
+  RTC_DCHECK_LE(data->num_channels(), two_bands_states_.size());
+  RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame);
+  for (size_t i = 0; i < data->num_channels(); ++i) {
+    std::array<std::array<int16_t, kSamplesPerBand>, 2> bands16;
+    std::array<int16_t, kTwoBandFilterSamplesPerFrame> full_band16;
+    FloatS16ToS16(bands->channels(0)[i], bands16[0].size(), bands16[0].data());
+    FloatS16ToS16(bands->channels(1)[i], bands16[1].size(), bands16[1].data());
+    WebRtcSpl_SynthesisQMF(bands16[0].data(), bands16[1].data(),
+                           bands->num_frames_per_band(), full_band16.data(),
+                           two_bands_states_[i].synthesis_state1,
+                           two_bands_states_[i].synthesis_state2);
+    S16ToFloatS16(full_band16.data(), full_band16.size(), data->channels(0)[i]);
+  }
+}
+
+void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer<float>* data,
+                                         ChannelBuffer<float>* bands) {
+  RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels());
+  RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
+  RTC_DCHECK_LE(data->num_channels(), bands->num_channels());
+  RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize);
+  RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize);
+  RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands);
+  RTC_DCHECK_EQ(bands->num_frames_per_band(),
+                ThreeBandFilterBank::kSplitBandSize);
+
+  for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) {
+    three_band_filter_banks_[i].Analysis(
+        rtc::ArrayView<const float, ThreeBandFilterBank::kFullBandSize>(
+            data->channels_view()[i].data(),
+            ThreeBandFilterBank::kFullBandSize),
+        rtc::ArrayView<const rtc::ArrayView<float>,
+                       ThreeBandFilterBank::kNumBands>(
+            bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands));
+  }
+}
+
+void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer<float>* bands,
+                                          ChannelBuffer<float>* data) {
+  RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
+  RTC_DCHECK_LE(data->num_channels(), bands->num_channels());
+  RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
+  RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize);
+  RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize);
+  RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands);
+  RTC_DCHECK_EQ(bands->num_frames_per_band(),
+                ThreeBandFilterBank::kSplitBandSize);
+
+  for (size_t i = 0; i < data->num_channels(); ++i) {
+    three_band_filter_banks_[i].Synthesis(
+        rtc::ArrayView<const rtc::ArrayView<float>,
+                       ThreeBandFilterBank::kNumBands>(
+            bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands),
+        rtc::ArrayView<float, ThreeBandFilterBank::kFullBandSize>(
+            data->channels_view()[i].data(),
+            ThreeBandFilterBank::kFullBandSize));
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter.h b/third_party/libwebrtc/modules/audio_processing/splitting_filter.h
new file mode 100644
index 0000000000..e578dd07c1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_
+
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_processing/three_band_filter_bank.h"
+
+namespace webrtc {
+
+struct TwoBandsStates {
+  TwoBandsStates() {
+    memset(analysis_state1, 0, sizeof(analysis_state1));
+    memset(analysis_state2, 0, sizeof(analysis_state2));
+    memset(synthesis_state1, 0, sizeof(synthesis_state1));
+    memset(synthesis_state2, 0, sizeof(synthesis_state2));
+  }
+
+  static const int kStateSize = 6;
+  int analysis_state1[kStateSize];
+  int analysis_state2[kStateSize];
+  int synthesis_state1[kStateSize];
+  int synthesis_state2[kStateSize];
+};
+
+// Splitting filter which is able to split into and merge from 2 or 3 frequency
+// bands. The number of channels needs to be provided at construction time.
+//
+// For each block, Analysis() is called to split into bands and then Synthesis()
+// to merge these bands again. The input and output signals are contained in
+// ChannelBuffers and for the different bands an array of ChannelBuffers is
+// used.
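+//
+// Illustrative usage for one channel of 48 kHz audio split into three bands
+// (buffer shapes only; the values are an example, not a requirement):
+//   SplittingFilter filter(/*num_channels=*/1, /*num_bands=*/3,
+//                          /*num_frames=*/480);
+//   ChannelBuffer<float> data(/*num_frames=*/480, /*num_channels=*/1);
+//   ChannelBuffer<float> bands(/*num_frames=*/480, /*num_channels=*/1,
+//                              /*num_bands=*/3);
+//   filter.Analysis(&data, &bands);   // 480 samples -> 3 x 160 samples.
+//   filter.Synthesis(&bands, &data);  // 3 x 160 samples -> 480 samples.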
+class SplittingFilter {
+ public:
+  SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames);
+  ~SplittingFilter();
+
+  void Analysis(const ChannelBuffer<float>* data, ChannelBuffer<float>* bands);
+  void Synthesis(const ChannelBuffer<float>* bands,
+                 ChannelBuffer<float>* data);
+
+ private:
+  // Two-band analysis and synthesis work for 640 samples or less.
+  void TwoBandsAnalysis(const ChannelBuffer<float>* data,
+                        ChannelBuffer<float>* bands);
+  void TwoBandsSynthesis(const ChannelBuffer<float>* bands,
+                         ChannelBuffer<float>* data);
+  void ThreeBandsAnalysis(const ChannelBuffer<float>* data,
+                          ChannelBuffer<float>* bands);
+  void ThreeBandsSynthesis(const ChannelBuffer<float>* bands,
+                           ChannelBuffer<float>* data);
+  void InitBuffers();
+
+  const size_t num_bands_;
+  std::vector<TwoBandsStates> two_bands_states_;
+  std::vector<ThreeBandFilterBank> three_band_filter_banks_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc
new file mode 100644
index 0000000000..30fe4caf9c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include "modules/audio_processing/splitting_filter.h"
+
+#include <cmath>
+
+#include "common_audio/channel_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const size_t kSamplesPer16kHzChannel = 160;
+const size_t kSamplesPer48kHzChannel = 480;
+
+}  // namespace
+
+// Generates a signal from presence or absence of sine waves of different
+// frequencies.
+// Splits into 3 bands and checks their presence or absence.
+// Recombines the bands.
+// Calculates the delay.
+// Checks that the cross correlation of input and output is high enough at the
+// calculated delay.
+TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) {
+  static const int kChannels = 1;
+  static const int kSampleRateHz = 48000;
+  static const size_t kNumBands = 3;
+  static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000};
+  static const float kAmplitude = 8192.f;
+  static const size_t kChunks = 8;
+  SplittingFilter splitting_filter(kChannels, kNumBands,
+                                   kSamplesPer48kHzChannel);
+  ChannelBuffer<float> in_data(kSamplesPer48kHzChannel, kChannels, kNumBands);
+  ChannelBuffer<float> bands(kSamplesPer48kHzChannel, kChannels, kNumBands);
+  ChannelBuffer<float> out_data(kSamplesPer48kHzChannel, kChannels, kNumBands);
+  for (size_t i = 0; i < kChunks; ++i) {
+    // Input signal generation.
+    bool is_present[kNumBands];
+    memset(in_data.channels()[0], 0,
+           kSamplesPer48kHzChannel * sizeof(in_data.channels()[0][0]));
+    for (size_t j = 0; j < kNumBands; ++j) {
+      is_present[j] = i & (static_cast<size_t>(1) << j);
+      float amplitude = is_present[j] ? kAmplitude : 0.f;
+      for (size_t k = 0; k < kSamplesPer48kHzChannel; ++k) {
+        in_data.channels()[0][k] +=
+            amplitude * sin(2.f * M_PI * kFrequenciesHz[j] *
+                            (i * kSamplesPer48kHzChannel + k) / kSampleRateHz);
+      }
+    }
+    // Three band splitting filter.
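+    // (Each 480-sample chunk is split here into three 160-sample bands.)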
+ splitting_filter.Analysis(&in_data, &bands); + // Energy calculation. + float energy[kNumBands]; + for (size_t j = 0; j < kNumBands; ++j) { + energy[j] = 0.f; + for (size_t k = 0; k < kSamplesPer16kHzChannel; ++k) { + energy[j] += bands.channels(j)[0][k] * bands.channels(j)[0][k]; + } + energy[j] /= kSamplesPer16kHzChannel; + if (is_present[j]) { + EXPECT_GT(energy[j], kAmplitude * kAmplitude / 4); + } else { + EXPECT_LT(energy[j], kAmplitude * kAmplitude / 4); + } + } + // Three band merge. + splitting_filter.Synthesis(&bands, &out_data); + // Delay and cross correlation estimation. + float xcorr = 0.f; + for (size_t delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { + float tmpcorr = 0.f; + for (size_t j = delay; j < kSamplesPer48kHzChannel; ++j) { + tmpcorr += in_data.channels()[0][j - delay] * out_data.channels()[0][j]; + } + tmpcorr /= kSamplesPer48kHzChannel; + if (tmpcorr > xcorr) { + xcorr = tmpcorr; + } + } + // High cross correlation check. + bool any_present = false; + for (size_t j = 0; j < kNumBands; ++j) { + any_present |= is_present[j]; + } + if (any_present) { + EXPECT_GT(xcorr, kAmplitude * kAmplitude / 4); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc new file mode 100644 index 0000000000..416e287751 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/aec_dump_based_simulator.h" + +#include +#include + +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/aec_dump_based_simulator.h" +#include "modules/audio_processing/test/protobuf_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace test { +namespace { + +// Verify output bitexactness for the fixed interface. +// TODO(peah): Check whether it would make sense to add a threshold +// to use for checking the bitexactness in a soft manner. +bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg, + const Int16Frame& frame) { + if (sizeof(frame.data[0]) * frame.data.size() != msg.output_data().size()) { + return false; + } else { + const int16_t* frame_data = frame.data.data(); + for (int k = 0; k < frame.num_channels * frame.samples_per_channel; ++k) { + if (msg.output_data().data()[k] != frame_data[k]) { + return false; + } + } + } + return true; +} + +// Verify output bitexactness for the float interface. 
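+// Exact sample-by-sample equality is required; see the TODO above about
+// possibly softening these checks with a threshold.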
+bool VerifyFloatBitExactness(const webrtc::audioproc::Stream& msg,
+                             const StreamConfig& out_config,
+                             const ChannelBuffer<float>& out_buf) {
+  if (static_cast<size_t>(msg.output_channel_size()) !=
+          out_config.num_channels() ||
+      msg.output_channel(0).size() != out_config.num_frames()) {
+    return false;
+  } else {
+    for (int ch = 0; ch < msg.output_channel_size(); ++ch) {
+      for (size_t sample = 0; sample < out_config.num_frames(); ++sample) {
+        if (msg.output_channel(ch).data()[sample] !=
+            out_buf.channels()[ch][sample]) {
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Selectively reads the next proto-buf message from dump-file or string input.
+// Returns a bool indicating whether a new message was available.
+bool ReadNextMessage(bool use_dump_file,
+                     FILE* dump_input_file,
+                     std::stringstream& input,
+                     webrtc::audioproc::Event& event_msg) {
+  if (use_dump_file) {
+    return ReadMessageFromFile(dump_input_file, &event_msg);
+  }
+  return ReadMessageFromString(&input, &event_msg);
+}
+
+}  // namespace
+
+AecDumpBasedSimulator::AecDumpBasedSimulator(
+    const SimulationSettings& settings,
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    std::unique_ptr<AudioProcessingBuilder> ap_builder)
+    : AudioProcessingSimulator(settings,
+                               std::move(audio_processing),
+                               std::move(ap_builder)) {
+  MaybeOpenCallOrderFile();
+}
+
+AecDumpBasedSimulator::~AecDumpBasedSimulator() = default;
+
+void AecDumpBasedSimulator::PrepareProcessStreamCall(
+    const webrtc::audioproc::Stream& msg) {
+  if (msg.has_input_data()) {
+    // Fixed interface processing.
+    // Verify interface invariance.
+    RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface ||
+              interface_used_ == InterfaceType::kNotSpecified);
+    interface_used_ = InterfaceType::kFixedInterface;
+
+    // Populate input buffer.
+    RTC_CHECK_EQ(sizeof(fwd_frame_.data[0]) * fwd_frame_.data.size(),
+                 msg.input_data().size());
+    memcpy(fwd_frame_.data.data(), msg.input_data().data(),
+           msg.input_data().size());
+  } else {
+    // Float interface processing.
+    // Verify interface invariance.
+    RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface ||
+              interface_used_ == InterfaceType::kNotSpecified);
+    interface_used_ = InterfaceType::kFloatInterface;
+
+    RTC_CHECK_EQ(in_buf_->num_channels(),
+                 static_cast<size_t>(msg.input_channel_size()));
+
+    // Populate input buffer.
+    for (size_t i = 0; i < in_buf_->num_channels(); ++i) {
+      RTC_CHECK_EQ(in_buf_->num_frames() * sizeof(*in_buf_->channels()[i]),
+                   msg.input_channel(i).size());
+      std::memcpy(in_buf_->channels()[i], msg.input_channel(i).data(),
+                  msg.input_channel(i).size());
+    }
+  }
+
+  if (artificial_nearend_buffer_reader_) {
+    if (artificial_nearend_buffer_reader_->Read(
+            artificial_nearend_buf_.get())) {
+      if (msg.has_input_data()) {
+        int16_t* fwd_frame_data = fwd_frame_.data.data();
+        for (size_t k = 0; k < in_buf_->num_frames(); ++k) {
+          fwd_frame_data[k] = rtc::saturated_cast<int16_t>(
+              fwd_frame_data[k] +
+              static_cast<int16_t>(
+                  32767 * artificial_nearend_buf_->channels()[0][k]));
+        }
+      } else {
+        for (int i = 0; i < msg.input_channel_size(); ++i) {
+          for (size_t k = 0; k < in_buf_->num_frames(); ++k) {
+            in_buf_->channels()[i][k] +=
+                artificial_nearend_buf_->channels()[0][k];
+            in_buf_->channels()[i][k] = std::min(
+                32767.f, std::max(-32768.f, in_buf_->channels()[i][k]));
+          }
+        }
+      }
+    } else {
+      if (!artificial_nearend_eof_reported_) {
+        std::cout << "The artificial nearend file ended before the recording.";
+        artificial_nearend_eof_reported_ = true;
+      }
+    }
+  }
+
+  if (!settings_.use_stream_delay || *settings_.use_stream_delay) {
+    if (!settings_.stream_delay) {
+      if (msg.has_delay()) {
+        RTC_CHECK_EQ(AudioProcessing::kNoError,
+                     ap_->set_stream_delay_ms(msg.delay()));
+      }
+    } else {
+      RTC_CHECK_EQ(AudioProcessing::kNoError,
+                   ap_->set_stream_delay_ms(*settings_.stream_delay));
+    }
+  }
+
+  if (settings_.override_key_pressed.has_value()) {
+    // Key pressed state overridden.
+    ap_->set_stream_key_pressed(*settings_.override_key_pressed);
+  } else {
+    // Set the recorded key pressed state.
+    if (msg.has_keypress()) {
+      ap_->set_stream_key_pressed(msg.keypress());
+    }
+  }
+
+  // Set the applied input level if available.
+  aec_dump_applied_input_level_ =
+      msg.has_applied_input_volume()
+          ? absl::optional<int>(msg.applied_input_volume())
+          : absl::nullopt;
+}
+
+void AecDumpBasedSimulator::VerifyProcessStreamBitExactness(
+    const webrtc::audioproc::Stream& msg) {
+  if (bitexact_output_) {
+    if (interface_used_ == InterfaceType::kFixedInterface) {
+      bitexact_output_ = VerifyFixedBitExactness(msg, fwd_frame_);
+    } else {
+      bitexact_output_ = VerifyFloatBitExactness(msg, out_config_, *out_buf_);
+    }
+  }
+}
+
+void AecDumpBasedSimulator::PrepareReverseProcessStreamCall(
+    const webrtc::audioproc::ReverseStream& msg) {
+  if (msg.has_data()) {
+    // Fixed interface processing.
+    // Verify interface invariance.
+    RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface ||
+              interface_used_ == InterfaceType::kNotSpecified);
+    interface_used_ = InterfaceType::kFixedInterface;
+
+    // Populate input buffer.
+    RTC_CHECK_EQ(sizeof(rev_frame_.data[0]) * rev_frame_.data.size(),
+                 msg.data().size());
+    memcpy(rev_frame_.data.data(), msg.data().data(), msg.data().size());
+  } else {
+    // Float interface processing.
+    // Verify interface invariance.
+    RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface ||
+              interface_used_ == InterfaceType::kNotSpecified);
+    interface_used_ = InterfaceType::kFloatInterface;
+
+    RTC_CHECK_EQ(reverse_in_buf_->num_channels(),
+                 static_cast<size_t>(msg.channel_size()));
+
+    // Populate input buffer.
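+    // As on the capture side above, each reverse channel blob is size-checked
+    // and then copied verbatim into the render input buffer.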
+    for (int i = 0; i < msg.channel_size(); ++i) {
+      RTC_CHECK_EQ(reverse_in_buf_->num_frames() *
+                       sizeof(*reverse_in_buf_->channels()[i]),
+                   msg.channel(i).size());
+      std::memcpy(reverse_in_buf_->channels()[i], msg.channel(i).data(),
+                  msg.channel(i).size());
+    }
+  }
+}
+
+void AecDumpBasedSimulator::Process() {
+  ConfigureAudioProcessor();
+
+  if (settings_.artificial_nearend_filename) {
+    std::unique_ptr<WavReader> artificial_nearend_file(
+        new WavReader(settings_.artificial_nearend_filename->c_str()));
+
+    RTC_CHECK_EQ(1, artificial_nearend_file->num_channels())
+        << "Only mono files for the artificial nearend are supported, "
+           "reverted to not using the artificial nearend file";
+
+    const int sample_rate_hz = artificial_nearend_file->sample_rate();
+    artificial_nearend_buffer_reader_.reset(
+        new ChannelBufferWavReader(std::move(artificial_nearend_file)));
+    artificial_nearend_buf_.reset(new ChannelBuffer<float>(
+        rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond), 1));
+  }
+
+  const bool use_dump_file = !settings_.aec_dump_input_string.has_value();
+  std::stringstream input;
+  if (use_dump_file) {
+    dump_input_file_ =
+        OpenFile(settings_.aec_dump_input_filename->c_str(), "rb");
+  } else {
+    input << settings_.aec_dump_input_string.value();
+  }
+
+  webrtc::audioproc::Event event_msg;
+  int capture_frames_since_init = 0;
+  int init_index = 0;
+  while (ReadNextMessage(use_dump_file, dump_input_file_, input, event_msg)) {
+    SelectivelyToggleDataDumping(init_index, capture_frames_since_init);
+    HandleEvent(event_msg, capture_frames_since_init, init_index);
+
+    // Perform an early exit if the init block to process has been fully
+    // processed.
+    if (finished_processing_specified_init_block_) {
+      break;
+    }
+    RTC_CHECK(!settings_.init_to_process ||
+              *settings_.init_to_process >= init_index);
+  }
+
+  if (use_dump_file) {
+    fclose(dump_input_file_);
+  }
+
+  DetachAecDump();
+}
+
+void AecDumpBasedSimulator::Analyze() {
+  const bool use_dump_file = !settings_.aec_dump_input_string.has_value();
+  std::stringstream input;
+  if (use_dump_file) {
+    dump_input_file_ =
+        OpenFile(settings_.aec_dump_input_filename->c_str(), "rb");
+  } else {
+    input << settings_.aec_dump_input_string.value();
+  }
+
+  webrtc::audioproc::Event event_msg;
+  int num_capture_frames = 0;
+  int num_render_frames = 0;
+  int init_index = 0;
+  while (ReadNextMessage(use_dump_file, dump_input_file_, input, event_msg)) {
+    if (event_msg.type() == webrtc::audioproc::Event::INIT) {
+      ++init_index;
+      constexpr float kNumFramesPerSecond = 100.f;
+      float capture_time_seconds = num_capture_frames / kNumFramesPerSecond;
+      float render_time_seconds = num_render_frames / kNumFramesPerSecond;
+
+      std::cout << "Inits:" << std::endl;
+      std::cout << init_index << ": -->" << std::endl;
+      std::cout << " Time:" << std::endl;
+      std::cout << "  Capture: " << capture_time_seconds << " s ("
+                << num_capture_frames << " frames) " << std::endl;
+      std::cout << "  Render: " << render_time_seconds << " s ("
+                << num_render_frames << " frames) " << std::endl;
+    } else if (event_msg.type() == webrtc::audioproc::Event::STREAM) {
+      ++num_capture_frames;
+    } else if (event_msg.type() == webrtc::audioproc::Event::REVERSE_STREAM) {
+      ++num_render_frames;
+    }
+  }
+
+  if (use_dump_file) {
+    fclose(dump_input_file_);
+  }
+}
+
+void AecDumpBasedSimulator::HandleEvent(
+    const webrtc::audioproc::Event& event_msg,
+    int& capture_frames_since_init,
+    int& init_index) {
+  switch (event_msg.type()) {
+    case webrtc::audioproc::Event::INIT:
+      RTC_CHECK(event_msg.has_init());
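+      // Each INIT event starts a new init block: advance the block index and
+      // restart the per-block capture frame count.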
+      ++init_index;
+      capture_frames_since_init = 0;
+      HandleMessage(event_msg.init(), init_index);
+      break;
+    case webrtc::audioproc::Event::STREAM:
+      RTC_CHECK(event_msg.has_stream());
+      ++capture_frames_since_init;
+      HandleMessage(event_msg.stream());
+      break;
+    case webrtc::audioproc::Event::REVERSE_STREAM:
+      RTC_CHECK(event_msg.has_reverse_stream());
+      HandleMessage(event_msg.reverse_stream());
+      break;
+    case webrtc::audioproc::Event::CONFIG:
+      RTC_CHECK(event_msg.has_config());
+      HandleMessage(event_msg.config());
+      break;
+    case webrtc::audioproc::Event::RUNTIME_SETTING:
+      HandleMessage(event_msg.runtime_setting());
+      break;
+    case webrtc::audioproc::Event::UNKNOWN_EVENT:
+      RTC_CHECK_NOTREACHED();
+  }
+}
+
+void AecDumpBasedSimulator::HandleMessage(
+    const webrtc::audioproc::Config& msg) {
+  if (settings_.use_verbose_logging) {
+    std::cout << "Config at frame:" << std::endl;
+    std::cout << " Forward: " << get_num_process_stream_calls() << std::endl;
+    std::cout << " Reverse: " << get_num_reverse_process_stream_calls()
+              << std::endl;
+  }
+
+  if (!settings_.discard_all_settings_in_aecdump) {
+    if (settings_.use_verbose_logging) {
+      std::cout << "Setting used in config:" << std::endl;
+    }
+    AudioProcessing::Config apm_config = ap_->GetConfig();
+
+    if (msg.has_aec_enabled() || settings_.use_aec) {
+      bool enable = settings_.use_aec ? *settings_.use_aec : msg.aec_enabled();
+      apm_config.echo_canceller.enabled = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " aec_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_aecm_enabled() || settings_.use_aecm) {
+      bool enable =
+          settings_.use_aecm ? *settings_.use_aecm : msg.aecm_enabled();
+      apm_config.echo_canceller.enabled |= enable;
+      apm_config.echo_canceller.mobile_mode = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " aecm_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_aecm_comfort_noise_enabled() &&
+        msg.aecm_comfort_noise_enabled()) {
+      RTC_LOG(LS_ERROR) << "Ignoring deprecated setting: AECM comfort noise";
+    }
+
+    if (msg.has_aecm_routing_mode() &&
+        static_cast<EchoControlMobileImpl::RoutingMode>(
+            msg.aecm_routing_mode()) != EchoControlMobileImpl::kSpeakerphone) {
+      RTC_LOG(LS_ERROR) << "Ignoring deprecated setting: AECM routing mode: "
+                        << msg.aecm_routing_mode();
+    }
+
+    if (msg.has_agc_enabled() || settings_.use_agc) {
+      bool enable = settings_.use_agc ? *settings_.use_agc : msg.agc_enabled();
+      apm_config.gain_controller1.enabled = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_agc_mode() || settings_.agc_mode) {
+      int mode = settings_.agc_mode ? *settings_.agc_mode : msg.agc_mode();
+      apm_config.gain_controller1.mode =
+          static_cast<AudioProcessing::Config::GainController1::Mode>(mode);
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_mode: " << mode << std::endl;
+      }
+    }
+
+    if (msg.has_agc_limiter_enabled() || settings_.use_agc_limiter) {
+      bool enable = settings_.use_agc_limiter ? *settings_.use_agc_limiter
+                                              : msg.agc_limiter_enabled();
+      apm_config.gain_controller1.enable_limiter = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_limiter_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
"true" : "false") + << std::endl; + } + } + + if (settings_.use_agc2) { + bool enable = *settings_.use_agc2; + apm_config.gain_controller2.enabled = enable; + if (settings_.agc2_fixed_gain_db) { + apm_config.gain_controller2.fixed_digital.gain_db = + *settings_.agc2_fixed_gain_db; + } + if (settings_.use_verbose_logging) { + std::cout << " agc2_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_noise_robust_agc_enabled()) { + apm_config.gain_controller1.analog_gain_controller.enabled = + settings_.use_analog_agc ? *settings_.use_analog_agc + : msg.noise_robust_agc_enabled(); + if (settings_.use_verbose_logging) { + std::cout << " noise_robust_agc_enabled: " + << (msg.noise_robust_agc_enabled() ? "true" : "false") + << std::endl; + } + } + + if (msg.has_transient_suppression_enabled() || settings_.use_ts) { + bool enable = settings_.use_ts ? *settings_.use_ts + : msg.transient_suppression_enabled(); + apm_config.transient_suppression.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " transient_suppression_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_hpf_enabled() || settings_.use_hpf) { + bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled(); + apm_config.high_pass_filter.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " hpf_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_enabled() || settings_.use_ns) { + bool enable = settings_.use_ns ? *settings_.use_ns : msg.ns_enabled(); + apm_config.noise_suppression.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " ns_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_level() || settings_.ns_level) { + int level = settings_.ns_level ? *settings_.ns_level : msg.ns_level(); + apm_config.noise_suppression.level = + static_cast(level); + if (settings_.use_verbose_logging) { + std::cout << " ns_level: " << level << std::endl; + } + } + + if (msg.has_pre_amplifier_enabled() || settings_.use_pre_amplifier) { + const bool enable = settings_.use_pre_amplifier + ? *settings_.use_pre_amplifier + : msg.pre_amplifier_enabled(); + apm_config.pre_amplifier.enabled = enable; + } + + if (msg.has_pre_amplifier_fixed_gain_factor() || + settings_.pre_amplifier_gain_factor) { + const float gain = settings_.pre_amplifier_gain_factor + ? 
+                             ? *settings_.pre_amplifier_gain_factor
+                             : msg.pre_amplifier_fixed_gain_factor();
+      apm_config.pre_amplifier.fixed_gain_factor = gain;
+    }
+
+    if (settings_.use_verbose_logging && msg.has_experiments_description() &&
+        !msg.experiments_description().empty()) {
+      std::cout << " experiments not included by default in the simulation: "
+                << msg.experiments_description() << std::endl;
+    }
+
+    ap_->ApplyConfig(apm_config);
+  }
+}
+
+void AecDumpBasedSimulator::HandleMessage(const webrtc::audioproc::Init& msg,
+                                          int init_index) {
+  RTC_CHECK(msg.has_sample_rate());
+  RTC_CHECK(msg.has_num_input_channels());
+  RTC_CHECK(msg.has_num_reverse_channels());
+  RTC_CHECK(msg.has_reverse_sample_rate());
+
+  // Do not perform the init if the init block to process has already been
+  // fully processed.
+  if (settings_.init_to_process && *settings_.init_to_process < init_index) {
+    finished_processing_specified_init_block_ = true;
+  }
+
+  MaybeOpenCallOrderFile();
+
+  if (settings_.use_verbose_logging) {
+    std::cout << "Init at frame:" << std::endl;
+    std::cout << " Forward: " << get_num_process_stream_calls() << std::endl;
+    std::cout << " Reverse: " << get_num_reverse_process_stream_calls()
+              << std::endl;
+  }
+
+  int num_output_channels;
+  if (settings_.output_num_channels) {
+    num_output_channels = *settings_.output_num_channels;
+  } else {
+    num_output_channels = msg.has_num_output_channels()
+                              ? msg.num_output_channels()
+                              : msg.num_input_channels();
+  }
+
+  int output_sample_rate;
+  if (settings_.output_sample_rate_hz) {
+    output_sample_rate = *settings_.output_sample_rate_hz;
+  } else {
+    output_sample_rate = msg.has_output_sample_rate() ? msg.output_sample_rate()
+                                                      : msg.sample_rate();
+  }
+
+  int num_reverse_output_channels;
+  if (settings_.reverse_output_num_channels) {
+    num_reverse_output_channels = *settings_.reverse_output_num_channels;
+  } else {
+    num_reverse_output_channels = msg.has_num_reverse_output_channels()
+                                      ? msg.num_reverse_output_channels()
+                                      : msg.num_reverse_channels();
+  }
+
+  int reverse_output_sample_rate;
+  if (settings_.reverse_output_sample_rate_hz) {
+    reverse_output_sample_rate = *settings_.reverse_output_sample_rate_hz;
+  } else {
+    reverse_output_sample_rate = msg.has_reverse_output_sample_rate()
+                                     ? msg.reverse_output_sample_rate()
+                                     : msg.reverse_sample_rate();
+  }
+
+  SetupBuffersConfigsOutputs(
+      msg.sample_rate(), output_sample_rate, msg.reverse_sample_rate(),
+      reverse_output_sample_rate, msg.num_input_channels(), num_output_channels,
+      msg.num_reverse_channels(), num_reverse_output_channels);
+}
+
+void AecDumpBasedSimulator::HandleMessage(
+    const webrtc::audioproc::Stream& msg) {
+  if (call_order_output_file_) {
+    *call_order_output_file_ << "c";
+  }
+  PrepareProcessStreamCall(msg);
+  ProcessStream(interface_used_ == InterfaceType::kFixedInterface);
+  VerifyProcessStreamBitExactness(msg);
+}
+
+void AecDumpBasedSimulator::HandleMessage(
+    const webrtc::audioproc::ReverseStream& msg) {
+  if (call_order_output_file_) {
+    *call_order_output_file_ << "r";
+  }
+  PrepareReverseProcessStreamCall(msg);
+  ProcessReverseStream(interface_used_ == InterfaceType::kFixedInterface);
+}
+
+void AecDumpBasedSimulator::HandleMessage(
+    const webrtc::audioproc::RuntimeSetting& msg) {
+  RTC_CHECK(ap_.get());
+  if (msg.has_capture_pre_gain()) {
+    // Handle capture pre-gain runtime setting only if not overridden.
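+    // The recorded setting is forwarded only when the pre-amplifier (or
+    // capture level adjustment) emulation is not explicitly disabled and no
+    // fixed gain factor was forced on the command line.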
+    const bool pre_amplifier_overridden =
+        (!settings_.use_pre_amplifier || *settings_.use_pre_amplifier) &&
+        !settings_.pre_amplifier_gain_factor;
+    const bool capture_level_adjustment_overridden =
+        (!settings_.use_capture_level_adjustment ||
+         *settings_.use_capture_level_adjustment) &&
+        !settings_.pre_gain_factor;
+    if (pre_amplifier_overridden || capture_level_adjustment_overridden) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCapturePreGain(
+              msg.capture_pre_gain()));
+    }
+  } else if (msg.has_capture_post_gain()) {
+    // Handle capture post-gain runtime setting only if not overridden.
+    if ((!settings_.use_capture_level_adjustment ||
+         *settings_.use_capture_level_adjustment) &&
+        !settings_.post_gain_factor) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCapturePostGain(
+              msg.capture_post_gain()));
+    }
+  } else if (msg.has_capture_fixed_post_gain()) {
+    // Handle capture fixed-post-gain runtime setting only if not overridden.
+    if ((!settings_.use_agc2 || *settings_.use_agc2) &&
+        !settings_.agc2_fixed_gain_db) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCaptureFixedPostGain(
+              msg.capture_fixed_post_gain()));
+    }
+  } else if (msg.has_playout_volume_change()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(
+            msg.playout_volume_change()));
+  } else if (msg.has_playout_audio_device_change()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreatePlayoutAudioDeviceChange(
+            {msg.playout_audio_device_change().id(),
+             msg.playout_audio_device_change().max_volume()}));
+  } else if (msg.has_capture_output_used()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            msg.capture_output_used()));
+  }
+}
+
+void AecDumpBasedSimulator::MaybeOpenCallOrderFile() {
+  if (settings_.call_order_output_filename.has_value()) {
+    const std::string filename =
+        settings_.store_intermediate_output
+            ? *settings_.call_order_output_filename + "_" +
+                  std::to_string(output_reset_counter_)
+            : *settings_.call_order_output_filename;
+    call_order_output_file_ = std::make_unique<std::ofstream>(filename);
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h
new file mode 100644
index 0000000000..e2c1f3e4ba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
+
+#include <fstream>
+#include <memory>
+
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+#include "rtc_base/ignore_wundef.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
+#else
+#include "modules/audio_processing/debug.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+namespace test {
+
+// Used to perform an audio processing simulation from an aec dump.
+class AecDumpBasedSimulator final : public AudioProcessingSimulator {
+ public:
+  AecDumpBasedSimulator(const SimulationSettings& settings,
+                        rtc::scoped_refptr<AudioProcessing> audio_processing,
+                        std::unique_ptr<AudioProcessingBuilder> ap_builder);
+
+  AecDumpBasedSimulator() = delete;
+  AecDumpBasedSimulator(const AecDumpBasedSimulator&) = delete;
+  AecDumpBasedSimulator& operator=(const AecDumpBasedSimulator&) = delete;
+
+  ~AecDumpBasedSimulator() override;
+
+  // Processes the messages in the aecdump file.
+  void Process() override;
+
+  // Analyzes the data in the aecdump file and reports the resulting
+  // statistics.
+  void Analyze() override;
+
+ private:
+  void HandleEvent(const webrtc::audioproc::Event& event_msg,
+                   int& num_forward_chunks_processed,
+                   int& init_index);
+  void HandleMessage(const webrtc::audioproc::Init& msg, int init_index);
+  void HandleMessage(const webrtc::audioproc::Stream& msg);
+  void HandleMessage(const webrtc::audioproc::ReverseStream& msg);
+  void HandleMessage(const webrtc::audioproc::Config& msg);
+  void HandleMessage(const webrtc::audioproc::RuntimeSetting& msg);
+  void PrepareProcessStreamCall(const webrtc::audioproc::Stream& msg);
+  void PrepareReverseProcessStreamCall(
+      const webrtc::audioproc::ReverseStream& msg);
+  void VerifyProcessStreamBitExactness(const webrtc::audioproc::Stream& msg);
+  void MaybeOpenCallOrderFile();
+  enum InterfaceType {
+    kFixedInterface,
+    kFloatInterface,
+    kNotSpecified,
+  };
+
+  FILE* dump_input_file_;
+  std::unique_ptr<ChannelBuffer<float>> artificial_nearend_buf_;
+  std::unique_ptr<ChannelBufferWavReader> artificial_nearend_buffer_reader_;
+  bool artificial_nearend_eof_reported_ = false;
+  InterfaceType interface_used_ = InterfaceType::kNotSpecified;
+  std::unique_ptr<std::ofstream> call_order_output_file_;
+  bool finished_processing_specified_init_block_ = false;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
new file mode 100644
index 0000000000..c6063b3d76
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
@@ -0,0 +1,30 @@
+<!-- [The 30-line manifest body is not recoverable: its XML markup was lost in extraction.] -->
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties
new file mode 100644
index 0000000000..9a2c9f6c88
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties
@@ -0,0 +1,11 @@
+# This file is automatically generated by Android Tools.
+# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
+#
+# This file must be checked in Version Control Systems.
+#
+# To customize properties used by the Ant build system use,
+# "build.properties", and override values to adapt the script to your
+# project structure.
+
+# Project target.
+target=android-9
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c
new file mode 100644
index 0000000000..2e19635683
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+//BEGIN_INCLUDE(all)
+#include <jni.h>
+#include <errno.h>
+
+#include <EGL/egl.h>
+#include <GLES/gl.h>
+
+#include <android/sensor.h>
+#include <android/log.h>
+#include <android_native_app_glue.h>
+
+#define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, "native-activity", __VA_ARGS__))
+#define LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, "native-activity", __VA_ARGS__))
+
+/**
+ * Our saved state data.
+ */
+struct saved_state {
+    float angle;
+    int32_t x;
+    int32_t y;
+};
+
+/**
+ * Shared state for our app.
+ */
+struct engine {
+    struct android_app* app;
+
+    ASensorManager* sensorManager;
+    const ASensor* accelerometerSensor;
+    ASensorEventQueue* sensorEventQueue;
+
+    int animating;
+    EGLDisplay display;
+    EGLSurface surface;
+    EGLContext context;
+    int32_t width;
+    int32_t height;
+    struct saved_state state;
+};
+
+/**
+ * Initialize an EGL context for the current display.
+ */
+static int engine_init_display(struct engine* engine) {
+    // initialize OpenGL ES and EGL
+
+    /*
+     * Here specify the attributes of the desired configuration.
+     * Below, we select an EGLConfig with at least 8 bits per color
+     * component compatible with on-screen windows
+     */
+    const EGLint attribs[] = {
+            EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
+            EGL_BLUE_SIZE, 8,
+            EGL_GREEN_SIZE, 8,
+            EGL_RED_SIZE, 8,
+            EGL_NONE
+    };
+    EGLint w, h, dummy, format;
+    EGLint numConfigs;
+    EGLConfig config;
+    EGLSurface surface;
+    EGLContext context;
+
+    EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+
+    eglInitialize(display, 0, 0);
+
+    /* Here, the application chooses the configuration it desires. In this
+     * sample, we have a very simplified selection process, where we pick
+     * the first EGLConfig that matches our criteria */
+    eglChooseConfig(display, attribs, &config, 1, &numConfigs);
+
+    /* EGL_NATIVE_VISUAL_ID is an attribute of the EGLConfig that is
+     * guaranteed to be accepted by ANativeWindow_setBuffersGeometry().
+     * As soon as we picked an EGLConfig, we can safely reconfigure the
+     * ANativeWindow buffers to match, using EGL_NATIVE_VISUAL_ID.
*/ + eglGetConfigAttrib(display, config, EGL_NATIVE_VISUAL_ID, &format); + + ANativeWindow_setBuffersGeometry(engine->app->window, 0, 0, format); + + surface = eglCreateWindowSurface(display, config, engine->app->window, NULL); + context = eglCreateContext(display, config, NULL, NULL); + + if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) { + LOGW("Unable to eglMakeCurrent"); + return -1; + } + + eglQuerySurface(display, surface, EGL_WIDTH, &w); + eglQuerySurface(display, surface, EGL_HEIGHT, &h); + + engine->display = display; + engine->context = context; + engine->surface = surface; + engine->width = w; + engine->height = h; + engine->state.angle = 0; + + // Initialize GL state. + glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST); + glEnable(GL_CULL_FACE); + glShadeModel(GL_SMOOTH); + glDisable(GL_DEPTH_TEST); + + return 0; +} + +/** + * Just the current frame in the display. + */ +static void engine_draw_frame(struct engine* engine) { + if (engine->display == NULL) { + // No display. + return; + } + + // Just fill the screen with a color. + glClearColor(((float)engine->state.x)/engine->width, engine->state.angle, + ((float)engine->state.y)/engine->height, 1); + glClear(GL_COLOR_BUFFER_BIT); + + eglSwapBuffers(engine->display, engine->surface); +} + +/** + * Tear down the EGL context currently associated with the display. + */ +static void engine_term_display(struct engine* engine) { + if (engine->display != EGL_NO_DISPLAY) { + eglMakeCurrent(engine->display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + if (engine->context != EGL_NO_CONTEXT) { + eglDestroyContext(engine->display, engine->context); + } + if (engine->surface != EGL_NO_SURFACE) { + eglDestroySurface(engine->display, engine->surface); + } + eglTerminate(engine->display); + } + engine->animating = 0; + engine->display = EGL_NO_DISPLAY; + engine->context = EGL_NO_CONTEXT; + engine->surface = EGL_NO_SURFACE; +} + +/** + * Process the next input event. + */ +static int32_t engine_handle_input(struct android_app* app, AInputEvent* event) { + struct engine* engine = (struct engine*)app->userData; + if (AInputEvent_getType(event) == AINPUT_EVENT_TYPE_MOTION) { + engine->animating = 1; + engine->state.x = AMotionEvent_getX(event, 0); + engine->state.y = AMotionEvent_getY(event, 0); + return 1; + } + return 0; +} + +/** + * Process the next main command. + */ +static void engine_handle_cmd(struct android_app* app, int32_t cmd) { + struct engine* engine = (struct engine*)app->userData; + switch (cmd) { + case APP_CMD_SAVE_STATE: + // The system has asked us to save our current state. Do so. + engine->app->savedState = malloc(sizeof(struct saved_state)); + *((struct saved_state*)engine->app->savedState) = engine->state; + engine->app->savedStateSize = sizeof(struct saved_state); + break; + case APP_CMD_INIT_WINDOW: + // The window is being shown, get it ready. + if (engine->app->window != NULL) { + engine_init_display(engine); + engine_draw_frame(engine); + } + break; + case APP_CMD_TERM_WINDOW: + // The window is being hidden or closed, clean it up. + engine_term_display(engine); + break; + case APP_CMD_GAINED_FOCUS: + // When our app gains focus, we start monitoring the accelerometer. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_enableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + // We'd like to get 60 events per second (in us). 
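+                // ((1000L/60)*1000 below converts the ~16 ms target period
+                // from milliseconds to the microseconds that
+                // ASensorEventQueue_setEventRate() expects.)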
+ ASensorEventQueue_setEventRate(engine->sensorEventQueue, + engine->accelerometerSensor, (1000L/60)*1000); + } + break; + case APP_CMD_LOST_FOCUS: + // When our app loses focus, we stop monitoring the accelerometer. + // This is to avoid consuming battery while not being used. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_disableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + } + // Also stop animating. + engine->animating = 0; + engine_draw_frame(engine); + break; + } +} + +/** + * This is the main entry point of a native application that is using + * android_native_app_glue. It runs in its own thread, with its own + * event loop for receiving input events and doing other things. + */ +void android_main(struct android_app* state) { + struct engine engine; + + // Make sure glue isn't stripped. + app_dummy(); + + memset(&engine, 0, sizeof(engine)); + state->userData = &engine; + state->onAppCmd = engine_handle_cmd; + state->onInputEvent = engine_handle_input; + engine.app = state; + + // Prepare to monitor accelerometer + engine.sensorManager = ASensorManager_getInstance(); + engine.accelerometerSensor = ASensorManager_getDefaultSensor(engine.sensorManager, + ASENSOR_TYPE_ACCELEROMETER); + engine.sensorEventQueue = ASensorManager_createEventQueue(engine.sensorManager, + state->looper, LOOPER_ID_USER, NULL, NULL); + + if (state->savedState != NULL) { + // We are starting with a previous saved state; restore from it. + engine.state = *(struct saved_state*)state->savedState; + } + + // loop waiting for stuff to do. + + while (1) { + // Read all pending events. + int ident; + int events; + struct android_poll_source* source; + + // If not animating, we will block forever waiting for events. + // If animating, we loop until all events are read, then continue + // to draw the next frame of animation. + while ((ident=ALooper_pollAll(engine.animating ? 0 : -1, NULL, &events, + (void**)&source)) >= 0) { + + // Process this event. + if (source != NULL) { + source->process(state, source); + } + + // If a sensor has data, process it now. + if (ident == LOOPER_ID_USER) { + if (engine.accelerometerSensor != NULL) { + ASensorEvent event; + while (ASensorEventQueue_getEvents(engine.sensorEventQueue, + &event, 1) > 0) { + LOGI("accelerometer: x=%f y=%f z=%f", + event.acceleration.x, event.acceleration.y, + event.acceleration.z); + } + } + } + + // Check if we are exiting. + if (state->destroyRequested != 0) { + engine_term_display(&engine); + return; + } + } + + if (engine.animating) { + // Done with events; draw next animation frame. + engine.state.angle += .01f; + if (engine.state.angle > 1) { + engine.state.angle = 0; + } + + // Drawing is throttled to the screen update rate, so there + // is no need to do timing here. 
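+            // (eglSwapBuffers() in engine_draw_frame() typically blocks until
+            // the next display refresh, which provides that pacing.)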
+            engine_draw_frame(&engine);
+        }
+    }
+}
+//END_INCLUDE(all)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml
new file mode 100644
index 0000000000..d0bd0f3051
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <string name="app_name">apmtest</string>
+</resources>
diff --git a/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc
new file mode 100644
index 0000000000..ee8a308596
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/api_call_statistics.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace test {
+
+void ApiCallStatistics::Add(int64_t duration_nanos, CallType call_type) {
+  calls_.push_back(CallData(duration_nanos, call_type));
+}
+
+void ApiCallStatistics::PrintReport() const {
+  int64_t min_render = std::numeric_limits<int64_t>::max();
+  int64_t min_capture = std::numeric_limits<int64_t>::max();
+  int64_t max_render = 0;
+  int64_t max_capture = 0;
+  int64_t sum_render = 0;
+  int64_t sum_capture = 0;
+  int64_t num_render = 0;
+  int64_t num_capture = 0;
+  int64_t avg_render = 0;
+  int64_t avg_capture = 0;
+
+  for (auto v : calls_) {
+    if (v.call_type == CallType::kRender) {
+      ++num_render;
+      min_render = std::min(min_render, v.duration_nanos);
+      max_render = std::max(max_render, v.duration_nanos);
+      sum_render += v.duration_nanos;
+    } else {
+      ++num_capture;
+      min_capture = std::min(min_capture, v.duration_nanos);
+      max_capture = std::max(max_capture, v.duration_nanos);
+      sum_capture += v.duration_nanos;
+    }
+  }
+  min_render /= rtc::kNumNanosecsPerMicrosec;
+  max_render /= rtc::kNumNanosecsPerMicrosec;
+  sum_render /= rtc::kNumNanosecsPerMicrosec;
+  min_capture /= rtc::kNumNanosecsPerMicrosec;
+  max_capture /= rtc::kNumNanosecsPerMicrosec;
+  sum_capture /= rtc::kNumNanosecsPerMicrosec;
+  avg_render = num_render > 0 ? sum_render / num_render : 0;
+  avg_capture = num_capture > 0 ? sum_capture / num_capture : 0;
+
+  std::cout << std::endl
+            << "Total time: " << (sum_capture + sum_render) * 1e-6 << " s"
+            << std::endl
+            << " Render API calls:" << std::endl
+            << "  min: " << min_render << " us" << std::endl
+            << "  max: " << max_render << " us" << std::endl
+            << "  avg: " << avg_render << " us" << std::endl
+            << " Capture API calls:" << std::endl
+            << "  min: " << min_capture << " us" << std::endl
+            << "  max: " << max_capture << " us" << std::endl
+            << "  avg: " << avg_capture << " us" << std::endl;
+}
+
+void ApiCallStatistics::WriteReportToFile(absl::string_view filename) const {
+  std::unique_ptr<std::ofstream> out =
+      std::make_unique<std::ofstream>(std::string(filename));
+  for (auto v : calls_) {
+    if (v.call_type == CallType::kRender) {
+      *out << "render, ";
+    } else {
+      *out << "capture, ";
+    }
+    *out << (v.duration_nanos / rtc::kNumNanosecsPerMicrosec) << std::endl;
+  }
+}
+
+ApiCallStatistics::CallData::CallData(int64_t duration_nanos,
+                                      CallType call_type)
+    : duration_nanos(duration_nanos), call_type(call_type) {}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h
new file mode 100644
index 0000000000..8fced104f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
+
+#include <vector>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+
+// Collects statistics about the API call durations.
+class ApiCallStatistics {
+ public:
+  enum class CallType { kRender, kCapture };
+
+  // Adds a new datapoint.
+  void Add(int64_t duration_nanos, CallType call_type);
+
+  // Prints out a report of the statistics.
+  void PrintReport() const;
+
+  // Writes the call information to a file.
+  void WriteReportToFile(absl::string_view filename) const;
+
+ private:
+  struct CallData {
+    CallData(int64_t duration_nanos, CallType call_type);
+    int64_t duration_nanos;
+    CallType call_type;
+  };
+  std::vector<CallData> calls_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/apmtest.m b/third_party/libwebrtc/modules/audio_processing/test/apmtest.m
new file mode 100644
index 0000000000..1c8183c3ec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/apmtest.m
@@ -0,0 +1,365 @@
+%
+% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS. All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function apmtest(task, testname, filepath, casenumber, legacy)
+%APMTEST is a tool to process APM file sets and easily display the output.
+% APMTEST(TASK, TESTNAME, CASENUMBER) performs one of several TASKs: +% 'test' Processes the files to produce test output. +% 'list' Prints a list of cases in the test set, preceded by their +% CASENUMBERs. +% 'show' Uses spclab to show the test case specified by the +% CASENUMBER parameter. +% +% using a set of test files determined by TESTNAME: +% 'all' All tests. +% 'apm' The standard APM test set (default). +% 'apmm' The mobile APM test set. +% 'aec' The AEC test set. +% 'aecm' The AECM test set. +% 'agc' The AGC test set. +% 'ns' The NS test set. +% 'vad' The VAD test set. +% +% FILEPATH specifies the path to the test data files. +% +% CASENUMBER can be used to select a single test case. Omit CASENUMBER, +% or set to zero, to use all test cases. +% + +if nargin < 5 || isempty(legacy) + % Set to true to run old VQE recordings. + legacy = false; +end + +if nargin < 4 || isempty(casenumber) + casenumber = 0; +end + +if nargin < 3 || isempty(filepath) + filepath = 'data/'; +end + +if nargin < 2 || isempty(testname) + testname = 'all'; +end + +if nargin < 1 || isempty(task) + task = 'test'; +end + +if ~strcmp(task, 'test') && ~strcmp(task, 'list') && ~strcmp(task, 'show') + error(['TASK ' task ' is not recognized']); +end + +if casenumber == 0 && strcmp(task, 'show') + error(['CASENUMBER must be specified for TASK ' task]); +end + +inpath = [filepath 'input/']; +outpath = [filepath 'output/']; +refpath = [filepath 'reference/']; + +if strcmp(testname, 'all') + tests = {'apm','apmm','aec','aecm','agc','ns','vad'}; +else + tests = {testname}; +end + +if legacy + progname = './test'; +else + progname = './process_test'; +end + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +if legacy + farFile = 'vqeFar.pcm'; + nearFile = 'vqeNear.pcm'; + eventFile = 'vqeEvent.dat'; + delayFile = 'vqeBuf.dat'; + driftFile = 'vqeDrift.dat'; +else + farFile = 'apm_far.pcm'; + nearFile = 'apm_near.pcm'; + eventFile = 'apm_event.dat'; + delayFile = 'apm_delay.dat'; + driftFile = 'apm_drift.dat'; +end + +simulateMode = false; +nErr = 0; +nCases = 0; +for i=1:length(tests) + simulateMode = false; + + if strcmp(tests{i}, 'apm') + testdir = ['apm/']; + outfile = ['out']; + if legacy + opt = ['-ec 1 -agc 2 -nc 2 -vad 3']; + else + opt = ['--no_progress -hpf' ... + ' -aec --drift_compensation -agc --fixed_digital' ... + ' -ns --ns_moderate -vad']; + end + + elseif strcmp(tests{i}, 'apm-swb') + simulateMode = true; + testdir = ['apm-swb/']; + outfile = ['out']; + if legacy + opt = ['-fs 32000 -ec 1 -agc 2 -nc 2']; + else + opt = ['--no_progress -fs 32000 -hpf' ... + ' -aec --drift_compensation -agc --adaptive_digital' ... + ' -ns --ns_moderate -vad']; + end + elseif strcmp(tests{i}, 'apmm') + testdir = ['apmm/']; + outfile = ['out']; + opt = ['-aec --drift_compensation -agc --fixed_digital -hpf -ns ' ... + '--ns_moderate']; + + else + error(['TESTNAME ' tests{i} ' is not recognized']); + end + + inpathtest = [inpath testdir]; + outpathtest = [outpath testdir]; + refpathtest = [refpath testdir]; + + if ~exist(inpathtest,'dir') + error(['Input directory ' inpathtest ' does not exist']); + end + + if ~exist(refpathtest,'dir') + warning(['Reference directory ' refpathtest ' does not exist']); + end + + [status, errMsg] = mkdir(outpathtest); + if (status == 0) + error(errMsg); + end + + [nErr, nCases] = recurseDir(inpathtest, outpathtest, refpathtest, outfile, ... 
+ progname, opt, simulateMode, nErr, nCases, task, casenumber, legacy); + + if strcmp(task, 'test') || strcmp(task, 'show') + system(['rm ' farFile]); + system(['rm ' nearFile]); + if simulateMode == false + system(['rm ' eventFile]); + system(['rm ' delayFile]); + system(['rm ' driftFile]); + end + end +end + +if ~strcmp(task, 'list') + if nErr == 0 + fprintf(1, '\nAll files are bit-exact to reference\n', nErr); + else + fprintf(1, '\n%d files are NOT bit-exact to reference\n', nErr); + end +end + + +function [nErrOut, nCases] = recurseDir(inpath, outpath, refpath, ... + outfile, progname, opt, simulateMode, nErr, nCases, task, casenumber, ... + legacy) + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +dirs = dir(inpath); +nDirs = 0; +nErrOut = nErr; +for i=3:length(dirs) % skip . and .. + nDirs = nDirs + dirs(i).isdir; +end + + +if nDirs == 0 + nCases = nCases + 1; + + if casenumber == nCases || casenumber == 0 + + if strcmp(task, 'list') + fprintf([num2str(nCases) '. ' outfile '\n']) + else + vadoutfile = ['vad_' outfile '.dat']; + outfile = [outfile '.pcm']; + + % Check for VAD test + vadTest = 0; + if ~isempty(findstr(opt, '-vad')) + vadTest = 1; + if legacy + opt = [opt ' ' outpath vadoutfile]; + else + opt = [opt ' --vad_out_file ' outpath vadoutfile]; + end + end + + if exist([inpath 'vqeFar.pcm']) + system(['ln -s -f ' inpath 'vqeFar.pcm ' farFile]); + elseif exist([inpath 'apm_far.pcm']) + system(['ln -s -f ' inpath 'apm_far.pcm ' farFile]); + end + + if exist([inpath 'vqeNear.pcm']) + system(['ln -s -f ' inpath 'vqeNear.pcm ' nearFile]); + elseif exist([inpath 'apm_near.pcm']) + system(['ln -s -f ' inpath 'apm_near.pcm ' nearFile]); + end + + if exist([inpath 'vqeEvent.dat']) + system(['ln -s -f ' inpath 'vqeEvent.dat ' eventFile]); + elseif exist([inpath 'apm_event.dat']) + system(['ln -s -f ' inpath 'apm_event.dat ' eventFile]); + end + + if exist([inpath 'vqeBuf.dat']) + system(['ln -s -f ' inpath 'vqeBuf.dat ' delayFile]); + elseif exist([inpath 'apm_delay.dat']) + system(['ln -s -f ' inpath 'apm_delay.dat ' delayFile]); + end + + if exist([inpath 'vqeSkew.dat']) + system(['ln -s -f ' inpath 'vqeSkew.dat ' driftFile]); + elseif exist([inpath 'vqeDrift.dat']) + system(['ln -s -f ' inpath 'vqeDrift.dat ' driftFile]); + elseif exist([inpath 'apm_drift.dat']) + system(['ln -s -f ' inpath 'apm_drift.dat ' driftFile]); + end + + if simulateMode == false + command = [progname ' -o ' outpath outfile ' ' opt]; + else + if legacy + inputCmd = [' -in ' nearFile]; + else + inputCmd = [' -i ' nearFile]; + end + + if exist([farFile]) + if legacy + inputCmd = [' -if ' farFile inputCmd]; + else + inputCmd = [' -ir ' farFile inputCmd]; + end + end + command = [progname inputCmd ' -o ' outpath outfile ' ' opt]; + end + % This prevents MATLAB from using its own C libraries. + shellcmd = ['bash -c "unset LD_LIBRARY_PATH;']; + fprintf([command '\n']); + [status, result] = system([shellcmd command '"']); + fprintf(result); + + fprintf(['Reference file: ' refpath outfile '\n']); + + if vadTest == 1 + equal_to_ref = are_files_equal([outpath vadoutfile], ... + [refpath vadoutfile], ... + 'int8'); + if ~equal_to_ref + nErr = nErr + 1; + end + end + + [equal_to_ref, diffvector] = are_files_equal([outpath outfile], ... + [refpath outfile], ... + 'int16'); + if ~equal_to_ref + nErr = nErr + 1; + end + + if strcmp(task, 'show') + % Assume the last init gives the sample rate of interest. 
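+        %   (process_test prints a line of the form 'Sample rate: 16000 Hz';
+        %   the code below reads the digits after the last such occurrence.)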
+ str_idx = strfind(result, 'Sample rate:'); + fs = str2num(result(str_idx(end) + 13:str_idx(end) + 17)); + fprintf('Using %d Hz\n', fs); + + if exist([farFile]) + spclab(fs, farFile, nearFile, [refpath outfile], ... + [outpath outfile], diffvector); + %spclab(fs, diffvector); + else + spclab(fs, nearFile, [refpath outfile], [outpath outfile], ... + diffvector); + %spclab(fs, diffvector); + end + end + end + end +else + + for i=3:length(dirs) + if dirs(i).isdir + [nErr, nCases] = recurseDir([inpath dirs(i).name '/'], outpath, ... + refpath,[outfile '_' dirs(i).name], progname, opt, ... + simulateMode, nErr, nCases, task, casenumber, legacy); + end + end +end +nErrOut = nErr; + +function [are_equal, diffvector] = ... + are_files_equal(newfile, reffile, precision, diffvector) + +are_equal = false; +diffvector = 0; +if ~exist(newfile,'file') + warning(['Output file ' newfile ' does not exist']); + return +end + +if ~exist(reffile,'file') + warning(['Reference file ' reffile ' does not exist']); + return +end + +fid = fopen(newfile,'rb'); +new = fread(fid,inf,precision); +fclose(fid); + +fid = fopen(reffile,'rb'); +ref = fread(fid,inf,precision); +fclose(fid); + +if length(new) ~= length(ref) + warning('Reference is not the same length as output'); + minlength = min(length(new), length(ref)); + new = new(1:minlength); + ref = ref(1:minlength); +end +diffvector = new - ref; + +if isequal(new, ref) + fprintf([newfile ' is bit-exact to reference\n']); + are_equal = true; +else + if isempty(new) + warning([newfile ' is empty']); + return + end + snr = snrseg(new,ref,80); + fprintf('\n'); + are_equal = false; +end diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc new file mode 100644 index 0000000000..64fb9c7ab1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+
+#include <string.h>
+
+namespace webrtc {
+namespace test {
+
+void SetupFrame(const StreamConfig& stream_config,
+                std::vector<float*>* frame,
+                std::vector<float>* frame_samples) {
+  frame_samples->resize(stream_config.num_channels() *
+                        stream_config.num_frames());
+  frame->resize(stream_config.num_channels());
+  for (size_t ch = 0; ch < stream_config.num_channels(); ++ch) {
+    (*frame)[ch] = &(*frame_samples)[ch * stream_config.num_frames()];
+  }
+}
+
+void CopyVectorToAudioBuffer(const StreamConfig& stream_config,
+                             rtc::ArrayView<const float> source,
+                             AudioBuffer* destination) {
+  std::vector<float*> input;
+  std::vector<float> input_samples;
+
+  SetupFrame(stream_config, &input, &input_samples);
+
+  RTC_CHECK_EQ(input_samples.size(), source.size());
+  memcpy(input_samples.data(), source.data(),
+         source.size() * sizeof(source[0]));
+
+  destination->CopyFrom(&input[0], stream_config);
+}
+
+void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config,
+                                  AudioBuffer* source,
+                                  std::vector<float>* destination) {
+  std::vector<float*> output;
+
+  SetupFrame(stream_config, &output, destination);
+
+  source->CopyTo(stream_config, &output[0]);
+}
+
+void FillBuffer(float value, AudioBuffer& audio_buffer) {
+  for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+    FillBufferChannel(value, ch, audio_buffer);
+  }
+}
+
+void FillBufferChannel(float value, int channel, AudioBuffer& audio_buffer) {
+  RTC_CHECK_LT(channel, audio_buffer.num_channels());
+  for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+    audio_buffer.channels()[channel][i] = value;
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h
new file mode 100644
index 0000000000..faac4bf9ff
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+namespace test {
+
+// Copies a vector into an audiobuffer.
+void CopyVectorToAudioBuffer(const StreamConfig& stream_config,
+                             rtc::ArrayView<const float> source,
+                             AudioBuffer* destination);
+
+// Extracts a vector from an audiobuffer.
+void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config,
+                                  AudioBuffer* source,
+                                  std::vector<float>* destination);
+
+// Sets all values in `audio_buffer` to `value`.
+void FillBuffer(float value, AudioBuffer& audio_buffer);
+
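[Editor's usage sketch, not part of the original header: round-trips one frame
through an AudioBuffer with the helpers above. The six-argument AudioBuffer
constructor (input/buffer/output rates and channel counts) and the 10 ms frame
size are assumptions about the surrounding API.]

#include <vector>

#include "modules/audio_processing/test/audio_buffer_tools.h"

void RoundTripExample() {
  webrtc::StreamConfig config(/*sample_rate_hz=*/16000, /*num_channels=*/1);
  webrtc::AudioBuffer buffer(16000, 1, 16000, 1, 16000, 1);  // Assumed ctor.
  std::vector<float> in(config.num_frames(), 0.25f);  // One 10 ms frame.
  std::vector<float> out;
  webrtc::test::CopyVectorToAudioBuffer(config, in, &buffer);
  webrtc::test::ExtractVectorFromAudioBuffer(config, &buffer, &out);
  // `out` now holds the same samples that went in.
}

+// Sets all values in channel `channel` of `audio_buffer` to `value`.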
+void FillBufferChannel(float value, int channel, AudioBuffer& audio_buffer);
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc
new file mode 100644
index 0000000000..6bd266dc58
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+
+#include <memory>
+#include <utility>
+
+#include "modules/audio_processing/audio_processing_impl.h"
+
+namespace webrtc {
+
+AudioProcessingBuilderForTesting::AudioProcessingBuilderForTesting() = default;
+AudioProcessingBuilderForTesting::~AudioProcessingBuilderForTesting() = default;
+
+#ifdef WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilderForTesting::Create() {
+  return rtc::make_ref_counted<AudioProcessingImpl>(
+      config_, std::move(capture_post_processing_),
+      std::move(render_pre_processing_), std::move(echo_control_factory_),
+      std::move(echo_detector_), std::move(capture_analyzer_));
+}
+
+#else
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilderForTesting::Create() {
+  AudioProcessingBuilder builder;
+  TransferOwnershipsToBuilder(&builder);
+  return builder.SetConfig(config_).Create();
+}
+
+#endif
+
+void AudioProcessingBuilderForTesting::TransferOwnershipsToBuilder(
+    AudioProcessingBuilder* builder) {
+  builder->SetCapturePostProcessing(std::move(capture_post_processing_));
+  builder->SetRenderPreProcessing(std::move(render_pre_processing_));
+  builder->SetEchoControlFactory(std::move(echo_control_factory_));
+  builder->SetEchoDetector(std::move(echo_detector_));
+  builder->SetCaptureAnalyzer(std::move(capture_analyzer_));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h
new file mode 100644
index 0000000000..e73706c1b6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
+
+#include <list>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
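[Editor's usage sketch, not part of the original header: the builder is used
fluently in tests; `BuildTestApm` is a hypothetical helper, and the AEC3
factory injection mirrors what the simulator code elsewhere in this patch does.]

#include <memory>

#include "api/audio/echo_canceller3_factory.h"
#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"

rtc::scoped_refptr<webrtc::AudioProcessing> BuildTestApm() {
  webrtc::AudioProcessing::Config config;
  config.echo_canceller.enabled = true;
  return webrtc::AudioProcessingBuilderForTesting()
      .SetConfig(config)
      .SetEchoControlFactory(
          std::make_unique<webrtc::EchoCanceller3Factory>())
      .Create();
}

+// Facilitates building of AudioProcessingImpl for the tests.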
+class AudioProcessingBuilderForTesting {
+ public:
+  AudioProcessingBuilderForTesting();
+  AudioProcessingBuilderForTesting(const AudioProcessingBuilderForTesting&) =
+      delete;
+  AudioProcessingBuilderForTesting& operator=(
+      const AudioProcessingBuilderForTesting&) = delete;
+  ~AudioProcessingBuilderForTesting();
+
+  // Sets the APM configuration.
+  AudioProcessingBuilderForTesting& SetConfig(
+      const AudioProcessing::Config& config) {
+    config_ = config;
+    return *this;
+  }
+
+  // Sets the echo controller factory to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetEchoControlFactory(
+      std::unique_ptr<EchoControlFactory> echo_control_factory) {
+    echo_control_factory_ = std::move(echo_control_factory);
+    return *this;
+  }
+
+  // Sets the capture post-processing sub-module to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetCapturePostProcessing(
+      std::unique_ptr<CustomProcessing> capture_post_processing) {
+    capture_post_processing_ = std::move(capture_post_processing);
+    return *this;
+  }
+
+  // Sets the render pre-processing sub-module to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetRenderPreProcessing(
+      std::unique_ptr<CustomProcessing> render_pre_processing) {
+    render_pre_processing_ = std::move(render_pre_processing);
+    return *this;
+  }
+
+  // Sets the echo detector to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetEchoDetector(
+      rtc::scoped_refptr<EchoDetector> echo_detector) {
+    echo_detector_ = std::move(echo_detector);
+    return *this;
+  }
+
+  // Sets the capture analyzer sub-module to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetCaptureAnalyzer(
+      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
+    capture_analyzer_ = std::move(capture_analyzer);
+    return *this;
+  }
+
+  // Creates an APM instance with the specified config or the default one if
+  // unspecified. Injects the specified components transferring the ownership
+  // to the newly created APM instance - i.e., except for the config, the
+  // builder is reset to its initial state.
+  rtc::scoped_refptr<AudioProcessing> Create();
+
+ private:
+  // Transfers the ownership to a non-testing builder.
+  void TransferOwnershipsToBuilder(AudioProcessingBuilder* builder);
+
+  AudioProcessing::Config config_;
+  std::unique_ptr<EchoControlFactory> echo_control_factory_;
+  std::unique_ptr<CustomProcessing> capture_post_processing_;
+  std::unique_ptr<CustomProcessing> render_pre_processing_;
+  rtc::scoped_refptr<EchoDetector> echo_detector_;
+  std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
new file mode 100644
index 0000000000..7497d49fde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
@@ -0,0 +1,630 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
new file mode 100644
index 0000000000..7497d49fde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
@@ -0,0 +1,630 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/echo_canceller3_config_json.h"
+#include "api/audio/echo_canceller3_factory.h"
+#include "api/audio/echo_detector_creator.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/fake_recording_device.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/json.h"
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+// Helper for reading JSON from a file and parsing it into an AEC3
+// configuration.
+EchoCanceller3Config ReadAec3ConfigFromJsonFile(absl::string_view filename) {
+  std::string json_string;
+  std::string s;
+  std::ifstream f(std::string(filename).c_str());
+  if (f.fail()) {
+    std::cout << "Failed to open the file " << filename << std::endl;
+    RTC_CHECK_NOTREACHED();
+  }
+  while (std::getline(f, s)) {
+    json_string += s;
+  }
+
+  bool parsing_successful;
+  EchoCanceller3Config cfg;
+  Aec3ConfigFromJsonString(json_string, &cfg, &parsing_successful);
+  if (!parsing_successful) {
+    std::cout << "Parsing of json string failed: " << std::endl
+              << json_string << std::endl;
+    RTC_CHECK_NOTREACHED();
+  }
+  RTC_CHECK(EchoCanceller3Config::Validate(&cfg));
+
+  return cfg;
+}
+
+std::string GetIndexedOutputWavFilename(absl::string_view wav_name,
+                                        int counter) {
+  rtc::StringBuilder ss;
+  ss << wav_name.substr(0, wav_name.size() - 4) << "_" << counter
+     << wav_name.substr(wav_name.size() - 4);
+  return ss.Release();
+}
+
+void WriteEchoLikelihoodGraphFileHeader(std::ofstream* output_file) {
+  (*output_file) << "import numpy as np" << std::endl
+                 << "import matplotlib.pyplot as plt" << std::endl
+                 << "y = np.array([";
+}
+
+void WriteEchoLikelihoodGraphFileFooter(std::ofstream* output_file) {
+  (*output_file) << "])" << std::endl
+                 << "if __name__ == '__main__':" << std::endl
+                 << "  x = np.arange(len(y))*.01" << std::endl
+                 << "  plt.plot(x, y)" << std::endl
+                 << "  plt.ylabel('Echo likelihood')" << std::endl
+                 << "  plt.xlabel('Time (s)')" << std::endl
+                 << "  plt.show()" << std::endl;
+}
+
+// RAII class for execution time measurement. Updates the provided
+// ApiCallStatistics based on the time between ScopedTimer creation and
+// leaving the enclosing scope.
+class ScopedTimer {
+ public:
+  ScopedTimer(ApiCallStatistics* api_call_statistics,
+              ApiCallStatistics::CallType call_type)
+      : start_time_(rtc::TimeNanos()),
+        call_type_(call_type),
+        api_call_statistics_(api_call_statistics) {}
+
+  ~ScopedTimer() {
+    api_call_statistics_->Add(rtc::TimeNanos() - start_time_, call_type_);
+  }
+
+ private:
+  const int64_t start_time_;
+  const ApiCallStatistics::CallType call_type_;
+  ApiCallStatistics* const api_call_statistics_;
+};
+
+}  // namespace
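ScopedTimer above is a plain RAII latency probe: construction records rtc::TimeNanos(), destruction adds the delta to ApiCallStatistics. A standalone sketch of the same pattern, with hypothetical names and std::chrono in place of the rtc time utilities:

    #include <chrono>
    #include <cstdint>
    #include <iostream>

    // RAII timer in the style of ScopedTimer: the elapsed time between
    // construction and destruction is added to an external accumulator.
    class ScopedNanoTimer {
     public:
      explicit ScopedNanoTimer(int64_t* accumulator_ns)
          : start_(std::chrono::steady_clock::now()),
            accumulator_ns_(accumulator_ns) {}
      ~ScopedNanoTimer() {
        const auto elapsed = std::chrono::steady_clock::now() - start_;
        *accumulator_ns_ +=
            std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed)
                .count();
      }

     private:
      const std::chrono::steady_clock::time_point start_;
      int64_t* const accumulator_ns_;
    };

    int main() {
      int64_t capture_ns = 0;
      {
        ScopedNanoTimer timer(&capture_ns);  // Times the enclosing scope.
        volatile int sink = 0;
        for (int i = 0; i < 1000000; ++i) sink = sink + i;
      }
      std::cout << "Simulated capture call took " << capture_ns << " ns\n";
    }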
+
+SimulationSettings::SimulationSettings() = default;
+SimulationSettings::SimulationSettings(const SimulationSettings&) = default;
+SimulationSettings::~SimulationSettings() = default;
+
+AudioProcessingSimulator::AudioProcessingSimulator(
+    const SimulationSettings& settings,
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    std::unique_ptr<AudioProcessingBuilder> ap_builder)
+    : settings_(settings),
+      ap_(std::move(audio_processing)),
+      applied_input_volume_(settings.initial_mic_level),
+      fake_recording_device_(
+          settings.initial_mic_level,
+          settings_.simulate_mic_gain ? *settings.simulated_mic_kind : 0),
+      worker_queue_("file_writer_task_queue") {
+  RTC_CHECK(!settings_.dump_internal_data || WEBRTC_APM_DEBUG_DUMP == 1);
+  if (settings_.dump_start_frame || settings_.dump_end_frame) {
+    ApmDataDumper::SetActivated(!settings_.dump_start_frame);
+  } else {
+    ApmDataDumper::SetActivated(settings_.dump_internal_data);
+  }
+
+  if (settings_.dump_set_to_use) {
+    ApmDataDumper::SetDumpSetToUse(*settings_.dump_set_to_use);
+  }
+
+  if (settings_.dump_internal_data_output_dir.has_value()) {
+    ApmDataDumper::SetOutputDirectory(
+        settings_.dump_internal_data_output_dir.value());
+  }
+
+  if (settings_.ed_graph_output_filename &&
+      !settings_.ed_graph_output_filename->empty()) {
+    residual_echo_likelihood_graph_writer_.open(
+        *settings_.ed_graph_output_filename);
+    RTC_CHECK(residual_echo_likelihood_graph_writer_.is_open());
+    WriteEchoLikelihoodGraphFileHeader(&residual_echo_likelihood_graph_writer_);
+  }
+
+  if (settings_.simulate_mic_gain)
+    RTC_LOG(LS_VERBOSE) << "Simulating analog mic gain";
+
+  // Create the audio processing object.
+  RTC_CHECK(!(ap_ && ap_builder))
+      << "The AudioProcessing and the AudioProcessingBuilder cannot both be "
+         "specified at the same time.";
+
+  if (ap_) {
+    RTC_CHECK(!settings_.aec_settings_filename);
+    RTC_CHECK(!settings_.print_aec_parameter_values);
+  } else {
+    // Use the specified builder if one is provided, otherwise create a new
+    // builder.
+    std::unique_ptr<AudioProcessingBuilder> builder =
+        !!ap_builder ? std::move(ap_builder)
+                     : std::make_unique<AudioProcessingBuilder>();
+
+    // Create and set an EchoCanceller3Factory if needed.
+    const bool use_aec = settings_.use_aec && *settings_.use_aec;
+    if (use_aec) {
+      EchoCanceller3Config cfg;
+      if (settings_.aec_settings_filename) {
+        if (settings_.use_verbose_logging) {
+          std::cout << "Reading AEC Parameters from JSON input." << std::endl;
+        }
+        cfg = ReadAec3ConfigFromJsonFile(*settings_.aec_settings_filename);
+      }
+
+      if (settings_.linear_aec_output_filename) {
+        cfg.filter.export_linear_aec_output = true;
+      }
+
+      if (settings_.print_aec_parameter_values) {
+        if (!settings_.use_quiet_output) {
+          std::cout << "AEC settings:" << std::endl;
+        }
+        std::cout << Aec3ConfigToJsonString(cfg) << std::endl;
+      }
+
+      auto echo_control_factory = std::make_unique<EchoCanceller3Factory>(cfg);
+      builder->SetEchoControlFactory(std::move(echo_control_factory));
+    }
+
+    if (settings_.use_ed && *settings.use_ed) {
+      builder->SetEchoDetector(CreateEchoDetector());
+    }
+
+    // Create an audio processing object.
+    ap_ = builder->Create();
+    RTC_CHECK(ap_);
+  }
+}
+
+AudioProcessingSimulator::~AudioProcessingSimulator() {
+  if (residual_echo_likelihood_graph_writer_.is_open()) {
+    WriteEchoLikelihoodGraphFileFooter(&residual_echo_likelihood_graph_writer_);
+    residual_echo_likelihood_graph_writer_.close();
+  }
+}
+
+void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
+  // Optionally simulate the input volume.
+  if (settings_.simulate_mic_gain) {
+    RTC_DCHECK(!settings_.use_analog_mic_gain_emulation);
+    // Set the input volume to simulate.
+    fake_recording_device_.SetMicLevel(applied_input_volume_);
+
+    if (settings_.aec_dump_input_filename &&
+        aec_dump_applied_input_level_.has_value()) {
+      // For AEC dumps, use the applied input level, if recorded, to "virtually
+      // restore" the capture signal level before the input volume was applied.
+      fake_recording_device_.SetUndoMicLevel(*aec_dump_applied_input_level_);
+    }
+
+    // Apply the input volume.
+    if (fixed_interface) {
+      fake_recording_device_.SimulateAnalogGain(fwd_frame_.data);
+    } else {
+      fake_recording_device_.SimulateAnalogGain(in_buf_.get());
+    }
+  }
+
+  // Let APM know which input volume was applied.
+  // Keep track of whether `set_stream_analog_level()` is called.
+  bool applied_input_volume_set = false;
+  if (settings_.simulate_mic_gain) {
+    // When the input volume is simulated, use the volume applied for
+    // simulation.
+    ap_->set_stream_analog_level(fake_recording_device_.MicLevel());
+    applied_input_volume_set = true;
+  } else if (!settings_.use_analog_mic_gain_emulation) {
+    // Ignore the recommended input volume stored in `applied_input_volume_`
+    // and instead notify APM with the recorded input volume (if available).
+    if (settings_.aec_dump_input_filename &&
+        aec_dump_applied_input_level_.has_value()) {
+      // The actually applied input volume is available in the AEC dump.
+      ap_->set_stream_analog_level(*aec_dump_applied_input_level_);
+      applied_input_volume_set = true;
+    } else if (!settings_.aec_dump_input_filename) {
+      // Wav files do not include any information about the actually applied
+      // input volume. Hence, use the recommended input volume stored in
+      // `applied_input_volume_`.
+      ap_->set_stream_analog_level(applied_input_volume_);
+      applied_input_volume_set = true;
+    }
+  }
+
+  // Post any scheduled runtime settings.
+  if (settings_.frame_for_sending_capture_output_used_false &&
+      *settings_.frame_for_sending_capture_output_used_false ==
+          static_cast<int>(num_process_stream_calls_)) {
+    ap_->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            false));
+  }
+  if (settings_.frame_for_sending_capture_output_used_true &&
+      *settings_.frame_for_sending_capture_output_used_true ==
+          static_cast<int>(num_process_stream_calls_)) {
+    ap_->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(true));
+  }
+
+  // Process the current audio frame.
+  if (fixed_interface) {
+    {
+      const auto st = ScopedTimer(&api_call_statistics_,
+                                  ApiCallStatistics::CallType::kCapture);
+      RTC_CHECK_EQ(
+          AudioProcessing::kNoError,
+          ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
+                             fwd_frame_.config, fwd_frame_.data.data()));
+    }
+    fwd_frame_.CopyTo(out_buf_.get());
+  } else {
+    const auto st = ScopedTimer(&api_call_statistics_,
+                                ApiCallStatistics::CallType::kCapture);
+    RTC_CHECK_EQ(AudioProcessing::kNoError,
+                 ap_->ProcessStream(in_buf_->channels(), in_config_,
+                                    out_config_, out_buf_->channels()));
+  }
+
+  // Retrieve the recommended input volume only if `set_stream_analog_level()`
+  // has been called to stick to the APM API contract.
+  if (applied_input_volume_set) {
+    applied_input_volume_ = ap_->recommended_stream_analog_level();
+  }
+
+  if (buffer_memory_writer_) {
+    RTC_CHECK(!buffer_file_writer_);
+    buffer_memory_writer_->Write(*out_buf_);
+  } else if (buffer_file_writer_) {
+    RTC_CHECK(!buffer_memory_writer_);
+    buffer_file_writer_->Write(*out_buf_);
+  }
+
+  if (linear_aec_output_file_writer_) {
+    bool output_available = ap_->GetLinearAecOutput(linear_aec_output_buf_);
+    RTC_CHECK(output_available);
+    RTC_CHECK_GT(linear_aec_output_buf_.size(), 0);
+    RTC_CHECK_EQ(linear_aec_output_buf_[0].size(), 160);
+    for (size_t k = 0; k < linear_aec_output_buf_[0].size(); ++k) {
+      for (size_t ch = 0; ch < linear_aec_output_buf_.size(); ++ch) {
+        RTC_CHECK_EQ(linear_aec_output_buf_[ch].size(), 160);
+        float sample = FloatToFloatS16(linear_aec_output_buf_[ch][k]);
+        linear_aec_output_file_writer_->WriteSamples(&sample, 1);
+      }
+    }
+  }
+
+  if (residual_echo_likelihood_graph_writer_.is_open()) {
+    auto stats = ap_->GetStatistics();
+    residual_echo_likelihood_graph_writer_
+        << stats.residual_echo_likelihood.value_or(-1.f) << ", ";
+  }
+
+  ++num_process_stream_calls_;
+}
+
+void AudioProcessingSimulator::ProcessReverseStream(bool fixed_interface) {
+  if (fixed_interface) {
+    {
+      const auto st = ScopedTimer(&api_call_statistics_,
+                                  ApiCallStatistics::CallType::kRender);
+      RTC_CHECK_EQ(
+          AudioProcessing::kNoError,
+          ap_->ProcessReverseStream(rev_frame_.data.data(), rev_frame_.config,
+                                    rev_frame_.config, rev_frame_.data.data()));
+    }
+    rev_frame_.CopyTo(reverse_out_buf_.get());
+  } else {
+    const auto st = ScopedTimer(&api_call_statistics_,
+                                ApiCallStatistics::CallType::kRender);
+    RTC_CHECK_EQ(AudioProcessing::kNoError,
+                 ap_->ProcessReverseStream(
+                     reverse_in_buf_->channels(), reverse_in_config_,
+                     reverse_out_config_, reverse_out_buf_->channels()));
+  }
+
+  if (reverse_buffer_file_writer_) {
+    reverse_buffer_file_writer_->Write(*reverse_out_buf_);
+  }
+
+  ++num_reverse_process_stream_calls_;
+}
+
+void AudioProcessingSimulator::SetupBuffersConfigsOutputs(
+    int input_sample_rate_hz,
+    int output_sample_rate_hz,
+    int reverse_input_sample_rate_hz,
+    int reverse_output_sample_rate_hz,
+    int input_num_channels,
+    int output_num_channels,
+    int reverse_input_num_channels,
+    int reverse_output_num_channels) {
+  in_config_ = StreamConfig(input_sample_rate_hz, input_num_channels);
+  in_buf_.reset(new ChannelBuffer<float>(
+      rtc::CheckedDivExact(input_sample_rate_hz, kChunksPerSecond),
+      input_num_channels));
+
+  reverse_in_config_ =
+      StreamConfig(reverse_input_sample_rate_hz, reverse_input_num_channels);
+  reverse_in_buf_.reset(new ChannelBuffer<float>(
+      rtc::CheckedDivExact(reverse_input_sample_rate_hz, kChunksPerSecond),
+      reverse_input_num_channels));
+
+  out_config_ = StreamConfig(output_sample_rate_hz, output_num_channels);
+  out_buf_.reset(new ChannelBuffer<float>(
+      rtc::CheckedDivExact(output_sample_rate_hz, kChunksPerSecond),
+      output_num_channels));
+
+  reverse_out_config_ =
+      StreamConfig(reverse_output_sample_rate_hz, reverse_output_num_channels);
+  reverse_out_buf_.reset(new ChannelBuffer<float>(
+      rtc::CheckedDivExact(reverse_output_sample_rate_hz, kChunksPerSecond),
+      reverse_output_num_channels));
+
+  fwd_frame_.SetFormat(input_sample_rate_hz, input_num_channels);
+  rev_frame_.SetFormat(reverse_input_sample_rate_hz,
+                       reverse_input_num_channels);
+
+  if (settings_.use_verbose_logging) {
+    rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
+
+    std::cout << "Sample rates:" << std::endl;
+    std::cout << " Forward input: " << input_sample_rate_hz << std::endl;
+    std::cout << " Forward output: " << output_sample_rate_hz << std::endl;
+    std::cout << " Reverse input: " << reverse_input_sample_rate_hz
+              << std::endl;
+    std::cout << " Reverse output: " << reverse_output_sample_rate_hz
+              << std::endl;
+    std::cout << "Number of channels: " << std::endl;
+    std::cout << " Forward input: " << input_num_channels << std::endl;
+    std::cout << " Forward output: " << output_num_channels << std::endl;
+    std::cout << " Reverse input: " << reverse_input_num_channels << std::endl;
+    std::cout << " Reverse output: " << reverse_output_num_channels
+              << std::endl;
+  }
+
+  SetupOutput();
+}
+
+void AudioProcessingSimulator::SelectivelyToggleDataDumping(
+    int init_index,
+    int capture_frames_since_init) const {
+  if (!(settings_.dump_start_frame || settings_.dump_end_frame)) {
+    return;
+  }
+
+  if (settings_.init_to_process && *settings_.init_to_process != init_index) {
+    return;
+  }
+
+  if (settings_.dump_start_frame &&
+      *settings_.dump_start_frame == capture_frames_since_init) {
+    ApmDataDumper::SetActivated(true);
+  }
+
+  if (settings_.dump_end_frame &&
+      *settings_.dump_end_frame == capture_frames_since_init) {
+    ApmDataDumper::SetActivated(false);
+  }
+}
+
+void AudioProcessingSimulator::SetupOutput() {
+  if (settings_.output_filename) {
+    std::string filename;
+    if (settings_.store_intermediate_output) {
+      filename = GetIndexedOutputWavFilename(*settings_.output_filename,
+                                             output_reset_counter_);
+    } else {
+      filename = *settings_.output_filename;
+    }
+
+    std::unique_ptr<WavWriter> out_file(
+        new WavWriter(filename, out_config_.sample_rate_hz(),
+                      static_cast<size_t>(out_config_.num_channels()),
+                      settings_.wav_output_format));
+    buffer_file_writer_.reset(new ChannelBufferWavWriter(std::move(out_file)));
+  } else if (settings_.aec_dump_input_string.has_value()) {
+    buffer_memory_writer_ = std::make_unique<ChannelBufferVectorWriter>(
+        settings_.processed_capture_samples);
+  }
+
+  if (settings_.linear_aec_output_filename) {
+    std::string filename;
+    if (settings_.store_intermediate_output) {
+      filename = GetIndexedOutputWavFilename(
+          *settings_.linear_aec_output_filename, output_reset_counter_);
+    } else {
+      filename = *settings_.linear_aec_output_filename;
+    }
+
+    linear_aec_output_file_writer_.reset(
+        new WavWriter(filename, 16000, out_config_.num_channels(),
+                      settings_.wav_output_format));
+
+    linear_aec_output_buf_.resize(out_config_.num_channels());
+  }
+
+  if (settings_.reverse_output_filename) {
+    std::string filename;
+    if (settings_.store_intermediate_output) {
+      filename = GetIndexedOutputWavFilename(*settings_.reverse_output_filename,
+                                             output_reset_counter_);
+    } else {
+      filename = *settings_.reverse_output_filename;
+    }
+
+    std::unique_ptr<WavWriter> reverse_out_file(
+        new WavWriter(filename, reverse_out_config_.sample_rate_hz(),
+                      static_cast<size_t>(reverse_out_config_.num_channels()),
+                      settings_.wav_output_format));
+    reverse_buffer_file_writer_.reset(
+        new ChannelBufferWavWriter(std::move(reverse_out_file)));
+  }
+
+  ++output_reset_counter_;
+}
+
+void AudioProcessingSimulator::DetachAecDump() {
+  if (settings_.aec_dump_output_filename) {
+    ap_->DetachAecDump();
+  }
+}
+
+void AudioProcessingSimulator::ConfigureAudioProcessor() {
+  AudioProcessing::Config apm_config;
+  if (settings_.use_ts) {
+    apm_config.transient_suppression.enabled = *settings_.use_ts != 0;
+  }
+  if (settings_.multi_channel_render) {
+    apm_config.pipeline.multi_channel_render = *settings_.multi_channel_render;
+  }
+
+  if (settings_.multi_channel_capture) {
+    apm_config.pipeline.multi_channel_capture =
+        *settings_.multi_channel_capture;
+  }
+
+  if (settings_.use_agc2) {
+    apm_config.gain_controller2.enabled = *settings_.use_agc2;
+    if (settings_.agc2_fixed_gain_db) {
+      apm_config.gain_controller2.fixed_digital.gain_db =
+          *settings_.agc2_fixed_gain_db;
+    }
+    if (settings_.agc2_use_adaptive_gain) {
+      apm_config.gain_controller2.adaptive_digital.enabled =
+          *settings_.agc2_use_adaptive_gain;
+    }
+  }
+  if (settings_.use_pre_amplifier) {
+    apm_config.pre_amplifier.enabled = *settings_.use_pre_amplifier;
+    if (settings_.pre_amplifier_gain_factor) {
+      apm_config.pre_amplifier.fixed_gain_factor =
+          *settings_.pre_amplifier_gain_factor;
+    }
+  }
+
+  if (settings_.use_analog_mic_gain_emulation) {
+    if (*settings_.use_analog_mic_gain_emulation) {
+      apm_config.capture_level_adjustment.enabled = true;
+      apm_config.capture_level_adjustment.analog_mic_gain_emulation.enabled =
+          true;
+    } else {
+      apm_config.capture_level_adjustment.analog_mic_gain_emulation.enabled =
+          false;
+    }
+  }
+  if (settings_.analog_mic_gain_emulation_initial_level) {
+    apm_config.capture_level_adjustment.analog_mic_gain_emulation
+        .initial_level = *settings_.analog_mic_gain_emulation_initial_level;
+  }
+
+  if (settings_.use_capture_level_adjustment) {
+    apm_config.capture_level_adjustment.enabled =
+        *settings_.use_capture_level_adjustment;
+  }
+  if (settings_.pre_gain_factor) {
+    apm_config.capture_level_adjustment.pre_gain_factor =
+        *settings_.pre_gain_factor;
+  }
+  if (settings_.post_gain_factor) {
+    apm_config.capture_level_adjustment.post_gain_factor =
+        *settings_.post_gain_factor;
+  }
+
+  const bool use_aec = settings_.use_aec && *settings_.use_aec;
+  const bool use_aecm = settings_.use_aecm && *settings_.use_aecm;
+  if (use_aec || use_aecm) {
+    apm_config.echo_canceller.enabled = true;
+    apm_config.echo_canceller.mobile_mode = use_aecm;
+  }
+  apm_config.echo_canceller.export_linear_aec_output =
+      !!settings_.linear_aec_output_filename;
+
+  if (settings_.use_hpf) {
+    apm_config.high_pass_filter.enabled = *settings_.use_hpf;
+  }
+
+  if (settings_.use_agc) {
+    apm_config.gain_controller1.enabled = *settings_.use_agc;
+  }
+  if (settings_.agc_mode) {
+    apm_config.gain_controller1.mode =
+        static_cast<AudioProcessing::Config::GainController1::Mode>(
+            *settings_.agc_mode);
+  }
+  if (settings_.use_agc_limiter) {
+    apm_config.gain_controller1.enable_limiter = *settings_.use_agc_limiter;
+  }
+  if (settings_.agc_target_level) {
+    apm_config.gain_controller1.target_level_dbfs =
+        *settings_.agc_target_level;
+  }
+  if (settings_.agc_compression_gain) {
+    apm_config.gain_controller1.compression_gain_db =
+        *settings_.agc_compression_gain;
+  }
+  if (settings_.use_analog_agc) {
+    apm_config.gain_controller1.analog_gain_controller.enabled =
+        *settings_.use_analog_agc;
+  }
+  if (settings_.analog_agc_use_digital_adaptive_controller) {
+    apm_config.gain_controller1.analog_gain_controller
+        .enable_digital_adaptive =
+        *settings_.analog_agc_use_digital_adaptive_controller;
+  }
+
+  if (settings_.maximum_internal_processing_rate) {
+    apm_config.pipeline.maximum_internal_processing_rate =
+        *settings_.maximum_internal_processing_rate;
+  }
+
+  if (settings_.use_ns) {
+    apm_config.noise_suppression.enabled = *settings_.use_ns;
+  }
+  if (settings_.ns_level) {
+    const int level = *settings_.ns_level;
+    RTC_CHECK_GE(level, 0);
+    RTC_CHECK_LE(level, 3);
+    apm_config.noise_suppression.level =
+        static_cast<AudioProcessing::Config::NoiseSuppression::Level>(level);
+  }
+  if (settings_.ns_analysis_on_linear_aec_output) {
+    apm_config.noise_suppression.analyze_linear_aec_output_when_available =
+        *settings_.ns_analysis_on_linear_aec_output;
+  }
+
+  ap_->ApplyConfig(apm_config);
+
+  if (settings_.use_ts) {
+    // Default to key pressed if activating the transient suppressor with
+    // continuous key events.
+    ap_->set_stream_key_pressed(*settings_.use_ts == 2);
+  }
+
+  if (settings_.aec_dump_output_filename) {
+    ap_->AttachAecDump(AecDumpFactory::Create(
+        *settings_.aec_dump_output_filename, -1, &worker_queue_));
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
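ConfigureAudioProcessor() above translates the optional SimulationSettings fields into an AudioProcessing::Config and submits it with ApplyConfig(). A minimal sketch of the same GetConfig()/modify/ApplyConfig() flow against the public APM API (EnableBasicPipeline is a hypothetical helper; the config fields are the ones used above):

    #include "modules/audio_processing/include/audio_processing.h"

    // Enables a few sub-modules on an existing APM instance, following the
    // same pattern as AudioProcessingSimulator::ConfigureAudioProcessor().
    void EnableBasicPipeline(webrtc::AudioProcessing& apm) {
      webrtc::AudioProcessing::Config config = apm.GetConfig();
      config.echo_canceller.enabled = true;
      config.echo_canceller.mobile_mode = false;  // Full AEC, not AECM.
      config.high_pass_filter.enabled = true;
      config.noise_suppression.enabled = true;
      config.noise_suppression.level =
          webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
      apm.ApplyConfig(config);
    }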
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h
new file mode 100644
index 0000000000..e40d818bd8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_
+
+#include <array>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/test/api_call_statistics.h"
+#include "modules/audio_processing/test/fake_recording_device.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/task_queue_for_test.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace test {
+
+static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs;
+
+struct Int16Frame {
+  void SetFormat(int sample_rate_hz, int num_channels) {
+    this->sample_rate_hz = sample_rate_hz;
+    samples_per_channel =
+        rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond);
+    this->num_channels = num_channels;
+    config = StreamConfig(sample_rate_hz, num_channels);
+    data.resize(num_channels * samples_per_channel);
+  }
+
+  void CopyTo(ChannelBuffer<float>* dest) {
+    RTC_DCHECK(dest);
+    RTC_CHECK_EQ(num_channels, dest->num_channels());
+    RTC_CHECK_EQ(samples_per_channel, dest->num_frames());
+    // Copy the data from the input buffer.
+    std::vector<float> tmp(samples_per_channel * num_channels);
+    S16ToFloat(data.data(), tmp.size(), tmp.data());
+    Deinterleave(tmp.data(), samples_per_channel, num_channels,
+                 dest->channels());
+  }
+
+  void CopyFrom(const ChannelBuffer<float>& src) {
+    RTC_CHECK_EQ(src.num_channels(), num_channels);
+    RTC_CHECK_EQ(src.num_frames(), samples_per_channel);
+    data.resize(num_channels * samples_per_channel);
+    int16_t* dest_data = data.data();
+    for (int ch = 0; ch < num_channels; ++ch) {
+      for (int sample = 0; sample < samples_per_channel; ++sample) {
+        dest_data[sample * num_channels + ch] =
+            src.channels()[ch][sample] * 32767;
+      }
+    }
+  }
+
+  int sample_rate_hz;
+  int samples_per_channel;
+  int num_channels;
+
+  StreamConfig config;
+
+  std::vector<int16_t> data;
+};
+
+// Holds all the parameters available for controlling the simulation.
+struct SimulationSettings {
+  SimulationSettings();
+  SimulationSettings(const SimulationSettings&);
+  ~SimulationSettings();
+  absl::optional<int> stream_delay;
+  absl::optional<bool> use_stream_delay;
+  absl::optional<int> output_sample_rate_hz;
+  absl::optional<int> output_num_channels;
+  absl::optional<int> reverse_output_sample_rate_hz;
+  absl::optional<int> reverse_output_num_channels;
+  absl::optional<std::string> output_filename;
+  absl::optional<std::string> reverse_output_filename;
+  absl::optional<std::string> input_filename;
+  absl::optional<std::string> reverse_input_filename;
+  absl::optional<std::string> artificial_nearend_filename;
+  absl::optional<std::string> linear_aec_output_filename;
+  absl::optional<bool> use_aec;
+  absl::optional<bool> use_aecm;
+  absl::optional<bool> use_ed;  // Residual Echo Detector.
+  absl::optional<std::string> ed_graph_output_filename;
+  absl::optional<bool> use_agc;
+  absl::optional<bool> use_agc2;
+  absl::optional<bool> use_pre_amplifier;
+  absl::optional<bool> use_capture_level_adjustment;
+  absl::optional<bool> use_analog_mic_gain_emulation;
+  absl::optional<bool> use_hpf;
+  absl::optional<bool> use_ns;
+  absl::optional<int> use_ts;
+  absl::optional<bool> use_analog_agc;
+  absl::optional<bool> use_all;
+  absl::optional<bool> analog_agc_use_digital_adaptive_controller;
+  absl::optional<int> agc_mode;
+  absl::optional<int> agc_target_level;
+  absl::optional<bool> use_agc_limiter;
+  absl::optional<int> agc_compression_gain;
+  absl::optional<bool> agc2_use_adaptive_gain;
+  absl::optional<float> agc2_fixed_gain_db;
+  absl::optional<float> pre_amplifier_gain_factor;
+  absl::optional<float> pre_gain_factor;
+  absl::optional<float> post_gain_factor;
+  absl::optional<float> analog_mic_gain_emulation_initial_level;
+  absl::optional<int> ns_level;
+  absl::optional<bool> ns_analysis_on_linear_aec_output;
+  absl::optional<bool> override_key_pressed;
+  absl::optional<int> maximum_internal_processing_rate;
+  int initial_mic_level;
+  bool simulate_mic_gain = false;
+  absl::optional<bool> multi_channel_render;
+  absl::optional<bool> multi_channel_capture;
+  absl::optional<int> simulated_mic_kind;
+  absl::optional<int> frame_for_sending_capture_output_used_false;
+  absl::optional<int> frame_for_sending_capture_output_used_true;
+  bool report_performance = false;
+  absl::optional<std::string> performance_report_output_filename;
+  bool report_bitexactness = false;
+  bool use_verbose_logging = false;
+  bool use_quiet_output = false;
+  bool discard_all_settings_in_aecdump = true;
+  absl::optional<std::string> aec_dump_input_filename;
+  absl::optional<std::string> aec_dump_output_filename;
+  bool fixed_interface = false;
+  bool store_intermediate_output = false;
+  bool print_aec_parameter_values = false;
+  bool dump_internal_data = false;
+  WavFile::SampleFormat wav_output_format = WavFile::SampleFormat::kInt16;
+  absl::optional<std::string> dump_internal_data_output_dir;
+  absl::optional<int> dump_set_to_use;
+  absl::optional<std::string> call_order_input_filename;
+  absl::optional<std::string> call_order_output_filename;
+  absl::optional<std::string> aec_settings_filename;
+  absl::optional<absl::string_view> aec_dump_input_string;
+  std::vector<float>* processed_capture_samples = nullptr;
+  bool analysis_only = false;
+  absl::optional<int> dump_start_frame;
+  absl::optional<int> dump_end_frame;
+  absl::optional<int> init_to_process;
+};
+
+// Provides common functionality for performing audio processing simulations.
+class AudioProcessingSimulator {
+ public:
+  AudioProcessingSimulator(const SimulationSettings& settings,
+                           rtc::scoped_refptr<AudioProcessing> audio_processing,
+                           std::unique_ptr<AudioProcessingBuilder> ap_builder);
+
+  AudioProcessingSimulator() = delete;
+  AudioProcessingSimulator(const AudioProcessingSimulator&) = delete;
+  AudioProcessingSimulator& operator=(const AudioProcessingSimulator&) =
+      delete;
+
+  virtual ~AudioProcessingSimulator();
+
+  // Processes the data in the input.
+  virtual void Process() = 0;
+
+  // Returns the execution times of all AudioProcessing calls.
+  const ApiCallStatistics& GetApiCallStatistics() const {
+    return api_call_statistics_;
+  }
+
+  // Analyzes the data in the input and reports the resulting statistics.
+  virtual void Analyze() = 0;
+
+  // Reports whether the processed recording was bitexact.
+  bool OutputWasBitexact() { return bitexact_output_; }
+
+  size_t get_num_process_stream_calls() { return num_process_stream_calls_; }
+  size_t get_num_reverse_process_stream_calls() {
+    return num_reverse_process_stream_calls_;
+  }
+
+ protected:
+  void ProcessStream(bool fixed_interface);
+  void ProcessReverseStream(bool fixed_interface);
+  void ConfigureAudioProcessor();
+  void DetachAecDump();
+  void SetupBuffersConfigsOutputs(int input_sample_rate_hz,
+                                  int output_sample_rate_hz,
+                                  int reverse_input_sample_rate_hz,
+                                  int reverse_output_sample_rate_hz,
+                                  int input_num_channels,
+                                  int output_num_channels,
+                                  int reverse_input_num_channels,
+                                  int reverse_output_num_channels);
+  void SelectivelyToggleDataDumping(int init_index,
+                                    int capture_frames_since_init) const;
+
+  const SimulationSettings settings_;
+  rtc::scoped_refptr<AudioProcessing> ap_;
+
+  std::unique_ptr<ChannelBuffer<float>> in_buf_;
+  std::unique_ptr<ChannelBuffer<float>> out_buf_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_in_buf_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_out_buf_;
+  std::vector<std::array<float, 160>> linear_aec_output_buf_;
+  StreamConfig in_config_;
+  StreamConfig out_config_;
+  StreamConfig reverse_in_config_;
+  StreamConfig reverse_out_config_;
+  std::unique_ptr<ChannelBufferWavReader> buffer_reader_;
+  std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_;
+  Int16Frame rev_frame_;
+  Int16Frame fwd_frame_;
+  bool bitexact_output_ = true;
+  absl::optional<int> aec_dump_applied_input_level_ = 0;
+
+ protected:
+  size_t output_reset_counter_ = 0;
+
+ private:
+  void SetupOutput();
+
+  size_t num_process_stream_calls_ = 0;
+  size_t num_reverse_process_stream_calls_ = 0;
+  std::unique_ptr<ChannelBufferWavWriter> buffer_file_writer_;
+  std::unique_ptr<ChannelBufferWavWriter> reverse_buffer_file_writer_;
+  std::unique_ptr<ChannelBufferVectorWriter> buffer_memory_writer_;
+  std::unique_ptr<WavWriter> linear_aec_output_file_writer_;
+  ApiCallStatistics api_call_statistics_;
+  std::ofstream residual_echo_likelihood_graph_writer_;
+  int applied_input_volume_;
+  FakeRecordingDevice fake_recording_device_;
+
+  TaskQueueForTest worker_queue_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_
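Int16Frame above bridges the fixed interface (interleaved int16 samples) and the float interface (deinterleaved float channels): reads scale by 1/32768 via S16ToFloat(), writes scale by 32767 in CopyFrom(). A self-contained sketch of that conversion, with hypothetical helper names:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Extracts one channel of interleaved S16 audio as floats in [-1, 1),
    // mirroring the 1/32768 read scaling used via S16ToFloat() above.
    std::vector<float> ExtractChannelAsFloat(
        const std::vector<int16_t>& interleaved,
        size_t num_channels,
        size_t channel) {
      std::vector<float> out;
      for (size_t i = channel; i < interleaved.size(); i += num_channels) {
        out.push_back(interleaved[i] / 32768.0f);
      }
      return out;
    }

    // Converts a float sample back to S16 with the 32767 write scaling used
    // by Int16Frame::CopyFrom(), clamping to the representable range first.
    int16_t FloatSampleToS16(float sample) {
      if (sample > 1.0f) sample = 1.0f;
      if (sample < -1.0f) sample = -1.0f;
      return static_cast<int16_t>(sample * 32767.0f);
    }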
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc
new file mode 100644
index 0000000000..c23ec74366
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/audioproc_float_impl.h"
+
+#include <string.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/test/aec_dump_based_simulator.h"
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+#include "modules/audio_processing/test/wav_based_simulator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/field_trial.h"
+
+constexpr int kParameterNotSpecifiedValue = -10000;
+
+ABSL_FLAG(std::string, dump_input, "", "Aec dump input filename");
+ABSL_FLAG(std::string, dump_output, "", "Aec dump output filename");
+ABSL_FLAG(std::string, i, "", "Forward stream input wav filename");
+ABSL_FLAG(std::string, o, "", "Forward stream output wav filename");
+ABSL_FLAG(std::string, ri, "", "Reverse stream input wav filename");
+ABSL_FLAG(std::string, ro, "", "Reverse stream output wav filename");
+ABSL_FLAG(std::string,
+          artificial_nearend,
+          "",
+          "Artificial nearend wav filename");
+ABSL_FLAG(std::string, linear_aec_output, "", "Linear AEC output wav filename");
+ABSL_FLAG(int,
+          output_num_channels,
+          kParameterNotSpecifiedValue,
+          "Number of forward stream output channels");
+ABSL_FLAG(int,
+          reverse_output_num_channels,
+          kParameterNotSpecifiedValue,
+          "Number of reverse stream output channels");
+ABSL_FLAG(int,
+          output_sample_rate_hz,
+          kParameterNotSpecifiedValue,
+          "Forward stream output sample rate in Hz");
+ABSL_FLAG(int,
+          reverse_output_sample_rate_hz,
+          kParameterNotSpecifiedValue,
+          "Reverse stream output sample rate in Hz");
+ABSL_FLAG(bool,
+          fixed_interface,
+          false,
+          "Use the fixed interface when operating on wav files");
+ABSL_FLAG(int,
+          aec,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the echo canceller");
+ABSL_FLAG(int,
+          aecm,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the mobile echo controller");
+ABSL_FLAG(int,
+          ed,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the residual echo detector");
+ABSL_FLAG(std::string,
+          ed_graph,
+          "",
+          "Output filename for graph of echo likelihood");
+ABSL_FLAG(int,
+          agc,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the AGC");
+ABSL_FLAG(int,
+          agc2,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the AGC2");
+ABSL_FLAG(int,
+          pre_amplifier,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the pre-amplifier");
+ABSL_FLAG(
+    int,
+    capture_level_adjustment,
+    kParameterNotSpecifiedValue,
+    "Activate (1) or deactivate (0) the capture level adjustment "
+    "functionality");
+ABSL_FLAG(int,
+          analog_mic_gain_emulation,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the analog mic gain emulation in "
+          "the production (non-test) code.");
+ABSL_FLAG(int,
+          hpf,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the high-pass filter");
+ABSL_FLAG(int,
+          ns,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the noise suppressor");
+ABSL_FLAG(int,
+          ts,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the transient suppressor");
+ABSL_FLAG(int,
+          analog_agc,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the analog AGC");
+ABSL_FLAG(bool,
+          all_default,
+          false,
+          "Activate all of the default components (will be overridden by any "
+          "other settings)");
+ABSL_FLAG(int,
+          analog_agc_use_digital_adaptive_controller,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) digital adaptation in AGC1. "
+          "Digital adaptation is active by default.");
+ABSL_FLAG(int,
+          agc_mode,
+          kParameterNotSpecifiedValue,
+          "Specify the AGC mode (0-2)");
+ABSL_FLAG(int,
+          agc_target_level,
+          kParameterNotSpecifiedValue,
+          "Specify the AGC target level (0-31)");
+ABSL_FLAG(int,
+          agc_limiter,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the AGC limiter");
+ABSL_FLAG(int,
+          agc_compression_gain,
+          kParameterNotSpecifiedValue,
+          "Specify the AGC compression gain (0-90)");
+ABSL_FLAG(int,
+          agc2_enable_adaptive_gain,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) the AGC2 adaptive gain");
+ABSL_FLAG(float,
+          agc2_fixed_gain_db,
+          kParameterNotSpecifiedValue,
+          "AGC2 fixed gain (dB) to apply");
+ABSL_FLAG(float,
+          pre_amplifier_gain_factor,
+          kParameterNotSpecifiedValue,
+          "Pre-amplifier gain factor (linear) to apply");
+ABSL_FLAG(float,
+          pre_gain_factor,
+          kParameterNotSpecifiedValue,
+          "Pre-gain factor (linear) to apply in the capture level adjustment");
+ABSL_FLAG(float,
+          post_gain_factor,
+          kParameterNotSpecifiedValue,
+          "Post-gain factor (linear) to apply in the capture level "
+          "adjustment");
+ABSL_FLAG(float,
+          analog_mic_gain_emulation_initial_level,
+          kParameterNotSpecifiedValue,
+          "Emulated analog mic level to apply initially in the production "
+          "(non-test) code.");
+ABSL_FLAG(int,
+          ns_level,
+          kParameterNotSpecifiedValue,
+          "Specify the NS level (0-3)");
+ABSL_FLAG(int,
+          ns_analysis_on_linear_aec_output,
+          kParameterNotSpecifiedValue,
+          "Specifies whether the noise suppression analysis is done on the "
+          "linear AEC output");
+ABSL_FLAG(int,
+          maximum_internal_processing_rate,
+          kParameterNotSpecifiedValue,
+          "Set a maximum internal processing rate (32000 or 48000) to "
+          "override the default rate");
+ABSL_FLAG(int,
+          stream_delay,
+          kParameterNotSpecifiedValue,
+          "Specify the stream delay in ms to use");
+ABSL_FLAG(int,
+          use_stream_delay,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) reporting the stream delay");
+ABSL_FLAG(int,
+          stream_drift_samples,
+          kParameterNotSpecifiedValue,
+          "Specify the number of stream drift samples to use");
+ABSL_FLAG(int,
+          initial_mic_level,
+          100,
+          "Initial mic level (0-255) for the analog mic gain simulation in "
+          "the test code");
+ABSL_FLAG(int,
+          simulate_mic_gain,
+          0,
+          "Activate (1) or deactivate (0) the analog mic gain simulation in "
+          "the test code");
+ABSL_FLAG(int,
+          multi_channel_render,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) multi-channel render processing in "
+          "APM pipeline");
+ABSL_FLAG(int,
+          multi_channel_capture,
+          kParameterNotSpecifiedValue,
+          "Activate (1) or deactivate (0) multi-channel capture processing "
+          "in APM pipeline");
+ABSL_FLAG(int,
+          simulated_mic_kind,
+          kParameterNotSpecifiedValue,
+          "Specify which microphone kind to use for microphone simulation");
+ABSL_FLAG(int,
+          override_key_pressed,
+          kParameterNotSpecifiedValue,
+          "Always set to true (1) or to false (0) the key press state. If "
+          "unspecified, false is set with wav files or, with AEC dumps, the "
+          "recorded event is used.");
If " + "unspecified, false is set with Wav files or, with AEC dumps, the " + "recorded event is used."); +ABSL_FLAG(int, + frame_for_sending_capture_output_used_false, + kParameterNotSpecifiedValue, + "Capture frame index for sending a runtime setting for that the " + "capture output is not used."); +ABSL_FLAG(int, + frame_for_sending_capture_output_used_true, + kParameterNotSpecifiedValue, + "Capture frame index for sending a runtime setting for that the " + "capture output is used."); +ABSL_FLAG(bool, performance_report, false, "Report the APM performance "); +ABSL_FLAG(std::string, + performance_report_output_file, + "", + "Generate a CSV file with the API call durations"); +ABSL_FLAG(bool, verbose, false, "Produce verbose output"); +ABSL_FLAG(bool, + quiet, + false, + "Avoid producing information about the progress."); +ABSL_FLAG(bool, + bitexactness_report, + false, + "Report bitexactness for aec dump result reproduction"); +ABSL_FLAG(bool, + discard_settings_in_aecdump, + false, + "Discard any config settings specified in the aec dump"); +ABSL_FLAG(bool, + store_intermediate_output, + false, + "Creates new output files after each init"); +ABSL_FLAG(std::string, + custom_call_order_file, + "", + "Custom process API call order file"); +ABSL_FLAG(std::string, + output_custom_call_order_file, + "", + "Generate custom process API call order file from AEC dump"); +ABSL_FLAG(bool, + print_aec_parameter_values, + false, + "Print parameter values used in AEC in JSON-format"); +ABSL_FLAG(std::string, + aec_settings, + "", + "File in JSON-format with custom AEC settings"); +ABSL_FLAG(bool, + dump_data, + false, + "Dump internal data during the call (requires build flag)"); +ABSL_FLAG(std::string, + dump_data_output_dir, + "", + "Internal data dump output directory"); +ABSL_FLAG(int, + dump_set_to_use, + kParameterNotSpecifiedValue, + "Specifies the dump set to use (if not all the dump sets will " + "be used"); +ABSL_FLAG(bool, + analyze, + false, + "Only analyze the call setup behavior (no processing)"); +ABSL_FLAG(float, + dump_start_seconds, + kParameterNotSpecifiedValue, + "Start of when to dump data (seconds)."); +ABSL_FLAG(float, + dump_end_seconds, + kParameterNotSpecifiedValue, + "End of when to dump data (seconds)."); +ABSL_FLAG(int, + dump_start_frame, + kParameterNotSpecifiedValue, + "Start of when to dump data (frames)."); +ABSL_FLAG(int, + dump_end_frame, + kParameterNotSpecifiedValue, + "End of when to dump data (frames)."); +ABSL_FLAG(int, + init_to_process, + kParameterNotSpecifiedValue, + "Init index to process."); + +ABSL_FLAG(bool, + float_wav_output, + false, + "Produce floating point wav output files."); + +ABSL_FLAG(std::string, + force_fieldtrials, + "", + "Field trials control experimental feature code which can be forced. " + "E.g. 
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const char kUsageDescription[] =
+    "Usage: audioproc_f [options] -i <input.wav>\n"
+    "   or\n"
+    "       audioproc_f [options] -dump_input <aec_dump>\n"
+    "\n\n"
+    "Command-line tool to simulate a call using the audio "
+    "processing module, either based on wav files or "
+    "protobuf debug dump recordings.\n";
+
+void SetSettingIfSpecified(absl::string_view value,
+                           absl::optional<std::string>* parameter) {
+  if (value.compare("") != 0) {
+    *parameter = std::string(value);
+  }
+}
+
+void SetSettingIfSpecified(int value, absl::optional<int>* parameter) {
+  if (value != kParameterNotSpecifiedValue) {
+    *parameter = value;
+  }
+}
+
+void SetSettingIfSpecified(float value, absl::optional<float>* parameter) {
+  constexpr float kFloatParameterNotSpecifiedValue =
+      kParameterNotSpecifiedValue;
+  if (value != kFloatParameterNotSpecifiedValue) {
+    *parameter = value;
+  }
+}
+
+void SetSettingIfFlagSet(int32_t flag, absl::optional<bool>* parameter) {
+  if (flag == 0) {
+    *parameter = false;
+  } else if (flag == 1) {
+    *parameter = true;
+  }
+}
+
+SimulationSettings CreateSettings() {
+  SimulationSettings settings;
+  if (absl::GetFlag(FLAGS_all_default)) {
+    settings.use_ts = true;
+    settings.use_analog_agc = true;
+    settings.use_ns = true;
+    settings.use_hpf = true;
+    settings.use_agc = true;
+    settings.use_agc2 = false;
+    settings.use_pre_amplifier = false;
+    settings.use_aec = true;
+    settings.use_aecm = false;
+    settings.use_ed = false;
+  }
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_input),
+                        &settings.aec_dump_input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_output),
+                        &settings.aec_dump_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_i), &settings.input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_o), &settings.output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ri),
+                        &settings.reverse_input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ro),
+                        &settings.reverse_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_artificial_nearend),
+                        &settings.artificial_nearend_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_linear_aec_output),
+                        &settings.linear_aec_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_output_num_channels),
+                        &settings.output_num_channels);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_reverse_output_num_channels),
+                        &settings.reverse_output_num_channels);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_output_sample_rate_hz),
+                        &settings.output_sample_rate_hz);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_reverse_output_sample_rate_hz),
+                        &settings.reverse_output_sample_rate_hz);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_aec), &settings.use_aec);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_aecm), &settings.use_aecm);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_ed), &settings.use_ed);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ed_graph),
+                        &settings.ed_graph_output_filename);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc), &settings.use_agc);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc2), &settings.use_agc2);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_pre_amplifier),
+                      &settings.use_pre_amplifier);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_capture_level_adjustment),
+                      &settings.use_capture_level_adjustment);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_mic_gain_emulation),
+                      &settings.use_analog_mic_gain_emulation);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_hpf),
&settings.use_hpf); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_ns), &settings.use_ns); + SetSettingIfSpecified(absl::GetFlag(FLAGS_ts), &settings.use_ts); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc), + &settings.use_analog_agc); + SetSettingIfFlagSet( + absl::GetFlag(FLAGS_analog_agc_use_digital_adaptive_controller), + &settings.analog_agc_use_digital_adaptive_controller); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_mode), &settings.agc_mode); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_target_level), + &settings.agc_target_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc_limiter), + &settings.use_agc_limiter); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_compression_gain), + &settings.agc_compression_gain); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc2_enable_adaptive_gain), + &settings.agc2_use_adaptive_gain); + + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc2_fixed_gain_db), + &settings.agc2_fixed_gain_db); + SetSettingIfSpecified(absl::GetFlag(FLAGS_pre_amplifier_gain_factor), + &settings.pre_amplifier_gain_factor); + SetSettingIfSpecified(absl::GetFlag(FLAGS_pre_gain_factor), + &settings.pre_gain_factor); + SetSettingIfSpecified(absl::GetFlag(FLAGS_post_gain_factor), + &settings.post_gain_factor); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_analog_mic_gain_emulation_initial_level), + &settings.analog_mic_gain_emulation_initial_level); + SetSettingIfSpecified(absl::GetFlag(FLAGS_ns_level), &settings.ns_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_ns_analysis_on_linear_aec_output), + &settings.ns_analysis_on_linear_aec_output); + SetSettingIfSpecified(absl::GetFlag(FLAGS_maximum_internal_processing_rate), + &settings.maximum_internal_processing_rate); + SetSettingIfSpecified(absl::GetFlag(FLAGS_stream_delay), + &settings.stream_delay); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_use_stream_delay), + &settings.use_stream_delay); + SetSettingIfSpecified(absl::GetFlag(FLAGS_custom_call_order_file), + &settings.call_order_input_filename); + SetSettingIfSpecified(absl::GetFlag(FLAGS_output_custom_call_order_file), + &settings.call_order_output_filename); + SetSettingIfSpecified(absl::GetFlag(FLAGS_aec_settings), + &settings.aec_settings_filename); + settings.initial_mic_level = absl::GetFlag(FLAGS_initial_mic_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_multi_channel_render), + &settings.multi_channel_render); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_multi_channel_capture), + &settings.multi_channel_capture); + settings.simulate_mic_gain = absl::GetFlag(FLAGS_simulate_mic_gain); + SetSettingIfSpecified(absl::GetFlag(FLAGS_simulated_mic_kind), + &settings.simulated_mic_kind); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_override_key_pressed), + &settings.override_key_pressed); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_frame_for_sending_capture_output_used_false), + &settings.frame_for_sending_capture_output_used_false); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_frame_for_sending_capture_output_used_true), + &settings.frame_for_sending_capture_output_used_true); + settings.report_performance = absl::GetFlag(FLAGS_performance_report); + SetSettingIfSpecified(absl::GetFlag(FLAGS_performance_report_output_file), + &settings.performance_report_output_filename); + settings.use_verbose_logging = absl::GetFlag(FLAGS_verbose); + settings.use_quiet_output = absl::GetFlag(FLAGS_quiet); + settings.report_bitexactness = absl::GetFlag(FLAGS_bitexactness_report); + settings.discard_all_settings_in_aecdump = + absl::GetFlag(FLAGS_discard_settings_in_aecdump); + 
settings.fixed_interface = absl::GetFlag(FLAGS_fixed_interface);
+  settings.store_intermediate_output =
+      absl::GetFlag(FLAGS_store_intermediate_output);
+  settings.print_aec_parameter_values =
+      absl::GetFlag(FLAGS_print_aec_parameter_values);
+  settings.dump_internal_data = absl::GetFlag(FLAGS_dump_data);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_data_output_dir),
+                        &settings.dump_internal_data_output_dir);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_set_to_use),
+                        &settings.dump_set_to_use);
+  settings.wav_output_format = absl::GetFlag(FLAGS_float_wav_output)
+                                   ? WavFile::SampleFormat::kFloat
+                                   : WavFile::SampleFormat::kInt16;
+
+  settings.analysis_only = absl::GetFlag(FLAGS_analyze);
+
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_start_frame),
+                        &settings.dump_start_frame);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_end_frame),
+                        &settings.dump_end_frame);
+
+  constexpr int kFramesPerSecond = 100;
+  absl::optional<float> start_seconds;
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_start_seconds),
+                        &start_seconds);
+  if (start_seconds) {
+    settings.dump_start_frame = *start_seconds * kFramesPerSecond;
+  }
+
+  absl::optional<float> end_seconds;
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_end_seconds), &end_seconds);
+  if (end_seconds) {
+    settings.dump_end_frame = *end_seconds * kFramesPerSecond;
+  }
+
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_init_to_process),
+                        &settings.init_to_process);
+
+  return settings;
+}
+
+void ReportConditionalErrorAndExit(bool condition, absl::string_view message) {
+  if (condition) {
+    std::cerr << message << std::endl;
+    exit(1);
+  }
+}
+
+void PerformBasicParameterSanityChecks(
+    const SimulationSettings& settings,
+    bool pre_constructed_ap_provided,
+    bool pre_constructed_ap_builder_provided) {
+  if (settings.input_filename || settings.reverse_input_filename) {
+    ReportConditionalErrorAndExit(
+        !!settings.aec_dump_input_filename,
+        "Error: The aec dump file cannot be specified "
+        "together with input wav files!\n");
+
+    ReportConditionalErrorAndExit(
+        !!settings.aec_dump_input_string,
+        "Error: The aec dump input string cannot be specified "
+        "together with input wav files!\n");
+
+    ReportConditionalErrorAndExit(!!settings.artificial_nearend_filename,
+                                  "Error: The artificial nearend cannot be "
+                                  "specified together with input wav "
+                                  "files!\n");
+
+    ReportConditionalErrorAndExit(!settings.input_filename,
+                                  "Error: When operating on wav files, the "
+                                  "input wav filename must be "
+                                  "specified!\n");
+
+    ReportConditionalErrorAndExit(
+        settings.reverse_output_filename && !settings.reverse_input_filename,
+        "Error: When operating on wav files, the reverse input wav filename "
+        "must be specified if the reverse output wav filename is "
+        "specified!\n");
+  } else {
+    ReportConditionalErrorAndExit(
+        !settings.aec_dump_input_filename && !settings.aec_dump_input_string,
+        "Error: Either the aec dump input file, the wav "
+        "input file or the aec dump input string must be specified!\n");
+    ReportConditionalErrorAndExit(
+        settings.aec_dump_input_filename && settings.aec_dump_input_string,
+        "Error: The aec dump input file cannot be specified together with "
+        "the aec dump input string!\n");
+  }
+
+  ReportConditionalErrorAndExit(settings.use_aec && !(*settings.use_aec) &&
+                                    settings.linear_aec_output_filename,
+                                "Error: The linear AEC output filename cannot "
+                                "be specified without the AEC being active");
+
+  ReportConditionalErrorAndExit(
+      settings.use_aec && *settings.use_aec && settings.use_aecm &&
+          *settings.use_aecm,
+      "Error: The AEC and the AECM cannot be activated at the same time!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_sample_rate_hz && *settings.output_sample_rate_hz <= 0,
+      "Error: --output_sample_rate_hz must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_sample_rate_hz &&
+          *settings.reverse_output_sample_rate_hz <= 0,
+      "Error: --reverse_output_sample_rate_hz must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_num_channels && *settings.output_num_channels <= 0,
+      "Error: --output_num_channels must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_num_channels &&
+          *settings.reverse_output_num_channels <= 0,
+      "Error: --reverse_output_num_channels must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc_target_level && ((*settings.agc_target_level) < 0 ||
+                                    (*settings.agc_target_level) > 31),
+      "Error: --agc_target_level must be specified between 0 and 31.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc_compression_gain && ((*settings.agc_compression_gain) < 0 ||
+                                        (*settings.agc_compression_gain) > 90),
+      "Error: --agc_compression_gain must be specified between 0 and 90.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc2_fixed_gain_db && ((*settings.agc2_fixed_gain_db) < 0 ||
+                                      (*settings.agc2_fixed_gain_db) > 90),
+      "Error: --agc2_fixed_gain_db must be specified between 0 and 90.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.ns_level &&
+          ((*settings.ns_level) < 0 || (*settings.ns_level) > 3),
+      "Error: --ns_level must be specified between 0 and 3.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.report_bitexactness && !settings.aec_dump_input_filename,
+      "Error: --bitexactness_report can only be used when operating on an "
+      "aecdump\n");
+
+  ReportConditionalErrorAndExit(
+      settings.call_order_input_filename && settings.aec_dump_input_filename,
+      "Error: --custom_call_order_file cannot be used when operating on an "
+      "aecdump\n");
+
+  ReportConditionalErrorAndExit(
+      (settings.initial_mic_level < 0 || settings.initial_mic_level > 255),
+      "Error: --initial_mic_level must be specified between 0 and 255.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.simulated_mic_kind && !settings.simulate_mic_gain,
+      "Error: --simulated_mic_kind cannot be specified when mic simulation "
+      "is disabled\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.simulated_mic_kind && settings.simulate_mic_gain,
+      "Error: --simulated_mic_kind must be specified when mic simulation is "
+      "enabled\n");
+
+  // TODO(bugs.webrtc.org/7494): Document how the two settings below differ.
+  ReportConditionalErrorAndExit(
+      settings.simulate_mic_gain && settings.use_analog_mic_gain_emulation,
+      "Error: --simulate_mic_gain and --use_analog_mic_gain_emulation cannot "
+      "be enabled at the same time\n");
+
+  auto valid_wav_name = [](absl::string_view wav_file_name) {
+    if (wav_file_name.size() < 5) {
+      return false;
+    }
+    if ((wav_file_name.compare(wav_file_name.size() - 4, 4, ".wav") == 0) ||
+        (wav_file_name.compare(wav_file_name.size() - 4, 4, ".WAV") == 0)) {
+      return true;
+    }
+    return false;
+  };
+
+  ReportConditionalErrorAndExit(
+      settings.input_filename && (!valid_wav_name(*settings.input_filename)),
+      "Error: --i must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_filename && (!valid_wav_name(*settings.output_filename)),
+      "Error: --o must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_input_filename &&
+          (!valid_wav_name(*settings.reverse_input_filename)),
+      "Error: --ri must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_filename &&
+          (!valid_wav_name(*settings.reverse_output_filename)),
+      "Error: --ro must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.artificial_nearend_filename &&
+          !valid_wav_name(*settings.artificial_nearend_filename),
+      "Error: --artificial_nearend must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.linear_aec_output_filename &&
+          (!valid_wav_name(*settings.linear_aec_output_filename)),
+      "Error: --linear_aec_output must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      WEBRTC_APM_DEBUG_DUMP == 0 && settings.dump_internal_data,
+      "Error: --dump_data cannot be set without proper build support.\n");
+
+  ReportConditionalErrorAndExit(settings.init_to_process &&
+                                    *settings.init_to_process != 1 &&
+                                    !settings.aec_dump_input_filename,
+                                "Error: --init_to_process must be set to 1 "
+                                "for wav-file based simulations.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.init_to_process &&
+          (settings.dump_start_frame || settings.dump_end_frame),
+      "Error: --init_to_process must be set when specifying a start and/or "
+      "end frame for when to dump internal data.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.dump_internal_data &&
+          settings.dump_internal_data_output_dir.has_value(),
+      "Error: --dump_data_output_dir cannot be set without --dump_data.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.aec_dump_input_filename &&
+          settings.call_order_output_filename.has_value(),
+      "Error: --output_custom_call_order_file needs an AEC dump input "
+      "file.\n");
+
+  ReportConditionalErrorAndExit(
+      (!settings.use_pre_amplifier || !(*settings.use_pre_amplifier)) &&
+          settings.pre_amplifier_gain_factor.has_value(),
+      "Error: --pre_amplifier_gain_factor needs --pre_amplifier to be "
+      "specified and set.\n");
+
+  ReportConditionalErrorAndExit(
+      pre_constructed_ap_provided && pre_constructed_ap_builder_provided,
+      "Error: The AudioProcessing and the AudioProcessingBuilder cannot both "
+      "be specified at the same time.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.aec_settings_filename && pre_constructed_ap_provided,
+      "Error: The aec_settings_filename cannot be specified when a "
+      "pre-constructed audio processing object is provided.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.print_aec_parameter_values && pre_constructed_ap_provided,
+      "Error: The print_aec_parameter_values cannot be set when a "
+      "pre-constructed audio processing object is provided.\n");
provided.\n"); + + if (settings.linear_aec_output_filename && pre_constructed_ap_provided) { + std::cout << "Warning: For the linear AEC output to be stored, this must " + "be configured in the AEC that is part of the provided " + "AudioProcessing object." + << std::endl; + } +} + +int RunSimulation(rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder, + int argc, + char* argv[], + absl::string_view input_aecdump, + std::vector* processed_capture_samples) { + std::vector args = absl::ParseCommandLine(argc, argv); + if (args.size() != 1) { + printf("%s", kUsageDescription); + return 1; + } + // InitFieldTrialsFromString stores the char*, so the char array must + // outlive the application. + const std::string field_trials = absl::GetFlag(FLAGS_force_fieldtrials); + webrtc::field_trial::InitFieldTrialsFromString(field_trials.c_str()); + + SimulationSettings settings = CreateSettings(); + if (!input_aecdump.empty()) { + settings.aec_dump_input_string = input_aecdump; + settings.processed_capture_samples = processed_capture_samples; + RTC_CHECK(settings.processed_capture_samples); + } + PerformBasicParameterSanityChecks(settings, !!audio_processing, !!ap_builder); + std::unique_ptr processor; + + if (settings.aec_dump_input_filename || settings.aec_dump_input_string) { + processor.reset(new AecDumpBasedSimulator( + settings, std::move(audio_processing), std::move(ap_builder))); + } else { + processor.reset(new WavBasedSimulator(settings, std::move(audio_processing), + std::move(ap_builder))); + } + + if (settings.analysis_only) { + processor->Analyze(); + } else { + processor->Process(); + } + + if (settings.report_performance) { + processor->GetApiCallStatistics().PrintReport(); + } + if (settings.performance_report_output_filename) { + processor->GetApiCallStatistics().WriteReportToFile( + *settings.performance_report_output_filename); + } + + if (settings.report_bitexactness && settings.aec_dump_input_filename) { + if (processor->OutputWasBitexact()) { + std::cout << "The processing was bitexact."; + } else { + std::cout << "The processing was not bitexact."; + } + } + + return 0; +} + +} // namespace + +int AudioprocFloatImpl(rtc::scoped_refptr audio_processing, + int argc, + char* argv[]) { + return RunSimulation( + std::move(audio_processing), /*ap_builder=*/nullptr, argc, argv, + /*input_aecdump=*/"", /*processed_capture_samples=*/nullptr); +} + +int AudioprocFloatImpl(std::unique_ptr ap_builder, + int argc, + char* argv[], + absl::string_view input_aecdump, + std::vector* processed_capture_samples) { + return RunSimulation(/*audio_processing=*/nullptr, std::move(ap_builder), + argc, argv, input_aecdump, processed_capture_samples); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h new file mode 100644 index 0000000000..5ed3aefab7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h
new file mode 100644
index 0000000000..5ed3aefab7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_
+
+#include <memory>
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+namespace test {
+
+// This function implements the audio processing simulation utility. Pass
+// `input_aecdump` to provide the content of an AEC dump file as a string; if
+// `input_aecdump` is not passed, a WAV or AEC input dump file must be specified
+// via the `argv` argument. Pass `processed_capture_samples` to write in it the
+// samples processed on the capture side; if `processed_capture_samples` is not
+// passed, the output file can optionally be specified via the `argv` argument.
+// Any audio_processing object specified in the input is used for the
+// simulation. Note that when the audio_processing object is specified all
+// functionality that relies on using the internal builder is deactivated,
+// since the AudioProcessing object is already created and the builder is not
+// used in the simulation.
+int AudioprocFloatImpl(rtc::scoped_refptr<AudioProcessing> audio_processing,
+                       int argc,
+                       char* argv[]);
+
+// This function implements the audio processing simulation utility. Pass
+// `input_aecdump` to provide the content of an AEC dump file as a string; if
+// `input_aecdump` is not passed, a WAV or AEC input dump file must be specified
+// via the `argv` argument. Pass `processed_capture_samples` to write in it the
+// samples processed on the capture side; if `processed_capture_samples` is not
+// passed, the output file can optionally be specified via the `argv` argument.
+int AudioprocFloatImpl(std::unique_ptr<AudioProcessingBuilder> ap_builder,
+                       int argc,
+                       char* argv[],
+                       absl::string_view input_aecdump,
+                       std::vector<float>* processed_capture_samples);
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_
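
Editorial note: a hypothetical host program driving the first overload, assuming a pre-built `AudioProcessing` instance. Names and flow here are illustrative only; the real tool wires this up in its own `main`:

```
#include <utility>

#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/test/audioproc_float_impl.h"

int main(int argc, char* argv[]) {
  // With a pre-constructed AudioProcessing object, the builder-related
  // command-line flags are rejected by the parameter sanity checks above.
  rtc::scoped_refptr<webrtc::AudioProcessing> apm =
      webrtc::AudioProcessingBuilder().Create();
  return webrtc::test::AudioprocFloatImpl(std::move(apm), argc, argv);
}
```
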
diff --git a/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc
new file mode 100644
index 0000000000..0464345364
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/bitexactness_tools.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+
+std::string GetApmRenderTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("far8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("far16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("far32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("far48_stereo", "pcm");
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return "";
+}
+
+std::string GetApmCaptureTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("near8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("near16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("near32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("near48_stereo", "pcm");
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return "";
+}
+
+void ReadFloatSamplesFromStereoFile(size_t samples_per_channel,
+                                    size_t num_channels,
+                                    InputAudioFile* stereo_pcm_file,
+                                    rtc::ArrayView<float> data) {
+  RTC_DCHECK_LE(num_channels, 2);
+  RTC_DCHECK_EQ(data.size(), samples_per_channel * num_channels);
+  std::vector<int16_t> read_samples(samples_per_channel * 2);
+  stereo_pcm_file->Read(samples_per_channel * 2, read_samples.data());
+
+  // Convert samples to float and discard any channels not needed.
+  for (size_t sample = 0; sample < samples_per_channel; ++sample) {
+    for (size_t channel = 0; channel < num_channels; ++channel) {
+      data[sample * num_channels + channel] =
+          read_samples[sample * 2 + channel] / 32768.0f;
+    }
+  }
+}
+
+::testing::AssertionResult VerifyDeinterleavedArray(
+    size_t samples_per_channel,
+    size_t num_channels,
+    rtc::ArrayView<const float> reference,
+    rtc::ArrayView<const float> output,
+    float element_error_bound) {
+  // Form vectors to compare the reference to. Only the first values of the
+  // outputs are compared, in order to avoid having to specify all preceding
+  // frames as test vectors.
+  const size_t reference_frame_length =
+      rtc::CheckedDivExact(reference.size(), num_channels);
+
+  std::vector<float> output_to_verify;
+  for (size_t channel_no = 0; channel_no < num_channels; ++channel_no) {
+    output_to_verify.insert(output_to_verify.end(),
+                            output.begin() + channel_no * samples_per_channel,
+                            output.begin() + channel_no * samples_per_channel +
+                                reference_frame_length);
+  }
+
+  return VerifyArray(reference, output_to_verify, element_error_bound);
+}
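
Editorial note: a hypothetical call site, to make the expected memory layout concrete (channel-blocked, not interleaved); the values are made up:

```
// Stereo buffers stored channel by channel: the output holds 4 samples per
// channel, while the reference provides only the first 3 samples of each.
const float reference[] = {0.1f, 0.2f, 0.3f,      // channel 0
                           -0.1f, -0.2f, -0.3f};  // channel 1
const float output[] = {0.1f, 0.2f, 0.3f, 0.4f,      // channel 0
                        -0.1f, -0.2f, -0.3f, -0.4f};  // channel 1
EXPECT_TRUE(VerifyDeinterleavedArray(/*samples_per_channel=*/4,
                                     /*num_channels=*/2, reference, output,
                                     /*element_error_bound=*/1e-5f));
```
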
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference,
+                                       rtc::ArrayView<const float> output,
+                                       float element_error_bound) {
+  // The vectors are deemed to be bitexact only if
+  // a) the output has a size at least as large as the reference, and
+  // b) the samples in the reference are bitexact with the corresponding
+  //    samples in the output.
+
+  bool equal = true;
+  if (output.size() < reference.size()) {
+    equal = false;
+  } else {
+    // Compare the first samples in the vectors.
+    for (size_t k = 0; k < reference.size(); ++k) {
+      if (fabs(output[k] - reference[k]) > element_error_bound) {
+        equal = false;
+        break;
+      }
+    }
+  }
+
+  if (equal) {
+    return ::testing::AssertionSuccess();
+  }
+
+  // Lambda function that produces a formatted string with the data in the
+  // vector.
+  auto print_vector_in_c_format = [](rtc::ArrayView<const float> v,
+                                     size_t num_values_to_print) {
+    std::string s = "{ ";
+    for (size_t k = 0; k < std::min(num_values_to_print, v.size()); ++k) {
+      s += std::to_string(v[k]) + "f";
+      s += (k < (num_values_to_print - 1)) ? ", " : "";
+    }
+    return s + " }";
+  };
+
+  // If the vectors are deemed not to be similar, return a report of the
+  // difference.
+  return ::testing::AssertionFailure()
+         << std::endl
+         << "  Actual values  : "
+         << print_vector_in_c_format(output,
+                                     std::min(output.size(), reference.size()))
+         << std::endl
+         << "  Expected values: "
+         << print_vector_in_c_format(reference, reference.size()) << std::endl;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h
new file mode 100644
index 0000000000..2d3113276d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h
@@ -0,0 +1,56 @@
+
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
+
+#include <string>
+
+#include "api/array_view.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+// Returns test vector to use for the render signal in an
+// APM bitexactness test.
+std::string GetApmRenderTestVectorFileName(int sample_rate_hz);
+
+// Returns test vector to use for the capture signal in an
+// APM bitexactness test.
+std::string GetApmCaptureTestVectorFileName(int sample_rate_hz);
+
+// Extract float samples of up to two channels from a pcm file.
+void ReadFloatSamplesFromStereoFile(size_t samples_per_channel,
+                                    size_t num_channels,
+                                    InputAudioFile* stereo_pcm_file,
+                                    rtc::ArrayView<float> data);
+
+// Verifies a frame against a reference and returns the results as an
+// AssertionResult.
+::testing::AssertionResult VerifyDeinterleavedArray(
+    size_t samples_per_channel,
+    size_t num_channels,
+    rtc::ArrayView<const float> reference,
+    rtc::ArrayView<const float> output,
+    float element_error_bound);
+
+// Verifies a vector against a reference and returns the results as an
+// AssertionResult.
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference,
+                                       rtc::ArrayView<const float> output,
+                                       float element_error_bound);
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
new file mode 100644
index 0000000000..2c3678092e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
@@ -0,0 +1,81 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.
All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +if (!build_with_chromium) { + group("conversational_speech") { + testonly = true + deps = [ ":conversational_speech_generator" ] + } + + rtc_executable("conversational_speech_generator") { + testonly = true + sources = [ "generator.cc" ] + deps = [ + ":lib", + "../../../../test:fileutils", + "../../../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } +} + +rtc_library("lib") { + testonly = true + sources = [ + "config.cc", + "config.h", + "multiend_call.cc", + "multiend_call.h", + "simulator.cc", + "simulator.h", + "timing.cc", + "timing.h", + "wavreader_abstract_factory.h", + "wavreader_factory.cc", + "wavreader_factory.h", + "wavreader_interface.h", + ] + deps = [ + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base:stringutils", + "../../../../test:fileutils", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. +} + +rtc_library("unittest") { + testonly = true + sources = [ + "generator_unittest.cc", + "mock_wavreader.cc", + "mock_wavreader.h", + "mock_wavreader_factory.cc", + "mock_wavreader_factory.h", + ] + deps = [ + ":lib", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../../../test:fileutils", + "../../../../test:test_support", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS new file mode 100644 index 0000000000..07cff405e6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS @@ -0,0 +1,3 @@ +alessiob@webrtc.org +henrik.lundin@webrtc.org +peah@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md new file mode 100644 index 0000000000..0fa66669e6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md @@ -0,0 +1,74 @@ +# Conversational Speech generator tool + +Tool to generate multiple-end audio tracks to simulate conversational speech +with two or more participants. + +The input to the tool is a directory containing a number of audio tracks and +a text file indicating how to time the sequence of speech turns (see the Example +section). + +Since the timing of the speaking turns is specified by the user, the generated +tracks may not be suitable for testing scenarios in which there is unpredictable +network delay (e.g., end-to-end RTC assessment). + +Instead, the generated pairs can be used when the delay is constant (obviously +including the case in which there is no delay). +For instance, echo cancellation in the APM module can be evaluated using two-end +audio tracks as input and reverse input. + +By indicating negative and positive time offsets, one can reproduce cross-talk +(aka double-talk) and silence in the conversation. 
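
Editorial note: the offset rule can be summarized with a small sketch (an illustrative pseudo-helper, not part of the tool):

```
// A turn's start time is the previous turn's end time plus its offset:
// a positive offset inserts silence, a negative one produces cross-talk.
int NextTurnBeginMs(int previous_end_ms, int offset_ms) {
  return previous_end_ms + offset_ms;
}
```
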
+
+### Example
+
+For each end, there is a set of audio tracks, e.g., a1, a2 and a3 (speaker A)
+and b1, b2 (speaker B).
+The text file with the timing information may look like this:
+
+```
+A a1 0
+B b1 0
+A a2 100
+B b2 -200
+A a3 0
+A a4 0
+```
+
+The first column indicates the speaker name, the second contains the audio
+track file names, and the third the offsets (in milliseconds) used to
+concatenate the chunks. An optional fourth column contains positive or
+negative integral gains in dB that will be applied to the tracks. It's
+possible to specify the gain for some turns but not for others. If the gain is
+left out, no gain is applied.
+
+Assume that all the audio tracks in the example above are 1000 ms long.
+The tool will then generate two tracks (A and B) that look like this:
+
+**Track A**
+```
+  a1 (1000 ms)
+  silence (1100 ms)
+  a2 (1000 ms)
+  silence (800 ms)
+  a3 (1000 ms)
+  a4 (1000 ms)
+```
+
+**Track B**
+```
+  silence (1000 ms)
+  b1 (1000 ms)
+  silence (900 ms)
+  b2 (1000 ms)
+  silence (2000 ms)
+```
+
+The two tracks can also be visualized as follows (one character represents
+100 ms, "." is silence and "*" is speech).
+
+```
+t: 0         1         2         3         4         5         6 (s)
+A: **********...........**********........********************
+B: ..........**********.........**********....................
+                                ^ 200 ms cross-talk
+                100 ms silence ^
+```
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
new file mode 100644
index 0000000000..76d3de8108
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/config.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+const std::string& Config::audiotracks_path() const {
+  return audiotracks_path_;
+}
+
+const std::string& Config::timing_filepath() const {
+  return timing_filepath_;
+}
+
+const std::string& Config::output_path() const {
+  return output_path_;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
new file mode 100644
index 0000000000..5a847e06a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ + +#include + +#include "absl/strings/string_view.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct Config { + Config(absl::string_view audiotracks_path, + absl::string_view timing_filepath, + absl::string_view output_path) + : audiotracks_path_(audiotracks_path), + timing_filepath_(timing_filepath), + output_path_(output_path) {} + + const std::string& audiotracks_path() const; + const std::string& timing_filepath() const; + const std::string& output_path() const; + + const std::string audiotracks_path_; + const std::string timing_filepath_; + const std::string output_path_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc new file mode 100644 index 0000000000..d0bc2f2319 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "modules/audio_processing/test/conversational_speech/config.h" +#include "modules/audio_processing/test/conversational_speech/multiend_call.h" +#include "modules/audio_processing/test/conversational_speech/simulator.h" +#include "modules/audio_processing/test/conversational_speech/timing.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h" +#include "test/testsupport/file_utils.h" + +ABSL_FLAG(std::string, i, "", "Directory containing the speech turn wav files"); +ABSL_FLAG(std::string, t, "", "Path to the timing text file"); +ABSL_FLAG(std::string, o, "", "Output wav files destination path"); + +namespace webrtc { +namespace test { +namespace { + +const char kUsageDescription[] = + "Usage: conversational_speech_generator\n" + " -i \n" + " -t \n" + " -o \n" + "\n\n" + "Command-line tool to generate multiple-end audio tracks to simulate " + "conversational speech with two or more participants.\n"; + +} // namespace + +int main(int argc, char* argv[]) { + std::vector args = absl::ParseCommandLine(argc, argv); + if (args.size() != 1) { + printf("%s", kUsageDescription); + return 1; + } + RTC_CHECK(DirExists(absl::GetFlag(FLAGS_i))); + RTC_CHECK(FileExists(absl::GetFlag(FLAGS_t))); + RTC_CHECK(DirExists(absl::GetFlag(FLAGS_o))); + + conversational_speech::Config config( + absl::GetFlag(FLAGS_i), absl::GetFlag(FLAGS_t), absl::GetFlag(FLAGS_o)); + + // Load timing. + std::vector timing = + conversational_speech::LoadTiming(config.timing_filepath()); + + // Parse timing and audio tracks. 
+  auto wavreader_factory =
+      std::make_unique<conversational_speech::WavReaderFactory>();
+  conversational_speech::MultiEndCall multiend_call(
+      timing, config.audiotracks_path(), std::move(wavreader_factory));
+
+  // Generate output audio tracks.
+  auto generated_audiotrack_pairs =
+      conversational_speech::Simulate(multiend_call, config.output_path());
+
+  // Show paths to created audio tracks.
+  std::cout << "Output files:" << std::endl;
+  for (const auto& output_paths_entry : *generated_audiotrack_pairs) {
+    std::cout << "  speaker: " << output_paths_entry.first << std::endl;
+    std::cout << "    near end: " << output_paths_entry.second.near_end
+              << std::endl;
+    std::cout << "    far end: " << output_paths_entry.second.far_end
+              << std::endl;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
new file mode 100644
index 0000000000..17714440d4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file consists of unit tests for webrtc::test::conversational_speech
+// members. Some of them focus on accepting or rejecting different
+// conversational speech setups. A setup is defined by a set of audio tracks
+// and timing information.
+// The docstring at the beginning of each TEST(ConversationalSpeechTest,
+// MultiEndCallSetup*) function looks like the drawing below and indicates
+// which setup is tested.
+//
+//    Accept:
+//    A 0****.....
+//    B .....1****
+//
+// The drawing indicates the following:
+// - the illustrated setup should be accepted,
+// - there are two speakers (namely, A and B),
+// - A is the first speaking, B is the second one,
+// - each character after the speaker's letter indicates a time unit (e.g.,
+//   100 ms),
+// - "*" indicates speaking, "." listening,
+// - numbers indicate the turn index in std::vector<Turn>.
+//
+// Note that the same speaker can appear in multiple lines in order to depict
+// cases in which there are wrong offsets leading to self cross-talk (which is
+// rejected).
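
Editorial note: as a concrete reading of the notation, the "Accept" drawing above corresponds to a timing vector like the following (taken from the MultiEndCallSetupSimple test below; t500 is a 500 ms mock track):

```
const std::vector<Turn> timing = {
    {"A", "t500", 0, 0},  // turn #0: A speaks.
    {"B", "t500", 0, 0},  // turn #1: B starts right after A ends.
};
```
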
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <math.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::LoadTiming;
+using conversational_speech::MockWavReaderFactory;
+using conversational_speech::MultiEndCall;
+using conversational_speech::SaveTiming;
+using conversational_speech::Turn;
+using conversational_speech::WavReaderFactory;
+
+const char* const audiotracks_path = "/path/to/audiotracks";
+const char* const timing_filepath = "/path/to/timing_file.txt";
+const char* const output_path = "/path/to/output_dir";
+
+const std::vector<Turn> expected_timing = {
+    {"A", "a1", 0, 0},    {"B", "b1", 0, 0}, {"A", "a2", 100, 0},
+    {"B", "b2", -200, 0}, {"A", "a3", 0, 0}, {"A", "a3", 0, 0},
+};
+const std::size_t kNumberOfTurns = expected_timing.size();
+
+// Default arguments for MockWavReaderFactory ctor.
+// Fake audio track parameters.
+constexpr int kDefaultSampleRate = 48000;
+const std::map<std::string, const MockWavReaderFactory::Params>
+    kDefaultMockWavReaderFactoryParamsMap = {
+        {"t300", {kDefaultSampleRate, 1u, 14400u}},   // Mono, 0.3 seconds.
+        {"t500", {kDefaultSampleRate, 1u, 24000u}},   // Mono, 0.5 seconds.
+        {"t1000", {kDefaultSampleRate, 1u, 48000u}},  // Mono, 1.0 seconds.
+        {"sr8000", {8000, 1u, 8000u}},    // 8kHz sample rate, mono, 1 second.
+        {"sr16000", {16000, 1u, 16000u}},  // 16kHz sample rate, mono, 1 second.
+        {"sr16000_stereo", {16000, 2u, 16000u}},  // Like sr16000, but stereo.
+};
+const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
+    kDefaultMockWavReaderFactoryParamsMap.at("t500");
+
+std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
+  return std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+}
+
+void CreateSineWavFile(absl::string_view filepath,
+                       const MockWavReaderFactory::Params& params,
+                       float frequency = 440.0f) {
+  // Create samples.
+  constexpr double two_pi = 2.0 * M_PI;
+  std::vector<int16_t> samples(params.num_samples);
+  for (std::size_t i = 0; i < params.num_samples; ++i) {
+    // TODO(alessiob): the produced tone is not pure, improve.
+    samples[i] = std::lround(
+        32767.0f * std::sin(two_pi * i * frequency / params.sample_rate));
+  }
+
+  // Write samples.
+  WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
+  wav_writer.WriteSamples(samples.data(), params.num_samples);
+}
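
Editorial note: a hypothetical direct call, writing one second of a 440 Hz mono tone at 48 kHz (the path is made up; the tests below go through CreateTemporarySineAudioTracks() instead):

```
// Params fields are {sample_rate, num_channels, num_samples}.
CreateSineWavFile("/tmp/tone_440.wav", {48000, 1u, 48000u}, 440.0f);
```
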
+
+// Parameters to generate audio tracks with CreateSineWavFile.
+struct SineAudioTrackParams {
+  MockWavReaderFactory::Params params;
+  float frequency;
+};
+
+// Creates a temporary directory in which sine audio tracks are written.
+std::string CreateTemporarySineAudioTracks(
+    const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
+  // Create temporary directory.
+  std::string temp_directory =
+      OutputPath() + "TempConversationalSpeechAudioTracks";
+  CreateDir(temp_directory);
+
+  // Create sine tracks.
+  for (const auto& it : sine_tracks_params) {
+    const std::string temp_filepath = JoinFilename(temp_directory, it.first);
+    CreateSineWavFile(temp_filepath, it.second.params, it.second.frequency);
+  }
+
+  return temp_directory;
+}
+
+void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
+                           absl::string_view filepath,
+                           const MockWavReaderFactory::Params& expected_params) {
+  auto wav_reader = wav_reader_factory.Create(filepath);
+  EXPECT_EQ(expected_params.sample_rate, wav_reader->SampleRate());
+  EXPECT_EQ(expected_params.num_channels, wav_reader->NumChannels());
+  EXPECT_EQ(expected_params.num_samples, wav_reader->NumSamples());
+}
+
+void DeleteFolderAndContents(absl::string_view dir) {
+  if (!DirExists(dir)) {
+    return;
+  }
+  absl::optional<std::vector<std::string>> dir_content = ReadDirectory(dir);
+  EXPECT_TRUE(dir_content);
+  for (const auto& path : *dir_content) {
+    if (DirExists(path)) {
+      DeleteFolderAndContents(path);
+    } else if (FileExists(path)) {
+      // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+      RemoveFile(path);
+    } else {
+      FAIL();
+    }
+  }
+  // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+  RemoveDir(dir);
+}
+
+}  // namespace
+
+using ::testing::_;
+
+TEST(ConversationalSpeechTest, Settings) {
+  const conversational_speech::Config config(audiotracks_path, timing_filepath,
+                                             output_path);
+
+  // Test getters.
+  EXPECT_EQ(audiotracks_path, config.audiotracks_path());
+  EXPECT_EQ(timing_filepath, config.timing_filepath());
+  EXPECT_EQ(output_path, config.output_path());
+}
+
+TEST(ConversationalSpeechTest, TimingSaveLoad) {
+  // Save test timing.
+  const std::string temporary_filepath =
+      TempFilename(OutputPath(), "TempTimingTestFile");
+  SaveTiming(temporary_filepath, expected_timing);
+
+  // Create a std::vector<Turn> instance by loading from file.
+  std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
+  RemoveFile(temporary_filepath);
+
+  // Check size.
+  EXPECT_EQ(expected_timing.size(), actual_timing.size());
+
+  // Check Turn instances.
+  for (size_t index = 0; index < expected_timing.size(); ++index) {
+    EXPECT_EQ(expected_timing[index], actual_timing[index])
+        << "turn #" << index << " not matching";
+  }
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallCreate) {
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are 5 unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
+
+  // Inject the mock wav reader factory.
+  conversational_speech::MultiEndCall multiend_call(
+      expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(6u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
+  const std::vector<Turn> timing = {
+      {"A", "sr8000", 0, 0},
+      {"B", "sr16000", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(2); + + MultiEndCall multiend_call(timing, audiotracks_path, + std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { + const std::vector timing = { + {"A", "sr16000_stereo", 0, 0}, + {"B", "sr16000_stereo", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(1); + + MultiEndCall multiend_call(timing, audiotracks_path, + std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, + MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { + const std::vector timing = { + {"A", "sr8000", 0, 0}, + {"B", "sr16000_stereo", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(2); + + MultiEndCall multiend_call(timing, audiotracks_path, + std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { + const std::vector timing = { + {"A", "t500", -100, 0}, + {"B", "t500", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) { + // Accept: + // A 0****..... + // B .....1**** + constexpr std::size_t expected_duration = kDefaultSampleRate; + const std::vector timing = { + {"A", "t500", 0, 0}, + {"B", "t500", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupPause) { + // Accept: + // A 0****....... + // B .......1**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; + const std::vector timing = { + {"A", "t500", 0, 0}, + {"B", "t500", 200, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { + // Accept: + // A 0****.... 
+ // B ....1**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; + const std::vector timing = { + {"A", "t500", 0, 0}, + {"B", "t500", -100, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { + // Reject: + // A ..0**** + // B .1****. The n-th turn cannot start before the (n-1)-th one. + const std::vector timing = { + {"A", "t500", 200, 0}, + {"B", "t500", -600, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { + // Accept: + // A 0****2****... + // B ...1********* + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; + const std::vector timing = { + {"A", "t500", 0, 0}, + {"B", "t1000", -200, 0}, + {"A", "t500", -800, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(3u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { + // Reject: + // A 0****...... + // A ...1****... + // B ......2**** + // ^ Turn #1 overlaps with #0 which is from the same speaker. + const std::vector timing = { + {"A", "t500", 0, 0}, + {"A", "t500", -200, 0}, + {"B", "t500", -200, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { + // Reject: + // A 0********* + // B 1**....... + // C ...2**.... + // A ......3**. + // ^ Turn #3 overlaps with #0 which is from the same speaker. + const std::vector timing = { + {"A", "t1000", 0, 0}, + {"B", "t300", -1000, 0}, + {"C", "t300", 0, 0}, + {"A", "t300", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. 
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { + // Accept: + // A 0*********.. + // B ..1****..... + // C .......2**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; + const std::vector timing = { + {"A", "t1000", 0, 0}, + {"B", "t500", -800, 0}, + {"C", "t500", 0, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(3u, multiend_call.speaker_names().size()); + EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(3u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { + // Reject: + // A 0********* + // B ..1****... + // C ....2****. + // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers + // not permitted). + const std::vector timing = { + {"A", "t1000", 0, 0}, + {"B", "t500", -800, 0}, + {"C", "t500", -300, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { + // Accept: + // A 0*********.. + // B .2****...... + // C .......3**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; + const std::vector timing = { + {"A", "t1000", 0, 0}, + {"B", "t500", -900, 0}, + {"C", "t500", 100, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(3u, multiend_call.speaker_names().size()); + EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(3u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { + // Accept: + // A 0**** + // B 1**** + const std::vector timing = { + {"A", "t500", 0, 0}, + {"B", "t500", -500, 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. 
+ EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { + // Accept: + // A 0****....3****.5**. + // B .....1****...4**... + // C ......2**.......6**.. + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9; + const std::vector timing = { + {"A", "t500", 0, 0}, {"B", "t500", 0, 0}, {"C", "t300", -400, 0}, + {"A", "t500", 0, 0}, {"B", "t300", -100, 0}, {"A", "t300", -100, 0}, + {"C", "t300", -200, 0}, + }; + auto mock_wavreader_factory = std::unique_ptr( + new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, + kDefaultMockWavReaderFactoryParamsMap)); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(3u, multiend_call.speaker_names().size()); + EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(7u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { + // Reject: + // A 0****....3****.6** + // B .....1****...4**.. + // C ......2**.....5**.. + // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ + // speakers not permitted). + const std::vector timing = { + {"A", "t500", 0, 0}, {"B", "t500", 0, 0}, {"C", "t300", -400, 0}, + {"A", "t500", 0, 0}, {"B", "t300", -100, 0}, {"A", "t300", -200, 0}, + {"C", "t300", -200, 0}, + }; + auto mock_wavreader_factory = std::unique_ptr( + new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, + kDefaultMockWavReaderFactoryParamsMap)); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { + // Parameters with which wav files are created. + constexpr int duration_seconds = 5; + const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; + + for (int sample_rate : sample_rates) { + const std::string temp_filename = OutputPath() + "TempSineWavFile_" + + std::to_string(sample_rate) + ".wav"; + + // Write wav file. + const std::size_t num_samples = duration_seconds * sample_rate; + MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; + CreateSineWavFile(temp_filename, params); + + // Load wav file and check if params match. + WavReaderFactory wav_reader_factory; + MockWavReaderFactory::Params expeted_params = {sample_rate, 1u, + num_samples}; + CheckAudioTrackParams(wav_reader_factory, temp_filename, expeted_params); + + // Clean up. + RemoveFile(temp_filename); + } +} + +TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) { + // Simulated call (one character corresponding to 500 ms): + // A 0*********...........2*********..... 
+  //   B ...........1*********.....3*********
+  const std::vector<Turn> expected_timing = {
+      {"A", "t5000_440.wav", 0, 0},
+      {"B", "t5000_880.wav", 500, 0},
+      {"A", "t5000_440.wav", 0, 0},
+      {"B", "t5000_880.wav", -2500, 0},
+  };
+  const std::size_t expected_duration_seconds = 18;
+
+  // Create temporary audio track files.
+  const int sample_rate = 16000;
+  const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
+      {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
+      {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
+  };
+  const std::string audiotracks_path =
+      CreateTemporarySineAudioTracks(sine_tracks_params);
+
+  // Set up the multi-end call.
+  auto wavreader_factory =
+      std::unique_ptr<WavReaderFactory>(new WavReaderFactory());
+  MultiEndCall multiend_call(expected_timing, audiotracks_path,
+                             std::move(wavreader_factory));
+
+  // Simulate the call.
+  std::string output_path = JoinFilename(audiotracks_path, "output");
+  CreateDir(output_path);
+  RTC_LOG(LS_VERBOSE) << "simulator output path: " << output_path;
+  auto generated_audiotrack_pairs =
+      conversational_speech::Simulate(multiend_call, output_path);
+  EXPECT_EQ(2u, generated_audiotrack_pairs->size());
+
+  // Check the output.
+  WavReaderFactory wav_reader_factory;
+  const MockWavReaderFactory::Params expected_params = {
+      sample_rate, 1u, sample_rate * expected_duration_seconds};
+  for (const auto& it : *generated_audiotrack_pairs) {
+    RTC_LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
+    CheckAudioTrackParams(wav_reader_factory, it.second.near_end,
+                          expected_params);
+    CheckAudioTrackParams(wav_reader_factory, it.second.far_end,
+                          expected_params);
+  }
+
+  // Clean.
+  EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
new file mode 100644
index 0000000000..1263e938c4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +using ::testing::Return; + +MockWavReader::MockWavReader(int sample_rate, + size_t num_channels, + size_t num_samples) + : sample_rate_(sample_rate), + num_channels_(num_channels), + num_samples_(num_samples) { + ON_CALL(*this, SampleRate()).WillByDefault(Return(sample_rate_)); + ON_CALL(*this, NumChannels()).WillByDefault(Return(num_channels_)); + ON_CALL(*this, NumSamples()).WillByDefault(Return(num_samples_)); +} + +MockWavReader::~MockWavReader() = default; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h new file mode 100644 index 0000000000..94e20b9ec6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class MockWavReader : public WavReaderInterface { + public: + MockWavReader(int sample_rate, size_t num_channels, size_t num_samples); + ~MockWavReader(); + + // TODO(alessiob): use ON_CALL to return random samples if needed. + MOCK_METHOD(size_t, ReadFloatSamples, (rtc::ArrayView), (override)); + MOCK_METHOD(size_t, ReadInt16Samples, (rtc::ArrayView), (override)); + + MOCK_METHOD(int, SampleRate, (), (const, override)); + MOCK_METHOD(size_t, NumChannels, (), (const, override)); + MOCK_METHOD(size_t, NumSamples, (), (const, override)); + + private: + const int sample_rate_; + const size_t num_channels_; + const size_t num_samples_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc new file mode 100644 index 0000000000..a377cce7e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h" +#include "rtc_base/logging.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +using ::testing::_; +using ::testing::Invoke; + +MockWavReaderFactory::MockWavReaderFactory( + const Params& default_params, + const std::map& params) + : default_params_(default_params), audiotrack_names_params_(params) { + ON_CALL(*this, Create(_)) + .WillByDefault(Invoke(this, &MockWavReaderFactory::CreateMock)); +} + +MockWavReaderFactory::MockWavReaderFactory(const Params& default_params) + : MockWavReaderFactory(default_params, + std::map{}) {} + +MockWavReaderFactory::~MockWavReaderFactory() = default; + +std::unique_ptr MockWavReaderFactory::CreateMock( + absl::string_view filepath) { + // Search the parameters corresponding to filepath. + size_t delimiter = filepath.find_last_of("/\\"); // Either windows or posix + std::string filename(filepath.substr( + delimiter == absl::string_view::npos ? 0 : delimiter + 1)); + const auto it = audiotrack_names_params_.find(filename); + + // If not found, use default parameters. + if (it == audiotrack_names_params_.end()) { + RTC_LOG(LS_VERBOSE) << "using default parameters for " << filepath; + return std::unique_ptr(new MockWavReader( + default_params_.sample_rate, default_params_.num_channels, + default_params_.num_samples)); + } + + // Found, use the audiotrack-specific parameters. + RTC_LOG(LS_VERBOSE) << "using ad-hoc parameters for " << filepath; + RTC_LOG(LS_VERBOSE) << "sample_rate " << it->second.sample_rate; + RTC_LOG(LS_VERBOSE) << "num_channels " << it->second.num_channels; + RTC_LOG(LS_VERBOSE) << "num_samples " << it->second.num_samples; + return std::unique_ptr(new MockWavReader( + it->second.sample_rate, it->second.num_channels, it->second.num_samples)); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h new file mode 100644 index 0000000000..bcc7f3069b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class MockWavReaderFactory : public WavReaderAbstractFactory { + public: + struct Params { + int sample_rate; + size_t num_channels; + size_t num_samples; + }; + + MockWavReaderFactory(const Params& default_params, + const std::map& params); + explicit MockWavReaderFactory(const Params& default_params); + ~MockWavReaderFactory(); + + MOCK_METHOD(std::unique_ptr, + Create, + (absl::string_view), + (const, override)); + + private: + // Creates a MockWavReader instance using the parameters in + // audiotrack_names_params_ if the entry corresponding to filepath exists, + // otherwise creates a MockWavReader instance using the default parameters. + std::unique_ptr CreateMock(absl::string_view filepath); + + const Params& default_params_; + std::map audiotrack_names_params_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc new file mode 100644 index 0000000000..952114a78b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/multiend_call.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/logging.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +MultiEndCall::MultiEndCall( + rtc::ArrayView timing, + absl::string_view audiotracks_path, + std::unique_ptr wavreader_abstract_factory) + : timing_(timing), + audiotracks_path_(audiotracks_path), + wavreader_abstract_factory_(std::move(wavreader_abstract_factory)), + valid_(false) { + FindSpeakerNames(); + if (CreateAudioTrackReaders()) + valid_ = CheckTiming(); +} + +MultiEndCall::~MultiEndCall() = default; + +void MultiEndCall::FindSpeakerNames() { + RTC_DCHECK(speaker_names_.empty()); + for (const Turn& turn : timing_) { + speaker_names_.emplace(turn.speaker_name); + } +} + +bool MultiEndCall::CreateAudioTrackReaders() { + RTC_DCHECK(audiotrack_readers_.empty()); + sample_rate_hz_ = 0; // Sample rate will be set when reading the first track. 
+  for (const Turn& turn : timing_) {
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    if (it != audiotrack_readers_.end())
+      continue;
+
+    const std::string audiotrack_file_path =
+        test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name);
+
+    // Map the audiotrack file name to a new instance of WavReaderInterface.
+    std::unique_ptr<WavReaderInterface> wavreader =
+        wavreader_abstract_factory_->Create(
+            test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name));
+
+    if (sample_rate_hz_ == 0) {
+      sample_rate_hz_ = wavreader->SampleRate();
+    } else if (sample_rate_hz_ != wavreader->SampleRate()) {
+      RTC_LOG(LS_ERROR)
+          << "All the audio tracks should have the same sample rate.";
+      return false;
+    }
+
+    if (wavreader->NumChannels() != 1) {
+      RTC_LOG(LS_ERROR) << "Only mono audio tracks supported.";
+      return false;
+    }
+
+    audiotrack_readers_.emplace(turn.audiotrack_file_name,
+                                std::move(wavreader));
+  }
+
+  return true;
+}
+
+bool MultiEndCall::CheckTiming() {
+  struct Interval {
+    size_t begin;
+    size_t end;
+  };
+  size_t number_of_turns = timing_.size();
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    // Truncation may happen if the sampling rate is not an integer multiple
+    // of 1000 (e.g., 44100).
+    return ms * sr / 1000;
+  };
+  auto in_interval = [](size_t value, const Interval& interval) {
+    return interval.begin <= value && value < interval.end;
+  };
+  total_duration_samples_ = 0;
+  speaking_turns_.clear();
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store speaking turn indices of each speaker (used to
+  // detect self cross-talk).
+  std::map<std::string, std::vector<size_t>> speaking_turn_indices;
+  for (const std::string& speaker_name : speaker_names_) {
+    speaking_turn_indices.emplace(std::piecewise_construct,
+                                  std::forward_as_tuple(speaker_name),
+                                  std::forward_as_tuple());
+  }
+
+  // Parse turns.
+  for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn.
+    int offset_samples =
+        millisecond_to_samples(turn.offset, it->second->SampleRate());
+    std::size_t begin_timestamp = last_turn.end + offset_samples;
+    std::size_t end_timestamp = begin_timestamp + it->second->NumSamples();
+    RTC_LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp << "-"
+                     << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 &&
+        -offset_samples > static_cast<int>(last_turn.end - last_turn.begin)) {
+      RTC_LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
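+
+    // Editorial worked example (assumes a 48 kHz, 24000-sample track, i.e.
+    // 500 ms): if the previous turn ended at sample 24000 and this turn has
+    // offset -100 ms, then offset_samples = -4800, so this turn spans samples
+    // 19200-43200 and overlaps the previous turn by 4800 samples (100 ms of
+    // cross-talk).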
+    RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
+    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
+
+    // Update total duration of the conversational speech.
+    if (total_duration_samples_ < end_timestamp)
+      total_duration_samples_ = end_timestamp;
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn.begin = begin_timestamp;
+    last_turn.end = end_timestamp;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
+    RTC_LOG(LS_INFO) << "checking self cross-talk for <" << speaker_name << ">";
+
+    // Copy all turns for this speaker to new vector.
+    std::vector<SpeakingTurn> speaking_turns_for_name;
+    std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
+                 std::back_inserter(speaking_turns_for_name),
+                 [&speaker_name](const SpeakingTurn& st) {
+                   return st.speaker_name == speaker_name;
+                 });
+
+    // Check for overlap between adjacent elements.
+    // This is a sufficient condition for self cross-talk since the intervals
+    // are sorted by begin timestamp.
+    auto overlap = std::adjacent_find(
+        speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
+        [](const SpeakingTurn& a, const SpeakingTurn& b) {
+          return a.end > b.begin;
+        });
+
+    if (overlap != speaking_turns_for_name.end()) {
+      RTC_LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
new file mode 100644
index 0000000000..63283465fa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+
+#include <stddef.h>
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MultiEndCall {
+ public:
+  struct SpeakingTurn {
+    // Constructor required in order to use std::vector::emplace_back().
+    SpeakingTurn(absl::string_view new_speaker_name,
+                 absl::string_view new_audiotrack_file_name,
+                 size_t new_begin,
+                 size_t new_end,
+                 int gain)
+        : speaker_name(new_speaker_name),
+          audiotrack_file_name(new_audiotrack_file_name),
+          begin(new_begin),
+          end(new_end),
+          gain(gain) {}
+    std::string speaker_name;
+    std::string audiotrack_file_name;
+    size_t begin;
+    size_t end;
+    int gain;
+  };
+
+  MultiEndCall(
+      rtc::ArrayView<const Turn> timing,
+      absl::string_view audiotracks_path,
+      std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
+  ~MultiEndCall();
+
+  MultiEndCall(const MultiEndCall&) = delete;
+  MultiEndCall& operator=(const MultiEndCall&) = delete;
+
+  const std::set<std::string>& speaker_names() const { return speaker_names_; }
+  const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+  audiotrack_readers() const {
+    return audiotrack_readers_;
+  }
+  bool valid() const { return valid_; }
+  int sample_rate() const { return sample_rate_hz_; }
+  size_t total_duration_samples() const { return total_duration_samples_; }
+  const std::vector<SpeakingTurn>& speaking_turns() const {
+    return speaking_turns_;
+  }
+
+ private:
+  // Finds unique speaker names.
+  void FindSpeakerNames();
+
+  // Creates one WavReader instance for each unique audiotrack. Returns false
+  // if the audio tracks do not have the same sample rate or if they are not
+  // mono.
+  bool CreateAudioTrackReaders();
+
+  // Validates the speaking turns timing information. Accepts cross-talk, but
+  // only up to 2 speakers. Rejects unordered turns and self cross-talk.
+  bool CheckTiming();
+
+  rtc::ArrayView<const Turn> timing_;
+  std::string audiotracks_path_;
+  std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
+  std::set<std::string> speaker_names_;
+  std::map<std::string, std::unique_ptr<WavReaderInterface>>
+      audiotrack_readers_;
+  bool valid_;
+  int sample_rate_hz_;
+  size_t total_duration_samples_;
+  std::vector<SpeakingTurn> speaking_turns_;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
new file mode 100644
index 0000000000..89bcd48d84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::MultiEndCall;
+using conversational_speech::SpeakerOutputFilePaths;
+using conversational_speech::WavReaderInterface;
+
+// Combines output path and speaker names to define the output file paths for
+// the near-end and far-end audio tracks.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
+InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
+                           absl::string_view output_path) {
+  // Create map.
+  auto speaker_output_file_paths_map =
+      std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();
+
+  // Add near-end and far-end output paths into the map.
+  for (const auto& speaker_name : speaker_names) {
+    const std::string near_end_path =
+        test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
+    RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
+                        << near_end_path << ".";
+
+    const std::string far_end_path =
+        test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
+    RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
+                        << far_end_path << ".";
+
+    // Add to map.
+    speaker_output_file_paths_map->emplace(
+        std::piecewise_construct, std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple(near_end_path, far_end_path));
+  }
+
+  return speaker_output_file_paths_map;
+}
+
+// Class that provides one WavWriter for the near-end and one for the far-end
+// output track of a speaker.
+class SpeakerWavWriters {
+ public:
+  SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
+                    int sample_rate)
+      : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
+        far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
+  WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
+  WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }
+
+ private:
+  WavWriter near_end_wav_writer_;
+  WavWriter far_end_wav_writer_;
+};
+
+// Initializes one WavWriter instance for each speaker and each of the
+// near-end and far-end output tracks.
+std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
+InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
+                           speaker_output_file_paths,
+                       int sample_rate) {
+  // Create map.
+  auto speaker_wav_writers_map =
+      std::make_unique<std::map<std::string, SpeakerWavWriters>>();
+
+  // Add a SpeakerWavWriters instance into the map for each speaker.
+  for (auto it = speaker_output_file_paths.begin();
+       it != speaker_output_file_paths.end(); ++it) {
+    speaker_wav_writers_map->emplace(
+        std::piecewise_construct, std::forward_as_tuple(it->first),
+        std::forward_as_tuple(it->second, sample_rate));
+  }
+
+  return speaker_wav_writers_map;
+}
+
+// Reads all the samples for each audio track.
+std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
+    const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+        audiotrack_readers) {
+  // Create map.
+  auto audiotracks_map =
+      std::make_unique<std::map<std::string, std::vector<int16_t>>>();
+
+  // Add audio track vectors.
+  for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
+       ++it) {
+    // Add map entry.
+    audiotracks_map->emplace(std::piecewise_construct,
+                             std::forward_as_tuple(it->first),
+                             std::forward_as_tuple(it->second->NumSamples()));
+
+    // Read samples.
+    it->second->ReadInt16Samples(audiotracks_map->at(it->first));
+  }
+
+  return audiotracks_map;
+}
+
+// Writes all the values in `source_samples` via `wav_writer`. If the number of
+// previously written samples in `wav_writer` is less than `interval_begin`, it
+// adds zeros as left padding. The padding corresponds to intervals during
+// which a speaker is not active.
+void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
+                       size_t interval_begin,
+                       WavWriter* wav_writer) {
+  // Add left padding.
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
+  size_t padding_size = interval_begin - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+
+  // Write source samples.
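+  // E.g. (hypothetical values): with 100 samples already written and
+  // interval_begin == 150, the padding above writes 50 zeros, so the chunk
+  // below covers positions [150, 150 + source_samples.size()).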
+  wav_writer->WriteSamples(source_samples.data(), source_samples.size());
+}
+
+// Appends zeros via `wav_writer`. The number of zeros is always non-negative
+// and equal to the difference between `pad_samples` and the number of
+// previously written samples.
+void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
+  size_t padding_size = pad_samples - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+}
+
+void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
+                 int gain,
+                 rtc::ArrayView<int16_t> output_samples) {
+  const float gain_linear = DbToRatio(gain);
+  RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
+  std::transform(source_samples.begin(), source_samples.end(),
+                 output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
+                   return rtc::saturated_cast<int16_t>(x * gain_linear);
+                 });
+}
+
+}  // namespace
+
+namespace conversational_speech {
+
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+    const MultiEndCall& multiend_call,
+    absl::string_view output_path) {
+  // Set output file paths and initialize wav writers.
+  const auto& speaker_names = multiend_call.speaker_names();
+  auto speaker_output_file_paths =
+      InitSpeakerOutputFilePaths(speaker_names, output_path);
+  auto speakers_wav_writers = InitSpeakersWavWriters(
+      *speaker_output_file_paths, multiend_call.sample_rate());
+
+  // Preload all the input audio tracks.
+  const auto& audiotrack_readers = multiend_call.audiotrack_readers();
+  auto audiotracks = PreloadAudioTracks(audiotrack_readers);
+
+  // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end
+  // across the 2 speakers are symmetric; hence, the code below could be
+  // replaced by only creating the near-end or the far-end. However, this
+  // would require splitting the unit tests and documenting the behavior in
+  // README.md. In practice, it should not be an issue since the files are not
+  // expected to be significant in size.
+
+  // Write near-end and far-end output tracks.
+  for (const auto& speaking_turn : multiend_call.speaking_turns()) {
+    const std::string& active_speaker_name = speaking_turn.speaker_name;
+    const auto& source_audiotrack =
+        audiotracks->at(speaking_turn.audiotrack_file_name);
+    std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
+    ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);
+
+    // Write active speaker's chunk to active speaker's near-end.
+    PadLeftWriteChunk(
+        scaled_audiotrack, speaking_turn.begin,
+        speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());
+
+    // Write active speaker's chunk to other participants' far-ends.
+    for (const std::string& speaker_name : speaker_names) {
+      if (speaker_name == active_speaker_name)
+        continue;
+      PadLeftWriteChunk(
+          scaled_audiotrack, speaking_turn.begin,
+          speakers_wav_writers->at(speaker_name).far_end_wav_writer());
+    }
+  }
+
+  // Finalize all the output tracks with right padding.
+  // This is required to make all the output tracks duration equal.
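+  // E.g. (hypothetical numbers): a track holding 900 samples with
+  // total_duration_samples() == 1000 gets 100 trailing zeros appended.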
+  size_t duration_samples = multiend_call.total_duration_samples();
+  for (const std::string& speaker_name : speaker_names) {
+    PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
+                  duration_samples);
+    PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
+                  duration_samples);
+  }
+
+  return speaker_output_file_paths;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h
new file mode 100644
index 0000000000..2f311e16b3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+struct SpeakerOutputFilePaths {
+  SpeakerOutputFilePaths(absl::string_view new_near_end,
+                         absl::string_view new_far_end)
+      : near_end(new_near_end), far_end(new_far_end) {}
+  // Paths to the near-end and far-end audio track files.
+  const std::string near_end;
+  const std::string far_end;
+};
+
+// Generates the near-end and far-end audio track pairs for each speaker.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+    const MultiEndCall& multiend_call,
+    absl::string_view output_path);
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc
new file mode 100644
index 0000000000..95ec9f542e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/string_encode.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+bool Turn::operator==(const Turn& b) const {
+  return b.speaker_name == speaker_name &&
+         b.audiotrack_file_name == audiotrack_file_name && b.offset == offset &&
+         b.gain == gain;
+}
+
+std::vector<Turn> LoadTiming(absl::string_view timing_filepath) {
+  // Line parser.
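+  // Each non-empty line is expected to follow the format
+  //   <speaker_name> <audiotrack_file_name> <offset_ms> [<gain_db>]
+  // e.g. (hypothetical entry): "A a1.wav -200 3".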
+  auto parse_line = [](absl::string_view line) {
+    std::vector<absl::string_view> fields = rtc::split(line, ' ');
+    RTC_CHECK_GE(fields.size(), 3);
+    RTC_CHECK_LE(fields.size(), 4);
+    int gain = 0;
+    if (fields.size() == 4) {
+      gain = rtc::StringToNumber<int>(fields[3]).value_or(0);
+    }
+    return Turn(fields[0], fields[1],
+                rtc::StringToNumber<int>(fields[2]).value_or(0), gain);
+  };
+
+  // Init.
+  std::vector<Turn> timing;
+
+  // Parse lines.
+  std::string line;
+  std::ifstream infile(std::string{timing_filepath});
+  while (std::getline(infile, line)) {
+    if (line.empty())
+      continue;
+    timing.push_back(parse_line(line));
+  }
+  infile.close();
+
+  return timing;
+}
+
+void SaveTiming(absl::string_view timing_filepath,
+                rtc::ArrayView<const Turn> timing) {
+  std::ofstream outfile(std::string{timing_filepath});
+  RTC_CHECK(outfile.is_open());
+  for (const Turn& turn : timing) {
+    outfile << turn.speaker_name << " " << turn.audiotrack_file_name << " "
+            << turn.offset << " " << turn.gain << std::endl;
+  }
+  outfile.close();
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h
new file mode 100644
index 0000000000..9314f6fc43
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+struct Turn {
+  Turn(absl::string_view new_speaker_name,
+       absl::string_view new_audiotrack_file_name,
+       int new_offset,
+       int gain)
+      : speaker_name(new_speaker_name),
+        audiotrack_file_name(new_audiotrack_file_name),
+        offset(new_offset),
+        gain(gain) {}
+  bool operator==(const Turn& b) const;
+  std::string speaker_name;
+  std::string audiotrack_file_name;
+  int offset;
+  int gain;
+};
+
+// Loads a list of turns from a file.
+std::vector<Turn> LoadTiming(absl::string_view timing_filepath);
+
+// Writes a list of turns into a file.
+void SaveTiming(absl::string_view timing_filepath,
+                rtc::ArrayView<const Turn> timing);
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h
new file mode 100644
index 0000000000..14ddfc7539
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class WavReaderAbstractFactory {
+ public:
+  virtual ~WavReaderAbstractFactory() = default;
+  virtual std::unique_ptr<WavReaderInterface> Create(
+      absl::string_view filepath) const = 0;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc
new file mode 100644
index 0000000000..99b1686484
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h" + +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "common_audio/wav_file.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { + +using conversational_speech::WavReaderInterface; + +class WavReaderAdaptor final : public WavReaderInterface { + public: + explicit WavReaderAdaptor(absl::string_view filepath) + : wav_reader_(filepath) {} + ~WavReaderAdaptor() override = default; + + size_t ReadFloatSamples(rtc::ArrayView samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + size_t ReadInt16Samples(rtc::ArrayView samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + int SampleRate() const override { return wav_reader_.sample_rate(); } + + size_t NumChannels() const override { return wav_reader_.num_channels(); } + + size_t NumSamples() const override { return wav_reader_.num_samples(); } + + private: + WavReader wav_reader_; +}; + +} // namespace + +namespace conversational_speech { + +WavReaderFactory::WavReaderFactory() = default; + +WavReaderFactory::~WavReaderFactory() = default; + +std::unique_ptr WavReaderFactory::Create( + absl::string_view filepath) const { + return std::unique_ptr(new WavReaderAdaptor(filepath)); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h new file mode 100644 index 0000000000..f2e5b61055 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class WavReaderFactory : public WavReaderAbstractFactory {
+ public:
+  WavReaderFactory();
+  ~WavReaderFactory() override;
+  std::unique_ptr<WavReaderInterface> Create(
+      absl::string_view filepath) const override;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h
new file mode 100644
index 0000000000..c74f639461
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_
+
+#include <stddef.h>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class WavReaderInterface {
+ public:
+  virtual ~WavReaderInterface() = default;
+
+  // Returns the number of samples read.
+  virtual size_t ReadFloatSamples(rtc::ArrayView<float> samples) = 0;
+  virtual size_t ReadInt16Samples(rtc::ArrayView<int16_t> samples) = 0;
+
+  // Getters.
+  virtual int SampleRate() const = 0;
+  virtual size_t NumChannels() const = 0;
+  virtual size_t NumSamples() const = 0;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc
new file mode 100644
index 0000000000..2f483f5077
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/test/debug_dump_replayer.h" + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/protobuf_utils.h" +#include "modules/audio_processing/test/runtime_setting_util.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +namespace { + +void MaybeResetBuffer(std::unique_ptr>* buffer, + const StreamConfig& config) { + auto& buffer_ref = *buffer; + if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() || + buffer_ref->num_channels() != config.num_channels()) { + buffer_ref.reset( + new ChannelBuffer(config.num_frames(), config.num_channels())); + } +} + +} // namespace + +DebugDumpReplayer::DebugDumpReplayer() + : input_(nullptr), // will be created upon usage. + reverse_(nullptr), + output_(nullptr), + apm_(nullptr), + debug_file_(nullptr) {} + +DebugDumpReplayer::~DebugDumpReplayer() { + if (debug_file_) + fclose(debug_file_); +} + +bool DebugDumpReplayer::SetDumpFile(absl::string_view filename) { + debug_file_ = fopen(std::string(filename).c_str(), "rb"); + LoadNextMessage(); + return debug_file_; +} + +// Get next event that has not run. +absl::optional DebugDumpReplayer::GetNextEvent() const { + if (!has_next_event_) + return absl::nullopt; + else + return next_event_; +} + +// Run the next event. Returns the event type. +bool DebugDumpReplayer::RunNextEvent() { + if (!has_next_event_) + return false; + switch (next_event_.type()) { + case audioproc::Event::INIT: + OnInitEvent(next_event_.init()); + break; + case audioproc::Event::STREAM: + OnStreamEvent(next_event_.stream()); + break; + case audioproc::Event::REVERSE_STREAM: + OnReverseStreamEvent(next_event_.reverse_stream()); + break; + case audioproc::Event::CONFIG: + OnConfigEvent(next_event_.config()); + break; + case audioproc::Event::RUNTIME_SETTING: + OnRuntimeSettingEvent(next_event_.runtime_setting()); + break; + case audioproc::Event::UNKNOWN_EVENT: + // We do not expect to receive UNKNOWN event. + RTC_CHECK_NOTREACHED(); + } + LoadNextMessage(); + return true; +} + +const ChannelBuffer* DebugDumpReplayer::GetOutput() const { + return output_.get(); +} + +StreamConfig DebugDumpReplayer::GetOutputConfig() const { + return output_config_; +} + +// OnInitEvent reset the input/output/reserve channel format. +void DebugDumpReplayer::OnInitEvent(const audioproc::Init& msg) { + RTC_CHECK(msg.has_num_input_channels()); + RTC_CHECK(msg.has_output_sample_rate()); + RTC_CHECK(msg.has_num_output_channels()); + RTC_CHECK(msg.has_reverse_sample_rate()); + RTC_CHECK(msg.has_num_reverse_channels()); + + input_config_ = StreamConfig(msg.sample_rate(), msg.num_input_channels()); + output_config_ = + StreamConfig(msg.output_sample_rate(), msg.num_output_channels()); + reverse_config_ = + StreamConfig(msg.reverse_sample_rate(), msg.num_reverse_channels()); + + MaybeResetBuffer(&input_, input_config_); + MaybeResetBuffer(&output_, output_config_); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +// OnStreamEvent replays an input signal and verifies the output. +void DebugDumpReplayer::OnStreamEvent(const audioproc::Stream& msg) { + // APM should have been created. 
+  RTC_CHECK(apm_.get());
+
+  if (msg.has_applied_input_volume()) {
+    apm_->set_stream_analog_level(msg.applied_input_volume());
+  }
+  RTC_CHECK_EQ(AudioProcessing::kNoError,
+               apm_->set_stream_delay_ms(msg.delay()));
+
+  if (msg.has_keypress()) {
+    apm_->set_stream_key_pressed(msg.keypress());
+  } else {
+    apm_->set_stream_key_pressed(true);
+  }
+
+  RTC_CHECK_EQ(input_config_.num_channels(),
+               static_cast<size_t>(msg.input_channel_size()));
+  RTC_CHECK_EQ(input_config_.num_frames() * sizeof(float),
+               msg.input_channel(0).size());
+
+  for (int i = 0; i < msg.input_channel_size(); ++i) {
+    memcpy(input_->channels()[i], msg.input_channel(i).data(),
+           msg.input_channel(i).size());
+  }
+
+  RTC_CHECK_EQ(AudioProcessing::kNoError,
+               apm_->ProcessStream(input_->channels(), input_config_,
+                                   output_config_, output_->channels()));
+}
+
+void DebugDumpReplayer::OnReverseStreamEvent(
+    const audioproc::ReverseStream& msg) {
+  // APM should have been created.
+  RTC_CHECK(apm_.get());
+
+  RTC_CHECK_GT(msg.channel_size(), 0);
+  RTC_CHECK_EQ(reverse_config_.num_channels(),
+               static_cast<size_t>(msg.channel_size()));
+  RTC_CHECK_EQ(reverse_config_.num_frames() * sizeof(float),
+               msg.channel(0).size());
+
+  for (int i = 0; i < msg.channel_size(); ++i) {
+    memcpy(reverse_->channels()[i], msg.channel(i).data(),
+           msg.channel(i).size());
+  }
+
+  RTC_CHECK_EQ(
+      AudioProcessing::kNoError,
+      apm_->ProcessReverseStream(reverse_->channels(), reverse_config_,
+                                 reverse_config_, reverse_->channels()));
+}
+
+void DebugDumpReplayer::OnConfigEvent(const audioproc::Config& msg) {
+  MaybeRecreateApm(msg);
+  ConfigureApm(msg);
+}
+
+void DebugDumpReplayer::OnRuntimeSettingEvent(
+    const audioproc::RuntimeSetting& msg) {
+  RTC_CHECK(apm_.get());
+  ReplayRuntimeSetting(apm_.get(), msg);
+}
+
+void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) {
+  // These configurations cannot be changed on the fly.
+  RTC_CHECK(msg.has_aec_delay_agnostic_enabled());
+  RTC_CHECK(msg.has_aec_extended_filter_enabled());
+
+  // We only create the APM once, since these fields are not expected to
+  // change in the current implementation.
+  if (!apm_.get()) {
+    apm_ = AudioProcessingBuilderForTesting().Create();
+  }
+}
+
+void DebugDumpReplayer::ConfigureApm(const audioproc::Config& msg) {
+  AudioProcessing::Config apm_config;
+
+  // AEC2/AECM configs.
+  RTC_CHECK(msg.has_aec_enabled());
+  RTC_CHECK(msg.has_aecm_enabled());
+  apm_config.echo_canceller.enabled = msg.aec_enabled() || msg.aecm_enabled();
+  apm_config.echo_canceller.mobile_mode = msg.aecm_enabled();
+
+  // HPF configs.
+  RTC_CHECK(msg.has_hpf_enabled());
+  apm_config.high_pass_filter.enabled = msg.hpf_enabled();
+
+  // Preamp configs.
+  RTC_CHECK(msg.has_pre_amplifier_enabled());
+  apm_config.pre_amplifier.enabled = msg.pre_amplifier_enabled();
+  apm_config.pre_amplifier.fixed_gain_factor =
+      msg.pre_amplifier_fixed_gain_factor();
+
+  // NS configs.
+  RTC_CHECK(msg.has_ns_enabled());
+  RTC_CHECK(msg.has_ns_level());
+  apm_config.noise_suppression.enabled = msg.ns_enabled();
+  apm_config.noise_suppression.level =
+      static_cast<AudioProcessing::Config::NoiseSuppression::Level>(
+          msg.ns_level());
+
+  // TS configs.
+  RTC_CHECK(msg.has_transient_suppression_enabled());
+  apm_config.transient_suppression.enabled =
+      msg.transient_suppression_enabled();
+
+  // AGC configs.
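+  // Note: the dump stores agc_mode() as the integer value of
+  // AudioProcessing::Config::GainController1::Mode (e.g., kAdaptiveAnalog),
+  // hence the static_cast below.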
+  RTC_CHECK(msg.has_agc_enabled());
+  RTC_CHECK(msg.has_agc_mode());
+  RTC_CHECK(msg.has_agc_limiter_enabled());
+  apm_config.gain_controller1.enabled = msg.agc_enabled();
+  apm_config.gain_controller1.mode =
+      static_cast<AudioProcessing::Config::GainController1::Mode>(
+          msg.agc_mode());
+  apm_config.gain_controller1.enable_limiter = msg.agc_limiter_enabled();
+  RTC_CHECK(msg.has_noise_robust_agc_enabled());
+  apm_config.gain_controller1.analog_gain_controller.enabled =
+      msg.noise_robust_agc_enabled();
+
+  apm_->ApplyConfig(apm_config);
+}
+
+void DebugDumpReplayer::LoadNextMessage() {
+  has_next_event_ =
+      debug_file_ && ReadMessageFromFile(debug_file_, &next_event_);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h
new file mode 100644
index 0000000000..be21c68663
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_
+#define MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/ignore_wundef.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#include "modules/audio_processing/debug.pb.h"
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+namespace test {
+
+class DebugDumpReplayer {
+ public:
+  DebugDumpReplayer();
+  ~DebugDumpReplayer();
+
+  // Sets the dump file.
+  bool SetDumpFile(absl::string_view filename);
+
+  // Returns the next event.
+  absl::optional<audioproc::Event> GetNextEvent() const;
+
+  // Runs the next event. Returns true if it succeeded.
+  bool RunNextEvent();
+
+  const ChannelBuffer<float>* GetOutput() const;
+  StreamConfig GetOutputConfig() const;
+
+ private:
+  // The following functions are facilities for replaying debug dumps.
+  void OnInitEvent(const audioproc::Init& msg);
+  void OnStreamEvent(const audioproc::Stream& msg);
+  void OnReverseStreamEvent(const audioproc::ReverseStream& msg);
+  void OnConfigEvent(const audioproc::Config& msg);
+  void OnRuntimeSettingEvent(const audioproc::RuntimeSetting& msg);
+
+  void MaybeRecreateApm(const audioproc::Config& msg);
+  void ConfigureApm(const audioproc::Config& msg);
+
+  void LoadNextMessage();
+
+  // Buffer for APM input/output.
+  std::unique_ptr<ChannelBuffer<float>> input_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_;
+  std::unique_ptr<ChannelBuffer<float>> output_;
+
+  rtc::scoped_refptr<AudioProcessing> apm_;
+
+  FILE* debug_file_;
+
+  StreamConfig input_config_;
+  StreamConfig reverse_config_;
+  StreamConfig output_config_;
+
+  bool has_next_event_;
+  audioproc::Event next_event_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc
new file mode 100644
index 0000000000..cded5de217
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stddef.h>  // size_t
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/echo_canceller3_factory.h"
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/debug_dump_replayer.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/task_queue_for_test.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+void MaybeResetBuffer(std::unique_ptr<ChannelBuffer<float>>* buffer,
+                      const StreamConfig& config) {
+  auto& buffer_ref = *buffer;
+  if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() ||
+      buffer_ref->num_channels() != config.num_channels()) {
+    buffer_ref.reset(
+        new ChannelBuffer<float>(config.num_frames(), config.num_channels()));
+  }
+}
+
+class DebugDumpGenerator {
+ public:
+  DebugDumpGenerator(absl::string_view input_file_name,
+                     int input_rate_hz,
+                     int input_channels,
+                     absl::string_view reverse_file_name,
+                     int reverse_rate_hz,
+                     int reverse_channels,
+                     absl::string_view dump_file_name,
+                     bool enable_pre_amplifier);
+
+  // Constructor that uses default input files.
+  explicit DebugDumpGenerator(const AudioProcessing::Config& apm_config);
+
+  ~DebugDumpGenerator();
+
+  // Changes the sample rate of the input audio to the APM.
+  void SetInputRate(int rate_hz);
+
+  // Sets whether the stereo input signal is converted to mono by discarding
+  // the other channel.
+  void ForceInputMono(bool mono);
+
+  // Changes the sample rate of the reverse audio to the APM.
+  void SetReverseRate(int rate_hz);
+
+  // Sets whether the stereo reverse signal is converted to mono by discarding
+  // the other channel.
+  void ForceReverseMono(bool mono);
+
+  // Sets the required sample rate of the APM output.
+  void SetOutputRate(int rate_hz);
+
+  // Sets the required channels of the APM output.
+  void SetOutputChannels(int channels);
+
+  std::string dump_file_name() const { return dump_file_name_; }
+
+  void StartRecording();
+  void Process(size_t num_blocks);
+  void StopRecording();
+  AudioProcessing* apm() const { return apm_.get(); }
+
+ private:
+  static void ReadAndDeinterleave(ResampleInputAudioFile* audio,
+                                  int channels,
+                                  const StreamConfig& config,
+                                  float* const* buffer);
+
+  // APM input/output settings.
+  StreamConfig input_config_;
+  StreamConfig reverse_config_;
+  StreamConfig output_config_;
+
+  // Input file format.
+  const std::string input_file_name_;
+  ResampleInputAudioFile input_audio_;
+  const int input_file_channels_;
+
+  // Reverse file format.
+  const std::string reverse_file_name_;
+  ResampleInputAudioFile reverse_audio_;
+  const int reverse_file_channels_;
+
+  // Buffer for APM input/output.
+  std::unique_ptr<ChannelBuffer<float>> input_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_;
+  std::unique_ptr<ChannelBuffer<float>> output_;
+
+  bool enable_pre_amplifier_;
+
+  TaskQueueForTest worker_queue_;
+  rtc::scoped_refptr<AudioProcessing> apm_;
+
+  const std::string dump_file_name_;
+};
+
+DebugDumpGenerator::DebugDumpGenerator(absl::string_view input_file_name,
+                                       int input_rate_hz,
+                                       int input_channels,
+                                       absl::string_view reverse_file_name,
+                                       int reverse_rate_hz,
+                                       int reverse_channels,
+                                       absl::string_view dump_file_name,
+                                       bool enable_pre_amplifier)
+    : input_config_(input_rate_hz, input_channels),
+      reverse_config_(reverse_rate_hz, reverse_channels),
+      output_config_(input_rate_hz, input_channels),
+      input_audio_(input_file_name, input_rate_hz, input_rate_hz),
+      input_file_channels_(input_channels),
+      reverse_audio_(reverse_file_name, reverse_rate_hz, reverse_rate_hz),
+      reverse_file_channels_(reverse_channels),
+      input_(new ChannelBuffer<float>(input_config_.num_frames(),
+                                      input_config_.num_channels())),
+      reverse_(new ChannelBuffer<float>(reverse_config_.num_frames(),
+                                        reverse_config_.num_channels())),
+      output_(new ChannelBuffer<float>(output_config_.num_frames(),
+                                       output_config_.num_channels())),
+      enable_pre_amplifier_(enable_pre_amplifier),
+      worker_queue_("debug_dump_generator_worker_queue"),
+      dump_file_name_(dump_file_name) {
+  AudioProcessingBuilderForTesting apm_builder;
+  apm_ = apm_builder.Create();
+}
+
+DebugDumpGenerator::DebugDumpGenerator(
+    const AudioProcessing::Config& apm_config)
+    : DebugDumpGenerator(ResourcePath("near32_stereo", "pcm"),
+                         32000,
+                         2,
+                         ResourcePath("far32_stereo", "pcm"),
+                         32000,
+                         2,
+                         TempFilename(OutputPath(), "debug_aec"),
+                         apm_config.pre_amplifier.enabled) {
+  apm_->ApplyConfig(apm_config);
+}
+
+DebugDumpGenerator::~DebugDumpGenerator() {
+  remove(dump_file_name_.c_str());
+}
+
+void DebugDumpGenerator::SetInputRate(int rate_hz) {
+  input_audio_.set_output_rate_hz(rate_hz);
+  input_config_.set_sample_rate_hz(rate_hz);
+  MaybeResetBuffer(&input_, input_config_);
+}
+
+void DebugDumpGenerator::ForceInputMono(bool mono) {
+  const int channels = mono ? 1 : input_file_channels_;
+  input_config_.set_num_channels(channels);
+  MaybeResetBuffer(&input_, input_config_);
+}
+
+void DebugDumpGenerator::SetReverseRate(int rate_hz) {
+  reverse_audio_.set_output_rate_hz(rate_hz);
+  reverse_config_.set_sample_rate_hz(rate_hz);
+  MaybeResetBuffer(&reverse_, reverse_config_);
+}
+
+void DebugDumpGenerator::ForceReverseMono(bool mono) {
+  const int channels = mono ? 1 : reverse_file_channels_;
+  reverse_config_.set_num_channels(channels);
+  MaybeResetBuffer(&reverse_, reverse_config_);
+}
+
+void DebugDumpGenerator::SetOutputRate(int rate_hz) {
+  output_config_.set_sample_rate_hz(rate_hz);
+  MaybeResetBuffer(&output_, output_config_);
+}
+
+void DebugDumpGenerator::SetOutputChannels(int channels) {
+  output_config_.set_num_channels(channels);
+  MaybeResetBuffer(&output_, output_config_);
+}
+
+void DebugDumpGenerator::StartRecording() {
+  apm_->AttachAecDump(
+      AecDumpFactory::Create(dump_file_name_.c_str(), -1, &worker_queue_));
+}
+
+void DebugDumpGenerator::Process(size_t num_blocks) {
+  for (size_t i = 0; i < num_blocks; ++i) {
+    ReadAndDeinterleave(&reverse_audio_, reverse_file_channels_,
+                        reverse_config_, reverse_->channels());
+    ReadAndDeinterleave(&input_audio_, input_file_channels_, input_config_,
+                        input_->channels());
+    RTC_CHECK_EQ(AudioProcessing::kNoError, apm_->set_stream_delay_ms(100));
+    apm_->set_stream_analog_level(100);
+    if (enable_pre_amplifier_) {
+      apm_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCapturePreGain(1 + i % 10));
+    }
+    apm_->set_stream_key_pressed(i % 10 == 9);
+    RTC_CHECK_EQ(AudioProcessing::kNoError,
+                 apm_->ProcessStream(input_->channels(), input_config_,
+                                     output_config_, output_->channels()));
+
+    RTC_CHECK_EQ(
+        AudioProcessing::kNoError,
+        apm_->ProcessReverseStream(reverse_->channels(), reverse_config_,
+                                   reverse_config_, reverse_->channels()));
+  }
+}
+
+void DebugDumpGenerator::StopRecording() {
+  apm_->DetachAecDump();
+}
+
+void DebugDumpGenerator::ReadAndDeinterleave(ResampleInputAudioFile* audio,
+                                             int channels,
+                                             const StreamConfig& config,
+                                             float* const* buffer) {
+  const size_t num_frames = config.num_frames();
+  const int out_channels = config.num_channels();
+
+  std::vector<int16_t> signal(channels * num_frames);
+
+  audio->Read(num_frames * channels, &signal[0]);
+
+  // We only allow reducing the number of channels by discarding some of them.
+  RTC_CHECK_LE(out_channels, channels);
+  for (int channel = 0; channel < out_channels; ++channel) {
+    for (size_t i = 0; i < num_frames; ++i) {
+      buffer[channel][i] = S16ToFloat(signal[i * channels + channel]);
+    }
+  }
+}
+
+}  // namespace
+
+class DebugDumpTest : public ::testing::Test {
+ public:
+  // VerifyDebugDump replays a debug dump using APM and verifies that the
+  // result is bit-exact to the output channel in the dump. This is only
+  // guaranteed if the debug dump is started on the first frame.
+  void VerifyDebugDump(absl::string_view in_filename);
+
+ private:
+  DebugDumpReplayer debug_dump_replayer_;
+};
+
+void DebugDumpTest::VerifyDebugDump(absl::string_view in_filename) {
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(in_filename));
+
+  while (const absl::optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::STREAM) {
+      const audioproc::Stream* msg = &event->stream();
+      const StreamConfig output_config = debug_dump_replayer_.GetOutputConfig();
+      const ChannelBuffer<float>* output = debug_dump_replayer_.GetOutput();
+      // Check that output of APM is bit-exact to the output in the dump.
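+      // The memcmp over the raw float bytes below is what makes this a
+      // bit-exact comparison rather than a within-tolerance one.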
+      ASSERT_EQ(output_config.num_channels(),
+                static_cast<size_t>(msg->output_channel_size()));
+      ASSERT_EQ(output_config.num_frames() * sizeof(float),
+                msg->output_channel(0).size());
+      for (int i = 0; i < msg->output_channel_size(); ++i) {
+        ASSERT_EQ(0,
+                  memcmp(output->channels()[i], msg->output_channel(i).data(),
+                         msg->output_channel(i).size()));
+      }
+    }
+  }
+}
+
+TEST_F(DebugDumpTest, SimpleCase) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeInputFormat) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+
+  generator.StartRecording();
+  generator.Process(100);
+  generator.SetInputRate(48000);
+
+  generator.ForceInputMono(true);
+  // The number of output channels should not be larger than that of the
+  // input. APM will fail otherwise.
+  generator.SetOutputChannels(1);
+
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeReverseFormat) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+  generator.SetReverseRate(48000);
+  generator.ForceReverseMono(true);
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeOutputFormat) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+  generator.SetOutputRate(48000);
+  generator.SetOutputChannels(1);
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ToggleAec) {
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = true;
+  DebugDumpGenerator generator(apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+
+  apm_config.echo_canceller.enabled = false;
+  generator.apm()->ApplyConfig(apm_config);
+
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringInclusive) {
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = true;
+  apm_config.gain_controller1.analog_gain_controller.enabled = true;
+  apm_config.gain_controller1.analog_gain_controller.startup_min_volume = 0;
+  DebugDumpGenerator generator(apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+
+  DebugDumpReplayer debug_dump_replayer_;
+
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+  while (const absl::optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::CONFIG) {
+      const audioproc::Config* msg = &event->config();
+      ASSERT_TRUE(msg->has_experiments_description());
+      EXPECT_PRED_FORMAT2(::testing::IsSubstring, "EchoController",
+                          msg->experiments_description().c_str());
+    }
+  }
+}
+
+TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringExclusive) {
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = true;
+  DebugDumpGenerator generator(apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+
+  DebugDumpReplayer debug_dump_replayer_;
+
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+  while (const absl::optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::CONFIG) {
+      const audioproc::Config* msg = &event->config();
+      ASSERT_TRUE(msg->has_experiments_description());
+      EXPECT_PRED_FORMAT2(::testing::IsNotSubstring,
+                          "AgcClippingLevelExperiment",
+                          msg->experiments_description().c_str());
+    }
+  }
+}
+
+TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) {
+  AudioProcessing::Config apm_config;
+  apm_config.echo_canceller.enabled = true;
+  DebugDumpGenerator generator(apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+
+  DebugDumpReplayer debug_dump_replayer_;
+
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+  while (const absl::optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::CONFIG) {
+      const audioproc::Config* msg = &event->config();
+      ASSERT_TRUE(msg->has_experiments_description());
+      EXPECT_PRED_FORMAT2(::testing::IsSubstring, "EchoController",
+                          msg->experiments_description().c_str());
+    }
+  }
+}
+
+TEST_F(DebugDumpTest, VerifyEmptyExperimentalString) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+
+  DebugDumpReplayer debug_dump_replayer_;
+
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+  while (const absl::optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::CONFIG) {
+      const audioproc::Config* msg = &event->config();
+      ASSERT_TRUE(msg->has_experiments_description());
+      EXPECT_EQ(0u, msg->experiments_description().size());
+    }
+  }
+}
+
+// AGC is not supported on Android or iOS.
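+// The MAYBE_ indirection below renames the test to a DISABLED_-prefixed name
+// on those platforms; gtest still registers such tests but skips them by
+// default.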
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+#define MAYBE_ToggleAgc DISABLED_ToggleAgc
+#else
+#define MAYBE_ToggleAgc ToggleAgc
+#endif
+TEST_F(DebugDumpTest, MAYBE_ToggleAgc) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+
+  AudioProcessing::Config apm_config = generator.apm()->GetConfig();
+  apm_config.gain_controller1.enabled = !apm_config.gain_controller1.enabled;
+  generator.apm()->ApplyConfig(apm_config);
+
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ToggleNs) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+  generator.StartRecording();
+  generator.Process(100);
+
+  AudioProcessing::Config apm_config = generator.apm()->GetConfig();
+  apm_config.noise_suppression.enabled = !apm_config.noise_suppression.enabled;
+  generator.apm()->ApplyConfig(apm_config);
+
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, TransientSuppressionOn) {
+  DebugDumpGenerator generator(/*apm_config=*/{});
+
+  AudioProcessing::Config apm_config = generator.apm()->GetConfig();
+  apm_config.transient_suppression.enabled = true;
+  generator.apm()->ApplyConfig(apm_config);
+
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, PreAmplifierIsOn) {
+  AudioProcessing::Config apm_config;
+  apm_config.pre_amplifier.enabled = true;
+  DebugDumpGenerator generator(apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+  VerifyDebugDump(generator.dump_file_name());
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc
new file mode 100644
index 0000000000..1d36b954f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/test/echo_canceller_test_tools.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v) { + RandomizeSampleVector(random_generator, v, + /*amplitude=*/32767.f); +} + +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude) { + for (auto& v_k : v) { + v_k = 2 * amplitude * random_generator->Rand() - amplitude; + } +} + +template +void DelayBuffer::Delay(rtc::ArrayView x, + rtc::ArrayView x_delayed) { + RTC_DCHECK_EQ(x.size(), x_delayed.size()); + if (buffer_.empty()) { + std::copy(x.begin(), x.end(), x_delayed.begin()); + } else { + for (size_t k = 0; k < x.size(); ++k) { + x_delayed[k] = buffer_[next_insert_index_]; + buffer_[next_insert_index_] = x[k]; + next_insert_index_ = (next_insert_index_ + 1) % buffer_.size(); + } + } +} + +template class DelayBuffer; +template class DelayBuffer; +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h new file mode 100644 index 0000000000..0d70cd39c6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ + +#include +#include + +#include "api/array_view.h" +#include "rtc_base/random.h" + +namespace webrtc { + +// Randomizes the elements in a vector with values -32767.f:32767.f. +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v); + +// Randomizes the elements in a vector with values -amplitude:amplitude. +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude); + +// Class for delaying a signal a fixed number of samples. +template +class DelayBuffer { + public: + explicit DelayBuffer(size_t delay) : buffer_(delay) {} + ~DelayBuffer() = default; + + // Produces a delayed signal copy of x. + void Delay(rtc::ArrayView x, rtc::ArrayView x_delayed); + + private: + std::vector buffer_; + size_t next_insert_index_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc new file mode 100644 index 0000000000..164d28fa16 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(EchoCancellerTestTools, FloatDelayBuffer) {
+  constexpr size_t kDelay = 10;
+  DelayBuffer<float> delay_buffer(kDelay);
+  std::vector<float> v(1000, 0.f);
+  for (size_t k = 0; k < v.size(); ++k) {
+    v[k] = k;
+  }
+  std::vector<float> v_delayed = v;
+  constexpr size_t kBlockSize = 50;
+  for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) {
+    delay_buffer.Delay(
+        rtc::ArrayView<const float>(&v[k * kBlockSize], kBlockSize),
+        rtc::ArrayView<float>(&v_delayed[k * kBlockSize], kBlockSize));
+  }
+  for (size_t k = kDelay; k < v.size(); ++k) {
+    EXPECT_EQ(v[k - kDelay], v_delayed[k]);
+  }
+}
+
+TEST(EchoCancellerTestTools, IntDelayBuffer) {
+  constexpr size_t kDelay = 10;
+  DelayBuffer<int> delay_buffer(kDelay);
+  std::vector<int> v(1000, 0);
+  for (size_t k = 0; k < v.size(); ++k) {
+    v[k] = k;
+  }
+  std::vector<int> v_delayed = v;
+  const size_t kBlockSize = 50;
+  for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) {
+    delay_buffer.Delay(
+        rtc::ArrayView<const int>(&v[k * kBlockSize], kBlockSize),
+        rtc::ArrayView<int>(&v_delayed[k * kBlockSize], kBlockSize));
+  }
+  for (size_t k = kDelay; k < v.size(); ++k) {
+    EXPECT_EQ(v[k - kDelay], v_delayed[k]);
+  }
+}
+
+TEST(EchoCancellerTestTools, RandomizeSampleVector) {
+  Random random_generator(42U);
+  std::vector<float> v(50, 0.f);
+  std::vector<float> v_ref = v;
+  RandomizeSampleVector(&random_generator, v);
+  EXPECT_NE(v, v_ref);
+  v_ref = v;
+  RandomizeSampleVector(&random_generator, v);
+  EXPECT_NE(v, v_ref);
+}
+
+TEST(EchoCancellerTestTools, RandomizeSampleVectorWithAmplitude) {
+  Random random_generator(42U);
+  std::vector<float> v(50, 0.f);
+  RandomizeSampleVector(&random_generator, v, 1000.f);
+  EXPECT_GE(1000.f, *std::max_element(v.begin(), v.end()));
+  EXPECT_LE(-1000.f, *std::min_element(v.begin(), v.end()));
+  RandomizeSampleVector(&random_generator, v, 100.f);
+  EXPECT_GE(100.f, *std::max_element(v.begin(), v.end()));
+  EXPECT_LE(-100.f, *std::min_element(v.begin(), v.end()));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h b/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h
new file mode 100644
index 0000000000..763d6e4f0b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_
+#define MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_
+
+#include "api/audio/echo_control.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+class MockEchoControl : public EchoControl {
+ public:
+  MOCK_METHOD(void, AnalyzeRender, (AudioBuffer * render), (override));
+  MOCK_METHOD(void, AnalyzeCapture, (AudioBuffer * capture), (override));
+  MOCK_METHOD(void,
+              ProcessCapture,
+              (AudioBuffer * capture, bool echo_path_change),
+              (override));
+  MOCK_METHOD(void,
+              ProcessCapture,
+              (AudioBuffer * capture,
+               AudioBuffer* linear_output,
+               bool echo_path_change),
+              (override));
+  MOCK_METHOD(EchoControl::Metrics, GetMetrics, (), (const, override));
+  MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override));
+  MOCK_METHOD(void,
+              SetCaptureOutputUsage,
+              (bool capture_output_used),
+              (override));
+  MOCK_METHOD(bool, ActiveProcessing, (), (const, override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc
new file mode 100644
index 0000000000..3fd80b2ede
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/fake_recording_device.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+constexpr float kFloatSampleMin = -32768.f;
+constexpr float kFloatSampleMax = 32767.0f;
+
+}  // namespace
+
+// Abstract class for the different fake recording devices.
+class FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceWorker(const int initial_mic_level)
+      : mic_level_(initial_mic_level) {}
+  int mic_level() const { return mic_level_; }
+  void set_mic_level(const int level) { mic_level_ = level; }
+  void set_undo_mic_level(const int level) { undo_mic_level_ = level; }
+  virtual ~FakeRecordingDeviceWorker() = default;
+  virtual void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) = 0;
+  virtual void ModifyBufferFloat(ChannelBuffer<float>* buffer) = 0;
+
+ protected:
+  // Mic level to simulate.
+  int mic_level_;
+  // Optional mic level to undo.
+  absl::optional<int> undo_mic_level_;
+};
+
+namespace {
+
+// Identity fake recording device. The samples are not modified, which is
+// equivalent to a constant gain curve at 1.0 - only used for testing.
+class FakeRecordingDeviceIdentity final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceIdentity(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceIdentity() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {}
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {}
+};
+
+// Linear fake recording device. The gain curve is a linear function mapping
+// the mic levels range [0, 255] to [0.0, 1.0].
+class FakeRecordingDeviceLinear final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceLinear(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceLinear() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
+    const size_t number_of_samples = buffer.size();
+    int16_t* data = buffer.data();
+    // If an undo level is specified, virtually restore the unmodified
+    // microphone level; otherwise simulate the mic gain only.
+    const float divisor =
+        (undo_mic_level_ && *undo_mic_level_ > 0) ? *undo_mic_level_ : 255.f;
+    for (size_t i = 0; i < number_of_samples; ++i) {
+      data[i] = rtc::saturated_cast<int16_t>(data[i] * mic_level_ / divisor);
+    }
+  }
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {
+    // If an undo level is specified, virtually restore the unmodified
+    // microphone level; otherwise simulate the mic gain only.
+    const float divisor =
+        (undo_mic_level_ && *undo_mic_level_ > 0) ? *undo_mic_level_ : 255.f;
+    for (size_t c = 0; c < buffer->num_channels(); ++c) {
+      for (size_t i = 0; i < buffer->num_frames(); ++i) {
+        buffer->channels()[c][i] =
+            rtc::SafeClamp(buffer->channels()[c][i] * mic_level_ / divisor,
+                           kFloatSampleMin, kFloatSampleMax);
+      }
+    }
+  }
+};
+
+float ComputeAgcLinearFactor(const absl::optional<int>& undo_mic_level,
+                             int mic_level) {
+  // If an undo level is specified, virtually restore the unmodified
+  // microphone level; otherwise simulate the mic gain only.
+  const int undo_level =
+      (undo_mic_level && *undo_mic_level > 0) ? *undo_mic_level : 100;
+  return DbToRatio(kGainMap[mic_level] - kGainMap[undo_level]);
+}
+
+// Roughly dB-scale fake recording device. Valid levels are [0, 255]. The mic
+// applies a gain from kGainMap in agc2/gain_map_internal.h.
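+// For example, with mic level `m` and the default undo level 100, the applied
+// amplitude factor is DbToRatio(kGainMap[m] - kGainMap[100]), i.e. roughly
+// 10^((kGainMap[m] - kGainMap[100]) / 20) assuming the kGainMap entries are
+// gains in dB (see ComputeAgcLinearFactor() above).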
+class FakeRecordingDeviceAgc final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceAgc(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceAgc() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
+    const float scaling_factor =
+        ComputeAgcLinearFactor(undo_mic_level_, mic_level_);
+    const size_t number_of_samples = buffer.size();
+    int16_t* data = buffer.data();
+    for (size_t i = 0; i < number_of_samples; ++i) {
+      data[i] = rtc::saturated_cast<int16_t>(data[i] * scaling_factor);
+    }
+  }
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {
+    const float scaling_factor =
+        ComputeAgcLinearFactor(undo_mic_level_, mic_level_);
+    for (size_t c = 0; c < buffer->num_channels(); ++c) {
+      for (size_t i = 0; i < buffer->num_frames(); ++i) {
+        buffer->channels()[c][i] =
+            rtc::SafeClamp(buffer->channels()[c][i] * scaling_factor,
+                           kFloatSampleMin, kFloatSampleMax);
+      }
+    }
+  }
+};
+
+}  // namespace
+
+FakeRecordingDevice::FakeRecordingDevice(int initial_mic_level,
+                                         int device_kind) {
+  switch (device_kind) {
+    case 0:
+      worker_ =
+          std::make_unique<FakeRecordingDeviceIdentity>(initial_mic_level);
+      break;
+    case 1:
+      worker_ = std::make_unique<FakeRecordingDeviceLinear>(initial_mic_level);
+      break;
+    case 2:
+      worker_ = std::make_unique<FakeRecordingDeviceAgc>(initial_mic_level);
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      break;
+  }
+}
+
+FakeRecordingDevice::~FakeRecordingDevice() = default;
+
+int FakeRecordingDevice::MicLevel() const {
+  RTC_CHECK(worker_);
+  return worker_->mic_level();
+}
+
+void FakeRecordingDevice::SetMicLevel(const int level) {
+  RTC_CHECK(worker_);
+  if (level != worker_->mic_level())
+    RTC_LOG(LS_INFO) << "Simulate mic level update: " << level;
+  worker_->set_mic_level(level);
+}
+
+void FakeRecordingDevice::SetUndoMicLevel(const int level) {
+  RTC_DCHECK(worker_);
+  // TODO(alessiob): The behavior with undo level equal to zero is not clear
+  // yet and will be defined in future CLs once more FakeRecordingDeviceWorker
+  // implementations need to be added.
+  RTC_CHECK(level > 0) << "Zero undo mic level is unsupported";
+  worker_->set_undo_mic_level(level);
+}
+
+void FakeRecordingDevice::SimulateAnalogGain(rtc::ArrayView<int16_t> buffer) {
+  RTC_DCHECK(worker_);
+  worker_->ModifyBufferInt16(buffer);
+}
+
+void FakeRecordingDevice::SimulateAnalogGain(ChannelBuffer<float>* buffer) {
+  RTC_DCHECK(worker_);
+  worker_->ModifyBufferFloat(buffer);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h
new file mode 100644
index 0000000000..da3c0cf794
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
+#define MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/channel_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+class FakeRecordingDeviceWorker;
+
+// Class for simulating a microphone with analog gain.
+//
+// The intended modes of operation are the following:
+//
+// FakeRecordingDevice fake_mic(255, 1);
+//
+// fake_mic.SetMicLevel(170);
+// fake_mic.SimulateAnalogGain(buffer);
+//
+// When the mic level to undo is known:
+//
+// fake_mic.SetMicLevel(170);
+// fake_mic.SetUndoMicLevel(30);
+// fake_mic.SimulateAnalogGain(buffer);
+//
+// The second mode virtually restores the unmodified microphone level: calling
+// SimulateAnalogGain() will first "undo" the gain applied by the real
+// microphone (e.g., 30).
+class FakeRecordingDevice final {
+ public:
+  FakeRecordingDevice(int initial_mic_level, int device_kind);
+  ~FakeRecordingDevice();
+
+  int MicLevel() const;
+  void SetMicLevel(int level);
+  void SetUndoMicLevel(int level);
+
+  // Simulates the analog gain. If an undo mic level has been set via
+  // SetUndoMicLevel(), the unmodified mic signal is virtually restored before
+  // the gain is applied.
+  void SimulateAnalogGain(rtc::ArrayView<int16_t> buffer);
+
+  // Simulates the analog gain. If an undo mic level has been set via
+  // SetUndoMicLevel(), the unmodified mic signal is virtually restored before
+  // the gain is applied.
+  void SimulateAnalogGain(ChannelBuffer<float>* buffer);
+
+ private:
+  // Fake recording device worker.
+  std::unique_ptr<FakeRecordingDeviceWorker> worker_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc
new file mode 100644
index 0000000000..2ac8b1dc48
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/fake_recording_device.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr int kInitialMicLevel = 100;
+
+// TODO(alessiob): Add new fake recording device kind values here as they are
+// added in FakeRecordingDevice::FakeRecordingDevice.
+const std::vector<int> kFakeRecDeviceKinds = {0, 1, 2};
+
+const std::vector<std::vector<float>> kTestMultiChannelSamples{
+    std::vector<float>{-10.f, -1.f, -0.1f, 0.f, 0.1f, 1.f, 10.f}};
+
+// Writes samples into ChannelBuffer<float>.
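+// `data` is indexed as data[channel][frame]; its dimensions must match the
+// channel and frame counts of `buff`.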
+void WritesDataIntoChannelBuffer(const std::vector<std::vector<float>>& data,
+                                 ChannelBuffer<float>* buff) {
+  EXPECT_EQ(data.size(), buff->num_channels());
+  EXPECT_EQ(data[0].size(), buff->num_frames());
+  for (size_t c = 0; c < buff->num_channels(); ++c) {
+    for (size_t f = 0; f < buff->num_frames(); ++f) {
+      buff->channels()[c][f] = data[c][f];
+    }
+  }
+}
+
+std::unique_ptr<ChannelBuffer<float>> CreateChannelBufferWithData(
+    const std::vector<std::vector<float>>& data) {
+  auto buff =
+      std::make_unique<ChannelBuffer<float>>(data[0].size(), data.size());
+  WritesDataIntoChannelBuffer(data, buff.get());
+  return buff;
+}
+
+// Checks that the samples modified using monotonic level values are also
+// monotonic.
+void CheckIfMonotoneSamplesModules(const ChannelBuffer<float>* prev,
+                                   const ChannelBuffer<float>* curr) {
+  RTC_DCHECK_EQ(prev->num_channels(), curr->num_channels());
+  RTC_DCHECK_EQ(prev->num_frames(), curr->num_frames());
+  bool valid = true;
+  for (size_t i = 0; i < prev->num_channels(); ++i) {
+    for (size_t j = 0; j < prev->num_frames(); ++j) {
+      valid = std::fabs(prev->channels()[i][j]) <=
+              std::fabs(curr->channels()[i][j]);
+      if (!valid) {
+        break;
+      }
+    }
+    if (!valid) {
+      break;
+    }
+  }
+  EXPECT_TRUE(valid);
+}
+
+// Checks that the samples in each pair have the same sign unless the sample in
+// `dst` is zero (because of zero gain).
+void CheckSameSign(const ChannelBuffer<float>* src,
+                   const ChannelBuffer<float>* dst) {
+  RTC_DCHECK_EQ(src->num_channels(), dst->num_channels());
+  RTC_DCHECK_EQ(src->num_frames(), dst->num_frames());
+  const auto fsgn = [](float x) { return ((x < 0) ? -1 : (x > 0) ? 1 : 0); };
+  bool valid = true;
+  for (size_t i = 0; i < src->num_channels(); ++i) {
+    for (size_t j = 0; j < src->num_frames(); ++j) {
+      valid = dst->channels()[i][j] == 0.0f ||
+              fsgn(src->channels()[i][j]) == fsgn(dst->channels()[i][j]);
+      if (!valid) {
+        break;
+      }
+    }
+    if (!valid) {
+      break;
+    }
+  }
+  EXPECT_TRUE(valid);
+}
+
+std::string FakeRecordingDeviceKindToString(int fake_rec_device_kind) {
+  rtc::StringBuilder ss;
+  ss << "fake recording device: " << fake_rec_device_kind;
+  return ss.Release();
+}
+
+std::string AnalogLevelToString(int level) {
+  rtc::StringBuilder ss;
+  ss << "analog level: " << level;
+  return ss.Release();
+}
+
+}  // namespace
+
+TEST(FakeRecordingDevice, CheckHelperFunctions) {
+  constexpr size_t kC = 0;  // Channel index.
+  constexpr size_t kS = 1;  // Sample index.
+
+  // Check read.
+  auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_EQ(kTestMultiChannelSamples[c][s], buff->channels()[c][s]);
+    }
+  }
+
+  // Check write.
+  buff->channels()[kC][kS] = -5.0f;
+  RTC_DCHECK_NE(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]);
+
+  // Check reset.
+  WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get());
+  EXPECT_EQ(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]);
+}
+
+// Implicitly checks that changes to the mic and undo levels are visible to the
+// FakeRecordingDeviceWorker implementation injected into FakeRecordingDevice.
+TEST(FakeRecordingDevice, TestWorkerAbstractClass) {
+  FakeRecordingDevice fake_recording_device(kInitialMicLevel, 1);
+
+  auto buff1 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(100);
+  fake_recording_device.SimulateAnalogGain(buff1.get());
+
+  auto buff2 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(200);
+  fake_recording_device.SimulateAnalogGain(buff2.get());
+
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_LE(std::abs(buff1->channels()[c][s]),
+                std::abs(buff2->channels()[c][s]));
+    }
+  }
+
+  auto buff3 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(200);
+  fake_recording_device.SetUndoMicLevel(100);
+  fake_recording_device.SimulateAnalogGain(buff3.get());
+
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_LE(std::abs(buff1->channels()[c][s]),
+                std::abs(buff3->channels()[c][s]));
+      EXPECT_LE(std::abs(buff2->channels()[c][s]),
+                std::abs(buff3->channels()[c][s]));
+    }
+  }
+}
+
+TEST(FakeRecordingDevice, GainCurveShouldBeMonotone) {
+  // Create input-output buffers.
+  auto buff_prev = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  auto buff_curr = CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Test different mappings.
+  for (auto fake_rec_device_kind : kFakeRecDeviceKinds) {
+    SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind));
+    FakeRecordingDevice fake_recording_device(kInitialMicLevel,
+                                              fake_rec_device_kind);
+    // TODO(alessiob): The test below is designed for state-less recording
+    // devices. If, for instance, a device has memory, the test might need
+    // to be redesigned (e.g., re-initialize fake recording device).
+
+    // Apply lowest analog level.
+    WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_prev.get());
+    fake_recording_device.SetMicLevel(0);
+    fake_recording_device.SimulateAnalogGain(buff_prev.get());
+
+    // Increment analog level to check monotonicity.
+    for (int i = 1; i <= 255; ++i) {
+      SCOPED_TRACE(AnalogLevelToString(i));
+      WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_curr.get());
+      fake_recording_device.SetMicLevel(i);
+      fake_recording_device.SimulateAnalogGain(buff_curr.get());
+      CheckIfMonotoneSamplesModules(buff_prev.get(), buff_curr.get());
+
+      // Update prev.
+      buff_prev.swap(buff_curr);
+    }
+  }
+}
+
+TEST(FakeRecordingDevice, GainCurveShouldNotChangeSign) {
+  // Create view on original samples.
+  std::unique_ptr<ChannelBuffer<float>> buff_orig =
+      CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Create output buffer.
+  auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Test different mappings.
+  for (auto fake_rec_device_kind : kFakeRecDeviceKinds) {
+    SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind));
+    FakeRecordingDevice fake_recording_device(kInitialMicLevel,
+                                              fake_rec_device_kind);
+
+    // TODO(alessiob): The test below is designed for state-less recording
+    // devices. If, for instance, a device has memory, the test might need
+    // to be redesigned (e.g., re-initialize fake recording device).
+    for (int i = 0; i <= 255; ++i) {
+      SCOPED_TRACE(AnalogLevelToString(i));
+      WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get());
+      fake_recording_device.SetMicLevel(i);
+      fake_recording_device.SimulateAnalogGain(buff.get());
+      CheckSameSign(buff_orig.get(), buff.get());
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc
new file mode 100644
index 0000000000..1a82258903
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/performance_timer.h"
+
+#include <math.h>
+
+#include <numeric>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+PerformanceTimer::PerformanceTimer(int num_frames_to_process)
+    : clock_(webrtc::Clock::GetRealTimeClock()) {
+  timestamps_us_.reserve(num_frames_to_process);
+}
+
+PerformanceTimer::~PerformanceTimer() = default;
+
+void PerformanceTimer::StartTimer() {
+  start_timestamp_us_ = clock_->TimeInMicroseconds();
+}
+
+void PerformanceTimer::StopTimer() {
+  RTC_DCHECK(start_timestamp_us_);
+  timestamps_us_.push_back(clock_->TimeInMicroseconds() -
+                           *start_timestamp_us_);
+}
+
+double PerformanceTimer::GetDurationAverage() const {
+  return GetDurationAverage(0);
+}
+
+double PerformanceTimer::GetDurationStandardDeviation() const {
+  return GetDurationStandardDeviation(0);
+}
+
+double PerformanceTimer::GetDurationAverage(
+    size_t number_of_warmup_samples) const {
+  RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples);
+  const size_t number_of_samples =
+      timestamps_us_.size() - number_of_warmup_samples;
+  return static_cast<double>(
+             std::accumulate(timestamps_us_.begin() + number_of_warmup_samples,
+                             timestamps_us_.end(),
+                             static_cast<int64_t>(0))) /
+         number_of_samples;
+}
+
+double PerformanceTimer::GetDurationStandardDeviation(
+    size_t number_of_warmup_samples) const {
+  RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples);
+  const size_t number_of_samples =
+      timestamps_us_.size() - number_of_warmup_samples;
+  RTC_DCHECK_GT(number_of_samples, 0);
+  double average_duration = GetDurationAverage(number_of_warmup_samples);
+
+  double variance = std::accumulate(
+      timestamps_us_.begin() + number_of_warmup_samples, timestamps_us_.end(),
+      0.0, [average_duration](const double& a, const int64_t& b) {
+        return a + (b - average_duration) * (b - average_duration);
+      });
+
+  return sqrt(variance / number_of_samples);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h
new file mode 100644
index 0000000000..5375ba74e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
+#define MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+namespace test {
+
+class PerformanceTimer {
+ public:
+  explicit PerformanceTimer(int num_frames_to_process);
+  ~PerformanceTimer();
+
+  void StartTimer();
+  void StopTimer();
+
+  double GetDurationAverage() const;
+  double GetDurationStandardDeviation() const;
+
+  // These methods are the same as those above, but they ignore the first
+  // `number_of_warmup_samples` measurements.
+  double GetDurationAverage(size_t number_of_warmup_samples) const;
+  double GetDurationStandardDeviation(size_t number_of_warmup_samples) const;
+
+ private:
+  webrtc::Clock* clock_;
+  absl::optional<int64_t> start_timestamp_us_;
+  std::vector<int64_t> timestamps_us_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc
new file mode 100644
index 0000000000..75574961b0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/protobuf_utils.h"
+
+#include <memory>
+
+#include "rtc_base/system/arch.h"
+
+namespace {
+// Allocates new memory in the memory owned by the unique_ptr to fit the raw
+// message and returns the number of bytes read when having a string stream as
+// input.
+size_t ReadMessageBytesFromString(std::stringstream* input,
+                                  std::unique_ptr<uint8_t[]>* bytes) {
+  int32_t size = 0;
+  input->read(reinterpret_cast<char*>(&size), sizeof(int32_t));
+  int32_t size_read = input->gcount();
+  if (size_read != sizeof(int32_t))
+    return 0;
+  if (size <= 0)
+    return 0;
+
+  *bytes = std::make_unique<uint8_t[]>(size);
+  input->read(reinterpret_cast<char*>(bytes->get()),
+              size * sizeof((*bytes)[0]));
+  size_read = input->gcount();
+  return size_read == size ? size : 0;
+}
+}  // namespace
+
+namespace webrtc {
+
+size_t ReadMessageBytesFromFile(FILE* file,
+                                std::unique_ptr<uint8_t[]>* bytes) {
+// The "wire format" for the size is little-endian. Assume we're running on
+// a little-endian machine.
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+#error "Need to convert message from little-endian."
+#endif
+  int32_t size = 0;
+  if (fread(&size, sizeof(size), 1, file) != 1)
+    return 0;
+  if (size <= 0)
+    return 0;
+
+  *bytes = std::make_unique<uint8_t[]>(size);
+  return fread(bytes->get(), sizeof((*bytes)[0]), size, file);
+}
+
+// Returns true on success, false on error or end-of-file.
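+// A typical consumer drains a dump file in a loop, e.g. (a sketch;
+// audioproc::Event is the message type defined in
+// modules/audio_processing/debug.proto):
+//
+//   audioproc::Event event;
+//   while (ReadMessageFromFile(file, &event)) {
+//     ... handle event ...
+//   }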
+bool ReadMessageFromFile(FILE* file, MessageLite* msg) {
+  std::unique_ptr<uint8_t[]> bytes;
+  size_t size = ReadMessageBytesFromFile(file, &bytes);
+  if (!size)
+    return false;
+
+  msg->Clear();
+  return msg->ParseFromArray(bytes.get(), size);
+}
+
+// Returns true on success, false on error or end of string stream.
+bool ReadMessageFromString(std::stringstream* input, MessageLite* msg) {
+  std::unique_ptr<uint8_t[]> bytes;
+  size_t size = ReadMessageBytesFromString(input, &bytes);
+  if (!size)
+    return false;
+
+  msg->Clear();
+  return msg->ParseFromArray(bytes.get(), size);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h
new file mode 100644
index 0000000000..b9c2e819f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
+
+#include <memory>
+#include <sstream>  // no-presubmit-check TODO(webrtc:8982)
+
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/protobuf_utils.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#include "modules/audio_processing/debug.pb.h"
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+
+// Allocates new memory in the unique_ptr to fit the raw message and returns
+// the number of bytes read.
+size_t ReadMessageBytesFromFile(FILE* file, std::unique_ptr<uint8_t[]>* bytes);
+
+// Returns true on success, false on error or end-of-file.
+bool ReadMessageFromFile(FILE* file, MessageLite* msg);
+
+// Returns true on success, false on error or end of string stream.
+bool ReadMessageFromString(
+    std::stringstream* input,  // no-presubmit-check TODO(webrtc:8982)
+    MessageLite* msg);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
new file mode 100644
index 0000000000..e53a829623
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
@@ -0,0 +1,170 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../../webrtc.gni") + +if (!build_with_chromium) { + group("py_quality_assessment") { + testonly = true + deps = [ + ":scripts", + ":unit_tests", + ] + } + + copy("scripts") { + testonly = true + sources = [ + "README.md", + "apm_quality_assessment.py", + "apm_quality_assessment.sh", + "apm_quality_assessment_boxplot.py", + "apm_quality_assessment_export.py", + "apm_quality_assessment_gencfgs.py", + "apm_quality_assessment_optimize.py", + ] + outputs = [ "$root_build_dir/py_quality_assessment/{{source_file_part}}" ] + deps = [ + ":apm_configs", + ":lib", + ":output", + "../../../../resources/audio_processing/test/py_quality_assessment:probing_signals", + "../../../../rtc_tools:audioproc_f", + ] + } + + copy("apm_configs") { + testonly = true + sources = [ "apm_configs/default.json" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ + "$root_build_dir/py_quality_assessment/apm_configs/{{source_file_part}}", + ] + } # apm_configs + + copy("lib") { + testonly = true + sources = [ + "quality_assessment/__init__.py", + "quality_assessment/annotations.py", + "quality_assessment/audioproc_wrapper.py", + "quality_assessment/collect_data.py", + "quality_assessment/data_access.py", + "quality_assessment/echo_path_simulation.py", + "quality_assessment/echo_path_simulation_factory.py", + "quality_assessment/eval_scores.py", + "quality_assessment/eval_scores_factory.py", + "quality_assessment/evaluation.py", + "quality_assessment/exceptions.py", + "quality_assessment/export.py", + "quality_assessment/export_unittest.py", + "quality_assessment/external_vad.py", + "quality_assessment/input_mixer.py", + "quality_assessment/input_signal_creator.py", + "quality_assessment/results.css", + "quality_assessment/results.js", + "quality_assessment/signal_processing.py", + "quality_assessment/simulation.py", + "quality_assessment/test_data_generation.py", + "quality_assessment/test_data_generation_factory.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}" ] + deps = [ "../../../../resources/audio_processing/test/py_quality_assessment:noise_tracks" ] + } + + copy("output") { + testonly = true + sources = [ "output/README.md" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = + [ "$root_build_dir/py_quality_assessment/output/{{source_file_part}}" ] + } + + group("unit_tests") { + testonly = true + visibility = [ ":*" ] # Only targets in this file can depend on this. + deps = [ + ":apm_vad", + ":fake_polqa", + ":lib_unit_tests", + ":scripts_unit_tests", + ":vad", + ] + } + + rtc_executable("fake_polqa") { + testonly = true + sources = [ "quality_assessment/fake_polqa.cc" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. 
+    output_dir = "${root_out_dir}/py_quality_assessment/quality_assessment"
+    deps = [
+      "../../../../rtc_base:checks",
+      "//third_party/abseil-cpp/absl/strings",
+    ]
+  }
+
+  rtc_executable("vad") {
+    testonly = true
+    sources = [ "quality_assessment/vad.cc" ]
+    deps = [
+      "../../../../common_audio",
+      "../../../../rtc_base:logging",
+      "//third_party/abseil-cpp/absl/flags:flag",
+      "//third_party/abseil-cpp/absl/flags:parse",
+    ]
+  }
+
+  rtc_executable("apm_vad") {
+    testonly = true
+    sources = [ "quality_assessment/apm_vad.cc" ]
+    deps = [
+      "../..",
+      "../../../../common_audio",
+      "../../../../rtc_base:logging",
+      "../../vad",
+      "//third_party/abseil-cpp/absl/flags:flag",
+      "//third_party/abseil-cpp/absl/flags:parse",
+    ]
+  }
+
+  rtc_executable("sound_level") {
+    testonly = true
+    sources = [ "quality_assessment/sound_level.cc" ]
+    deps = [
+      "../..",
+      "../../../../common_audio",
+      "../../../../rtc_base:logging",
+      "//third_party/abseil-cpp/absl/flags:flag",
+      "//third_party/abseil-cpp/absl/flags:parse",
+    ]
+  }
+
+  copy("lib_unit_tests") {
+    testonly = true
+    sources = [
+      "quality_assessment/annotations_unittest.py",
+      "quality_assessment/echo_path_simulation_unittest.py",
+      "quality_assessment/eval_scores_unittest.py",
+      "quality_assessment/fake_external_vad.py",
+      "quality_assessment/input_mixer_unittest.py",
+      "quality_assessment/signal_processing_unittest.py",
+      "quality_assessment/simulation_unittest.py",
+      "quality_assessment/test_data_generation_unittest.py",
+    ]
+    visibility = [ ":*" ]  # Only targets in this file can depend on this.
+    outputs = [ "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}" ]
+  }
+
+  copy("scripts_unit_tests") {
+    testonly = true
+    sources = [ "apm_quality_assessment_unittest.py" ]
+    visibility = [ ":*" ]  # Only targets in this file can depend on this.
+    outputs = [ "$root_build_dir/py_quality_assessment/{{source_file_part}}" ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS
new file mode 100644
index 0000000000..9f56bb830d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS
@@ -0,0 +1,5 @@
+aleloi@webrtc.org
+alessiob@webrtc.org
+henrik.lundin@webrtc.org
+ivoc@webrtc.org
+peah@webrtc.org
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md
new file mode 100644
index 0000000000..4156112df2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md
@@ -0,0 +1,125 @@
+# APM Quality Assessment tool
+
+Python wrapper of APM simulators (e.g., `audioproc_f`) with which quality
+assessment can be automated. The tool can simulate different noise conditions,
+input signals and APM configurations, and it computes different scores. Once
+the scores are computed, the results can easily be exported to an HTML page on
+which one can listen to the APM input and output signals as well as to the
+reference signal used for evaluation.
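+
+A minimal invocation looks as follows (a sketch: the input file name is a
+placeholder, and the required `--polqa_path` and `--air_db_path` flags are
+described in the setup sections below):
+
+```
+$ ./apm_quality_assessment.py \
+    --polqa_path /var/opt/PolqaOem64 \
+    --air_db_path /var/opt/AIR_1_4 \
+    -i probing_signals/speech.wav \
+    -o output/
+```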
+
+## Dependencies
+ - OS: Linux
+ - Python 2.7
+ - Python libraries: enum34, numpy, scipy, pydub (0.17.0+), pandas (0.20.1+),
+   pyquery (1.2+), jsmin (2.2+), csscompressor (0.9.4)
+ - It is recommended that a dedicated Python environment is used
+   - install `virtualenv`
+     - `$ sudo apt-get install python-virtualenv`
+   - set up a new Python environment (e.g., `my_env`)
+     - `$ cd ~ && virtualenv my_env`
+   - activate the new Python environment
+     - `$ source ~/my_env/bin/activate`
+   - add dependencies via `pip`
+     - `(my_env)$ pip install enum34 numpy pydub scipy pandas pyquery jsmin \`
+       `csscompressor`
+ - PolqaOem64 (see http://www.polqa.info/)
+   - Tested with POLQA Library v1.180 / P863 v2.400
+ - Aachen Impulse Response (AIR) Database
+   - Download https://www2.iks.rwth-aachen.de/air/air_database_release_1_4.zip
+ - Input probing signals and noise tracks (you can make your own dataset - *1)
+
+## Build
+ - Compile WebRTC
+ - Go to `out/Default/py_quality_assessment` and check that
+   `apm_quality_assessment.py` exists
+
+## Unit tests
+ - Compile WebRTC
+ - Go to `out/Default/py_quality_assessment`
+ - Run `python -m unittest discover -p "*_unittest.py"`
+
+## First time setup
+ - Deploy PolqaOem64 and set the `POLQA_PATH` environment variable
+   - e.g., `$ export POLQA_PATH=/var/opt/PolqaOem64`
+ - Deploy the AIR Database and set the `AECHEN_IR_DATABASE_PATH` environment
+   variable
+   - e.g., `$ export AECHEN_IR_DATABASE_PATH=/var/opt/AIR_1_4`
+ - Deploy probing signal tracks into
+   - `out/Default/py_quality_assessment/probing_signals` (*1)
+ - Deploy noise tracks into
+   - `out/Default/py_quality_assessment/noise_tracks` (*1, *2)
+
+(*1) You can use custom files as long as they are mono tracks sampled at
+48 kHz and encoded in the 16 bit signed format (it is recommended that the
+tracks are converted and exported with Audacity).
+
+## Usage (scores computation)
+ - Go to `out/Default/py_quality_assessment`
+ - Check `apm_quality_assessment.sh` as an example script for parallelizing
+   the experiments
+ - Adjust the script according to your preferences (e.g., output path)
+ - Run `apm_quality_assessment.sh`
+ - The script will end by opening the browser and showing ALL the computed
+   scores
+
+## Usage (export reports)
+Showing all the results at once can be confusing. You may therefore want to
+export separate reports. In this case, you can use the
+`apm_quality_assessment_export.py` script as follows:
+
+ - Set `--output_dir, -o` to the same value used in `apm_quality_assessment.sh`
+ - Use regular expressions to select/filter out scores by
+   - APM configurations: `--config_names, -c`
+   - capture signals: `--capture_names, -i`
+   - render signals: `--render_names, -r`
+   - echo simulator: `--echo_simulator_names, -e`
+   - test data generators: `--test_data_generators, -t`
+   - scores: `--eval_scores, -s`
+ - Assign a suffix to the report name using `-f <suffix>`
+
+For instance:
+
+```
+$ ./apm_quality_assessment_export.py \
+    -o output/ \
+    -c "(^default$)|(.*AE.*)" \
+    -t \(white_noise\) \
+    -s \(polqa\) \
+    -f echo
+```
+
+## Usage (boxplot)
+After generating stats, it can help to visualize how a score depends on a
+certain APM simulator parameter. The `apm_quality_assessment_boxplot.py` script
+helps with that, producing plots similar to [this
+one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png).
+
+Suppose some scores come from running the APM simulator `audioproc_f` with
+or without the level controller: `--lc=1` or `--lc=0`.
+Then two boxplots side by side can be generated with:
+
+```
+$ ./apm_quality_assessment_boxplot.py \
+    -o /path/to/output \
+    -v polqa \
+    -n /path/to/dir/with/apm_configs \
+    -z lc
+```
+
+## Troubleshooting
+The input wav file must be:
+ - sampled at a sample rate that is a multiple of 100 (required by POLQA)
+ - in the 16 bit format (required by `audioproc_f`)
+ - encoded in the Microsoft WAV signed 16 bit PCM format (Audacity default
+   when exporting)
+
+Depending on the license, the POLQA tool may take “breaks” as a way to limit
+the throughput. When this happens, the APM Quality Assessment tool is slowed
+down. For more details about this limitation, check Section 10.9.1 in the
+POLQA manual v.1.18.
+
+In case of issues with the POLQA score computation, check
+`py_quality_assessment/eval_scores.py` and adapt
+`PolqaScore._parse_output_file()`.
+The code can also be fixed directly in the build directory (namely,
+`out/Default/py_quality_assessment/eval_scores.py`).
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json
new file mode 100644
index 0000000000..5c3277bac0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json
@@ -0,0 +1 @@
+{"-all_default": null}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
new file mode 100755
index 0000000000..e067ecb692
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Perform APM module quality assessment on one or more input files using one
+   or more APM simulator configuration files and one or more test data
+   generators.
+
+Usage: apm_quality_assessment.py -i audio1.wav [audio2.wav ...]
+                                 -c cfg1.json [cfg2.json ...]
+                                 -t white_noise [reverberation ...]
+                                 -e audio_level [polqa ...]
+                                 -o /path/to/output
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+import quality_assessment.audioproc_wrapper as audioproc_wrapper
+import quality_assessment.echo_path_simulation as echo_path_simulation
+import quality_assessment.eval_scores as eval_scores
+import quality_assessment.evaluation as evaluation
+import quality_assessment.eval_scores_factory as eval_scores_factory
+import quality_assessment.external_vad as external_vad
+import quality_assessment.test_data_generation as test_data_generation
+import quality_assessment.test_data_generation_factory as \
+    test_data_generation_factory
+import quality_assessment.simulation as simulation
+
+_ECHO_PATH_SIMULATOR_NAMES = (
+    echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES)
+_TEST_DATA_GENERATOR_CLASSES = (
+    test_data_generation.TestDataGenerator.REGISTERED_CLASSES)
+_TEST_DATA_GENERATORS_NAMES = _TEST_DATA_GENERATOR_CLASSES.keys()
+_EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES
+_EVAL_SCORE_WORKER_NAMES = _EVAL_SCORE_WORKER_CLASSES.keys()
+
+_DEFAULT_CONFIG_FILE = 'apm_configs/default.json'
+
+_POLQA_BIN_NAME = 'PolqaOem64'
+
+
+def _InstanceArgumentsParser():
+    """Arguments parser factory.
+    """
+    parser = argparse.ArgumentParser(description=(
+        'Perform APM module quality assessment on one or more input files '
+        'using one or more APM simulator configuration files and one or more '
+        'test data generators.'))
+
+    parser.add_argument('-c',
+                        '--config_files',
+                        nargs='+',
+                        required=False,
+                        help=('path to the configuration files defining the '
+                              'arguments with which the APM simulator tool is '
+                              'called'),
+                        default=[_DEFAULT_CONFIG_FILE])
+
+    parser.add_argument(
+        '-i',
+        '--capture_input_files',
+        nargs='+',
+        required=True,
+        help='path to the capture input wav files (one or more)')
+
+    parser.add_argument('-r',
+                        '--render_input_files',
+                        nargs='+',
+                        required=False,
+                        help=('path to the render input wav files; either '
+                              'omitted or one file for each file in '
+                              '--capture_input_files (files will be paired by '
+                              'index)'),
+                        default=None)
+
+    parser.add_argument('-p',
+                        '--echo_path_simulator',
+                        required=False,
+                        help=('custom echo path simulator name; required if '
+                              '--render_input_files is specified'),
+                        choices=_ECHO_PATH_SIMULATOR_NAMES,
+                        default=echo_path_simulation.NoEchoPathSimulator.NAME)
+
+    parser.add_argument('-t',
+                        '--test_data_generators',
+                        nargs='+',
+                        required=False,
+                        help='custom list of test data generators to use',
+                        choices=_TEST_DATA_GENERATORS_NAMES,
+                        default=_TEST_DATA_GENERATORS_NAMES)
+
+    parser.add_argument('--additive_noise_tracks_path', required=False,
+                        help=('path to the wav files for the additive noise '
+                              'test data generator'),
+                        default=test_data_generation. \
+                        AdditiveNoiseTestDataGenerator. \
+                        DEFAULT_NOISE_TRACKS_PATH)
+
+    parser.add_argument('-e',
+                        '--eval_scores',
+                        nargs='+',
+                        required=False,
+                        help='custom list of evaluation scores to use',
+                        choices=_EVAL_SCORE_WORKER_NAMES,
+                        default=_EVAL_SCORE_WORKER_NAMES)
+
+    parser.add_argument('-o',
+                        '--output_dir',
+                        required=False,
+                        help=('base path to the output directory in which the '
+                              'output wav files and the evaluation outcomes '
+                              'are saved'),
+                        default='output')
+
+    parser.add_argument('--polqa_path',
+                        required=True,
+                        help='path to the POLQA tool')
+
+    parser.add_argument('--air_db_path',
+                        required=True,
+                        help='path to the Aachen IR database')
+
+    parser.add_argument('--apm_sim_path', required=False,
+                        help='path to the APM simulator tool',
+                        default=audioproc_wrapper.
 \
+                        AudioProcWrapper. \
+                        DEFAULT_APM_SIMULATOR_BIN_PATH)
+
+    parser.add_argument('--echo_metric_tool_bin_path',
+                        required=False,
+                        help=('path to the echo metric binary '
+                              '(required for the echo eval score)'),
+                        default=None)
+
+    parser.add_argument(
+        '--copy_with_identity_generator',
+        required=False,
+        help=('If true, the identity test data generator makes a '
+              'copy of the clean speech input file.'),
+        default=False)
+
+    parser.add_argument('--external_vad_paths',
+                        nargs='+',
+                        required=False,
+                        help=('Paths to external VAD programs. Each must take '
+                              '\'-i <input> -o <output>\' inputs'),
+                        default=[])
+
+    parser.add_argument('--external_vad_names',
+                        nargs='+',
+                        required=False,
+                        help=('Keys to the VAD paths. Must be unique and '
+                              'as many as the paths.'),
+                        default=[])
+
+    return parser
+
+
+def _ValidateArguments(args, parser):
+    if args.capture_input_files and args.render_input_files and (len(
+            args.capture_input_files) != len(args.render_input_files)):
+        parser.error(
+            '--render_input_files and --capture_input_files must be lists '
+            'having the same length')
+        sys.exit(1)
+
+    if args.render_input_files and not args.echo_path_simulator:
+        parser.error(
+            'when --render_input_files is set, --echo_path_simulator is '
+            'also required')
+        sys.exit(1)
+
+    if len(args.external_vad_names) != len(args.external_vad_paths):
+        parser.error('If provided, --external_vad_paths and '
+                     '--external_vad_names must '
+                     'have the same number of arguments.')
+        sys.exit(1)
+
+
+def main():
+    # TODO(alessiob): level = logging.INFO once debugged.
+    logging.basicConfig(level=logging.DEBUG)
+    parser = _InstanceArgumentsParser()
+    args = parser.parse_args()
+    _ValidateArguments(args, parser)
+
+    simulator = simulation.ApmModuleSimulator(
+        test_data_generator_factory=(
+            test_data_generation_factory.TestDataGeneratorFactory(
+                aechen_ir_database_path=args.air_db_path,
+                noise_tracks_path=args.additive_noise_tracks_path,
+                copy_with_identity=args.copy_with_identity_generator)),
+        evaluation_score_factory=eval_scores_factory.
+        EvaluationScoreWorkerFactory(
+            polqa_tool_bin_path=os.path.join(args.polqa_path, _POLQA_BIN_NAME),
+            echo_metric_tool_bin_path=args.echo_metric_tool_bin_path),
+        ap_wrapper=audioproc_wrapper.AudioProcWrapper(args.apm_sim_path),
+        evaluator=evaluation.ApmModuleEvaluator(),
+        external_vads=external_vad.ExternalVad.ConstructVadDict(
+            args.external_vad_paths, args.external_vad_names))
+    simulator.Run(config_filepaths=args.config_files,
+                  capture_input_filepaths=args.capture_input_files,
+                  render_input_filepaths=args.render_input_files,
+                  echo_path_simulator_name=args.echo_path_simulator,
+                  test_data_generator_names=args.test_data_generators,
+                  eval_score_names=args.eval_scores,
+                  output_dir=args.output_dir)
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh
new file mode 100755
index 0000000000..aa563ee26b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+# Path to the POLQA tool.
+if [ -z ${POLQA_PATH} ]; then  # Check if defined.
+  # Default location.
+  export POLQA_PATH='/var/opt/PolqaOem64'
+fi
+if [ -d "${POLQA_PATH}" ]; then
+  echo "POLQA found in ${POLQA_PATH}"
+else
+  echo "POLQA not found in ${POLQA_PATH}"
+  exit 1
+fi
+
+# Path to the Aachen IR database.
+if [ -z ${AECHEN_IR_DATABASE_PATH} ]; then  # Check if defined.
+  # Default location.
+  export AECHEN_IR_DATABASE_PATH='/var/opt/AIR_1_4'
+fi
+if [ -d "${AECHEN_IR_DATABASE_PATH}" ]; then
+  echo "AIR database found in ${AECHEN_IR_DATABASE_PATH}"
+else
+  echo "AIR database not found in ${AECHEN_IR_DATABASE_PATH}"
+  exit 1
+fi
+
+# Customize probing signals, test data generators and scores if needed.
+CAPTURE_SIGNALS=(probing_signals/*.wav)
+TEST_DATA_GENERATORS=( \
+    "identity" \
+    "white_noise" \
+    # "environmental_noise" \
+    # "reverberation" \
+)
+SCORES=( \
+    # "polqa" \
+    "audio_level_peak" \
+    "audio_level_mean" \
+)
+OUTPUT_PATH=output
+
+# Generate standard APM config files.
+chmod +x apm_quality_assessment_gencfgs.py
+./apm_quality_assessment_gencfgs.py
+
+# Customize APM configurations if needed.
+APM_CONFIGS=(apm_configs/*.json)
+
+# Add output path if missing.
+if [ ! -d ${OUTPUT_PATH} ]; then
+  mkdir ${OUTPUT_PATH}
+fi
+
+# Start one process for each "probing signal"-"test data source" pair.
+chmod +x apm_quality_assessment.py
+for capture_signal_filepath in "${CAPTURE_SIGNALS[@]}" ; do
+  probing_signal_name="$(basename $capture_signal_filepath)"
+  probing_signal_name="${probing_signal_name%.*}"
+  for test_data_gen_name in "${TEST_DATA_GENERATORS[@]}" ; do
+    LOG_FILE="${OUTPUT_PATH}/apm_qa-${probing_signal_name}-"`
+        `"${test_data_gen_name}.log"
+    echo "Starting ${probing_signal_name} ${test_data_gen_name} "`
+        `"(see ${LOG_FILE})"
+    ./apm_quality_assessment.py \
+        --polqa_path ${POLQA_PATH}\
+        --air_db_path ${AECHEN_IR_DATABASE_PATH}\
+        -i ${capture_signal_filepath} \
+        -o ${OUTPUT_PATH} \
+        -t ${test_data_gen_name} \
+        -c "${APM_CONFIGS[@]}" \
+        -e "${SCORES[@]}" > $LOG_FILE 2>&1 &
+  done
+done
+
+# Join Python processes running apm_quality_assessment.py.
+wait
+
+# Export results.
+chmod +x ./apm_quality_assessment_export.py
+./apm_quality_assessment_export.py -o ${OUTPUT_PATH}
+
+# Show results in the browser.
+RESULTS_FILE="$(realpath ${OUTPUT_PATH}/results.html)"
+sensible-browser "file://${RESULTS_FILE}" > /dev/null 2>&1 &
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py
new file mode 100644
index 0000000000..c425885b95
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Shows boxplots of a given score for different values of selected
+parameters. Can be used to compare scores by audioproc_f flag.
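+
+For instance, grouping by the audioproc_f parameter `lc` produces one box for
+the scores obtained with `--lc=0` and one for those obtained with `--lc=1`.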
+
+Usage: apm_quality_assessment_boxplot.py -o /path/to/output
+                                         -v polqa
+                                         -n /path/to/dir/with/apm_configs
+                                         -z audioproc_f_arg1 [arg2 ...]
+
+Arguments --config_names, --render_names, --echo_simulator_names,
+--test_data_generators, --eval_scores can be used to filter the data
+used for plotting.
+"""
+
+import collections
+import logging
+import matplotlib.pyplot as plt
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+
+def InstanceArgumentsParser():
+    """Arguments parser factory.
+    """
+    parser = collect_data.InstanceArgumentsParser()
+    parser.description = (
+        'Shows boxplots of a given score for different values of selected '
+        'parameters. Can be used to compare scores by audioproc_f flag.')
+
+    parser.add_argument('-v',
+                        '--eval_score',
+                        required=True,
+                        help=('Score name for constructing boxplots'))
+
+    parser.add_argument(
+        '-n',
+        '--config_dir',
+        required=False,
+        help=('path to the folder with the configuration files'),
+        default='apm_configs')
+
+    parser.add_argument('-z',
+                        '--params_to_plot',
+                        required=True,
+                        nargs='+',
+                        help=('audioproc_f parameter values '
+                              'by which to group scores (no leading dash)'))
+
+    return parser
+
+
+def FilterScoresByParams(data_frame, filter_params, score_name, config_dir):
+    """Filters data on the values of one or more parameters.
+
+    Args:
+      data_frame: pandas.DataFrame of all used input data.
+
+      filter_params: each config of the input data is assumed to have
+        exactly one parameter from `filter_params` defined. Every value
+        of the parameters in `filter_params` is a key in the returned
+        dict; the associated value is all cells of the data with that
+        value of the parameter.
+
+      score_name: Name of the score whose values are boxplotted. Currently
+        cannot do more than one value.
+
+      config_dir: path to dir with APM configs.
+
+    Returns: dictionary, key is a param value, result is all scores for
+      that param value (see `filter_params` for explanation).
+    """
+    results = collections.defaultdict(dict)
+    config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
+
+    for config_name in config_names:
+        config_json = data_access.AudioProcConfigFile.Load(
+            os.path.join(config_dir, config_name + '.json'))
+        data_with_config = data_frame[data_frame.apm_config == config_name]
+        data_cell_scores = data_with_config[data_with_config.eval_score_name ==
+                                            score_name]
+
+        # Exactly one of `params_to_plot` must match:
+        (matching_param, ) = [
+            x for x in filter_params if '-' + x in config_json
+        ]
+
+        # Add scores for every track to the result.
+        for capture_name in data_cell_scores.capture:
+            result_score = float(data_cell_scores[data_cell_scores.capture ==
+                                                  capture_name].score)
+            config_dict = results[config_json['-' + matching_param]]
+            if capture_name not in config_dict:
+                config_dict[capture_name] = {}
+
+            config_dict[capture_name][matching_param] = result_score
+
+    return results
+
+
+def _FlattenToScoresList(config_param_score_dict):
+    """Extracts a list of scores from input data structure.
+
+    Args:
+      config_param_score_dict: of the form {'capture_name':
+        {'param_name' : score_value,.. } ..}
+
+    Returns: Plain list of all score values present in the input data
+      structure.
+    """
+    result = []
+    for capture_name in config_param_score_dict:
+        result += list(config_param_score_dict[capture_name].values())
+    return result
+
+
+def main():
+    # Init.
+    # TODO(alessiob): INFO once debugged.
+ logging.basicConfig(level=logging.DEBUG) + parser = InstanceArgumentsParser() + args = parser.parse_args() + + # Get the scores. + src_path = collect_data.ConstructSrcPath(args) + logging.debug(src_path) + scores_data_frame = collect_data.FindScores(src_path, args) + + # Filter the data by `args.params_to_plot` + scores_filtered = FilterScoresByParams(scores_data_frame, + args.params_to_plot, + args.eval_score, args.config_dir) + + data_list = sorted(scores_filtered.items()) + data_values = [_FlattenToScoresList(x) for (_, x) in data_list] + data_labels = [x for (x, _) in data_list] + + _, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6)) + axes.boxplot(data_values, labels=data_labels) + axes.set_ylabel(args.eval_score) + axes.set_xlabel('/'.join(args.params_to_plot)) + plt.show() + + +if __name__ == "__main__": + main() diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py new file mode 100755 index 0000000000..c20accb9dc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Export the scores computed by the apm_quality_assessment.py script into an + HTML file. +""" + +import logging +import os +import sys + +import quality_assessment.collect_data as collect_data +import quality_assessment.export as export + + +def _BuildOutputFilename(filename_suffix): + """Builds the filename for the exported file. + + Args: + filename_suffix: suffix for the output file name. + + Returns: + A string. + """ + if filename_suffix is None: + return 'results.html' + return 'results-{}.html'.format(filename_suffix) + + +def main(): + # Init. + logging.basicConfig( + level=logging.DEBUG) # TODO(alessio): INFO once debugged. + parser = collect_data.InstanceArgumentsParser() + parser.add_argument('-f', + '--filename_suffix', + help=('suffix of the exported file')) + parser.description = ('Exports pre-computed APM module quality assessment ' + 'results into HTML tables') + args = parser.parse_args() + + # Get the scores. + src_path = collect_data.ConstructSrcPath(args) + logging.debug(src_path) + scores_data_frame = collect_data.FindScores(src_path, args) + + # Export. 
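+    # Per _BuildOutputFilename() above, the file is named 'results.html' by
+    # default, or e.g. 'results-myrun.html' when the script is invoked with
+    # `-f myrun` (illustrative suffix).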
+ output_filepath = os.path.join(args.output_dir, + _BuildOutputFilename(args.filename_suffix)) + exporter = export.HtmlExport(output_filepath) + exporter.Export(scores_data_frame) + + logging.info('output file successfully written in %s', output_filepath) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py new file mode 100755 index 0000000000..ca80f85bd1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Generate .json files with which the APM module can be tested using the + apm_quality_assessment.py script and audioproc_f as APM simulator. +""" + +import logging +import os + +import quality_assessment.data_access as data_access + +OUTPUT_PATH = os.path.abspath('apm_configs') + + +def _GenerateDefaultOverridden(config_override): + """Generates one or more APM overriden configurations. + + For each item in config_override, it overrides the default configuration and + writes a new APM configuration file. + + The default settings are loaded via "-all_default". + Check "src/modules/audio_processing/test/audioproc_float.cc" and search + for "if (FLAG_all_default) {". + + For instance, in 55eb6d621489730084927868fed195d3645a9ec9 the default is this: + settings.use_aec = rtc::Optional(true); + settings.use_aecm = rtc::Optional(false); + settings.use_agc = rtc::Optional(true); + settings.use_bf = rtc::Optional(false); + settings.use_ed = rtc::Optional(false); + settings.use_hpf = rtc::Optional(true); + settings.use_le = rtc::Optional(true); + settings.use_ns = rtc::Optional(true); + settings.use_ts = rtc::Optional(true); + settings.use_vad = rtc::Optional(true); + + Args: + config_override: dict of APM configuration file names as keys; the values + are dict instances encoding the audioproc_f flags. + """ + for config_filename in config_override: + config = config_override[config_filename] + config['-all_default'] = None + + config_filepath = os.path.join( + OUTPUT_PATH, 'default-{}.json'.format(config_filename)) + logging.debug('config file <%s> | %s', config_filepath, config) + + data_access.AudioProcConfigFile.Save(config_filepath, config) + logging.info('config file created: <%s>', config_filepath) + + +def _GenerateAllDefaultButOne(): + """Disables the flags enabled by default one-by-one. + """ + config_sets = { + 'no_AEC': { + '-aec': 0, + }, + 'no_AGC': { + '-agc': 0, + }, + 'no_HP_filter': { + '-hpf': 0, + }, + 'no_level_estimator': { + '-le': 0, + }, + 'no_noise_suppressor': { + '-ns': 0, + }, + 'no_transient_suppressor': { + '-ts': 0, + }, + 'no_vad': { + '-vad': 0, + }, + } + _GenerateDefaultOverridden(config_sets) + + +def _GenerateAllDefaultPlusOne(): + """Enables the flags disabled by default one-by-one. + """ + config_sets = { + 'with_AECM': { + '-aec': 0, + '-aecm': 1, + }, # AEC and AECM are exclusive. 
+        'with_AGC_limiter': {
+            '-agc_limiter': 1,
+        },
+        'with_AEC_delay_agnostic': {
+            '-delay_agnostic': 1,
+        },
+        'with_drift_compensation': {
+            '-drift_compensation': 1,
+        },
+        'with_residual_echo_detector': {
+            '-ed': 1,
+        },
+        'with_AEC_extended_filter': {
+            '-extended_filter': 1,
+        },
+        'with_LC': {
+            '-lc': 1,
+        },
+        'with_refined_adaptive_filter': {
+            '-refined_adaptive_filter': 1,
+        },
+    }
+    _GenerateDefaultOverridden(config_sets)
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    _GenerateAllDefaultPlusOne()
+    _GenerateAllDefaultButOne()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py
new file mode 100644
index 0000000000..ecae2ed995
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Finds the APM configuration that maximizes a provided metric by
+parsing the output generated by apm_quality_assessment.py.
+"""
+
+from __future__ import division
+
+import collections
+import logging
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+
+def _InstanceArgumentsParser():
+    """Arguments parser factory. Extends the arguments from 'collect_data'
+    with a few extra for selecting what parameters to optimize for.
+    """
+    parser = collect_data.InstanceArgumentsParser()
+    parser.description = (
+        'Rudimentary optimization of a function over different parameter '
+        'combinations.')
+
+    parser.add_argument(
+        '-n',
+        '--config_dir',
+        required=False,
+        help=('path to the folder with the configuration files'),
+        default='apm_configs')
+
+    parser.add_argument('-p',
+                        '--params',
+                        required=True,
+                        nargs='+',
+                        help=('parameters to parse from the config files in '
+                              'config_dir'))
+
+    parser.add_argument(
+        '-z',
+        '--params_not_to_optimize',
+        required=False,
+        nargs='+',
+        default=[],
+        help=('parameters from `params` not to be optimized for'))
+
+    return parser
+
+
+def _ConfigurationAndScores(data_frame, params, params_not_to_optimize,
+                            config_dir):
+    """Returns all configurations and scores.
+
+    Args:
+      data_frame: a pandas data frame with the scores and config names
+        returned by collect_data.FindScores.
+      params: the parameter names to parse from the configs in the config
+        directory.
+      params_not_to_optimize: the parameter names which should not affect
+        the optimal parameter selection, e.g., fixed settings and
+        non-tunable parameters.
+      config_dir: path to the folder with the config files.
+
+    Returns:
+      Dictionary of the form
+      {param_combination: [{params: {param1: value1, ...},
+                            scores: {score1: value1, ...}}]}.
+
+      The key `param_combination` runs over all combinations of the
+      parameters in `params` that are not in `params_not_to_optimize`.
+      The corresponding value is a list of all param combinations for
+      params in `params_not_to_optimize` and their scores.
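+
+      Illustrative example (hypothetical flag names and scores), for
+      params=['agc', 'ns'] and params_not_to_optimize=['ns']:
+        {ParamCombination(agc=1): [
+            {'params': {'ns': 0}, 'scores': {'polqa': 0.9}},
+            {'params': {'ns': 1}, 'scores': {'polqa': 0.7}}]}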
+ """ + results = collections.defaultdict(list) + config_names = data_frame['apm_config'].drop_duplicates().values.tolist() + score_names = data_frame['eval_score_name'].drop_duplicates( + ).values.tolist() + + # Normalize the scores + normalization_constants = {} + for score_name in score_names: + scores = data_frame[data_frame.eval_score_name == score_name].score + normalization_constants[score_name] = max(scores) + + params_to_optimize = [p for p in params if p not in params_not_to_optimize] + param_combination = collections.namedtuple("ParamCombination", + params_to_optimize) + + for config_name in config_names: + config_json = data_access.AudioProcConfigFile.Load( + os.path.join(config_dir, config_name + ".json")) + scores = {} + data_cell = data_frame[data_frame.apm_config == config_name] + for score_name in score_names: + data_cell_scores = data_cell[data_cell.eval_score_name == + score_name].score + scores[score_name] = sum(data_cell_scores) / len(data_cell_scores) + scores[score_name] /= normalization_constants[score_name] + + result = {'scores': scores, 'params': {}} + config_optimize_params = {} + for param in params: + if param in params_to_optimize: + config_optimize_params[param] = config_json['-' + param] + else: + result['params'][param] = config_json['-' + param] + + current_param_combination = param_combination(**config_optimize_params) + results[current_param_combination].append(result) + return results + + +def _FindOptimalParameter(configs_and_scores, score_weighting): + """Finds the config producing the maximal score. + + Args: + configs_and_scores: structure of the form returned by + _ConfigurationAndScores + + score_weighting: a function to weight together all score values of + the form [{params: {param1: value1, ...}, scores: + {score1: value1, ...}}] into a numeric + value + Returns: + the config that has the largest values of `score_weighting` applied + to its scores. + """ + + min_score = float('+inf') + best_params = None + for config in configs_and_scores: + scores_and_params = configs_and_scores[config] + current_score = score_weighting(scores_and_params) + if current_score < min_score: + min_score = current_score + best_params = config + logging.debug("Score: %f", current_score) + logging.debug("Config: %s", str(config)) + return best_params + + +def _ExampleWeighting(scores_and_configs): + """Example argument to `_FindOptimalParameter` + Args: + scores_and_configs: a list of configs and scores, in the form + described in _FindOptimalParameter + Returns: + numeric value, the sum of all scores + """ + res = 0 + for score_config in scores_and_configs: + res += sum(score_config['scores'].values()) + return res + + +def main(): + # Init. + # TODO(alessiob): INFO once debugged. + logging.basicConfig(level=logging.DEBUG) + parser = _InstanceArgumentsParser() + args = parser.parse_args() + + # Get the scores. 
+    src_path = collect_data.ConstructSrcPath(args)
+    logging.debug('Src path <%s>', src_path)
+    scores_data_frame = collect_data.FindScores(src_path, args)
+    all_scores = _ConfigurationAndScores(scores_data_frame, args.params,
+                                         args.params_not_to_optimize,
+                                         args.config_dir)
+
+    opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)
+
+    logging.info('Optimal parameter combination: <%s>', opt_param)
+    logging.info('Its score values: <%s>', all_scores[opt_param])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py
new file mode 100644
index 0000000000..80338c1373
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the apm_quality_assessment module.
+"""
+
+import sys
+import unittest
+
+import mock
+
+import apm_quality_assessment
+
+
+class TestSimulationScript(unittest.TestCase):
+    """Unit tests for the apm_quality_assessment module.
+    """
+
+    def testMain(self):
+        # Exit with an error code if no arguments are passed.
+        with self.assertRaises(SystemExit) as cm, mock.patch.object(
+                sys, 'argv', ['apm_quality_assessment.py']):
+            apm_quality_assessment.main()
+        self.assertGreater(cm.exception.code, 0)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md
new file mode 100644
index 0000000000..66e2a1c848
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md
@@ -0,0 +1 @@
+You can use this folder for the output generated by the apm_quality_assessment scripts.
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py
new file mode 100644
index 0000000000..b870dfaef3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
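The annotations module added next runs up to three VADs over each capture file; AudioAnnotationsExtractor.VadType is a bit mask (ENERGY_THRESHOLD = 1, WEBRTC_COMMON_AUDIO = 2, WEBRTC_APM = 4), so detectors are combined with bitwise OR. A minimal usage sketch, assuming the package is importable and the compiled VAD helper binaries checked by the constructor are present ('tone.wav' and 'output_dir' are hypothetical names):

    from quality_assessment import annotations

    VadType = annotations.AudioAnnotationsExtractor.VadType
    # Run the energy-threshold and APM VADs together (1 | 4 == 5).
    extractor = annotations.AudioAnnotationsExtractor(
        VadType.ENERGY_THRESHOLD | VadType.WEBRTC_APM)
    extractor.Extract('tone.wav')
    probs, rms = extractor.GetVadOutput(VadType.WEBRTC_APM)
    extractor.Save('output_dir', 'tone-')  # writes tone-annotations.npz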
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py new file mode 100644 index 0000000000..93a8248397 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py @@ -0,0 +1,296 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Extraction of annotations from audio files. +""" + +from __future__ import division +import logging +import os +import shutil +import struct +import subprocess +import sys +import tempfile + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import external_vad +from . import exceptions +from . import signal_processing + + +class AudioAnnotationsExtractor(object): + """Extracts annotations from audio files. + """ + + class VadType(object): + ENERGY_THRESHOLD = 1 # TODO(alessiob): Consider switching to P56 standard. + WEBRTC_COMMON_AUDIO = 2 # common_audio/vad/include/vad.h + WEBRTC_APM = 4 # modules/audio_processing/vad/vad.h + + def __init__(self, value): + if (not isinstance(value, int)) or not 0 <= value <= 7: + raise exceptions.InitializationException('Invalid vad type: ' + + value) + self._value = value + + def Contains(self, vad_type): + return self._value | vad_type == self._value + + def __str__(self): + vads = [] + if self.Contains(self.ENERGY_THRESHOLD): + vads.append("energy") + if self.Contains(self.WEBRTC_COMMON_AUDIO): + vads.append("common_audio") + if self.Contains(self.WEBRTC_APM): + vads.append("apm") + return "VadType({})".format(", ".join(vads)) + + _OUTPUT_FILENAME_TEMPLATE = '{}annotations.npz' + + # Level estimation params. + _ONE_DB_REDUCTION = np.power(10.0, -1.0 / 20.0) + _LEVEL_FRAME_SIZE_MS = 1.0 + # The time constants in ms indicate the time it takes for the level estimate + # to go down/up by 1 db if the signal is zero. + _LEVEL_ATTACK_MS = 5.0 + _LEVEL_DECAY_MS = 20.0 + + # VAD params. 
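+    # Threshold for the energy-based VAD, expressed as a percentile of the
+    # level envelope: in Extract(), frames whose level exceeds the 1st
+    # percentile of all frame levels are marked as voice, so on clean,
+    # high-SNR input nearly every frame is classified as speech.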
+ _VAD_THRESHOLD = 1 + _VAD_WEBRTC_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), + os.pardir, os.pardir) + _VAD_WEBRTC_COMMON_AUDIO_PATH = os.path.join(_VAD_WEBRTC_PATH, 'vad') + + _VAD_WEBRTC_APM_PATH = os.path.join(_VAD_WEBRTC_PATH, 'apm_vad') + + def __init__(self, vad_type, external_vads=None): + self._signal = None + self._level = None + self._level_frame_size = None + self._common_audio_vad = None + self._energy_vad = None + self._apm_vad_probs = None + self._apm_vad_rms = None + self._vad_frame_size = None + self._vad_frame_size_ms = None + self._c_attack = None + self._c_decay = None + + self._vad_type = self.VadType(vad_type) + logging.info('VADs used for annotations: ' + str(self._vad_type)) + + if external_vads is None: + external_vads = {} + self._external_vads = external_vads + + assert len(self._external_vads) == len(external_vads), ( + 'The external VAD names must be unique.') + for vad in external_vads.values(): + if not isinstance(vad, external_vad.ExternalVad): + raise exceptions.InitializationException('Invalid vad type: ' + + str(type(vad))) + logging.info('External VAD used for annotation: ' + str(vad.name)) + + assert os.path.exists(self._VAD_WEBRTC_COMMON_AUDIO_PATH), \ + self._VAD_WEBRTC_COMMON_AUDIO_PATH + assert os.path.exists(self._VAD_WEBRTC_APM_PATH), \ + self._VAD_WEBRTC_APM_PATH + + @classmethod + def GetOutputFileNameTemplate(cls): + return cls._OUTPUT_FILENAME_TEMPLATE + + def GetLevel(self): + return self._level + + def GetLevelFrameSize(self): + return self._level_frame_size + + @classmethod + def GetLevelFrameSizeMs(cls): + return cls._LEVEL_FRAME_SIZE_MS + + def GetVadOutput(self, vad_type): + if vad_type == self.VadType.ENERGY_THRESHOLD: + return self._energy_vad + elif vad_type == self.VadType.WEBRTC_COMMON_AUDIO: + return self._common_audio_vad + elif vad_type == self.VadType.WEBRTC_APM: + return (self._apm_vad_probs, self._apm_vad_rms) + else: + raise exceptions.InitializationException('Invalid vad type: ' + + vad_type) + + def GetVadFrameSize(self): + return self._vad_frame_size + + def GetVadFrameSizeMs(self): + return self._vad_frame_size_ms + + def Extract(self, filepath): + # Load signal. + self._signal = signal_processing.SignalProcessingUtils.LoadWav( + filepath) + if self._signal.channels != 1: + raise NotImplementedError( + 'Multiple-channel annotations not implemented') + + # Level estimation params. + self._level_frame_size = int(self._signal.frame_rate / 1000 * + (self._LEVEL_FRAME_SIZE_MS)) + self._c_attack = 0.0 if self._LEVEL_ATTACK_MS == 0 else ( + self._ONE_DB_REDUCTION**(self._LEVEL_FRAME_SIZE_MS / + self._LEVEL_ATTACK_MS)) + self._c_decay = 0.0 if self._LEVEL_DECAY_MS == 0 else ( + self._ONE_DB_REDUCTION**(self._LEVEL_FRAME_SIZE_MS / + self._LEVEL_DECAY_MS)) + + # Compute level. + self._LevelEstimation() + + # Ideal VAD output, it requires clean speech with high SNR as input. + if self._vad_type.Contains(self.VadType.ENERGY_THRESHOLD): + # Naive VAD based on level thresholding. + vad_threshold = np.percentile(self._level, self._VAD_THRESHOLD) + self._energy_vad = np.uint8(self._level > vad_threshold) + self._vad_frame_size = self._level_frame_size + self._vad_frame_size_ms = self._LEVEL_FRAME_SIZE_MS + if self._vad_type.Contains(self.VadType.WEBRTC_COMMON_AUDIO): + # WebRTC common_audio/ VAD. + self._RunWebRtcCommonAudioVad(filepath, self._signal.frame_rate) + if self._vad_type.Contains(self.VadType.WEBRTC_APM): + # WebRTC modules/audio_processing/ VAD. 
+ self._RunWebRtcApmVad(filepath) + for extvad_name in self._external_vads: + self._external_vads[extvad_name].Run(filepath) + + def Save(self, output_path, annotation_name=""): + ext_kwargs = { + 'extvad_conf-' + ext_vad: + self._external_vads[ext_vad].GetVadOutput() + for ext_vad in self._external_vads + } + np.savez_compressed(file=os.path.join( + output_path, + self.GetOutputFileNameTemplate().format(annotation_name)), + level=self._level, + level_frame_size=self._level_frame_size, + level_frame_size_ms=self._LEVEL_FRAME_SIZE_MS, + vad_output=self._common_audio_vad, + vad_energy_output=self._energy_vad, + vad_frame_size=self._vad_frame_size, + vad_frame_size_ms=self._vad_frame_size_ms, + vad_probs=self._apm_vad_probs, + vad_rms=self._apm_vad_rms, + **ext_kwargs) + + def _LevelEstimation(self): + # Read samples. + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + self._signal).astype(np.float32) / 32768.0 + num_frames = len(samples) // self._level_frame_size + num_samples = num_frames * self._level_frame_size + + # Envelope. + self._level = np.max(np.reshape(np.abs(samples[:num_samples]), + (num_frames, self._level_frame_size)), + axis=1) + assert len(self._level) == num_frames + + # Envelope smoothing. + smooth = lambda curr, prev, k: (1 - k) * curr + k * prev + self._level[0] = smooth(self._level[0], 0.0, self._c_attack) + for i in range(1, num_frames): + self._level[i] = smooth( + self._level[i], self._level[i - 1], self._c_attack if + (self._level[i] > self._level[i - 1]) else self._c_decay) + + def _RunWebRtcCommonAudioVad(self, wav_file_path, sample_rate): + self._common_audio_vad = None + self._vad_frame_size = None + + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_COMMON_AUDIO_PATH, '-i', wav_file_path, '-o', + output_file_path + ], + cwd=self._VAD_WEBRTC_PATH) + + # Read bytes. + with open(output_file_path, 'rb') as f: + raw_data = f.read() + + # Parse side information. + self._vad_frame_size_ms = struct.unpack('B', raw_data[0])[0] + self._vad_frame_size = self._vad_frame_size_ms * sample_rate / 1000 + assert self._vad_frame_size_ms in [10, 20, 30] + extra_bits = struct.unpack('B', raw_data[-1])[0] + assert 0 <= extra_bits <= 8 + + # Init VAD vector. + num_bytes = len(raw_data) + num_frames = 8 * (num_bytes - + 2) - extra_bits # 8 frames for each byte. + self._common_audio_vad = np.zeros(num_frames, np.uint8) + + # Read VAD decisions. + for i, byte in enumerate(raw_data[1:-1]): + byte = struct.unpack('B', byte)[0] + for j in range(8 if i < num_bytes - 3 else (8 - extra_bits)): + self._common_audio_vad[i * 8 + j] = int(byte & 1) + byte = byte >> 1 + except Exception as e: + logging.error('Error while running the WebRTC VAD (' + e.message + + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + + def _RunWebRtcApmVad(self, wav_file_path): + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path_probs = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad_probs.tmp') + output_file_path_rms = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad_rms.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_APM_PATH, '-i', wav_file_path, '-o_probs', + output_file_path_probs, '-o_rms', output_file_path_rms + ], + cwd=self._VAD_WEBRTC_PATH) + + # Parse annotations. 
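+            # The apm_vad helper binary (see apm_vad.cc in this directory)
+            # writes the chunk-wise voice probabilities and RMS values as raw
+            # 8-byte doubles; np.fromfile() with np.double reads them back,
+            # one value per processed audio frame.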
+ self._apm_vad_probs = np.fromfile(output_file_path_probs, + np.double) + self._apm_vad_rms = np.fromfile(output_file_path_rms, np.double) + assert len(self._apm_vad_rms) == len(self._apm_vad_probs) + + except Exception as e: + logging.error('Error while running the WebRTC APM VAD (' + + e.message + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py new file mode 100644 index 0000000000..8230208808 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py @@ -0,0 +1,160 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the annotations module. +""" + +from __future__ import division +import logging +import os +import shutil +import tempfile +import unittest + +import numpy as np + +from . import annotations +from . import external_vad +from . import input_signal_creator +from . import signal_processing + + +class TestAnnotationsExtraction(unittest.TestCase): + """Unit tests for the annotations module. + """ + + _CLEAN_TMP_OUTPUT = True + _DEBUG_PLOT_VAD = False + _VAD_TYPE_CLASS = annotations.AudioAnnotationsExtractor.VadType + _ALL_VAD_TYPES = (_VAD_TYPE_CLASS.ENERGY_THRESHOLD + | _VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO + | _VAD_TYPE_CLASS.WEBRTC_APM) + + def setUp(self): + """Create temporary folder.""" + self._tmp_path = tempfile.mkdtemp() + self._wav_file_path = os.path.join(self._tmp_path, 'tone.wav') + pure_tone, _ = input_signal_creator.InputSignalCreator.Create( + 'pure_tone', [440, 1000]) + signal_processing.SignalProcessingUtils.SaveWav( + self._wav_file_path, pure_tone) + self._sample_rate = pure_tone.frame_rate + + def tearDown(self): + """Recursively delete temporary folder.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + + (self._tmp_path)) + + def testFrameSizes(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + samples_to_ms = lambda n, sr: 1000 * n // sr + self.assertEqual( + samples_to_ms(e.GetLevelFrameSize(), self._sample_rate), + e.GetLevelFrameSizeMs()) + self.assertEqual(samples_to_ms(e.GetVadFrameSize(), self._sample_rate), + e.GetVadFrameSizeMs()) + + def testVoiceActivityDetectors(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + vad_type = self._VAD_TYPE_CLASS(vad_type_value) + e = annotations.AudioAnnotationsExtractor(vad_type=vad_type_value) + e.Extract(self._wav_file_path) + if vad_type.Contains(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD): + # pylint: disable=unpacking-non-sequence + vad_output = e.GetVadOutput( + self._VAD_TYPE_CLASS.ENERGY_THRESHOLD) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual( + float(np.sum(vad_output)) / len(vad_output), 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO): + # pylint: disable=unpacking-non-sequence + vad_output = 
e.GetVadOutput( + self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual( + float(np.sum(vad_output)) / len(vad_output), 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_APM): + # pylint: disable=unpacking-non-sequence + (vad_probs, + vad_rms) = e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM) + self.assertGreater(len(vad_probs), 0) + self.assertGreater(len(vad_rms), 0) + self.assertGreaterEqual( + float(np.sum(vad_probs)) / len(vad_probs), 0.5) + self.assertGreaterEqual( + float(np.sum(vad_rms)) / len(vad_rms), 20000) + + if self._DEBUG_PLOT_VAD: + frame_times_s = lambda num_frames, frame_size_ms: np.arange( + num_frames).astype(np.float32) * frame_size_ms / 1000.0 + level = e.GetLevel() + t_level = frame_times_s(num_frames=len(level), + frame_size_ms=e.GetLevelFrameSizeMs()) + t_vad = frame_times_s(num_frames=len(vad_output), + frame_size_ms=e.GetVadFrameSizeMs()) + import matplotlib.pyplot as plt + plt.figure() + plt.hold(True) + plt.plot(t_level, level) + plt.plot(t_vad, vad_output * np.max(level), '.') + plt.show() + + def testSaveLoad(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, "fake-annotation") + + data = np.load( + os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + np.testing.assert_array_equal(e.GetLevel(), data['level']) + self.assertEqual(np.float32, data['level'].dtype) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD), + data['vad_energy_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO), + data['vad_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[0], + data['vad_probs']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[1], + data['vad_rms']) + self.assertEqual(np.uint8, data['vad_energy_output'].dtype) + self.assertEqual(np.float64, data['vad_probs'].dtype) + self.assertEqual(np.float64, data['vad_rms'].dtype) + + def testEmptyExternalShouldNotCrash(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + annotations.AudioAnnotationsExtractor(vad_type_value, {}) + + def testFakeExternalSaveLoad(self): + def FakeExternalFactory(): + return external_vad.ExternalVad( + os.path.join(os.path.dirname(os.path.abspath(__file__)), + 'fake_external_vad.py'), 'fake') + + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + e = annotations.AudioAnnotationsExtractor( + vad_type_value, {'fake': FakeExternalFactory()}) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, annotation_name="fake-annotation") + data = np.load( + os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + self.assertEqual(np.float32, data['extvad_conf-fake'].dtype) + np.testing.assert_almost_equal(np.arange(100, dtype=np.float32), + data['extvad_conf-fake']) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json new file mode 100644 index 0000000000..5c3277bac0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json @@ -0,0 +1 @@ +{"-all_default": null} diff --git 
a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc
new file mode 100644
index 0000000000..73ce4ed3f7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include <array>
+#include <fstream>
+#include <memory>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+#include "rtc_base/logging.h"
+
+ABSL_FLAG(std::string, i, "", "Input wav file");
+ABSL_FLAG(std::string, o_probs, "", "VAD probabilities output file");
+ABSL_FLAG(std::string, o_rms, "", "VAD RMS output file");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr uint8_t kAudioFrameLengthMilliseconds = 10;
+constexpr int kMaxSampleRate = 48000;
+constexpr size_t kMaxFrameLen =
+    kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000;
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string output_probs_file = absl::GetFlag(FLAGS_o_probs);
+  const std::string output_file = absl::GetFlag(FLAGS_o_rms);
+  // Open wav input file and check properties.
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+  const size_t audio_frame_len = rtc::CheckedDivExact(
+      kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000);
+  if (audio_frame_len > kMaxFrameLen) {
+    RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too large.";
+    return 1;
+  }
+
+  // Create the output files (raw binary, no header).
+  std::ofstream out_probs_file(output_probs_file, std::ofstream::binary);
+  std::ofstream out_rms_file(output_file, std::ofstream::binary);
+
+  // Run VAD and write decisions.
+  VoiceActivityDetector vad;
+  std::array<int16_t, kMaxFrameLen> samples;
+
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_len, samples.data());
+    if (read_samples < audio_frame_len) {
+      break;
+    }
+    vad.ProcessChunk(samples.data(), audio_frame_len, wav_reader.sample_rate());
+    // Write output.
+    auto probs = vad.chunkwise_voice_probabilities();
+    auto rms = vad.chunkwise_rms();
+    RTC_CHECK_EQ(probs.size(), rms.size());
+    RTC_CHECK_EQ(sizeof(double), 8);
+
+    for (const auto& p : probs) {
+      out_probs_file.write(reinterpret_cast<const char*>(&p), 8);
+    }
+    for (const auto& r : rms) {
+      out_rms_file.write(reinterpret_cast<const char*>(&r), 8);
+    }
+  }
+
+  out_probs_file.close();
+  out_rms_file.close();
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py
new file mode 100644
index 0000000000..04aeaa95b9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Class implementing a wrapper for APM simulators.
+"""
+
+import cProfile
+import logging
+import os
+import subprocess
+
+from . import data_access
+from . import exceptions
+
+
+class AudioProcWrapper(object):
+    """Wrapper for APM simulators.
+    """
+
+    DEFAULT_APM_SIMULATOR_BIN_PATH = os.path.abspath(
+        os.path.join(os.pardir, 'audioproc_f'))
+    OUTPUT_FILENAME = 'output.wav'
+
+    def __init__(self, simulator_bin_path):
+        """Ctor.
+
+        Args:
+          simulator_bin_path: path to the APM simulator binary.
+        """
+        self._simulator_bin_path = simulator_bin_path
+        self._config = None
+        self._output_signal_filepath = None
+
+        # Profiler instance to measure the running time.
+        self._profiler = cProfile.Profile()
+
+    @property
+    def output_filepath(self):
+        return self._output_signal_filepath
+
+    def Run(self,
+            config_filepath,
+            capture_input_filepath,
+            output_path,
+            render_input_filepath=None):
+        """Runs the APM simulator.
+
+        Args:
+          config_filepath: path to the configuration file specifying the
+            arguments for the APM simulator.
+          capture_input_filepath: path to the capture audio track input file
+            (aka forward or near-end).
+          output_path: path of the audio track output file.
+          render_input_filepath: path to the render audio track input file
+            (aka reverse or far-end).
+        """
+        # Init.
+        self._output_signal_filepath = os.path.join(output_path,
+                                                    self.OUTPUT_FILENAME)
+        profiling_stats_filepath = os.path.join(output_path, 'profiling.stats')
+
+        # Skip if the output has already been generated.
+        if os.path.exists(self._output_signal_filepath) and os.path.exists(
+                profiling_stats_filepath):
+            return
+
+        # Load configuration.
+        self._config = data_access.AudioProcConfigFile.Load(config_filepath)
+
+        # Set remaining parameters.
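+        # The config dict maps audioproc_f flags to values; a None value means
+        # the flag is passed without an argument. E.g., the (illustrative)
+        # config {'-all_default': None, '-agc': 0} plus the paths set below
+        # yields: audioproc_f -all_default -agc 0 -i <capture> -o <output>.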
+ if not os.path.exists(capture_input_filepath): + raise exceptions.FileNotFoundError( + 'cannot find capture input file') + self._config['-i'] = capture_input_filepath + self._config['-o'] = self._output_signal_filepath + if render_input_filepath is not None: + if not os.path.exists(render_input_filepath): + raise exceptions.FileNotFoundError( + 'cannot find render input file') + self._config['-ri'] = render_input_filepath + + # Build arguments list. + args = [self._simulator_bin_path] + for param_name in self._config: + args.append(param_name) + if self._config[param_name] is not None: + args.append(str(self._config[param_name])) + logging.debug(' '.join(args)) + + # Run. + self._profiler.enable() + subprocess.call(args) + self._profiler.disable() + + # Save profiling stats. + self._profiler.dump_stats(profiling_stats_filepath) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py new file mode 100644 index 0000000000..38aac0cbe2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py @@ -0,0 +1,243 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Imports a filtered subset of the scores and configurations computed +by apm_quality_assessment.py into a pandas data frame. +""" + +import argparse +import glob +import logging +import os +import re +import sys + +try: + import pandas as pd +except ImportError: + logging.critical('Cannot import the third-party Python package pandas') + sys.exit(1) + +from . import data_access as data_access +from . import simulation as sim + +# Compiled regular expressions used to extract score descriptors. +RE_CONFIG_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixApmConfig() + + r'(.+)') +RE_CAPTURE_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixCapture() + + r'(.+)') +RE_RENDER_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)') +RE_ECHO_SIM_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixEchoSimulator() + + r'(.+)') +RE_TEST_DATA_GEN_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)') +RE_TEST_DATA_GEN_PARAMS = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)') +RE_SCORE_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixScore() + + r'(.+)(\..+)') + + +def InstanceArgumentsParser(): + """Arguments parser factory. 
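+
+    The returned argparse.ArgumentParser is shared by the user-facing
+    scripts: its description is meant to be overridden by the caller, -o
+    points at the apm_quality_assessment output tree, and the optional
+    regular-expression flags (-c, -i, -r, -e, -t, -s) filter which scores
+    are collected.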
+ """ + parser = argparse.ArgumentParser( + description=('Override this description in a user script by changing' + ' `parser.description` of the returned parser.')) + + parser.add_argument('-o', + '--output_dir', + required=True, + help=('the same base path used with the ' + 'apm_quality_assessment tool')) + + parser.add_argument( + '-c', + '--config_names', + type=re.compile, + help=('regular expression to filter the APM configuration' + ' names')) + + parser.add_argument( + '-i', + '--capture_names', + type=re.compile, + help=('regular expression to filter the capture signal ' + 'names')) + + parser.add_argument('-r', + '--render_names', + type=re.compile, + help=('regular expression to filter the render signal ' + 'names')) + + parser.add_argument( + '-e', + '--echo_simulator_names', + type=re.compile, + help=('regular expression to filter the echo simulator ' + 'names')) + + parser.add_argument('-t', + '--test_data_generators', + type=re.compile, + help=('regular expression to filter the test data ' + 'generator names')) + + parser.add_argument( + '-s', + '--eval_scores', + type=re.compile, + help=('regular expression to filter the evaluation score ' + 'names')) + + return parser + + +def _GetScoreDescriptors(score_filepath): + """Extracts a score descriptor from the given score file path. + + Args: + score_filepath: path to the score file. + + Returns: + A tuple of strings (APM configuration name, capture audio track name, + render audio track name, echo simulator name, test data generator name, + test data generator parameters as string, evaluation score name). + """ + fields = score_filepath.split(os.sep)[-7:] + extract_name = lambda index, reg_expr: (reg_expr.match(fields[index]). + groups(0)[0]) + return ( + extract_name(0, RE_CONFIG_NAME), + extract_name(1, RE_CAPTURE_NAME), + extract_name(2, RE_RENDER_NAME), + extract_name(3, RE_ECHO_SIM_NAME), + extract_name(4, RE_TEST_DATA_GEN_NAME), + extract_name(5, RE_TEST_DATA_GEN_PARAMS), + extract_name(6, RE_SCORE_NAME), + ) + + +def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name, + test_data_gen_name, score_name, args): + """Decides whether excluding a score. + + A set of optional regular expressions in args is used to determine if the + score should be excluded (depending on its |*_name| descriptors). + + Args: + config_name: APM configuration name. + capture_name: capture audio track name. + render_name: render audio track name. + echo_simulator_name: echo simulator name. + test_data_gen_name: test data generator name. + score_name: evaluation score name. + args: parsed arguments. + + Returns: + A boolean. + """ + value_regexpr_pairs = [ + (config_name, args.config_names), + (capture_name, args.capture_names), + (render_name, args.render_names), + (echo_simulator_name, args.echo_simulator_names), + (test_data_gen_name, args.test_data_generators), + (score_name, args.eval_scores), + ] + + # Score accepted if each value matches the corresponding regular expression. + for value, regexpr in value_regexpr_pairs: + if regexpr is None: + continue + if not regexpr.match(value): + return True + + return False + + +def FindScores(src_path, args): + """Given a search path, find scores and return a DataFrame object. + + Args: + src_path: Search path pattern. + args: parsed arguments. + + Returns: + A DataFrame object. + """ + # Get scores. + scores = [] + for score_filepath in glob.iglob(src_path): + # Extract score descriptor fields from the path. 
+ (config_name, capture_name, render_name, echo_simulator_name, + test_data_gen_name, test_data_gen_params, + score_name) = _GetScoreDescriptors(score_filepath) + + # Ignore the score if required. + if _ExcludeScore(config_name, capture_name, render_name, + echo_simulator_name, test_data_gen_name, score_name, + args): + logging.info('ignored score: %s %s %s %s %s %s', config_name, + capture_name, render_name, echo_simulator_name, + test_data_gen_name, score_name) + continue + + # Read metadata and score. + metadata = data_access.Metadata.LoadAudioTestDataPaths( + os.path.split(score_filepath)[0]) + score = data_access.ScoreFile.Load(score_filepath) + + # Add a score with its descriptor fields. + scores.append(( + metadata['clean_capture_input_filepath'], + metadata['echo_free_capture_filepath'], + metadata['echo_filepath'], + metadata['render_filepath'], + metadata['capture_filepath'], + metadata['apm_output_filepath'], + metadata['apm_reference_filepath'], + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + test_data_gen_params, + score_name, + score, + )) + + return pd.DataFrame(data=scores, + columns=( + 'clean_capture_input_filepath', + 'echo_free_capture_filepath', + 'echo_filepath', + 'render_filepath', + 'capture_filepath', + 'apm_output_filepath', + 'apm_reference_filepath', + 'apm_config', + 'capture', + 'render', + 'echo_simulator', + 'test_data_gen', + 'test_data_gen_params', + 'eval_score_name', + 'score', + )) + + +def ConstructSrcPath(args): + return os.path.join( + args.output_dir, + sim.ApmModuleSimulator.GetPrefixApmConfig() + '*', + sim.ApmModuleSimulator.GetPrefixCapture() + '*', + sim.ApmModuleSimulator.GetPrefixRender() + '*', + sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*', + sim.ApmModuleSimulator.GetPrefixScore() + '*') diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py new file mode 100644 index 0000000000..c1aebb67f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py @@ -0,0 +1,154 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Data access utility functions and classes. +""" + +import json +import os + + +def MakeDirectory(path): + """Makes a directory recursively without rising exceptions if existing. + + Args: + path: path to the directory to be created. + """ + if os.path.exists(path): + return + os.makedirs(path) + + +class Metadata(object): + """Data access class to save and load metadata. + """ + + def __init__(self): + pass + + _GENERIC_METADATA_SUFFIX = '.mdata' + _AUDIO_TEST_DATA_FILENAME = 'audio_test_data.json' + + @classmethod + def LoadFileMetadata(cls, filepath): + """Loads generic metadata linked to a file. + + Args: + filepath: path to the metadata file to read. + + Returns: + A dict. 
+ """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX) as f: + return json.load(f) + + @classmethod + def SaveFileMetadata(cls, filepath, metadata): + """Saves generic metadata linked to a file. + + Args: + filepath: path to the metadata file to write. + metadata: a dict. + """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX, 'w') as f: + json.dump(metadata, f) + + @classmethod + def LoadAudioTestDataPaths(cls, metadata_path): + """Loads the input and the reference audio track paths. + + Args: + metadata_path: path to the directory containing the metadata file. + + Returns: + Tuple with the paths to the input and output audio tracks. + """ + metadata_filepath = os.path.join(metadata_path, + cls._AUDIO_TEST_DATA_FILENAME) + with open(metadata_filepath) as f: + return json.load(f) + + @classmethod + def SaveAudioTestDataPaths(cls, output_path, **filepaths): + """Saves the input and the reference audio track paths. + + Args: + output_path: path to the directory containing the metadata file. + + Keyword Args: + filepaths: collection of audio track file paths to save. + """ + output_filepath = os.path.join(output_path, + cls._AUDIO_TEST_DATA_FILENAME) + with open(output_filepath, 'w') as f: + json.dump(filepaths, f) + + +class AudioProcConfigFile(object): + """Data access to load/save APM simulator argument lists. + + The arguments stored in the config files are used to control the APM flags. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + + Returns: + A dict containing the configuration. + """ + with open(filepath) as f: + return json.load(f) + + @classmethod + def Save(cls, filepath, config): + """Saves a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + config: a dict containing the configuration. + """ + with open(filepath, 'w') as f: + json.dump(config, f) + + +class ScoreFile(object): + """Data access class to save and load float scalar scores. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a score from file. + + Args: + filepath: path to the score file. + + Returns: + A float encoding the score. + """ + with open(filepath) as f: + return float(f.readline().strip()) + + @classmethod + def Save(cls, filepath, score): + """Saves a score into a file. + + Args: + filepath: path to the score file. + score: float encoding the score. + """ + with open(filepath, 'w') as f: + f.write('{0:f}\n'.format(score)) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py new file mode 100644 index 0000000000..65903ea32d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py @@ -0,0 +1,136 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Echo path simulation module. +""" + +import hashlib +import os + +from . 
import signal_processing + + +class EchoPathSimulator(object): + """Abstract class for the echo path simulators. + + In general, an echo path simulator is a function of the render signal and + simulates the propagation of the latter into the microphone (e.g., due to + mechanical or electrical paths). + """ + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self): + pass + + def Simulate(self, output_path): + """Creates the echo signal and stores it in an audio file (abstract method). + + Args: + output_path: Path in which any output can be saved. + + Returns: + Path to the generated audio track file or None if no echo is present. + """ + raise NotImplementedError() + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EchoPathSimulator implementation. + + Decorator to automatically register the classes that extend + EchoPathSimulator. + Example usage: + + @EchoPathSimulator.RegisterClass + class NoEchoPathSimulator(EchoPathSimulator): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + +@EchoPathSimulator.RegisterClass +class NoEchoPathSimulator(EchoPathSimulator): + """Simulates absence of echo.""" + + NAME = 'noecho' + + def __init__(self): + EchoPathSimulator.__init__(self) + + def Simulate(self, output_path): + return None + + +@EchoPathSimulator.RegisterClass +class LinearEchoPathSimulator(EchoPathSimulator): + """Simulates linear echo path. + + This class applies a given impulse response to the render input and then it + sums the signal to the capture input signal. + """ + + NAME = 'linear' + + def __init__(self, render_input_filepath, impulse_response): + """ + Args: + render_input_filepath: Render audio track file. + impulse_response: list or numpy vector of float values. + """ + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + self._impulse_response = impulse_response + + def Simulate(self, output_path): + """Simulates linear echo path.""" + # Form the file name with a hash of the impulse response. + impulse_response_hash = hashlib.sha256( + str(self._impulse_response).encode('utf-8', 'ignore')).hexdigest() + echo_filepath = os.path.join( + output_path, 'linear_echo_{}.wav'.format(impulse_response_hash)) + + # If the simulated echo audio track file does not exists, create it. + if not os.path.exists(echo_filepath): + render = signal_processing.SignalProcessingUtils.LoadWav( + self._render_input_filepath) + echo = signal_processing.SignalProcessingUtils.ApplyImpulseResponse( + render, self._impulse_response) + signal_processing.SignalProcessingUtils.SaveWav( + echo_filepath, echo) + + return echo_filepath + + +@EchoPathSimulator.RegisterClass +class RecordedEchoPathSimulator(EchoPathSimulator): + """Uses recorded echo. + + This class uses the clean capture input file name to build the file name of + the corresponding recording containing echo (a predefined suffix is used). + Such a file is expected to be already existing. 
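+
+    For example, for the render input 'render.wav' (hypothetical name), the
+    recorded echo is read from 'render_echo.wav' in the same directory (see
+    _FILE_NAME_SUFFIX below).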
+ """ + + NAME = 'recorded' + + _FILE_NAME_SUFFIX = '_echo' + + def __init__(self, render_input_filepath): + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + + def Simulate(self, output_path): + """Uses recorded echo path.""" + path, file_name_ext = os.path.split(self._render_input_filepath) + file_name, file_ext = os.path.splitext(file_name_ext) + echo_filepath = os.path.join( + path, '{}{}{}'.format(file_name, self._FILE_NAME_SUFFIX, file_ext)) + assert os.path.exists(echo_filepath), ( + 'cannot find the echo audio track file {}'.format(echo_filepath)) + return echo_filepath diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py new file mode 100644 index 0000000000..4b46b36b47 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py @@ -0,0 +1,48 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Echo path simulation factory module. +""" + +import numpy as np + +from . import echo_path_simulation + + +class EchoPathSimulatorFactory(object): + + # TODO(alessiob): Replace 20 ms delay (at 48 kHz sample rate) with a more + # realistic impulse response. + _LINEAR_ECHO_IMPULSE_RESPONSE = np.array([0.0] * (20 * 48) + [0.15]) + + def __init__(self): + pass + + @classmethod + def GetInstance(cls, echo_path_simulator_class, render_input_filepath): + """Creates an EchoPathSimulator instance given a class object. + + Args: + echo_path_simulator_class: EchoPathSimulator class object (not an + instance). + render_input_filepath: Path to the render audio track file. + + Returns: + An EchoPathSimulator instance. + """ + assert render_input_filepath is not None or ( + echo_path_simulator_class == + echo_path_simulation.NoEchoPathSimulator) + + if echo_path_simulator_class == echo_path_simulation.NoEchoPathSimulator: + return echo_path_simulation.NoEchoPathSimulator() + elif echo_path_simulator_class == ( + echo_path_simulation.LinearEchoPathSimulator): + return echo_path_simulation.LinearEchoPathSimulator( + render_input_filepath, cls._LINEAR_ECHO_IMPULSE_RESPONSE) + else: + return echo_path_simulator_class(render_input_filepath) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py new file mode 100644 index 0000000000..b6cc8abdde --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py @@ -0,0 +1,82 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. 
All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the echo path simulation module.
+"""
+
+import shutil
+import os
+import tempfile
+import unittest
+
+import pydub
+
+from . import echo_path_simulation
+from . import echo_path_simulation_factory
+from . import signal_processing
+
+
+class TestEchoPathSimulators(unittest.TestCase):
+    """Unit tests for the echo path simulators.
+    """
+
+    def setUp(self):
+        """Creates temporary data."""
+        self._tmp_path = tempfile.mkdtemp()
+
+        # Create and save white noise.
+        silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+        white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+        self._audio_track_num_samples = (
+            signal_processing.SignalProcessingUtils.CountSamples(white_noise))
+        self._audio_track_filepath = os.path.join(self._tmp_path,
+                                                  'white_noise.wav')
+        signal_processing.SignalProcessingUtils.SaveWav(
+            self._audio_track_filepath, white_noise)
+
+        # Make a copy of the white noise audio track file; it will be used by
+        # echo_path_simulation.RecordedEchoPathSimulator.
+        shutil.copy(self._audio_track_filepath,
+                    os.path.join(self._tmp_path, 'white_noise_echo.wav'))
+
+    def tearDown(self):
+        """Recursively deletes temporary folders."""
+        shutil.rmtree(self._tmp_path)
+
+    def testRegisteredClasses(self):
+        # Check that there is at least one registered echo path simulator.
+        registered_classes = (
+            echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES)
+        self.assertIsInstance(registered_classes, dict)
+        self.assertGreater(len(registered_classes), 0)
+
+        # Instantiate the factory.
+        factory = echo_path_simulation_factory.EchoPathSimulatorFactory()
+
+        # Try each registered echo path simulator.
+        for echo_path_simulator_name in registered_classes:
+            simulator = factory.GetInstance(
+                echo_path_simulator_class=registered_classes[
+                    echo_path_simulator_name],
+                render_input_filepath=self._audio_track_filepath)
+
+            echo_filepath = simulator.Simulate(self._tmp_path)
+            if echo_filepath is None:
+                self.assertEqual(echo_path_simulation.NoEchoPathSimulator.NAME,
+                                 echo_path_simulator_name)
+                # No other tests in this case.
+                continue
+
+            # Check that the echo audio track file exists and that its length
+            # is greater than or equal to that of the render audio track.
+            self.assertTrue(os.path.exists(echo_filepath))
+            echo = signal_processing.SignalProcessingUtils.LoadWav(
+                echo_filepath)
+            self.assertGreaterEqual(
+                signal_processing.SignalProcessingUtils.CountSamples(echo),
+                self._audio_track_num_samples)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py
new file mode 100644
index 0000000000..59c5f74be4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py
@@ -0,0 +1,427 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Evaluation score abstract class and implementations.
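+
+To add a new score, subclass EvaluationScore, set NAME, implement _Run(),
+and decorate the class with EvaluationScore.RegisterClass so that it is
+discoverable via REGISTERED_CLASSES. A minimal sketch (the class below is
+illustrative, not part of this module):
+
+    @EvaluationScore.RegisterClass
+    class ConstantScore(EvaluationScore):
+        NAME = 'constant'
+
+        def _Run(self, output_path):
+            self._score = 0.0
+            self._SaveScore()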
+""" + +from __future__ import division +import logging +import os +import re +import subprocess +import sys + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import data_access +from . import exceptions +from . import signal_processing + + +class EvaluationScore(object): + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self, score_filename_prefix): + self._score_filename_prefix = score_filename_prefix + self._input_signal_metadata = None + self._reference_signal = None + self._reference_signal_filepath = None + self._tested_signal = None + self._tested_signal_filepath = None + self._output_filepath = None + self._score = None + self._render_signal_filepath = None + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EvaluationScore implementation. + + Decorator to automatically register the classes that extend EvaluationScore. + Example usage: + + @EvaluationScore.RegisterClass + class AudioLevelScore(EvaluationScore): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + @property + def output_filepath(self): + return self._output_filepath + + @property + def score(self): + return self._score + + def SetInputSignalMetadata(self, metadata): + """Sets input signal metadata. + + Args: + metadata: dict instance. + """ + self._input_signal_metadata = metadata + + def SetReferenceSignalFilepath(self, filepath): + """Sets the path to the audio track used as reference signal. + + Args: + filepath: path to the reference audio track. + """ + self._reference_signal_filepath = filepath + + def SetTestedSignalFilepath(self, filepath): + """Sets the path to the audio track used as test signal. + + Args: + filepath: path to the test audio track. + """ + self._tested_signal_filepath = filepath + + def SetRenderSignalFilepath(self, filepath): + """Sets the path to the audio track used as render signal. + + Args: + filepath: path to the test audio track. + """ + self._render_signal_filepath = filepath + + def Run(self, output_path): + """Extracts the score for the set test data pair. + + Args: + output_path: path to the directory where the output is written. + """ + self._output_filepath = os.path.join( + output_path, self._score_filename_prefix + self.NAME + '.txt') + try: + # If the score has already been computed, load. + self._LoadScore() + logging.debug('score found and loaded') + except IOError: + # Compute the score. + logging.debug('score not found, compute') + self._Run(output_path) + + def _Run(self, output_path): + # Abstract method. + raise NotImplementedError() + + def _LoadReferenceSignal(self): + assert self._reference_signal_filepath is not None + self._reference_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._reference_signal_filepath) + + def _LoadTestedSignal(self): + assert self._tested_signal_filepath is not None + self._tested_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._tested_signal_filepath) + + def _LoadScore(self): + return data_access.ScoreFile.Load(self._output_filepath) + + def _SaveScore(self): + return data_access.ScoreFile.Save(self._output_filepath, self._score) + + +@EvaluationScore.RegisterClass +class AudioLevelPeakScore(EvaluationScore): + """Peak audio level score. + + Defined as the difference between the peak audio level of the tested and + the reference signals. 
+
+    Unit: dB
+    Ideal: 0 dB
+    Worst case: +/-inf dB
+    """
+
+    NAME = 'audio_level_peak'
+
+    def __init__(self, score_filename_prefix):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+    def _Run(self, output_path):
+        self._LoadReferenceSignal()
+        self._LoadTestedSignal()
+        # Note: pydub's AudioSegment.dBFS is an RMS-based level; the true
+        # peak level would be AudioSegment.max_dBFS.
+        self._score = self._tested_signal.dBFS - self._reference_signal.dBFS
+        self._SaveScore()
+
+
+@EvaluationScore.RegisterClass
+class MeanAudioLevelScore(EvaluationScore):
+    """Mean audio level score.
+
+    Defined as the difference between the mean audio level of the tested and
+    the reference signals.
+
+    Unit: dB
+    Ideal: 0 dB
+    Worst case: +/-inf dB
+    """
+
+    NAME = 'audio_level_mean'
+
+    def __init__(self, score_filename_prefix):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+    def _Run(self, output_path):
+        self._LoadReferenceSignal()
+        self._LoadTestedSignal()
+
+        # Average the level difference over 1 second long windows (pydub
+        # audio segments are indexed in milliseconds).
+        dbfs_diffs_sum = 0.0
+        seconds = min(len(self._tested_signal), len(
+            self._reference_signal)) // 1000
+        for t in range(seconds):
+            t0 = t * 1000
+            t1 = t0 + 1000
+            dbfs_diffs_sum += (self._tested_signal[t0:t1].dBFS -
+                               self._reference_signal[t0:t1].dBFS)
+        self._score = dbfs_diffs_sum / float(seconds)
+        self._SaveScore()
+
+
+@EvaluationScore.RegisterClass
+class EchoMetric(EvaluationScore):
+    """Echo score.
+
+    Proportion of detected echo.
+
+    Unit: ratio
+    Ideal: 0
+    Worst case: 1
+    """
+
+    NAME = 'echo_metric'
+
+    def __init__(self, score_filename_prefix, echo_detector_bin_filepath):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+        # Echo detector binary file path.
+        self._echo_detector_bin_filepath = echo_detector_bin_filepath
+        if not os.path.exists(self._echo_detector_bin_filepath):
+            logging.error('cannot find EchoMetric tool binary file')
+            raise exceptions.FileNotFoundError()
+
+        self._echo_detector_bin_path, _ = os.path.split(
+            self._echo_detector_bin_filepath)
+
+    def _Run(self, output_path):
+        echo_detector_out_filepath = os.path.join(output_path,
+                                                  'echo_detector.out')
+        if os.path.exists(echo_detector_out_filepath):
+            os.unlink(echo_detector_out_filepath)
+
+        logging.debug("Render signal filepath: %s",
+                      self._render_signal_filepath)
+        if not os.path.exists(self._render_signal_filepath):
+            logging.error(
+                "Render input required for evaluating the echo metric.")
+
+        args = [
+            self._echo_detector_bin_filepath, '--output_file',
+            echo_detector_out_filepath, '--', '-i',
+            self._tested_signal_filepath, '-ri', self._render_signal_filepath
+        ]
+        logging.debug(' '.join(args))
+        subprocess.call(args, cwd=self._echo_detector_bin_path)
+
+        # Parse echo detector tool output and extract the score.
+        self._score = self._ParseOutputFile(echo_detector_out_filepath)
+        self._SaveScore()
+
+    @classmethod
+    def _ParseOutputFile(cls, echo_metric_file_path):
+        """
+        Parses the echo detector tool output, which consists of a single
+        floating point value.
+
+        Args:
+            echo_metric_file_path: path to the echo detector tool output file.
+
+        Returns:
+            The score as a number in [0, 1].
+        """
+        with open(echo_metric_file_path) as f:
+            return float(f.read())
+
+
+@EvaluationScore.RegisterClass
+class PolqaScore(EvaluationScore):
+    """POLQA score.
+
+    See http://www.polqa.info/.
+
+    Unit: MOS
+    Ideal: 4.5
+    Worst case: 1.0
+    """
+
+    NAME = 'polqa'
+
+    def __init__(self, score_filename_prefix, polqa_bin_filepath):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+        # POLQA binary file path.
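+        # (In the unit tests the licensed tool is replaced by the fake_polqa
+        # stub built from fake_polqa.cc in this directory, which always
+        # reports a PolqaScore of 3.25.)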
+        self._polqa_bin_filepath = polqa_bin_filepath
+        if not os.path.exists(self._polqa_bin_filepath):
+            logging.error('cannot find POLQA tool binary file')
+            raise exceptions.FileNotFoundError()
+
+        # Path to the POLQA directory with binary and license files.
+        self._polqa_tool_path, _ = os.path.split(self._polqa_bin_filepath)
+
+    def _Run(self, output_path):
+        polqa_out_filepath = os.path.join(output_path, 'polqa.out')
+        if os.path.exists(polqa_out_filepath):
+            os.unlink(polqa_out_filepath)
+
+        args = [
+            self._polqa_bin_filepath,
+            '-t',
+            '-q',
+            '-Overwrite',
+            '-Ref',
+            self._reference_signal_filepath,
+            '-Test',
+            self._tested_signal_filepath,
+            '-LC',
+            'NB',
+            '-Out',
+            polqa_out_filepath,
+        ]
+        logging.debug(' '.join(args))
+        subprocess.call(args, cwd=self._polqa_tool_path)
+
+        # Parse POLQA tool output and extract the score.
+        polqa_output = self._ParseOutputFile(polqa_out_filepath)
+        self._score = float(polqa_output['PolqaScore'])
+
+        self._SaveScore()
+
+    @classmethod
+    def _ParseOutputFile(cls, polqa_out_filepath):
+        """
+        Parses the POLQA tool output formatted as a table ('-t' option).
+
+        Args:
+            polqa_out_filepath: path to the POLQA tool output file.
+
+        Returns:
+            A dict.
+        """
+        data = []
+        with open(polqa_out_filepath) as f:
+            for line in f:
+                line = line.strip()
+                if len(line) == 0 or line.startswith('*'):
+                    # Ignore comments.
+                    continue
+                # Read fields.
+                data.append(re.split(r'\t+', line))
+
+        # Two rows expected (header and values).
+        assert len(data) == 2, 'Cannot parse POLQA output'
+        number_of_fields = len(data[0])
+        assert number_of_fields == len(data[1])
+
+        # Build and return a dictionary with field names (header) as keys and
+        # the corresponding field values as values.
+        return {
+            data[0][index]: data[1][index]
+            for index in range(number_of_fields)
+        }
+
+
+@EvaluationScore.RegisterClass
+class TotalHarmonicDistorsionScore(EvaluationScore):
+    """Total harmonic distortion plus noise score.
+
+    See https://en.wikipedia.org/wiki/Total_harmonic_distortion#THD.2BN.
+
+    Unit: none (ratio).
+    Ideal: 0.
+    Worst case: +inf
+    """
+
+    NAME = 'thd'
+
+    def __init__(self, score_filename_prefix):
+        EvaluationScore.__init__(self, score_filename_prefix)
+        self._input_frequency = None
+
+    def _Run(self, output_path):
+        self._CheckInputSignal()
+
+        self._LoadTestedSignal()
+        if self._tested_signal.channels != 1:
+            raise exceptions.EvaluationScoreException(
+                'unsupported number of channels')
+        samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData(
+            self._tested_signal)
+
+        # Init.
+        num_samples = len(samples)
+        duration = len(self._tested_signal) / 1000.0
+        scaling = 2.0 / num_samples
+        max_freq = self._tested_signal.frame_rate / 2
+        f0_freq = float(self._input_frequency)
+        t = np.linspace(0, duration, num_samples)
+
+        # Analyze harmonics.
+        b_terms = []
+        n = 1
+        while f0_freq * n < max_freq:
+            x_n = np.sum(
+                samples * np.sin(2.0 * np.pi * n * f0_freq * t)) * scaling
+            y_n = np.sum(
+                samples * np.cos(2.0 * np.pi * n * f0_freq * t)) * scaling
+            b_terms.append(np.sqrt(x_n**2 + y_n**2))
+            n += 1
+
+        output_without_fundamental = samples - b_terms[0] * np.sin(
+            2.0 * np.pi * f0_freq * t)
+        distortion_and_noise = np.sqrt(
+            np.sum(output_without_fundamental**2) * np.pi * scaling)
+
+        # TODO(alessiob): Fix or remove if not needed.
+        # thd = np.sqrt(np.sum(b_terms[1:]**2)) / b_terms[0]
+
+        # TODO(alessiob): Check the range of `thd_plus_noise` and update the
+        # class docstring above accordingly.
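+        # (Each b_terms[n-1] is the single-frequency Fourier magnitude of
+        # harmonic n of f0, so b_terms[0] is the fundamental. THD+N relates
+        # the residual obtained by removing the fundamental, which contains
+        # the remaining harmonics plus noise, to the fundamental itself.)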
+        thd_plus_noise = distortion_and_noise / b_terms[0]
+
+        self._score = thd_plus_noise
+        self._SaveScore()
+
+    def _CheckInputSignal(self):
+        # Check input signal and get properties.
+        try:
+            if self._input_signal_metadata['signal'] != 'pure_tone':
+                raise exceptions.EvaluationScoreException(
+                    'The THD score requires a pure tone as input signal')
+            self._input_frequency = self._input_signal_metadata['frequency']
+            if self._input_signal_metadata[
+                    'test_data_gen_name'] != 'identity' or (
+                        self._input_signal_metadata['test_data_gen_config'] !=
+                        'default'):
+                raise exceptions.EvaluationScoreException(
+                    'The THD score cannot be used with any test data generator '
+                    'other than "identity"')
+        except TypeError:
+            raise exceptions.EvaluationScoreException(
+                'The THD score requires an input signal with associated metadata'
+            )
+        except KeyError:
+            raise exceptions.EvaluationScoreException(
+                'Invalid input signal metadata to compute the THD score')
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py
new file mode 100644
index 0000000000..5749a8924b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""EvaluationScore factory class.
+"""
+
+import logging
+
+from . import exceptions
+from . import eval_scores
+
+
+class EvaluationScoreWorkerFactory(object):
+    """Factory class used to instantiate evaluation score workers.
+
+    The ctor gets the parameters that are used to instantiate the evaluation
+    score workers.
+    """
+
+    def __init__(self, polqa_tool_bin_path, echo_metric_tool_bin_path):
+        self._score_filename_prefix = None
+        self._polqa_tool_bin_path = polqa_tool_bin_path
+        self._echo_metric_tool_bin_path = echo_metric_tool_bin_path
+
+    def SetScoreFilenamePrefix(self, prefix):
+        self._score_filename_prefix = prefix
+
+    def GetInstance(self, evaluation_score_class):
+        """Creates an EvaluationScore instance given a class object.
+
+        Args:
+            evaluation_score_class: EvaluationScore class object (not an
+                instance).
+
+        Returns:
+            An EvaluationScore instance.
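+
+        Example (a sketch; the POLQA binary path below is illustrative and
+        must point to an existing file only when PolqaScore is requested):
+
+            factory = EvaluationScoreWorkerFactory('/path/to/polqa', None)
+            factory.SetScoreFilenamePrefix('scores-')
+            worker = factory.GetInstance(eval_scores.AudioLevelPeakScore)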
+ """ + if self._score_filename_prefix is None: + raise exceptions.InitializationException( + 'The score file name prefix for evaluation score workers is not set' + ) + logging.debug('factory producing a %s evaluation score', + evaluation_score_class) + + if evaluation_score_class == eval_scores.PolqaScore: + return eval_scores.PolqaScore(self._score_filename_prefix, + self._polqa_tool_bin_path) + elif evaluation_score_class == eval_scores.EchoMetric: + return eval_scores.EchoMetric(self._score_filename_prefix, + self._echo_metric_tool_bin_path) + else: + return evaluation_score_class(self._score_filename_prefix) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py new file mode 100644 index 0000000000..12e043320e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py @@ -0,0 +1,137 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the eval_scores module. +""" + +import os +import shutil +import tempfile +import unittest + +import pydub + +from . import data_access +from . import eval_scores +from . import eval_scores_factory +from . import signal_processing + + +class TestEvalScores(unittest.TestCase): + """Unit tests for the eval_scores module. + """ + + def setUp(self): + """Create temporary output folder and two audio track files.""" + self._output_path = tempfile.mkdtemp() + + # Create fake reference and tested (i.e., APM output) audio track files. + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + fake_reference_signal = (signal_processing.SignalProcessingUtils. + GenerateWhiteNoise(silence)) + fake_tested_signal = (signal_processing.SignalProcessingUtils. + GenerateWhiteNoise(silence)) + + # Save fake audio tracks. + self._fake_reference_signal_filepath = os.path.join( + self._output_path, 'fake_ref.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_reference_signal_filepath, fake_reference_signal) + self._fake_tested_signal_filepath = os.path.join( + self._output_path, 'fake_test.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_tested_signal_filepath, fake_tested_signal) + + def tearDown(self): + """Recursively delete temporary folder.""" + shutil.rmtree(self._output_path) + + def testRegisteredClasses(self): + # Evaluation score names to exclude (tested separately). + exceptions = ['thd', 'echo_metric'] + + # Preliminary check. + self.assertTrue(os.path.exists(self._output_path)) + + # Check that there is at least one registered evaluation score worker. + registered_classes = eval_scores.EvaluationScore.REGISTERED_CLASSES + self.assertIsInstance(registered_classes, dict) + self.assertGreater(len(registered_classes), 0) + + # Instance evaluation score workers factory with fake dependencies. 
+ eval_score_workers_factory = ( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'fake_polqa'), + echo_metric_tool_bin_path=None)) + eval_score_workers_factory.SetScoreFilenamePrefix('scores-') + + # Try each registered evaluation score worker. + for eval_score_name in registered_classes: + if eval_score_name in exceptions: + continue + + # Instance evaluation score worker. + eval_score_worker = eval_score_workers_factory.GetInstance( + registered_classes[eval_score_name]) + + # Set fake input metadata and reference and test file paths, then run. + eval_score_worker.SetReferenceSignalFilepath( + self._fake_reference_signal_filepath) + eval_score_worker.SetTestedSignalFilepath( + self._fake_tested_signal_filepath) + eval_score_worker.Run(self._output_path) + + # Check output. + score = data_access.ScoreFile.Load( + eval_score_worker.output_filepath) + self.assertTrue(isinstance(score, float)) + + def testTotalHarmonicDistorsionScore(self): + # Init. + pure_tone_freq = 5000.0 + eval_score_worker = eval_scores.TotalHarmonicDistorsionScore('scores-') + eval_score_worker.SetInputSignalMetadata({ + 'signal': + 'pure_tone', + 'frequency': + pure_tone_freq, + 'test_data_gen_name': + 'identity', + 'test_data_gen_config': + 'default', + }) + template = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + + # Create 3 test signals: pure tone, pure tone + white noise, white noise + # only. + pure_tone = signal_processing.SignalProcessingUtils.GeneratePureTone( + template, pure_tone_freq) + white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + template) + noisy_tone = signal_processing.SignalProcessingUtils.MixSignals( + pure_tone, white_noise) + + # Compute scores for increasingly distorted pure tone signals. + scores = [None, None, None] + for index, tested_signal in enumerate( + [pure_tone, noisy_tone, white_noise]): + # Save signal. + tmp_filepath = os.path.join(self._output_path, 'tmp_thd.wav') + signal_processing.SignalProcessingUtils.SaveWav( + tmp_filepath, tested_signal) + + # Compute score. + eval_score_worker.SetTestedSignalFilepath(tmp_filepath) + eval_score_worker.Run(self._output_path) + scores[index] = eval_score_worker.score + + # Remove output file to avoid caching. + os.remove(eval_score_worker.output_filepath) + + # Validate scores (lowest score with a pure tone). + self.assertTrue(all([scores[i + 1] > scores[i] for i in range(2)])) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py new file mode 100644 index 0000000000..2599085329 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py @@ -0,0 +1,57 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Evaluator of the APM module. +""" + +import logging + + +class ApmModuleEvaluator(object): + """APM evaluator class. 
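+
+    For each worker in Run(), the APM output is set as the tested signal,
+    the reference and render tracks are wired up, and worker.score is
+    collected into a dict keyed by the score name.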
+ """ + + def __init__(self): + pass + + @classmethod + def Run(cls, evaluation_score_workers, apm_input_metadata, + apm_output_filepath, reference_input_filepath, + render_input_filepath, output_path): + """Runs the evaluation. + + Iterates over the given evaluation score workers. + + Args: + evaluation_score_workers: list of EvaluationScore instances. + apm_input_metadata: dictionary with metadata of the APM input. + apm_output_filepath: path to the audio track file with the APM output. + reference_input_filepath: path to the reference audio track file. + output_path: output path. + + Returns: + A dict of evaluation score name and score pairs. + """ + # Init. + scores = {} + + for evaluation_score_worker in evaluation_score_workers: + logging.info(' computing <%s> score', + evaluation_score_worker.NAME) + evaluation_score_worker.SetInputSignalMetadata(apm_input_metadata) + evaluation_score_worker.SetReferenceSignalFilepath( + reference_input_filepath) + evaluation_score_worker.SetTestedSignalFilepath( + apm_output_filepath) + evaluation_score_worker.SetRenderSignalFilepath( + render_input_filepath) + + evaluation_score_worker.Run(output_path) + scores[ + evaluation_score_worker.NAME] = evaluation_score_worker.score + + return scores diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py new file mode 100644 index 0000000000..893901d359 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py @@ -0,0 +1,45 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Exception classes. +""" + + +class FileNotFoundError(Exception): + """File not found exception. + """ + pass + + +class SignalProcessingException(Exception): + """Signal processing exception. + """ + pass + + +class InputMixerException(Exception): + """Input mixer exception. + """ + pass + + +class InputSignalCreatorException(Exception): + """Input signal creator exception. + """ + pass + + +class EvaluationScoreException(Exception): + """Evaluation score exception. + """ + pass + + +class InitializationException(Exception): + """Initialization exception. + """ + pass diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py new file mode 100644 index 0000000000..fe3a6c7cb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py @@ -0,0 +1,426 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import functools +import hashlib +import logging +import os +import re +import sys + +try: + import csscompressor +except ImportError: + logging.critical( + 'Cannot import the third-party Python package csscompressor') + sys.exit(1) + +try: + import jsmin +except ImportError: + logging.critical('Cannot import the third-party Python package jsmin') + sys.exit(1) + + +class HtmlExport(object): + """HTML exporter class for APM quality scores.""" + + _NEW_LINE = '\n' + + # CSS and JS file paths. + _PATH = os.path.dirname(os.path.realpath(__file__)) + _CSS_FILEPATH = os.path.join(_PATH, 'results.css') + _CSS_MINIFIED = True + _JS_FILEPATH = os.path.join(_PATH, 'results.js') + _JS_MINIFIED = True + + def __init__(self, output_filepath): + self._scores_data_frame = None + self._output_filepath = output_filepath + + def Export(self, scores_data_frame): + """Exports scores into an HTML file. + + Args: + scores_data_frame: DataFrame instance. + """ + self._scores_data_frame = scores_data_frame + html = [ + '', + self._BuildHeader(), + (''), '', + self._BuildBody(), '', '' + ] + self._Save(self._output_filepath, self._NEW_LINE.join(html)) + + def _BuildHeader(self): + """Builds the section of the HTML file. + + The header contains the page title and either embedded or linked CSS and JS + files. + + Returns: + A string with ... HTML. + """ + html = ['', 'Results'] + + # Add Material Design hosted libs. + html.append('') + html.append( + '') + html.append( + '') + html.append('') + + # Embed custom JavaScript and CSS files. + html.append('') + html.append('') + + html.append('') + + return self._NEW_LINE.join(html) + + def _BuildBody(self): + """Builds the content of the section.""" + score_names = self._scores_data_frame[ + 'eval_score_name'].drop_duplicates().values.tolist() + + html = [ + ('
'), + '
', + '
', + 'APM QA results ({})'.format( + self._output_filepath), + '
', + ] + + # Tab selectors. + html.append('
') + for tab_index, score_name in enumerate(score_names): + is_active = tab_index == 0 + html.append('' + '{}'.format(tab_index, + ' is-active' if is_active else '', + self._FormatName(score_name))) + html.append('
') + + html.append('
') + html.append( + '
') + + # Tabs content. + for tab_index, score_name in enumerate(score_names): + html.append('
'.format( + ' is-active' if is_active else '', tab_index)) + html.append('
') + html.append( + self._BuildScoreTab(score_name, ('s{}'.format(tab_index), ))) + html.append('
') + html.append('
') + + html.append('
') + html.append('
') + + # Add snackbar for notifications. + html.append( + '
' + '
' + '' + '
') + + return self._NEW_LINE.join(html) + + def _BuildScoreTab(self, score_name, anchor_data): + """Builds the content of a tab.""" + # Find unique values. + scores = self._scores_data_frame[ + self._scores_data_frame.eval_score_name == score_name] + apm_configs = sorted(self._FindUniqueTuples(scores, ['apm_config'])) + test_data_gen_configs = sorted( + self._FindUniqueTuples(scores, + ['test_data_gen', 'test_data_gen_params'])) + + html = [ + '
', + '
', + '
', + (''), + ] + + # Header. + html.append('') + for test_data_gen_info in test_data_gen_configs: + html.append(''.format( + self._FormatName(test_data_gen_info[0]), + test_data_gen_info[1])) + html.append('') + + # Body. + html.append('') + for apm_config in apm_configs: + html.append('') + for test_data_gen_info in test_data_gen_configs: + dialog_id = self._ScoreStatsInspectorDialogId( + score_name, apm_config[0], test_data_gen_info[0], + test_data_gen_info[1]) + html.append( + ''. + format( + dialog_id, + self._BuildScoreTableCell(score_name, + test_data_gen_info[0], + test_data_gen_info[1], + apm_config[0]))) + html.append('') + html.append('') + + html.append( + '
APM config / Test data generator{} {}
' + self._FormatName(apm_config[0]) + '{}
') + + html.append( + self._BuildScoreStatsInspectorDialogs(score_name, apm_configs, + test_data_gen_configs, + anchor_data)) + + return self._NEW_LINE.join(html) + + def _BuildScoreTableCell(self, score_name, test_data_gen, + test_data_gen_params, apm_config): + """Builds the content of a table cell for a score table.""" + scores = self._SliceDataForScoreTableCell(score_name, apm_config, + test_data_gen, + test_data_gen_params) + stats = self._ComputeScoreStats(scores) + + html = [] + items_id_prefix = (score_name + test_data_gen + test_data_gen_params + + apm_config) + if stats['count'] == 1: + # Show the only available score. + item_id = hashlib.md5(items_id_prefix.encode('utf-8')).hexdigest() + html.append('
{1:f}
'.format( + item_id, scores['score'].mean())) + html.append( + '
{}' + '
'.format(item_id, 'single value')) + else: + # Show stats. + for stat_name in ['min', 'max', 'mean', 'std dev']: + item_id = hashlib.md5( + (items_id_prefix + stat_name).encode('utf-8')).hexdigest() + html.append('
{1:f}
'.format( + item_id, stats[stat_name])) + html.append( + '
{}' + '
'.format(item_id, stat_name)) + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorDialogs(self, score_name, apm_configs, + test_data_gen_configs, anchor_data): + """Builds a set of score stats inspector dialogs.""" + html = [] + for apm_config in apm_configs: + for test_data_gen_info in test_data_gen_configs: + dialog_id = self._ScoreStatsInspectorDialogId( + score_name, apm_config[0], test_data_gen_info[0], + test_data_gen_info[1]) + + html.append(''.format(dialog_id)) + + # Content. + html.append('
') + html.append( + '
APM config preset: {}
' + 'Test data generator: {} ({})
'. + format(self._FormatName(apm_config[0]), + self._FormatName(test_data_gen_info[0]), + test_data_gen_info[1])) + html.append( + self._BuildScoreStatsInspectorDialog( + score_name, apm_config[0], test_data_gen_info[0], + test_data_gen_info[1], anchor_data + (dialog_id, ))) + html.append('
') + + # Actions. + html.append('
') + html.append('') + html.append('
') + + html.append('
') + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorDialog(self, score_name, apm_config, + test_data_gen, test_data_gen_params, + anchor_data): + """Builds one score stats inspector dialog.""" + scores = self._SliceDataForScoreTableCell(score_name, apm_config, + test_data_gen, + test_data_gen_params) + + capture_render_pairs = sorted( + self._FindUniqueTuples(scores, ['capture', 'render'])) + echo_simulators = sorted( + self._FindUniqueTuples(scores, ['echo_simulator'])) + + html = [ + '' + ] + + # Header. + html.append('') + for echo_simulator in echo_simulators: + html.append('') + html.append('') + + # Body. + html.append('') + for row, (capture, render) in enumerate(capture_render_pairs): + html.append(''.format( + capture, render)) + for col, echo_simulator in enumerate(echo_simulators): + score_tuple = self._SliceDataForScoreStatsTableCell( + scores, capture, render, echo_simulator[0]) + cell_class = 'r{}c{}'.format(row, col) + html.append(''.format( + cell_class, + self._BuildScoreStatsInspectorTableCell( + score_tuple, anchor_data + (cell_class, )))) + html.append('') + html.append('') + + html.append('
Capture-Render / Echo simulator' + self._FormatName(echo_simulator[0]) + '
{}
{}
{}
') + + # Placeholder for the audio inspector. + html.append('
') + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorTableCell(self, score_tuple, anchor_data): + """Builds the content of a cell of a score stats inspector.""" + anchor = '&'.join(anchor_data) + html = [('
{}
' + '').format(score_tuple.score, anchor)] + + # Add all the available file paths as hidden data. + for field_name in score_tuple.keys(): + if field_name.endswith('_filepath'): + html.append( + ''.format( + field_name, score_tuple[field_name])) + + return self._NEW_LINE.join(html) + + def _SliceDataForScoreTableCell(self, score_name, apm_config, + test_data_gen, test_data_gen_params): + """Slices `self._scores_data_frame` to extract the data for a tab.""" + masks = [] + masks.append(self._scores_data_frame.eval_score_name == score_name) + masks.append(self._scores_data_frame.apm_config == apm_config) + masks.append(self._scores_data_frame.test_data_gen == test_data_gen) + masks.append(self._scores_data_frame.test_data_gen_params == + test_data_gen_params) + mask = functools.reduce((lambda i1, i2: i1 & i2), masks) + del masks + return self._scores_data_frame[mask] + + @classmethod + def _SliceDataForScoreStatsTableCell(cls, scores, capture, render, + echo_simulator): + """Slices `scores` to extract the data for a tab.""" + masks = [] + + masks.append(scores.capture == capture) + masks.append(scores.render == render) + masks.append(scores.echo_simulator == echo_simulator) + mask = functools.reduce((lambda i1, i2: i1 & i2), masks) + del masks + + sliced_data = scores[mask] + assert len(sliced_data) == 1, 'single score is expected' + return sliced_data.iloc[0] + + @classmethod + def _FindUniqueTuples(cls, data_frame, fields): + """Slices `data_frame` to a list of fields and finds unique tuples.""" + return data_frame[fields].drop_duplicates().values.tolist() + + @classmethod + def _ComputeScoreStats(cls, data_frame): + """Computes score stats.""" + scores = data_frame['score'] + return { + 'count': scores.count(), + 'min': scores.min(), + 'max': scores.max(), + 'mean': scores.mean(), + 'std dev': scores.std(), + } + + @classmethod + def _ScoreStatsInspectorDialogId(cls, score_name, apm_config, + test_data_gen, test_data_gen_params): + """Assigns a unique name to a dialog.""" + return 'score-stats-dialog-' + hashlib.md5( + 'score-stats-inspector-{}-{}-{}-{}'.format( + score_name, apm_config, test_data_gen, + test_data_gen_params).encode('utf-8')).hexdigest() + + @classmethod + def _Save(cls, output_filepath, html): + """Writes the HTML file. + + Args: + output_filepath: output file path. + html: string with the HTML content. + """ + with open(output_filepath, 'w') as f: + f.write(html) + + @classmethod + def _FormatName(cls, name): + """Formats a name. + + Args: + name: a string. + + Returns: + A copy of name in which underscores and dashes are replaced with a space. + """ + return re.sub(r'[_\-]', ' ', name) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py new file mode 100644 index 0000000000..412aa7c4e7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py @@ -0,0 +1,86 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the export module. 
+""" + +import logging +import os +import shutil +import tempfile +import unittest + +import pyquery as pq + +from . import audioproc_wrapper +from . import collect_data +from . import eval_scores_factory +from . import evaluation +from . import export +from . import simulation +from . import test_data_generation_factory + + +class TestExport(unittest.TestCase): + """Unit tests for the export module. + """ + + _CLEAN_TMP_OUTPUT = True + + def setUp(self): + """Creates temporary data to export.""" + self._tmp_path = tempfile.mkdtemp() + + # Run a fake experiment to produce data to export. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), + 'fake_polqa'), + echo_metric_tool_bin_path=None)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper. + DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + simulator.Run( + config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=[ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-880_1000.wav'), + ], + test_data_generator_names=['identity', 'white_noise'], + eval_score_names=['audio_level_peak', 'audio_level_mean'], + output_dir=self._tmp_path) + + # Export results. + p = collect_data.InstanceArgumentsParser() + args = p.parse_args(['--output_dir', self._tmp_path]) + src_path = collect_data.ConstructSrcPath(args) + self._data_to_export = collect_data.FindScores(src_path, args) + + def tearDown(self): + """Recursively deletes temporary folders.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + + (self._tmp_path)) + + def testCreateHtmlReport(self): + fn_out = os.path.join(self._tmp_path, 'results.html') + exporter = export.HtmlExport(fn_out) + exporter.Export(self._data_to_export) + + document = pq.PyQuery(filename=fn_out) + self.assertIsInstance(document, pq.PyQuery) + # TODO(alessiob): Use PyQuery API to check the HTML file. diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py new file mode 100644 index 0000000000..a7db7b4840 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py @@ -0,0 +1,75 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +from __future__ import division + +import logging +import os +import subprocess +import shutil +import sys +import tempfile + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . 
import signal_processing
+
+
+class ExternalVad(object):
+    def __init__(self, path_to_binary, name):
+        """Args:
+        path_to_binary: path to a binary that accepts '-i <input wav>',
+            '-o <output file>'. There must be one float value per
+            10ms audio.
+        name: a name to identify the external VAD. Used for saving
+            the output as extvad_output-<name>.
+        """
+        self._path_to_binary = path_to_binary
+        self.name = name
+        assert os.path.exists(self._path_to_binary), (self._path_to_binary)
+        self._vad_output = None
+
+    def Run(self, wav_file_path):
+        _signal = signal_processing.SignalProcessingUtils.LoadWav(
+            wav_file_path)
+        if _signal.channels != 1:
+            raise NotImplementedError('Multiple-channel'
+                                      ' annotations not implemented')
+        if _signal.frame_rate != 48000:
+            raise NotImplementedError('Frame rates '
+                                      'other than 48000 not implemented')
+
+        tmp_path = tempfile.mkdtemp()
+        try:
+            output_file_path = os.path.join(tmp_path, self.name + '_vad.tmp')
+            subprocess.call([
+                self._path_to_binary, '-i', wav_file_path, '-o',
+                output_file_path
+            ])
+            self._vad_output = np.fromfile(output_file_path, np.float32)
+        except Exception as e:
+            # str(e) instead of the Python 2-only e.message attribute.
+            logging.error('Error while running the ' + self.name +
+                          ' VAD (' + str(e) + ')')
+        finally:
+            if os.path.exists(tmp_path):
+                shutil.rmtree(tmp_path)
+
+    def GetVadOutput(self):
+        assert self._vad_output is not None
+        return self._vad_output
+
+    @classmethod
+    def ConstructVadDict(cls, vad_paths, vad_names):
+        external_vads = {}
+        for path, name in zip(vad_paths, vad_names):
+            external_vads[name] = ExternalVad(path, name)
+        return external_vads
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py
new file mode 100755
index 0000000000..f679f8c94a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+import argparse
+import numpy as np
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', required=True)
+    parser.add_argument('-o', required=True)
+
+    args = parser.parse_args()
+
+    array = np.arange(100, dtype=np.float32)
+    # Write raw float32 samples; binary mode is required under Python 3.
+    array.tofile(open(args.o, 'wb'))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc
new file mode 100644
index 0000000000..6f3b2d1dd7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { + +const char* const kErrorMessage = "-Out /path/to/output/file is mandatory"; + +// Writes fake output intended to be parsed by +// quality_assessment.eval_scores.PolqaScore. +void WriteOutputFile(absl::string_view output_file_path) { + RTC_CHECK_NE(output_file_path, ""); + std::ofstream out(std::string{output_file_path}); + RTC_CHECK(!out.bad()); + out << "* Fake Polqa output" << std::endl; + out << "FakeField1\tPolqaScore\tFakeField2" << std::endl; + out << "FakeValue1\t3.25\tFakeValue2" << std::endl; + out.close(); +} + +} // namespace + +int main(int argc, char* argv[]) { + // Find "-Out" and use its next argument as output file path. + RTC_CHECK_GE(argc, 3) << kErrorMessage; + const std::string kSoughtFlagName = "-Out"; + for (int i = 1; i < argc - 1; ++i) { + if (kSoughtFlagName.compare(argv[i]) == 0) { + WriteOutputFile(argv[i + 1]); + return 0; + } + } + RTC_FATAL() << kErrorMessage; +} + +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py new file mode 100644 index 0000000000..af022bd461 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py @@ -0,0 +1,97 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Input mixer module. +""" + +import logging +import os + +from . import exceptions +from . import signal_processing + + +class ApmInputMixer(object): + """Class to mix a set of audio segments down to the APM input.""" + + _HARD_CLIPPING_LOG_MSG = 'hard clipping detected in the mixed signal' + + def __init__(self): + pass + + @classmethod + def HardClippingLogMessage(cls): + """Returns the log message used when hard clipping is detected in the mix. + + This method is mainly intended to be used by the unit tests. + """ + return cls._HARD_CLIPPING_LOG_MSG + + @classmethod + def Mix(cls, output_path, capture_input_filepath, echo_filepath): + """Mixes capture and echo. + + Creates the overall capture input for APM by mixing the "echo-free" capture + signal with the echo signal (e.g., echo simulated via the + echo_path_simulation module). + + The echo signal cannot be shorter than the capture signal and the generated + mix will have the same duration of the capture signal. The latter property + is enforced in order to let the input of APM and the reference signal + created by TestDataGenerator have the same length (required for the + evaluation step). + + Hard-clipping may occur in the mix; a warning is raised when this happens. + + If `echo_filepath` is None, nothing is done and `capture_input_filepath` is + returned. + + Args: + speech: AudioSegment instance. + echo_path: AudioSegment instance or None. 
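+
+        For example, mixing capture 'c.wav' with echo 'e.wav' produces
+        'mix_capture_c_e.wav' under output_path.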
+ + Returns: + Path to the mix audio track file. + """ + if echo_filepath is None: + return capture_input_filepath + + # Build the mix output file name as a function of the echo file name. + # This ensures that if the internal parameters of the echo path simulator + # change, no erroneous cache hit occurs. + echo_file_name, _ = os.path.splitext(os.path.split(echo_filepath)[1]) + capture_input_file_name, _ = os.path.splitext( + os.path.split(capture_input_filepath)[1]) + mix_filepath = os.path.join( + output_path, + 'mix_capture_{}_{}.wav'.format(capture_input_file_name, + echo_file_name)) + + # Create the mix if not done yet. + mix = None + if not os.path.exists(mix_filepath): + echo_free_capture = signal_processing.SignalProcessingUtils.LoadWav( + capture_input_filepath) + echo = signal_processing.SignalProcessingUtils.LoadWav( + echo_filepath) + + if signal_processing.SignalProcessingUtils.CountSamples(echo) < ( + signal_processing.SignalProcessingUtils.CountSamples( + echo_free_capture)): + raise exceptions.InputMixerException( + 'echo cannot be shorter than capture') + + mix = echo_free_capture.overlay(echo) + signal_processing.SignalProcessingUtils.SaveWav(mix_filepath, mix) + + # Check if hard clipping occurs. + if mix is None: + mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + if signal_processing.SignalProcessingUtils.DetectHardClipping(mix): + logging.warning(cls._HARD_CLIPPING_LOG_MSG) + + return mix_filepath diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py new file mode 100644 index 0000000000..4fd5e4f1ee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py @@ -0,0 +1,140 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the input mixer module. +""" + +import logging +import os +import shutil +import tempfile +import unittest + +import mock + +from . import exceptions +from . import input_mixer +from . import signal_processing + + +class TestApmInputMixer(unittest.TestCase): + """Unit tests for the ApmInputMixer class. + """ + + # Audio track file names created in setUp(). + _FILENAMES = ['capture', 'echo_1', 'echo_2', 'shorter', 'longer'] + + # Target peak power level (dBFS) of each audio track file created in setUp(). + # These values are hand-crafted in order to make saturation happen when + # capture and echo_2 are mixed and the contrary for capture and echo_1. + # None means that the power is not changed. + _MAX_PEAK_POWER_LEVELS = [-10.0, -5.0, 0.0, None, None] + + # Audio track file durations in milliseconds. + _DURATIONS = [1000, 1000, 1000, 800, 1200] + + _SAMPLE_RATE = 48000 + + def setUp(self): + """Creates temporary data.""" + self._tmp_path = tempfile.mkdtemp() + + # Create audio track files. 
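+        # (capture peaks at -10 dBFS and echo_2 at 0 dBFS, so their mix is
+        # designed to hard-clip, while capture plus echo_1 at -5 dBFS is not;
+        # 'shorter' and 'longer' differ from capture only in duration.)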
+ self._audio_tracks = {} + for filename, peak_power, duration in zip(self._FILENAMES, + self._MAX_PEAK_POWER_LEVELS, + self._DURATIONS): + audio_track_filepath = os.path.join(self._tmp_path, + '{}.wav'.format(filename)) + + # Create a pure tone with the target peak power level. + template = signal_processing.SignalProcessingUtils.GenerateSilence( + duration=duration, sample_rate=self._SAMPLE_RATE) + signal = signal_processing.SignalProcessingUtils.GeneratePureTone( + template) + if peak_power is not None: + signal = signal.apply_gain(-signal.max_dBFS + peak_power) + + signal_processing.SignalProcessingUtils.SaveWav( + audio_track_filepath, signal) + self._audio_tracks[filename] = { + 'filepath': + audio_track_filepath, + 'num_samples': + signal_processing.SignalProcessingUtils.CountSamples(signal) + } + + def tearDown(self): + """Recursively deletes temporary folders.""" + shutil.rmtree(self._tmp_path) + + def testCheckMixSameDuration(self): + """Checks the duration when mixing capture and echo with same duration.""" + mix_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertTrue(os.path.exists(mix_filepath)) + + mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + self.assertEqual( + self._audio_tracks['capture']['num_samples'], + signal_processing.SignalProcessingUtils.CountSamples(mix)) + + def testRejectShorterEcho(self): + """Rejects echo signals that are shorter than the capture signal.""" + try: + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['shorter']['filepath']) + self.fail('no exception raised') + except exceptions.InputMixerException: + pass + + def testCheckMixDurationWithLongerEcho(self): + """Checks the duration when mixing an echo longer than the capture.""" + mix_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['longer']['filepath']) + self.assertTrue(os.path.exists(mix_filepath)) + + mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + self.assertEqual( + self._audio_tracks['capture']['num_samples'], + signal_processing.SignalProcessingUtils.CountSamples(mix)) + + def testCheckOutputFileNamesConflict(self): + """Checks that different echo files lead to different output file names.""" + mix1_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertTrue(os.path.exists(mix1_filepath)) + + mix2_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_2']['filepath']) + self.assertTrue(os.path.exists(mix2_filepath)) + + self.assertNotEqual(mix1_filepath, mix2_filepath) + + def testHardClippingLogExpected(self): + """Checks that hard clipping warning is raised when occurring.""" + logging.warning = mock.MagicMock(name='warning') + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_2']['filepath']) + logging.warning.assert_called_once_with( + input_mixer.ApmInputMixer.HardClippingLogMessage()) + + def testHardClippingLogNotExpected(self): + """Checks that hard clipping warning is not raised when not occurring.""" + logging.warning = mock.MagicMock(name='warning') + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, 
self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertNotIn( + mock.call(input_mixer.ApmInputMixer.HardClippingLogMessage()), + logging.warning.call_args_list) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py new file mode 100644 index 0000000000..b64fdcca89 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py @@ -0,0 +1,68 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Input signal creator module. +""" + +from . import exceptions +from . import signal_processing + + +class InputSignalCreator(object): + """Input signal creator class. + """ + + @classmethod + def Create(cls, name, raw_params): + """Creates a input signal and its metadata. + + Args: + name: Input signal creator name. + raw_params: Tuple of parameters to pass to the specific signal creator. + + Returns: + (AudioSegment, dict) tuple. + """ + try: + signal = {} + params = {} + + if name == 'pure_tone': + params['frequency'] = float(raw_params[0]) + params['duration'] = int(raw_params[1]) + signal = cls._CreatePureTone(params['frequency'], + params['duration']) + else: + raise exceptions.InputSignalCreatorException( + 'Invalid input signal creator name') + + # Complete metadata. + params['signal'] = name + + return signal, params + except (TypeError, AssertionError) as e: + raise exceptions.InputSignalCreatorException( + 'Invalid signal creator parameters: {}'.format(e)) + + @classmethod + def _CreatePureTone(cls, frequency, duration): + """ + Generates a pure tone at 48000 Hz. + + Args: + frequency: Float in (0-24000] (Hz). + duration: Integer (milliseconds). + + Returns: + AudioSegment instance. + """ + assert 0 < frequency <= 24000 + assert duration > 0 + template = signal_processing.SignalProcessingUtils.GenerateSilence( + duration) + return signal_processing.SignalProcessingUtils.GeneratePureTone( + template, frequency) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css new file mode 100644 index 0000000000..2f406bb002 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css @@ -0,0 +1,32 @@ +/* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+td.selected-score {
+  background-color: #DDD;
+}
+
+td.single-score-cell {
+  text-align: center;
+}
+
+.audio-inspector {
+  text-align: center;
+}
+
+.audio-inspector div {
+  margin-bottom: 0;
+  padding-bottom: 0;
+  padding-top: 0;
+}
+
+.audio-inspector div div {
+  margin-bottom: 0;
+  padding-bottom: 0;
+  padding-top: 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js
new file mode 100644
index 0000000000..8e47411058
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js
@@ -0,0 +1,376 @@
+// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+/**
+ * Opens the score stats inspector dialog.
+ * @param {String} dialogId: identifier of the dialog to show.
+ * @return {DOMElement} The dialog element that has been opened.
+ */
+function openScoreStatsInspector(dialogId) {
+  var dialog = document.getElementById(dialogId);
+  dialog.showModal();
+  return dialog;
+}
+
+/**
+ * Closes the score stats inspector dialog.
+ */
+function closeScoreStatsInspector() {
+  var dialog = document.querySelector('dialog[open]');
+  if (dialog == null)
+    return;
+  dialog.close();
+}
+
+/**
+ * Audio inspector class.
+ * @constructor
+ */
+function AudioInspector() {
+  console.debug('Creating an AudioInspector instance.');
+  this.audioPlayer_ = new Audio();
+  this.metadata_ = {};
+  this.currentScore_ = null;
+  this.audioInspector_ = null;
+  this.snackbarContainer_ = document.querySelector('#snackbar');
+
+  // Get base URL without anchors.
+  this.baseUrl_ = window.location.href;
+  var index = this.baseUrl_.indexOf('#');
+  if (index > 0)
+    this.baseUrl_ = this.baseUrl_.substr(0, index);
+  console.info('Base URL set to "' + this.baseUrl_ + '".');
+
+  window.event.stopPropagation();
+  this.createTextAreasForCopy_();
+  this.createAudioInspector_();
+  this.initializeEventHandlers_();
+
+  // When MDL is ready, parse the anchor (if any) to show the requested
+  // experiment.
+  var self = this;
+  document.querySelectorAll('header a')[0].addEventListener(
+      'mdl-componentupgraded', function() {
+        if (!self.parseWindowAnchor()) {
+          // If no experiment is requested, open the first section.
+          console.info('No anchor parsing, opening the first section.');
+          document.querySelectorAll('header a > span')[0].click();
+        }
+      });
+}
+
+/**
+ * Parses the anchor in the window URL.
+ * @return {bool} True if the parsing succeeded.
+ */
+AudioInspector.prototype.parseWindowAnchor = function() {
+  var index = location.href.indexOf('#');
+  if (index == -1) {
+    console.debug('No # found in the URL.');
+    return false;
+  }
+
+  var anchor = location.href.substr(index - location.href.length + 1);
+  console.info('Anchor changed: "' + anchor + '".');
+
+  var parts = anchor.split('&');
+  if (parts.length != 3) {
+    console.info('Ignoring anchor with invalid number of fields.');
+    return false;
+  }
+
+  var openDialog = document.querySelector('dialog[open]');
+  try {
+    // Open the requested dialog if not already open.
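+    // parts[0] encodes the index of the section to open (its leading
+    // character is skipped below), parts[1] the id of the score stats dialog
+    // and parts[2] the class of the score cell to select; this is the same
+    // format produced by the copy-URL buttons further down.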
+ if (!openDialog || openDialog.id != parts[1]) { + !openDialog || openDialog.close(); + document.querySelectorAll('header a > span')[ + parseInt(parts[0].substr(1))].click(); + openDialog = openScoreStatsInspector(parts[1]); + } + + // Trigger click on cell. + var cell = openDialog.querySelector('td.' + parts[2]); + cell.focus(); + cell.click(); + + this.showNotification_('Experiment selected.'); + return true; + } catch (e) { + this.showNotification_('Cannot select experiment :('); + console.error('Exception caught while selecting experiment: "' + e + '".'); + } + + return false; +} + +/** + * Set up the inspector for a new score. + * @param {DOMElement} element: Element linked to the selected score. + */ +AudioInspector.prototype.selectedScoreChange = function(element) { + if (this.currentScore_ == element) { return; } + if (this.currentScore_ != null) { + this.currentScore_.classList.remove('selected-score'); + } + this.currentScore_ = element; + this.currentScore_.classList.add('selected-score'); + this.stopAudio(); + + // Read metadata. + var matches = element.querySelectorAll('input[type=hidden]'); + this.metadata_ = {}; + for (var index = 0; index < matches.length; ++index) { + this.metadata_[matches[index].name] = matches[index].value; + } + + // Show the audio inspector interface. + var container = element.parentNode.parentNode.parentNode.parentNode; + var audioInspectorPlaceholder = container.querySelector( + '.audio-inspector-placeholder'); + this.moveInspector_(audioInspectorPlaceholder); +}; + +/** + * Stop playing audio. + */ +AudioInspector.prototype.stopAudio = function() { + console.info('Pausing audio play out.'); + this.audioPlayer_.pause(); +}; + +/** + * Show a text message using the snackbar. + */ +AudioInspector.prototype.showNotification_ = function(text) { + try { + this.snackbarContainer_.MaterialSnackbar.showSnackbar({ + message: text, timeout: 2000}); + } catch (e) { + // Fallback to an alert. + alert(text); + console.warn('Cannot use snackbar: "' + e + '"'); + } +} + +/** + * Move the audio inspector DOM node into the given parent. + * @param {DOMElement} newParentNode: New parent for the inspector. + */ +AudioInspector.prototype.moveInspector_ = function(newParentNode) { + newParentNode.appendChild(this.audioInspector_); +}; + +/** + * Play audio file from url. + * @param {string} metadataFieldName: Metadata field name. + */ +AudioInspector.prototype.playAudio = function(metadataFieldName) { + if (this.metadata_[metadataFieldName] == undefined) { return; } + if (this.metadata_[metadataFieldName] == 'None') { + alert('The selected stream was not used during the experiment.'); + return; + } + this.stopAudio(); + this.audioPlayer_.src = this.metadata_[metadataFieldName]; + console.debug('Audio source URL: "' + this.audioPlayer_.src + '"'); + this.audioPlayer_.play(); + console.info('Playing out audio.'); +}; + +/** + * Create hidden text areas to copy URLs. + * + * For each dialog, one text area is created since it is not possible to select + * text on a text area outside of the active dialog. 
+ */ +AudioInspector.prototype.createTextAreasForCopy_ = function() { + var self = this; + document.querySelectorAll('dialog.mdl-dialog').forEach(function(element) { + var textArea = document.createElement("textarea"); + textArea.classList.add('url-copy'); + textArea.style.position = 'fixed'; + textArea.style.bottom = 0; + textArea.style.left = 0; + textArea.style.width = '2em'; + textArea.style.height = '2em'; + textArea.style.border = 'none'; + textArea.style.outline = 'none'; + textArea.style.boxShadow = 'none'; + textArea.style.background = 'transparent'; + textArea.style.fontSize = '6px'; + element.appendChild(textArea); + }); +} + +/** + * Create audio inspector. + */ +AudioInspector.prototype.createAudioInspector_ = function() { + var buttonIndex = 0; + function getButtonHtml(icon, toolTipText, caption, metadataFieldName) { + var buttonId = 'audioInspectorButton' + buttonIndex++; + html = caption == null ? '' : caption; + html += '' + + return html; + } + + // TODO(alessiob): Add timeline and highlight current track by changing icon + // color. + + this.audioInspector_ = document.createElement('div'); + this.audioInspector_.classList.add('audio-inspector'); + this.audioInspector_.innerHTML = + '
' + + '
' + + '
' + + getButtonHtml('play_arrow', 'Simulated echo', 'Ein', + 'echo_filepath') + + '
' + + '
' + + getButtonHtml('stop', 'Stop playing [S]', null, '__stop__') + + '
' + + '
' + + getButtonHtml('play_arrow', 'Render stream', 'Rin', + 'render_filepath') + + '
' + + '
' + + '
' + + '
' + + '
' + + '
' + + getButtonHtml('play_arrow', 'Capture stream (APM input) [1]', + 'Y\'in', 'capture_filepath') + + '
' + + '
APM
' + + '
' + + getButtonHtml('play_arrow', 'APM output [2]', 'Yout', + 'apm_output_filepath') + + '
' + + '
' + + '
' + + '
' + + '
' + + '
' + + getButtonHtml('play_arrow', 'Echo-free capture stream', + 'Yin', 'echo_free_capture_filepath') + + '
' + + '
' + + getButtonHtml('play_arrow', 'Clean capture stream', + 'Yclean', 'clean_capture_input_filepath') + + '
' + + '
' + + getButtonHtml('play_arrow', 'APM reference [3]', 'Yref', + 'apm_reference_filepath') + + '
' + + '
' + + '
'; + + // Add an invisible node as initial container for the audio inspector. + var parent = document.createElement('div'); + parent.style.display = 'none'; + this.moveInspector_(parent); + document.body.appendChild(parent); +}; + +/** + * Initialize event handlers. + */ +AudioInspector.prototype.initializeEventHandlers_ = function() { + var self = this; + + // Score cells. + document.querySelectorAll('td.single-score-cell').forEach(function(element) { + element.onclick = function() { + self.selectedScoreChange(this); + } + }); + + // Copy anchor URLs icons. + if (document.queryCommandSupported('copy')) { + document.querySelectorAll('td.single-score-cell button').forEach( + function(element) { + element.onclick = function() { + // Find the text area in the dialog. + var textArea = element.closest('dialog').querySelector( + 'textarea.url-copy'); + + // Copy. + textArea.value = self.baseUrl_ + '#' + element.getAttribute( + 'data-anchor'); + textArea.select(); + try { + if (!document.execCommand('copy')) + throw 'Copy returned false'; + self.showNotification_('Experiment URL copied.'); + } catch (e) { + self.showNotification_('Cannot copy experiment URL :('); + console.error(e); + } + } + }); + } else { + self.showNotification_( + 'The copy command is disabled. URL copy is not enabled.'); + } + + // Audio inspector buttons. + this.audioInspector_.querySelectorAll('button').forEach(function(element) { + var target = element.querySelector('input[type=hidden]'); + if (target == null) { return; } + element.onclick = function() { + if (target.value == '__stop__') { + self.stopAudio(); + } else { + self.playAudio(target.value); + } + }; + }); + + // Dialog close handlers. + var dialogs = document.querySelectorAll('dialog').forEach(function(element) { + element.onclose = function() { + self.stopAudio(); + } + }); + + // Keyboard shortcuts. + window.onkeyup = function(e) { + var key = e.keyCode ? e.keyCode : e.which; + switch (key) { + case 49: // 1. + self.playAudio('capture_filepath'); + break; + case 50: // 2. + self.playAudio('apm_output_filepath'); + break; + case 51: // 3. + self.playAudio('apm_reference_filepath'); + break; + case 83: // S. + case 115: // s. + self.stopAudio(); + break; + } + }; + + // Hash change. + window.onhashchange = function(e) { + self.parseWindowAnchor(); + } +}; diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py new file mode 100644 index 0000000000..95e801903d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py @@ -0,0 +1,359 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Signal processing utility module. 
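+
+Exposes SignalProcessingUtils, a collection of pydub/numpy/scipy based
+helpers (wav I/O, signal generation, mixing and basic spectral analysis)
+shared by the quality assessment scripts.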
+""" + +import array +import logging +import os +import sys +import enum + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +try: + import pydub + import pydub.generators +except ImportError: + logging.critical('Cannot import the third-party Python package pydub') + sys.exit(1) + +try: + import scipy.signal + import scipy.fftpack +except ImportError: + logging.critical('Cannot import the third-party Python package scipy') + sys.exit(1) + +from . import exceptions + + +class SignalProcessingUtils(object): + """Collection of signal processing utilities. + """ + + @enum.unique + class MixPadding(enum.Enum): + NO_PADDING = 0 + ZERO_PADDING = 1 + LOOP = 2 + + def __init__(self): + pass + + @classmethod + def LoadWav(cls, filepath, channels=1): + """Loads wav file. + + Args: + filepath: path to the wav audio track file to load. + channels: number of channels (downmixing to mono by default). + + Returns: + AudioSegment instance. + """ + if not os.path.exists(filepath): + logging.error('cannot find the <%s> audio track file', filepath) + raise exceptions.FileNotFoundError() + return pydub.AudioSegment.from_file(filepath, + format='wav', + channels=channels) + + @classmethod + def SaveWav(cls, output_filepath, signal): + """Saves wav file. + + Args: + output_filepath: path to the wav audio track file to save. + signal: AudioSegment instance. + """ + return signal.export(output_filepath, format='wav') + + @classmethod + def CountSamples(cls, signal): + """Number of samples per channel. + + Args: + signal: AudioSegment instance. + + Returns: + An integer. + """ + number_of_samples = len(signal.get_array_of_samples()) + assert signal.channels > 0 + assert number_of_samples % signal.channels == 0 + return number_of_samples / signal.channels + + @classmethod + def GenerateSilence(cls, duration=1000, sample_rate=48000): + """Generates silence. + + This method can also be used to create a template AudioSegment instance. + A template can then be used with other Generate*() methods accepting an + AudioSegment instance as argument. + + Args: + duration: duration in ms. + sample_rate: sample rate. + + Returns: + AudioSegment instance. + """ + return pydub.AudioSegment.silent(duration, sample_rate) + + @classmethod + def GeneratePureTone(cls, template, frequency=440.0): + """Generates a pure tone. + + The pure tone is generated with the same duration and in the same format of + the given template signal. + + Args: + template: AudioSegment instance. + frequency: Frequency of the pure tone in Hz. + + Return: + AudioSegment instance. + """ + if frequency > template.frame_rate >> 1: + raise exceptions.SignalProcessingException('Invalid frequency') + + generator = pydub.generators.Sine(sample_rate=template.frame_rate, + bit_depth=template.sample_width * 8, + freq=frequency) + + return generator.to_audio_segment(duration=len(template), volume=0.0) + + @classmethod + def GenerateWhiteNoise(cls, template): + """Generates white noise. + + The white noise is generated with the same duration and in the same format + of the given template signal. + + Args: + template: AudioSegment instance. + + Return: + AudioSegment instance. 
+ """ + generator = pydub.generators.WhiteNoise( + sample_rate=template.frame_rate, + bit_depth=template.sample_width * 8) + return generator.to_audio_segment(duration=len(template), volume=0.0) + + @classmethod + def AudioSegmentToRawData(cls, signal): + samples = signal.get_array_of_samples() + if samples.typecode != 'h': + raise exceptions.SignalProcessingException( + 'Unsupported samples type') + return np.array(signal.get_array_of_samples(), np.int16) + + @classmethod + def Fft(cls, signal, normalize=True): + if signal.channels != 1: + raise NotImplementedError('multiple-channel FFT not implemented') + x = cls.AudioSegmentToRawData(signal).astype(np.float32) + if normalize: + x /= max(abs(np.max(x)), 1.0) + y = scipy.fftpack.fft(x) + return y[:len(y) / 2] + + @classmethod + def DetectHardClipping(cls, signal, threshold=2): + """Detects hard clipping. + + Hard clipping is simply detected by counting samples that touch either the + lower or upper bound too many times in a row (according to `threshold`). + The presence of a single sequence of samples meeting such property is enough + to label the signal as hard clipped. + + Args: + signal: AudioSegment instance. + threshold: minimum number of samples at full-scale in a row. + + Returns: + True if hard clipping is detect, False otherwise. + """ + if signal.channels != 1: + raise NotImplementedError( + 'multiple-channel clipping not implemented') + if signal.sample_width != 2: # Note that signal.sample_width is in bytes. + raise exceptions.SignalProcessingException( + 'hard-clipping detection only supported for 16 bit samples') + samples = cls.AudioSegmentToRawData(signal) + + # Detect adjacent clipped samples. + samples_type_info = np.iinfo(samples.dtype) + mask_min = samples == samples_type_info.min + mask_max = samples == samples_type_info.max + + def HasLongSequence(vector, min_legth=threshold): + """Returns True if there are one or more long sequences of True flags.""" + seq_length = 0 + for b in vector: + seq_length = seq_length + 1 if b else 0 + if seq_length >= min_legth: + return True + return False + + return HasLongSequence(mask_min) or HasLongSequence(mask_max) + + @classmethod + def ApplyImpulseResponse(cls, signal, impulse_response): + """Applies an impulse response to a signal. + + Args: + signal: AudioSegment instance. + impulse_response: list or numpy vector of float values. + + Returns: + AudioSegment instance. + """ + # Get samples. + assert signal.channels == 1, ( + 'multiple-channel recordings not supported') + samples = signal.get_array_of_samples() + + # Convolve. + logging.info( + 'applying %d order impulse response to a signal lasting %d ms', + len(impulse_response), len(signal)) + convolved_samples = scipy.signal.fftconvolve(in1=samples, + in2=impulse_response, + mode='full').astype( + np.int16) + logging.info('convolution computed') + + # Cast. + convolved_samples = array.array(signal.array_type, convolved_samples) + + # Verify. + logging.debug('signal length: %d samples', len(samples)) + logging.debug('convolved signal length: %d samples', + len(convolved_samples)) + assert len(convolved_samples) > len(samples) + + # Generate convolved signal AudioSegment instance. 
+        convolved_signal = pydub.AudioSegment(data=convolved_samples,
+                                              metadata={
+                                                  'sample_width':
+                                                  signal.sample_width,
+                                                  'frame_rate':
+                                                  signal.frame_rate,
+                                                  'frame_width':
+                                                  signal.frame_width,
+                                                  'channels': signal.channels,
+                                              })
+        assert len(convolved_signal) > len(signal)
+
+        return convolved_signal
+
+    @classmethod
+    def Normalize(cls, signal):
+        """Normalizes a signal.
+
+        Args:
+          signal: AudioSegment instance.
+
+        Returns:
+          An AudioSegment instance.
+        """
+        return signal.apply_gain(-signal.max_dBFS)
+
+    @classmethod
+    def Copy(cls, signal):
+        """Makes a copy of a signal.
+
+        Args:
+          signal: AudioSegment instance.
+
+        Returns:
+          An AudioSegment instance.
+        """
+        return pydub.AudioSegment(data=signal.get_array_of_samples(),
+                                  metadata={
+                                      'sample_width': signal.sample_width,
+                                      'frame_rate': signal.frame_rate,
+                                      'frame_width': signal.frame_width,
+                                      'channels': signal.channels,
+                                  })
+
+    @classmethod
+    def MixSignals(cls,
+                   signal,
+                   noise,
+                   target_snr=0.0,
+                   pad_noise=MixPadding.NO_PADDING):
+        """Mixes `signal` and `noise` with a target SNR.
+
+        Mixes `signal` and `noise` with a desired SNR by scaling `noise`.
+        If the target SNR is +/- infinite, a copy of signal/noise is returned.
+        If `signal` is shorter than `noise`, the length of the mix equals that
+        of `signal`. Otherwise, the mix length depends on whether padding is
+        applied. When padding is not applied, that is `pad_noise` is set to
+        NO_PADDING (default), the mix length equals that of `noise` - i.e.,
+        `signal` is truncated. Otherwise, `noise` is extended and the
+        resulting mix has the same length as `signal`.
+
+        Args:
+          signal: AudioSegment instance (signal).
+          noise: AudioSegment instance (noise).
+          target_snr: float, numpy.Inf or -numpy.Inf (dB).
+          pad_noise: SignalProcessingUtils.MixPadding, default: NO_PADDING.
+
+        Returns:
+          An AudioSegment instance.
+        """
+        # Handle infinite target SNR.
+        if target_snr == -np.Inf:
+            # Return a copy of noise.
+            logging.warning('SNR = -Inf, returning noise')
+            return cls.Copy(noise)
+        elif target_snr == np.Inf:
+            # Return a copy of signal.
+            logging.warning('SNR = +Inf, returning signal')
+            return cls.Copy(signal)
+
+        # Check signal and noise power.
+        signal_power = float(signal.dBFS)
+        noise_power = float(noise.dBFS)
+        if signal_power == -np.Inf:
+            logging.error('signal has -Inf power, cannot mix')
+            raise exceptions.SignalProcessingException(
+                'cannot mix a signal with -Inf power')
+        if noise_power == -np.Inf:
+            logging.error('noise has -Inf power, cannot mix')
+            raise exceptions.SignalProcessingException(
+                'cannot mix a signal with -Inf power')
+
+        # Mix.
+        gain_db = signal_power - noise_power - target_snr
+        signal_duration = len(signal)
+        noise_duration = len(noise)
+        if signal_duration <= noise_duration:
+            # Ignore `pad_noise`: `noise` is truncated if longer than
+            # `signal`, so the mix will have the same length as `signal`.
+            return signal.overlay(noise.apply_gain(gain_db))
+        elif pad_noise == cls.MixPadding.NO_PADDING:
+            # `signal` is longer than `noise`, but no padding is applied to
+            # `noise`. Truncate `signal`.
+            return noise.overlay(signal, gain_during_overlay=gain_db)
+        elif pad_noise == cls.MixPadding.ZERO_PADDING:
+            # TODO(alessiob): Check that this works as expected.
+            return signal.overlay(noise.apply_gain(gain_db))
+        elif pad_noise == cls.MixPadding.LOOP:
+            # `signal` is longer than `noise`, extend `noise` by looping.
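+            # With loop=True, pydub repeats the overlaid `noise` back-to-back
+            # until the end of `signal` is reached, so the mix keeps the
+            # duration of `signal`.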
+            return signal.overlay(noise.apply_gain(gain_db), loop=True)
+        else:
+            raise exceptions.SignalProcessingException('invalid padding type')
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
new file mode 100644
index 0000000000..881fb66800
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the signal_processing module.
+"""
+
+import unittest
+
+import numpy as np
+import pydub
+
+from . import exceptions
+from . import signal_processing
+
+
+class TestSignalProcessing(unittest.TestCase):
+    """Unit tests for the signal_processing module.
+    """
+
+    def testMixSignals(self):
+        # Generate a template signal with which white noise can be generated.
+        silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+
+        # Generate two distinct AudioSegment instances with 1 second of white
+        # noise.
+        signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+        noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+
+        # Extract samples.
+        signal_samples = signal.get_array_of_samples()
+        noise_samples = noise.get_array_of_samples()
+
+        # Test target SNR -Inf (noise expected).
+        mix_neg_inf = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, -np.Inf)
+        self.assertEqual(len(noise), len(mix_neg_inf))  # Check duration.
+        mix_neg_inf_samples = mix_neg_inf.get_array_of_samples()
+        self.assertTrue(  # Check samples.
+            all([x == y for x, y in zip(noise_samples, mix_neg_inf_samples)]))
+
+        # Test target SNR 0.0 (different data expected).
+        mix_0 = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, 0.0)
+        self.assertEqual(len(signal), len(mix_0))  # Check duration.
+        self.assertEqual(len(noise), len(mix_0))
+        mix_0_samples = mix_0.get_array_of_samples()
+        self.assertTrue(
+            any([x != y for x, y in zip(signal_samples, mix_0_samples)]))
+        self.assertTrue(
+            any([x != y for x, y in zip(noise_samples, mix_0_samples)]))
+
+        # Test target SNR +Inf (signal expected).
+        mix_pos_inf = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, np.Inf)
+        self.assertEqual(len(signal), len(mix_pos_inf))  # Check duration.
+        mix_pos_inf_samples = mix_pos_inf.get_array_of_samples()
+        self.assertTrue(  # Check samples.
+ all([x == y for x, y in zip(signal_samples, mix_pos_inf_samples)])) + + def testMixSignalsMinInfPower(self): + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + + with self.assertRaises(exceptions.SignalProcessingException): + _ = signal_processing.SignalProcessingUtils.MixSignals( + signal, silence, 0.0) + + with self.assertRaises(exceptions.SignalProcessingException): + _ = signal_processing.SignalProcessingUtils.MixSignals( + silence, signal, 0.0) + + def testMixSignalNoiseDifferentLengths(self): + # Test signals. + shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=1000, frame_rate=8000)) + longer = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=2000, frame_rate=8000)) + + # When the signal is shorter than the noise, the mix length always equals + # that of the signal regardless of whether padding is applied. + # No noise padding, length of signal less than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=shorter, + noise=longer, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + NO_PADDING) + self.assertEqual(len(shorter), len(mix)) + # With noise padding, length of signal less than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=shorter, + noise=longer, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + self.assertEqual(len(shorter), len(mix)) + + # When the signal is longer than the noise, the mix length depends on + # whether padding is applied. + # No noise padding, length of signal greater than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + NO_PADDING) + self.assertEqual(len(shorter), len(mix)) + # With noise padding, length of signal greater than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + self.assertEqual(len(longer), len(mix)) + + def testMixSignalNoisePaddingTypes(self): + # Test signals. + shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=1000, frame_rate=8000)) + longer = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=2000, frame_rate=8000), 440.0) + + # Zero padding: expect pure tone only in 1-2s. + mix_zero_pad = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + + # Loop: expect pure tone plus noise in 1-2s. + mix_loop = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP) + + def Energy(signal): + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + signal).astype(np.float32) + return np.sum(samples * samples) + + e_mix_zero_pad = Energy(mix_zero_pad[-1000:]) + e_mix_loop = Energy(mix_loop[-1000:]) + self.assertLess(0, e_mix_zero_pad) + self.assertLess(e_mix_zero_pad, e_mix_loop) + + def testMixSignalSnr(self): + # Test signals. 
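+        # Two pure tones, 64 ms at 8 kHz each: a 512-point FFT gives a bin
+        # resolution of 15.625 Hz, hence 250 Hz and 3000 Hz fall into bins
+        # #16 and #192 (see ToneAmplitudes() below).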
+ tone_low = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 250.0) + tone_high = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 3000.0) + + def ToneAmplitudes(mix): + """Returns the amplitude of the coefficients #16 and #192, which + correspond to the tones at 250 and 3k Hz respectively.""" + mix_fft = np.absolute( + signal_processing.SignalProcessingUtils.Fft(mix)) + return mix_fft[16], mix_fft[192] + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, noise=tone_high, target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, noise=tone_low, target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, noise=tone_high, target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, noise=tone_low, target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py new file mode 100644 index 0000000000..69b3a1624e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py @@ -0,0 +1,446 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""APM module simulator. +""" + +import logging +import os + +from . import annotations +from . import data_access +from . import echo_path_simulation +from . import echo_path_simulation_factory +from . import eval_scores +from . import exceptions +from . import input_mixer +from . import input_signal_creator +from . import signal_processing +from . import test_data_generation + + +class ApmModuleSimulator(object): + """Audio processing module (APM) simulator class. 
+ """ + + _TEST_DATA_GENERATOR_CLASSES = ( + test_data_generation.TestDataGenerator.REGISTERED_CLASSES) + _EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES + + _PREFIX_APM_CONFIG = 'apmcfg-' + _PREFIX_CAPTURE = 'capture-' + _PREFIX_RENDER = 'render-' + _PREFIX_ECHO_SIMULATOR = 'echosim-' + _PREFIX_TEST_DATA_GEN = 'datagen-' + _PREFIX_TEST_DATA_GEN_PARAMS = 'datagen_params-' + _PREFIX_SCORE = 'score-' + + def __init__(self, + test_data_generator_factory, + evaluation_score_factory, + ap_wrapper, + evaluator, + external_vads=None): + if external_vads is None: + external_vads = {} + self._test_data_generator_factory = test_data_generator_factory + self._evaluation_score_factory = evaluation_score_factory + self._audioproc_wrapper = ap_wrapper + self._evaluator = evaluator + self._annotator = annotations.AudioAnnotationsExtractor( + annotations.AudioAnnotationsExtractor.VadType.ENERGY_THRESHOLD + | annotations.AudioAnnotationsExtractor.VadType.WEBRTC_COMMON_AUDIO + | annotations.AudioAnnotationsExtractor.VadType.WEBRTC_APM, + external_vads) + + # Init. + self._test_data_generator_factory.SetOutputDirectoryPrefix( + self._PREFIX_TEST_DATA_GEN_PARAMS) + self._evaluation_score_factory.SetScoreFilenamePrefix( + self._PREFIX_SCORE) + + # Properties for each run. + self._base_output_path = None + self._output_cache_path = None + self._test_data_generators = None + self._evaluation_score_workers = None + self._config_filepaths = None + self._capture_input_filepaths = None + self._render_input_filepaths = None + self._echo_path_simulator_class = None + + @classmethod + def GetPrefixApmConfig(cls): + return cls._PREFIX_APM_CONFIG + + @classmethod + def GetPrefixCapture(cls): + return cls._PREFIX_CAPTURE + + @classmethod + def GetPrefixRender(cls): + return cls._PREFIX_RENDER + + @classmethod + def GetPrefixEchoSimulator(cls): + return cls._PREFIX_ECHO_SIMULATOR + + @classmethod + def GetPrefixTestDataGenerator(cls): + return cls._PREFIX_TEST_DATA_GEN + + @classmethod + def GetPrefixTestDataGeneratorParameters(cls): + return cls._PREFIX_TEST_DATA_GEN_PARAMS + + @classmethod + def GetPrefixScore(cls): + return cls._PREFIX_SCORE + + def Run(self, + config_filepaths, + capture_input_filepaths, + test_data_generator_names, + eval_score_names, + output_dir, + render_input_filepaths=None, + echo_path_simulator_name=( + echo_path_simulation.NoEchoPathSimulator.NAME)): + """Runs the APM simulation. + + Initializes paths and required instances, then runs all the simulations. + The render input can be optionally added. If added, the number of capture + input audio tracks and the number of render input audio tracks have to be + equal. The two lists are used to form pairs of capture and render input. + + Args: + config_filepaths: set of APM configuration files to test. + capture_input_filepaths: set of capture input audio track files to test. + test_data_generator_names: set of test data generator names to test. + eval_score_names: set of evaluation score names to test. + output_dir: base path to the output directory for wav files and outcomes. + render_input_filepaths: set of render input audio track files to test. + echo_path_simulator_name: name of the echo path simulator to use when + render input is provided. 
+ """ + assert render_input_filepaths is None or ( + len(capture_input_filepaths) == len(render_input_filepaths)), ( + 'render input set size not matching input set size') + assert render_input_filepaths is None or echo_path_simulator_name in ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES), ( + 'invalid echo path simulator') + self._base_output_path = os.path.abspath(output_dir) + + # Output path used to cache the data shared across simulations. + self._output_cache_path = os.path.join(self._base_output_path, + '_cache') + + # Instance test data generators. + self._test_data_generators = [ + self._test_data_generator_factory.GetInstance( + test_data_generators_class=( + self._TEST_DATA_GENERATOR_CLASSES[name])) + for name in (test_data_generator_names) + ] + + # Instance evaluation score workers. + self._evaluation_score_workers = [ + self._evaluation_score_factory.GetInstance( + evaluation_score_class=self._EVAL_SCORE_WORKER_CLASSES[name]) + for (name) in eval_score_names + ] + + # Set APM configuration file paths. + self._config_filepaths = self._CreatePathsCollection(config_filepaths) + + # Set probing signal file paths. + if render_input_filepaths is None: + # Capture input only. + self._capture_input_filepaths = self._CreatePathsCollection( + capture_input_filepaths) + self._render_input_filepaths = None + else: + # Set both capture and render input signals. + self._SetTestInputSignalFilePaths(capture_input_filepaths, + render_input_filepaths) + + # Set the echo path simulator class. + self._echo_path_simulator_class = ( + echo_path_simulation.EchoPathSimulator. + REGISTERED_CLASSES[echo_path_simulator_name]) + + self._SimulateAll() + + def _SimulateAll(self): + """Runs all the simulations. + + Iterates over the combinations of APM configurations, probing signals, and + test data generators. This method is mainly responsible for the creation of + the cache and output directories required in order to call _Simulate(). + """ + without_render_input = self._render_input_filepaths is None + + # Try different APM config files. + for config_name in self._config_filepaths: + config_filepath = self._config_filepaths[config_name] + + # Try different capture-render pairs. + for capture_input_name in self._capture_input_filepaths: + # Output path for the capture signal annotations. + capture_annotations_cache_path = os.path.join( + self._output_cache_path, + self._PREFIX_CAPTURE + capture_input_name) + data_access.MakeDirectory(capture_annotations_cache_path) + + # Capture. + capture_input_filepath = self._capture_input_filepaths[ + capture_input_name] + if not os.path.exists(capture_input_filepath): + # If the input signal file does not exist, try to create using the + # available input signal creators. + self._CreateInputSignal(capture_input_filepath) + assert os.path.exists(capture_input_filepath) + self._ExtractCaptureAnnotations( + capture_input_filepath, capture_annotations_cache_path) + + # Render and simulated echo path (optional). + render_input_filepath = None if without_render_input else ( + self._render_input_filepaths[capture_input_name]) + render_input_name = '(none)' if without_render_input else ( + self._ExtractFileName(render_input_filepath)) + echo_path_simulator = (echo_path_simulation_factory. + EchoPathSimulatorFactory.GetInstance( + self._echo_path_simulator_class, + render_input_filepath)) + + # Try different test data generators. 
+                for test_data_generators in self._test_data_generators:
+                    logging.info(
+                        'APM config preset: <%s>, capture: <%s>, render: <%s>, '
+                        'test data generator: <%s>, echo simulator: <%s>',
+                        config_name, capture_input_name, render_input_name,
+                        test_data_generators.NAME, echo_path_simulator.NAME)
+
+                    # Output path for the generated test data.
+                    test_data_cache_path = os.path.join(
+                        capture_annotations_cache_path,
+                        self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+                    data_access.MakeDirectory(test_data_cache_path)
+                    logging.debug('test data cache path: <%s>',
+                                  test_data_cache_path)
+
+                    # Output path for the echo simulator and APM input mixer
+                    # output.
+                    echo_test_data_cache_path = os.path.join(
+                        test_data_cache_path,
+                        'echosim-{}'.format(echo_path_simulator.NAME))
+                    data_access.MakeDirectory(echo_test_data_cache_path)
+                    logging.debug('echo test data cache path: <%s>',
+                                  echo_test_data_cache_path)
+
+                    # Full output path.
+                    output_path = os.path.join(
+                        self._base_output_path,
+                        self._PREFIX_APM_CONFIG + config_name,
+                        self._PREFIX_CAPTURE + capture_input_name,
+                        self._PREFIX_RENDER + render_input_name,
+                        self._PREFIX_ECHO_SIMULATOR + echo_path_simulator.NAME,
+                        self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+                    data_access.MakeDirectory(output_path)
+                    logging.debug('output path: <%s>', output_path)
+
+                    self._Simulate(test_data_generators,
+                                   capture_input_filepath,
+                                   render_input_filepath, test_data_cache_path,
+                                   echo_test_data_cache_path, output_path,
+                                   config_filepath, echo_path_simulator)
+
+    @staticmethod
+    def _CreateInputSignal(input_signal_filepath):
+        """Creates a missing input signal file.
+
+        The file name is parsed to extract the input signal creator and its
+        parameters. If a creator is matched and the parameters are valid, a
+        new signal is generated and written to `input_signal_filepath`.
+
+        Args:
+          input_signal_filepath: Path to the input signal audio file to write.
+
+        Raises:
+          InputSignalCreatorException
+        """
+        filename = os.path.splitext(
+            os.path.split(input_signal_filepath)[-1])[0]
+        filename_parts = filename.split('-')
+
+        if len(filename_parts) < 2:
+            raise exceptions.InputSignalCreatorException(
+                'Cannot parse input signal file name')
+
+        signal, metadata = input_signal_creator.InputSignalCreator.Create(
+            filename_parts[0], filename_parts[1].split('_'))
+
+        signal_processing.SignalProcessingUtils.SaveWav(
+            input_signal_filepath, signal)
+        data_access.Metadata.SaveFileMetadata(input_signal_filepath, metadata)
+
+    def _ExtractCaptureAnnotations(self,
+                                   input_filepath,
+                                   output_path,
+                                   annotation_name=""):
+        self._annotator.Extract(input_filepath)
+        self._annotator.Save(output_path, annotation_name)
+
+    def _Simulate(self, test_data_generators, clean_capture_input_filepath,
+                  render_input_filepath, test_data_cache_path,
+                  echo_test_data_cache_path, output_path, config_filepath,
+                  echo_path_simulator):
+        """Runs a single set of simulations.
+
+        Simulates a given combination of APM configuration, probing signal,
+        and test data generator. It iterates over the test data generator's
+        internal configurations.
+
+        Args:
+          test_data_generators: TestDataGenerator instance.
+          clean_capture_input_filepath: capture input audio track file to be
+                                        processed by a test data generator and
+                                        not affected by echo.
+          render_input_filepath: render input audio track file to test.
+          test_data_cache_path: path for the generated test audio track files.
+          echo_test_data_cache_path: path for the echo simulator.
+          output_path: base output path for the test data generator.
+ config_filepath: APM configuration file to test. + echo_path_simulator: EchoPathSimulator instance. + """ + # Generate pairs of noisy input and reference signal files. + test_data_generators.Generate( + input_signal_filepath=clean_capture_input_filepath, + test_data_cache_path=test_data_cache_path, + base_output_path=output_path) + + # Extract metadata linked to the clean input file (if any). + apm_input_metadata = None + try: + apm_input_metadata = data_access.Metadata.LoadFileMetadata( + clean_capture_input_filepath) + except IOError as e: + apm_input_metadata = {} + apm_input_metadata['test_data_gen_name'] = test_data_generators.NAME + apm_input_metadata['test_data_gen_config'] = None + + # For each test data pair, simulate a call and evaluate. + for config_name in test_data_generators.config_names: + logging.info(' - test data generator config: <%s>', config_name) + apm_input_metadata['test_data_gen_config'] = config_name + + # Paths to the test data generator output. + # Note that the reference signal does not depend on the render input + # which is optional. + noisy_capture_input_filepath = ( + test_data_generators.noisy_signal_filepaths[config_name]) + reference_signal_filepath = ( + test_data_generators.reference_signal_filepaths[config_name]) + + # Output path for the evaluation (e.g., APM output file). + evaluation_output_path = test_data_generators.apm_output_paths[ + config_name] + + # Paths to the APM input signals. + echo_path_filepath = echo_path_simulator.Simulate( + echo_test_data_cache_path) + apm_input_filepath = input_mixer.ApmInputMixer.Mix( + echo_test_data_cache_path, noisy_capture_input_filepath, + echo_path_filepath) + + # Extract annotations for the APM input mix. + apm_input_basepath, apm_input_filename = os.path.split( + apm_input_filepath) + self._ExtractCaptureAnnotations( + apm_input_filepath, apm_input_basepath, + os.path.splitext(apm_input_filename)[0] + '-') + + # Simulate a call using APM. + self._audioproc_wrapper.Run( + config_filepath=config_filepath, + capture_input_filepath=apm_input_filepath, + render_input_filepath=render_input_filepath, + output_path=evaluation_output_path) + + try: + # Evaluate. + self._evaluator.Run( + evaluation_score_workers=self._evaluation_score_workers, + apm_input_metadata=apm_input_metadata, + apm_output_filepath=self._audioproc_wrapper. + output_filepath, + reference_input_filepath=reference_signal_filepath, + render_input_filepath=render_input_filepath, + output_path=evaluation_output_path, + ) + + # Save simulation metadata. + data_access.Metadata.SaveAudioTestDataPaths( + output_path=evaluation_output_path, + clean_capture_input_filepath=clean_capture_input_filepath, + echo_free_capture_filepath=noisy_capture_input_filepath, + echo_filepath=echo_path_filepath, + render_filepath=render_input_filepath, + capture_filepath=apm_input_filepath, + apm_output_filepath=self._audioproc_wrapper. + output_filepath, + apm_reference_filepath=reference_signal_filepath, + apm_config_filepath=config_filepath, + ) + except exceptions.EvaluationScoreException as e: + logging.warning('the evaluation failed: %s', e.message) + continue + + def _SetTestInputSignalFilePaths(self, capture_input_filepaths, + render_input_filepaths): + """Sets input and render input file paths collections. + + Pairs the input and render input files by storing the file paths into two + collections. The key is the file name of the input file. + + Args: + capture_input_filepaths: list of file paths. + render_input_filepaths: list of file paths. 
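+
+        The two lists must have the same length; the i-th capture file is
+        paired with the i-th render file, and each pair is keyed by the
+        capture file name without extension.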
+ """ + self._capture_input_filepaths = {} + self._render_input_filepaths = {} + assert len(capture_input_filepaths) == len(render_input_filepaths) + for capture_input_filepath, render_input_filepath in zip( + capture_input_filepaths, render_input_filepaths): + name = self._ExtractFileName(capture_input_filepath) + self._capture_input_filepaths[name] = os.path.abspath( + capture_input_filepath) + self._render_input_filepaths[name] = os.path.abspath( + render_input_filepath) + + @classmethod + def _CreatePathsCollection(cls, filepaths): + """Creates a collection of file paths. + + Given a list of file paths, makes a collection with one item for each file + path. The value is absolute path, the key is the file name without + extenstion. + + Args: + filepaths: list of file paths. + + Returns: + A dict. + """ + filepaths_collection = {} + for filepath in filepaths: + name = cls._ExtractFileName(filepath) + filepaths_collection[name] = os.path.abspath(filepath) + return filepaths_collection + + @classmethod + def _ExtractFileName(cls, filepath): + return os.path.splitext(os.path.split(filepath)[-1])[0] diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py new file mode 100644 index 0000000000..78ca17f589 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py @@ -0,0 +1,203 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the simulation module. +""" + +import logging +import os +import shutil +import tempfile +import unittest + +import mock +import pydub + +from . import audioproc_wrapper +from . import eval_scores_factory +from . import evaluation +from . import external_vad +from . import signal_processing +from . import simulation +from . import test_data_generation_factory + + +class TestApmModuleSimulator(unittest.TestCase): + """Unit tests for the ApmModuleSimulator class. + """ + + def setUp(self): + """Create temporary folders and fake audio track.""" + self._output_path = tempfile.mkdtemp() + self._tmp_path = tempfile.mkdtemp() + + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + fake_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + self._fake_audio_track_path = os.path.join(self._output_path, + 'fake.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_audio_track_path, fake_signal) + + def tearDown(self): + """Recursively delete temporary folders.""" + shutil.rmtree(self._output_path) + shutil.rmtree(self._tmp_path) + + def testSimulation(self): + # Instance dependencies to mock and inject. + ap_wrapper = audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH) + evaluator = evaluation.ApmModuleEvaluator() + ap_wrapper.Run = mock.MagicMock(name='Run') + evaluator.Run = mock.MagicMock(name='Run') + + # Instance non-mocked dependencies. 
+        test_data_generator_factory = (
+            test_data_generation_factory.TestDataGeneratorFactory(
+                aechen_ir_database_path='',
+                noise_tracks_path='',
+                copy_with_identity=False))
+        evaluation_score_factory = eval_scores_factory.EvaluationScoreWorkerFactory(
+            polqa_tool_bin_path=os.path.join(os.path.dirname(__file__),
+                                             'fake_polqa'),
+            echo_metric_tool_bin_path=None)
+
+        # Instance simulator.
+        simulator = simulation.ApmModuleSimulator(
+            test_data_generator_factory=test_data_generator_factory,
+            evaluation_score_factory=evaluation_score_factory,
+            ap_wrapper=ap_wrapper,
+            evaluator=evaluator,
+            external_vads={
+                'fake':
+                external_vad.ExternalVad(
+                    os.path.join(os.path.dirname(__file__),
+                                 'fake_external_vad.py'), 'fake')
+            })
+
+        # What to simulate.
+        config_files = ['apm_configs/default.json']
+        input_files = [self._fake_audio_track_path]
+        test_data_generators = ['identity', 'white_noise']
+        eval_scores = ['audio_level_mean', 'polqa']
+
+        # Run all simulations.
+        simulator.Run(config_filepaths=config_files,
+                      capture_input_filepaths=input_files,
+                      test_data_generator_names=test_data_generators,
+                      eval_score_names=eval_scores,
+                      output_dir=self._output_path)
+
+        # Check.
+        # TODO(alessiob): Once the TestDataGenerator classes can be configured
+        # by the client code (e.g., number of SNR pairs for the white noise
+        # test data generator), the exact number of calls to ap_wrapper.Run
+        # and evaluator.Run is known; use that with assertEqual.
+        min_number_of_simulations = len(config_files) * len(input_files) * len(
+            test_data_generators)
+        self.assertGreaterEqual(len(ap_wrapper.Run.call_args_list),
+                                min_number_of_simulations)
+        self.assertGreaterEqual(len(evaluator.Run.call_args_list),
+                                min_number_of_simulations)
+
+    def testInputSignalCreation(self):
+        # Instance simulator.
+        simulator = simulation.ApmModuleSimulator(
+            test_data_generator_factory=(
+                test_data_generation_factory.TestDataGeneratorFactory(
+                    aechen_ir_database_path='',
+                    noise_tracks_path='',
+                    copy_with_identity=False)),
+            evaluation_score_factory=(
+                eval_scores_factory.EvaluationScoreWorkerFactory(
+                    polqa_tool_bin_path=os.path.join(os.path.dirname(__file__),
+                                                     'fake_polqa'),
+                    echo_metric_tool_bin_path=None)),
+            ap_wrapper=audioproc_wrapper.AudioProcWrapper(
+                audioproc_wrapper.AudioProcWrapper.
+                DEFAULT_APM_SIMULATOR_BIN_PATH),
+            evaluator=evaluation.ApmModuleEvaluator())
+
+        # Nonexistent input files to be silently created.
+        input_files = [
+            os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'),
+            os.path.join(self._tmp_path, 'pure_tone-1000_500.wav'),
+        ]
+        self.assertFalse(
+            any([os.path.exists(input_file) for input_file in input_files]))
+
+        # The input files are created during the simulation.
+        simulator.Run(config_filepaths=['apm_configs/default.json'],
+                      capture_input_filepaths=input_files,
+                      test_data_generator_names=['identity'],
+                      eval_score_names=['audio_level_peak'],
+                      output_dir=self._output_path)
+        self.assertTrue(
+            all([os.path.exists(input_file) for input_file in input_files]))
+
+    def testPureToneGenerationWithTotalHarmonicDistorsion(self):
+        logging.warning = mock.MagicMock(name='warning')
+
+        # Instance simulator.
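+        # Wired like the simulator in testSimulation above; the THD score
+        # requires the 'identity' test data generator, which is what this
+        # test verifies through the logged warning.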
+ simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join(os.path.dirname(__file__), + 'fake_polqa'), + echo_metric_tool_bin_path=None)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper. + DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # What to simulate. + config_files = ['apm_configs/default.json'] + input_files = [os.path.join(self._tmp_path, 'pure_tone-440_1000.wav')] + eval_scores = ['thd'] + + # Should work. + simulator.Run(config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=eval_scores, + output_dir=self._output_path) + self.assertFalse(logging.warning.called) + + # Warning expected. + simulator.Run( + config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['white_noise'], # Not allowed with THD. + eval_score_names=eval_scores, + output_dir=self._output_path) + logging.warning.assert_called_with('the evaluation failed: %s', ( + 'The THD score cannot be used with any test data generator other than ' + '"identity"')) + + # # Init. + # generator = test_data_generation.IdentityTestDataGenerator('tmp') + # input_signal_filepath = os.path.join( + # self._test_data_cache_path, 'pure_tone-440_1000.wav') + + # # Check that the input signal is generated. + # self.assertFalse(os.path.exists(input_signal_filepath)) + # generator.Generate( + # input_signal_filepath=input_signal_filepath, + # test_data_cache_path=self._test_data_cache_path, + # base_output_path=self._base_output_path) + # self.assertTrue(os.path.exists(input_signal_filepath)) + + # # Check input signal properties. + # input_signal = signal_processing.SignalProcessingUtils.LoadWav( + # input_signal_filepath) + # self.assertEqual(1000, len(input_signal)) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc new file mode 100644 index 0000000000..1f24d9d370 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
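+//
+// Command line tool that measures the frame-by-frame peak level of a mono
+// wav file, smooths it with configurable attack/decay time constants and
+// writes one raw float per frame (native byte order), together with a small
+// config file describing the parameters that were used.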
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <fstream>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "rtc_base/logging.h"
+
+ABSL_FLAG(std::string, i, "", "Input wav file");
+ABSL_FLAG(std::string, oc, "", "Config output file");
+ABSL_FLAG(std::string, ol, "", "Levels output file");
+ABSL_FLAG(float, a, 5.f, "Attack (ms)");
+ABSL_FLAG(float, d, 20.f, "Decay (ms)");
+ABSL_FLAG(int, f, 10, "Frame length (ms)");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr int kMaxSampleRate = 48000;
+constexpr uint8_t kMaxFrameLenMs = 30;
+constexpr size_t kMaxFrameLen = kMaxFrameLenMs * kMaxSampleRate / 1000;
+
+const double kOneDbReduction = DbToRatio(-1.0);
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  // Check parameters.
+  if (absl::GetFlag(FLAGS_f) < 1 || absl::GetFlag(FLAGS_f) > kMaxFrameLenMs) {
+    RTC_LOG(LS_ERROR) << "Invalid frame length (min: 1, max: "
+                      << kMaxFrameLenMs << ")";
+    return 1;
+  }
+  if (absl::GetFlag(FLAGS_a) < 0 || absl::GetFlag(FLAGS_d) < 0) {
+    RTC_LOG(LS_ERROR) << "Attack and decay must be non-negative";
+    return 1;
+  }
+
+  // Open wav input file and check properties.
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string config_output_file = absl::GetFlag(FLAGS_oc);
+  const std::string levels_output_file = absl::GetFlag(FLAGS_ol);
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+
+  // Map from milliseconds to samples.
+  const size_t audio_frame_length = rtc::CheckedDivExact(
+      absl::GetFlag(FLAGS_f) * wav_reader.sample_rate(), 1000);
+  auto time_const = [](double c) {
+    return std::pow(kOneDbReduction, absl::GetFlag(FLAGS_f) / c);
+  };
+  const float attack =
+      absl::GetFlag(FLAGS_a) == 0.0 ? 0.0 : time_const(absl::GetFlag(FLAGS_a));
+  const float decay =
+      absl::GetFlag(FLAGS_d) == 0.0 ? 0.0 : time_const(absl::GetFlag(FLAGS_d));
+
+  // Write config to file.
+  std::ofstream out_config(config_output_file);
+  out_config << "{"
+                "'frame_len_ms': "
+             << absl::GetFlag(FLAGS_f)
+             << ", "
+                "'attack_ms': "
+             << absl::GetFlag(FLAGS_a)
+             << ", "
+                "'decay_ms': "
+             << absl::GetFlag(FLAGS_d) << "}\n";
+  out_config.close();
+
+  // Measure level frame-by-frame.
+  std::ofstream out_levels(levels_output_file, std::ofstream::binary);
+  std::array<int16_t, kMaxFrameLen> samples;
+  float level_prev = 0.f;
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_length, samples.data());
+    if (read_samples < audio_frame_length)
+      break;  // EOF.
+
+    // Frame peak level.
+    std::transform(samples.begin(), samples.begin() + audio_frame_length,
+                   samples.begin(), [](int16_t s) { return std::abs(s); });
+    const int16_t peak_level = *std::max_element(
+        samples.cbegin(), samples.cbegin() + audio_frame_length);
+    const float level_curr = static_cast<float>(peak_level) / 32768.f;
+
+    // Temporal smoothing.
+    auto smooth = [&level_prev, &level_curr](float c) {
+      return (1.0 - c) * level_curr + c * level_prev;
+    };
+    level_prev = smooth(level_curr > level_prev ? attack : decay);
+
+    // Write output.
+    out_levels.write(reinterpret_cast<const char*>(&level_prev),
+                     sizeof(float));
+  }
+  out_levels.close();
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
new file mode 100644
index 0000000000..7e86faccec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
@@ -0,0 +1,526 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Test data generators producing signal pairs intended to be used to
+test the APM module. Each pair consists of a noisy input and a reference
+signal. The former is used as APM input and is generated by adding noise to a
+clean audio track. The reference is the expected APM output.
+
+Throughout this file, the following naming convention is used:
+  - input signal: the clean signal (e.g., speech),
+  - noise signal: the noise to be summed up to the input signal (e.g., white
+    noise, Gaussian noise),
+  - noisy signal: input + noise.
+The noise signal may or may not be a function of the clean signal. For
+instance, white noise is independently generated, whereas reverberation is
+obtained by convolving the input signal with an impulse response.
+"""
+
+import logging
+import os
+import shutil
+import sys
+
+try:
+    import scipy.io
+except ImportError:
+    logging.critical('Cannot import the third-party Python package scipy')
+    sys.exit(1)
+
+from . import data_access
+from . import exceptions
+from . import signal_processing
+
+
+class TestDataGenerator(object):
+    """Abstract class responsible for the generation of noisy signals.
+
+    Given a clean signal, it generates two streams named noisy signal and
+    reference. The former is the clean signal deteriorated by the noise
+    source, the latter goes through the same deterioration process, but more
+    "gently". Noisy signal and reference are produced so that the reference is
+    the signal expected at the output of the APM module when the latter is fed
+    with the noisy signal.
+
+    A test data generator generates one or more pairs.
+    """
+
+    NAME = None
+    REGISTERED_CLASSES = {}
+
+    def __init__(self, output_directory_prefix):
+        self._output_directory_prefix = output_directory_prefix
+        # Init dictionaries with one entry for each test data generator
+        # configuration (e.g., different SNRs).
+        # Noisy audio track files (stored separately in a cache folder).
+        self._noisy_signal_filepaths = None
+        # Path to be used for the APM simulation output files.
+        self._apm_output_paths = None
+        # Reference audio track files (stored separately in a cache folder).
+        self._reference_signal_filepaths = None
+        self.Clear()
+
+    @classmethod
+    def RegisterClass(cls, class_to_register):
+        """Registers a TestDataGenerator implementation.
+
+        Decorator to automatically register the classes that extend
+        TestDataGenerator.
+        Example usage:
+
+        @TestDataGenerator.RegisterClass
+        class IdentityGenerator(TestDataGenerator):
+          pass
+        """
+        cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register
+        return class_to_register
+
+    @property
+    def config_names(self):
+        return self._noisy_signal_filepaths.keys()
+
+    @property
+    def noisy_signal_filepaths(self):
+        return self._noisy_signal_filepaths
+
+    @property
+    def apm_output_paths(self):
+        return self._apm_output_paths
+
+    @property
+    def reference_signal_filepaths(self):
+        return self._reference_signal_filepaths
+
+    def Generate(self, input_signal_filepath, test_data_cache_path,
+                 base_output_path):
+        """Generates a set of noisy input and reference audio track file pairs.
+
+        This method initializes an empty set of pairs and calls the _Generate()
+        method implemented in a concrete class.
+
+        Args:
+          input_signal_filepath: path to the clean input audio track file.
+          test_data_cache_path: path to the cache of the generated audio track
+                                files.
+          base_output_path: base path where output is written.
+        """
+        self.Clear()
+        self._Generate(input_signal_filepath, test_data_cache_path,
+                       base_output_path)
+
+    def Clear(self):
+        """Clears the generated output path dictionaries.
+        """
+        self._noisy_signal_filepaths = {}
+        self._apm_output_paths = {}
+        self._reference_signal_filepaths = {}
+
+    def _Generate(self, input_signal_filepath, test_data_cache_path,
+                  base_output_path):
+        """Abstract method to be implemented in each concrete class.
+        """
+        raise NotImplementedError()
+
+    def _AddNoiseSnrPairs(self, base_output_path, noisy_mix_filepaths,
+                          snr_value_pairs):
+        """Adds noisy-reference signal pairs.
+
+        Args:
+          base_output_path: noisy tracks base output path.
+          noisy_mix_filepaths: nested dictionary of noisy signal paths
+                               organized by noisy track name and SNR level.
+          snr_value_pairs: list of SNR pairs.
+        """
+        for noise_track_name in noisy_mix_filepaths:
+            for snr_noisy, snr_reference in snr_value_pairs:
+                config_name = '{0}_{1:d}_{2:d}_SNR'.format(
+                    noise_track_name, snr_noisy, snr_reference)
+                output_path = self._MakeDir(base_output_path, config_name)
+                self._AddNoiseReferenceFilesPair(
+                    config_name=config_name,
+                    noisy_signal_filepath=noisy_mix_filepaths[noise_track_name]
+                    [snr_noisy],
+                    reference_signal_filepath=noisy_mix_filepaths[
+                        noise_track_name][snr_reference],
+                    output_path=output_path)
+
+    def _AddNoiseReferenceFilesPair(self, config_name, noisy_signal_filepath,
+                                    reference_signal_filepath, output_path):
+        """Adds one noisy-reference signal pair.
+
+        Args:
+          config_name: name of the APM configuration.
+          noisy_signal_filepath: path to noisy audio track file.
+          reference_signal_filepath: path to reference audio track file.
+          output_path: APM output path.
+        """
+        assert config_name not in self._noisy_signal_filepaths
+        self._noisy_signal_filepaths[config_name] = os.path.abspath(
+            noisy_signal_filepath)
+        self._apm_output_paths[config_name] = os.path.abspath(output_path)
+        self._reference_signal_filepaths[config_name] = os.path.abspath(
+            reference_signal_filepath)
+
+    def _MakeDir(self, base_output_path, test_data_generator_config_name):
+        output_path = os.path.join(
+            base_output_path,
+            self._output_directory_prefix + test_data_generator_config_name)
+        data_access.MakeDirectory(output_path)
+        return output_path
+
+
+@TestDataGenerator.RegisterClass
+class IdentityTestDataGenerator(TestDataGenerator):
+    """Generator that adds no noise.
+
+    Both the noisy and the reference signals are the input signal.
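+
+    When `copy_with_identity` is True, the input signal is first copied into
+    the test data cache folder and the copy is used as both the noisy and the
+    reference signal (see `_Generate` below).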
+ """ + + NAME = 'identity' + + def __init__(self, output_directory_prefix, copy_with_identity): + TestDataGenerator.__init__(self, output_directory_prefix) + self._copy_with_identity = copy_with_identity + + @property + def copy_with_identity(self): + return self._copy_with_identity + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + config_name = 'default' + output_path = self._MakeDir(base_output_path, config_name) + + if self._copy_with_identity: + input_signal_filepath_new = os.path.join( + test_data_cache_path, + os.path.split(input_signal_filepath)[1]) + logging.info('copying ' + input_signal_filepath + ' to ' + + (input_signal_filepath_new)) + shutil.copy(input_signal_filepath, input_signal_filepath_new) + input_signal_filepath = input_signal_filepath_new + + self._AddNoiseReferenceFilesPair( + config_name=config_name, + noisy_signal_filepath=input_signal_filepath, + reference_signal_filepath=input_signal_filepath, + output_path=output_path) + + +@TestDataGenerator.RegisterClass +class WhiteNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds white noise. + """ + + NAME = 'white_noise' + + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 10 dB higher. + _SNR_VALUE_PAIRS = [ + [20, 30], # Smallest noise. + [10, 20], + [5, 15], + [0, 10], # Largest noise. + ] + + _NOISY_SIGNAL_FILENAME_TEMPLATE = 'noise_{0:d}_SNR.wav' + + def __init__(self, output_directory_prefix): + TestDataGenerator.__init__(self, output_directory_prefix) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + # Create the noise track. + noise_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + input_signal) + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths = {} + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format(snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, noise_signal, snr) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[snr] = noisy_signal_filepath + + # Add all the noisy-reference signal pairs. + for snr_noisy, snr_refence in self._SNR_VALUE_PAIRS: + config_name = '{0:d}_{1:d}_SNR'.format(snr_noisy, snr_refence) + output_path = self._MakeDir(base_output_path, config_name) + self._AddNoiseReferenceFilesPair( + config_name=config_name, + noisy_signal_filepath=noisy_mix_filepaths[snr_noisy], + reference_signal_filepath=noisy_mix_filepaths[snr_refence], + output_path=output_path) + + +# TODO(alessiob): remove comment when class implemented. +# @TestDataGenerator.RegisterClass +class NarrowBandNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds narrow-band noise. 
+ """ + + NAME = 'narrow_band_noise' + + def __init__(self, output_directory_prefix): + TestDataGenerator.__init__(self, output_directory_prefix) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + # TODO(alessiob): implement. + pass + + +@TestDataGenerator.RegisterClass +class AdditiveNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds noise loops. + + This generator uses all the wav files in a given path (default: noise_tracks/) + and mixes them to the clean speech with different target SNRs (hard-coded). + """ + + NAME = 'additive_noise' + _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav' + + DEFAULT_NOISE_TRACKS_PATH = os.path.join(os.path.dirname(__file__), + os.pardir, 'noise_tracks') + + # TODO(alessiob): Make the list of SNR pairs customizable. + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 10 dB higher. + _SNR_VALUE_PAIRS = [ + [20, 30], # Smallest noise. + [10, 20], + [5, 15], + [0, 10], # Largest noise. + ] + + def __init__(self, output_directory_prefix, noise_tracks_path): + TestDataGenerator.__init__(self, output_directory_prefix) + self._noise_tracks_path = noise_tracks_path + self._noise_tracks_file_names = [ + n for n in os.listdir(self._noise_tracks_path) + if n.lower().endswith('.wav') + ] + if len(self._noise_tracks_file_names) == 0: + raise exceptions.InitializationException( + 'No wav files found in the noise tracks path %s' % + (self._noise_tracks_path)) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Generates test data pairs using environmental noise. + + For each noise track and pair of SNR values, the following two audio tracks + are created: the noisy signal and the reference signal. The former is + obtained by mixing the (clean) input signal to the corresponding noise + track enforcing the target SNR. + """ + # Init. + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + noisy_mix_filepaths = {} + for noise_track_filename in self._noise_tracks_file_names: + # Load the noise track. + noise_track_name, _ = os.path.splitext(noise_track_filename) + noise_track_filepath = os.path.join(self._noise_tracks_path, + noise_track_filename) + if not os.path.exists(noise_track_filepath): + logging.error('cannot find the <%s> noise track', + noise_track_filename) + raise exceptions.FileNotFoundError() + + noise_signal = signal_processing.SignalProcessingUtils.LoadWav( + noise_track_filepath) + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths[noise_track_name] = {} + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format( + noise_track_name, snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, + noise_signal, + snr, + pad_noise=signal_processing.SignalProcessingUtils. + MixPadding.LOOP) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[noise_track_name][ + snr] = noisy_signal_filepath + + # Add all the noise-SNR pairs. 
+ self._AddNoiseSnrPairs(base_output_path, noisy_mix_filepaths, + self._SNR_VALUE_PAIRS) + + +@TestDataGenerator.RegisterClass +class ReverberationTestDataGenerator(TestDataGenerator): + """Generator that adds reverberation noise. + + TODO(alessiob): Make this class more generic since the impulse response can be + anything (not just reverberation); call it e.g., + ConvolutionalNoiseTestDataGenerator. + """ + + NAME = 'reverberation' + + _IMPULSE_RESPONSES = { + 'lecture': 'air_binaural_lecture_0_0_1.mat', # Long echo. + 'booth': 'air_binaural_booth_0_0_1.mat', # Short echo. + } + _MAX_IMPULSE_RESPONSE_LENGTH = None + + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 5 dB higher. + _SNR_VALUE_PAIRS = [ + [3, 8], # Smallest noise. + [-3, 2], # Largest noise. + ] + + _NOISE_TRACK_FILENAME_TEMPLATE = '{0}.wav' + _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav' + + def __init__(self, output_directory_prefix, aechen_ir_database_path): + TestDataGenerator.__init__(self, output_directory_prefix) + self._aechen_ir_database_path = aechen_ir_database_path + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Generates test data pairs using reverberation noise. + + For each impulse response, one noise track is created. For each impulse + response and pair of SNR values, the following 2 audio tracks are + created: the noisy signal and the reference signal. The former is + obtained by mixing the (clean) input signal to the corresponding noise + track enforcing the target SNR. + """ + # Init. + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + noisy_mix_filepaths = {} + for impulse_response_name in self._IMPULSE_RESPONSES: + noise_track_filename = self._NOISE_TRACK_FILENAME_TEMPLATE.format( + impulse_response_name) + noise_track_filepath = os.path.join(test_data_cache_path, + noise_track_filename) + noise_signal = None + try: + # Load noise track. + noise_signal = signal_processing.SignalProcessingUtils.LoadWav( + noise_track_filepath) + except exceptions.FileNotFoundError: + # Generate noise track by applying the impulse response. + impulse_response_filepath = os.path.join( + self._aechen_ir_database_path, + self._IMPULSE_RESPONSES[impulse_response_name]) + noise_signal = self._GenerateNoiseTrack( + noise_track_filepath, input_signal, + impulse_response_filepath) + assert noise_signal is not None + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths[impulse_response_name] = {} + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format( + impulse_response_name, snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, noise_signal, snr) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[impulse_response_name][ + snr] = noisy_signal_filepath + + # Add all the noise-SNR pairs. 
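+        # For example, the 'lecture' impulse response with the pair [-3, 2]
+        # yields the config 'lecture_-3_2_SNR', which pairs lecture_-3_SNR.wav
+        # (noisy input) with lecture_2_SNR.wav (reference).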
+        self._AddNoiseSnrPairs(base_output_path, noisy_mix_filepaths,
+                               self._SNR_VALUE_PAIRS)
+
+    def _GenerateNoiseTrack(self, noise_track_filepath, input_signal,
+                            impulse_response_filepath):
+        """Generates noise track.
+
+        Generate a signal by convolving input_signal with the impulse response
+        in impulse_response_filepath; then save to noise_track_filepath.
+
+        Args:
+          noise_track_filepath: output file path for the noise track.
+          input_signal: (clean) input signal samples.
+          impulse_response_filepath: impulse response file path.
+
+        Returns:
+          AudioSegment instance.
+        """
+        # Load impulse response.
+        data = scipy.io.loadmat(impulse_response_filepath)
+        impulse_response = data['h_air'].flatten()
+        if self._MAX_IMPULSE_RESPONSE_LENGTH is not None:
+            logging.info('truncating impulse response from %d to %d samples',
+                         len(impulse_response),
+                         self._MAX_IMPULSE_RESPONSE_LENGTH)
+            impulse_response = (
+                impulse_response[:self._MAX_IMPULSE_RESPONSE_LENGTH])
+
+        # Apply impulse response.
+        processed_signal = (
+            signal_processing.SignalProcessingUtils.ApplyImpulseResponse(
+                input_signal, impulse_response))
+
+        # Save.
+        signal_processing.SignalProcessingUtils.SaveWav(
+            noise_track_filepath, processed_signal)
+
+        return processed_signal
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
new file mode 100644
index 0000000000..948888e775
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""TestDataGenerator factory class.
+"""
+
+import logging
+
+from . import exceptions
+from . import test_data_generation
+
+
+class TestDataGeneratorFactory(object):
+    """Factory class used to create test data generators.
+
+    Usage: create a factory, passing to the ctor the parameters with which the
+    generators will be produced.
+    """
+
+    def __init__(self, aechen_ir_database_path, noise_tracks_path,
+                 copy_with_identity):
+        """Ctor.
+
+        Args:
+          aechen_ir_database_path: Path to the Aachen Impulse Response (AIR)
+                                   database.
+          noise_tracks_path: Path to the noise tracks to add.
+          copy_with_identity: Flag indicating whether the identity generator
+                              has to make copies of the clean speech input
+                              files.
+        """
+        self._output_directory_prefix = None
+        self._aechen_ir_database_path = aechen_ir_database_path
+        self._noise_tracks_path = noise_tracks_path
+        self._copy_with_identity = copy_with_identity
+
+    def SetOutputDirectoryPrefix(self, prefix):
+        self._output_directory_prefix = prefix
+
+    def GetInstance(self, test_data_generators_class):
+        """Creates a TestDataGenerator instance given a class object.
+
+        Args:
+          test_data_generators_class: TestDataGenerator class object (not an
+                                      instance).
+
+        Returns:
+          TestDataGenerator instance.
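+
+        Example (illustrative; the prefix string is an arbitrary choice):
+          factory.SetOutputDirectoryPrefix('datagen-')
+          generator = factory.GetInstance(
+              test_data_generation.IdentityTestDataGenerator)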
+ """ + if self._output_directory_prefix is None: + raise exceptions.InitializationException( + 'The output directory prefix for test data generators is not set' + ) + logging.debug('factory producing %s', test_data_generators_class) + + if test_data_generators_class == ( + test_data_generation.IdentityTestDataGenerator): + return test_data_generation.IdentityTestDataGenerator( + self._output_directory_prefix, self._copy_with_identity) + elif test_data_generators_class == ( + test_data_generation.ReverberationTestDataGenerator): + return test_data_generation.ReverberationTestDataGenerator( + self._output_directory_prefix, self._aechen_ir_database_path) + elif test_data_generators_class == ( + test_data_generation.AdditiveNoiseTestDataGenerator): + return test_data_generation.AdditiveNoiseTestDataGenerator( + self._output_directory_prefix, self._noise_tracks_path) + else: + return test_data_generators_class(self._output_directory_prefix) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py new file mode 100644 index 0000000000..f75098ae2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py @@ -0,0 +1,207 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the test_data_generation module. +""" + +import os +import shutil +import tempfile +import unittest + +import numpy as np +import scipy.io + +from . import test_data_generation +from . import test_data_generation_factory +from . import signal_processing + + +class TestTestDataGenerators(unittest.TestCase): + """Unit tests for the test_data_generation module. + """ + + def setUp(self): + """Create temporary folders.""" + self._base_output_path = tempfile.mkdtemp() + self._test_data_cache_path = tempfile.mkdtemp() + self._fake_air_db_path = tempfile.mkdtemp() + + # Fake AIR DB impulse responses. + # TODO(alessiob): ReverberationTestDataGenerator will change to allow custom + # impulse responses. When changed, the coupling below between + # impulse_response_mat_file_names and + # ReverberationTestDataGenerator._IMPULSE_RESPONSES can be removed. + impulse_response_mat_file_names = [ + 'air_binaural_lecture_0_0_1.mat', + 'air_binaural_booth_0_0_1.mat', + ] + for impulse_response_mat_file_name in impulse_response_mat_file_names: + data = {'h_air': np.random.rand(1, 1000).astype(' +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "common_audio/wav_file.h" +#include "rtc_base/logging.h" + +ABSL_FLAG(std::string, i, "", "Input wav file"); +ABSL_FLAG(std::string, o, "", "VAD output file"); + +namespace webrtc { +namespace test { +namespace { + +// The allowed values are 10, 20 or 30 ms. 
+constexpr uint8_t kAudioFrameLengthMilliseconds = 30;
+constexpr int kMaxSampleRate = 48000;
+constexpr size_t kMaxFrameLen =
+    kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000;
+
+constexpr uint8_t kBitmaskBuffSize = 8;
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string output_file = absl::GetFlag(FLAGS_o);
+  // Open wav input file and check properties.
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+  const size_t audio_frame_length = rtc::CheckedDivExact(
+      kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000);
+  if (audio_frame_length > kMaxFrameLen) {
+    RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too "
+                         "large.";
+    return 1;
+  }
+
+  // Create output file and write header.
+  std::ofstream out_file(output_file, std::ofstream::binary);
+  const char audio_frame_length_ms = kAudioFrameLengthMilliseconds;
+  out_file.write(&audio_frame_length_ms, 1);  // Header.
+
+  // Run VAD and write decisions.
+  std::unique_ptr<Vad> vad = CreateVad(Vad::Aggressiveness::kVadNormal);
+  std::array<int16_t, kMaxFrameLen> samples;
+  char buff = 0;     // Buffer to write one bit per frame.
+  uint8_t next = 0;  // Points to the next bit to write in `buff`.
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_length, samples.data());
+    if (read_samples < audio_frame_length)
+      break;
+    const auto is_speech = vad->VoiceActivity(
+        samples.data(), audio_frame_length, wav_reader.sample_rate());
+
+    // Write output.
+    buff = is_speech ? buff | (1 << next) : buff & ~(1 << next);
+    if (++next == kBitmaskBuffSize) {
+      out_file.write(&buff, 1);  // Flush.
+      buff = 0;                  // Reset.
+      next = 0;
+    }
+  }
+
+  // Finalize.
+  char extra_bits = 0;
+  if (next > 0) {
+    extra_bits = kBitmaskBuffSize - next;
+    out_file.write(&buff, 1);  // Flush.
+  }
+  out_file.write(&extra_bits, 1);
+  out_file.close();
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc
new file mode 100644
index 0000000000..4899d2d459
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/runtime_setting_util.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void ReplayRuntimeSetting(AudioProcessing* apm,
+                          const webrtc::audioproc::RuntimeSetting& setting) {
+  RTC_CHECK(apm);
+  // TODO(bugs.webrtc.org/9138): Add ability to handle different types
+  // of settings. Currently CapturePreGain, CaptureFixedPostGain and
+  // PlayoutVolumeChange are supported.
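+  // Note that the RTC_CHECK below only accepts the three setting types
+  // listed above; a recorded setting that contains only one of the two types
+  // handled by the last two branches would fail this check.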
+ RTC_CHECK(setting.has_capture_pre_gain() || + setting.has_capture_fixed_post_gain() || + setting.has_playout_volume_change()); + + if (setting.has_capture_pre_gain()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCapturePreGain( + setting.capture_pre_gain())); + } else if (setting.has_capture_fixed_post_gain()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCaptureFixedPostGain( + setting.capture_fixed_post_gain())); + } else if (setting.has_playout_volume_change()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange( + setting.playout_volume_change())); + } else if (setting.has_playout_audio_device_change()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreatePlayoutAudioDeviceChange( + {setting.playout_audio_device_change().id(), + setting.playout_audio_device_change().max_volume()})); + } else if (setting.has_capture_output_used()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting( + setting.capture_output_used())); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h new file mode 100644 index 0000000000..d8cbe82076 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ +#define MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/protobuf_utils.h" + +namespace webrtc { + +void ReplayRuntimeSetting(AudioProcessing* apm, + const webrtc::audioproc::RuntimeSetting& setting); +} + +#endif // MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc new file mode 100644 index 0000000000..458f6ced76 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/simulator_buffers.h"
+
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+SimulatorBuffers::SimulatorBuffers(int render_input_sample_rate_hz,
+                                   int capture_input_sample_rate_hz,
+                                   int render_output_sample_rate_hz,
+                                   int capture_output_sample_rate_hz,
+                                   size_t num_render_input_channels,
+                                   size_t num_capture_input_channels,
+                                   size_t num_render_output_channels,
+                                   size_t num_capture_output_channels) {
+  Random rand_gen(42);
+  CreateConfigAndBuffer(render_input_sample_rate_hz, num_render_input_channels,
+                        &rand_gen, &render_input_buffer, &render_input_config,
+                        &render_input, &render_input_samples);
+
+  CreateConfigAndBuffer(render_output_sample_rate_hz,
+                        num_render_output_channels, &rand_gen,
+                        &render_output_buffer, &render_output_config,
+                        &render_output, &render_output_samples);
+
+  CreateConfigAndBuffer(capture_input_sample_rate_hz,
+                        num_capture_input_channels, &rand_gen,
+                        &capture_input_buffer, &capture_input_config,
+                        &capture_input, &capture_input_samples);
+
+  CreateConfigAndBuffer(capture_output_sample_rate_hz,
+                        num_capture_output_channels, &rand_gen,
+                        &capture_output_buffer, &capture_output_config,
+                        &capture_output, &capture_output_samples);
+
+  UpdateInputBuffers();
+}
+
+SimulatorBuffers::~SimulatorBuffers() = default;
+
+void SimulatorBuffers::CreateConfigAndBuffer(
+    int sample_rate_hz,
+    size_t num_channels,
+    Random* rand_gen,
+    std::unique_ptr<AudioBuffer>* buffer,
+    StreamConfig* config,
+    std::vector<float*>* buffer_data,
+    std::vector<float>* buffer_data_samples) {
+  int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  *config = StreamConfig(sample_rate_hz, num_channels);
+  buffer->reset(
+      new AudioBuffer(config->sample_rate_hz(), config->num_channels(),
+                      config->sample_rate_hz(), config->num_channels(),
+                      config->sample_rate_hz(), config->num_channels()));
+
+  buffer_data_samples->resize(samples_per_channel * num_channels);
+  for (auto& v : *buffer_data_samples) {
+    v = rand_gen->Rand<float>();
+  }
+
+  buffer_data->resize(num_channels);
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    (*buffer_data)[ch] = &(*buffer_data_samples)[ch * samples_per_channel];
+  }
+}
+
+void SimulatorBuffers::UpdateInputBuffers() {
+  test::CopyVectorToAudioBuffer(capture_input_config, capture_input_samples,
+                                capture_input_buffer.get());
+  test::CopyVectorToAudioBuffer(render_input_config, render_input_samples,
+                                render_input_buffer.get());
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h
new file mode 100644
index 0000000000..36dcf301a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_
+
+#include <memory>
+#include <vector>
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/random.h"
+
+namespace webrtc {
+namespace test {
+
+struct SimulatorBuffers {
+  SimulatorBuffers(int render_input_sample_rate_hz,
+                   int capture_input_sample_rate_hz,
+                   int render_output_sample_rate_hz,
+                   int capture_output_sample_rate_hz,
+                   size_t num_render_input_channels,
+                   size_t num_capture_input_channels,
+                   size_t num_render_output_channels,
+                   size_t num_capture_output_channels);
+  ~SimulatorBuffers();
+
+  void CreateConfigAndBuffer(int sample_rate_hz,
+                             size_t num_channels,
+                             Random* rand_gen,
+                             std::unique_ptr<AudioBuffer>* buffer,
+                             StreamConfig* config,
+                             std::vector<float*>* buffer_data,
+                             std::vector<float>* buffer_data_samples);
+
+  void UpdateInputBuffers();
+
+  std::unique_ptr<AudioBuffer> render_input_buffer;
+  std::unique_ptr<AudioBuffer> capture_input_buffer;
+  std::unique_ptr<AudioBuffer> render_output_buffer;
+  std::unique_ptr<AudioBuffer> capture_output_buffer;
+  StreamConfig render_input_config;
+  StreamConfig capture_input_config;
+  StreamConfig render_output_config;
+  StreamConfig capture_output_config;
+  std::vector<float*> render_input;
+  std::vector<float> render_input_samples;
+  std::vector<float*> capture_input;
+  std::vector<float> capture_input_samples;
+  std::vector<float*> render_output;
+  std::vector<float> render_output_samples;
+  std::vector<float*> capture_output;
+  std::vector<float> capture_output_samples;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc b/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc
new file mode 100644
index 0000000000..9aeebe5155
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/test_utils.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+
+ChannelBufferWavReader::ChannelBufferWavReader(std::unique_ptr<WavReader> file)
+    : file_(std::move(file)) {}
+
+ChannelBufferWavReader::~ChannelBufferWavReader() = default;
+
+bool ChannelBufferWavReader::Read(ChannelBuffer<float>* buffer) {
+  RTC_CHECK_EQ(file_->num_channels(), buffer->num_channels());
+  interleaved_.resize(buffer->size());
+  if (file_->ReadSamples(interleaved_.size(), &interleaved_[0]) !=
+      interleaved_.size()) {
+    return false;
+  }
+
+  FloatS16ToFloat(&interleaved_[0], interleaved_.size(), &interleaved_[0]);
+  Deinterleave(&interleaved_[0], buffer->num_frames(), buffer->num_channels(),
+               buffer->channels());
+  return true;
+}
+
+ChannelBufferWavWriter::ChannelBufferWavWriter(std::unique_ptr<WavWriter> file)
+    : file_(std::move(file)) {}
+
+ChannelBufferWavWriter::~ChannelBufferWavWriter() = default;
+
+void ChannelBufferWavWriter::Write(const ChannelBuffer<float>& buffer) {
+  RTC_CHECK_EQ(file_->num_channels(), buffer.num_channels());
+  interleaved_.resize(buffer.size());
+  Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(),
+             &interleaved_[0]);
+  FloatToFloatS16(&interleaved_[0], interleaved_.size(), &interleaved_[0]);
+  file_->WriteSamples(&interleaved_[0], interleaved_.size());
+}
+
+ChannelBufferVectorWriter::ChannelBufferVectorWriter(std::vector<float>* output)
+    : output_(output) {
+  RTC_DCHECK(output_);
+}
+
+ChannelBufferVectorWriter::~ChannelBufferVectorWriter() = default;
+
+void ChannelBufferVectorWriter::Write(const ChannelBuffer<float>& buffer) {
+  // Account for sample rate changes throughout a simulation.
+  interleaved_buffer_.resize(buffer.size());
+  Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(),
+             interleaved_buffer_.data());
+  size_t old_size = output_->size();
+  output_->resize(old_size + interleaved_buffer_.size());
+  FloatToFloatS16(interleaved_buffer_.data(), interleaved_buffer_.size(),
+                  output_->data() + old_size);
+}
+
+FILE* OpenFile(absl::string_view filename, absl::string_view mode) {
+  std::string filename_str(filename);
+  FILE* file = fopen(filename_str.c_str(), std::string(mode).c_str());
+  if (!file) {
+    printf("Unable to open file %s\n", filename_str.c_str());
+    exit(1);
+  }
+  return file;
+}
+
+void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz) {
+  frame->sample_rate_hz = sample_rate_hz;
+  frame->samples_per_channel =
+      AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/test_utils.h b/third_party/libwebrtc/modules/audio_processing/test/test_utils.h
new file mode 100644
index 0000000000..bf82f9d66d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/test_utils.h
@@ -0,0 +1,170 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
+
+#include <math.h>
+
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <sstream>  // no-presubmit-check TODO(webrtc:8982)
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/channel_buffer.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+static const AudioProcessing::Error kNoErr = AudioProcessing::kNoError;
+#define EXPECT_NOERR(expr) EXPECT_EQ(kNoErr, (expr))
+
+// Encapsulates samples and metadata for an integer frame.
+struct Int16FrameData {
+  // Max data size that matches the data size of the AudioFrame class,
+  // providing storage for 8 channels of 96 kHz data.
+  static const int kMaxDataSizeSamples = 7680;
+
+  Int16FrameData() {
+    sample_rate_hz = 0;
+    num_channels = 0;
+    samples_per_channel = 0;
+    data.fill(0);
+  }
+
+  void CopyFrom(const Int16FrameData& src) {
+    samples_per_channel = src.samples_per_channel;
+    sample_rate_hz = src.sample_rate_hz;
+    num_channels = src.num_channels;
+
+    const size_t length = samples_per_channel * num_channels;
+    RTC_CHECK_LE(length, kMaxDataSizeSamples);
+    memcpy(data.data(), src.data.data(), sizeof(int16_t) * length);
+  }
+  std::array<int16_t, kMaxDataSizeSamples> data;
+  int32_t sample_rate_hz;
+  size_t num_channels;
+  size_t samples_per_channel;
+};
+
+// Reads ChannelBuffers from a provided WavReader.
+class ChannelBufferWavReader final {
+ public:
+  explicit ChannelBufferWavReader(std::unique_ptr<WavReader> file);
+  ~ChannelBufferWavReader();
+
+  ChannelBufferWavReader(const ChannelBufferWavReader&) = delete;
+  ChannelBufferWavReader& operator=(const ChannelBufferWavReader&) = delete;
+
+  // Reads data from the file according to the `buffer` format. Returns false
+  // if a full buffer can't be read from the file.
+  bool Read(ChannelBuffer<float>* buffer);
+
+ private:
+  std::unique_ptr<WavReader> file_;
+  std::vector<float> interleaved_;
+};
+
+// Writes ChannelBuffers to a provided WavWriter.
+class ChannelBufferWavWriter final {
+ public:
+  explicit ChannelBufferWavWriter(std::unique_ptr<WavWriter> file);
+  ~ChannelBufferWavWriter();
+
+  ChannelBufferWavWriter(const ChannelBufferWavWriter&) = delete;
+  ChannelBufferWavWriter& operator=(const ChannelBufferWavWriter&) = delete;
+
+  void Write(const ChannelBuffer<float>& buffer);
+
+ private:
+  std::unique_ptr<WavWriter> file_;
+  std::vector<float> interleaved_;
+};
+
+// Takes a pointer to a vector. Allows appending the samples of channel
+// buffers to the given vector, by interleaving the samples and converting
+// them to float S16.
+class ChannelBufferVectorWriter final {
+ public:
+  explicit ChannelBufferVectorWriter(std::vector<float>* output);
+  ChannelBufferVectorWriter(const ChannelBufferVectorWriter&) = delete;
+  ChannelBufferVectorWriter& operator=(const ChannelBufferVectorWriter&) =
+      delete;
+  ~ChannelBufferVectorWriter();
+
+  // Creates an interleaved copy of `buffer`, converts the samples to float
+  // S16 and appends the result to output_.
+  void Write(const ChannelBuffer<float>& buffer);
+
+ private:
+  std::vector<float> interleaved_buffer_;
+  std::vector<float>* output_;
+};
+
+// Exits on failure; do not use in unit tests.
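+// Illustrative use, with a hypothetical file name:
+//   FILE* f = OpenFile("dump.dat", "wb");
+//   ...
+//   fclose(f);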
+FILE* OpenFile(absl::string_view filename, absl::string_view mode);
+
+void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz);
+
+template <typename T>
+void SetContainerFormat(int sample_rate_hz,
+                        size_t num_channels,
+                        Int16FrameData* frame,
+                        std::unique_ptr<ChannelBuffer<T> >* cb) {
+  SetFrameSampleRate(frame, sample_rate_hz);
+  frame->num_channels = num_channels;
+  cb->reset(new ChannelBuffer<T>(frame->samples_per_channel, num_channels));
+}
+
+template <typename T>
+float ComputeSNR(const T* ref, const T* test, size_t length, float* variance) {
+  float mse = 0;
+  float mean = 0;
+  *variance = 0;
+  for (size_t i = 0; i < length; ++i) {
+    T error = ref[i] - test[i];
+    mse += error * error;
+    *variance += ref[i] * ref[i];
+    mean += ref[i];
+  }
+  mse /= length;
+  *variance /= length;
+  mean /= length;
+  *variance -= mean * mean;
+
+  float snr = 100;  // We assign 100 dB to the zero-error case.
+  if (mse > 0)
+    snr = 10 * log10(*variance / mse);
+  return snr;
+}
+
+// Returns a vector<T> parsed from whitespace delimited values in to_parse,
+// or an empty vector if the string could not be parsed.
+template <typename T>
+std::vector<T> ParseList(absl::string_view to_parse) {
+  std::vector<T> values;
+
+  std::istringstream str(  // no-presubmit-check TODO(webrtc:8982)
+      std::string{to_parse});
+  std::copy(
+      std::istream_iterator<T>(str),  // no-presubmit-check TODO(webrtc:8982)
+      std::istream_iterator<T>(),     // no-presubmit-check TODO(webrtc:8982)
+      std::back_inserter(values));
+
+  return values;
+}
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/unittest.proto b/third_party/libwebrtc/modules/audio_processing/test/unittest.proto
new file mode 100644
index 0000000000..07d1cda6c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/unittest.proto
@@ -0,0 +1,48 @@
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package webrtc.audioproc;
+
+message Test {
+  optional int32 num_reverse_channels = 1;
+  optional int32 num_input_channels = 2;
+  optional int32 num_output_channels = 3;
+  optional int32 sample_rate = 4;
+
+  message Frame {
+  }
+
+  repeated Frame frame = 5;
+
+  optional int32 analog_level_average = 6;
+  optional int32 max_output_average = 7;
+  optional int32 has_voice_count = 9;
+  optional int32 is_saturated_count = 10;
+
+  message EchoMetrics {
+    optional float echo_return_loss = 1;
+    optional float echo_return_loss_enhancement = 2;
+    optional float divergent_filter_fraction = 3;
+    optional float residual_echo_likelihood = 4;
+    optional float residual_echo_likelihood_recent_max = 5;
+  }
+
+  repeated EchoMetrics echo_metrics = 11;
+
+  message DelayMetrics {
+    optional int32 median = 1;
+    optional int32 std = 2;
+  }
+
+  repeated DelayMetrics delay_metrics = 12;
+
+  optional float rms_dbfs_average = 13;
+
+  optional float ns_speech_probability_average = 14;
+
+  optional bool use_aec_extended_filter = 15;
+}
+
+message OutputData {
+  repeated Test test = 1;
+}
+
diff --git a/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc
new file mode 100644
index 0000000000..ee87f9e1a8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc
@@ -0,0 +1,202 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/wav_based_simulator.h"
+
+#include <iostream>
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/system/file_wrapper.h"
+
+namespace webrtc {
+namespace test {
+
+std::vector<WavBasedSimulator::SimulationEventType>
+WavBasedSimulator::GetCustomEventChain(absl::string_view filename) {
+  std::vector<WavBasedSimulator::SimulationEventType> call_chain;
+  FileWrapper file_wrapper = FileWrapper::OpenReadOnly(filename);
+
+  RTC_CHECK(file_wrapper.is_open())
+      << "Could not open the custom call order file, reverting "
+         "to using the default call order";
+
+  char c;
+  size_t num_read = file_wrapper.Read(&c, sizeof(char));
+  while (num_read > 0) {
+    switch (c) {
+      case 'r':
+        call_chain.push_back(SimulationEventType::kProcessReverseStream);
+        break;
+      case 'c':
+        call_chain.push_back(SimulationEventType::kProcessStream);
+        break;
+      case '\n':
+        break;
+      default:
+        RTC_FATAL() << "Incorrect custom call order file";
+    }
+
+    num_read = file_wrapper.Read(&c, sizeof(char));
+  }
+
+  return call_chain;
+}
+
+WavBasedSimulator::WavBasedSimulator(
+    const SimulationSettings& settings,
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    std::unique_ptr<AudioProcessingBuilder> ap_builder)
+    : AudioProcessingSimulator(settings,
+                               std::move(audio_processing),
+                               std::move(ap_builder)) {
+  if (settings_.call_order_input_filename) {
+    call_chain_ = WavBasedSimulator::GetCustomEventChain(
+        *settings_.call_order_input_filename);
+  } else {
+    call_chain_ = WavBasedSimulator::GetDefaultEventChain();
+  }
+}
+
+WavBasedSimulator::~WavBasedSimulator() = default;
+
+std::vector<WavBasedSimulator::SimulationEventType>
+WavBasedSimulator::GetDefaultEventChain() {
+  std::vector<WavBasedSimulator::SimulationEventType> call_chain(2);
+  call_chain[0] = SimulationEventType::kProcessStream;
+  call_chain[1] = SimulationEventType::kProcessReverseStream;
+  return call_chain;
+}
+
+void WavBasedSimulator::PrepareProcessStreamCall() {
+  if (settings_.fixed_interface) {
+    fwd_frame_.CopyFrom(*in_buf_);
+  }
+  ap_->set_stream_key_pressed(settings_.override_key_pressed.value_or(false));
+
+  if (!settings_.use_stream_delay || *settings_.use_stream_delay) {
+    RTC_CHECK_EQ(AudioProcessing::kNoError,
+                 ap_->set_stream_delay_ms(
+                     settings_.stream_delay ? *settings_.stream_delay : 0));
+  }
+}
+
+void WavBasedSimulator::PrepareReverseProcessStreamCall() {
+  if (settings_.fixed_interface) {
+    rev_frame_.CopyFrom(*reverse_in_buf_);
+  }
+}
+
+void WavBasedSimulator::Process() {
+  ConfigureAudioProcessor();
+
+  Initialize();
+
+  bool samples_left_to_process = true;
+  int call_chain_index = 0;
+  int capture_frames_since_init = 0;
+  constexpr int kInitIndex = 1;
+  while (samples_left_to_process) {
+    switch (call_chain_[call_chain_index]) {
+      case SimulationEventType::kProcessStream:
+        SelectivelyToggleDataDumping(kInitIndex, capture_frames_since_init);
+
+        samples_left_to_process = HandleProcessStreamCall();
+        ++capture_frames_since_init;
+        break;
+      case SimulationEventType::kProcessReverseStream:
+        if (settings_.reverse_input_filename) {
+          samples_left_to_process = HandleProcessReverseStreamCall();
+        }
+        break;
+      default:
+        RTC_CHECK_NOTREACHED();
+    }
+
+    call_chain_index = (call_chain_index + 1) % call_chain_.size();
+  }
+
+  DetachAecDump();
+}
+
+void WavBasedSimulator::Analyze() {
+  std::cout << "Inits:" << std::endl;
+  std::cout << "1: -->" << std::endl;
+  std::cout << " Time:" << std::endl;
+  std::cout << "  Capture: 0 s (0 frames) " << std::endl;
+  std::cout << "  Render: 0 s (0 frames)" << std::endl;
+}
+
+bool WavBasedSimulator::HandleProcessStreamCall() {
+  bool samples_left_to_process = buffer_reader_->Read(in_buf_.get());
+  if (samples_left_to_process) {
+    PrepareProcessStreamCall();
+    ProcessStream(settings_.fixed_interface);
+  }
+  return samples_left_to_process;
+}
+
+bool WavBasedSimulator::HandleProcessReverseStreamCall() {
+  bool samples_left_to_process =
+      reverse_buffer_reader_->Read(reverse_in_buf_.get());
+  if (samples_left_to_process) {
+    PrepareReverseProcessStreamCall();
+    ProcessReverseStream(settings_.fixed_interface);
+  }
+  return samples_left_to_process;
+}
+
+void WavBasedSimulator::Initialize() {
+  std::unique_ptr<WavReader> in_file(
+      new WavReader(settings_.input_filename->c_str()));
+  int input_sample_rate_hz = in_file->sample_rate();
+  int input_num_channels = in_file->num_channels();
+  buffer_reader_.reset(new ChannelBufferWavReader(std::move(in_file)));
+
+  int output_sample_rate_hz = settings_.output_sample_rate_hz
+                                  ? *settings_.output_sample_rate_hz
+                                  : input_sample_rate_hz;
+  int output_num_channels = settings_.output_num_channels
+                                ? *settings_.output_num_channels
+                                : input_num_channels;
+
+  int reverse_sample_rate_hz = 48000;
+  int reverse_num_channels = 1;
+  int reverse_output_sample_rate_hz = 48000;
+  int reverse_output_num_channels = 1;
+  if (settings_.reverse_input_filename) {
+    std::unique_ptr<WavReader> reverse_in_file(
+        new WavReader(settings_.reverse_input_filename->c_str()));
+    reverse_sample_rate_hz = reverse_in_file->sample_rate();
+    reverse_num_channels = reverse_in_file->num_channels();
+    reverse_buffer_reader_.reset(
+        new ChannelBufferWavReader(std::move(reverse_in_file)));
+
+    reverse_output_sample_rate_hz =
+        settings_.reverse_output_sample_rate_hz
+            ? *settings_.reverse_output_sample_rate_hz
+            : reverse_sample_rate_hz;
+    reverse_output_num_channels =
+        settings_.reverse_output_num_channels
+            ? *settings_.reverse_output_num_channels
+            : reverse_num_channels;
+  }
+
+  SetupBuffersConfigsOutputs(
+      input_sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz,
+      reverse_output_sample_rate_hz, input_num_channels, output_num_channels,
+      reverse_num_channels, reverse_output_num_channels);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h
new file mode 100644
index 0000000000..44e9ee2b7f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
+
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+
+namespace webrtc {
+namespace test {
+
+// Used to perform an audio processing simulation from wav files.
+class WavBasedSimulator final : public AudioProcessingSimulator {
+ public:
+  WavBasedSimulator(const SimulationSettings& settings,
+                    rtc::scoped_refptr<AudioProcessing> audio_processing,
+                    std::unique_ptr<AudioProcessingBuilder> ap_builder);
+
+  WavBasedSimulator() = delete;
+  WavBasedSimulator(const WavBasedSimulator&) = delete;
+  WavBasedSimulator& operator=(const WavBasedSimulator&) = delete;
+
+  ~WavBasedSimulator() override;
+
+  // Processes the WAV input.
+  void Process() override;
+
+  // Only analyzes the data for the simulation, instead of performing any
+  // processing.
+  void Analyze() override;
+
+ private:
+  enum SimulationEventType {
+    kProcessStream,
+    kProcessReverseStream,
+  };
+
+  void Initialize();
+  bool HandleProcessStreamCall();
+  bool HandleProcessReverseStreamCall();
+  void PrepareProcessStreamCall();
+  void PrepareReverseProcessStreamCall();
+  static std::vector<SimulationEventType> GetDefaultEventChain();
+  static std::vector<SimulationEventType> GetCustomEventChain(
+      absl::string_view filename);
+
+  std::vector<SimulationEventType> call_chain_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc
new file mode 100644
index 0000000000..bd1c50477a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc
@@ -0,0 +1,278 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J. Harris.
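+// (With the 48 kHz full-band signal used here, each of the three bands
+// covers 8 kHz of spectrum and is produced at a 16 kHz rate.)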
+//
+// The idea is to take a heterodyne system and change the order of the
+// components to get something which is efficient to implement digitally.
+//
+// It is possible to separate the filter using the noble identity as follows:
+//
+// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3)
+//
+// This is used in the analysis stage to first downsample serial to parallel
+// and then filter each branch with one of these polyphase decompositions of
+// the lowpass prototype. Because each filter is only a modulation of the
+// prototype, it is enough to multiply each coefficient by the respective
+// cosine value to shift it to the desired band. But because the cosine
+// period is 12 samples, it requires separating the prototype even further
+// using the noble identity. After filtering and modulating for each band,
+// the output of all filters is accumulated to get the downsampled bands.
+//
+// A similar logic can be applied to the synthesis stage.
+
+#include "modules/audio_processing/three_band_filter_bank.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Factors to take into account when choosing `kFilterSize`:
+//   1. Higher `kFilterSize`, means faster transition, which ensures less
+//      aliasing. This is especially important when there is non-linear
+//      processing between the splitting and merging.
+//   2. The delay that this filter bank introduces is
+//      `kNumBands` * `kSparsity` * `kFilterSize` / 2, so it increases
+//      linearly with `kFilterSize`.
+//   3. The computation complexity also increases linearly with
+//      `kFilterSize`.
+
+// The Matlab code to generate these `kFilterCoeffs` is:
+//
+// N = kNumBands * kSparsity * kFilterSize - 1;
+// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
+// reshape(h, kNumBands * kSparsity, kFilterSize);
+//
+// The code below uses the values of kFilterSize, kNumBands and kSparsity
+// specified in the header.
+
+// Because the total bandwidth of the lower and higher band is double the
+// middle one (because of the spectrum parity), the low-pass prototype is
+// half the bandwidth of 1 / (2 * `kNumBands`) and is then shifted with
+// cosine modulation to the right places.
+// A Kaiser window is used because of its flexibility and the alpha is set to
+// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast
+// transition.
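+//
+// For the values used here (kNumBands = 3, kSparsity = 4, kFilterSize = 4),
+// the delay quoted above is 3 * 4 * 4 / 2 = 24 full-band samples.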
+
+constexpr int kSubSampling = ThreeBandFilterBank::kNumBands;
+constexpr int kDctSize = ThreeBandFilterBank::kNumBands;
+static_assert(ThreeBandFilterBank::kNumBands *
+                      ThreeBandFilterBank::kSplitBandSize ==
+                  ThreeBandFilterBank::kFullBandSize,
+              "The full band must be split in equally sized subbands");
+
+const float
+    kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = {
+        {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
+        {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
+        {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
+        {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
+        {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
+        {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
+        {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
+        {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
+        {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
+        {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
+
+constexpr int kZeroFilterIndex1 = 3;
+constexpr int kZeroFilterIndex2 = 9;
+
+const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] =
+    {{2.f, 2.f, 2.f},
+     {1.73205077f, 0.f, -1.73205077f},
+     {1.f, -2.f, 1.f},
+     {-1.f, 2.f, -1.f},
+     {-1.73205077f, 0.f, 1.73205077f},
+     {-2.f, -2.f, -2.f},
+     {-1.73205077f, 0.f, 1.73205077f},
+     {-1.f, 2.f, -1.f},
+     {1.f, -2.f, 1.f},
+     {1.73205077f, 0.f, -1.73205077f}};
+
+// Filters the input signal `in` with the filter `filter` using a shift by
+// `in_shift`, taking into account the previous state.
+void FilterCore(
+    rtc::ArrayView<const float, kFilterSize> filter,
+    rtc::ArrayView<const float, ThreeBandFilterBank::kSplitBandSize> in,
+    const int in_shift,
+    rtc::ArrayView<float, ThreeBandFilterBank::kSplitBandSize> out,
+    rtc::ArrayView<float, kMemorySize> state) {
+  constexpr int kMaxInShift = (kStride - 1);
+  RTC_DCHECK_GE(in_shift, 0);
+  RTC_DCHECK_LE(in_shift, kMaxInShift);
+  std::fill(out.begin(), out.end(), 0.f);
+
+  for (int k = 0; k < in_shift; ++k) {
+    for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize;
+         ++i, j -= kStride) {
+      out[k] += state[j] * filter[i];
+    }
+  }
+
+  for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) {
+    RTC_DCHECK_GE(shift, 0);
+    const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2));
+    for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) {
+      out[k] += in[j] * filter[i];
+    }
+    for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride;
+         i < kFilterSize; ++i, j -= kStride) {
+      out[k] += state[j] * filter[i];
+    }
+  }
+
+  for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift;
+       k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) {
+    for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) {
+      out[k] += in[j] * filter[i];
+    }
+  }
+
+  // Update current state.
+  std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize,
+            in.end(), state.begin());
+}
+
+}  // namespace
+
+// Because the low-pass filter prototype has half bandwidth it is possible to
+// use a DCT to shift it in both directions at the same time, to the center
+// frequencies [1 / 12, 3 / 12, 5 / 12].
+ThreeBandFilterBank::ThreeBandFilterBank() {
+  RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters);
+  RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters);
+  for (int k = 0; k < kNumNonZeroFilters; ++k) {
+    RTC_DCHECK_EQ(state_analysis_[k].size(), kMemorySize);
+    RTC_DCHECK_EQ(state_synthesis_[k].size(), kMemorySize);
+
+    state_analysis_[k].fill(0.f);
+    state_synthesis_[k].fill(0.f);
+  }
+}
+
+ThreeBandFilterBank::~ThreeBandFilterBank() = default;
+
+// The analysis can be separated in these steps:
+//   1. Serial to parallel downsampling by a factor of `kNumBands`.
+//   2. Filtering of `kSparsity` different delayed signals with polyphase
+//      decomposition of the low-pass prototype filter and upsampled by a
+//      factor of `kSparsity`.
+//   3. Modulating with cosines and accumulating to get the desired band.
+void ThreeBandFilterBank::Analysis(
+    rtc::ArrayView<const float, kFullBandSize> in,
+    rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
+        out) {
+  // Initialize the output to zero.
+  for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+    RTC_DCHECK_EQ(out[band].size(), kSplitBandSize);
+    std::fill(out[band].begin(), out[band].end(), 0);
+  }
+
+  for (int downsampling_index = 0; downsampling_index < kSubSampling;
+       ++downsampling_index) {
+    // Downsample to form the filter input.
+    std::array<float, kSplitBandSize> in_subsampled;
+    for (int k = 0; k < kSplitBandSize; ++k) {
+      in_subsampled[k] =
+          in[(kSubSampling - 1) - downsampling_index + kSubSampling * k];
+    }
+
+    for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+      // Choose filter, skip zero filters.
+      const int index = downsampling_index + in_shift * kSubSampling;
+      if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+        continue;
+      }
+      const int filter_index =
+          index < kZeroFilterIndex1
+              ? index
+              : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
+
+      rtc::ArrayView<const float, kFilterSize> filter(
+          kFilterCoeffs[filter_index]);
+      rtc::ArrayView<const float, kDctSize> dct_modulation(
+          kDctModulation[filter_index]);
+      rtc::ArrayView<float, kMemorySize> state(state_analysis_[filter_index]);
+
+      // Filter.
+      std::array<float, kSplitBandSize> out_subsampled;
+      FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+      // Modulate the output and accumulate it into each band.
+      for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+        float* out_band = out[band].data();
+        for (int n = 0; n < kSplitBandSize; ++n) {
+          out_band[n] += dct_modulation[band] * out_subsampled[n];
+        }
+      }
+    }
+  }
+}
+
+// The synthesis can be separated in these steps:
+//   1. Modulating with cosines.
+//   2. Filtering each one with a polyphase decomposition of the low-pass
+//      prototype filter upsampled by a factor of `kSparsity` and accumulating
+//      `kSparsity` signals with different delays.
+//   3. Parallel to serial upsampling by a factor of `kNumBands`.
+void ThreeBandFilterBank::Synthesis(
+    rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
+        in,
+    rtc::ArrayView<float, kFullBandSize> out) {
+  std::fill(out.begin(), out.end(), 0);
+  for (int upsampling_index = 0; upsampling_index < kSubSampling;
+       ++upsampling_index) {
+    for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+      // Choose filter, skip zero filters.
+      const int index = upsampling_index + in_shift * kSubSampling;
+      if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+        continue;
+      }
+      const int filter_index =
+          index < kZeroFilterIndex1
+              ? index
+              : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
+
+      rtc::ArrayView<const float, kFilterSize> filter(
+          kFilterCoeffs[filter_index]);
+      rtc::ArrayView<const float, kDctSize> dct_modulation(
+          kDctModulation[filter_index]);
+      rtc::ArrayView<float, kMemorySize> state(state_synthesis_[filter_index]);
+
+      // Prepare filter input by modulating the banded input.
+      std::array<float, kSplitBandSize> in_subsampled;
+      std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f);
+      for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+        RTC_DCHECK_EQ(in[band].size(), kSplitBandSize);
+        const float* in_band = in[band].data();
+        for (int n = 0; n < kSplitBandSize; ++n) {
+          in_subsampled[n] += dct_modulation[band] * in_band[n];
+        }
+      }
+
+      // Filter.
+      std::array<float, kSplitBandSize> out_subsampled;
+      FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+      // Upsample.
+      constexpr float kUpsamplingScaling = kSubSampling;
+      for (int k = 0; k < kSplitBandSize; ++k) {
+        out[upsampling_index + kSubSampling * k] +=
+            kUpsamplingScaling * out_subsampled[k];
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h
new file mode 100644
index 0000000000..db66caba4a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+#define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+constexpr int kSparsity = 4;
+constexpr int kStrideLog2 = 2;
+constexpr int kStride = 1 << kStrideLog2;
+constexpr int kNumZeroFilters = 2;
+constexpr int kFilterSize = 4;
+constexpr int kMemorySize = kFilterSize * kStride - 1;
+static_assert(kMemorySize == 15,
+              "The memory size must be sufficient to provide memory for the "
+              "shifted filters");
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J Harris.
+// The low-pass filter prototype has these characteristics:
+// * Pass-band ripple = 0.3dB
+// * Pass-band frequency = 0.147 (7kHz at 48kHz)
+// * Stop-band attenuation = 40dB
+// * Stop-band frequency = 0.192 (9.2kHz at 48kHz)
+// * Delay = 24 samples (500us at 48kHz)
+// * Linear phase
+// This filter bank does not satisfy perfect reconstruction. The SNR after
+// analysis and synthesis (with no processing in between) is approximately
+// 9.5dB, depending on the input signal, after compensating for the delay.
+class ThreeBandFilterBank final {
+ public:
+  static const int kNumBands = 3;
+  static const int kFullBandSize = 480;
+  static const int kSplitBandSize =
+      ThreeBandFilterBank::kFullBandSize / ThreeBandFilterBank::kNumBands;
+  static const int kNumNonZeroFilters =
+      kSparsity * ThreeBandFilterBank::kNumBands - kNumZeroFilters;
+
+  ThreeBandFilterBank();
+  ~ThreeBandFilterBank();
+
+  // Splits `in` of size kFullBandSize into 3 downsampled frequency bands in
+  // `out`, each of size 160.
+  void Analysis(rtc::ArrayView<const float, kFullBandSize> in,
+                rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> out);
+
+  // Merges the 3 downsampled frequency bands in `in`, each of size 160, into
+  // `out`, which is of size kFullBandSize.
+  void Synthesis(rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> in,
+                 rtc::ArrayView<float, kFullBandSize> out);
+
+ private:
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_analysis_;
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_synthesis_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
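For orientation, here is a minimal, hedged usage sketch of the class above (an editor's illustration, not part of the patch). It splits one 480-sample frame at 48 kHz into the three 160-sample bands and merges it back; per the comments above, the output lags the input by kNumBands * kSparsity * kFilterSize / 2 = 24 samples. The construction of fixed-size `rtc::ArrayView`s from containers is assumed, as used elsewhere in WebRTC's splitting filter.

```cpp
// Illustrative sketch only.
#include <array>

#include "api/array_view.h"
#include "modules/audio_processing/three_band_filter_bank.h"

void SplitProcessMerge(
    webrtc::ThreeBandFilterBank& bank,  // Filter state persists across frames.
    rtc::ArrayView<const float, webrtc::ThreeBandFilterBank::kFullBandSize> in,
    rtc::ArrayView<float, webrtc::ThreeBandFilterBank::kFullBandSize> out) {
  using webrtc::ThreeBandFilterBank;
  // Storage for the three downsampled bands (160 samples each).
  std::array<std::array<float, ThreeBandFilterBank::kSplitBandSize>,
             ThreeBandFilterBank::kNumBands>
      bands;
  std::array<rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
      band_views = {rtc::ArrayView<float>(bands[0]),
                    rtc::ArrayView<float>(bands[1]),
                    rtc::ArrayView<float>(bands[2])};
  bank.Analysis(in, band_views);
  // Per-band processing would go here.
  bank.Synthesis(band_views, out);  // Output is delayed by 24 samples.
}
```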
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn
new file mode 100644
index 0000000000..41aeab0abe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn
@@ -0,0 +1,133 @@
+# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_source_set("transient_suppressor_api") {
+  sources = [ "transient_suppressor.h" ]
+}
+
+rtc_library("transient_suppressor_impl") {
+  visibility = [
+    ":click_annotate",
+    ":transient_suppression_test",
+    ":transient_suppression_unittests",
+    "..:optionally_built_submodule_creators",
+  ]
+  sources = [
+    "common.h",
+    "daubechies_8_wavelet_coeffs.h",
+    "dyadic_decimator.h",
+    "moving_moments.cc",
+    "moving_moments.h",
+    "transient_detector.cc",
+    "transient_detector.h",
+    "transient_suppressor_impl.cc",
+    "transient_suppressor_impl.h",
+    "windows_private.h",
+    "wpd_node.cc",
+    "wpd_node.h",
+    "wpd_tree.cc",
+    "wpd_tree.h",
+  ]
+  deps = [
+    ":transient_suppressor_api",
+    ":voice_probability_delay_unit",
+    "../../../common_audio:common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../common_audio:fir_filter",
+    "../../../common_audio:fir_filter_factory",
+    "../../../common_audio/third_party/ooura:fft_size_256",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:gtest_prod",
+    "../../../rtc_base:logging",
+  ]
+}
+
+rtc_library("voice_probability_delay_unit") {
+  sources = [
+    "voice_probability_delay_unit.cc",
+    "voice_probability_delay_unit.h",
+  ]
+  deps = [ "../../../rtc_base:checks" ]
+}
+
+if (rtc_include_tests) {
+  if (!build_with_chromium) {
+    rtc_executable("click_annotate") {
+      testonly = true
+      sources = [
+        "click_annotate.cc",
+        "file_utils.cc",
+        "file_utils.h",
+      ]
+      deps = [
+        ":transient_suppressor_impl",
+        "..:audio_processing",
+        "../../../rtc_base/system:file_wrapper",
+        "../../../system_wrappers",
+      ]
+    }
+
+    rtc_executable("transient_suppression_test") {
+      testonly = true
+      sources = [
+        "file_utils.cc",
+        "file_utils.h",
+        "transient_suppression_test.cc",
+        "voice_probability_delay_unit_unittest.cc",
+      ]
+      deps = [
+        ":transient_suppressor_api",
+        ":transient_suppressor_impl",
+        ":voice_probability_delay_unit",
+        "..:audio_processing",
+        "../../../common_audio",
+        "../../../rtc_base/system:file_wrapper",
+        "../../../system_wrappers",
+        "../../../test:fileutils",
+        "../../../test:test_support",
+        "../agc:level_estimation",
+        "//testing/gtest",
+        "//third_party/abseil-cpp/absl/flags:flag",
+        "//third_party/abseil-cpp/absl/flags:parse",
+        "//third_party/abseil-cpp/absl/types:optional",
+      ]
+    }
+  }
+
+  rtc_library("transient_suppression_unittests") {
+    testonly = true
+    sources = [
+      "dyadic_decimator_unittest.cc",
+      "file_utils.cc",
+      "file_utils.h",
+      "file_utils_unittest.cc",
+      "moving_moments_unittest.cc",
+      "transient_detector_unittest.cc",
+      "transient_suppressor_unittest.cc",
+      "voice_probability_delay_unit_unittest.cc",
+      "wpd_node_unittest.cc",
+      "wpd_tree_unittest.cc",
+    ]
+    deps = [
+      ":transient_suppressor_api",
+      ":transient_suppressor_impl",
+      ":voice_probability_delay_unit",
+      "../../../rtc_base:stringutils",
+      "../../../rtc_base/system:file_wrapper",
+      "../../../test:fileutils",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+    absl_deps = [
+      "//third_party/abseil-cpp/absl/strings",
+      "//third_party/abseil-cpp/absl/types:optional",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc b/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc
new file mode 100644
index 0000000000..f3f040f9aa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cfloat>
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include <vector>
+
+#include "modules/audio_processing/transient/file_utils.h"
+#include "modules/audio_processing/transient/transient_detector.h"
+#include "rtc_base/system/file_wrapper.h"
+
+using webrtc::FileWrapper;
+using webrtc::TransientDetector;
+
+// Application to generate an RTP timing file.
+// Opens the PCM file and divides the signal in frames.
+// Creates a send times array, one for each step.
+// Each block that contains a transient has an infinite send time.
+// The resulting array is written to a DAT file.
+// Returns -1 on error or `lost_packets` otherwise.
+int main(int argc, char* argv[]) {
+  if (argc != 5) {
+    printf("\n%s - Application to generate an RTP timing file.\n\n", argv[0]);
+    printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
+    printf("Opens the PCMfile with sampleRate in Hertz.\n");
+    printf("Creates a send times array, one for each chunkSize ");
+    printf("milliseconds step.\n");
+    printf("Each block that contains a transient has an infinite send time. ");
+    printf("The resulting array is written to a DATfile.\n\n");
+    return 0;
+  }
+
+  FileWrapper pcm_file = FileWrapper::OpenReadOnly(argv[1]);
+  if (!pcm_file.is_open()) {
+    printf("\nThe %s could not be opened.\n\n", argv[1]);
+    return -1;
+  }
+
+  FileWrapper dat_file = FileWrapper::OpenWriteOnly(argv[2]);
+  if (!dat_file.is_open()) {
+    printf("\nThe %s could not be opened.\n\n", argv[2]);
+    return -1;
+  }
+
+  int chunk_size_ms = atoi(argv[3]);
+  if (chunk_size_ms <= 0) {
+    printf("\nThe chunkSize must be a positive integer\n\n");
+    return -1;
+  }
+
+  int sample_rate_hz = atoi(argv[4]);
+  if (sample_rate_hz <= 0) {
+    printf("\nThe sampleRate must be a positive integer\n\n");
+    return -1;
+  }
+
+  TransientDetector detector(sample_rate_hz);
+  int lost_packets = 0;
+  size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
+  std::unique_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
+  std::vector<float> send_times;
+
+  // Read first buffer from the PCM test file.
+  size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
+      &pcm_file, audio_buffer_length, audio_buffer.get());
+  for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
+    // Pad the rest of the buffer with zeros.
+    for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
+      audio_buffer[i] = 0.0;
+    }
+    float value =
+        detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
+    if (value < 0.5f) {
+      value = time;
+    } else {
+      value = FLT_MAX;
+      ++lost_packets;
+    }
+    send_times.push_back(value);
+
+    // Read next buffer from the PCM test file.
+    file_samples_read = ReadInt16FromFileToFloatBuffer(
+        &pcm_file, audio_buffer_length, audio_buffer.get());
+  }
+
+  size_t floats_written =
+      WriteFloatBufferToFile(&dat_file, send_times.size(), &send_times[0]);
+
+  if (floats_written == 0) {
+    printf("\nThe send times could not be written to DAT file\n\n");
+    return -1;
+  }
+
+  pcm_file.Close();
+  dat_file.Close();
+
+  return lost_packets;
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/common.h b/third_party/libwebrtc/modules/audio_processing/transient/common.h
new file mode 100644
index 0000000000..63c9a7b315
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/common.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+namespace webrtc {
+namespace ts {
+
+static const float kPi = 3.14159265358979323846f;
+static const int kChunkSizeMs = 10;
+enum {
+  kSampleRate8kHz = 8000,
+  kSampleRate16kHz = 16000,
+  kSampleRate32kHz = 32000,
+  kSampleRate48kHz = 48000
+};
+
+}  // namespace ts
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
new file mode 100644
index 0000000000..92233bfd74
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This header file defines the FIR coefficients of the Daubechies 8 wavelet.
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+
+// Decomposition coefficients Daubechies 8.
+
+namespace webrtc {
+
+const int kDaubechies8CoefficientsLength = 16;
+
+const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength] = {
+    -5.44158422430816093862e-02f, 3.12871590914465924627e-01f,
+    -6.75630736298012846142e-01f, 5.85354683654869090148e-01f,
+    1.58291052560238926228e-02f,  -2.84015542962428091389e-01f,
+    -4.72484573997972536787e-04f, 1.28747426620186011803e-01f,
+    1.73693010020221083600e-02f,  -4.40882539310647192377e-02f,
+    -1.39810279170155156436e-02f, 8.74609404701565465445e-03f,
+    4.87035299301066034600e-03f,  -3.91740372995977108837e-04f,
+    -6.75449405998556772109e-04f, -1.17476784002281916305e-04f};
+
+const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
+    -1.17476784002281916305e-04f, 6.75449405998556772109e-04f,
+    -3.91740372995977108837e-04f, -4.87035299301066034600e-03f,
+    8.74609404701565465445e-03f,  1.39810279170155156436e-02f,
+    -4.40882539310647192377e-02f, -1.73693010020221083600e-02f,
+    1.28747426620186011803e-01f,  4.72484573997972536787e-04f,
+    -2.84015542962428091389e-01f, -1.58291052560238926228e-02f,
+    5.85354683654869090148e-01f,  6.75630736298012846142e-01f,
+    3.12871590914465924627e-01f,  5.44158422430816093862e-02f};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
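The two coefficient arrays above are related by the standard quadrature-mirror construction: the low-pass coefficients are the time-reversed high-pass coefficients with alternating signs. A small sanity-check sketch (an editor's illustration, not part of the patch) makes the relation explicit:

```cpp
// Illustrative sketch only: verifies low[n] == (-1)^n * high[N - 1 - n] for
// the Daubechies 8 arrays defined above.
#include <cmath>
#include <cstdio>

#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"

int main() {
  const int n_coeffs = webrtc::kDaubechies8CoefficientsLength;
  for (int n = 0; n < n_coeffs; ++n) {
    const float sign = (n % 2 == 0) ? 1.f : -1.f;
    const float expected =
        sign * webrtc::kDaubechies8HighPassCoefficients[n_coeffs - 1 - n];
    if (std::fabs(webrtc::kDaubechies8LowPassCoefficients[n] - expected) >
        1e-12f) {
      std::printf("Mismatch at n=%d\n", n);
      return 1;
    }
  }
  std::printf("QMF relation holds.\n");
  return 0;
}
```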
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h
new file mode 100644
index 0000000000..52467e8c25
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+
+#include <cstdlib>
+
+// Provides a set of static methods to perform dyadic decimations.
+
+namespace webrtc {
+
+// Returns the proper length of the output buffer that you should use for the
+// given `in_length` and decimation `odd_sequence`. This computation cannot
+// fail.
+inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
+                                           bool odd_sequence) {
+  size_t out_length = in_length / 2;
+
+  if (in_length % 2 == 1 && !odd_sequence) {
+    ++out_length;
+  }
+
+  return out_length;
+}
+
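A quick worked example of the length computation above and of `DyadicDecimate()`, which is defined just below (an editor's illustration, not part of the patch): for the five-sample sequence {0, 1, 2, 3, 4}, keeping the even-indexed members yields {0, 2, 4} (length 3) and keeping the odd-indexed members yields {1, 3} (length 2), matching `GetOutLengthToDyadicDecimate(5, false) == 3` and `GetOutLengthToDyadicDecimate(5, true) == 2`.

```cpp
// Illustrative sketch only.
#include <cstdint>
#include <cstdio>

#include "modules/audio_processing/transient/dyadic_decimator.h"

int main() {
  const int16_t in[] = {0, 1, 2, 3, 4};
  int16_t out[3] = {};
  // Keep even-indexed samples: {0, 2, 4}.
  size_t n = webrtc::DyadicDecimate(in, 5, /*odd_sequence=*/false, out, 3);
  std::printf("even: %zu samples, first=%d\n", n, out[0]);
  // Keep odd-indexed samples: {1, 3}.
  n = webrtc::DyadicDecimate(in, 5, /*odd_sequence=*/true, out, 3);
  std::printf("odd: %zu samples, first=%d\n", n, out[0]);
  return 0;
}
```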
+// Performs a dyadic decimation: removes every odd/even member of a sequence,
+// halving its overall length.
+// Arguments:
+//    in: array of `in_length`.
+//    odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
+//                  if true, the even members will be removed (0, 2, 4, ...).
+//    out: array of `out_length`. `out_length` must be large enough to
+//         hold the decimated output. The necessary length can be provided by
+//         GetOutLengthToDyadicDecimate().
+//         Must be previously allocated.
+// Returns the number of output samples, or 0 on error.
+template <typename T>
+static size_t DyadicDecimate(const T* in,
+                             size_t in_length,
+                             bool odd_sequence,
+                             T* out,
+                             size_t out_length) {
+  size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
+
+  if (!in || !out || in_length <= 0 || out_length < half_length) {
+    return 0;
+  }
+
+  size_t output_samples = 0;
+  size_t index_adjustment = odd_sequence ? 1 : 0;
+  for (output_samples = 0; output_samples < half_length; ++output_samples) {
+    out[output_samples] = in[output_samples * 2 + index_adjustment];
+  }
+
+  return output_samples;
+}
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
new file mode 100644
index 0000000000..e4776d694f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const size_t kEvenBufferLength = 6;
+static const size_t kOddBufferLength = 5;
+static const size_t kOutBufferLength = 3;
+
+int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5};
+int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4};
+int16_t test_buffer_out[kOutBufferLength];
+
+TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) {
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false));
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true));
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false));
+  EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true));
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) {
+  size_t out_samples = 0;
+
+  out_samples = DyadicDecimate(static_cast<int16_t*>(NULL), kEvenBufferLength,
+                               false,  // Even sequence.
+                               test_buffer_out, kOutBufferLength);
+  EXPECT_EQ(0u, out_samples);
+
+  out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                               false,  // Even sequence.
+                               static_cast<int16_t*>(NULL), kOutBufferLength);
+  EXPECT_EQ(0u, out_samples);
+
+  // Less than required `out_length`.
+  out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                               false,  // Even sequence.
+                               test_buffer_out, 2);
+  EXPECT_EQ(0u, out_samples);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kEvenBufferLength, false);
+
+  size_t out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                                      false,  // Even sequence.
+                                      test_buffer_out, kOutBufferLength);
+
+  EXPECT_EQ(expected_out_samples, out_samples);
+
+  EXPECT_EQ(0, test_buffer_out[0]);
+  EXPECT_EQ(2, test_buffer_out[1]);
+  EXPECT_EQ(4, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kEvenBufferLength, true);
+
+  size_t out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                                      true,  // Odd sequence.
+                                      test_buffer_out, kOutBufferLength);
+
+  EXPECT_EQ(expected_out_samples, out_samples);
+
+  EXPECT_EQ(1, test_buffer_out[0]);
+  EXPECT_EQ(3, test_buffer_out[1]);
+  EXPECT_EQ(5, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kOddBufferLength, false);
+
+  size_t out_samples = DyadicDecimate(test_buffer_odd_len, kOddBufferLength,
+                                      false,  // Even sequence.
+                                      test_buffer_out, kOutBufferLength);
+
+  EXPECT_EQ(expected_out_samples, out_samples);
+
+  EXPECT_EQ(0, test_buffer_out[0]);
+  EXPECT_EQ(2, test_buffer_out[1]);
+  EXPECT_EQ(4, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kOddBufferLength, true);
+
+  size_t out_samples = DyadicDecimate(test_buffer_odd_len, kOddBufferLength,
+                                      true,  // Odd sequence.
+                                      test_buffer_out, kOutBufferLength);
+
+  EXPECT_EQ(expected_out_samples, out_samples);
+
+  EXPECT_EQ(1, test_buffer_out[0]);
+  EXPECT_EQ(3, test_buffer_out[1]);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc
new file mode 100644
index 0000000000..58f99325d1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc
@@ -0,0 +1,257 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/file_utils.h"
+
+#include <memory>
+
+#include "rtc_base/system/file_wrapper.h"
+
+namespace webrtc {
+
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint32_t binary_value = 0;
+  for (int i = 3; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<float>(binary_value);
+
+  return 0;
+}
+
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint64_t binary_value = 0;
+  for (int i = 7; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<double>(binary_value);
+
+  return 0;
+}
+
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint32_t binary_value = bit_cast<uint32_t>(value);
+  for (size_t i = 0; i < 4; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint64_t binary_value = bit_cast<uint64_t>(value);
+  for (size_t i = 0; i < 8; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_read = 0;
+
+  while (int16s_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 2);
+    if (bytes_read < 2) {
+      break;
+    }
+    int16_t value = byte_array[1];
+    value <<= 8;
+    value += byte_array[0];
+    buffer[int16s_read] = value;
+    ++int16s_read;
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+                               size_t length,
+                               float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_read = 0;
+
+  while (floats_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 4);
+    if (bytes_read < 4) {
+      break;
+    }
+    ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]);
+    ++floats_read;
+  }
+
+  return floats_read;
+}
+
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_read = 0;
+
+  while (doubles_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 8);
+    if (bytes_read < 8) {
+      break;
+    }
+    ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]);
+    ++doubles_read;
+  }
+
+  return doubles_read;
+}
+
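The converters above implement little-endian (de)serialization of IEEE-754 values via `bit_cast`. A hedged round-trip illustration (an editor's sketch, not part of the patch):

```cpp
// Illustrative sketch only: float -> 4 little-endian bytes -> float.
#include <cstdint>
#include <cstdio>

#include "modules/audio_processing/transient/file_utils.h"

int main() {
  uint8_t bytes[4];
  float restored = 0.f;
  if (webrtc::ConvertFloatToByteArray(3.14159265f, bytes) == 0 &&
      webrtc::ConvertByteArrayToFloat(bytes, &restored) == 0) {
    // bytes[0] holds the least significant byte of the representation.
    std::printf("restored = %.7f\n", restored);  // Prints 3.1415927.
  }
  return 0;
}
```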
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_written = 0;
+
+  for (int16s_written = 0; int16s_written < length; ++int16s_written) {
+    // Get byte representation.
+    byte_array[0] = buffer[int16s_written] & 0xFF;
+    byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
+
+    file->Write(byte_array.get(), 2);
+  }
+
+  file->Flush();
+
+  return int16s_written;
+}
+
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_written = 0;
+
+  for (floats_written = 0; floats_written < length; ++floats_written) {
+    // Get byte representation.
+    ConvertFloatToByteArray(buffer[floats_written], byte_array.get());
+
+    file->Write(byte_array.get(), 4);
+  }
+
+  file->Flush();
+
+  return floats_written;
+}
+
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_written = 0;
+
+  for (doubles_written = 0; doubles_written < length; ++doubles_written) {
+    // Get byte representation.
+    ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get());
+
+    file->Write(byte_array.get(), 8);
+  }
+
+  file->Flush();
+
+  return doubles_written;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h
new file mode 100644
index 0000000000..b748337773
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h
@@ -0,0 +1,117 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+
+#include <string.h>
+
+#include "rtc_base/system/file_wrapper.h"
+
+namespace webrtc {
+
+// This is a copy of the cast included in the Chromium codebase here:
+// http://cs.chromium.org/src/third_party/cld/base/casts.h
+template <typename Dest, typename Source>
+inline Dest bit_cast(const Source& source) {
+  // A compile error here means your Dest and Source have different sizes.
+  static_assert(sizeof(Dest) == sizeof(Source),
+                "Dest and Source have different sizes");
+
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+// Converts the byte array with binary float representation to float.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
+
+// Converts the byte array with binary double representation to double.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
+
+// Converts a float to a byte array with binary float representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
+
+// Converts a double to a byte array with binary double representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
+
+// Reads `length` 16-bit integers from `file` to `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer);
+
+// Reads `length` 16-bit integers from `file` and stores those values
+// (converting them) in `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer);
+
+// Reads `length` 16-bit integers from `file` and stores those values
+// (converting them) in `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer);
+
+// Reads `length` floats in binary representation (4 bytes) from `file` to
+// `buffer`.
+// `file` must be previously opened.
+// Returns the number of floats read, or 0 on error.
+size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
+
+// Reads `length` doubles in binary representation (8 bytes) from `file` to
+// `buffer`.
+// `file` must be previously opened.
+// Returns the number of doubles read, or 0 on error.
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer);
+
+// Writes `length` 16-bit integers from `buffer` in binary representation (2
+// bytes) to `file`. It flushes `file`, so after this call there are no
+// writes pending.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers written, or 0 on error.
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer);
+
+// Writes `length` floats from `buffer` in binary representation (4 bytes) to
+// `file`. It flushes `file`, so after this call there are no writes pending.
+// `file` must be previously opened.
+// Returns the number of floats written, or 0 on error.
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer);
+
+// Writes `length` doubles from `buffer` in binary representation (8 bytes) to
+// `file`. It flushes `file`, so after this call there are no writes pending.
+// `file` must be previously opened.
+// Returns the number of doubles written, or 0 on error.
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc
new file mode 100644
index 0000000000..a9dddb1eda
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc
@@ -0,0 +1,501 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/file_utils.h"
+
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40};
+static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40};
+static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66};
+
+static const uint8_t kPiBytes[8] = {0x18, 0x2D, 0x44, 0x54,
+                                    0xFB, 0x21, 0x09, 0x40};
+static const uint8_t kEBytes[8] = {0x69, 0x57, 0x14, 0x8B,
+                                   0x0A, 0xBF, 0x05, 0x40};
+static const uint8_t kAvogadroBytes[8] = {0xF4, 0xBC, 0xA8, 0xDF,
+                                          0x85, 0xE1, 0xDF, 0x44};
+
+static const double kPi = 3.14159265358979323846;
+static const double kE = 2.71828182845904523536;
+static const double kAvogadro = 602214100000000000000000.0;
+
+class TransientFileUtilsTest : public ::testing::Test {
+ protected:
+  TransientFileUtilsTest()
+      : kTestFileName(
+            test::ResourcePath("audio_processing/transient/double-utils",
+                               "dat")),
+        kTestFileNamef(
+            test::ResourcePath("audio_processing/transient/float-utils",
+                               "dat")) {}
+
+  ~TransientFileUtilsTest() override { CleanupTempFiles(); }
+
+  std::string CreateTempFilename(absl::string_view dir,
+                                 absl::string_view prefix) {
+    std::string filename = test::TempFilename(dir, prefix);
+    temp_filenames_.push_back(filename);
+    return filename;
+  }
+
+  void CleanupTempFiles() {
+    for (const std::string& filename : temp_filenames_) {
+      remove(filename.c_str());
+    }
+    temp_filenames_.clear();
+  }
+
+  // This file (used in some tests) contains binary data. The data correspond
+  // to the double representation of the constants Pi, E, and Avogadro's
+  // number, appended in that order.
+  const std::string kTestFileName;
+
+  // This file (used in some tests) contains binary data. The data correspond
+  // to the float representation of the constants Pi, E, and Avogadro's
+  // number, appended in that order.
+  const std::string kTestFileNamef;
+
+  // List of temporary filenames created by CreateTempFilename.
+  std::vector<std::string> temp_filenames_;
+};
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertByteArrayToFloat DISABLED_ConvertByteArrayToFloat
+#else
+#define MAYBE_ConvertByteArrayToFloat ConvertByteArrayToFloat
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToFloat) {
+  float value = 0.0;
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value));
+  EXPECT_FLOAT_EQ(kPi, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value));
+  EXPECT_FLOAT_EQ(kE, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value));
+  EXPECT_FLOAT_EQ(kAvogadro, value);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertByteArrayToDouble DISABLED_ConvertByteArrayToDouble
+#else
+#define MAYBE_ConvertByteArrayToDouble ConvertByteArrayToDouble
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToDouble) {
+  double value = 0.0;
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value));
+  EXPECT_DOUBLE_EQ(kPi, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value));
+  EXPECT_DOUBLE_EQ(kE, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value));
+  EXPECT_DOUBLE_EQ(kAvogadro, value);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertFloatToByteArray DISABLED_ConvertFloatToByteArray
+#else
+#define MAYBE_ConvertFloatToByteArray ConvertFloatToByteArray
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertFloatToByteArray) {
+  std::unique_ptr<uint8_t[]> bytes(new uint8_t[4]);
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4));
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4));
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertDoubleToByteArray DISABLED_ConvertDoubleToByteArray
+#else
+#define MAYBE_ConvertDoubleToByteArray ConvertDoubleToByteArray
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertDoubleToByteArray) {
+  std::unique_ptr<uint8_t[]> bytes(new uint8_t[8]);
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8));
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8));
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16BufferFromFile DISABLED_ReadInt16BufferFromFile
+#else
+#define MAYBE_ReadInt16BufferFromFile ReadInt16BufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16BufferFromFile) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<int16_t[]> buffer(new int16_t[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_EQ(22377, buffer[4]);
+  EXPECT_EQ(16389, buffer[7]);
+  EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file holds less data than
+  // requested; the read should stop at the end of the file and return the
+  // number of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new int16_t[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLengthLargerThanFile,
+                                    buffer.get()));
+  EXPECT_EQ(11544, buffer[0]);
+  EXPECT_EQ(22377, buffer[4]);
+  EXPECT_EQ(16389, buffer[7]);
+  EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16FromFileToFloatBuffer \
+  DISABLED_ReadInt16FromFileToFloatBuffer
+#else
+#define MAYBE_ReadInt16FromFileToFloatBuffer ReadInt16FromFileToFloatBuffer
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToFloatBuffer) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<float[]> buffer(new float[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToFloatBuffer(&file, kBufferLength, buffer.get()));
+
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file holds less data than
+  // requested; the read should stop at the end of the file and return the
+  // number of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new float[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToFloatBuffer(&file, kBufferLengthLargerThanFile,
+                                           buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16FromFileToDoubleBuffer \
+  DISABLED_ReadInt16FromFileToDoubleBuffer
+#else
+#define MAYBE_ReadInt16FromFileToDoubleBuffer ReadInt16FromFileToDoubleBuffer
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToDoubleBuffer) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<double[]> buffer(new double[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(&file, kBufferLength,
+                                                           buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file holds less data than
+  // requested; the read should stop at the end of the file and return the
+  // number of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new double[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToDoubleBuffer(&file, kBufferLengthLargerThanFile,
+                                            buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadFloatBufferFromFile DISABLED_ReadFloatBufferFromFile
+#else
+#define MAYBE_ReadFloatBufferFromFile ReadFloatBufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadFloatBufferFromFile) {
+  std::string test_filename = kTestFileNamef;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileNamef.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<float[]> buffer(new float[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_FLOAT_EQ(kPi, buffer[0]);
+  EXPECT_FLOAT_EQ(kE, buffer[1]);
+  EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file holds less data than
+  // requested; the read should stop at the end of the file and return the
+  // number of floats actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new float[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLengthLargerThanFile,
+                                    buffer.get()));
+  EXPECT_FLOAT_EQ(kPi, buffer[0]);
+  EXPECT_FLOAT_EQ(kE, buffer[1]);
+  EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadDoubleBufferFromFile DISABLED_ReadDoubleBufferFromFile
+#else
+#define MAYBE_ReadDoubleBufferFromFile ReadDoubleBufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadDoubleBufferFromFile) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<double[]> buffer(new double[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+  EXPECT_DOUBLE_EQ(kE, buffer[1]);
+  EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file holds less data than
+  // requested; the read should stop at the end of the file and return the
+  // number of doubles actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new double[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLengthLargerThanFile,
+                                     buffer.get()));
+  EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+  EXPECT_DOUBLE_EQ(kE, buffer[1]);
+  EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteInt16BufferToFile DISABLED_WriteInt16BufferToFile
+#else
+#define MAYBE_WriteInt16BufferToFile WriteInt16BufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteInt16BufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<int16_t[]> written_buffer(new int16_t[kBufferLength]);
+  std::unique_ptr<int16_t[]> read_buffer(new int16_t[kBufferLength]);
+
+  written_buffer[0] = 1;
+  written_buffer[1] = 2;
+  written_buffer[2] = 3;
+
+  EXPECT_EQ(kBufferLength,
+            WriteInt16BufferToFile(&file, kBufferLength, written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteFloatBufferToFile DISABLED_WriteFloatBufferToFile
+#else
+#define MAYBE_WriteFloatBufferToFile WriteFloatBufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteFloatBufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<float[]> written_buffer(new float[kBufferLength]);
+  std::unique_ptr<float[]> read_buffer(new float[kBufferLength]);
+
+  written_buffer[0] = static_cast<float>(kPi);
+  written_buffer[1] = static_cast<float>(kE);
+  written_buffer[2] = static_cast<float>(kAvogadro);
+
+  EXPECT_EQ(kBufferLength,
+            WriteFloatBufferToFile(&file, kBufferLength, written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteDoubleBufferToFile DISABLED_WriteDoubleBufferToFile
+#else
+#define MAYBE_WriteDoubleBufferToFile WriteDoubleBufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteDoubleBufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<double[]> written_buffer(new double[kBufferLength]);
+  std::unique_ptr<double[]> read_buffer(new double[kBufferLength]);
+
+  written_buffer[0] = kPi;
+  written_buffer[1] = kE;
+  written_buffer[2] = kAvogadro;
+
+  EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(&file, kBufferLength,
+                                                   written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ExpectedErrorReturnValues DISABLED_ExpectedErrorReturnValues
+#else
+#define MAYBE_ExpectedErrorReturnValues ExpectedErrorReturnValues
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ExpectedErrorReturnValues) {
+  std::string test_filename = kTestFileName;
+
+  double value;
+  std::unique_ptr<int16_t[]> int16_buffer(new int16_t[1]);
+  std::unique_ptr<double[]> double_buffer(new double[1]);
+  FileWrapper file;
+
+  EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value));
+  EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL));
+
+  EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL));
+
+  // Tests with file not opened.
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 1, double_buffer.get()));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 1, double_buffer.get()));
+
+  file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 0, int16_buffer.get()));
+
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 0, double_buffer.get()));
+
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 0, double_buffer.get()));
+
+  EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 1, NULL));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 0, int16_buffer.get()));
+
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 1, NULL));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 0, double_buffer.get()));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc
new file mode 100644
index 0000000000..83810bfe3c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/moving_moments.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+MovingMoments::MovingMoments(size_t length)
+    : length_(length), queue_(), sum_(0.0), sum_of_squares_(0.0) {
+  RTC_DCHECK_GT(length, 0);
+  for (size_t i = 0; i < length; ++i) {
+    queue_.push(0.0);
+  }
+}
+
+MovingMoments::~MovingMoments() {}
+
+void MovingMoments::CalculateMoments(const float* in,
+                                     size_t in_length,
+                                     float* first,
+                                     float* second) {
+  RTC_DCHECK(in);
+  RTC_DCHECK_GT(in_length, 0);
+  RTC_DCHECK(first);
+  RTC_DCHECK(second);
+
+  for (size_t i = 0; i < in_length; ++i) {
+    const float old_value = queue_.front();
+    queue_.pop();
+    queue_.push(in[i]);
+
+    sum_ += in[i] - old_value;
+    sum_of_squares_ += in[i] * in[i] - old_value * old_value;
+    first[i] = sum_ / length_;
+    second[i] = std::max(0.f, sum_of_squares_ / length_);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h
new file mode 100644
index 0000000000..70451dcb71
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+
+#include <stddef.h>
+
+#include <queue>
+
+namespace webrtc {
+
+// Calculates the first and second moments for each value of a buffer taking
+// into account a given number of previous values.
+// It preserves its state, so it can be called multiple times.
+// TODO(chadan): Implement a function that takes a buffer of first moments and
+// a buffer of second moments and calculates the variances. When needed.
+// TODO(chadan): Add functionality to update with a buffer but only output the
+// last values of the moments. When needed.
+class MovingMoments {
+ public:
+  // Creates a Moving Moments object, that uses the last `length` values
+  // (including the new value introduced in every new calculation).
+  explicit MovingMoments(size_t length);
+  ~MovingMoments();
+
+  // Calculates the new values using `in`. Results will be in the out buffers.
+  // `first` and `second` must be allocated with at least `in_length`.
+  void CalculateMoments(const float* in,
+                        size_t in_length,
+                        float* first,
+                        float* second);
+
+ private:
+  size_t length_;
+  // A queue holding the `length_` latest input values.
+  std::queue<float> queue_;
+  // Sum of the values of the queue.
+  float sum_;
+  // Sum of the squares of the values of the queue.
+  float sum_of_squares_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc
new file mode 100644
index 0000000000..b0e613e7ab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc
@@ -0,0 +1,207 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/moving_moments.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const float kTolerance = 0.0001f;
+
+class MovingMomentsTest : public ::testing::Test {
+ protected:
+  static const size_t kMovingMomentsBufferLength = 5;
+  static const size_t kMaxOutputLength = 20;  // Valid for these tests only.
+
+  virtual void SetUp();
+  // Calls CalculateMoments and verifies that it produces the expected
+  // outputs.
+  void CalculateMomentsAndVerify(const float* input,
+                                 size_t input_length,
+                                 const float* expected_mean,
+                                 const float* expected_mean_squares);
+
+  std::unique_ptr<MovingMoments> moving_moments_;
+  float output_mean_[kMaxOutputLength];
+  float output_mean_squares_[kMaxOutputLength];
+};
+
+const size_t MovingMomentsTest::kMaxOutputLength;
+
+void MovingMomentsTest::SetUp() {
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+}
+
+void MovingMomentsTest::CalculateMomentsAndVerify(
+    const float* input,
+    size_t input_length,
+    const float* expected_mean,
+    const float* expected_mean_squares) {
+  ASSERT_LE(input_length, kMaxOutputLength);
+
+  moving_moments_->CalculateMoments(input, input_length, output_mean_,
+                                    output_mean_squares_);
+
+  for (size_t i = 0; i < input_length; ++i) {
+    EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
+    EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
+  }
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
+  const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
+  const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {1.f, 2.f, 3.f, 4.f, 5.f,
+                                             5.f, 5.f, 5.f, 5.f, 5.f};
+  const float expected_mean_squares[kInputLength] = {
+      5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
+  const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.2f, 0.6f, 1.2f, 2.f, 3.f,
+                                             4.f,  5.f,  6.f,  7.f};
+  const float expected_mean_squares[kInputLength] = {
+      0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
+  const float kInput[] = {-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {-0.2f, -0.6f, -1.2f, -2.f, -3.f,
+                                             -4.f,  -5.f,  -6.f,  -7.f};
+  const float
+      expected_mean_squares[kInputLength] = {
+      0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) {
+  const size_t kMovingMomentsBufferLength = 4;
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+  const float kInput[] = {1.f,  -1.f, 1.f, -1.f, 1.f,
+                          -1.f, 1.f,  -1.f, 1.f, -1.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.25f, 0.f, 0.25f, 0.f, 0.f,
+                                             0.f,   0.f, 0.f,   0.f, 0.f};
+  const float expected_mean_squares[kInputLength] = {
+      0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) {
+  const float kInput[] = {0.2f,  0.3f,  0.5f,  0.7f, 0.11f,
+                          0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {
+      0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+  const float expected_mean_squares[kInputLength] = {0.008f,  0.026f,  0.076f,
+                                                     0.174f,  0.1764f, 0.1718f,
+                                                     0.1596f, 0.1168f, 0.0294f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, MultipleCalculateMomentsCalls) {
+  const float kInputFirstCall[] = {0.2f,  0.3f,  0.5f,  0.7f, 0.11f,
+                                   0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputFirstCallLength =
+      sizeof(kInputFirstCall) / sizeof(kInputFirstCall[0]);
+  const float kInputSecondCall[] = {0.29f, 0.31f};
+  const size_t kInputSecondCallLength =
+      sizeof(kInputSecondCall) / sizeof(kInputSecondCall[0]);
+  const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f};
+  const size_t kInputThirdCallLength =
+      sizeof(kInputThirdCall) / sizeof(kInputThirdCall[0]);
+
+  const float expected_mean_first_call[kInputFirstCallLength] = {
+      0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+  const float expected_mean_squares_first_call[kInputFirstCallLength] = {
+      0.008f,  0.026f,  0.076f,  0.174f, 0.1764f,
+      0.1718f, 0.1596f, 0.1168f, 0.0294f};
+
+  const float expected_mean_second_call[kInputSecondCallLength] = {0.202f,
+                                                                   0.238f};
+  const float expected_mean_squares_second_call[kInputSecondCallLength] = {
+      0.0438f, 0.0596f};
+
+  const float expected_mean_third_call[kInputThirdCallLength] = {
+      0.278f, 0.322f, 0.362f, 0.398f};
+  const float expected_mean_squares_third_call[kInputThirdCallLength] = {
+      0.0812f, 0.1076f, 0.134f, 0.1614f};
+
+  CalculateMomentsAndVerify(kInputFirstCall, kInputFirstCallLength,
+                            expected_mean_first_call,
+                            expected_mean_squares_first_call);
+
+  CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength,
+                            expected_mean_second_call,
+                            expected_mean_squares_second_call);
+
+  CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength,
+                            expected_mean_third_call,
+                            expected_mean_squares_third_call);
+}
+
+TEST_F(MovingMomentsTest, VerifySampleBasedVsBlockBasedCalculation) {
+  const float kInput[] = {0.2f,  0.3f,  0.5f,  0.7f, 0.11f,
+                          0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  float output_mean_block_based[kInputLength];
+  float output_mean_squares_block_based[kInputLength];
+
+  float output_mean_sample_based;
+  float output_mean_squares_sample_based;
+
+  moving_moments_->CalculateMoments(kInput, kInputLength,
+                                    output_mean_block_based,
+                                    output_mean_squares_block_based);
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+  for (size_t i = 0; i < kInputLength; ++i) {
+    moving_moments_->CalculateMoments(&kInput[i], 1, &output_mean_sample_based,
+                                      &output_mean_squares_sample_based);
+    EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based);
+    EXPECT_FLOAT_EQ(output_mean_squares_block_based[i],
+                    output_mean_squares_sample_based);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m b/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m
new file mode 100644
index 0000000000..8e12ab920b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m
@@ -0,0 +1,22 @@
+%
+% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS.  All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function [] = plotDetection(PCMfile, DATfile, fs, chunkSize)
+%[] = plotDetection(PCMfile, DATfile, fs, chunkSize)
+%
+%Plots the signal alongside the detection values.
+%
+%PCMfile: The file of the input signal in PCM format.
+%DATfile: The file containing the detection values in binary float format.
+%fs: The sample rate of the signal in Hertz.
+%chunkSize: The chunk size used to compute the detection values in seconds.
+[x, tx] = readPCM(PCMfile, fs);
+[d, td] = readDetection(DATfile, fs, chunkSize);
+plot(tx, x, td, d);
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m b/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m
new file mode 100644
index 0000000000..832bf31ec8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m
@@ -0,0 +1,26 @@
+%
+% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS.  All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function [d, t] = readDetection(file, fs, chunkSize)
+%[d, t] = readDetection(file, fs, chunkSize)
+%
+%Reads a detection signal from a DAT file.
+%
+%d: The detection signal.
+%t: The respective time vector.
+%
+%file: The DAT file where the detection signal is stored in float format.
+%fs: The signal sample rate in Hertz.
+%chunkSize: The chunk size used for the detection in seconds.
+fid = fopen(file);
+d = fread(fid, inf, 'float');
+fclose(fid);
+t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs);
+d = d(floor(t / chunkSize) + 1);
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m b/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m
new file mode 100644
index 0000000000..cd3cef8a3c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m
@@ -0,0 +1,26 @@
+%
+% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS.  All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function [x, t] = readPCM(file, fs)
+%[x, t] = readPCM(file, fs)
+%
+%Reads a signal from a PCM file.
+%
+%x: The read signal after normalization.
+%t: The respective time vector.
+%
+%file: The PCM file where the signal is stored in int16 format.
+%fs: The signal sample rate in Hertz.
+fid = fopen(file);
+x = fread(fid, inf, 'int16');
+fclose(fid);
+x = x - mean(x);
+x = x / max(abs(x));
+t = 0:(1 / fs):((length(x) - 1) / fs);
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc
new file mode 100644
index 0000000000..5c35505368
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc
@@ -0,0 +1,176 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_detector.h"
+
+#include <float.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "modules/audio_processing/transient/moving_moments.h"
+#include "modules/audio_processing/transient/wpd_node.h"
+#include "modules/audio_processing/transient/wpd_tree.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const int kTransientLengthMs = 30;
+static const int kChunksAtStartupLeftToDelete =
+    kTransientLengthMs / ts::kChunkSizeMs;
+static const float kDetectThreshold = 16.f;
+
+TransientDetector::TransientDetector(int sample_rate_hz)
+    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
+      last_first_moment_(),
+      last_second_moment_(),
+      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
+      reference_energy_(1.f),
+      using_reference_(false) {
+  RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
+             sample_rate_hz == ts::kSampleRate16kHz ||
+             sample_rate_hz == ts::kSampleRate32kHz ||
+             sample_rate_hz == ts::kSampleRate48kHz);
+  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
+  // Adjustment to avoid data loss while downsampling, making
+  // `samples_per_chunk_` and `samples_per_transient` always divisible by
+  // `kLeaves`.
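+  // Example (illustrative arithmetic, not from the original source): with a
+  // 10 ms chunk at 16 kHz, samples_per_chunk_ is 160 and kLeaves is 8, so
+  // 160 % 8 == 0 and nothing is dropped; a hypothetical 44.1 kHz rate would
+  // give 441 samples per chunk and truncate to 440.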
+  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
+  samples_per_transient -= samples_per_transient % kLeaves;
+
+  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
+  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
+                              kDaubechies8HighPassCoefficients,
+                              kDaubechies8LowPassCoefficients,
+                              kDaubechies8CoefficientsLength, kLevels));
+  for (size_t i = 0; i < kLeaves; ++i) {
+    moving_moments_[i].reset(
+        new MovingMoments(samples_per_transient / kLeaves));
+  }
+
+  first_moments_.reset(new float[tree_leaves_data_length_]);
+  second_moments_.reset(new float[tree_leaves_data_length_]);
+
+  for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
+    previous_results_.push_back(0.f);
+  }
+}
+
+TransientDetector::~TransientDetector() {}
+
+float TransientDetector::Detect(const float* data,
+                                size_t data_length,
+                                const float* reference_data,
+                                size_t reference_length) {
+  RTC_DCHECK(data);
+  RTC_DCHECK_EQ(samples_per_chunk_, data_length);
+
+  // TODO(aluebs): Check if these errors can logically happen and if not assert
+  // on them.
+  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
+    return -1.f;
+  }
+
+  float result = 0.f;
+
+  for (size_t i = 0; i < kLeaves; ++i) {
+    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
+
+    moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_,
+                                         first_moments_.get(),
+                                         second_moments_.get());
+
+    // Add the delayed value (uses the last moments from the previous call to
+    // Detect()).
+    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
+    result +=
+        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
+
+    // Add new values.
+    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
+      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
+      result +=
+          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
+    }
+
+    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
+    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
+  }
+
+  result /= tree_leaves_data_length_;
+
+  result *= ReferenceDetectionValue(reference_data, reference_length);
+
+  if (chunks_at_startup_left_to_delete_ > 0) {
+    chunks_at_startup_left_to_delete_--;
+    result = 0.f;
+  }
+
+  if (result >= kDetectThreshold) {
+    result = 1.f;
+  } else {
+    // Get proportional value.
+    // Proportion achieved with a squared raised cosine function with domain
+    // [0, kDetectThreshold) and image [0, 1); it is always increasing.
+    const float horizontal_scaling = ts::kPi / kDetectThreshold;
+    const float kHorizontalShift = ts::kPi;
+    const float kVerticalScaling = 0.5f;
+    const float kVerticalShift = 1.f;
+
+    result = (std::cos(result * horizontal_scaling + kHorizontalShift) +
+              kVerticalShift) *
+             kVerticalScaling;
+    result *= result;
+  }
+
+  previous_results_.pop_front();
+  previous_results_.push_back(result);
+
+  // In the current implementation we return the max of the current result and
+  // the previous results, so the high results have a width equal to
+  // `transient_length`.
+  return *std::max_element(previous_results_.begin(), previous_results_.end());
+}
+
+// Looks for the highest slope and compares it with the previous ones.
+// An exponential transformation takes this to the [0, 1] range. This value is
+// multiplied by the detection result to avoid false positives.
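+// For example (illustrative numbers, derived from the constants below): with
+// kReferenceNonLinearity = 20 and kEnergyRatioThreshold = 0.2, an energy
+// ratio of exactly 0.2 maps to 1 / (1 + e^0) = 0.5, while a ratio of 0.5
+// maps to 1 / (1 + e^-6), roughly 0.998.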
+float TransientDetector::ReferenceDetectionValue(const float* data,
+                                                 size_t length) {
+  if (data == NULL) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  static const float kEnergyRatioThreshold = 0.2f;
+  static const float kReferenceNonLinearity = 20.f;
+  static const float kMemory = 0.99f;
+  float reference_energy = 0.f;
+  for (size_t i = 1; i < length; ++i) {
+    reference_energy += data[i] * data[i];
+  }
+  if (reference_energy == 0.f) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  RTC_DCHECK_NE(0, reference_energy_);
+  float result = 1.f / (1.f + std::exp(kReferenceNonLinearity *
+                                       (kEnergyRatioThreshold -
+                                        reference_energy / reference_energy_)));
+  reference_energy_ =
+      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
+
+  using_reference_ = true;
+
+  return result;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h
new file mode 100644
index 0000000000..a3dbb7ffde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+
+#include <stddef.h>
+
+#include <deque>
+#include <memory>
+
+#include "modules/audio_processing/transient/moving_moments.h"
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
+// This is an implementation of the transient detector described in "Causal
+// Wavelet based transient detector".
+// Calculates the log-likelihood of a transient to happen on a signal at any
+// given time based on the previous samples; it uses a WPD tree to analyze the
+// signal. It preserves its state, so it can be called multiple times.
+class TransientDetector {
+ public:
+  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
+  // of 3 levels. Make an overloaded constructor to allow different wavelets
+  // and depths of the tree. When needed.
+
+  // Creates a wavelet-based transient detector.
+  TransientDetector(int sample_rate_hz);
+
+  ~TransientDetector();
+
+  // Calculates the log-likelihood of the existence of a transient in `data`.
+  // `data_length` has to be equal to `samples_per_chunk_`.
+  // Returns a value between 0 and 1, as a non-linear representation of this
+  // likelihood.
+  // Returns a negative value on error.
+  float Detect(const float* data,
+               size_t data_length,
+               const float* reference_data,
+               size_t reference_length);
+
+  bool using_reference() { return using_reference_; }
+
+ private:
+  float ReferenceDetectionValue(const float* data, size_t length);
+
+  static const size_t kLevels = 3;
+  static const size_t kLeaves = 1 << kLevels;
+
+  size_t samples_per_chunk_;
+
+  std::unique_ptr<WPDTree> wpd_tree_;
+  size_t tree_leaves_data_length_;
+
+  // A MovingMoments object is needed for each leaf in the WPD tree.
+  std::unique_ptr<MovingMoments> moving_moments_[kLeaves];
+
+  std::unique_ptr<float[]> first_moments_;
+  std::unique_ptr<float[]> second_moments_;
+
+  // Stores the last calculated moments from the previous detection.
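+  // (Illustrative note, not in the original source: these hold the
+  // one-sample history that Detect() uses to compute the unbiased term for
+  // index 0 of the next chunk.)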
+  float last_first_moment_[kLeaves];
+  float last_second_moment_[kLeaves];
+
+  // We keep track of the previous results from the previous chunks, so it can
+  // be used to effectively give results according to the `transient_length`.
+  std::deque<float> previous_results_;
+
+  // Number of chunks that are going to return only zeros at the beginning of
+  // the detection. It helps to avoid infs and nans due to the lack of
+  // information.
+  int chunks_at_startup_left_to_delete_;
+
+  float reference_energy_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc
new file mode 100644
index 0000000000..a7364626fd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_detector.h"
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+static const int kSampleRatesHz[] = {ts::kSampleRate8kHz, ts::kSampleRate16kHz,
+                                     ts::kSampleRate32kHz,
+                                     ts::kSampleRate48kHz};
+static const size_t kNumberOfSampleRates =
+    sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz);
+
+// This test is for the correctness of the transient detector.
+// Checks the results, comparing them with the ones stored in the detect files
+// in the directory: resources/audio_processing/transient/
+// The files contain all the results in double precision (little endian).
+// The audio files used with different sample rates are stored in the same
+// directory.
+#if defined(WEBRTC_IOS)
+TEST(TransientDetectorTest, DISABLED_CorrectnessBasedOnFiles) {
+#else
+TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
+#endif
+  for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
+    int sample_rate_hz = kSampleRatesHz[i];
+
+    // Prepare detect file.
+    rtc::StringBuilder detect_file_name;
+    detect_file_name << "audio_processing/transient/detect"
+                     << (sample_rate_hz / 1000) << "kHz";
+
+    FileWrapper detect_file = FileWrapper::OpenReadOnly(
+        test::ResourcePath(detect_file_name.str(), "dat"));
+
+    bool file_opened = detect_file.is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n"
+                             << detect_file_name.str().c_str();
+
+    // Prepare audio file.
+    rtc::StringBuilder audio_file_name;
+    audio_file_name << "audio_processing/transient/audio"
+                    << (sample_rate_hz / 1000) << "kHz";
+
+    FileWrapper audio_file = FileWrapper::OpenReadOnly(
+        test::ResourcePath(audio_file_name.str(), "pcm"));
+
+    // Create detector.
+    TransientDetector detector(sample_rate_hz);
+
+    const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
+    std::unique_ptr<float[]> buffer(new float[buffer_length]);
+
+    const float kTolerance = 0.02f;
+
+    size_t frames_read = 0;
+
+    while (ReadInt16FromFileToFloatBuffer(&audio_file, buffer_length,
+                                          buffer.get()) == buffer_length) {
+      ++frames_read;
+
+      float detector_value =
+          detector.Detect(buffer.get(), buffer_length, NULL, 0);
+      double file_value;
+      ASSERT_EQ(1u, ReadDoubleBufferFromFile(&detect_file, 1, &file_value))
+          << "Detect test file is malformed.\n";
+
+      // Compare results with data from the matlab test file.
+      EXPECT_NEAR(file_value, detector_value, kTolerance)
+          << "Frame: " << frames_read;
+    }
+
+    detect_file.Close();
+    audio_file.Close();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc
new file mode 100644
index 0000000000..2d8baf9416
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc
@@ -0,0 +1,238 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(std::string, in_file_name, "", "PCM file that contains the signal.");
+ABSL_FLAG(std::string,
+          detection_file_name,
+          "",
+          "PCM file that contains the detection signal.");
+ABSL_FLAG(std::string,
+          reference_file_name,
+          "",
+          "PCM file that contains the reference signal.");
+
+ABSL_FLAG(int,
+          chunk_size_ms,
+          10,
+          "Time between each chunk of samples in milliseconds.");
+
+ABSL_FLAG(int,
+          sample_rate_hz,
+          16000,
+          "Sampling frequency of the signal in Hertz.");
+ABSL_FLAG(int,
+          detection_rate_hz,
+          0,
+          "Sampling frequency of the detection signal in Hertz.");
+
+ABSL_FLAG(int, num_channels, 1, "Number of channels.");
+
+namespace webrtc {
+
+const char kUsage[] =
+    "\nDetects and suppresses transients from file.\n\n"
+    "This application loads the signal from the in_file_name with a specific\n"
+    "num_channels and sample_rate_hz, the detection signal from the\n"
+    "detection_file_name with a specific detection_rate_hz, and the reference\n"
+    "signal from the reference_file_name with sample_rate_hz, divides them\n"
+    "into chunk_size_ms blocks, computes its voice value and depending on the\n"
+    "voice_threshold does the respective restoration. You can always get the\n"
+    "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
+    "1 respectively.\n\n";
+
+// Reads the next buffers from the test files (signed 16-bit host-endian PCM
+// format). audio_buffer has int16 samples, detection_buffer has float samples
+// with range [-32768, 32767], and reference_buffer has float samples with
+// range [-1, 1]. Returns true iff all the buffers were filled completely.
+bool ReadBuffers(FILE* in_file,
+                 size_t audio_buffer_size,
+                 int num_channels,
+                 int16_t* audio_buffer,
+                 FILE* detection_file,
+                 size_t detection_buffer_size,
+                 float* detection_buffer,
+                 FILE* reference_file,
+                 float* reference_buffer) {
+  std::unique_ptr<int16_t[]> tmpbuf;
+  int16_t* read_ptr = audio_buffer;
+  if (num_channels > 1) {
+    tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
+    read_ptr = tmpbuf.get();
+  }
+  if (fread(read_ptr, sizeof(*read_ptr), num_channels * audio_buffer_size,
+            in_file) != num_channels * audio_buffer_size) {
+    return false;
+  }
+  // De-interleave.
+  if (num_channels > 1) {
+    for (int i = 0; i < num_channels; ++i) {
+      for (size_t j = 0; j < audio_buffer_size; ++j) {
+        audio_buffer[i * audio_buffer_size + j] =
+            read_ptr[i + j * num_channels];
+      }
+    }
+  }
+  if (detection_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
+              detection_file) != detection_buffer_size)
+      return false;
+    for (size_t i = 0; i < detection_buffer_size; ++i)
+      detection_buffer[i] = ibuf[i];
+  }
+  if (reference_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) !=
+        audio_buffer_size)
+      return false;
+    S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
+  }
+  return true;
+}
+
+// Writes a number of samples to an open signed 16-bit host-endian PCM file.
+static void WritePCM(FILE* f,
+                     size_t num_samples,
+                     int num_channels,
+                     const float* buffer) {
+  std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
+  // Interleave.
+  for (int i = 0; i < num_channels; ++i) {
+    for (size_t j = 0; j < num_samples; ++j) {
+      ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
+    }
+  }
+  fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
+}
+
+// This application tests the transient suppression by producing a processed
+// PCM file, which has to be listened to in order to evaluate the
+// performance.
+// It gets an audio file and its voice gain information, and the suppressor
+// processes it, producing the output file "suppressed_keystrokes.pcm".
+void void_main() {
+  // TODO(aluebs): Remove all FileWrappers.
+  // Prepare the input file.
+  FILE* in_file = fopen(absl::GetFlag(FLAGS_in_file_name).c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+
+  // Prepare the detection file.
+  FILE* detection_file = NULL;
+  if (!absl::GetFlag(FLAGS_detection_file_name).empty()) {
+    detection_file =
+        fopen(absl::GetFlag(FLAGS_detection_file_name).c_str(), "rb");
+  }
+
+  // Prepare the reference file.
+  FILE* reference_file = NULL;
+  if (!absl::GetFlag(FLAGS_reference_file_name).empty()) {
+    reference_file =
+        fopen(absl::GetFlag(FLAGS_reference_file_name).c_str(), "rb");
+  }
+
+  // Prepare the output file.
+  std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
+  FILE* out_file = fopen(out_file_name.c_str(), "wb");
+  ASSERT_TRUE(out_file != NULL);
+
+  int detection_rate_hz = absl::GetFlag(FLAGS_detection_rate_hz);
+  if (detection_rate_hz == 0) {
+    detection_rate_hz = absl::GetFlag(FLAGS_sample_rate_hz);
+  }
+
+  Agc agc;
+
+  TransientSuppressorImpl suppressor(TransientSuppressor::VadMode::kDefault,
+                                     absl::GetFlag(FLAGS_sample_rate_hz),
+                                     detection_rate_hz,
+                                     absl::GetFlag(FLAGS_num_channels));
+
+  const size_t audio_buffer_size = absl::GetFlag(FLAGS_chunk_size_ms) *
+                                   absl::GetFlag(FLAGS_sample_rate_hz) / 1000;
+  const size_t detection_buffer_size =
+      absl::GetFlag(FLAGS_chunk_size_ms) * detection_rate_hz / 1000;
+
+  // int16 and float variants of the same data.
+  std::unique_ptr<int16_t[]> audio_buffer_i(
+      new int16_t[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
+  std::unique_ptr<float[]> audio_buffer_f(
+      new float[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
+
+  std::unique_ptr<float[]> detection_buffer, reference_buffer;
+
+  if (detection_file)
+    detection_buffer.reset(new float[detection_buffer_size]);
+  if (reference_file)
+    reference_buffer.reset(new float[audio_buffer_size]);
+
+  while (ReadBuffers(
+      in_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
+      audio_buffer_i.get(), detection_file, detection_buffer_size,
+      detection_buffer.get(), reference_file, reference_buffer.get())) {
+    agc.Process({audio_buffer_i.get(), audio_buffer_size});
+
+    for (size_t i = 0;
+         i < absl::GetFlag(FLAGS_num_channels) * audio_buffer_size; ++i) {
+      audio_buffer_f[i] = audio_buffer_i[i];
+    }
+
+    suppressor.Suppress(audio_buffer_f.get(), audio_buffer_size,
+                        absl::GetFlag(FLAGS_num_channels),
+                        detection_buffer.get(), detection_buffer_size,
+                        reference_buffer.get(), audio_buffer_size,
+                        agc.voice_probability(), true);
+
+    // Write result to out file.
+    WritePCM(out_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
+             audio_buffer_f.get());
+  }
+
+  fclose(in_file);
+  if (detection_file) {
+    fclose(detection_file);
+  }
+  if (reference_file) {
+    fclose(reference_file);
+  }
+  fclose(out_file);
+}
+
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  if (args.size() != 1) {
+    printf("%s", webrtc::kUsage);
+    return 1;
+  }
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_chunk_size_ms), 0);
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_sample_rate_hz), 0);
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_num_channels), 0);
+
+  webrtc::void_main();
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h
new file mode 100644
index 0000000000..ecb3c3baab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h
@@ -0,0 +1,75 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
+class TransientSuppressor {
+ public:
+  // Type of VAD used by the caller to compute the `voice_probability` argument
+  // passed to `Suppress()`.
+  enum class VadMode {
+    // By default, `TransientSuppressor` assumes that `voice_probability` is
+    // computed by `AgcManagerDirect`.
+    kDefault = 0,
+    // Use this mode when `TransientSuppressor` must assume that
+    // `voice_probability` is computed by the RNN VAD.
+    kRnnVad,
+    // Use this mode to let `TransientSuppressor::Suppress()` ignore
+    // `voice_probability` and behave as if voice information is unavailable
+    // (regardless of the passed value).
+    kNoVad,
+  };
+
+  virtual ~TransientSuppressor() {}
+
+  virtual void Initialize(int sample_rate_hz,
+                          int detector_rate_hz,
+                          int num_channels) = 0;
+
+  // Processes a `data` chunk, and returns it with keystrokes suppressed from
+  // it. The float format is assumed to be int16 ranged. If there is more than
+  // one channel, the chunks are concatenated one after the other in `data`.
+  // `data_length` must be equal to `data_length_`.
+  // `num_channels` must be equal to `num_channels_`.
+  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
+  // NULL, `data` is used for the detection too. The `detection_data` is always
+  // assumed mono.
+  // If a reference signal (e.g. keyboard microphone) is available, it can be
+  // passed in as `reference_data`. It is assumed mono and must have the same
+  // length as `data`. NULL is accepted if unavailable.
+  // This suppressor performs better if voice information is available.
+  // `voice_probability` is the probability of voice being present in this
+  // chunk of audio. If voice information is not available, `voice_probability`
+  // must always be set to 1.
+  // `key_pressed` determines if a key was pressed on this audio chunk.
+  // Returns a delayed version of `voice_probability` according to the
+  // algorithmic delay introduced by this method. In this way, the modified
+  // `data` and the returned voice probability will be temporally aligned.
+  virtual float Suppress(float* data,
+                         size_t data_length,
+                         int num_channels,
+                         const float* detection_data,
+                         size_t detection_length,
+                         const float* reference_data,
+                         size_t reference_length,
+                         float voice_probability,
+                         bool key_pressed) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build
new file mode 100644
index 0000000000..5988e89a6a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build
@@ -0,0 +1,201 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
+                                                                    ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("transient_suppressor_api_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc
new file mode 100644
index 0000000000..90428464e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc
@@ -0,0 +1,455 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <deque>
+#include <limits>
+#include <set>
+#include <string>
+
+#include "common_audio/include/audio_util.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/transient_detector.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/windows_private.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+static const float kMeanIIRCoefficient = 0.5f;
+
+// TODO(aluebs): Check if these values work also for 48kHz.
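+// (Illustrative note, not in the original source: with the 256-point analysis
+// FFT used at 16 kHz, one bin spans 16000 / 256 = 62.5 Hz, so bins [3, 60]
+// cover roughly 190 Hz to 3.75 kHz, i.e. the voice band referenced in
+// SoftRestoration().)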
+static const size_t kMinVoiceBin = 3;
+static const size_t kMaxVoiceBin = 60;
+
+namespace {
+
+float ComplexMagnitude(float a, float b) {
+  return std::abs(a) + std::abs(b);
+}
+
+std::string GetVadModeLabel(TransientSuppressor::VadMode vad_mode) {
+  switch (vad_mode) {
+    case TransientSuppressor::VadMode::kDefault:
+      return "default";
+    case TransientSuppressor::VadMode::kRnnVad:
+      return "RNN VAD";
+    case TransientSuppressor::VadMode::kNoVad:
+      return "no VAD";
+  }
+}
+
+}  // namespace
+
+TransientSuppressorImpl::TransientSuppressorImpl(VadMode vad_mode,
+                                                 int sample_rate_hz,
+                                                 int detector_rate_hz,
+                                                 int num_channels)
+    : vad_mode_(vad_mode),
+      voice_probability_delay_unit_(/*delay_num_samples=*/0, sample_rate_hz),
+      analyzed_audio_is_silent_(false),
+      data_length_(0),
+      detection_length_(0),
+      analysis_length_(0),
+      buffer_delay_(0),
+      complex_analysis_length_(0),
+      num_channels_(0),
+      window_(NULL),
+      detector_smoothed_(0.f),
+      keypress_counter_(0),
+      chunks_since_keypress_(0),
+      detection_enabled_(false),
+      suppression_enabled_(false),
+      use_hard_restoration_(false),
+      chunks_since_voice_change_(0),
+      seed_(182),
+      using_reference_(false) {
+  RTC_LOG(LS_INFO) << "VAD mode: " << GetVadModeLabel(vad_mode_);
+  Initialize(sample_rate_hz, detector_rate_hz, num_channels);
+}
+
+TransientSuppressorImpl::~TransientSuppressorImpl() {}
+
+void TransientSuppressorImpl::Initialize(int sample_rate_hz,
+                                         int detection_rate_hz,
+                                         int num_channels) {
+  RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
+             sample_rate_hz == ts::kSampleRate16kHz ||
+             sample_rate_hz == ts::kSampleRate32kHz ||
+             sample_rate_hz == ts::kSampleRate48kHz);
+  RTC_DCHECK(detection_rate_hz == ts::kSampleRate8kHz ||
+             detection_rate_hz == ts::kSampleRate16kHz ||
+             detection_rate_hz == ts::kSampleRate32kHz ||
+             detection_rate_hz == ts::kSampleRate48kHz);
+  RTC_DCHECK_GT(num_channels, 0);
+
+  switch (sample_rate_hz) {
+    case ts::kSampleRate8kHz:
+      analysis_length_ = 128u;
+      window_ = kBlocks80w128;
+      break;
+    case ts::kSampleRate16kHz:
+      analysis_length_ = 256u;
+      window_ = kBlocks160w256;
+      break;
+    case ts::kSampleRate32kHz:
+      analysis_length_ = 512u;
+      window_ = kBlocks320w512;
+      break;
+    case ts::kSampleRate48kHz:
+      analysis_length_ = 1024u;
+      window_ = kBlocks480w1024;
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      return;
+  }
+
+  detector_.reset(new TransientDetector(detection_rate_hz));
+  data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
+  RTC_DCHECK_LE(data_length_, analysis_length_);
+  buffer_delay_ = analysis_length_ - data_length_;
+
+  voice_probability_delay_unit_.Initialize(/*delay_num_samples=*/buffer_delay_,
+                                           sample_rate_hz);
+
+  complex_analysis_length_ = analysis_length_ / 2 + 1;
+  RTC_DCHECK_GE(complex_analysis_length_, kMaxVoiceBin);
+  num_channels_ = num_channels;
+  in_buffer_.reset(new float[analysis_length_ * num_channels_]);
+  memset(in_buffer_.get(), 0,
+         analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
+  detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
+  detection_buffer_.reset(new float[detection_length_]);
+  memset(detection_buffer_.get(), 0,
+         detection_length_ * sizeof(detection_buffer_[0]));
+  out_buffer_.reset(new float[analysis_length_ * num_channels_]);
+  memset(out_buffer_.get(), 0,
+         analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
+  // ip[0] must be zero to trigger initialization using rdft().
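+  // (Assumption based on the bundled Ooura fft4g implementation: the first
+  // WebRtc_rdft() call sees ip[0] == 0 and builds the bit-reversal table in
+  // `ip_` and the twiddle factors in `wfft_`; subsequent calls reuse them.)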
+  size_t ip_length = 2 + sqrtf(analysis_length_);
+  ip_.reset(new size_t[ip_length]());
+  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
+  wfft_.reset(new float[complex_analysis_length_ - 1]);
+  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
+  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
+  memset(spectral_mean_.get(), 0,
+         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
+  fft_buffer_.reset(new float[analysis_length_ + 2]);
+  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
+  magnitudes_.reset(new float[complex_analysis_length_]);
+  memset(magnitudes_.get(), 0,
+         complex_analysis_length_ * sizeof(magnitudes_[0]));
+  mean_factor_.reset(new float[complex_analysis_length_]);
+
+  static const float kFactorHeight = 10.f;
+  static const float kLowSlope = 1.f;
+  static const float kHighSlope = 0.3f;
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    mean_factor_[i] =
+        kFactorHeight /
+            (1.f + std::exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
+        kFactorHeight /
+            (1.f + std::exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
+  }
+  detector_smoothed_ = 0.f;
+  keypress_counter_ = 0;
+  chunks_since_keypress_ = 0;
+  detection_enabled_ = false;
+  suppression_enabled_ = false;
+  use_hard_restoration_ = false;
+  chunks_since_voice_change_ = 0;
+  seed_ = 182;
+  using_reference_ = false;
+}
+
+float TransientSuppressorImpl::Suppress(float* data,
+                                        size_t data_length,
+                                        int num_channels,
+                                        const float* detection_data,
+                                        size_t detection_length,
+                                        const float* reference_data,
+                                        size_t reference_length,
+                                        float voice_probability,
+                                        bool key_pressed) {
+  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
+      detection_length != detection_length_ || voice_probability < 0 ||
+      voice_probability > 1) {
+    // The audio is not modified, so the voice probability is returned as is
+    // (delay not applied).
+    return voice_probability;
+  }
+
+  UpdateKeypress(key_pressed);
+  UpdateBuffers(data);
+
+  if (detection_enabled_) {
+    UpdateRestoration(voice_probability);
+
+    if (!detection_data) {
+      // Use the input data of the first channel if special detection data is
+      // not supplied.
+      detection_data = &in_buffer_[buffer_delay_];
+    }
+
+    float detector_result = detector_->Detect(detection_data, detection_length,
+                                              reference_data, reference_length);
+    if (detector_result < 0) {
+      // The audio is not modified, so the voice probability is returned as is
+      // (delay not applied).
+      return voice_probability;
+    }
+
+    using_reference_ = detector_->using_reference();
+
+    // `detector_smoothed_` follows `detector_result` when the latter is
+    // increasing, but has an exponentially decaying tail to be able to
+    // suppress the ringing of keyclicks.
+    float smooth_factor = using_reference_ ? 0.6f : 0.1f;
+    detector_smoothed_ = detector_result >= detector_smoothed_
+                             ? detector_result
+                             : smooth_factor * detector_smoothed_ +
+                                   (1 - smooth_factor) * detector_result;
+
+    for (int i = 0; i < num_channels_; ++i) {
+      Suppress(&in_buffer_[i * analysis_length_],
+               &spectral_mean_[i * complex_analysis_length_],
+               &out_buffer_[i * analysis_length_]);
+    }
+  }
+
+  // If suppression isn't enabled, we use the in buffer to delay the signal
+  // appropriately. This also gives time for the out buffer to be refreshed
+  // with new data between detection and suppression becoming enabled.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&data[i * data_length_],
+           suppression_enabled_ ? &out_buffer_[i * analysis_length_]
+                                : &in_buffer_[i * analysis_length_],
+           data_length_ * sizeof(*data));
+  }
+
+  // The audio has been modified, return the delayed voice probability.
+  return voice_probability_delay_unit_.Delay(voice_probability);
+}
+
+// This should only be called when detection is enabled. UpdateBuffers() must
+// have been called. At return, `out_buffer_` will be filled with the
+// processed output.
+void TransientSuppressorImpl::Suppress(float* in_ptr,
+                                       float* spectral_mean,
+                                       float* out_ptr) {
+  // Go to frequency domain.
+  for (size_t i = 0; i < analysis_length_; ++i) {
+    // TODO(aluebs): Rename windows
+    fft_buffer_[i] = in_ptr[i] * window_[i];
+  }
+
+  WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());
+
+  // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
+  // for convenience.
+  fft_buffer_[analysis_length_] = fft_buffer_[1];
+  fft_buffer_[analysis_length_ + 1] = 0.f;
+  fft_buffer_[1] = 0.f;
+
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    magnitudes_[i] =
+        ComplexMagnitude(fft_buffer_[i * 2], fft_buffer_[i * 2 + 1]);
+  }
+  // Restore audio if necessary.
+  if (suppression_enabled_) {
+    if (use_hard_restoration_) {
+      HardRestoration(spectral_mean);
+    } else {
+      SoftRestoration(spectral_mean);
+    }
+  }
+
+  // Update the spectral mean.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
+                       kMeanIIRCoefficient * magnitudes_[i];
+  }
+
+  // Back to time domain.
+  // Put R[n/2] back in fft_buffer_[1].
+  fft_buffer_[1] = fft_buffer_[analysis_length_];
+
+  WebRtc_rdft(analysis_length_, -1, fft_buffer_.get(), ip_.get(), wfft_.get());
+  const float fft_scaling = 2.f / analysis_length_;
+
+  for (size_t i = 0; i < analysis_length_; ++i) {
+    out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
+  }
+}
+
+void TransientSuppressorImpl::UpdateKeypress(bool key_pressed) {
+  const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
+  const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
+  const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs;  // 4 seconds.
+
+  if (key_pressed) {
+    keypress_counter_ += kKeypressPenalty;
+    chunks_since_keypress_ = 0;
+    detection_enabled_ = true;
+  }
+  keypress_counter_ = std::max(0, keypress_counter_ - 1);
+
+  if (keypress_counter_ > kIsTypingThreshold) {
+    if (!suppression_enabled_) {
+      RTC_LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
+    }
+    suppression_enabled_ = true;
+    keypress_counter_ = 0;
+  }
+
+  if (detection_enabled_ && ++chunks_since_keypress_ > kChunksUntilNotTyping) {
+    if (suppression_enabled_) {
+      RTC_LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
+    }
+    detection_enabled_ = false;
+    suppression_enabled_ = false;
+    keypress_counter_ = 0;
+  }
+}
+
+void TransientSuppressorImpl::UpdateRestoration(float voice_probability) {
+  bool not_voiced;
+  switch (vad_mode_) {
+    case TransientSuppressor::VadMode::kDefault: {
+      constexpr float kVoiceThreshold = 0.02f;
+      not_voiced = voice_probability < kVoiceThreshold;
+      break;
+    }
+    case TransientSuppressor::VadMode::kRnnVad: {
+      constexpr float kVoiceThreshold = 0.7f;
+      not_voiced = voice_probability < kVoiceThreshold;
+      break;
+    }
+    case TransientSuppressor::VadMode::kNoVad:
+      // Always assume that voice is detected.
+      not_voiced = false;
+      break;
+  }
+
+  if (not_voiced == use_hard_restoration_) {
+    chunks_since_voice_change_ = 0;
+  } else {
+    ++chunks_since_voice_change_;
+
+    // Number of 10 ms frames to wait to transition to and from hard
+    // restoration.
+    constexpr int kHardRestorationOffsetDelay = 3;
+    constexpr int kHardRestorationOnsetDelay = 80;
+
+    if ((use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
+        (!use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
+      use_hard_restoration_ = not_voiced;
+      chunks_since_voice_change_ = 0;
+    }
+  }
+}
+
+// Shift buffers to make way for new data. Must be called after
+// `detection_enabled_` is updated by UpdateKeypress().
+void TransientSuppressorImpl::UpdateBuffers(float* data) {
+  // TODO(aluebs): Change to ring buffer.
+  memmove(in_buffer_.get(), &in_buffer_[data_length_],
+          (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+              sizeof(in_buffer_[0]));
+  // Copy new chunk to buffer.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
+           &data[i * data_length_], data_length_ * sizeof(*data));
+  }
+  if (detection_enabled_) {
+    // Shift previous chunk in out buffer.
+    memmove(out_buffer_.get(), &out_buffer_[data_length_],
+            (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+                sizeof(out_buffer_[0]));
+    // Initialize new chunk in out buffer.
+    for (int i = 0; i < num_channels_; ++i) {
+      memset(&out_buffer_[buffer_delay_ + i * analysis_length_], 0,
+             data_length_ * sizeof(out_buffer_[0]));
+    }
+  }
+}
+
+// Restores the unvoiced signal if a click is present.
+// Attenuates by a certain factor every peak in the `fft_buffer_` that exceeds
+// the spectral mean. The attenuation depends on `detector_smoothed_`.
+// If a restoration takes place, the `magnitudes_` are updated to the new value.
+void TransientSuppressorImpl::HardRestoration(float* spectral_mean) {
+  const float detector_result =
+      1.f - std::pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
+  // To restore, we get the peaks in the spectrum. If higher than the previous
+  // spectral mean we adjust them.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
+      // RandU() generates values on [0, int16::max()]
+      const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
+                          std::numeric_limits<int16_t>::max();
+      const float scaled_mean = detector_result * spectral_mean[i];
+
+      fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
+                           scaled_mean * cosf(phase);
+      fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
+                               scaled_mean * sinf(phase);
+      magnitudes_[i] = magnitudes_[i] -
+                       detector_result * (magnitudes_[i] - spectral_mean[i]);
+    }
+  }
+}
+
+// Restores the voiced signal if a click is present.
+// Attenuates by a certain factor every peak in the `fft_buffer_` that exceeds
+// the spectral mean and that is lower than some function of the current block
+// frequency mean. The attenuation depends on `detector_smoothed_`.
+// If a restoration takes place, the `magnitudes_` are updated to the new value.
+void TransientSuppressorImpl::SoftRestoration(float* spectral_mean) {
+  // Get the spectral magnitude mean of the current block.
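+  // The block mean computed here feeds the double-sigmoid gate (see
+  // `mean_factor_`) used below; the restoration itself is a linear crossfade
+  // of each peak magnitude towards the long-term spectral mean,
+  //   new_magnitude = m[i] - detector_smoothed_ * (m[i] - spectral_mean[i]),
+  // with the real and imaginary FFT parts scaled equally so the phase is
+  // preserved.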
+  float block_frequency_mean = 0;
+  for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
+    block_frequency_mean += magnitudes_[i];
+  }
+  block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
+
+  // To restore, we get the peaks in the spectrum. If higher than the
+  // previous spectral mean and lower than a factor of the block mean
+  // we adjust them. The factor is a double sigmoid that has a minimum in the
+  // voice frequency range (300Hz - 3kHz).
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
+        (using_reference_ ||
+         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
+      const float new_magnitude =
+          magnitudes_[i] -
+          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
+      const float magnitude_ratio = new_magnitude / magnitudes_[i];
+
+      fft_buffer_[i * 2] *= magnitude_ratio;
+      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
+      magnitudes_[i] = new_magnitude;
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h
new file mode 100644
index 0000000000..4005a16b0a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/voice_probability_delay_unit.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class TransientDetector;
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
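+//
+// A minimal usage sketch (illustrative only; it assumes 48 kHz mono audio and
+// the 10 ms chunk size used throughout this module):
+//
+//   TransientSuppressorImpl ts(TransientSuppressor::VadMode::kDefault,
+//                              /*sample_rate_hz=*/48000,
+//                              /*detector_rate_hz=*/48000,
+//                              /*num_channels=*/1);
+//   std::vector<float> chunk(480, 0.0f);  // One 10 ms mono chunk at 48 kHz.
+//   float delayed_vad =
+//       ts.Suppress(chunk.data(), chunk.size(), /*num_channels=*/1,
+//                   /*detection_data=*/nullptr,
+//                   /*detection_length=*/chunk.size(),
+//                   /*reference_data=*/nullptr,
+//                   /*reference_length=*/chunk.size(),
+//                   /*voice_probability=*/1.0f, /*key_pressed=*/false);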
+class TransientSuppressorImpl : public TransientSuppressor {
+ public:
+  TransientSuppressorImpl(VadMode vad_mode,
+                          int sample_rate_hz,
+                          int detector_rate_hz,
+                          int num_channels);
+  ~TransientSuppressorImpl() override;
+
+  void Initialize(int sample_rate_hz,
+                  int detector_rate_hz,
+                  int num_channels) override;
+
+  float Suppress(float* data,
+                 size_t data_length,
+                 int num_channels,
+                 const float* detection_data,
+                 size_t detection_length,
+                 const float* reference_data,
+                 size_t reference_length,
+                 float voice_probability,
+                 bool key_pressed) override;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorVadModeParametrization,
+                           TypingDetectionLogicWorksAsExpectedForMono);
+  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
+
+  void UpdateKeypress(bool key_pressed);
+  void UpdateRestoration(float voice_probability);
+
+  void UpdateBuffers(float* data);
+
+  void HardRestoration(float* spectral_mean);
+  void SoftRestoration(float* spectral_mean);
+
+  const VadMode vad_mode_;
+  VoiceProbabilityDelayUnit voice_probability_delay_unit_;
+
+  std::unique_ptr<TransientDetector> detector_;
+
+  bool analyzed_audio_is_silent_;
+
+  size_t data_length_;
+  size_t detection_length_;
+  size_t analysis_length_;
+  size_t buffer_delay_;
+  size_t complex_analysis_length_;
+  int num_channels_;
+  // Input buffer where the original samples are stored.
+  std::unique_ptr<float[]> in_buffer_;
+  std::unique_ptr<float[]> detection_buffer_;
+  // Output buffer where the restored samples are stored.
+  std::unique_ptr<float[]> out_buffer_;
+
+  // Arrays for fft.
+  std::unique_ptr<size_t[]> ip_;
+  std::unique_ptr<float[]> wfft_;
+
+  std::unique_ptr<float[]> spectral_mean_;
+
+  // Stores the data for the fft.
+  std::unique_ptr<float[]> fft_buffer_;
+
+  std::unique_ptr<float[]> magnitudes_;
+
+  const float* window_;
+
+  std::unique_ptr<float[]> mean_factor_;
+
+  float detector_smoothed_;
+
+  int keypress_counter_;
+  int chunks_since_keypress_;
+  bool detection_enabled_;
+  bool suppression_enabled_;
+
+  bool use_hard_restoration_;
+  int chunks_since_voice_change_;
+
+  uint32_t seed_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build
new file mode 100644
index 0000000000..ee6b82a2f6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build
@@ -0,0 +1,236 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+  ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = 
True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("transient_suppressor_impl_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc new file mode 100644 index 0000000000..ab48504af6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+constexpr int kMono = 1;
+
+// Returns the index of the first non-zero sample in `samples` or
+// `absl::nullopt` if all the samples are zero.
+absl::optional<size_t> FindFirstNonZeroSample(
+    const std::vector<float>& samples) {
+  for (size_t i = 0; i < samples.size(); ++i) {
+    if (samples[i] != 0.0f) {
+      return i;
+    }
+  }
+  return absl::nullopt;
+}
+
+}  // namespace
+
+class TransientSuppressorVadModeParametrization
+    : public ::testing::TestWithParam<TransientSuppressor::VadMode> {};
+
+TEST_P(TransientSuppressorVadModeParametrization,
+       TypingDetectionLogicWorksAsExpectedForMono) {
+  TransientSuppressorImpl ts(GetParam(), ts::kSampleRate16kHz,
+                             ts::kSampleRate16kHz, kMono);
+
+  // Each key-press enables detection.
+  EXPECT_FALSE(ts.detection_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.detection_enabled_);
+
+  // It takes four seconds without any key-press to disable the detection.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+  }
+  ts.UpdateKeypress(false);
+  EXPECT_FALSE(ts.detection_enabled_);
+
+  // Key-presses that are more than a second apart from each other don't enable
+  // suppression.
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_FALSE(ts.suppression_enabled_);
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+    for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_FALSE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(false);
+  }
+
+  // Two consecutive key-presses are enough to enable the suppression.
+  ts.UpdateKeypress(true);
+  EXPECT_FALSE(ts.suppression_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.suppression_enabled_);
+
+  // Key-presses that are less than a second apart from each other don't
+  // disable detection or suppression.
+  for (int i = 0; i < 100; ++i) {
+    for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_TRUE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+
+  // It takes four seconds without any key-press to disable the detection and
+  // suppression.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+  for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_FALSE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    TransientSuppressorImplTest,
+    TransientSuppressorVadModeParametrization,
+    ::testing::Values(TransientSuppressor::VadMode::kDefault,
+                      TransientSuppressor::VadMode::kRnnVad,
+                      TransientSuppressor::VadMode::kNoVad));
+
+class TransientSuppressorSampleRateParametrization
+    : public ::testing::TestWithParam<int> {};
+
+// Checks that voice probability and processed audio data are temporally
+// aligned after `Suppress()` is called.
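+// The check relies on feeding frames of constant non-zero samples: the index
+// of the first non-zero output sample reveals the algorithmic delay in
+// samples, against which the delayed voice probability is then compared.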
+TEST_P(TransientSuppressorSampleRateParametrization,
+       CheckAudioAndVoiceProbabilityTemporallyAligned) {
+  const int sample_rate_hz = GetParam();
+  TransientSuppressorImpl ts(TransientSuppressor::VadMode::kDefault,
+                             sample_rate_hz,
+                             /*detection_rate_hz=*/sample_rate_hz, kMono);
+
+  const int frame_size = sample_rate_hz * ts::kChunkSizeMs / 1000;
+  std::vector<float> frame(frame_size);
+
+  constexpr int kMaxAttempts = 3;
+  for (int i = 0; i < kMaxAttempts; ++i) {
+    SCOPED_TRACE(i);
+
+    // Call `Suppress()` on frames of non-zero audio samples.
+    std::fill(frame.begin(), frame.end(), 1000.0f);
+    float delayed_voice_probability = ts.Suppress(
+        frame.data(), frame.size(), kMono, /*detection_data=*/nullptr,
+        /*detection_length=*/frame_size, /*reference_data=*/nullptr,
+        /*reference_length=*/frame_size, /*voice_probability=*/1.0f,
+        /*key_pressed=*/false);
+
+    // Detect the algorithmic delay of `TransientSuppressorImpl`.
+    absl::optional<size_t> frame_delay = FindFirstNonZeroSample(frame);
+
+    // Check that the delayed voice probability is delayed according to the
+    // measured delay.
+    if (frame_delay.has_value()) {
+      if (*frame_delay == 0) {
+        // When the delay is an integer multiple of the frame duration,
+        // `Suppress()` returns a copy of a previously observed voice
+        // probability value.
+        EXPECT_EQ(delayed_voice_probability, 1.0f);
+      } else {
+        // Instead, when the delay is fractional, `Suppress()` returns an
+        // interpolated value. Since the exact value depends on the
+        // interpolation method, we only check that the delayed voice
+        // probability is not zero as it must converge towards the previously
+        // observed value.
+        EXPECT_GT(delayed_voice_probability, 0.0f);
+      }
+      break;
+    } else {
+      // The algorithmic delay is longer than the duration of a single frame.
+      // Until the delay is detected, the delayed voice probability is zero.
+      EXPECT_EQ(delayed_voice_probability, 0.0f);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(TransientSuppressorImplTest,
+                         TransientSuppressorSampleRateParametrization,
+                         ::testing::Values(ts::kSampleRate8kHz,
+                                           ts::kSampleRate16kHz,
+                                           ts::kSampleRate32kHz,
+                                           ts::kSampleRate48kHz));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc
new file mode 100644
index 0000000000..27b2b42b38
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/voice_probability_delay_unit.h"
+
+#include <array>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+VoiceProbabilityDelayUnit::VoiceProbabilityDelayUnit(int delay_num_samples,
+                                                     int sample_rate_hz) {
+  Initialize(delay_num_samples, sample_rate_hz);
+}
+
+void VoiceProbabilityDelayUnit::Initialize(int delay_num_samples,
+                                           int sample_rate_hz) {
+  RTC_DCHECK_GE(delay_num_samples, 0);
+  RTC_DCHECK_LE(delay_num_samples, sample_rate_hz / 50)
+      << "The implementation does not support delays greater than 20 ms.";
+  int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);  // 10 ms.
+  if (delay_num_samples <= frame_size) {
+    weights_[0] = 0.0f;
+    weights_[1] = static_cast<float>(delay_num_samples) / frame_size;
+    weights_[2] =
+        static_cast<float>(frame_size - delay_num_samples) / frame_size;
+  } else {
+    delay_num_samples -= frame_size;
+    weights_[0] = static_cast<float>(delay_num_samples) / frame_size;
+    weights_[1] =
+        static_cast<float>(frame_size - delay_num_samples) / frame_size;
+    weights_[2] = 0.0f;
+  }
+
+  // Resets the delay unit.
+  last_probabilities_.fill(0.0f);
+}
+
+float VoiceProbabilityDelayUnit::Delay(float voice_probability) {
+  float weighted_probability = weights_[0] * last_probabilities_[0] +
+                               weights_[1] * last_probabilities_[1] +
+                               weights_[2] * voice_probability;
+  last_probabilities_[0] = last_probabilities_[1];
+  last_probabilities_[1] = voice_probability;
+  return weighted_probability;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h
new file mode 100644
index 0000000000..05961663e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
+
+#include <array>
+
+namespace webrtc {
+
+// Iteratively produces a sequence of delayed voice probability values given a
+// fixed delay between 0 and 20 ms and given a sequence of voice probability
+// values observed every 10 ms. Supports fractional delays, that is, delays
+// that are not an integer multiple of 10 ms. Applies interpolation with
+// fractional delays; otherwise, returns a previously observed value according
+// to the given fixed delay.
+class VoiceProbabilityDelayUnit {
+ public:
+  // Ctor. `delay_num_samples` is the delay in number of samples and it must
+  // be non-negative and correspond to at most 20 ms.
+  VoiceProbabilityDelayUnit(int delay_num_samples, int sample_rate_hz);
+
+  // Handles delay and sample rate changes and resets the delay unit.
+  void Initialize(int delay_num_samples, int sample_rate_hz);
+
+  // Observes `voice_probability` and returns a delayed voice probability.
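+  //
+  // A worked example (illustrative, derived from `Initialize()` above): a
+  // 14 ms delay at 48 kHz (`delay_num_samples` = 672, 10 ms frame = 480
+  // samples) yields weights [0.4, 0.6, 0.0], so
+  //   Delay(p) = 0.4 * p(t - 2) + 0.6 * p(t - 1);
+  // after observing 1.0 and 0.2, the next call returns
+  // 0.4 * 1.0 + 0.6 * 0.2 = 0.52.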
+  float Delay(float voice_probability);
+
+ private:
+  std::array<float, 3> weights_;
+  std::array<float, 2> last_probabilities_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build
new file mode 100644
index 0000000000..e2abcb8490
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+  ### DO NOT edit it by hand. ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if 
CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("voice_probability_delay_unit_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc new file mode 100644 index 0000000000..04848e6f2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/voice_probability_delay_unit.h" + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Checks that with zero delay, the observed value is immediately returned as +// delayed value. +TEST(VoiceProbabilityDelayUnit, NoDelay) { + VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/0, + /*sample_rate_hz=*/48000); + constexpr int kMax = 5; + for (int i = 0; i <= kMax; ++i) { + SCOPED_TRACE(i); + float voice_probability = static_cast(i) / kMax; + EXPECT_EQ(voice_probability, delay_unit.Delay(voice_probability)); + } +} + +// Checks that with integer delays, an exact copy of a previously observed value +// is returned. +TEST(VoiceProbabilityDelayUnit, IntegerDelay) { + VoiceProbabilityDelayUnit delay_unit_10ms(/*delay_num_samples=*/480, + /*sample_rate_hz=*/48000); + delay_unit_10ms.Delay(0.125f); + EXPECT_EQ(0.125f, delay_unit_10ms.Delay(0.9f)); + + VoiceProbabilityDelayUnit delay_unit_20ms(/*delay_num_samples=*/960, + /*sample_rate_hz=*/48000); + delay_unit_20ms.Delay(0.125f); + delay_unit_20ms.Delay(0.8f); + EXPECT_EQ(0.125f, delay_unit_20ms.Delay(0.9f)); +} + +// Checks that with a fractional delay < 10 ms, interpolation is applied. +TEST(VoiceProbabilityDelayUnit, FractionalDelayLessThan10ms) { + // Create delay unit with fractional delay of 6 ms. + VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/288, + /*sample_rate_hz=*/48000); + // frame 0 + // --------- frame 1 + // --------- + // 0000001111 + delay_unit.Delay(1.0f); + EXPECT_FLOAT_EQ(0.68f, delay_unit.Delay(0.2f)); +} + +// Checks that with a fractional delay > 10 ms, interpolation is applied. +TEST(VoiceProbabilityDelayUnit, FractionalDelayGreaterThan10ms) { + // Create delay unit with fractional delay of 14 ms. + VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/672, + /*sample_rate_hz=*/48000); + // frame 0 + // --------- frame 1 + // --------- frame 2 + // --------- + // 0000111111 + delay_unit.Delay(1.0f); + delay_unit.Delay(0.2f); + EXPECT_FLOAT_EQ(0.52f, delay_unit.Delay(1.0f)); +} + +// Checks that `Initialize()` resets the delay unit. +TEST(VoiceProbabilityDelayUnit, InitializeResetsDelayUnit) { + VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/960, + /*sample_rate_hz=*/48000); + delay_unit.Delay(1.0f); + delay_unit.Delay(0.9f); + + delay_unit.Initialize(/*delay_num_samples=*/160, /*sample_rate_hz=*/8000); + EXPECT_EQ(0.0f, delay_unit.Delay(0.1f)); + EXPECT_EQ(0.0f, delay_unit.Delay(0.2f)); + EXPECT_EQ(0.1f, delay_unit.Delay(0.3f)); +} + +// Checks that `Initialize()` handles delay changes. +TEST(VoiceProbabilityDelayUnit, InitializeHandlesDelayChanges) { + // Start with a 20 ms delay. + VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/960, + /*sample_rate_hz=*/48000); + delay_unit.Delay(1.0f); + delay_unit.Delay(0.9f); + + // Lower the delay to 10 ms. + delay_unit.Initialize(/*delay_num_samples=*/80, /*sample_rate_hz=*/8000); + EXPECT_EQ(0.0f, delay_unit.Delay(0.1f)); + EXPECT_EQ(0.1f, delay_unit.Delay(0.2f)); + + // Increase the delay to 15 ms. 
+ delay_unit.Initialize(/*delay_num_samples=*/120, /*sample_rate_hz=*/8000); + EXPECT_EQ(0.0f, delay_unit.Delay(0.1f)); + EXPECT_EQ(0.05f, delay_unit.Delay(0.2f)); + EXPECT_EQ(0.15f, delay_unit.Delay(0.3f)); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h b/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h new file mode 100644 index 0000000000..54e3c25785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h @@ -0,0 +1,557 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_ + +namespace webrtc { + +// Hanning window for 4ms 16kHz +static const float kHanning64w128[128] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 
0.21910124015687f,
+    0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
+    0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
+
+// hybrid Hanning & flat window
+static const float kBlocks80w128[128] = {
+    0.00000000f, 0.03271908f, 0.06540313f, 0.09801714f, 0.13052619f,
+    0.16289547f, 0.19509032f, 0.22707626f, 0.25881905f, 0.29028468f,
+    0.32143947f, 0.35225005f, 0.38268343f, 0.41270703f, 0.44228869f,
+    0.47139674f, 0.50000000f, 0.52806785f, 0.55557023f, 0.58247770f,
+    0.60876143f, 0.63439328f, 0.65934582f, 0.68359230f, 0.70710678f,
+    0.72986407f, 0.75183981f, 0.77301045f, 0.79335334f, 0.81284668f,
+    0.83146961f, 0.84920218f, 0.86602540f, 0.88192126f, 0.89687274f,
+    0.91086382f, 0.92387953f, 0.93590593f, 0.94693013f, 0.95694034f,
+    0.96592583f, 0.97387698f, 0.98078528f, 0.98664333f, 0.99144486f,
+    0.99518473f, 0.99785892f, 0.99946459f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 0.99946459f, 0.99785892f, 0.99518473f, 0.99144486f,
+    0.98664333f, 0.98078528f, 0.97387698f, 0.96592583f, 0.95694034f,
+    0.94693013f, 0.93590593f, 0.92387953f, 0.91086382f, 0.89687274f,
+    0.88192126f, 0.86602540f, 0.84920218f, 0.83146961f, 0.81284668f,
+    0.79335334f, 0.77301045f, 0.75183981f, 0.72986407f, 0.70710678f,
+    0.68359230f, 0.65934582f, 0.63439328f, 0.60876143f, 0.58247770f,
+    0.55557023f, 0.52806785f, 0.50000000f, 0.47139674f, 0.44228869f,
+    0.41270703f, 0.38268343f, 0.35225005f, 0.32143947f, 0.29028468f,
+    0.25881905f, 0.22707626f, 0.19509032f, 0.16289547f, 0.13052619f,
+    0.09801714f, 0.06540313f, 0.03271908f};
+
+// hybrid Hanning & flat window
+static const float kBlocks160w256[256] = {
+    0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
+    0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
+    0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
+    0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
+    0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
+    0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
+    0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
+    0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
+    0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
+    0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
+    0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
+    0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
+    0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
+    0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
+    0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
+    0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
+    0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
+    0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
+    0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
+    0.99986614f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 
1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+    1.00000000f, 0.99986614f, 0.99946459f, 0.99879546f, 0.99785892f,
+    0.99665524f, 0.99518473f, 0.99344778f, 0.99144486f, 0.98917651f,
+    0.98664333f, 0.98384601f, 0.98078528f, 0.97746197f, 0.97387698f,
+    0.97003125f, 0.96592583f, 0.96156180f, 0.95694034f, 0.95206268f,
+    0.94693013f, 0.94154407f, 0.93590593f, 0.93001722f, 0.92387953f,
+    0.91749450f, 0.91086382f, 0.90398929f, 0.89687274f, 0.88951608f,
+    0.88192126f, 0.87409034f, 0.86602540f, 0.85772861f, 0.84920218f,
+    0.84044840f, 0.83146961f, 0.82226822f, 0.81284668f, 0.80320753f,
+    0.79335334f, 0.78328675f, 0.77301045f, 0.76252720f, 0.75183981f,
+    0.74095113f, 0.72986407f, 0.71858162f, 0.70710678f, 0.69544264f,
+    0.68359230f, 0.67155895f, 0.65934582f, 0.64695615f, 0.63439328f,
+    0.62166057f, 0.60876143f, 0.59569930f, 0.58247770f, 0.56910015f,
+    0.55557023f, 0.54189158f, 0.52806785f, 0.51410274f, 0.50000000f,
+    0.48576339f, 0.47139674f, 0.45690388f, 0.44228869f, 0.42755509f,
+    0.41270703f, 0.39774847f, 0.38268343f, 0.36751594f, 0.35225005f,
+    0.33688985f, 0.32143947f, 0.30590302f, 0.29028468f, 0.27458862f,
+    0.25881905f, 0.24298018f, 0.22707626f, 0.21111155f, 0.19509032f,
+    0.17901686f, 0.16289547f, 0.14673047f, 0.13052619f, 0.11428696f,
+    0.09801714f, 0.08172107f, 0.06540313f, 0.04906767f, 0.03271908f,
+    0.01636173f};
+
+// hybrid Hanning & flat window: for 20ms
+static const float kBlocks320w512[512] = {
+    0.00000000f, 0.00818114f, 0.01636173f, 0.02454123f, 0.03271908f,
+    0.04089475f, 0.04906767f, 0.05723732f, 0.06540313f, 0.07356456f,
+    0.08172107f, 0.08987211f, 0.09801714f, 0.10615561f, 0.11428696f,
+    0.12241068f, 0.13052619f, 0.13863297f, 0.14673047f, 0.15481816f,
+    0.16289547f, 0.17096189f, 0.17901686f, 0.18705985f, 0.19509032f,
+    0.20310773f, 0.21111155f, 0.21910124f, 0.22707626f, 0.23503609f,
+    0.24298018f, 0.25090801f, 0.25881905f, 0.26671276f, 0.27458862f,
+    0.28244610f, 0.29028468f, 0.29810383f, 0.30590302f, 0.31368174f,
+    0.32143947f, 0.32917568f, 0.33688985f, 0.34458148f, 0.35225005f,
+    0.35989504f, 0.36751594f, 0.37511224f, 0.38268343f, 0.39022901f,
+    0.39774847f, 0.40524131f, 0.41270703f, 0.42014512f, 0.42755509f,
+    0.43493645f, 0.44228869f, 0.44961133f, 0.45690388f, 0.46416584f,
+    0.47139674f, 0.47859608f, 0.48576339f, 0.49289819f, 0.50000000f,
+    0.50706834f, 0.51410274f, 0.52110274f, 0.52806785f, 0.53499762f,
+    0.54189158f, 0.54874927f, 0.55557023f, 0.56235401f, 0.56910015f,
+    0.57580819f, 0.58247770f, 0.58910822f, 0.59569930f, 0.60225052f,
+    0.60876143f, 0.61523159f, 0.62166057f, 0.62804795f, 0.63439328f,
+    0.64069616f, 0.64695615f, 0.65317284f, 0.65934582f, 0.66547466f,
+    0.67155895f, 0.67759830f, 0.68359230f, 0.68954054f, 0.69544264f,
+    0.70129818f, 0.70710678f, 0.71286806f, 0.71858162f, 0.72424708f,
+    0.72986407f, 0.73543221f, 0.74095113f, 0.74642045f, 0.75183981f,
+    0.75720885f, 0.76252720f, 0.76779452f, 0.77301045f, 0.77817464f,
+    0.78328675f, 0.78834643f, 0.79335334f, 0.79830715f, 
0.80320753f, + 0.80805415f, 0.81284668f, 0.81758481f, 0.82226822f, 0.82689659f, + 0.83146961f, 0.83598698f, 0.84044840f, 0.84485357f, 0.84920218f, + 0.85349396f, 0.85772861f, 0.86190585f, 0.86602540f, 0.87008699f, + 0.87409034f, 0.87803519f, 0.88192126f, 0.88574831f, 0.88951608f, + 0.89322430f, 0.89687274f, 0.90046115f, 0.90398929f, 0.90745693f, + 0.91086382f, 0.91420976f, 0.91749450f, 0.92071783f, 0.92387953f, + 0.92697940f, 0.93001722f, 0.93299280f, 0.93590593f, 0.93875641f, + 0.94154407f, 0.94426870f, 0.94693013f, 0.94952818f, 0.95206268f, + 0.95453345f, 0.95694034f, 0.95928317f, 0.96156180f, 0.96377607f, + 0.96592583f, 0.96801094f, 0.97003125f, 0.97198664f, 0.97387698f, + 0.97570213f, 0.97746197f, 0.97915640f, 0.98078528f, 0.98234852f, + 0.98384601f, 0.98527764f, 0.98664333f, 0.98794298f, 0.98917651f, + 0.99034383f, 0.99144486f, 0.99247953f, 0.99344778f, 0.99434953f, + 0.99518473f, 0.99595331f, 0.99665524f, 0.99729046f, 0.99785892f, + 0.99836060f, 0.99879546f, 0.99916346f, 0.99946459f, 0.99969882f, + 0.99986614f, 0.99996653f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 0.99996653f, 0.99986614f, 0.99969882f, 0.99946459f, + 0.99916346f, 0.99879546f, 0.99836060f, 0.99785892f, 0.99729046f, + 0.99665524f, 0.99595331f, 0.99518473f, 0.99434953f, 0.99344778f, + 0.99247953f, 0.99144486f, 0.99034383f, 0.98917651f, 0.98794298f, + 0.98664333f, 0.98527764f, 0.98384601f, 0.98234852f, 0.98078528f, + 0.97915640f, 0.97746197f, 0.97570213f, 0.97387698f, 0.97198664f, + 0.97003125f, 0.96801094f, 0.96592583f, 0.96377607f, 0.96156180f, + 0.95928317f, 0.95694034f, 0.95453345f, 0.95206268f, 0.94952818f, + 0.94693013f, 0.94426870f, 0.94154407f, 0.93875641f, 0.93590593f, + 0.93299280f, 0.93001722f, 0.92697940f, 0.92387953f, 0.92071783f, + 0.91749450f, 0.91420976f, 0.91086382f, 0.90745693f, 0.90398929f, + 0.90046115f, 0.89687274f, 0.89322430f, 0.88951608f, 
0.88574831f, + 0.88192126f, 0.87803519f, 0.87409034f, 0.87008699f, 0.86602540f, + 0.86190585f, 0.85772861f, 0.85349396f, 0.84920218f, 0.84485357f, + 0.84044840f, 0.83598698f, 0.83146961f, 0.82689659f, 0.82226822f, + 0.81758481f, 0.81284668f, 0.80805415f, 0.80320753f, 0.79830715f, + 0.79335334f, 0.78834643f, 0.78328675f, 0.77817464f, 0.77301045f, + 0.76779452f, 0.76252720f, 0.75720885f, 0.75183981f, 0.74642045f, + 0.74095113f, 0.73543221f, 0.72986407f, 0.72424708f, 0.71858162f, + 0.71286806f, 0.70710678f, 0.70129818f, 0.69544264f, 0.68954054f, + 0.68359230f, 0.67759830f, 0.67155895f, 0.66547466f, 0.65934582f, + 0.65317284f, 0.64695615f, 0.64069616f, 0.63439328f, 0.62804795f, + 0.62166057f, 0.61523159f, 0.60876143f, 0.60225052f, 0.59569930f, + 0.58910822f, 0.58247770f, 0.57580819f, 0.56910015f, 0.56235401f, + 0.55557023f, 0.54874927f, 0.54189158f, 0.53499762f, 0.52806785f, + 0.52110274f, 0.51410274f, 0.50706834f, 0.50000000f, 0.49289819f, + 0.48576339f, 0.47859608f, 0.47139674f, 0.46416584f, 0.45690388f, + 0.44961133f, 0.44228869f, 0.43493645f, 0.42755509f, 0.42014512f, + 0.41270703f, 0.40524131f, 0.39774847f, 0.39022901f, 0.38268343f, + 0.37511224f, 0.36751594f, 0.35989504f, 0.35225005f, 0.34458148f, + 0.33688985f, 0.32917568f, 0.32143947f, 0.31368174f, 0.30590302f, + 0.29810383f, 0.29028468f, 0.28244610f, 0.27458862f, 0.26671276f, + 0.25881905f, 0.25090801f, 0.24298018f, 0.23503609f, 0.22707626f, + 0.21910124f, 0.21111155f, 0.20310773f, 0.19509032f, 0.18705985f, + 0.17901686f, 0.17096189f, 0.16289547f, 0.15481816f, 0.14673047f, + 0.13863297f, 0.13052619f, 0.12241068f, 0.11428696f, 0.10615561f, + 0.09801714f, 0.08987211f, 0.08172107f, 0.07356456f, 0.06540313f, + 0.05723732f, 0.04906767f, 0.04089475f, 0.03271908f, 0.02454123f, + 0.01636173f, 0.00818114f}; + +// Hanning window: for 15ms at 16kHz with symmetric zeros +static const float kBlocks240w512[512] = { + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00654494f, 0.01308960f, 0.01963369f, + 0.02617695f, 0.03271908f, 0.03925982f, 0.04579887f, 0.05233596f, + 0.05887080f, 0.06540313f, 0.07193266f, 0.07845910f, 0.08498218f, + 0.09150162f, 0.09801714f, 0.10452846f, 0.11103531f, 0.11753740f, + 0.12403446f, 0.13052620f, 0.13701233f, 0.14349262f, 0.14996676f, + 0.15643448f, 0.16289547f, 0.16934951f, 0.17579629f, 0.18223552f, + 0.18866697f, 0.19509032f, 0.20150533f, 0.20791170f, 0.21430916f, + 0.22069745f, 0.22707628f, 0.23344538f, 0.23980446f, 0.24615330f, + 0.25249159f, 0.25881904f, 0.26513544f, 0.27144045f, 0.27773386f, + 0.28401536f, 0.29028466f, 0.29654160f, 0.30278578f, 0.30901700f, + 0.31523499f, 0.32143945f, 0.32763019f, 0.33380687f, 0.33996925f, + 0.34611708f, 0.35225007f, 0.35836795f, 0.36447051f, 0.37055743f, + 0.37662852f, 0.38268346f, 0.38872197f, 0.39474389f, 0.40074885f, + 0.40673664f, 0.41270703f, 0.41865975f, 0.42459452f, 0.43051112f, + 0.43640924f, 0.44228873f, 0.44814920f, 0.45399052f, 0.45981237f, + 0.46561453f, 0.47139674f, 0.47715878f, 0.48290035f, 0.48862126f, + 0.49432120f, 0.50000000f, 0.50565743f, 0.51129311f, 0.51690692f, + 0.52249855f, 0.52806789f, 0.53361452f, 0.53913832f, 0.54463905f, + 0.55011642f, 0.55557024f, 0.56100029f, 0.56640625f, 0.57178795f, + 0.57714522f, 0.58247769f, 0.58778524f, 0.59306765f, 0.59832460f, + 0.60355598f, 0.60876143f, 0.61394083f, 0.61909395f, 0.62422055f, + 0.62932038f, 0.63439333f, 0.63943899f, 0.64445734f, 
0.64944810f, + 0.65441096f, 0.65934587f, 0.66425246f, 0.66913062f, 0.67398012f, + 0.67880076f, 0.68359232f, 0.68835455f, 0.69308740f, 0.69779050f, + 0.70246369f, 0.70710677f, 0.71171963f, 0.71630198f, 0.72085363f, + 0.72537440f, 0.72986406f, 0.73432255f, 0.73874950f, 0.74314487f, + 0.74750835f, 0.75183982f, 0.75613910f, 0.76040596f, 0.76464027f, + 0.76884186f, 0.77301043f, 0.77714598f, 0.78124821f, 0.78531694f, + 0.78935206f, 0.79335338f, 0.79732066f, 0.80125386f, 0.80515265f, + 0.80901700f, 0.81284672f, 0.81664157f, 0.82040149f, 0.82412618f, + 0.82781565f, 0.83146966f, 0.83508795f, 0.83867061f, 0.84221727f, + 0.84572780f, 0.84920216f, 0.85264021f, 0.85604161f, 0.85940641f, + 0.86273444f, 0.86602545f, 0.86927933f, 0.87249607f, 0.87567532f, + 0.87881714f, 0.88192129f, 0.88498765f, 0.88801610f, 0.89100653f, + 0.89395881f, 0.89687276f, 0.89974827f, 0.90258533f, 0.90538365f, + 0.90814316f, 0.91086388f, 0.91354549f, 0.91618794f, 0.91879123f, + 0.92135513f, 0.92387950f, 0.92636442f, 0.92880958f, 0.93121493f, + 0.93358046f, 0.93590593f, 0.93819135f, 0.94043654f, 0.94264150f, + 0.94480604f, 0.94693011f, 0.94901365f, 0.95105654f, 0.95305866f, + 0.95501995f, 0.95694035f, 0.95881975f, 0.96065807f, 0.96245527f, + 0.96421117f, 0.96592581f, 0.96759909f, 0.96923089f, 0.97082120f, + 0.97236991f, 0.97387701f, 0.97534233f, 0.97676587f, 0.97814763f, + 0.97948742f, 0.98078531f, 0.98204112f, 0.98325491f, 0.98442656f, + 0.98555607f, 0.98664331f, 0.98768836f, 0.98869103f, 0.98965138f, + 0.99056935f, 0.99144489f, 0.99227792f, 0.99306846f, 0.99381649f, + 0.99452192f, 0.99518472f, 0.99580491f, 0.99638247f, 0.99691731f, + 0.99740952f, 0.99785894f, 0.99826562f, 0.99862951f, 0.99895066f, + 0.99922901f, 0.99946457f, 0.99965733f, 0.99980724f, 0.99991435f, + 0.99997860f, 1.00000000f, 0.99997860f, 0.99991435f, 0.99980724f, + 0.99965733f, 0.99946457f, 0.99922901f, 0.99895066f, 0.99862951f, + 0.99826562f, 0.99785894f, 0.99740946f, 0.99691731f, 0.99638247f, + 0.99580491f, 0.99518472f, 0.99452192f, 0.99381644f, 0.99306846f, + 0.99227792f, 0.99144489f, 0.99056935f, 0.98965138f, 0.98869103f, + 0.98768836f, 0.98664331f, 0.98555607f, 0.98442656f, 0.98325491f, + 0.98204112f, 0.98078525f, 0.97948742f, 0.97814757f, 0.97676587f, + 0.97534227f, 0.97387695f, 0.97236991f, 0.97082120f, 0.96923089f, + 0.96759909f, 0.96592581f, 0.96421117f, 0.96245521f, 0.96065807f, + 0.95881969f, 0.95694029f, 0.95501995f, 0.95305860f, 0.95105648f, + 0.94901365f, 0.94693011f, 0.94480604f, 0.94264150f, 0.94043654f, + 0.93819129f, 0.93590593f, 0.93358046f, 0.93121493f, 0.92880952f, + 0.92636436f, 0.92387950f, 0.92135507f, 0.91879123f, 0.91618794f, + 0.91354543f, 0.91086382f, 0.90814310f, 0.90538365f, 0.90258527f, + 0.89974827f, 0.89687276f, 0.89395875f, 0.89100647f, 0.88801610f, + 0.88498759f, 0.88192123f, 0.87881714f, 0.87567532f, 0.87249595f, + 0.86927933f, 0.86602539f, 0.86273432f, 0.85940641f, 0.85604161f, + 0.85264009f, 0.84920216f, 0.84572780f, 0.84221715f, 0.83867055f, + 0.83508795f, 0.83146954f, 0.82781565f, 0.82412612f, 0.82040137f, + 0.81664157f, 0.81284660f, 0.80901700f, 0.80515265f, 0.80125374f, + 0.79732066f, 0.79335332f, 0.78935200f, 0.78531694f, 0.78124815f, + 0.77714586f, 0.77301049f, 0.76884180f, 0.76464021f, 0.76040596f, + 0.75613904f, 0.75183970f, 0.74750835f, 0.74314481f, 0.73874938f, + 0.73432249f, 0.72986400f, 0.72537428f, 0.72085363f, 0.71630186f, + 0.71171951f, 0.70710677f, 0.70246363f, 0.69779032f, 0.69308734f, + 0.68835449f, 0.68359220f, 0.67880070f, 0.67398006f, 0.66913044f, + 0.66425240f, 0.65934575f, 0.65441096f, 0.64944804f, 
0.64445722f, + 0.63943905f, 0.63439327f, 0.62932026f, 0.62422055f, 0.61909389f, + 0.61394072f, 0.60876143f, 0.60355592f, 0.59832448f, 0.59306765f, + 0.58778518f, 0.58247757f, 0.57714522f, 0.57178789f, 0.56640613f, + 0.56100023f, 0.55557019f, 0.55011630f, 0.54463905f, 0.53913826f, + 0.53361434f, 0.52806783f, 0.52249849f, 0.51690674f, 0.51129305f, + 0.50565726f, 0.50000006f, 0.49432117f, 0.48862115f, 0.48290038f, + 0.47715873f, 0.47139663f, 0.46561456f, 0.45981231f, 0.45399037f, + 0.44814920f, 0.44228864f, 0.43640912f, 0.43051112f, 0.42459446f, + 0.41865960f, 0.41270703f, 0.40673658f, 0.40074870f, 0.39474386f, + 0.38872188f, 0.38268328f, 0.37662849f, 0.37055734f, 0.36447033f, + 0.35836792f, 0.35224995f, 0.34611690f, 0.33996922f, 0.33380675f, + 0.32763001f, 0.32143945f, 0.31523487f, 0.30901679f, 0.30278572f, + 0.29654145f, 0.29028472f, 0.28401530f, 0.27773371f, 0.27144048f, + 0.26513538f, 0.25881892f, 0.25249159f, 0.24615324f, 0.23980433f, + 0.23344538f, 0.22707619f, 0.22069728f, 0.21430916f, 0.20791161f, + 0.20150517f, 0.19509031f, 0.18866688f, 0.18223536f, 0.17579627f, + 0.16934940f, 0.16289529f, 0.15643445f, 0.14996666f, 0.14349243f, + 0.13701232f, 0.13052608f, 0.12403426f, 0.11753736f, 0.11103519f, + 0.10452849f, 0.09801710f, 0.09150149f, 0.08498220f, 0.07845904f, + 0.07193252f, 0.06540315f, 0.05887074f, 0.05233581f, 0.04579888f, + 0.03925974f, 0.03271893f, 0.02617695f, 0.01963361f, 0.01308943f, + 0.00654493f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f}; + +// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz +static const float kBlocks480w1024[1024] = { + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00327249f, 0.00654494f, + 0.00981732f, 0.01308960f, 0.01636173f, 0.01963369f, 0.02290544f, + 0.02617695f, 0.02944817f, 0.03271908f, 0.03598964f, 0.03925982f, + 0.04252957f, 0.04579887f, 0.04906768f, 0.05233596f, 0.05560368f, + 0.05887080f, 0.06213730f, 0.06540313f, 0.06866825f, 0.07193266f, + 0.07519628f, 0.07845910f, 0.08172107f, 0.08498218f, 0.08824237f, + 0.09150162f, 0.09475989f, 0.09801714f, 0.10127335f, 0.10452846f, + 0.10778246f, 0.11103531f, 0.11428697f, 0.11753740f, 0.12078657f, + 0.12403446f, 0.12728101f, 0.13052620f, 0.13376999f, 0.13701233f, + 0.14025325f, 0.14349262f, 0.14673047f, 0.14996676f, 0.15320145f, + 0.15643448f, 0.15966582f, 0.16289547f, 0.16612339f, 0.16934951f, + 0.17257382f, 0.17579629f, 0.17901687f, 0.18223552f, 0.18545224f, + 0.18866697f, 0.19187967f, 0.19509032f, 0.19829889f, 0.20150533f, + 0.20470962f, 0.20791170f, 0.21111156f, 0.21430916f, 0.21750447f, + 0.22069745f, 0.22388805f, 0.22707628f, 0.23026206f, 0.23344538f, + 0.23662618f, 0.23980446f, 0.24298020f, 0.24615330f, 0.24932377f, + 0.25249159f, 0.25565669f, 0.25881904f, 0.26197866f, 0.26513544f, + 0.26828939f, 0.27144045f, 0.27458861f, 0.27773386f, 0.28087610f, + 0.28401536f, 0.28715158f, 0.29028466f, 0.29341471f, 0.29654160f, + 0.29966527f, 0.30278578f, 0.30590302f, 0.30901700f, 0.31212768f, + 0.31523499f, 0.31833893f, 0.32143945f, 
0.32453656f, 0.32763019f, + 0.33072028f, 0.33380687f, 0.33688986f, 0.33996925f, 0.34304500f, + 0.34611708f, 0.34918544f, 0.35225007f, 0.35531089f, 0.35836795f, + 0.36142117f, 0.36447051f, 0.36751595f, 0.37055743f, 0.37359497f, + 0.37662852f, 0.37965801f, 0.38268346f, 0.38570479f, 0.38872197f, + 0.39173502f, 0.39474389f, 0.39774847f, 0.40074885f, 0.40374491f, + 0.40673664f, 0.40972406f, 0.41270703f, 0.41568562f, 0.41865975f, + 0.42162940f, 0.42459452f, 0.42755508f, 0.43051112f, 0.43346250f, + 0.43640924f, 0.43935132f, 0.44228873f, 0.44522133f, 0.44814920f, + 0.45107228f, 0.45399052f, 0.45690390f, 0.45981237f, 0.46271592f, + 0.46561453f, 0.46850815f, 0.47139674f, 0.47428030f, 0.47715878f, + 0.48003215f, 0.48290035f, 0.48576337f, 0.48862126f, 0.49147385f, + 0.49432120f, 0.49716330f, 0.50000000f, 0.50283140f, 0.50565743f, + 0.50847799f, 0.51129311f, 0.51410276f, 0.51690692f, 0.51970553f, + 0.52249855f, 0.52528602f, 0.52806789f, 0.53084403f, 0.53361452f, + 0.53637928f, 0.53913832f, 0.54189163f, 0.54463905f, 0.54738063f, + 0.55011642f, 0.55284631f, 0.55557024f, 0.55828828f, 0.56100029f, + 0.56370628f, 0.56640625f, 0.56910014f, 0.57178795f, 0.57446963f, + 0.57714522f, 0.57981455f, 0.58247769f, 0.58513463f, 0.58778524f, + 0.59042960f, 0.59306765f, 0.59569931f, 0.59832460f, 0.60094351f, + 0.60355598f, 0.60616195f, 0.60876143f, 0.61135441f, 0.61394083f, + 0.61652070f, 0.61909395f, 0.62166059f, 0.62422055f, 0.62677383f, + 0.62932038f, 0.63186020f, 0.63439333f, 0.63691956f, 0.63943899f, + 0.64195162f, 0.64445734f, 0.64695615f, 0.64944810f, 0.65193301f, + 0.65441096f, 0.65688187f, 0.65934587f, 0.66180271f, 0.66425246f, + 0.66669512f, 0.66913062f, 0.67155898f, 0.67398012f, 0.67639405f, + 0.67880076f, 0.68120021f, 0.68359232f, 0.68597710f, 0.68835455f, + 0.69072467f, 0.69308740f, 0.69544262f, 0.69779050f, 0.70013082f, + 0.70246369f, 0.70478904f, 0.70710677f, 0.70941699f, 0.71171963f, + 0.71401459f, 0.71630198f, 0.71858168f, 0.72085363f, 0.72311789f, + 0.72537440f, 0.72762316f, 0.72986406f, 0.73209721f, 0.73432255f, + 0.73653996f, 0.73874950f, 0.74095118f, 0.74314487f, 0.74533057f, + 0.74750835f, 0.74967808f, 0.75183982f, 0.75399351f, 0.75613910f, + 0.75827658f, 0.76040596f, 0.76252723f, 0.76464027f, 0.76674515f, + 0.76884186f, 0.77093029f, 0.77301043f, 0.77508241f, 0.77714598f, + 0.77920127f, 0.78124821f, 0.78328675f, 0.78531694f, 0.78733873f, + 0.78935206f, 0.79135692f, 0.79335338f, 0.79534125f, 0.79732066f, + 0.79929149f, 0.80125386f, 0.80320752f, 0.80515265f, 0.80708915f, + 0.80901700f, 0.81093621f, 0.81284672f, 0.81474853f, 0.81664157f, + 0.81852591f, 0.82040149f, 0.82226825f, 0.82412618f, 0.82597536f, + 0.82781565f, 0.82964706f, 0.83146966f, 0.83328325f, 0.83508795f, + 0.83688378f, 0.83867061f, 0.84044838f, 0.84221727f, 0.84397703f, + 0.84572780f, 0.84746957f, 0.84920216f, 0.85092574f, 0.85264021f, + 0.85434544f, 0.85604161f, 0.85772866f, 0.85940641f, 0.86107504f, + 0.86273444f, 0.86438453f, 0.86602545f, 0.86765707f, 0.86927933f, + 0.87089235f, 0.87249607f, 0.87409031f, 0.87567532f, 0.87725097f, + 0.87881714f, 0.88037390f, 0.88192129f, 0.88345921f, 0.88498765f, + 0.88650668f, 0.88801610f, 0.88951612f, 0.89100653f, 0.89248741f, + 0.89395881f, 0.89542055f, 0.89687276f, 0.89831537f, 0.89974827f, + 0.90117162f, 0.90258533f, 0.90398932f, 0.90538365f, 0.90676826f, + 0.90814316f, 0.90950841f, 0.91086388f, 0.91220951f, 0.91354549f, + 0.91487163f, 0.91618794f, 0.91749454f, 0.91879123f, 0.92007810f, + 0.92135513f, 0.92262226f, 0.92387950f, 0.92512691f, 0.92636442f, + 0.92759192f, 0.92880958f, 0.93001723f, 
0.93121493f, 0.93240267f, + 0.93358046f, 0.93474817f, 0.93590593f, 0.93705362f, 0.93819135f, + 0.93931901f, 0.94043654f, 0.94154406f, 0.94264150f, 0.94372880f, + 0.94480604f, 0.94587320f, 0.94693011f, 0.94797695f, 0.94901365f, + 0.95004016f, 0.95105654f, 0.95206273f, 0.95305866f, 0.95404440f, + 0.95501995f, 0.95598525f, 0.95694035f, 0.95788521f, 0.95881975f, + 0.95974404f, 0.96065807f, 0.96156180f, 0.96245527f, 0.96333838f, + 0.96421117f, 0.96507370f, 0.96592581f, 0.96676767f, 0.96759909f, + 0.96842021f, 0.96923089f, 0.97003126f, 0.97082120f, 0.97160077f, + 0.97236991f, 0.97312868f, 0.97387701f, 0.97461486f, 0.97534233f, + 0.97605932f, 0.97676587f, 0.97746199f, 0.97814763f, 0.97882277f, + 0.97948742f, 0.98014158f, 0.98078531f, 0.98141843f, 0.98204112f, + 0.98265332f, 0.98325491f, 0.98384601f, 0.98442656f, 0.98499662f, + 0.98555607f, 0.98610497f, 0.98664331f, 0.98717111f, 0.98768836f, + 0.98819500f, 0.98869103f, 0.98917651f, 0.98965138f, 0.99011570f, + 0.99056935f, 0.99101239f, 0.99144489f, 0.99186671f, 0.99227792f, + 0.99267852f, 0.99306846f, 0.99344778f, 0.99381649f, 0.99417448f, + 0.99452192f, 0.99485862f, 0.99518472f, 0.99550015f, 0.99580491f, + 0.99609905f, 0.99638247f, 0.99665523f, 0.99691731f, 0.99716878f, + 0.99740952f, 0.99763954f, 0.99785894f, 0.99806762f, 0.99826562f, + 0.99845290f, 0.99862951f, 0.99879545f, 0.99895066f, 0.99909520f, + 0.99922901f, 0.99935216f, 0.99946457f, 0.99956632f, 0.99965733f, + 0.99973762f, 0.99980724f, 0.99986613f, 0.99991435f, 0.99995178f, + 0.99997860f, 0.99999464f, 1.00000000f, 0.99999464f, 0.99997860f, + 0.99995178f, 0.99991435f, 0.99986613f, 0.99980724f, 0.99973762f, + 0.99965733f, 0.99956632f, 0.99946457f, 0.99935216f, 0.99922901f, + 0.99909520f, 0.99895066f, 0.99879545f, 0.99862951f, 0.99845290f, + 0.99826562f, 0.99806762f, 0.99785894f, 0.99763954f, 0.99740946f, + 0.99716872f, 0.99691731f, 0.99665523f, 0.99638247f, 0.99609905f, + 0.99580491f, 0.99550015f, 0.99518472f, 0.99485862f, 0.99452192f, + 0.99417448f, 0.99381644f, 0.99344778f, 0.99306846f, 0.99267852f, + 0.99227792f, 0.99186671f, 0.99144489f, 0.99101239f, 0.99056935f, + 0.99011564f, 0.98965138f, 0.98917651f, 0.98869103f, 0.98819494f, + 0.98768836f, 0.98717111f, 0.98664331f, 0.98610497f, 0.98555607f, + 0.98499656f, 0.98442656f, 0.98384601f, 0.98325491f, 0.98265326f, + 0.98204112f, 0.98141843f, 0.98078525f, 0.98014158f, 0.97948742f, + 0.97882277f, 0.97814757f, 0.97746193f, 0.97676587f, 0.97605932f, + 0.97534227f, 0.97461486f, 0.97387695f, 0.97312862f, 0.97236991f, + 0.97160077f, 0.97082120f, 0.97003126f, 0.96923089f, 0.96842015f, + 0.96759909f, 0.96676761f, 0.96592581f, 0.96507365f, 0.96421117f, + 0.96333838f, 0.96245521f, 0.96156180f, 0.96065807f, 0.95974404f, + 0.95881969f, 0.95788515f, 0.95694029f, 0.95598525f, 0.95501995f, + 0.95404440f, 0.95305860f, 0.95206267f, 0.95105648f, 0.95004016f, + 0.94901365f, 0.94797695f, 0.94693011f, 0.94587314f, 0.94480604f, + 0.94372880f, 0.94264150f, 0.94154406f, 0.94043654f, 0.93931895f, + 0.93819129f, 0.93705362f, 0.93590593f, 0.93474817f, 0.93358046f, + 0.93240267f, 0.93121493f, 0.93001723f, 0.92880952f, 0.92759192f, + 0.92636436f, 0.92512691f, 0.92387950f, 0.92262226f, 0.92135507f, + 0.92007804f, 0.91879123f, 0.91749448f, 0.91618794f, 0.91487157f, + 0.91354543f, 0.91220951f, 0.91086382f, 0.90950835f, 0.90814310f, + 0.90676820f, 0.90538365f, 0.90398932f, 0.90258527f, 0.90117157f, + 0.89974827f, 0.89831525f, 0.89687276f, 0.89542055f, 0.89395875f, + 0.89248741f, 0.89100647f, 0.88951600f, 0.88801610f, 0.88650662f, + 0.88498759f, 0.88345915f, 0.88192123f, 
0.88037384f, 0.87881714f, + 0.87725091f, 0.87567532f, 0.87409031f, 0.87249595f, 0.87089223f, + 0.86927933f, 0.86765701f, 0.86602539f, 0.86438447f, 0.86273432f, + 0.86107504f, 0.85940641f, 0.85772860f, 0.85604161f, 0.85434544f, + 0.85264009f, 0.85092574f, 0.84920216f, 0.84746951f, 0.84572780f, + 0.84397697f, 0.84221715f, 0.84044844f, 0.83867055f, 0.83688372f, + 0.83508795f, 0.83328319f, 0.83146954f, 0.82964706f, 0.82781565f, + 0.82597530f, 0.82412612f, 0.82226813f, 0.82040137f, 0.81852591f, + 0.81664157f, 0.81474847f, 0.81284660f, 0.81093609f, 0.80901700f, + 0.80708915f, 0.80515265f, 0.80320752f, 0.80125374f, 0.79929143f, + 0.79732066f, 0.79534125f, 0.79335332f, 0.79135686f, 0.78935200f, + 0.78733861f, 0.78531694f, 0.78328675f, 0.78124815f, 0.77920121f, + 0.77714586f, 0.77508223f, 0.77301049f, 0.77093029f, 0.76884180f, + 0.76674509f, 0.76464021f, 0.76252711f, 0.76040596f, 0.75827658f, + 0.75613904f, 0.75399339f, 0.75183970f, 0.74967796f, 0.74750835f, + 0.74533057f, 0.74314481f, 0.74095106f, 0.73874938f, 0.73653996f, + 0.73432249f, 0.73209721f, 0.72986400f, 0.72762305f, 0.72537428f, + 0.72311789f, 0.72085363f, 0.71858162f, 0.71630186f, 0.71401453f, + 0.71171951f, 0.70941705f, 0.70710677f, 0.70478898f, 0.70246363f, + 0.70013070f, 0.69779032f, 0.69544268f, 0.69308734f, 0.69072461f, + 0.68835449f, 0.68597704f, 0.68359220f, 0.68120021f, 0.67880070f, + 0.67639399f, 0.67398006f, 0.67155886f, 0.66913044f, 0.66669512f, + 0.66425240f, 0.66180259f, 0.65934575f, 0.65688181f, 0.65441096f, + 0.65193301f, 0.64944804f, 0.64695609f, 0.64445722f, 0.64195150f, + 0.63943905f, 0.63691956f, 0.63439327f, 0.63186014f, 0.62932026f, + 0.62677372f, 0.62422055f, 0.62166059f, 0.61909389f, 0.61652064f, + 0.61394072f, 0.61135429f, 0.60876143f, 0.60616189f, 0.60355592f, + 0.60094339f, 0.59832448f, 0.59569913f, 0.59306765f, 0.59042960f, + 0.58778518f, 0.58513451f, 0.58247757f, 0.57981461f, 0.57714522f, + 0.57446963f, 0.57178789f, 0.56910002f, 0.56640613f, 0.56370628f, + 0.56100023f, 0.55828822f, 0.55557019f, 0.55284619f, 0.55011630f, + 0.54738069f, 0.54463905f, 0.54189152f, 0.53913826f, 0.53637916f, + 0.53361434f, 0.53084403f, 0.52806783f, 0.52528596f, 0.52249849f, + 0.51970541f, 0.51690674f, 0.51410276f, 0.51129305f, 0.50847787f, + 0.50565726f, 0.50283122f, 0.50000006f, 0.49716327f, 0.49432117f, + 0.49147379f, 0.48862115f, 0.48576325f, 0.48290038f, 0.48003212f, + 0.47715873f, 0.47428021f, 0.47139663f, 0.46850798f, 0.46561456f, + 0.46271589f, 0.45981231f, 0.45690379f, 0.45399037f, 0.45107210f, + 0.44814920f, 0.44522130f, 0.44228864f, 0.43935123f, 0.43640912f, + 0.43346232f, 0.43051112f, 0.42755505f, 0.42459446f, 0.42162928f, + 0.41865960f, 0.41568545f, 0.41270703f, 0.40972400f, 0.40673658f, + 0.40374479f, 0.40074870f, 0.39774850f, 0.39474386f, 0.39173496f, + 0.38872188f, 0.38570464f, 0.38268328f, 0.37965804f, 0.37662849f, + 0.37359491f, 0.37055734f, 0.36751580f, 0.36447033f, 0.36142117f, + 0.35836792f, 0.35531086f, 0.35224995f, 0.34918529f, 0.34611690f, + 0.34304500f, 0.33996922f, 0.33688980f, 0.33380675f, 0.33072016f, + 0.32763001f, 0.32453656f, 0.32143945f, 0.31833887f, 0.31523487f, + 0.31212750f, 0.30901679f, 0.30590302f, 0.30278572f, 0.29966521f, + 0.29654145f, 0.29341453f, 0.29028472f, 0.28715155f, 0.28401530f, + 0.28087601f, 0.27773371f, 0.27458847f, 0.27144048f, 0.26828936f, + 0.26513538f, 0.26197854f, 0.25881892f, 0.25565651f, 0.25249159f, + 0.24932374f, 0.24615324f, 0.24298008f, 0.23980433f, 0.23662600f, + 0.23344538f, 0.23026201f, 0.22707619f, 0.22388794f, 0.22069728f, + 0.21750426f, 0.21430916f, 0.21111152f, 
0.20791161f, 0.20470949f,
+    0.20150517f, 0.19829892f, 0.19509031f, 0.19187963f, 0.18866688f,
+    0.18545210f, 0.18223536f, 0.17901689f, 0.17579627f, 0.17257376f,
+    0.16934940f, 0.16612324f, 0.16289529f, 0.15966584f, 0.15643445f,
+    0.15320137f, 0.14996666f, 0.14673033f, 0.14349243f, 0.14025325f,
+    0.13701232f, 0.13376991f, 0.13052608f, 0.12728085f, 0.12403426f,
+    0.12078657f, 0.11753736f, 0.11428688f, 0.11103519f, 0.10778230f,
+    0.10452849f, 0.10127334f, 0.09801710f, 0.09475980f, 0.09150149f,
+    0.08824220f, 0.08498220f, 0.08172106f, 0.07845904f, 0.07519618f,
+    0.07193252f, 0.06866808f, 0.06540315f, 0.06213728f, 0.05887074f,
+    0.05560357f, 0.05233581f, 0.04906749f, 0.04579888f, 0.04252954f,
+    0.03925974f, 0.03598953f, 0.03271893f, 0.02944798f, 0.02617695f,
+    0.02290541f, 0.01963361f, 0.01636161f, 0.01308943f, 0.00981712f,
+    0.00654493f, 0.00327244f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc
new file mode 100644
index 0000000000..2e0ee7e5b7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "common_audio/fir_filter.h"
+#include "common_audio/fir_filter_factory.h"
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDNode::WPDNode(size_t length,
+                 const float* coefficients,
+                 size_t coefficients_length)
+    : // The data buffer is sized to the parent data length so it can hold and
+      // filter that data.
+      data_(new float[2 * length + 1]),
+      length_(length),
+      filter_(
+          CreateFirFilter(coefficients, coefficients_length, 2 * length + 1)) {
+  RTC_DCHECK_GT(length, 0);
+  RTC_DCHECK(coefficients);
+  RTC_DCHECK_GT(coefficients_length, 0);
+  memset(data_.get(), 0.f, (2 * length + 1) * sizeof(data_[0]));
+}
+
+WPDNode::~WPDNode() {}
+
+int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
+  if (!parent_data || (parent_data_length / 2) != length_) {
+    return -1;
+  }
+
+  // Filter data.
+  filter_->Filter(parent_data, parent_data_length, data_.get());
+
+  // Decimate data.
+  const bool kOddSequence = true;
+  size_t output_samples = DyadicDecimate(data_.get(), parent_data_length,
+                                         kOddSequence, data_.get(), length_);
+  if (output_samples != length_) {
+    return -1;
+  }
+
+  // Take the absolute value of every sample.
+  for (size_t i = 0; i < length_; ++i) {
+    data_[i] = fabs(data_[i]);
+  }
+
+  return 0;
+}
+
+int WPDNode::set_data(const float* new_data, size_t length) {
+  if (!new_data || length != length_) {
+    return -1;
+  }
+  memcpy(data_.get(), new_data, length * sizeof(data_[0]));
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h
new file mode 100644
index 0000000000..41614fab0f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+
+#include <memory>
+
+namespace webrtc {
+
+class FIRFilter;
+
+// A single node of a Wavelet Packet Decomposition (WPD) tree.
+class WPDNode {
+ public:
+  // Creates a WPDNode. The data vector will contain zeros. The filter will
+  // have the coefficients provided.
+  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
+  ~WPDNode();
+
+  // Updates the node data. `parent_data_length` / 2 must be equal to
+  // `length_`.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* parent_data, size_t parent_data_length);
+
+  const float* data() const { return data_.get(); }
+  // Returns 0 if correct, and -1 otherwise.
+  int set_data(const float* new_data, size_t length);
+  size_t length() const { return length_; }
+
+ private:
+  std::unique_ptr<float[]> data_;
+  size_t length_;
+  std::unique_ptr<FIRFilter> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc
new file mode 100644
index 0000000000..5f9238255c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <string.h>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const size_t kDataLength = 5;
+static const float kTolerance = 0.0001f;
+
+static const size_t kParentDataLength = kDataLength * 2;
+static const float kParentData[kParentDataLength] = {1.f, 2.f, 3.f, 4.f, 5.f,
+                                                     6.f, 7.f, 8.f, 9.f, 10.f};
+
+static const float kCoefficients[] = {0.2f, -0.3f, 0.5f, -0.7f, 0.11f};
+static const size_t kCoefficientsLength =
+    sizeof(kCoefficients) / sizeof(kCoefficients[0]);
+
+TEST(WPDNodeTest, Accessors) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.set_data(kParentData, kDataLength));
+  EXPECT_EQ(0, memcmp(node.data(), kParentData,
+                      kDataLength * sizeof(node.data()[0])));
+}
+
+TEST(WPDNodeTest, UpdateThatOnlyDecimates) {
+  const float kIdentityCoefficient = 1.f;
+  WPDNode node(kDataLength, &kIdentityCoefficient, 1);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  for (size_t i = 0; i < kDataLength; ++i) {
+    EXPECT_FLOAT_EQ(kParentData[i * 2 + 1], node.data()[i]);
+  }
+}
+
+TEST(WPDNodeTest, UpdateWithArbitraryDataAndArbitraryFilter) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  EXPECT_NEAR(0.1f, node.data()[0], kTolerance);
+  EXPECT_NEAR(0.2f, node.data()[1], kTolerance);
+  EXPECT_NEAR(0.18f, node.data()[2], kTolerance);
+  EXPECT_NEAR(0.56f, node.data()[3], kTolerance);
+  EXPECT_NEAR(0.94f, node.data()[4], kTolerance);
+}
+
+TEST(WPDNodeTest, ExpectedErrorReturnValue) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(-1, node.Update(kParentData, kParentDataLength - 1));
+  EXPECT_EQ(-1, node.Update(NULL, kParentDataLength));
+  EXPECT_EQ(-1, node.set_data(kParentData, kDataLength - 1));
+  EXPECT_EQ(-1, node.set_data(NULL, kDataLength));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc
new file mode 100644
index 0000000000..c8aa615881
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <string.h>
+
+#include "modules/audio_processing/transient/wpd_node.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDTree::WPDTree(size_t data_length,
+                 const float* high_pass_coefficients,
+                 const float* low_pass_coefficients,
+                 size_t coefficients_length,
+                 int levels)
+    : data_length_(data_length),
+      levels_(levels),
+      num_nodes_((1 << (levels + 1)) - 1) {
+  RTC_DCHECK_GT(data_length, (static_cast<size_t>(1) << levels));
+  RTC_DCHECK(high_pass_coefficients);
+  RTC_DCHECK(low_pass_coefficients);
+  RTC_DCHECK_GT(levels, 0);
+  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
+  // allocated.
+  nodes_.reset(new std::unique_ptr<WPDNode>[num_nodes_ + 1]);
+
+  // Create the first node.
+  const float kRootCoefficient = 1.f;  // Identity Coefficient.
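+  // The root node holds the raw input data (WPDTree::Update() fills it via
+  // set_data()); it still needs a filter because every WPDNode owns one, so
+  // it gets a single identity coefficient that would leave the data
+  // unchanged if it were ever applied.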
+  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));
+  // Variables used to create the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  // Branch each node in each level to create its children. The last level is
+  // not branched (all the nodes of that level are leaves).
+  for (int current_level = 0; current_level < levels; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+      nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                 low_pass_coefficients,
+                                                 coefficients_length));
+      nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                  high_pass_coefficients,
+                                                  coefficients_length));
+    }
+  }
+}
+
+WPDTree::~WPDTree() {}
+
+WPDNode* WPDTree::NodeAt(int level, int index) {
+  if (level < 0 || level > levels_ || index < 0 || index >= 1 << level) {
+    return NULL;
+  }
+
+  return nodes_[(1 << level) + index].get();
+}
+
+int WPDTree::Update(const float* data, size_t data_length) {
+  if (!data || data_length != data_length_) {
+    return -1;
+  }
+
+  // Update the root node.
+  int update_result = nodes_[1]->set_data(data, data_length);
+  if (update_result != 0) {
+    return -1;
+  }
+
+  // Variables used to update the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  for (int current_level = 0; current_level < levels_; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+
+      update_result = nodes_[index_left_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+
+      update_result = nodes_[index_right_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h
new file mode 100644
index 0000000000..13cb8d9c2f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h
@@ -0,0 +1,92 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+namespace webrtc {
+
+// Tree of a Wavelet Packet Decomposition (WPD).
+//
+// The root node contains all the data provided; for each node in the tree, the
+// left child contains the approximation coefficients extracted from the node,
+// and the right child contains the detail coefficients.
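+// For example, with levels = 2 the root (level 0) is split into an
+// approximation node (1, 0) and a detail node (1, 1), and each of those is
+// split again into the four leaves of level 2.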
+// It preserves its state, so it can be called multiple times.
+//
+// The number of nodes in the tree will be 2 ^ (levels + 1) - 1.
+//
+// Implementation details: Since the tree will always be a complete binary
+// tree, it is implemented using a single linear array instead of managing the
+// relationships in each node. For convenience it is better to use an array
+// that starts at 1 (instead of 0). Taking that into account, the following
+// formulas apply:
+// Root node index: 1.
+// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
+// Left Child: Current node index * 2.
+// Right Child: Current node index * 2 + 1.
+// Parent: Current Node Index / 2 (Integer division).
class WPDTree {
+ public:
+  // Creates a WPD tree using the data length and coefficients provided.
+  WPDTree(size_t data_length,
+          const float* high_pass_coefficients,
+          const float* low_pass_coefficients,
+          size_t coefficients_length,
+          int levels);
+  ~WPDTree();
+
+  // Returns the number of nodes at any given level.
+  static int NumberOfNodesAtLevel(int level) { return 1 << level; }
+
+  // Returns a pointer to the node at the given level and index (of that
+  // level).
+  // Level goes from 0 to levels().
+  // Index goes from 0 to NumberOfNodesAtLevel(level) - 1.
+  //
+  // You can use the following formulas to get any node within the tree:
+  // Notation: (Level, Index of node in that level).
+  // Root node: (0, 0).
+  // Left Child: (Current node level + 1, Current node index * 2).
+  // Right Child: (Current node level + 1, Current node index * 2 + 1).
+  // Parent: (Current node level - 1, Current node index / 2) (Integer
+  // division).
+  //
+  // If level or index are out of bounds the function will return NULL.
+  WPDNode* NodeAt(int level, int index);
+
+  // Updates all the nodes of the tree with the new data. `data_length` must
+  // be the same as the one used when the tree was created.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* data, size_t data_length);
+
+  // Returns the total number of levels below the root. Root is considered
+  // level 0.
+  int levels() const { return levels_; }
+
+  // Returns the total number of nodes.
+  int num_nodes() const { return num_nodes_; }
+
+  // Returns the total number of leaves.
+  int num_leaves() const { return 1 << levels_; }
+
+ private:
+  size_t data_length_;
+  int levels_;
+  int num_nodes_;
+  std::unique_ptr<std::unique_ptr<WPDNode>[]> nodes_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
new file mode 100644
index 0000000000..bf3ff987d7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
@@ -0,0 +1,177 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <string.h>
+
+#include <string>
+
+#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(WPDTreeTest, Construction) {
+  const size_t kTestBufferSize = 100;
+  const int kLevels = 5;
+  const int kExpectedNumberOfNodes = (1 << (kLevels + 1)) - 1;
+
+  float test_buffer[kTestBufferSize];
+  memset(test_buffer, 0.f, kTestBufferSize * sizeof(*test_buffer));
+  float test_coefficients[] = {1.f, 2.f, 3.f, 4.f, 5.f};
+  const size_t kTestCoefficientsLength =
+      sizeof(test_coefficients) / sizeof(test_coefficients[0]);
+  WPDTree tree(kTestBufferSize, test_coefficients, test_coefficients,
+               kTestCoefficientsLength, kLevels);
+  ASSERT_EQ(kExpectedNumberOfNodes, tree.num_nodes());
+  // Checks for NodeAt(level, index).
+  int nodes_at_level = 0;
+  for (int level = 0; level <= kLevels; ++level) {
+    nodes_at_level = 1 << level;
+    for (int i = 0; i < nodes_at_level; ++i) {
+      ASSERT_TRUE(NULL != tree.NodeAt(level, i));
+    }
+    // Out of bounds.
+    EXPECT_EQ(NULL, tree.NodeAt(level, -1));
+    EXPECT_EQ(NULL, tree.NodeAt(level, -12));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level + 5));
+  }
+  // Out of bounds.
+  EXPECT_EQ(NULL, tree.NodeAt(-1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(-12, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 5, 0));
+  // Checks for Update().
+  EXPECT_EQ(0, tree.Update(test_buffer, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(NULL, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(test_buffer, kTestBufferSize - 1));
+}
+
+// This test verifies the correctness of the tree.
+// It checks the results against the Matlab equivalent by comparing them with
+// the results stored in the Matlab output files.
+// It also writes the results in its own set of files in the out directory.
+// Matlab and output files contain all the results in double precision (little
+// endian), appended.
+#if defined(WEBRTC_IOS)
+TEST(WPDTreeTest, DISABLED_CorrectnessBasedOnMatlabFiles) {
+#else
+TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) {
+#endif
+  // 10 ms at 16000 Hz.
+  const size_t kTestBufferSize = 160;
+  const int kLevels = 3;
+  const int kLeaves = 1 << kLevels;
+  const size_t kLeavesSamples = kTestBufferSize >> kLevels;
+  // Create a tree with the Daubechies 8 wavelet coefficients.
+  WPDTree tree(kTestBufferSize, kDaubechies8HighPassCoefficients,
+               kDaubechies8LowPassCoefficients, kDaubechies8CoefficientsLength,
+               kLevels);
+  // Allocate and open all matlab and out files.
+  FileWrapper matlab_files_data[kLeaves];
+  FileWrapper out_files_data[kLeaves];
+
+  for (int i = 0; i < kLeaves; ++i) {
+    // Matlab files.
+    rtc::StringBuilder matlab_stream;
+    matlab_stream << "audio_processing/transient/wpd" << i;
+    std::string matlab_string = test::ResourcePath(matlab_stream.str(), "dat");
+    matlab_files_data[i] = FileWrapper::OpenReadOnly(matlab_string);
+
+    bool file_opened = matlab_files_data[i].is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n" << matlab_string;
+
+    // Out files.
+ rtc::StringBuilder out_stream; + out_stream << test::OutputPath() << "wpd_" << i << ".out"; + std::string out_string = out_stream.str(); + + out_files_data[i] = FileWrapper::OpenWriteOnly(out_string); + + file_opened = out_files_data[i].is_open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" << out_string; + } + + // Prepare the test file. + std::string test_file_name = test::ResourcePath( + "audio_processing/transient/ajm-macbook-1-spke16m", "pcm"); + + FileWrapper test_file = FileWrapper::OpenReadOnly(test_file_name); + + bool file_opened = test_file.is_open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" << test_file_name; + + float test_buffer[kTestBufferSize]; + + // Only the first frames of the audio file are tested. The matlab files also + // only contains information about the first frames. + const size_t kMaxFramesToTest = 100; + const float kTolerance = 0.03f; + + size_t frames_read = 0; + + // Read first buffer from the PCM test file. + size_t file_samples_read = + ReadInt16FromFileToFloatBuffer(&test_file, kTestBufferSize, test_buffer); + while (file_samples_read > 0 && frames_read < kMaxFramesToTest) { + ++frames_read; + + if (file_samples_read < kTestBufferSize) { + // Pad the rest of the buffer with zeros. + for (size_t i = file_samples_read; i < kTestBufferSize; ++i) { + test_buffer[i] = 0.0; + } + } + tree.Update(test_buffer, kTestBufferSize); + double matlab_buffer[kTestBufferSize]; + + // Compare results with data from the matlab test files. + for (int i = 0; i < kLeaves; ++i) { + // Compare data values + size_t matlab_samples_read = ReadDoubleBufferFromFile( + &matlab_files_data[i], kLeavesSamples, matlab_buffer); + + ASSERT_EQ(kLeavesSamples, matlab_samples_read) + << "Matlab test files are malformed.\n" + "File: 3_" + << i; + // Get output data from the corresponding node + const float* node_data = tree.NodeAt(kLevels, i)->data(); + // Compare with matlab files. + for (size_t j = 0; j < kLeavesSamples; ++j) { + EXPECT_NEAR(matlab_buffer[j], node_data[j], kTolerance) + << "\nLeaf: " << i << "\nSample: " << j + << "\nFrame: " << frames_read - 1; + } + + // Write results to out files. + WriteFloatBufferToFile(&out_files_data[i], kLeavesSamples, node_data); + } + + // Read next buffer from the PCM test file. + file_samples_read = ReadInt16FromFileToFloatBuffer( + &test_file, kTestBufferSize, test_buffer); + } + + // Close all matlab and out files. + for (int i = 0; i < kLeaves; ++i) { + matlab_files_data[i].Close(); + out_files_data[i].Close(); + } + + test_file.Close(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn new file mode 100644 index 0000000000..4851e77b03 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn @@ -0,0 +1,79 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+
+import("../../../webrtc.gni")
+
+rtc_library("cascaded_biquad_filter") {
+  sources = [
+    "cascaded_biquad_filter.cc",
+    "cascaded_biquad_filter.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+  ]
+}
+
+rtc_library("legacy_delay_estimator") {
+  sources = [
+    "delay_estimator.cc",
+    "delay_estimator.h",
+    "delay_estimator_internal.h",
+    "delay_estimator_wrapper.cc",
+    "delay_estimator_wrapper.h",
+  ]
+  deps = [ "../../../rtc_base:checks" ]
+}
+
+rtc_library("pffft_wrapper") {
+  visibility = [ "../*" ]
+  sources = [
+    "pffft_wrapper.cc",
+    "pffft_wrapper.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "//third_party/pffft",
+  ]
+}
+
+if (rtc_include_tests) {
+  rtc_library("cascaded_biquad_filter_unittest") {
+    testonly = true
+
+    sources = [ "cascaded_biquad_filter_unittest.cc" ]
+    deps = [
+      ":cascaded_biquad_filter",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+  }
+
+  rtc_library("legacy_delay_estimator_unittest") {
+    testonly = true
+
+    sources = [ "delay_estimator_unittest.cc" ]
+    deps = [
+      ":legacy_delay_estimator",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+  }
+
+  rtc_library("pffft_wrapper_unittest") {
+    testonly = true
+    sources = [ "pffft_wrapper_unittest.cc" ]
+    deps = [
+      ":pffft_wrapper",
+      "../../../test:test_support",
+      "//testing/gtest",
+      "//third_party/pffft",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/DEPS b/third_party/libwebrtc/modules/audio_processing/utility/DEPS
new file mode 100644
index 0000000000..c72d810b24
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+third_party/pffft",
+]
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc
new file mode 100644
index 0000000000..0d236ce0be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+CascadedBiQuadFilter::BiQuadParam::BiQuadParam(std::complex<float> zero,
+                                               std::complex<float> pole,
+                                               float gain,
+                                               bool mirror_zero_along_i_axis)
+    : zero(zero),
+      pole(pole),
+      gain(gain),
+      mirror_zero_along_i_axis(mirror_zero_along_i_axis) {}
+
+CascadedBiQuadFilter::BiQuadParam::BiQuadParam(const BiQuadParam&) = default;
+
+CascadedBiQuadFilter::BiQuad::BiQuad(
+    const CascadedBiQuadFilter::BiQuadParam& param)
+    : x(), y() {
+  float z_r = std::real(param.zero);
+  float z_i = std::imag(param.zero);
+  float p_r = std::real(param.pole);
+  float p_i = std::imag(param.pole);
+  float gain = param.gain;
+
+  if (param.mirror_zero_along_i_axis) {
+    // Assuming zeroes at z_r and -z_r.
+    RTC_DCHECK(z_i == 0.f);
+    coefficients.b[0] = gain * 1.f;
+    coefficients.b[1] = 0.f;
+    coefficients.b[2] = gain * -(z_r * z_r);
+  } else {
+    // Assuming zeros at (z_r + z_i*i) and (z_r - z_i*i).
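+    // Expanding (z - (z_r + z_i*i)) * (z - (z_r - z_i*i)) gives
+    // z^2 - 2*z_r*z + (z_r^2 + z_i^2); scaling by `gain` yields the
+    // numerator coefficients below.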
+    coefficients.b[0] = gain * 1.f;
+    coefficients.b[1] = gain * -2.f * z_r;
+    coefficients.b[2] = gain * (z_r * z_r + z_i * z_i);
+  }
+
+  // Assuming poles at (p_r + p_i*i) and (p_r - p_i*i).
+  coefficients.a[0] = -2.f * p_r;
+  coefficients.a[1] = p_r * p_r + p_i * p_i;
+}
+
+void CascadedBiQuadFilter::BiQuad::BiQuad::Reset() {
+  x[0] = x[1] = y[0] = y[1] = 0.f;
+}
+
+CascadedBiQuadFilter::CascadedBiQuadFilter(
+    const CascadedBiQuadFilter::BiQuadCoefficients& coefficients,
+    size_t num_biquads)
+    : biquads_(num_biquads, BiQuad(coefficients)) {}
+
+CascadedBiQuadFilter::CascadedBiQuadFilter(
+    const std::vector<CascadedBiQuadFilter::BiQuadParam>& biquad_params) {
+  for (const auto& param : biquad_params) {
+    biquads_.push_back(BiQuad(param));
+  }
+}
+
+CascadedBiQuadFilter::~CascadedBiQuadFilter() = default;
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<const float> x,
+                                   rtc::ArrayView<float> y) {
+  if (biquads_.size() > 0) {
+    ApplyBiQuad(x, y, &biquads_[0]);
+    for (size_t k = 1; k < biquads_.size(); ++k) {
+      ApplyBiQuad(y, y, &biquads_[k]);
+    }
+  } else {
+    std::copy(x.begin(), x.end(), y.begin());
+  }
+}
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<float> y) {
+  for (auto& biquad : biquads_) {
+    ApplyBiQuad(y, y, &biquad);
+  }
+}
+
+void CascadedBiQuadFilter::Reset() {
+  for (auto& biquad : biquads_) {
+    biquad.Reset();
+  }
+}
+
+void CascadedBiQuadFilter::ApplyBiQuad(rtc::ArrayView<const float> x,
+                                       rtc::ArrayView<float> y,
+                                       CascadedBiQuadFilter::BiQuad* biquad) {
+  RTC_DCHECK_EQ(x.size(), y.size());
+  const float c_a_0 = biquad->coefficients.a[0];
+  const float c_a_1 = biquad->coefficients.a[1];
+  const float c_b_0 = biquad->coefficients.b[0];
+  const float c_b_1 = biquad->coefficients.b[1];
+  const float c_b_2 = biquad->coefficients.b[2];
+  float m_x_0 = biquad->x[0];
+  float m_x_1 = biquad->x[1];
+  float m_y_0 = biquad->y[0];
+  float m_y_1 = biquad->y[1];
+  for (size_t k = 0; k < x.size(); ++k) {
+    const float tmp = x[k];
+    y[k] = c_b_0 * tmp + c_b_1 * m_x_0 + c_b_2 * m_x_1 - c_a_0 * m_y_0 -
+           c_a_1 * m_y_1;
+    m_x_1 = m_x_0;
+    m_x_0 = tmp;
+    m_y_1 = m_y_0;
+    m_y_0 = y[k];
+  }
+  biquad->x[0] = m_x_0;
+  biquad->x[1] = m_x_1;
+  biquad->y[0] = m_y_0;
+  biquad->y[1] = m_y_1;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h
new file mode 100644
index 0000000000..120b52aa57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
+
+#include <stddef.h>
+
+#include <complex>
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Applies a number of biquads in a cascaded manner. The filter implementation
+// is direct form 1.
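+// Each biquad evaluates the difference equation
+//   y[n] = b[0]*x[n] + b[1]*x[n-1] + b[2]*x[n-2] - a[0]*y[n-1] - a[1]*y[n-2],
+// with b and a taken from BiQuadCoefficients (a[0] and a[1] are the usual a1
+// and a2 coefficients; a0 is implicitly 1).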
+class CascadedBiQuadFilter {
+ public:
+  struct BiQuadParam {
+    BiQuadParam(std::complex<float> zero,
+                std::complex<float> pole,
+                float gain,
+                bool mirror_zero_along_i_axis = false);
+    explicit BiQuadParam(const BiQuadParam&);
+    std::complex<float> zero;
+    std::complex<float> pole;
+    float gain;
+    bool mirror_zero_along_i_axis;
+  };
+
+  struct BiQuadCoefficients {
+    float b[3];
+    float a[2];
+  };
+
+  struct BiQuad {
+    explicit BiQuad(const BiQuadCoefficients& coefficients)
+        : coefficients(coefficients), x(), y() {}
+    explicit BiQuad(const CascadedBiQuadFilter::BiQuadParam& param);
+    void Reset();
+    BiQuadCoefficients coefficients;
+    float x[2];
+    float y[2];
+  };
+
+  CascadedBiQuadFilter(
+      const CascadedBiQuadFilter::BiQuadCoefficients& coefficients,
+      size_t num_biquads);
+  explicit CascadedBiQuadFilter(
+      const std::vector<BiQuadParam>& biquad_params);
+  ~CascadedBiQuadFilter();
+  CascadedBiQuadFilter(const CascadedBiQuadFilter&) = delete;
+  CascadedBiQuadFilter& operator=(const CascadedBiQuadFilter&) = delete;
+
+  // Applies the biquads on the values in x in order to form the output in y.
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+  // Applies the biquads on the values in y in an in-place manner.
+  void Process(rtc::ArrayView<float> y);
+  // Resets the filter to its initial state.
+  void Reset();
+
+ private:
+  void ApplyBiQuad(rtc::ArrayView<const float> x,
+                   rtc::ArrayView<float> y,
+                   CascadedBiQuadFilter::BiQuad* biquad);
+
+  std::vector<BiQuad> biquads_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build
new file mode 100644
index 0000000000..8cf3aaefeb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = 
True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("cascaded_biquad_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc new file mode 100644 index 0000000000..ff7022dba4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" + +#include + +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +// Coefficients for a second order Butterworth high-pass filter with cutoff +// frequency 100 Hz. 
+const CascadedBiQuadFilter::BiQuadCoefficients kHighPassFilterCoefficients = {
+    {0.97261f, -1.94523f, 0.97261f},
+    {-1.94448f, 0.94598f}};
+
+const CascadedBiQuadFilter::BiQuadCoefficients kTransparentCoefficients = {
+    {1.f, 0.f, 0.f},
+    {0.f, 0.f}};
+
+const CascadedBiQuadFilter::BiQuadCoefficients kBlockingCoefficients = {
+    {0.f, 0.f, 0.f},
+    {0.f, 0.f}};
+
+std::vector<float> CreateInputWithIncreasingValues(size_t vector_length) {
+  std::vector<float> v(vector_length);
+  for (size_t k = 0; k < v.size(); ++k) {
+    v[k] = k;
+  }
+  return v;
+}
+
+}  // namespace
+
+// Verifies that the filter applies an effect which removes the input signal.
+// The test also verifies that the in-place Process API call works as intended.
+TEST(CascadedBiquadFilter, BlockingConfiguration) {
+  std::vector<float> values = CreateInputWithIncreasingValues(1000);
+
+  CascadedBiQuadFilter filter(kBlockingCoefficients, 1);
+  filter.Process(values);
+
+  EXPECT_EQ(std::vector<float>(1000, 0.f), values);
+}
+
+// Verifies that the filter is able to form a zero-mean output from a
+// non-zero-mean input signal when coefficients for a high-pass filter are
+// applied. The test also verifies that the filter works with multiple biquads.
+TEST(CascadedBiquadFilter, HighPassConfiguration) {
+  std::vector<float> values(1000);
+  for (size_t k = 0; k < values.size(); ++k) {
+    values[k] = 1.f;
+  }
+
+  CascadedBiQuadFilter filter(kHighPassFilterCoefficients, 2);
+  filter.Process(values);
+
+  for (size_t k = values.size() / 2; k < values.size(); ++k) {
+    EXPECT_NEAR(0.f, values[k], 1e-4);
+  }
+}
+
+// Verifies that the reset functionality works as intended.
+TEST(CascadedBiquadFilter, HighPassConfigurationResetFunctionality) {
+  CascadedBiQuadFilter filter(kHighPassFilterCoefficients, 2);
+
+  std::vector<float> values1(100, 1.f);
+  filter.Process(values1);
+
+  filter.Reset();
+
+  std::vector<float> values2(100, 1.f);
+  filter.Process(values2);
+
+  for (size_t k = 0; k < values1.size(); ++k) {
+    EXPECT_EQ(values1[k], values2[k]);
+  }
+}
+
+// Verifies that the filter is able to produce a transparent effect with no
+// impact on the data when the proper coefficients are applied. The test also
+// verifies that the non-in-place Process API call works as intended.
+TEST(CascadedBiquadFilter, TransparentConfiguration) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(1000);
+  std::vector<float> output(input.size());
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  filter.Process(input, output);
+
+  EXPECT_EQ(input, output);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the length check on the input and output works for the
+// non-in-place Process call.
+TEST(CascadedBiquadFilterDeathTest, InputSizeCheckVerification) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(10);
+  std::vector<float> output(input.size() - 1);
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  EXPECT_DEATH(filter.Process(input, output), "");
+}
+#endif
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for
+// lowpass filter.
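+// The expected values in the tests below follow from the BiQuad constructor:
+// for a conjugate pole pair p_r +- p_i*i the denominator is
+// z^2 - 2*p_r*z + (p_r^2 + p_i^2), so a = {-2*p_r, p_r^2 + p_i^2}, and the
+// numerator is the zero polynomial scaled by the gain.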
+TEST(CascadedBiquadFilter, BiQuadParamLowPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {-1.0f, 0.0f}, {0.23146901f, 0.39514232f}, 0.1866943331163784f);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.18669433f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], 0.37338867f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], 0.18669433f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -0.46293803f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 0.20971536f, epsilon);
+}
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for
+// highpass filter.
+TEST(CascadedBiquadFilter, BiQuadParamHighPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {1.0f, 0.0f}, {0.72712179f, 0.21296904f}, 0.75707637533388494f);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.75707638f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], -1.51415275f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], 0.75707638f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -1.45424359f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 0.57406192f, epsilon);
+}
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for
+// bandpass filter.
+TEST(CascadedBiquadFilter, BiQuadParamBandPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {1.0f, 0.0f}, {1.11022302e-16f, 0.71381051f}, 0.2452372752527856f, true);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.24523728f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], 0.f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], -0.24523728f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -2.22044605e-16f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 5.09525449e-01f, epsilon);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc
new file mode 100644
index 0000000000..6868392f6f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc
@@ -0,0 +1,708 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// The number of right shifts for scaling depends linearly on the number of
+// bits in the far-end binary spectrum.
+static const int kShiftsAtZero = 13;  // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
+
+static const int32_t kProbabilityOffset = 1024;      // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704;  // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816;   // 5.5 in Q9.
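+// (A Qn constant stores x as x * 2^n, so 1024 = 2 * 2^9, 8704 = 17 * 2^9 and
+// 2816 = 5.5 * 2^9.)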
+ +// Robust validation settings +static const float kHistogramMax = 3000.f; +static const float kLastHistogramMax = 250.f; +static const float kMinHistogramThreshold = 1.5f; +static const int kMinRequiredHits = 10; +static const int kMaxHitsWhenPossiblyNonCausal = 10; +static const int kMaxHitsWhenPossiblyCausal = 1000; +static const float kQ14Scaling = 1.f / (1 << 14); // Scaling by 2^14 to get Q0. +static const float kFractionSlope = 0.05f; +static const float kMinFractionWhenPossiblyCausal = 0.5f; +static const float kMinFractionWhenPossiblyNonCausal = 0.25f; + +} // namespace + +// Counts and returns number of bits of a 32-bit word. +static int BitCount(uint32_t u32) { + uint32_t tmp = + u32 - ((u32 >> 1) & 033333333333) - ((u32 >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + return ((int)tmp); +} + +// Compares the `binary_vector` with all rows of the `binary_matrix` and counts +// per row the number of times they have the same value. +// +// Inputs: +// - binary_vector : binary "vector" stored in a long +// - binary_matrix : binary "matrix" stored as a vector of long +// - matrix_size : size of binary "matrix" +// +// Output: +// - bit_counts : "Vector" stored as a long, containing for each +// row the number of times the matrix row and the +// input vector have the same value +// +static void BitCountComparison(uint32_t binary_vector, + const uint32_t* binary_matrix, + int matrix_size, + int32_t* bit_counts) { + int n = 0; + + // Compare `binary_vector` with all rows of the `binary_matrix` + for (; n < matrix_size; n++) { + bit_counts[n] = (int32_t)BitCount(binary_vector ^ binary_matrix[n]); + } +} + +// Collects necessary statistics for the HistogramBasedValidation(). This +// function has to be called prior to calling HistogramBasedValidation(). The +// statistics updated and used by the HistogramBasedValidation() are: +// 1. the number of `candidate_hits`, which states for how long we have had the +// same `candidate_delay` +// 2. the `histogram` of candidate delays over time. This histogram is +// weighted with respect to a reliability measure and time-varying to cope +// with possible delay shifts. +// For further description see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - valley_depth_q14 : The cost function has a valley/minimum at the +// `candidate_delay` location. `valley_depth_q14` is the +// cost function difference between the minimum and +// maximum locations. The value is in the Q14 domain. +// - valley_level_q14 : Is the cost function value at the minimum, in Q14. +static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self, + int candidate_delay, + int32_t valley_depth_q14, + int32_t valley_level_q14) { + const float valley_depth = valley_depth_q14 * kQ14Scaling; + float decrease_in_last_set = valley_depth; + const int max_hits_for_slow_change = (candidate_delay < self->last_delay) + ? kMaxHitsWhenPossiblyNonCausal + : kMaxHitsWhenPossiblyCausal; + int i = 0; + + RTC_DCHECK_EQ(self->history_size, self->farend->history_size); + // Reset `candidate_hits` if we have a new candidate. + if (candidate_delay != self->last_candidate_delay) { + self->candidate_hits = 0; + self->last_candidate_delay = candidate_delay; + } + self->candidate_hits++; + + // The `histogram` is updated differently across the bins. + // 1. 
The `candidate_delay` histogram bin is increased with the
+//    `valley_depth`, which is a simple measure of how reliable the
+//    `candidate_delay` is. The histogram is not increased above
+//    `kHistogramMax`.
+  self->histogram[candidate_delay] += valley_depth;
+  if (self->histogram[candidate_delay] > kHistogramMax) {
+    self->histogram[candidate_delay] = kHistogramMax;
+  }
+  // 2. The histogram bins in the neighborhood of `candidate_delay` are
+  //    unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}.
+  // 3. The histogram bins in the neighborhood of `last_delay` are decreased
+  //    with `decrease_in_last_set`. This value equals the difference between
+  //    the cost function values at the locations `candidate_delay` and
+  //    `last_delay` until we reach `max_hits_for_slow_change` consecutive hits
+  //    at the `candidate_delay`. If we exceed this amount of hits the
+  //    `candidate_delay` is a "potential" candidate and we start decreasing
+  //    these histogram bins more rapidly with `valley_depth`.
+  if (self->candidate_hits < max_hits_for_slow_change) {
+    decrease_in_last_set =
+        (self->mean_bit_counts[self->compare_delay] - valley_level_q14) *
+        kQ14Scaling;
+  }
+  // 4. All other bins are decreased with `valley_depth`.
+  // TODO(bjornv): Investigate how to make this loop more efficient. Split up
+  // the loop? Remove parts that don't add too much.
+  for (i = 0; i < self->history_size; ++i) {
+    int is_in_last_set = (i >= self->last_delay - 2) &&
+                         (i <= self->last_delay + 1) && (i != candidate_delay);
+    int is_in_candidate_set =
+        (i >= candidate_delay - 2) && (i <= candidate_delay + 1);
+    self->histogram[i] -=
+        decrease_in_last_set * is_in_last_set +
+        valley_depth * (!is_in_last_set && !is_in_candidate_set);
+    // 5. No histogram bin can go below 0.
+    if (self->histogram[i] < 0) {
+      self->histogram[i] = 0;
+    }
+  }
+}
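To see the update dynamics of rule 1 in isolation: a repeatedly confirmed candidate bin grows by `valley_depth` on every call until it saturates at `kHistogramMax`, while bins outside the two protected neighborhoods decay by the same amount. A standalone sketch with a made-up reliability value (this is not the real state struct):

  #include <algorithm>
  float bin = 0.f;
  const float valley_depth = 250.f;  // Hypothetical valley_depth_q14 * kQ14Scaling.
  for (int hits = 0; hits < 20; ++hits) {
    bin = std::min(bin + valley_depth, 3000.f);  // 3000.f == kHistogramMax.
  }
  // The bin saturates after 12 hits; a bin drained by the same valley_depth
  // per call bottoms out at the 0 floor (rule 5).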
+
+// Validates the `candidate_delay`, estimated in WebRtc_ProcessBinarySpectrum(),
+// based on a mix of counting concurring hits with a modified histogram
+// of recent delay estimates. In brief a candidate is valid (returns 1) if it
+// is the most likely according to the histogram. There are a couple of
+// exceptions that are worth mentioning:
+// 1. If the `candidate_delay` < `last_delay` it can be that we are in a
+//    non-causal state, breaking a possible echo control algorithm. Hence, we
+//    open up for a quicker change by allowing the change even if the
+//    `candidate_delay` is not the most likely one according to the histogram.
+// 2. There's a minimum number of hits (kMinRequiredHits) and the histogram
+//    value has to reach a minimum (kMinHistogramThreshold) to be valid.
+// 3. The action also depends on the filter length used for echo control.
+//    If the delay difference is larger than what the filter can capture, we
+//    also move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+//        - candidate_delay     : The delay to validate.
+//
+// Return value:
+//        - is_histogram_valid  : 1 - The `candidate_delay` is valid.
+//                                0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  float fraction = 1.f;
+  float histogram_threshold = self->histogram[self->compare_delay];
+  const int delay_difference = candidate_delay - self->last_delay;
+  int is_histogram_valid = 0;
+
+  // The histogram based validation of `candidate_delay` is done by comparing
+  // the `histogram` at bin `candidate_delay` with a `histogram_threshold`.
+  // This `histogram_threshold` equals a `fraction` of the `histogram` at bin
+  // `last_delay`. The `fraction` is a piecewise linear function of the
+  // `delay_difference` between the `candidate_delay` and the `last_delay`
+  // allowing for a quicker move if
+  //   i) a potential echo control filter cannot handle these large differences.
+  //  ii) keeping `last_delay` instead of updating to `candidate_delay` could
+  //      force an echo control into a non-causal state.
+  // We further require the histogram to have reached a minimum value of
+  // `kMinHistogramThreshold`. In addition, we also require the number of
+  // `candidate_hits` to be more than `kMinRequiredHits` to remove spurious
+  // values.
+
+  // Calculate a comparison histogram value (`histogram_threshold`) that is
+  // depending on the distance between the `candidate_delay` and `last_delay`.
+  // TODO(bjornv): How much can we gain by turning the fraction calculation
+  // into tables?
+  if (delay_difference > self->allowed_offset) {
+    fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+    fraction = (fraction > kMinFractionWhenPossiblyCausal
+                    ? fraction
+                    : kMinFractionWhenPossiblyCausal);
+  } else if (delay_difference < 0) {
+    fraction =
+        kMinFractionWhenPossiblyNonCausal - kFractionSlope * delay_difference;
+    fraction = (fraction > 1.f ? 1.f : fraction);
+  }
+  histogram_threshold *= fraction;
+  histogram_threshold =
+      (histogram_threshold > kMinHistogramThreshold ? histogram_threshold
+                                                    : kMinHistogramThreshold);
+
+  is_histogram_valid =
+      (self->histogram[candidate_delay] >= histogram_threshold) &&
+      (self->candidate_hits > kMinRequiredHits);
+
+  return is_histogram_valid;
+}
+
+// Performs a robust validation of the `candidate_delay` estimated in
+// WebRtc_ProcessBinarySpectrum(). The algorithm takes the
+// `is_instantaneous_valid` and the `is_histogram_valid` and combines them
+// into a robust validation. The HistogramBasedValidation() has to be called
+// prior to this call.
+// For further description on how the combination is done, see commented code.
+//
+// Inputs:
+//        - candidate_delay         : The delay to validate.
+//        - is_instantaneous_valid  : The instantaneous validation performed in
+//                                    WebRtc_ProcessBinarySpectrum().
+//        - is_histogram_valid      : The histogram based validation.
+//
+// Return value:
+//        - is_robust               : 1 - The candidate_delay is valid according
+//                                        to a combination of the two inputs.
+//                                  : 0 - Otherwise.
+static int RobustValidation(const BinaryDelayEstimator* self,
+                            int candidate_delay,
+                            int is_instantaneous_valid,
+                            int is_histogram_valid) {
+  int is_robust = 0;
+
+  // The final robust validation is based on the two algorithms; 1) the
+  // `is_instantaneous_valid` and 2) the histogram based with result stored in
+  // `is_histogram_valid`.
+  //   i) Before we actually have a valid estimate (`last_delay` == -2), we say
+  //      a candidate is valid if either algorithm states so
+  //      (`is_instantaneous_valid` OR `is_histogram_valid`).
+  is_robust =
+      (self->last_delay < 0) && (is_instantaneous_valid || is_histogram_valid);
+  //  ii) Otherwise, we need both algorithms to be certain
+  //      (`is_instantaneous_valid` AND `is_histogram_valid`)
+  is_robust |= is_instantaneous_valid && is_histogram_valid;
+  // iii) With one exception, i.e., the histogram based algorithm can overrule
+  //      the instantaneous one if `is_histogram_valid` = 1 and the histogram
+  //      is significantly strong.
+  is_robust |= is_histogram_valid &&
+               (self->histogram[candidate_delay] > self->last_delay_histogram);
+
+  return is_robust;
+}
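The piecewise `fraction` inside HistogramBasedValidation() above is compact but easy to misread; here is a standalone restatement with the file's constants plugged in (a sketch for illustration, not the exported API):

  float ValidationFraction(int delay_difference, int allowed_offset) {
    if (delay_difference > allowed_offset) {  // Possibly causal change.
      float f = 1.f - 0.05f * (delay_difference - allowed_offset);
      return f > 0.5f ? f : 0.5f;  // Floor: kMinFractionWhenPossiblyCausal.
    }
    if (delay_difference < 0) {  // Possibly non-causal change.
      float f = 0.25f - 0.05f * delay_difference;  // Base: kMinFractionWhenPossiblyNonCausal.
      return f > 1.f ? 1.f : f;
    }
    return 1.f;
  }
  // With allowed_offset == 0, a candidate 4 blocks above last_delay must reach
  // 0.8 of last_delay's histogram value, but only 0.45 when 4 blocks below,
  // which is the quicker non-causal escape described in the comments.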
+
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->binary_far_history);
+  self->binary_far_history = NULL;
+
+  free(self->far_bit_counts);
+  self->far_bit_counts = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size) {
+  BinaryDelayEstimatorFarend* self = NULL;
+
+  if (history_size > 1) {
+    // Sanity conditions fulfilled.
+    self = static_cast<BinaryDelayEstimatorFarend*>(
+        malloc(sizeof(BinaryDelayEstimatorFarend)));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->history_size = 0;
+  self->binary_far_history = NULL;
+  self->far_bit_counts = NULL;
+  if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimatorFarend(self);
+    self = NULL;
+  }
+  return self;
+}
+
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size) {
+  RTC_DCHECK(self);
+  // (Re-)Allocate memory for history buffers.
+  self->binary_far_history = static_cast<uint32_t*>(
+      realloc(self->binary_far_history,
+              history_size * sizeof(*self->binary_far_history)));
+  self->far_bit_counts = static_cast<int*>(realloc(
+      self->far_bit_counts, history_size * sizeof(*self->far_bit_counts)));
+  if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+  if (history_size > self->history_size) {
+    int size_diff = history_size - self->history_size;
+    memset(&self->binary_far_history[self->history_size], 0,
+           sizeof(*self->binary_far_history) * size_diff);
+    memset(&self->far_bit_counts[self->history_size], 0,
+           sizeof(*self->far_bit_counts) * size_diff);
+  }
+  self->history_size = history_size;
+
+  return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  RTC_DCHECK(self);
+  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
+  memset(self->far_bit_counts, 0, sizeof(int) * self->history_size);
+}
+
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+    BinaryDelayEstimatorFarend* self,
+    int delay_shift) {
+  int abs_shift = abs(delay_shift);
+  int shift_size = 0;
+  int dest_index = 0;
+  int src_index = 0;
+  int padding_index = 0;
+
+  RTC_DCHECK(self);
+  shift_size = self->history_size - abs_shift;
+  RTC_DCHECK_GT(shift_size, 0);
+  if (delay_shift == 0) {
+    return;
+  } else if (delay_shift > 0) {
+    dest_index = abs_shift;
+  } else if (delay_shift < 0) {
+    src_index = abs_shift;
+    padding_index = shift_size;
+  }
+
+  // Shift and zero pad buffers.
+  memmove(&self->binary_far_history[dest_index],
+          &self->binary_far_history[src_index],
+          sizeof(*self->binary_far_history) * shift_size);
+  memset(&self->binary_far_history[padding_index], 0,
+         sizeof(*self->binary_far_history) * abs_shift);
+  memmove(&self->far_bit_counts[dest_index], &self->far_bit_counts[src_index],
+          sizeof(*self->far_bit_counts) * shift_size);
+  memset(&self->far_bit_counts[padding_index], 0,
+         sizeof(*self->far_bit_counts) * abs_shift);
+}
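The shift directions above are easy to get backwards, so here is the same memmove/memset pattern run on a toy four-element history (illustrative values only; index 0 holds the newest block):

  uint32_t hist[4] = {40, 30, 20, 10};  // hist[0] is the newest block.
  // delay_shift = +1: dest_index = 1, everything moves one slot toward the
  // older end and the newest slot is zero padded: {0, 40, 30, 20}.
  memmove(&hist[1], &hist[0], 3 * sizeof(hist[0]));
  memset(&hist[0], 0, sizeof(hist[0]));
  // delay_shift = -1 instead copies from src_index = 1 and zero pads at the
  // old end, giving {30, 20, 10, 0}.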
+
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle,
+                                 uint32_t binary_far_spectrum) {
+  RTC_DCHECK(handle);
+  // Shift binary spectrum history and insert current `binary_far_spectrum`.
+  memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]),
+          (handle->history_size - 1) * sizeof(uint32_t));
+  handle->binary_far_history[0] = binary_far_spectrum;
+
+  // Shift history of far-end binary spectrum bit counts and insert bit count
+  // of current `binary_far_spectrum`.
+  memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]),
+          (handle->history_size - 1) * sizeof(int));
+  handle->far_bit_counts[0] = BitCount(binary_far_spectrum);
+}
+
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) {
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->mean_bit_counts);
+  self->mean_bit_counts = NULL;
+
+  free(self->bit_counts);
+  self->bit_counts = NULL;
+
+  free(self->binary_near_history);
+  self->binary_near_history = NULL;
+
+  free(self->histogram);
+  self->histogram = NULL;
+
+  // BinaryDelayEstimator does not have ownership of `farend`, hence we do not
+  // free the memory here. That should be handled separately by the user.
+  self->farend = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+    BinaryDelayEstimatorFarend* farend,
+    int max_lookahead) {
+  BinaryDelayEstimator* self = NULL;
+
+  if ((farend != NULL) && (max_lookahead >= 0)) {
+    // Sanity conditions fulfilled.
+    self = static_cast<BinaryDelayEstimator*>(
+        malloc(sizeof(BinaryDelayEstimator)));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->farend = farend;
+  self->near_history_size = max_lookahead + 1;
+  self->history_size = 0;
+  self->robust_validation_enabled = 0;  // Disabled by default.
+  self->allowed_offset = 0;
+
+  self->lookahead = max_lookahead;
+
+  // Allocate memory for spectrum and history buffers.
+  self->mean_bit_counts = NULL;
+  self->bit_counts = NULL;
+  self->histogram = NULL;
+  self->binary_near_history = static_cast<uint32_t*>(
+      malloc((max_lookahead + 1) * sizeof(*self->binary_near_history)));
+  if (self->binary_near_history == NULL ||
+      WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimator(self);
+    self = NULL;
+  }
+
+  return self;
+}
+
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                       int history_size) {
+  BinaryDelayEstimatorFarend* far = self->farend;
+  // (Re-)Allocate memory for spectrum and history buffers.
+  if (history_size != far->history_size) {
+    // Only update far-end buffers if we need.
+    history_size = WebRtc_AllocateFarendBufferMemory(far, history_size);
+  }
+  // The extra array element in `mean_bit_counts` and `histogram` is a dummy
+  // element only used while `last_delay` == -2, i.e., before we have a valid
+  // estimate.
+  self->mean_bit_counts = static_cast<int32_t*>(
+      realloc(self->mean_bit_counts,
+              (history_size + 1) * sizeof(*self->mean_bit_counts)));
+  self->bit_counts = static_cast<int32_t*>(
+      realloc(self->bit_counts, history_size * sizeof(*self->bit_counts)));
+  self->histogram = static_cast<float*>(
+      realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram)));
+
+  if ((self->mean_bit_counts == NULL) || (self->bit_counts == NULL) ||
+      (self->histogram == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+ if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->mean_bit_counts[self->history_size], 0, + sizeof(*self->mean_bit_counts) * size_diff); + memset(&self->bit_counts[self->history_size], 0, + sizeof(*self->bit_counts) * size_diff); + memset(&self->histogram[self->history_size], 0, + sizeof(*self->histogram) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { + int i = 0; + RTC_DCHECK(self); + + memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->binary_near_history, 0, + sizeof(uint32_t) * self->near_history_size); + for (i = 0; i <= self->history_size; ++i) { + self->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + self->histogram[i] = 0.f; + } + self->minimum_probability = kMaxBitCountsQ9; // 32 in Q9. + self->last_delay_probability = (int)kMaxBitCountsQ9; // 32 in Q9. + + // Default return value if we're unable to estimate. -1 is used for errors. + self->last_delay = -2; + + self->last_candidate_delay = -2; + self->compare_delay = self->history_size; + self->candidate_hits = 0; + self->last_delay_histogram = 0.f; +} + +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift) { + int lookahead = 0; + RTC_DCHECK(self); + lookahead = self->lookahead; + self->lookahead -= delay_shift; + if (self->lookahead < 0) { + self->lookahead = 0; + } + if (self->lookahead > self->near_history_size - 1) { + self->lookahead = self->near_history_size - 1; + } + return lookahead - self->lookahead; +} + +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum) { + int i = 0; + int candidate_delay = -1; + int valid_candidate = 0; + + int32_t value_best_candidate = kMaxBitCountsQ9; + int32_t value_worst_candidate = 0; + int32_t valley_depth = 0; + + RTC_DCHECK(self); + if (self->farend->history_size != self->history_size) { + // Non matching history sizes. + return -1; + } + if (self->near_history_size > 1) { + // If we apply lookahead, shift near-end binary spectrum history. Insert + // current `binary_near_spectrum` and pull out the delayed one. + memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]), + (self->near_history_size - 1) * sizeof(uint32_t)); + self->binary_near_history[0] = binary_near_spectrum; + binary_near_spectrum = self->binary_near_history[self->lookahead]; + } + + // Compare with delayed spectra and store the `bit_counts` for each delay. + BitCountComparison(binary_near_spectrum, self->farend->binary_far_history, + self->history_size, self->bit_counts); + + // Update `mean_bit_counts`, which is the smoothed version of `bit_counts`. + for (i = 0; i < self->history_size; i++) { + // `bit_counts` is constrained to [0, 32], meaning we can smooth with a + // factor up to 2^26. We use Q9. + int32_t bit_count = (self->bit_counts[i] << 9); // Q9. + + // Update `mean_bit_counts` only when far-end signal has something to + // contribute. If `far_bit_counts` is zero the far-end signal is weak and + // we likely have a poor echo condition, hence don't update. + if (self->farend->far_bit_counts[i] > 0) { + // Make number of right shifts piecewise linear w.r.t. `far_bit_counts`. 
+ int shifts = kShiftsAtZero; + shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4; + WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i])); + } + } + + // Find `candidate_delay`, `value_best_candidate` and `value_worst_candidate` + // of `mean_bit_counts`. + for (i = 0; i < self->history_size; i++) { + if (self->mean_bit_counts[i] < value_best_candidate) { + value_best_candidate = self->mean_bit_counts[i]; + candidate_delay = i; + } + if (self->mean_bit_counts[i] > value_worst_candidate) { + value_worst_candidate = self->mean_bit_counts[i]; + } + } + valley_depth = value_worst_candidate - value_best_candidate; + + // The `value_best_candidate` is a good indicator on the probability of + // `candidate_delay` being an accurate delay (a small `value_best_candidate` + // means a good binary match). In the following sections we make a decision + // whether to update `last_delay` or not. + // 1) If the difference bit counts between the best and the worst delay + // candidates is too small we consider the situation to be unreliable and + // don't update `last_delay`. + // 2) If the situation is reliable we update `last_delay` if the value of the + // best candidate delay has a value less than + // i) an adaptive threshold `minimum_probability`, or + // ii) this corresponding value `last_delay_probability`, but updated at + // this time instant. + + // Update `minimum_probability`. + if ((self->minimum_probability > kProbabilityLowerLimit) && + (valley_depth > kProbabilityMinSpread)) { + // The "hard" threshold can't be lower than 17 (in Q9). + // The valley in the curve also has to be distinct, i.e., the + // difference between `value_worst_candidate` and `value_best_candidate` has + // to be large enough. + int32_t threshold = value_best_candidate + kProbabilityOffset; + if (threshold < kProbabilityLowerLimit) { + threshold = kProbabilityLowerLimit; + } + if (self->minimum_probability > threshold) { + self->minimum_probability = threshold; + } + } + // Update `last_delay_probability`. + // We use a Markov type model, i.e., a slowly increasing level over time. + self->last_delay_probability++; + // Validate `candidate_delay`. We have a reliable instantaneous delay + // estimate if + // 1) The valley is distinct enough (`valley_depth` > `kProbabilityOffset`) + // and + // 2) The depth of the valley is deep enough + // (`value_best_candidate` < `minimum_probability`) + // and deeper than the best estimate so far + // (`value_best_candidate` < `last_delay_probability`) + valid_candidate = ((valley_depth > kProbabilityOffset) && + ((value_best_candidate < self->minimum_probability) || + (value_best_candidate < self->last_delay_probability))); + + // Check for nonstationary farend signal. + const bool non_stationary_farend = + std::any_of(self->farend->far_bit_counts, + self->farend->far_bit_counts + self->history_size, + [](int a) { return a > 0; }); + + if (non_stationary_farend) { + // Only update the validation statistics when the farend is nonstationary + // as the underlying estimates are otherwise frozen. + UpdateRobustValidationStatistics(self, candidate_delay, valley_depth, + value_best_candidate); + } + + if (self->robust_validation_enabled) { + int is_histogram_valid = HistogramBasedValidation(self, candidate_delay); + valid_candidate = RobustValidation(self, candidate_delay, valid_candidate, + is_histogram_valid); + } + + // Only update the delay estimate when the farend is nonstationary and when + // a valid delay candidate is available. 
+  if (non_stationary_farend && valid_candidate) {
+    if (candidate_delay != self->last_delay) {
+      self->last_delay_histogram =
+          (self->histogram[candidate_delay] > kLastHistogramMax
+               ? kLastHistogramMax
+               : self->histogram[candidate_delay]);
+      // Adjust the histogram if we made a change to `last_delay`, though it was
+      // not the most likely one according to the histogram.
+      if (self->histogram[candidate_delay] <
+          self->histogram[self->compare_delay]) {
+        self->histogram[self->compare_delay] = self->histogram[candidate_delay];
+      }
+    }
+    self->last_delay = candidate_delay;
+    if (value_best_candidate < self->last_delay_probability) {
+      self->last_delay_probability = value_best_candidate;
+    }
+    self->compare_delay = self->last_delay;
+  }
+
+  return self->last_delay;
+}
+
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self) {
+  RTC_DCHECK(self);
+  return self->last_delay;
+}
+
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) {
+  float quality = 0;
+  RTC_DCHECK(self);
+
+  if (self->robust_validation_enabled) {
+    // Simply a linear function of the histogram height at delay estimate.
+    quality = self->histogram[self->compare_delay] / kHistogramMax;
+  } else {
+    // Note that `last_delay_probability` states how deep the minimum of the
+    // cost function is, so it is rather an error probability.
+    quality = (float)(kMaxBitCountsQ9 - self->last_delay_probability) /
+              kMaxBitCountsQ9;
+    if (quality < 0) {
+      quality = 0;
+    }
+  }
+  return quality;
+}
+
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+                             int factor,
+                             int32_t* mean_value) {
+  int32_t diff = new_value - *mean_value;
+
+  // mean_new = mean_value + ((new_value - mean_value) >> factor);
+  if (diff < 0) {
+    diff = -((-diff) >> factor);
+  } else {
+    diff = (diff >> factor);
+  }
+  *mean_value += diff;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h
new file mode 100644
index 0000000000..b6fc36a759
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on binary converted spectra.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+
+static const int32_t kMaxBitCountsQ9 = (32 << 9);  // 32 matching bits in Q9.
+
+typedef struct {
+  // Pointer to bit counts.
+  int* far_bit_counts;
+  // Binary history variables.
+  uint32_t* binary_far_history;
+  int history_size;
+} BinaryDelayEstimatorFarend;
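The far-end state above is deliberately separate from the per-estimator state that follows: one far-end history can be shared by several estimators, which is how the unit tests use it. A minimal lifecycle sketch under that assumption (sizes are arbitrary and error checks are elided; note the tear-down order documented below, estimators before their shared far-end):

  BinaryDelayEstimatorFarend* farend =
      WebRtc_CreateBinaryDelayEstimatorFarend(/*history_size=*/100);
  BinaryDelayEstimator* estimator =
      WebRtc_CreateBinaryDelayEstimator(farend, /*max_lookahead=*/10);
  // A second estimator could share the same `farend`, e.g. with lookahead 0.
  WebRtc_InitBinaryDelayEstimatorFarend(farend);
  WebRtc_InitBinaryDelayEstimator(estimator);
  // Per block: WebRtc_AddBinaryFarSpectrum(farend, far_bits); then
  // WebRtc_ProcessBinarySpectrum(estimator, near_bits);
  WebRtc_FreeBinaryDelayEstimator(estimator);     // First the estimator...
  WebRtc_FreeBinaryDelayEstimatorFarend(farend);  // ...then the far-end.

+
+typedef struct {
+  // Pointer to bit counts.
+  int32_t* mean_bit_counts;
+  // Array only used locally in ProcessBinarySpectrum() but whose size is
+  // determined at run-time.
+  int32_t* bit_counts;
+
+  // Binary history variables.
+  uint32_t* binary_near_history;
+  int near_history_size;
+  int history_size;
+
+  // Delay estimation variables.
+  int32_t minimum_probability;
+  int last_delay_probability;
+
+  // Delay memory.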
+ int last_delay; + + // Robust validation + int robust_validation_enabled; + int allowed_offset; + int last_candidate_delay; + int compare_delay; + int candidate_hits; + float* histogram; + float last_delay_histogram; + + // For dynamically changing the lookahead when using SoftReset...(). + int lookahead; + + // Far-end binary spectrum history buffer etc. + BinaryDelayEstimatorFarend* farend; +} BinaryDelayEstimator; + +// Releases the memory allocated by +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// Input: +// - self : Pointer to the binary delay estimation far-end +// instance which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Allocates the memory needed by the far-end part of the binary delay +// estimation. The memory needs to be initialized separately through +// WebRtc_InitBinaryDelayEstimatorFarend(...). +// +// Inputs: +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - BinaryDelayEstimatorFarend* +// : Created `handle`. If the memory can't be allocated +// or if any of the input parameters are invalid NULL +// is returned. +// +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size); + +// Re-allocates the buffers. +// +// Inputs: +// - self : Pointer to the binary estimation far-end instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size); + +// Initializes the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - self : Pointer to the delay estimation far-end instance. +// +// Output: +// - self : Initialized far-end instance. +// +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Soft resets the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, + int delay_shift); + +// Adds the binary far-end spectrum to the internal far-end history buffer. This +// spectrum is used as reference when calculating the delay using +// WebRtc_ProcessBinarySpectrum(). +// +// Inputs: +// - self : Pointer to the delay estimation far-end +// instance. +// - binary_far_spectrum : Far-end binary spectrum. +// +// Output: +// - self : Updated far-end instance. +// +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self, + uint32_t binary_far_spectrum); + +// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...). +// +// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does +// not take ownership of it, hence the BinaryDelayEstimator has to be torn down +// before the far-end. +// +// Input: +// - self : Pointer to the binary delay estimation instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Allocates the memory needed by the binary delay estimation. The memory needs +// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...). 
+// +// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed +// description. +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, + int max_lookahead); + +// Re-allocates `history_size` dependent buffers. The far-end buffers will be +// updated at the same time if needed. +// +// Input: +// - self : Pointer to the binary estimation instance which is +// the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// - history_size : Size of the history buffers. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size); + +// Initializes the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Output: +// - self : Initialized instance. +// +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Soft resets the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +// +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift); + +// Estimates and returns the delay between the binary far-end and binary near- +// end spectra. It is assumed the binary far-end spectrum has been added using +// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by +// the lookahead (i.e. the lookahead should be subtracted from the returned +// value). +// +// Inputs: +// - self : Pointer to the delay estimation instance. +// - binary_near_spectrum : Near-end binary spectrum of the current block. +// +// Output: +// - self : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -2 - Insufficient data for estimation. +// +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum); + +// Returns the last calculated delay updated by the function +// WebRtc_ProcessBinarySpectrum(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -2 - Insufficient data for estimation. +// +int WebRtc_binary_last_delay(BinaryDelayEstimator* self); + +// Returns the estimation quality of the last calculated delay updated by the +// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value +// in the interval [0, 1]. The higher the value, the better the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated +// delay value. +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self); + +// Updates the `mean_value` recursively with a step size of 2^-`factor`. This +// function is used internally in the Binary Delay Estimator as well as the +// Fixed point wrapper. +// +// Inputs: +// - new_value : The new value the mean should be updated with. +// - factor : The step size, in number of right shifts. +// +// Input/Output: +// - mean_value : Pointer to the mean value. 
+// +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h new file mode 100644 index 0000000000..891e20027d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Header file including the delay estimator handle used for testing. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ + +#include "modules/audio_processing/utility/delay_estimator.h" + +namespace webrtc { + +typedef union { + float float_; + int32_t int32_; +} SpectrumType; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_far_spectrum; + // `mean_far_spectrum` initialization indicator. + int far_spectrum_initialized; + + int spectrum_size; + + // Far-end part of binary spectrum based delay estimation. + BinaryDelayEstimatorFarend* binary_farend; +} DelayEstimatorFarend; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_near_spectrum; + // `mean_near_spectrum` initialization indicator. + int near_spectrum_initialized; + + int spectrum_size; + + // Binary spectrum based delay estimator + BinaryDelayEstimator* binary_handle; +} DelayEstimator; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc new file mode 100644 index 0000000000..6052612ef3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/delay_estimator.h" + +#include "modules/audio_processing/utility/delay_estimator_internal.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr int kSpectrumSize = 65; +// Delay history sizes. +constexpr int kMaxDelay = 100; +constexpr int kLookahead = 10; +constexpr int kHistorySize = kMaxDelay + kLookahead; +// Length of binary spectrum sequence. 
+constexpr int kSequenceLength = 400;
+
+const int kDifferentHistorySize = 3;
+const int kDifferentLookahead = 1;
+
+const int kEnable[] = {0, 1};
+const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable);
+
+class DelayEstimatorTest : public ::testing::Test {
+ protected:
+  DelayEstimatorTest();
+  void SetUp() override;
+  void TearDown() override;
+
+  void Init();
+  void InitBinary();
+  void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay);
+  void RunBinarySpectra(BinaryDelayEstimator* binary1,
+                        BinaryDelayEstimator* binary2,
+                        int near_offset,
+                        int lookahead_offset,
+                        int far_offset);
+  void RunBinarySpectraTest(int near_offset,
+                            int lookahead_offset,
+                            int ref_robust_validation,
+                            int robust_validation);
+
+  void* handle_;
+  DelayEstimator* self_;
+  void* farend_handle_;
+  DelayEstimatorFarend* farend_self_;
+  BinaryDelayEstimator* binary_;
+  BinaryDelayEstimatorFarend* binary_farend_;
+  int spectrum_size_;
+  // Dummy input spectra.
+  float far_f_[kSpectrumSize];
+  float near_f_[kSpectrumSize];
+  uint16_t far_u16_[kSpectrumSize];
+  uint16_t near_u16_[kSpectrumSize];
+  uint32_t binary_spectrum_[kSequenceLength + kHistorySize];
+};
+
+DelayEstimatorTest::DelayEstimatorTest()
+    : handle_(NULL),
+      self_(NULL),
+      farend_handle_(NULL),
+      farend_self_(NULL),
+      binary_(NULL),
+      binary_farend_(NULL),
+      spectrum_size_(kSpectrumSize) {
+  // Dummy input data are set with more or less arbitrary non-zero values.
+  memset(far_f_, 1, sizeof(far_f_));
+  memset(near_f_, 2, sizeof(near_f_));
+  memset(far_u16_, 1, sizeof(far_u16_));
+  memset(near_u16_, 2, sizeof(near_u16_));
+  // Construct a sequence of binary spectra used to verify delay estimate. The
+  // `kSequenceLength` has to be long enough for the delay estimation to leave
+  // the initialized state.
+  binary_spectrum_[0] = 1;
+  for (int i = 1; i < (kSequenceLength + kHistorySize); i++) {
+    binary_spectrum_[i] = 3 * binary_spectrum_[i - 1];
+  }
+}
+
+void DelayEstimatorTest::SetUp() {
+  farend_handle_ =
+      WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, kHistorySize);
+  ASSERT_TRUE(farend_handle_ != NULL);
+  farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_);
+  handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead);
+  ASSERT_TRUE(handle_ != NULL);
+  self_ = reinterpret_cast<DelayEstimator*>(handle_);
+  binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
+  ASSERT_TRUE(binary_farend_ != NULL);
+  binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead);
+  ASSERT_TRUE(binary_ != NULL);
+}
+
+void DelayEstimatorTest::TearDown() {
+  WebRtc_FreeDelayEstimator(handle_);
+  handle_ = NULL;
+  self_ = NULL;
+  WebRtc_FreeDelayEstimatorFarend(farend_handle_);
+  farend_handle_ = NULL;
+  farend_self_ = NULL;
+  WebRtc_FreeBinaryDelayEstimator(binary_);
+  binary_ = NULL;
+  WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_);
+  binary_farend_ = NULL;
+}
+
+void DelayEstimatorTest::Init() {
+  // Initialize Delay Estimator
+  EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+  // Verify initialization.
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_last_delay(handle_));  // Delay in initial state.
+  EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_));  // Zero quality.
+}
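The -2 checked here is the "insufficient data" state documented in the headers (-1 is reserved for errors). A caller typically keeps feeding blocks until the estimator leaves that state; a minimal sketch, assuming hypothetical `far`/`near` spectrum buffers of size kSpectrumSize and ignoring the -1 error return:

  int delay = -2;
  while (delay == -2) {
    WebRtc_AddFarSpectrumFloat(farend_handle_, far, kSpectrumSize);
    delay = WebRtc_DelayEstimatorProcessFloat(handle_, near, kSpectrumSize);
  }
  // `delay` now holds the estimate in blocks, offset by the configured
  // lookahead.

+
+void DelayEstimatorTest::InitBinary() {
+  // Initialize Binary Delay Estimator (far-end part).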
+ WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_); + // Initialize Binary Delay Estimator + WebRtc_InitBinaryDelayEstimator(binary_); + // Verify initialization. This does not guarantee a complete check, since + // `last_delay` may be equal to -2 before initialization if done on the fly. + EXPECT_EQ(-2, binary_->last_delay); +} + +void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle, + int offset, + int delay) { + // Verify that we WebRtc_binary_last_delay() returns correct delay. + EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle)); + + if (delay != -2) { + // Verify correct delay estimate. In the non-causal case the true delay + // is equivalent with the `offset`. + EXPECT_EQ(offset, delay); + } +} + +void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1, + BinaryDelayEstimator* binary2, + int near_offset, + int lookahead_offset, + int far_offset) { + int different_validations = + binary1->robust_validation_enabled ^ binary2->robust_validation_enabled; + WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_); + WebRtc_InitBinaryDelayEstimator(binary1); + WebRtc_InitBinaryDelayEstimator(binary2); + // Verify initialization. This does not guarantee a complete check, since + // `last_delay` may be equal to -2 before initialization if done on the fly. + EXPECT_EQ(-2, binary1->last_delay); + EXPECT_EQ(-2, binary2->last_delay); + for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) { + WebRtc_AddBinaryFarSpectrum(binary_farend_, + binary_spectrum_[i + far_offset]); + int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]); + int delay_2 = WebRtc_ProcessBinarySpectrum( + binary2, binary_spectrum_[i - near_offset]); + + VerifyDelay(binary1, far_offset + kLookahead, delay_1); + VerifyDelay(binary2, + far_offset + kLookahead + lookahead_offset + near_offset, + delay_2); + // Expect the two delay estimates to be offset by `lookahead_offset` + + // `near_offset` when we have left the initial state. + if ((delay_1 != -2) && (delay_2 != -2)) { + EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset); + } + // For the case of identical signals `delay_1` and `delay_2` should match + // all the time, unless one of them has robust validation turned on. In + // that case the robust validation leaves the initial state faster. + if ((near_offset == 0) && (lookahead_offset == 0)) { + if (!different_validations) { + EXPECT_EQ(delay_1, delay_2); + } else { + if (binary1->robust_validation_enabled) { + EXPECT_GE(delay_1, delay_2); + } else { + EXPECT_GE(delay_2, delay_1); + } + } + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_binary_last_delay(binary1)); + EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1)); + EXPECT_NE(-2, WebRtc_binary_last_delay(binary2)); + EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2)); +} + +void DelayEstimatorTest::RunBinarySpectraTest(int near_offset, + int lookahead_offset, + int ref_robust_validation, + int robust_validation) { + BinaryDelayEstimator* binary2 = WebRtc_CreateBinaryDelayEstimator( + binary_farend_, kLookahead + lookahead_offset); + // Verify the delay for both causal and non-causal systems. For causal systems + // the delay is equivalent with a positive `offset` of the far-end sequence. + // For non-causal systems the delay is equivalent with a negative `offset` of + // the far-end sequence. 
+ binary_->robust_validation_enabled = ref_robust_validation; + binary2->robust_validation_enabled = robust_validation; + for (int offset = -kLookahead; + offset < kMaxDelay - lookahead_offset - near_offset; offset++) { + RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset); + } + WebRtc_FreeBinaryDelayEstimator(binary2); + binary2 = NULL; + binary_->robust_validation_enabled = 0; // Reset reference. +} + +TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) { + // In this test we verify correct error returns on invalid API calls. + + // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator() + // should return a NULL pointer on invalid input values. + // Make sure we have a non-NULL value at start, so we can detect NULL after + // create failure. + void* handle = farend_handle_; + handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize); + EXPECT_TRUE(handle == NULL); + handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1); + EXPECT_TRUE(handle == NULL); + + handle = handle_; + handle = WebRtc_CreateDelayEstimator(NULL, kLookahead); + EXPECT_TRUE(handle == NULL); + handle = WebRtc_CreateDelayEstimator(farend_handle_, -1); + EXPECT_TRUE(handle == NULL); + + // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should + // return -1 if we have a NULL pointer as `handle`. + EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL)); + EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL)); + + // WebRtc_AddFarSpectrumFloat() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) NULL pointer as far-end spectrum. + // 3) Incorrect spectrum size. + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_)); + // Use `farend_handle_` which is properly created at SetUp(). + EXPECT_EQ(-1, + WebRtc_AddFarSpectrumFloat(farend_handle_, NULL, spectrum_size_)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_ + 1)); + + // WebRtc_AddFarSpectrumFix() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) NULL pointer as far-end spectrum. + // 3) Incorrect spectrum size. + // 4) Too high precision in far-end spectrum (Q-domain > 15). + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0)); + EXPECT_EQ(-1, + WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_, 0)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_ + 1, 0)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 16)); + + // WebRtc_set_history_size() should return -1 if: + // 1) `handle` is a NULL. + // 2) `history_size` <= 1. + EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1)); + EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1)); + // WebRtc_history_size() should return -1 if: + // 1) NULL pointer input. + EXPECT_EQ(-1, WebRtc_history_size(NULL)); + // 2) there is a mismatch between history size. + void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(tmp_handle, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle)); + EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize)); + EXPECT_EQ(-1, WebRtc_history_size(tmp_handle)); + + // WebRtc_set_lookahead() should return -1 if we try a value outside the + /// buffer. 
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1)); + EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1)); + + // WebRtc_set_allowed_offset() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) `allowed_offset` < 0. + EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0)); + EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1)); + + EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL)); + + // WebRtc_enable_robust_validation() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) Incorrect `enable` value (not 0 or 1). + EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0])); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1)); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2)); + + // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL + // pointer as `handle`. + EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL)); + + // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Non matching history sizes if multiple delay estimators using the same + // far-end reference. + EXPECT_EQ(-1, + WebRtc_DelayEstimatorProcessFloat(NULL, near_f_, spectrum_size_)); + // Use `handle_` which is properly created at SetUp(). + EXPECT_EQ(-1, + WebRtc_DelayEstimatorProcessFloat(handle_, NULL, spectrum_size_)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_ + 1)); + // `tmp_handle` is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle, near_f_, + spectrum_size_)); + + // WebRtc_DelayEstimatorProcessFix() should return -1 if we have: + // 1) NULL pointer as `handle`. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Too high precision in near-end spectrum (Q-domain > 15). + // 5) Non matching history sizes if multiple delay estimators using the same + // far-end reference. + EXPECT_EQ( + -1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_, 0)); + EXPECT_EQ(-1, + WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_, 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_ + 1, 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 16)); + // `tmp_handle` is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle, near_u16_, + spectrum_size_, 0)); + WebRtc_FreeDelayEstimator(tmp_handle); + + // WebRtc_last_delay() should return -1 if we have a NULL pointer as `handle`. + EXPECT_EQ(-1, WebRtc_last_delay(NULL)); + + // Free any local memory if needed. + WebRtc_FreeDelayEstimator(handle); +} + +TEST_F(DelayEstimatorTest, VerifyAllowedOffset) { + // Is set to zero by default. + EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_)); + for (int i = 1; i >= 0; i--) { + EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i)); + EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + Init(); + // Unaffected over a reset. + EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + } +} + +TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) { + // Disabled by default. + EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_)); + for (size_t i = 0; i < kSizeEnable; ++i) { + EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i])); + EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + Init(); + // Unaffected over a reset. 
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + } +} + +TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) { + // In this test we verify that the mean spectra are initialized after first + // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test + // also verifies the state is not left for zero spectra. + const float kZerosFloat[kSpectrumSize] = {0.0}; + const uint16_t kZerosU16[kSpectrumSize] = {0}; + + // For floating point operations, process one frame and verify initialization + // flag. + Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, + WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, spectrum_size_)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ( + -2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, spectrum_size_)); + EXPECT_EQ(1, self_->near_spectrum_initialized); + + // For fixed point operations, process one frame and verify initialization + // flag. + Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ( + 0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, spectrum_size_, 0)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 0)); + EXPECT_EQ(1, self_->near_spectrum_initialized); +} + +TEST_F(DelayEstimatorTest, CorrectLastDelay) { + // In this test we verify that we get the correct last delay upon valid call. + // We simply process the same data until we leave the initialized state + // (`last_delay` = -2). Then we compare the Process() output with the + // last_delay() call. + + // TODO(bjornv): Update quality values for robust validation. + int last_delay = 0; + // Floating point operations. + Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ( + 0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, spectrum_size_)); + last_delay = + WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, spectrum_size_); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_last_delay(handle_)); + EXPECT_LT(0, WebRtc_last_delay_quality(handle_)); + + // Fixed point operations. + Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 0)); + last_delay = + WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, spectrum_size_, 0); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. 
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator (far-end part).
+
+  BinaryDelayEstimatorFarend* binary = binary_farend_;
+  // WebRtc_CreateBinaryDelayEstimatorFarend() should return NULL if the input
+  // history size is less than 2. This is to make sure the buffer shifting
+  // applies properly.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+  EXPECT_TRUE(binary == NULL);
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator.
+
+  BinaryDelayEstimator* binary_handle = binary_;
+  // WebRtc_CreateBinaryDelayEstimator() should return NULL if we have a NULL
+  // pointer as `binary_farend` or invalid input values. Upon failure, the
+  // `binary_handle` should be NULL.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(binary_handle == NULL);
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+  EXPECT_TRUE(binary_handle == NULL);
+}
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+  // In this test we verify that we update the mean value in correct direction
+  // only. With "direction" we mean increase or decrease.
+
+  int32_t mean_value = 4000;
+  int32_t mean_value_before = mean_value;
+  int32_t new_mean_value = mean_value * 2;
+
+  // Increasing `mean_value`.
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_LT(mean_value_before, mean_value);
+  EXPECT_GT(new_mean_value, mean_value);
+
+  // Decreasing `mean_value`.
+  new_mean_value = mean_value / 2;
+  mean_value_before = mean_value;
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_GT(mean_value_before, mean_value);
+  EXPECT_LT(new_mean_value, mean_value);
+}
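To make the step size concrete, a worked pass of the first call in the test above (values from the test, shift semantics per WebRtc_MeanEstimatorFix() in delay_estimator.cc):

  int32_t mean = 4000;
  WebRtc_MeanEstimatorFix(8000, 10, &mean);
  // diff = (8000 - 4000) >> 10 == 3, so mean == 4003: a 2^-10 step toward
  // the new value, which is exactly what the EXPECT_LT/EXPECT_GT pairs assert.

+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+  // In this test we verify that we get the correct delay estimates if we shift
+  // the signal accordingly. We create two Binary Delay Estimators and feed them
+  // with the same signals, so they should output the same results.
+  // We verify both causal and non-causal delays.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+  // In this test we use the same setup as above, but we now feed the two Binary
+  // Delay Estimators with different signals, so they should output different
+  // results.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.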
+ + const int kNearOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) { + // In this test we use the same setup as above, feeding the two Binary + // Delay Estimators with the same signals. The difference is that we create + // them with different lookahead. + // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + const int kLookaheadOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) { + // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the + // difference that `allowed_offset` is set for the reference binary delay + // estimator. + + binary_->allowed_offset = 10; + RunBinarySpectraTest(0, 0, 0, 0); + binary_->allowed_offset = 0; // Reset reference. +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) { + void* farend_handle = + WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, kMaxDelay); + ASSERT_TRUE(farend_handle != NULL); + void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead); + ASSERT_TRUE(handle != NULL); + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle)); + WebRtc_FreeDelayEstimator(handle); + WebRtc_FreeDelayEstimatorFarend(farend_handle); +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) { + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(kDifferentLookahead, + WebRtc_set_lookahead(handle_, kDifferentLookahead)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(handle_, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); +} + +// TODO(bjornv): Add tests for SoftReset...(...). + +} // namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc new file mode 100644 index 0000000000..3b1409cc0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+#include "modules/audio_processing/utility/delay_estimator_internal.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Only bit `kBandFirst` through bit `kBandLast` are processed and
+// `kBandLast` - `kBandFirst` must be < 32.
+constexpr int kBandFirst = 12;
+constexpr int kBandLast = 43;
+
+static __inline uint32_t SetBit(uint32_t in, int pos) {
+ uint32_t mask = (1 << pos);
+ uint32_t out = (in | mask);
+
+ return out;
+}
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+// - new_value : New additional value.
+// - scale : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+// - mean_value : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+ float scale,
+ float* mean_value) {
+ RTC_DCHECK_LT(scale, 1.0f);
+ *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input `spectrum` with a
+// `threshold_spectrum`. Float and fixed point versions.
+//
+// Inputs:
+// - spectrum : Spectrum of which the binary spectrum should be
+// calculated.
+// - threshold_spectrum : Threshold spectrum with which the input
+// spectrum is compared.
+// Return:
+// - out : Binary spectrum.
+//
+static uint32_t BinarySpectrumFix(const uint16_t* spectrum,
+ SpectrumType* threshold_spectrum,
+ int q_domain,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+
+ RTC_DCHECK_LT(q_domain, 16);
+
+ if (!(*threshold_initialized)) {
+ // Set the `threshold_spectrum` to half the input `spectrum` as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0) {
+ // Convert input spectrum from Q(`q_domain`) to Q15.
+ int32_t spectrum_q15 = ((int32_t)spectrum[i]) << (15 - q_domain);
+ threshold_spectrum[i].int32_ = (spectrum_q15 >> 1);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Convert input spectrum from Q(`q_domain`) to Q15.
+ int32_t spectrum_q15 = ((int32_t)spectrum[i]) << (15 - q_domain);
+ // Update the `threshold_spectrum`.
+ WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_));
+ // Convert `spectrum` at current frequency bin to a binary value.
+ if (spectrum_q15 > threshold_spectrum[i].int32_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+static uint32_t BinarySpectrumFloat(const float* spectrum,
+ SpectrumType* threshold_spectrum,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+ const float kScale = 1 / 64.0;
+
+ if (!(*threshold_initialized)) {
+ // Set the `threshold_spectrum` to half the input `spectrum` as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0.0f) {
+ threshold_spectrum[i].float_ = (spectrum[i] / 2);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Update the `threshold_spectrum`.
+ MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_));
+ // Convert `spectrum` at current frequency bin to a binary value.
+ if (spectrum[i] > threshold_spectrum[i].float_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+void WebRtc_FreeDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_far_spectrum);
+ self->mean_far_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend);
+ self->binary_farend = NULL;
+
+ free(self);
+}
+
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) {
+ DelayEstimatorFarend* self = NULL;
+
+ // Check if the sub band used in the delay estimation is small enough to fit
+ // the binary spectra in a uint32_t.
+ static_assert(kBandLast - kBandFirst < 32, "");
+
+ if (spectrum_size >= kBandLast) {
+ self = static_cast<DelayEstimatorFarend*>(
+ malloc(sizeof(DelayEstimatorFarend)));
+ }
+
+ if (self != NULL) {
+ int memory_fail = 0;
+
+ // Allocate memory for the binary far-end spectrum handling.
+ self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size);
+ memory_fail |= (self->binary_farend == NULL);
+
+ // Allocate memory for spectrum buffers.
+ self->mean_far_spectrum = static_cast<SpectrumType*>(
+ malloc(spectrum_size * sizeof(SpectrumType)));
+ memory_fail |= (self->mean_far_spectrum == NULL);
+
+ self->spectrum_size = spectrum_size;
+
+ if (memory_fail) {
+ WebRtc_FreeDelayEstimatorFarend(self);
+ self = NULL;
+ }
+ }
+
+ return self;
+}
+
+int WebRtc_InitDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize far-end part of binary delay estimator.
+ WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend);
+
+ // Set averaged far and near end spectra to zero.
+ memset(self->mean_far_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicators.
+ self->far_spectrum_initialized = 0;
+
+ return 0;
+}
+
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+ RTC_DCHECK(self);
+ WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift);
+}
+
+int WebRtc_AddFarSpectrumFix(void* handle,
+ const uint16_t* far_spectrum,
+ int spectrum_size,
+ int far_q) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+ if (far_q > 15) {
+ // If `far_q` is larger than 15 we cannot guarantee no wrap around.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum,
+ far_q, &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+int WebRtc_AddFarSpectrumFloat(void* handle,
+ const float* far_spectrum,
+ int spectrum_size) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum,
+ &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+void WebRtc_FreeDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*)handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_near_spectrum);
+ self->mean_near_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimator(self->binary_handle);
+ self->binary_handle = NULL;
+
+ free(self);
+}
+
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) {
+ DelayEstimator* self = NULL;
+ DelayEstimatorFarend* farend = (DelayEstimatorFarend*)farend_handle;
+
+ if (farend_handle != NULL) {
+ self = static_cast<DelayEstimator*>(malloc(sizeof(DelayEstimator)));
+ }
+
+ if (self != NULL) {
+ int memory_fail = 0;
+
+ // Allocate memory for the farend spectrum handling.
+ self->binary_handle =
+ WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead);
+ memory_fail |= (self->binary_handle == NULL);
+
+ // Allocate memory for spectrum buffers.
+ self->mean_near_spectrum = static_cast<SpectrumType*>(
+ malloc(farend->spectrum_size * sizeof(SpectrumType)));
+ memory_fail |= (self->mean_near_spectrum == NULL);
+
+ self->spectrum_size = farend->spectrum_size;
+
+ if (memory_fail) {
+ WebRtc_FreeDelayEstimator(self);
+ self = NULL;
+ }
+ }
+
+ return self;
+}
+
+int WebRtc_InitDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*)handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize binary delay estimator.
+ WebRtc_InitBinaryDelayEstimator(self->binary_handle);
+
+ // Set averaged far and near end spectra to zero.
+ memset(self->mean_near_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicators.
+ self->near_spectrum_initialized = 0;
+
+ return 0;
+}
+
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) {
+ DelayEstimator* self = (DelayEstimator*)handle;
+ RTC_DCHECK(self);
+ return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift);
+}
+
+int WebRtc_set_history_size(void* handle, int history_size) {
+ DelayEstimator* self = static_cast<DelayEstimator*>(handle);
+
+ if ((self == NULL) || (history_size <= 1)) {
+ return -1;
+ }
+ return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size);
+}
+
+int WebRtc_history_size(const void* handle) {
+ const DelayEstimator* self = static_cast<const DelayEstimator*>(handle);
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (self->binary_handle->farend->history_size !=
+ self->binary_handle->history_size) {
+ // Non matching history sizes.
+ return -1; + } + return self->binary_handle->history_size; +} + +int WebRtc_set_lookahead(void* handle, int lookahead) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + if ((lookahead > self->binary_handle->near_history_size - 1) || + (lookahead < 0)) { + return -1; + } + self->binary_handle->lookahead = lookahead; + return self->binary_handle->lookahead; +} + +int WebRtc_lookahead(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + return self->binary_handle->lookahead; +} + +int WebRtc_set_allowed_offset(void* handle, int allowed_offset) { + DelayEstimator* self = (DelayEstimator*)handle; + + if ((self == NULL) || (allowed_offset < 0)) { + return -1; + } + self->binary_handle->allowed_offset = allowed_offset; + return 0; +} + +int WebRtc_get_allowed_offset(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->allowed_offset; +} + +int WebRtc_enable_robust_validation(void* handle, int enable) { + DelayEstimator* self = (DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + if ((enable < 0) || (enable > 1)) { + return -1; + } + RTC_DCHECK(self->binary_handle); + self->binary_handle->robust_validation_enabled = enable; + return 0; +} + +int WebRtc_is_robust_validation_enabled(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->robust_validation_enabled; +} + +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q) { + DelayEstimator* self = (DelayEstimator*)handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (near_q > 15) { + // If `near_q` is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectra. + binary_spectrum = + BinarySpectrumFix(near_spectrum, self->mean_near_spectrum, near_q, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*)handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. 
+ binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum,
+ &(self->near_spectrum_initialized));
+
+ return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_last_delay(void* handle) {
+ DelayEstimator* self = (DelayEstimator*)handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ return WebRtc_binary_last_delay(self->binary_handle);
+}
+
+float WebRtc_last_delay_quality(void* handle) {
+ DelayEstimator* self = (DelayEstimator*)handle;
+ RTC_DCHECK(self);
+ return WebRtc_binary_last_delay_quality(self->binary_handle);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
new file mode 100644
index 0000000000..a90cbe31cb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on a block-by-block basis.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...)
+void WebRtc_FreeDelayEstimatorFarend(void* handle);
+
+// Allocates the memory needed by the far-end part of the delay estimation. The
+// memory needs to be initialized separately through
+// WebRtc_InitDelayEstimatorFarend(...).
+//
+// Inputs:
+// - spectrum_size : Size of the spectrum used both in far-end and
+// near-end. Used to allocate memory for spectrum
+// specific buffers.
+// - history_size : The far-end history buffer size. A change in buffer
+// size can be forced with WebRtc_set_history_size().
+// Note that the maximum delay which can be estimated is
+// determined together with WebRtc_set_lookahead().
+//
+// Return value:
+// - void* : Created `handle`. If the memory can't be allocated or
+// if any of the input parameters are invalid NULL is
+// returned.
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size);
+
+// Initializes the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...)
+int WebRtc_InitDelayEstimatorFarend(void* handle);
+
+// Soft resets the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...).
+// Input:
+// - delay_shift : The amount of blocks to shift history buffers.
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift);
+
+// Adds the far-end spectrum to the far-end history buffer. This spectrum is
+// used as reference when calculating the delay using
+// WebRtc_ProcessSpectrum().
+//
+// Inputs:
+// - far_spectrum : Far-end spectrum.
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end).
+// - far_q : The Q-domain of the far-end data.
+//
+// Output:
+// - handle : Updated far-end instance.
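+//
+// Usage sketch (illustrative only; the spectrum size 65 and history size 100
+// below are assumed example values, not requirements of this API):
+//
+//   void* farend = WebRtc_CreateDelayEstimatorFarend(65, 100);
+//   WebRtc_InitDelayEstimatorFarend(farend);
+//   // Per block: WebRtc_AddFarSpectrumFix(farend, far_spectrum, 65, far_q);
+//   WebRtc_FreeDelayEstimatorFarend(farend);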
+// +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q); + +// See WebRtc_AddFarSpectrumFix() for description. +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size); + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +void WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately through WebRtc_InitDelayEstimator(...). +// +// Inputs: +// - farend_handle : Pointer to the far-end part of the delay estimation +// instance created prior to this call using +// WebRtc_CreateDelayEstimatorFarend(). +// +// Note that WebRtc_CreateDelayEstimator does not take +// ownership of `farend_handle`, which has to be torn +// down properly after this instance. +// +// - max_lookahead : Maximum amount of non-causal lookahead allowed. The +// actual amount of lookahead used can be controlled by +// WebRtc_set_lookahead(...). The default `lookahead` is +// set to `max_lookahead` at create time. Use +// WebRtc_set_lookahead(...) before start if a different +// value is desired. +// +// Using lookahead can detect cases in which a near-end +// signal occurs before the corresponding far-end signal. +// It will delay the estimate for the current block by an +// equal amount, and the returned values will be offset +// by it. +// +// A value of zero is the typical no-lookahead case. +// This also represents the minimum delay which can be +// estimated. +// +// Note that the effective range of delay estimates is +// [-`lookahead`,... ,`history_size`-`lookahead`) +// where `history_size` is set through +// WebRtc_set_history_size(). +// +// Return value: +// - void* : Created `handle`. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead); + +// Initializes the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +int WebRtc_InitDelayEstimator(void* handle); + +// Soft resets the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift); + +// Sets the effective `history_size` used. Valid values from 2. We simply need +// at least two delays to compare to perform an estimate. If `history_size` is +// changed, buffers are reallocated filling in with zeros if necessary. +// Note that changing the `history_size` affects both buffers in far-end and +// near-end. Hence it is important to change all DelayEstimators that use the +// same reference far-end, to the same `history_size` value. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - history_size : Effective history size to be used. +// Return value: +// - new_history_size : The new history size used. If the memory was not able +// to be allocated 0 is returned. +int WebRtc_set_history_size(void* handle, int history_size); + +// Returns the history_size currently used. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_history_size(const void* handle); + +// Sets the amount of `lookahead` to use. 
Valid values are [0, max_lookahead] +// where `max_lookahead` was set at create time through +// WebRtc_CreateDelayEstimator(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// - lookahead : The amount of lookahead to be used. +// +// Return value: +// - new_lookahead : The actual amount of lookahead set, unless `handle` is +// a NULL pointer or `lookahead` is invalid, for which an +// error is returned. +int WebRtc_set_lookahead(void* handle, int lookahead); + +// Returns the amount of lookahead we currently use. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_lookahead(void* handle); + +// Sets the `allowed_offset` used in the robust validation scheme. If the +// delay estimator is used in an echo control component, this parameter is +// related to the filter length. In principle `allowed_offset` should be set to +// the echo control filter length minus the expected echo duration, i.e., the +// delay offset the echo control can handle without quality regression. The +// default value, used if not set manually, is zero. Note that `allowed_offset` +// has to be non-negative. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - allowed_offset : The amount of delay offset, measured in partitions, +// the echo control filter can handle. +int WebRtc_set_allowed_offset(void* handle, int allowed_offset); + +// Returns the `allowed_offset` in number of partitions. +int WebRtc_get_allowed_offset(const void* handle); + +// Enables/Disables a robust validation functionality in the delay estimation. +// This is by default set to disabled at create time. The state is preserved +// over a reset. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - enable : Enable (1) or disable (0) this feature. +int WebRtc_enable_robust_validation(void* handle, int enable); + +// Returns 1 if robust validation is enabled and 0 if disabled. +int WebRtc_is_robust_validation_enabled(const void* handle); + +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - near_spectrum : Pointer to the near-end spectrum data of the current +// block. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - near_q : The Q-domain of the near-end data. +// +// Output: +// - handle : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q); + +// See WebRtc_DelayEstimatorProcessFix() for description. +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcess(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_last_delay(void* handle); + +// Returns the estimation quality/probability of the last calculated delay +// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation +// quality is a value in the interval [0, 1]. 
The higher the value, the better +// the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated delay. +float WebRtc_last_delay_quality(void* handle); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build new file mode 100644 index 0000000000..11294e2aef --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build @@ -0,0 +1,222 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + 
DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_delay_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc new file mode 100644 index 0000000000..88642fb12b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+
+#include "rtc_base/checks.h"
+#include "third_party/pffft/src/pffft.h"
+
+namespace webrtc {
+namespace {
+
+size_t GetBufferSize(size_t fft_size, Pffft::FftType fft_type) {
+ return fft_size * (fft_type == Pffft::FftType::kReal ? 1 : 2);
+}
+
+float* AllocatePffftBuffer(size_t size) {
+ return static_cast<float*>(pffft_aligned_malloc(size * sizeof(float)));
+}
+
+} // namespace
+
+Pffft::FloatBuffer::FloatBuffer(size_t fft_size, FftType fft_type)
+ : size_(GetBufferSize(fft_size, fft_type)),
+ data_(AllocatePffftBuffer(size_)) {}
+
+Pffft::FloatBuffer::~FloatBuffer() {
+ pffft_aligned_free(data_);
+}
+
+rtc::ArrayView<const float> Pffft::FloatBuffer::GetConstView() const {
+ return {data_, size_};
+}
+
+rtc::ArrayView<float> Pffft::FloatBuffer::GetView() {
+ return {data_, size_};
+}
+
+Pffft::Pffft(size_t fft_size, FftType fft_type)
+ : fft_size_(fft_size),
+ fft_type_(fft_type),
+ pffft_status_(pffft_new_setup(
+ fft_size_,
+ fft_type == Pffft::FftType::kReal ? PFFFT_REAL : PFFFT_COMPLEX)),
+ scratch_buffer_(
+ AllocatePffftBuffer(GetBufferSize(fft_size_, fft_type_))) {
+ RTC_DCHECK(pffft_status_);
+ RTC_DCHECK(scratch_buffer_);
+}
+
+Pffft::~Pffft() {
+ pffft_destroy_setup(pffft_status_);
+ pffft_aligned_free(scratch_buffer_);
+}
+
+bool Pffft::IsValidFftSize(size_t fft_size, FftType fft_type) {
+ if (fft_size == 0) {
+ return false;
+ }
+ // PFFFT only supports transforms for inputs of length N of the form
+ // N = (2^a)*(3^b)*(5^c) where b >=0 and c >= 0 and a >= 5 for the real FFT
+ // and a >= 4 for the complex FFT.
+ constexpr int kFactors[] = {2, 3, 5};
+ int factorization[] = {0, 0, 0};
+ int n = static_cast<int>(fft_size);
+ for (int i = 0; i < 3; ++i) {
+ while (n % kFactors[i] == 0) {
+ n = n / kFactors[i];
+ factorization[i]++;
+ }
+ }
+ int a_min = (fft_type == Pffft::FftType::kReal) ? 5 : 4;
+ return factorization[0] >= a_min && n == 1;
+}
+
+bool Pffft::IsSimdEnabled() {
+ return pffft_simd_size() > 1;
+}
+
+std::unique_ptr<Pffft::FloatBuffer> Pffft::CreateBuffer() const {
+ // Cannot use make_unique from absl because Pffft is the only friend of
+ // Pffft::FloatBuffer.
+ std::unique_ptr<Pffft::FloatBuffer> buffer(
+ new Pffft::FloatBuffer(fft_size_, fft_type_));
+ return buffer;
+}
+
+void Pffft::ForwardTransform(const FloatBuffer& in,
+ FloatBuffer* out,
+ bool ordered) {
+ RTC_DCHECK_EQ(in.size(), GetBufferSize(fft_size_, fft_type_));
+ RTC_DCHECK_EQ(in.size(), out->size());
+ RTC_DCHECK(scratch_buffer_);
+ if (ordered) {
+ pffft_transform_ordered(pffft_status_, in.const_data(), out->data(),
+ scratch_buffer_, PFFFT_FORWARD);
+ } else {
+ pffft_transform(pffft_status_, in.const_data(), out->data(),
+ scratch_buffer_, PFFFT_FORWARD);
+ }
+}
+
+void Pffft::BackwardTransform(const FloatBuffer& in,
+ FloatBuffer* out,
+ bool ordered) {
+ RTC_DCHECK_EQ(in.size(), GetBufferSize(fft_size_, fft_type_));
+ RTC_DCHECK_EQ(in.size(), out->size());
+ RTC_DCHECK(scratch_buffer_);
+ if (ordered) {
+ pffft_transform_ordered(pffft_status_, in.const_data(), out->data(),
+ scratch_buffer_, PFFFT_BACKWARD);
+ } else {
+ pffft_transform(pffft_status_, in.const_data(), out->data(),
+ scratch_buffer_, PFFFT_BACKWARD);
+ }
+}
+
+void Pffft::FrequencyDomainConvolve(const FloatBuffer& fft_x,
+ const FloatBuffer& fft_y,
+ FloatBuffer* out,
+ float scaling) {
+ RTC_DCHECK_EQ(fft_x.size(), GetBufferSize(fft_size_, fft_type_));
+ RTC_DCHECK_EQ(fft_x.size(), fft_y.size());
+ RTC_DCHECK_EQ(fft_x.size(), out->size());
+ pffft_zconvolve_accumulate(pffft_status_, fft_x.const_data(),
+ fft_y.const_data(), out->data(), scaling);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h
new file mode 100644
index 0000000000..983c2fd1bc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_
+
+#include <memory>
+
+#include "api/array_view.h"
+
+// Forward declaration.
+struct PFFFT_Setup;
+
+namespace webrtc {
+
+// Pretty-Fast Fast Fourier Transform (PFFFT) wrapper class.
+// Not thread safe.
+class Pffft {
+ public:
+ enum class FftType { kReal, kComplex };
+
+ // 1D floating point buffer used as input/output data type for the FFT ops.
+ // It must be constructed using Pffft::CreateBuffer().
+ class FloatBuffer {
+ public:
+ FloatBuffer(const FloatBuffer&) = delete;
+ FloatBuffer& operator=(const FloatBuffer&) = delete;
+ ~FloatBuffer();
+
+ rtc::ArrayView<const float> GetConstView() const;
+ rtc::ArrayView<float> GetView();
+
+ private:
+ friend class Pffft;
+ FloatBuffer(size_t fft_size, FftType fft_type);
+ const float* const_data() const { return data_; }
+ float* data() { return data_; }
+ size_t size() const { return size_; }
+
+ const size_t size_;
+ float* const data_;
+ };
+
+ // TODO(https://crbug.com/webrtc/9577): Consider adding a factory and making
+ // the ctor private.
+ // static std::unique_ptr<Pffft> Create(size_t fft_size,
+ //                                      FftType fft_type);
+ // Ctor. `fft_size` must be a supported size (see
+ // Pffft::IsValidFftSize()). If not supported, the code will crash.
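+ //
+ // Usage sketch (illustrative; 512 is just one valid real-FFT size):
+ //   Pffft fft(512, Pffft::FftType::kReal);
+ //   auto in = fft.CreateBuffer();
+ //   auto out = fft.CreateBuffer();
+ //   // ... write samples via in->GetView() ...
+ //   fft.ForwardTransform(*in, out.get(), /*ordered=*/true);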
+ Pffft(size_t fft_size, FftType fft_type);
+ Pffft(const Pffft&) = delete;
+ Pffft& operator=(const Pffft&) = delete;
+ ~Pffft();
+
+ // Returns true if the FFT size is supported.
+ static bool IsValidFftSize(size_t fft_size, FftType fft_type);
+
+ // Returns true if SIMD code optimizations are being used.
+ static bool IsSimdEnabled();
+
+ // Creates a buffer of the right size.
+ std::unique_ptr<FloatBuffer> CreateBuffer() const;
+
+ // TODO(https://crbug.com/webrtc/9577): Overload with rtc::ArrayView args.
+ // Computes the forward fast Fourier transform.
+ void ForwardTransform(const FloatBuffer& in, FloatBuffer* out, bool ordered);
+ // Computes the backward fast Fourier transform.
+ void BackwardTransform(const FloatBuffer& in, FloatBuffer* out, bool ordered);
+
+ // Multiplies the frequency components of `fft_x` and `fft_y` and accumulates
+ // them into `out`. The arrays must have been obtained with
+ // ForwardTransform(..., /*ordered=*/false) - i.e., `fft_x` and `fft_y` must
+ // not be ordered.
+ void FrequencyDomainConvolve(const FloatBuffer& fft_x,
+ const FloatBuffer& fft_y,
+ FloatBuffer* out,
+ float scaling = 1.f);
+
+ private:
+ const size_t fft_size_;
+ const FftType fft_type_;
+ PFFFT_Setup* pffft_status_;
+ float* const scratch_buffer_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build
new file mode 100644
index 0000000000..02898359bf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+ ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+ ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+ CXXFLAGS += [
+ "-mfpu=neon"
+ ]
+
+ DEFINES["WEBRTC_ARCH_ARM"] = True
+ DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+ DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+ DEFINES["MIPS32_LE"] = True
+ DEFINES["MIPS_FPU_LE"] = True
+ DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+ DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+ DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+ DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+ DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+ DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+ DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+ DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+ OS_LIBS += [
+ "android_support",
+ "unwind"
+ ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+ CXXFLAGS += [
+ "-msse2"
+ ]
+
+ OS_LIBS += [
+ "android_support"
+ ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+ CXXFLAGS += [
+ "-msse2"
+ ]
+
+ DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+ DEFINES["_GNU_SOURCE"] = True
+
+Library("pffft_wrapper_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc
new file mode 100644
index 0000000000..2ad6849cd4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+
+#include "test/gtest.h"
+#include "third_party/pffft/src/pffft.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr size_t kMaxValidSizeCheck = 1024;
+
+static constexpr int kFftSizes[] = {
+ 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5 * 96, 512,
+ 576, 5 * 128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864};
+
+void CreatePffftWrapper(size_t fft_size, Pffft::FftType fft_type) {
+ Pffft pffft_wrapper(fft_size, fft_type);
+}
+
+float* AllocateScratchBuffer(size_t fft_size, bool complex_fft) {
+ return static_cast<float*>(
+ pffft_aligned_malloc(fft_size * (complex_fft ?
2 : 1) * sizeof(float)));
+}
+
+double frand() {
+ return std::rand() / static_cast<double>(RAND_MAX);
+}
+
+void ExpectArrayViewsEquality(rtc::ArrayView<const float> a,
+ rtc::ArrayView<const float> b) {
+ ASSERT_EQ(a.size(), b.size());
+ for (size_t i = 0; i < a.size(); ++i) {
+ SCOPED_TRACE(i);
+ EXPECT_EQ(a[i], b[i]);
+ }
+}
+
+// Compares the output of the PFFFT C++ wrapper to that of the C PFFFT.
+// Bit-exactness is expected.
+void PffftValidateWrapper(size_t fft_size, bool complex_fft) {
+ // Always use the same seed to avoid flakiness.
+ std::srand(0);
+
+ // Init PFFFT.
+ PFFFT_Setup* pffft_status =
+ pffft_new_setup(fft_size, complex_fft ? PFFFT_COMPLEX : PFFFT_REAL);
+ ASSERT_TRUE(pffft_status) << "FFT size (" << fft_size << ") not supported.";
+ size_t num_floats = fft_size * (complex_fft ? 2 : 1);
+ int num_bytes = static_cast<int>(num_floats) * sizeof(float);
+ float* in = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+ float* out = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+ float* scratch = AllocateScratchBuffer(fft_size, complex_fft);
+
+ // Init PFFFT C++ wrapper.
+ Pffft::FftType fft_type =
+ complex_fft ? Pffft::FftType::kComplex : Pffft::FftType::kReal;
+ ASSERT_TRUE(Pffft::IsValidFftSize(fft_size, fft_type));
+ Pffft pffft_wrapper(fft_size, fft_type);
+ auto in_wrapper = pffft_wrapper.CreateBuffer();
+ auto out_wrapper = pffft_wrapper.CreateBuffer();
+
+ // Input and output buffers views.
+ rtc::ArrayView<float> in_view(in, num_floats);
+ rtc::ArrayView<float> out_view(out, num_floats);
+ auto in_wrapper_view = in_wrapper->GetView();
+ EXPECT_EQ(in_wrapper_view.size(), num_floats);
+ auto out_wrapper_view = out_wrapper->GetConstView();
+ EXPECT_EQ(out_wrapper_view.size(), num_floats);
+
+ // Random input data.
+ for (size_t i = 0; i < num_floats; ++i) {
+ in_wrapper_view[i] = in[i] = static_cast<float>(frand() * 2.0 - 1.0);
+ }
+
+ // Forward transform.
+ pffft_transform(pffft_status, in, out, scratch, PFFFT_FORWARD);
+ pffft_wrapper.ForwardTransform(*in_wrapper, out_wrapper.get(),
+ /*ordered=*/false);
+ ExpectArrayViewsEquality(out_view, out_wrapper_view);
+
+ // Copy the FFT results into the input buffers to compute the backward FFT.
+ std::copy(out_view.begin(), out_view.end(), in_view.begin());
+ std::copy(out_wrapper_view.begin(), out_wrapper_view.end(),
+ in_wrapper_view.begin());
+
+ // Backward transform.
+ pffft_transform(pffft_status, in, out, scratch, PFFFT_BACKWARD);
+ pffft_wrapper.BackwardTransform(*in_wrapper, out_wrapper.get(),
+ /*ordered=*/false);
+ ExpectArrayViewsEquality(out_view, out_wrapper_view);
+
+ pffft_destroy_setup(pffft_status);
+ pffft_aligned_free(in);
+ pffft_aligned_free(out);
+ pffft_aligned_free(scratch);
+}
+
+} // namespace
+
+TEST(PffftTest, CreateWrapperWithValidSize) {
+ for (size_t fft_size = 0; fft_size < kMaxValidSizeCheck; ++fft_size) {
+ SCOPED_TRACE(fft_size);
+ if (Pffft::IsValidFftSize(fft_size, Pffft::FftType::kReal)) {
+ CreatePffftWrapper(fft_size, Pffft::FftType::kReal);
+ }
+ if (Pffft::IsValidFftSize(fft_size, Pffft::FftType::kComplex)) {
+ CreatePffftWrapper(fft_size, Pffft::FftType::kComplex);
+ }
+ }
+}
+
+#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+class PffftInvalidSizeDeathTest : public ::testing::Test,
+ public ::testing::WithParamInterface<size_t> {
+};
+
+TEST_P(PffftInvalidSizeDeathTest, DoNotCreateRealWrapper) {
+ size_t fft_size = GetParam();
+ ASSERT_FALSE(Pffft::IsValidFftSize(fft_size, Pffft::FftType::kReal));
+ EXPECT_DEATH(CreatePffftWrapper(fft_size, Pffft::FftType::kReal), "");
+}
+
+TEST_P(PffftInvalidSizeDeathTest, DoNotCreateComplexWrapper) {
+ size_t fft_size = GetParam();
+ ASSERT_FALSE(Pffft::IsValidFftSize(fft_size, Pffft::FftType::kComplex));
+ EXPECT_DEATH(CreatePffftWrapper(fft_size, Pffft::FftType::kComplex), "");
+}
+
+INSTANTIATE_TEST_SUITE_P(PffftTest,
+ PffftInvalidSizeDeathTest,
+ ::testing::Values(17,
+ 33,
+ 65,
+ 97,
+ 129,
+ 161,
+ 193,
+ 257,
+ 289,
+ 385,
+ 481,
+ 513,
+ 577,
+ 641,
+ 801,
+ 865,
+ 1025));
+
+#endif
+
+// TODO(https://crbug.com/webrtc/9577): Enable once SIMD is always enabled.
+TEST(PffftTest, DISABLED_CheckSimd) {
+ EXPECT_TRUE(Pffft::IsSimdEnabled());
+}
+
+TEST(PffftTest, FftBitExactness) {
+ for (int fft_size : kFftSizes) {
+ SCOPED_TRACE(fft_size);
+ if (fft_size != 16) {
+ PffftValidateWrapper(fft_size, /*complex_fft=*/false);
+ }
+ PffftValidateWrapper(fft_size, /*complex_fft=*/true);
+ }
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn
new file mode 100644
index 0000000000..71e079d3a3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn
@@ -0,0 +1,69 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+rtc_library("vad") {
+ visibility = [
+ "../*",
+ "../../../rtc_tools:*",
+ ]
+ sources = [
+ "common.h",
+ "gmm.cc",
+ "gmm.h",
+ "noise_gmm_tables.h",
+ "pitch_based_vad.cc",
+ "pitch_based_vad.h",
+ "pitch_internal.cc",
+ "pitch_internal.h",
+ "pole_zero_filter.cc",
+ "pole_zero_filter.h",
+ "standalone_vad.cc",
+ "standalone_vad.h",
+ "vad_audio_proc.cc",
+ "vad_audio_proc.h",
+ "vad_audio_proc_internal.h",
+ "vad_circular_buffer.cc",
+ "vad_circular_buffer.h",
+ "voice_activity_detector.cc",
+ "voice_activity_detector.h",
+ "voice_gmm_tables.h",
+ ]
+ deps = [
+ "../../../audio/utility:audio_frame_operations",
+ "../../../common_audio",
+ "../../../common_audio:common_audio_c",
+ "../../../common_audio/third_party/ooura:fft_size_256",
+ "../../../rtc_base:checks",
+ "../../audio_coding:isac_vad",
+ ]
+}
+
+if (rtc_include_tests) {
+ rtc_library("vad_unittests") {
+ testonly = true
+ sources = [
+ "gmm_unittest.cc",
+ "pitch_based_vad_unittest.cc",
+ "pitch_internal_unittest.cc",
+ "pole_zero_filter_unittest.cc",
+ "standalone_vad_unittest.cc",
+ "vad_audio_proc_unittest.cc",
+ "vad_circular_buffer_unittest.cc",
+ "voice_activity_detector_unittest.cc",
+ ]
+ deps = [
+ ":vad",
+ "../../../common_audio",
+ "../../../test:fileutils",
+ "../../../test:test_support",
+ "//testing/gmock",
+ "//testing/gtest",
+ ]
+ }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/common.h b/third_party/libwebrtc/modules/audio_processing/vad/common.h
new file mode 100644
index 0000000000..b5a5fb385b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/common.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+
+#include <stddef.h>
+
+static const int kSampleRateHz = 16000;
+static const size_t kLength10Ms = kSampleRateHz / 100;
+static const size_t kMaxNumFrames = 4;
+
+struct AudioFeatures {
+ double log_pitch_gain[kMaxNumFrames];
+ double pitch_lag_hz[kMaxNumFrames];
+ double spectral_peak[kMaxNumFrames];
+ double rms[kMaxNumFrames];
+ size_t num_frames;
+ bool silence;
+};
+
+#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc b/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc
new file mode 100644
index 0000000000..3b8764c4d0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/gmm.h"
+
+#include <math.h>
+
+namespace webrtc {
+
+static const int kMaxDimension = 10;
+
+static void RemoveMean(const double* in,
+ const double* mean_vec,
+ int dimension,
+ double* out) {
+ for (int n = 0; n < dimension; ++n)
+ out[n] = in[n] - mean_vec[n];
+}
+
+static double ComputeExponent(const double* in,
+ const double* covar_inv,
+ int dimension) {
+ double q = 0;
+ for (int i = 0; i < dimension; ++i) {
+ double v = 0;
+ for (int j = 0; j < dimension; j++)
+ v += (*covar_inv++) * in[j];
+ q += v * in[i];
+ }
+ q *= -0.5;
+ return q;
+}
+
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
+ if (gmm_parameters.dimension > kMaxDimension) {
+ return -1; // This is an invalid pdf, so the caller can check for it.
+ }
+ double f = 0;
+ double v[kMaxDimension];
+ const double* mean_vec = gmm_parameters.mean;
+ const double* covar_inv = gmm_parameters.covar_inverse;
+
+ for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
+ RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
+ double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
+ gmm_parameters.weight[n];
+ f += exp(q);
+ mean_vec += gmm_parameters.dimension;
+ covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
+ }
+ return f;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm.h b/third_party/libwebrtc/modules/audio_processing/vad/gmm.h
new file mode 100644
index 0000000000..d9d68ecfdc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_
+#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_
+
+namespace webrtc {
+
+// A structure that specifies a GMM.
+// A GMM is formulated as
+// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
+// w[num_mixtures - 1] * mixture[num_mixtures - 1];
+// where a 'mixture' is a Gaussian density.
+
+struct GmmParameters {
+ // weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n]));
+ // where cov[n] is the covariance matrix of mixture n.
+ const double* weight;
+ // Pointer to the first element of a `num_mixtures`x`dimension` matrix
+ // where the kth row is the mean of the kth mixture.
+ const double* mean;
+ // Pointer to the first element of a `num_mixtures`x`dimension`x`dimension`
+ // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
+ // matrix of the kth mixture.
+ const double* covar_inverse;
+ // Dimensionality of the mixtures.
+ int dimension;
+ // Number of mixtures.
+ int num_mixtures;
+};
+
+// Evaluates the given GMM, specified by `gmm_parameters`, at the given point
+// `x`. If the dimensionality of the GMM is larger than the maximum dimension
+// accepted by this function, -1 is returned.
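+// For example, a single 1-D mixture with w[0] = 1, zero mean and unit variance
+// has weight[0] = -0.5 * log(2*pi), so EvaluateGmm() at x = 0 returns
+// exp(weight[0]) = 1/sqrt(2*pi) ~= 0.3989, the peak of the standard normal
+// pdf.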
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
+
+} // namespace webrtc
+#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc
new file mode 100644
index 0000000000..d895afab7b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/gmm.h"
+
+#include <math.h>
+
+#include "modules/audio_processing/vad/noise_gmm_tables.h"
+#include "modules/audio_processing/vad/voice_gmm_tables.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(GmmTest, EvaluateGmm) {
+ GmmParameters noise_gmm;
+ GmmParameters voice_gmm;
+
+ // Setup noise GMM.
+ noise_gmm.dimension = kNoiseGmmDim;
+ noise_gmm.num_mixtures = kNoiseGmmNumMixtures;
+ noise_gmm.weight = kNoiseGmmWeights;
+ noise_gmm.mean = &kNoiseGmmMean[0][0];
+ noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+ // Setup voice GMM.
+ voice_gmm.dimension = kVoiceGmmDim;
+ voice_gmm.num_mixtures = kVoiceGmmNumMixtures;
+ voice_gmm.weight = kVoiceGmmWeights;
+ voice_gmm.mean = &kVoiceGmmMean[0][0];
+ voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+
+ // Test vectors. These are the mean of the GMM means.
+ const double kXVoice[kVoiceGmmDim] = {-1.35893162459863, 602.862491970368,
+ 178.022069191324};
+ const double kXNoise[kNoiseGmmDim] = {-2.33443722724409, 2827.97828765184,
+ 141.114178166812};
+
+ // Expected pdf values. These values are computed in MATLAB using EvalGmm.m
+ const double kPdfNoise = 1.88904409403101e-07;
+ const double kPdfVoice = 1.30453996982266e-06;
+
+ // Relative error should be smaller than the following value.
+ const double kAcceptedRelativeErr = 1e-10;
+
+ // Test Voice.
+ double pdf = EvaluateGmm(kXVoice, voice_gmm);
+ EXPECT_GT(pdf, 0);
+ double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice;
+ EXPECT_LE(relative_error, kAcceptedRelativeErr);
+
+ // Test Noise.
+ pdf = EvaluateGmm(kXNoise, noise_gmm);
+ EXPECT_GT(pdf, 0);
+ relative_error = fabs(pdf - kPdfNoise) / kPdfNoise;
+ EXPECT_LE(relative_error, kAcceptedRelativeErr);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h b/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h
new file mode 100644
index 0000000000..944a5401cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for inactive segments. Generated by MakeGmmTables.m.
+ +#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ +#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ + +namespace webrtc { + +static const int kNoiseGmmNumMixtures = 12; +static const int kNoiseGmmDim = 3; + +static const double + kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = { + {{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, + {4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, + {1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, + {{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, + {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, + {5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, + {{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, + {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, + {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, + {{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, + {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, + {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, + {{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, + {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, + {2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, + {{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, + {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, + {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, + {{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, + {5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, + {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, + {{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, + {5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, + {6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, + {{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, + {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, + {5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, + {{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, + {4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, + {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, + {{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, + {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, + {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, + {{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, + {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, + {5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; + +static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { + {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, + {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, + {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, + {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, + {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, + {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, + {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, + {-2.19290322814343e+00, 3.98325506609408e+03, 
2.13249167359934e+02},
+    {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
+    {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
+    {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
+    {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
+
+static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
+    -1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
+    -1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
+    -1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
+    -1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
new file mode 100644
index 0000000000..68e60dc66a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
@@ -0,0 +1,120 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <math.h>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/noise_gmm_tables.h"
+#include "modules/audio_processing/vad/vad_circular_buffer.h"
+#include "modules/audio_processing/vad/voice_gmm_tables.h"
+
+namespace webrtc {
+
+static_assert(kNoiseGmmDim == kVoiceGmmDim,
+              "noise and voice gmm dimension not equal");
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+  const double kLimHigh = 0.99;
+  const double kLimLow = 0.01;
+
+  if (p > kLimHigh)
+    p = kLimHigh;
+  else if (p < kLimLow)
+    p = kLimLow;
+  return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+    : p_prior_(kInitialPriorProbability),
+      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
+  // Set up the noise GMM.
+  noise_gmm_.dimension = kNoiseGmmDim;
+  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+  noise_gmm_.weight = kNoiseGmmWeights;
+  noise_gmm_.mean = &kNoiseGmmMean[0][0];
+  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+  // Set up the voice GMM.
+  voice_gmm_.dimension = kVoiceGmmDim;
+  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+  voice_gmm_.weight = kVoiceGmmWeights;
+  voice_gmm_.mean = &kVoiceGmmMean[0][0];
+  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+                                      double* p_combined) {
+  double p;
+  double gmm_features[3];
+  double pdf_features_given_voice;
+  double pdf_features_given_noise;
+  // These limits are the same as in the MATLAB implementation
+  // 'VoicingProbGMM()'.
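+  // For each frame, the per-frame posterior computed below is the standard
+  // two-class Bayes update,
+  //   p = p_prior * pdf_voice(x) /
+  //       (p_prior * pdf_voice(x) + (1 - p_prior) * pdf_noise(x)),
+  // with features outside the plausible voiced range first pinned to the
+  // noise model (and very high pitch gains to the voice model) via kEps.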
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (size_t n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice /
+        (pdf_features_given_voice * p_prior_ +
+         pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine the pitch-based probability with the standalone probability
+    // before updating the prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit the prior probability. With a zero prior probability the
+    // posterior probability is always zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h
new file mode 100644
index 0000000000..fa3abc2d28
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/gmm.h"
+
+namespace webrtc {
+
+class VadCircularBuffer;
+
+// Computes the probability that the input audio frame is active, given
+// the corresponding pitch gain and lag of the frame.
+class PitchBasedVad {
+ public:
+  PitchBasedVad();
+  ~PitchBasedVad();
+
+  // Computes the pitch-based voicing probability, given the features.
+  // features: a structure containing features required for computing voicing
+  //           probabilities.
+  //
+  // p_combined: an array which contains the combined activity probabilities
+  //             computed prior to the call of this function. The method then
+  //             computes the voicing probabilities, combines them with the
+  //             given values, and returns the results in `p_combined`.
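+  //
+  // Minimal usage sketch (hypothetical values, mirroring the unit test):
+  //
+  //   PitchBasedVad vad;
+  //   AudioFeatures features;   // e.g. from VadAudioProc::ExtractFeatures().
+  //   features.num_frames = 1;  // ... plus the per-frame feature arrays.
+  //   double p_combined[1] = {0.5};  // 0.5 is the neutral prior estimate.
+  //   if (vad.VoicingProbability(features, p_combined) == 0) {
+  //     // p_combined[0] now holds the combined activity probability.
+  //   }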
+  int VoicingProbability(const AudioFeatures& features, double* p_combined);
+
+ private:
+  int UpdatePrior(double p);
+
+  // TODO(turajs): maybe define this at a higher level (maybe as an enum) so
+  // that all the code recognizes it as "no-error."
+  static const int kNoError = 0;
+
+  GmmParameters noise_gmm_;
+  GmmParameters voice_gmm_;
+
+  double p_prior_;
+
+  std::unique_ptr<VadCircularBuffer> circular_buffer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc
new file mode 100644
index 0000000000..4a8331a769
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc
@@ -0,0 +1,75 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(PitchBasedVadTest, VoicingProbabilityTest) {
+  std::string spectral_peak_file_name =
+      test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
+  FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb");
+  ASSERT_TRUE(spectral_peak_file != NULL);
+
+  std::string pitch_gain_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat");
+  FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_gain_file != NULL);
+
+  std::string pitch_lag_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat");
+  FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_lag_file != NULL);
+
+  std::string voicing_prob_file_name =
+      test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat");
+  FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb");
+  ASSERT_TRUE(voicing_prob_file != NULL);
+
+  PitchBasedVad vad_;
+
+  double reference_activity_probability;
+
+  AudioFeatures audio_features;
+  memset(&audio_features, 0, sizeof(audio_features));
+  audio_features.num_frames = 1;
+  while (fread(audio_features.spectral_peak,
+               sizeof(audio_features.spectral_peak[0]), 1,
+               spectral_peak_file) == 1u) {
+    double p;
+    ASSERT_EQ(1u, fread(audio_features.log_pitch_gain,
+                        sizeof(audio_features.log_pitch_gain[0]), 1,
+                        pitch_gain_file));
+    ASSERT_EQ(1u,
+              fread(audio_features.pitch_lag_hz,
+                    sizeof(audio_features.pitch_lag_hz[0]), 1, pitch_lag_file));
+    ASSERT_EQ(1u, fread(&reference_activity_probability,
+                        sizeof(reference_activity_probability), 1,
+                        voicing_prob_file));
+
+    p = 0.5;  // Initialize to the neutral value for combining probabilities.
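+    // With the product combination used by VoicingProbability(),
+    //   p' = p_v * p / (p_v * p + (1 - p_v) * (1 - p)),
+    // p = 0.5 cancels out and leaves the pitch-based probability p_v
+    // unchanged.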
+    EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
+    EXPECT_NEAR(p, reference_activity_probability, 0.01);
+  }
+
+  fclose(spectral_peak_file);
+  fclose(pitch_gain_file);
+  fclose(pitch_lag_file);
+  fclose(voicing_prob_file);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc
new file mode 100644
index 0000000000..8f86918644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_internal.h"
+
+#include <cmath>
+
+namespace webrtc {
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as follows:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in the pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms.
+// This is like interpolating 4-to-6 and keeping the odd samples.
+// The reason behind this is that LPC coefficients are computed for the first
+// half of each 10-ms interval.
+static void PitchInterpolation(double old_val, const double* in, double* out) {
+  out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
+  out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
+  out[2] = 0.5 * in[2] + 0.5 * in[3];
+}
+
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz) {
+  // Gain interpolation is in the log domain, also returned in the log domain.
+  for (int n = 0; n < num_in_frames; n++)
+    gains[n] = log(gains[n] + 1e-12);
+
+  // Interpolate lags and gains.
+  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
+  *log_old_gain = gains[num_in_frames - 1];
+  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
+  *old_lag = lags[num_in_frames - 1];
+
+  // Convert pitch lags to Hertz.
+  for (int n = 0; n < num_out_frames; n++) {
+    pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h
new file mode 100644
index 0000000000..e382c1fbde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+
+namespace webrtc {
+
+// TODO(turajs): Write a description of this function. Also be consistent with
+// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
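+//
+// In the meantime, a sketch from the implementation (pitch_internal.cc):
+// resamples the pitch parameters of `num_in_frames` 7.5-ms sub-frames to
+// `num_out_frames` 10-ms sub-frames via a 4-to-3 linear interpolation.
+// Gains are interpolated and returned in the log domain; lags are
+// interpolated and then converted to Hertz using `sampling_rate_hz`.
+// `log_old_gain` and `old_lag` carry the last input values as state across
+// calls.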
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc
new file mode 100644
index 0000000000..c851421ba7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_internal.h"
+
+#include <math.h>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(PitchInternalTest, test) {
+  const int kSamplingRateHz = 8000;
+  const int kNumInputParameters = 4;
+  const int kNumOutputParameters = 3;
+  // Inputs.
+  double log_old_gain = log(0.5);
+  double gains[] = {0.6, 0.2, 0.5, 0.4};
+
+  double old_lag = 70;
+  double lags[] = {90, 111, 122, 50};
+
+  // Expected outputs.
+  double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507,
+                                      -0.80471895621705};
+  double expected_log_old_gain = log(gains[kNumInputParameters - 1]);
+
+  double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121,
+                                    93.0232558139535};
+  double expected_old_lag = lags[kNumInputParameters - 1];
+
+  double log_pitch_gain[kNumOutputParameters];
+  double pitch_lag_hz[kNumInputParameters];
+
+  GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters,
+                              kNumOutputParameters, &log_old_gain, &old_lag,
+                              log_pitch_gain, pitch_lag_hz);
+
+  for (int n = 0; n < 3; n++) {
+    EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6);
+    EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8);
+  }
+  EXPECT_NEAR(old_lag, expected_old_lag, 1e-6);
+  EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc
new file mode 100644
index 0000000000..e7a611309c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+namespace webrtc {
+
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+                                       size_t order_numerator,
+                                       const float* denominator_coefficients,
+                                       size_t order_denominator) {
+  if (order_numerator > kMaxFilterOrder ||
+      order_denominator > kMaxFilterOrder || denominator_coefficients[0] == 0 ||
+      numerator_coefficients == NULL || denominator_coefficients == NULL)
+    return NULL;
+  return new PoleZeroFilter(numerator_coefficients, order_numerator,
+                            denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+                               size_t order_numerator,
+                               const float* denominator_coefficients,
+                               size_t order_denominator)
+    : past_input_(),
+      past_output_(),
+      numerator_coefficients_(),
+      denominator_coefficients_(),
+      order_numerator_(order_numerator),
+      order_denominator_(order_denominator),
+      highest_order_(std::max(order_denominator, order_numerator)) {
+  memcpy(numerator_coefficients_, numerator_coefficients,
+         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+  memcpy(denominator_coefficients_, denominator_coefficients,
+         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+  if (denominator_coefficients_[0] != 1) {
+    // Normalize so that the leading denominator coefficient is 1.
+    for (size_t n = 0; n <= order_numerator_; n++)
+      numerator_coefficients_[n] /= denominator_coefficients_[0];
+    for (size_t n = 0; n <= order_denominator_; n++)
+      denominator_coefficients_[n] /= denominator_coefficients_[0];
+  }
+}
+
+template <typename T>
+static float FilterArPast(const T* past,
+                          size_t order,
+                          const float* coefficients) {
+  float sum = 0.0f;
+  size_t past_index = order - 1;
+  for (size_t k = 1; k <= order; k++, past_index--)
+    sum += coefficients[k] * past[past_index];
+  return sum;
+}
+
+int PoleZeroFilter::Filter(const int16_t* in,
+                           size_t num_input_samples,
+                           float* output) {
+  if (in == NULL || output == NULL)
+    return -1;
+  // This is the typical case, just a memcpy.
+  const size_t k = std::min(num_input_samples, highest_order_);
+  size_t n;
+  for (n = 0; n < k; n++) {
+    output[n] = in[n] * numerator_coefficients_[0];
+    output[n] += FilterArPast(&past_input_[n], order_numerator_,
+                              numerator_coefficients_);
+    output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+                              denominator_coefficients_);
+
+    past_input_[n + order_numerator_] = in[n];
+    past_output_[n + order_denominator_] = output[n];
+  }
+  if (highest_order_ < num_input_samples) {
+    for (size_t m = 0; n < num_input_samples; n++, m++) {
+      output[n] = in[n] * numerator_coefficients_[0];
+      output[n] +=
+          FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
+      output[n] -= FilterArPast(&output[m], order_denominator_,
+                                denominator_coefficients_);
+    }
+    // Record into the past signal.
+    memcpy(past_input_, &in[num_input_samples - order_numerator_],
+           sizeof(in[0]) * order_numerator_);
+    memcpy(past_output_, &output[num_input_samples - order_denominator_],
+           sizeof(output[0]) * order_denominator_);
+  } else {
+    // Odd case where the length of the input is shorter than the filter order.
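+    // No output sample then depends on anything older than the stored state,
+    // so it is enough to age the state: the loop above already appended the
+    // new samples at past_*_[n + order], and the memmoves below shift those
+    // windows left by `num_input_samples`.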
+    memmove(past_input_, &past_input_[num_input_samples],
+            order_numerator_ * sizeof(past_input_[0]));
+    memmove(past_output_, &past_output_[num_input_samples],
+            order_denominator_ * sizeof(past_output_[0]));
+  }
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h
new file mode 100644
index 0000000000..11a05114d1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+class PoleZeroFilter {
+ public:
+  ~PoleZeroFilter() {}
+
+  static PoleZeroFilter* Create(const float* numerator_coefficients,
+                                size_t order_numerator,
+                                const float* denominator_coefficients,
+                                size_t order_denominator);
+
+  int Filter(const int16_t* in, size_t num_input_samples, float* output);
+
+ private:
+  PoleZeroFilter(const float* numerator_coefficients,
+                 size_t order_numerator,
+                 const float* denominator_coefficients,
+                 size_t order_denominator);
+
+  static const int kMaxFilterOrder = 24;
+
+  int16_t past_input_[kMaxFilterOrder * 2];
+  float past_output_[kMaxFilterOrder * 2];
+
+  float numerator_coefficients_[kMaxFilterOrder + 1];
+  float denominator_coefficients_[kMaxFilterOrder + 1];
+
+  size_t order_numerator_;
+  size_t order_denominator_;
+  size_t highest_order_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc
new file mode 100644
index 0000000000..8088b40125
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+
+#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+static const int kInputSamples = 50;
+
+static const int16_t kInput[kInputSamples] = {
+    -2136,  -7116, 10715,  2464,   3164,   8139,   11393, 24013, -32117, -5544,
+    -27740, 10181, 14190,  -24055, -15912, 17393,  6359,  -9950, -13894, 32432,
+    -23944, 3437,  -8381,  19768,  3087,   -19795, -5920, 13310, 1407,   3876,
+    4059,   3524,  -23130, 19121,  -27900, -24840, 4089,  21422, -3625,  3015,
+    -11236, 28856, 13424,  6571,   -19761, -6361,  15821, -9469, 29727,  32229};
+
+static const float kReferenceOutput[kInputSamples] = {
+    -2082.230472f,  -6878.572941f,  10697.090871f,  2358.373952f,
+    2973.936512f,   7738.580650f,   10690.803213f,  22687.091576f,
+    -32676.684717f, -5879.621684f,  -27359.297432f, 10368.735888f,
+    13994.584604f,  -23676.126249f, -15078.250390f, 17818.253338f,
+    6577.743123f,   -9498.369315f,  -13073.651079f, 32460.026588f,
+    -23391.849347f, 3953.805667f,   -7667.761363f,  19995.153447f,
+    3185.575477f,   -19207.365160f, -5143.103201f,  13756.317237f,
+    1779.654794f,   4142.269755f,   4209.475034f,   3572.991789f,
+    -22509.089546f, 19307.878964f,  -27060.439759f, -23319.042810f,
+    5547.685267f,   22312.718676f,  -2707.309027f,  3852.358490f,
+    -10135.510093f, 29241.509970f,  13394.397233f,  6340.721417f,
+    -19510.207905f, -5908.442086f,  15882.301634f,  -9211.335255f,
+    29253.056735f,  30874.443046f};
+
+class PoleZeroFilterTest : public ::testing::Test {
+ protected:
+  PoleZeroFilterTest()
+      : my_filter_(PoleZeroFilter::Create(kCoeffNumerator,
+                                          kFilterOrder,
+                                          kCoeffDenominator,
+                                          kFilterOrder)) {}
+
+  ~PoleZeroFilterTest() override {}
+
+  void FilterSubframes(int num_subframes);
+
+ private:
+  void TestClean();
+  std::unique_ptr<PoleZeroFilter> my_filter_;
+};
+
+void PoleZeroFilterTest::FilterSubframes(int num_subframes) {
+  float output[kInputSamples];
+  const int num_subframe_samples = kInputSamples / num_subframes;
+  EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples);
+
+  for (int n = 0; n < num_subframes; n++) {
+    my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples,
+                       &output[n * num_subframe_samples]);
+  }
+  for (int n = 0; n < kInputSamples; n++) {
+    EXPECT_NEAR(output[n], kReferenceOutput[n], 1);
+  }
+}
+
+TEST_F(PoleZeroFilterTest, OneSubframe) {
+  FilterSubframes(1);
+}
+
+TEST_F(PoleZeroFilterTest, TwoSubframes) {
+  FilterSubframes(2);
+}
+
+TEST_F(PoleZeroFilterTest, FiveSubframes) {
+  FilterSubframes(5);
+}
+
+TEST_F(PoleZeroFilterTest, TenSubframes) {
+  FilterSubframes(10);
+}
+
+TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) {
+  FilterSubframes(25);
+}
+
+TEST_F(PoleZeroFilterTest, FiftySubframes) {
+  FilterSubframes(50);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc
new file mode 100644
index 0000000000..1397668eb4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include <string.h>
+
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = WebRtcVad_Create();
+  if (!vad)
+    return nullptr;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return nullptr;
+  }
+  return new StandaloneVad(vad);
+}
+
+int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames)
+    // Reset the buffer if it's full.
+    // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+    // can forgo the buffering.
+    index_ = 0;
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+int StandaloneVad::GetActivity(double* p, size_t length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const size_t num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary but small and non-zero.
+  else
+    p[0] = 0.5;  // 0.5 is the neutral value when combined with other
+                 // probabilities.
+  for (size_t n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h
new file mode 100644
index 0000000000..b08463374e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "modules/audio_processing/vad/common.h"
+
+namespace webrtc {
+
+class StandaloneVad {
+ public:
+  static StandaloneVad* Create();
+  ~StandaloneVad();
+
+  // Outputs:
+  //   p: a buffer where probabilities are written to.
+  //   length_p: number of elements of `p`.
+  //
+  // Return value:
+  //   -1: if no audio is stored or the VAD returns an error.
+  //    0: on success.
+  // In case of error the content of `p` is unchanged.
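+  //
+  // Minimal usage sketch (hypothetical buffers; mirrors the unit test):
+  //
+  //   std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  //   int16_t frame[kLength10Ms] = {0};  // 10 ms @ 16 kHz.
+  //   double p[3];
+  //   for (int i = 0; i < 3; ++i)
+  //     vad->AddAudio(frame, kLength10Ms);
+  //   int activity = vad->GetActivity(p, 3);  // 1, 0, or -1 on error.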
+  //
+  // Note that due to a high false-positive rate (a VAD decision of active
+  // while the processed audio is just background noise), the stand-alone VAD
+  // is used as a one-sided indicator. The activity probability is 0.5 if the
+  // frame is classified as active, and the probability is 0.01 if the audio
+  // is classified as passive. In this way, when probabilities are combined,
+  // the effect of the stand-alone VAD is neutral if the input is classified
+  // as active.
+  int GetActivity(double* p, size_t length_p);
+
+  // Expects 10 ms of 16 kHz audio to be pushed in.
+  int AddAudio(const int16_t* data, size_t length);
+
+  // Sets the aggressiveness of the VAD, 0 being the least aggressive and 3
+  // the most aggressive mode. Returns -1 if the input is less than 0 or
+  // larger than 3, otherwise 0 is returned.
+  int set_mode(int mode);
+  // Gets the aggressiveness of the current VAD.
+  int mode() const { return mode_; }
+
+ private:
+  explicit StandaloneVad(VadInst* vad);
+
+  static const size_t kMaxNum10msFrames = 3;
+
+  // TODO(turajs): Is there a way to use scoped-pointer here?
+  VadInst* vad_;
+  int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
+  size_t index_;
+  int mode_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
new file mode 100644
index 0000000000..0fa2ed78b1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(StandaloneVadTest, Api) {
+  std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  int16_t data[kLength10Ms] = {0};
+
+  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
+  EXPECT_EQ(-1, vad->AddAudio(data, 320));
+
+  const size_t kMaxNumFrames = 3;
+  double p[kMaxNumFrames];
+  for (size_t n = 0; n < kMaxNumFrames; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+
+  // Pretend `p` is shorter than it should be.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
+
+  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
+
+  // Ask for activity when the buffer is empty.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
+
+  // Should reset and result in one buffer.
+  for (size_t n = 0; n < kMaxNumFrames + 1; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+  EXPECT_EQ(0, vad->GetActivity(p, 1));
+
+  // Wrong modes.
+  EXPECT_EQ(-1, vad->set_mode(-1));
+  EXPECT_EQ(-1, vad->set_mode(4));
+
+  // Valid mode.
+  const int kMode = 2;
+  EXPECT_EQ(0, vad->set_mode(kMode));
+  EXPECT_EQ(kMode, vad->mode());
+}
+
+#if defined(WEBRTC_IOS)
+TEST(StandaloneVadTest, DISABLED_ActivityDetection) {
+#else
+TEST(StandaloneVadTest, ActivityDetection) {
+#endif
+  std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  const size_t kDataLength = kLength10Ms;
+  int16_t data[kDataLength] = {0};
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != NULL);
+
+  FILE* reference_file = fopen(
+      test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
+  ASSERT_TRUE(reference_file != NULL);
+
+  // Reference activities are prepared with 0 aggressiveness.
+  ASSERT_EQ(0, vad->set_mode(0));
+
+  // The stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
+  // reference file is created for 30 ms frames.
+  const int kNumVadFramesToProcess = 3;
+  int num_frames = 0;
+  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+    vad->AddAudio(data, kDataLength);
+    num_frames++;
+    if (num_frames == kNumVadFramesToProcess) {
+      num_frames = 0;
+      int reference_activity;
+      double p[kNumVadFramesToProcess];
+      EXPECT_EQ(1u, fread(&reference_activity, sizeof(reference_activity), 1,
+                          reference_file));
+      int activity = vad->GetActivity(p, kNumVadFramesToProcess);
+      EXPECT_EQ(reference_activity, activity);
+      if (activity != 0) {
+        // When active, probabilities are set to 0.5.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.5, p[n]);
+      } else {
+        // When inactive, probabilities are set to 0.01.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.01, p[n]);
+      }
+    }
+  }
+  fclose(reference_file);
+  fclose(pcm_file);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc
new file mode 100644
index 0000000000..aaf8214d7c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc
@@ -0,0 +1,275 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+#include "modules/audio_processing/vad/pitch_internal.h"
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
+#include "rtc_base/checks.h"
+extern "C" {
+#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
+#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
+#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
+#include "modules/audio_coding/codecs/isac/main/source/structs.h"
+}
+
+namespace webrtc {
+
+// The following structures are declared anonymous in iSAC's structs.h. To
+// forward declare them, we use this derived class trick.
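+//
+// That is, iSAC's structs.h typedefs anonymous structs (e.g.
+// `typedef struct { ... } PreFiltBankstr;`), which cannot be forward
+// declared. Deriving a named nested struct from the typedef-ed type gives
+// vad_audio_proc.h a name it can forward declare and hold in a
+// std::unique_ptr without including the iSAC headers.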
+struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
+struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
+
+static constexpr float kFrequencyResolution =
+    kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
+static constexpr int kSilenceRms = 5;
+
+// TODO(turajs): Make a Create or Init for VadAudioProc.
+VadAudioProc::VadAudioProc()
+    : audio_buffer_(),
+      num_buffer_samples_(kNumPastSignalSamples),
+      log_old_gain_(-2),
+      old_lag_(50),  // Arbitrary but valid as pitch-lag (in samples).
+      pitch_analysis_handle_(new PitchAnalysisStruct),
+      pre_filter_handle_(new PreFiltBankstr),
+      high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
+                                               kFilterOrder,
+                                               kCoeffDenominator,
+                                               kFilterOrder)) {
+  static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
+                    sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
+                "lpc analysis window incorrect size");
+  static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
+                "correlation weight incorrect size");
+
+  // TODO(turajs): Are we doing too much in the constructor?
+  float data[kDftSize];
+  // Make an FFT call to initialize the `ip_` and `w_fft_` tables.
+  ip_[0] = 0;
+  WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+  // TODO(turajs): Need to initialize high-pass filter.
+
+  // Initialize iSAC components.
+  WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
+  WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
+}
+
+VadAudioProc::~VadAudioProc() {}
+
+void VadAudioProc::ResetBuffer() {
+  memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
+         sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
+  num_buffer_samples_ = kNumPastSignalSamples;
+}
+
+int VadAudioProc::ExtractFeatures(const int16_t* frame,
+                                  size_t length,
+                                  AudioFeatures* features) {
+  features->num_frames = 0;
+  if (length != kNumSubframeSamples) {
+    return -1;
+  }
+
+  // High-pass filter to remove the DC component and very low frequency
+  // content. In our experience this high-pass filtering improves
+  // voiced/non-voiced classification.
+  if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
+                                &audio_buffer_[num_buffer_samples_]) != 0) {
+    return -1;
+  }
+
+  num_buffer_samples_ += kNumSubframeSamples;
+  if (num_buffer_samples_ < kBufferLength) {
+    return 0;
+  }
+  RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
+  features->num_frames = kNum10msSubframes;
+  features->silence = false;
+
+  Rms(features->rms, kMaxNumFrames);
+  for (size_t i = 0; i < kNum10msSubframes; ++i) {
+    if (features->rms[i] < kSilenceRms) {
+      // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
+      // Bail out here instead.
+      features->silence = true;
+      ResetBuffer();
+      return 0;
+    }
+  }
+
+  PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
+                kMaxNumFrames);
+  FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
+  ResetBuffer();
+  return 0;
+}
+
+// Computes |kLpcOrder + 1| correlation coefficients.
+void VadAudioProc::SubframeCorrelation(double* corr,
+                                       size_t length_corr,
+                                       size_t subframe_index) {
+  RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
+  double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
+  size_t buffer_index = subframe_index * kNumSubframeSamples;
+
+  for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
+    windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
+
+  WebRtcIsac_AutoCorr(corr, windowed_audio,
+                      kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
+}
+
+// Computes `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
+// The analysis window is 15 ms long and it is centered on the first half of
+// each 10-ms sub-frame. This is equivalent to computing LPC coefficients for
+// the first half of each 10 ms subframe.
+void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
+  RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
+  double corr[kLpcOrder + 1];
+  double reflec_coeff[kLpcOrder];
+  for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
+       i++, offset_lpc += kLpcOrder + 1) {
+    SubframeCorrelation(corr, kLpcOrder + 1, i);
+    corr[0] *= 1.0001;  // This makes Lev-Durb a bit more stable.
+    for (size_t k = 0; k < kLpcOrder + 1; k++) {
+      corr[k] *= kCorrWeight[k];
+    }
+    WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
+  }
+}
+
+// Fits a second-order curve to these 3 points and finds the location of the
+// extremum. The points are inverted before curve fitting.
+static float QuadraticInterpolation(float prev_val,
+                                    float curr_val,
+                                    float next_val) {
+  // Doing the interpolation in |1 / A(z)|^2.
+  float fractional_index = 0;
+  next_val = 1.0f / next_val;
+  prev_val = 1.0f / prev_val;
+  curr_val = 1.0f / curr_val;
+
+  fractional_index =
+      -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
+  RTC_DCHECK_LT(fabs(fractional_index), 1);
+  return fractional_index;
+}
+
+// 1 / A(z), where A(z) is defined by `lpc`, is a model of the spectral
+// envelope of the input signal. The local maximum of the spectral envelope
+// corresponds to the local minimum of A(z), so searching A(z) directly saves
+// one inversion. Furthermore, we find the first local maximum of the
+// magnitude squared, which saves one square root.
+void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
+                                          size_t length_f_peak) {
+  RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
+  double lpc[kNum10msSubframes * (kLpcOrder + 1)];
+  // For all sub-frames.
+  GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
+
+  const size_t kNumDftCoefficients = kDftSize / 2 + 1;
+  float data[kDftSize];
+
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    // Convert to float with zero padding.
+    memset(data, 0, sizeof(data));
+    for (size_t n = 0; n < kLpcOrder + 1; n++) {
+      data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
+    }
+    // Transform to the frequency domain.
+    WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+
+    size_t index_peak = 0;
+    float prev_magn_sqr = data[0] * data[0];
+    float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
+    float next_magn_sqr;
+    bool found_peak = false;
+    for (size_t n = 2; n < kNumDftCoefficients - 1; n++) {
+      next_magn_sqr =
+          data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
+      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+        found_peak = true;
+        index_peak = n - 1;
+        break;
+      }
+      prev_magn_sqr = curr_magn_sqr;
+      curr_magn_sqr = next_magn_sqr;
+    }
+    float fractional_index = 0;
+    if (!found_peak) {
+      // Check if |kNumDftCoefficients - 1| is the local minimum.
+      next_magn_sqr = data[1] * data[1];
+      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+        index_peak = kNumDftCoefficients - 1;
+      }
+    } else {
+      // A peak was found; do a simple quadratic interpolation to get a more
+      // accurate estimate of the peak location.
+      fractional_index =
+          QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
+    }
+    f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
+  }
+}
+
+// Using iSAC functions to estimate pitch gains & lags.
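+// Roughly: the buffered 16 kHz signal is split into 0-8 and 8-16 kHz bands,
+// iSAC's pitch analysis estimates 4 gains/lags per 30 ms on the lower band,
+// and GetSubframesPitchParameters() maps those 4 values onto the 3 10-ms
+// sub-frames, converting lags to Hertz with the lower-band rate
+// kSampleRateHz / 2.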
+void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
+                                 double* pitch_lags_hz,
+                                 size_t length) {
+  // TODO(turajs): This can be "imported" from iSAC, as can the next two
+  // constants.
+  RTC_DCHECK_GE(length, kNum10msSubframes);
+  const int kNumPitchSubframes = 4;
+  double gains[kNumPitchSubframes];
+  double lags[kNumPitchSubframes];
+
+  const int kNumSubbandFrameSamples = 240;
+  const int kNumLookaheadSamples = 24;
+
+  float lower[kNumSubbandFrameSamples];
+  float upper[kNumSubbandFrameSamples];
+  double lower_lookahead[kNumSubbandFrameSamples];
+  double upper_lookahead[kNumSubbandFrameSamples];
+  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
+                                    kNumLookaheadSamples];
+
+  // Split the signal into lower and upper bands.
+  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
+                                 upper, lower_lookahead, upper_lookahead,
+                                 pre_filter_handle_.get());
+  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
+                           pitch_analysis_handle_.get(), lags, gains);
+
+  // Lags are computed on the lower-band signal, with a sampling rate half of
+  // the input signal.
+  GetSubframesPitchParameters(
+      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
+      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
+}
+
+void VadAudioProc::Rms(double* rms, size_t length_rms) {
+  RTC_DCHECK_GE(length_rms, kNum10msSubframes);
+  size_t offset = kNumPastSignalSamples;
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    rms[i] = 0;
+    for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
+      rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
+    rms[i] = sqrt(rms[i] / kNumSubframeSamples);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h
new file mode 100644
index 0000000000..cbdd707129
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"  // AudioFeatures, kSampleR...
+
+namespace webrtc {
+
+class PoleZeroFilter;
+
+class VadAudioProc {
+ public:
+  // Forward declare iSAC structs.
+  struct PitchAnalysisStruct;
+  struct PreFiltBankstr;
+
+  VadAudioProc();
+  ~VadAudioProc();
+
+  int ExtractFeatures(const int16_t* audio_frame,
+                      size_t length,
+                      AudioFeatures* audio_features);
+
+  static constexpr size_t kDftSize = 512;
+
+ private:
+  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
+  void SubframeCorrelation(double* corr,
+                           size_t length_corr,
+                           size_t subframe_index);
+  void GetLpcPolynomials(double* lpc, size_t length_lpc);
+  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
+  void Rms(double* rms, size_t length_rms);
+  void ResetBuffer();
+
+  // To compute the spectral peak we perform LPC analysis to get the spectral
+  // envelope. For every 30 ms we compute 3 spectral peaks, therefore 3 LPC
+  // analyses.
+  // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
+  // we need 5 ms of past signal to create the input of the LPC analysis.
+  static constexpr size_t kNumPastSignalSamples =
+      static_cast<size_t>(kSampleRateHz / 200);
+
+  // TODO(turajs): maybe define this at a higher level (maybe as an enum) so
+  // that all the code recognizes it as "no-error."
+  static constexpr int kNoError = 0;
+
+  static constexpr size_t kNum10msSubframes = 3;
+  static constexpr size_t kNumSubframeSamples =
+      static_cast<size_t>(kSampleRateHz / 100);
+  // Samples in 30 ms @ given sampling rate.
+  static constexpr size_t kNumSamplesToProcess =
+      size_t{kNum10msSubframes} * kNumSubframeSamples;
+  static constexpr size_t kBufferLength =
+      size_t{kNumPastSignalSamples} + kNumSamplesToProcess;
+  static constexpr size_t kIpLength = kDftSize >> 1;
+  static constexpr size_t kWLength = kDftSize >> 1;
+  static constexpr size_t kLpcOrder = 16;
+
+  size_t ip_[kIpLength];
+  float w_fft_[kWLength];
+
+  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
+  float audio_buffer_[kBufferLength];
+  size_t num_buffer_samples_;
+
+  double log_old_gain_;
+  double old_lag_;
+
+  std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
+  std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
+  std::unique_ptr<PoleZeroFilter> high_pass_filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
new file mode 100644
index 0000000000..93589affe8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// These values should match MATLAB counterparts for unit-tests to pass.
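+// (kCorrWeight[k] appears to be the geometric lag window 0.985^k, e.g.
+// 0.985^2 = 0.970225; it is applied to the autocorrelation in
+// GetLpcPolynomials() before the Levinson-Durbin recursion.)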
+static const double kCorrWeight[] = { + 1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, + 0.913308, 0.899609, 0.886115, 0.872823, 0.859730, 0.846834, + 0.834132, 0.821620, 0.809296, 0.797156, 0.785199}; + +static const double kLpcAnalWin[] = { + 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, + 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, + 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, + 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, + 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, + 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, + 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, + 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, + 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, + 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, + 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, + 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, + 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, + 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, + 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, + 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, + 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, + 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, + 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, + 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, + 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, + 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, + 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, + 0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814, + 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, + 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, + 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, + 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, + 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, + 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, + 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, + 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, + 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, + 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, + 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, + 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, + 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, + 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, + 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, + 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000}; + +static const size_t kFilterOrder = 2; +static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f, + 0.974827f}; +static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f, + 0.972457f}; + +static_assert(kFilterOrder + 1 == + sizeof(kCoeffNumerator) / sizeof(kCoeffNumerator[0]), + "numerator 
coefficients incorrect size"); +static_assert(kFilterOrder + 1 == + sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]), + "denominator coefficients incorrect size"); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROCESSING_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc new file mode 100644 index 0000000000..0afed84c35 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// We don't test the value of pitch gain and lags as they are created by iSAC +// routines. However, interpolation of pitch-gain and lags is in a separate +// class and has its own unit-test. + +#include "modules/audio_processing/vad/vad_audio_proc.h" + +#include +#include + +#include + +#include "modules/audio_processing/vad/common.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) { + VadAudioProc audioproc; + + std::string peak_file_name = + test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat"); + FILE* peak_file = fopen(peak_file_name.c_str(), "rb"); + ASSERT_TRUE(peak_file != NULL); + + std::string pcm_file_name = + test::ResourcePath("audio_processing/agc/agc_audio", "pcm"); + FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb"); + ASSERT_TRUE(pcm_file != NULL); + + // Read 10 ms audio in each iteration. + const size_t kDataLength = kLength10Ms; + int16_t data[kDataLength] = {0}; + AudioFeatures features; + double sp[kMaxNumFrames]; + while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) { + audioproc.ExtractFeatures(data, kDataLength, &features); + if (features.num_frames > 0) { + ASSERT_LT(features.num_frames, kMaxNumFrames); + // Read reference values. + const size_t num_frames = features.num_frames; + ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file)); + for (size_t n = 0; n < features.num_frames; n++) + EXPECT_NEAR(features.spectral_peak[n], sp[n], 3); + } + } + + fclose(peak_file); + fclose(pcm_file); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc new file mode 100644 index 0000000000..31f14d7f64 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/vad/vad_circular_buffer.h"
+
+#include <stdlib.h>
+
+namespace webrtc {
+
+VadCircularBuffer::VadCircularBuffer(int buffer_size)
+    : buffer_(new double[buffer_size]),
+      is_full_(false),
+      index_(0),
+      buffer_size_(buffer_size),
+      sum_(0) {}
+
+VadCircularBuffer::~VadCircularBuffer() {}
+
+void VadCircularBuffer::Reset() {
+  is_full_ = false;
+  index_ = 0;
+  sum_ = 0;
+}
+
+VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) {
+  if (buffer_size <= 0)
+    return NULL;
+  return new VadCircularBuffer(buffer_size);
+}
+
+double VadCircularBuffer::Oldest() const {
+  if (!is_full_)
+    return buffer_[0];
+  else
+    return buffer_[index_];
+}
+
+double VadCircularBuffer::Mean() {
+  double m;
+  if (is_full_) {
+    m = sum_ / buffer_size_;
+  } else {
+    if (index_ > 0)
+      m = sum_ / index_;
+    else
+      m = 0;
+  }
+  return m;
+}
+
+void VadCircularBuffer::Insert(double value) {
+  if (is_full_) {
+    sum_ -= buffer_[index_];
+  }
+  sum_ += value;
+  buffer_[index_] = value;
+  index_++;
+  if (index_ >= buffer_size_) {
+    is_full_ = true;
+    index_ = 0;
+  }
+}
+
+int VadCircularBuffer::BufferLevel() {
+  if (is_full_)
+    return buffer_size_;
+  return index_;
+}
+
+int VadCircularBuffer::Get(int index, double* value) const {
+  int err = ConvertToLinearIndex(&index);
+  if (err < 0)
+    return -1;
+  *value = buffer_[index];
+  return 0;
+}
+
+int VadCircularBuffer::Set(int index, double value) {
+  int err = ConvertToLinearIndex(&index);
+  if (err < 0)
+    return -1;
+
+  sum_ -= buffer_[index];
+  buffer_[index] = value;
+  sum_ += value;
+  return 0;
+}
+
+int VadCircularBuffer::ConvertToLinearIndex(int* index) const {
+  if (*index < 0 || *index >= buffer_size_)
+    return -1;
+
+  if (!is_full_ && *index >= index_)
+    return -1;
+
+  *index = index_ - 1 - *index;
+  if (*index < 0)
+    *index += buffer_size_;
+  return 0;
+}
+
+int VadCircularBuffer::RemoveTransient(int width_threshold,
+                                       double val_threshold) {
+  if (!is_full_ && index_ < width_threshold + 2)
+    return 0;
+
+  int index_1 = 0;
+  int index_2 = width_threshold + 1;
+  double v = 0;
+  if (Get(index_1, &v) < 0)
+    return -1;
+  if (v < val_threshold) {
+    Set(index_1, 0);
+    int index;
+    for (index = index_2; index > index_1; index--) {
+      if (Get(index, &v) < 0)
+        return -1;
+      if (v < val_threshold)
+        break;
+    }
+    for (; index > index_1; index--) {
+      if (Set(index, 0.0) < 0)
+        return -1;
+    }
+  }
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h
new file mode 100644
index 0000000000..c1806f9e83
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
+
+#include <memory>
+
+namespace webrtc {
+
+// A circular buffer tailored to the needs of this project. It stores the
+// last K samples of the input and keeps track of their mean.
+//
+// It is used in class "PitchBasedActivity" to keep track of posterior
+// probabilities in the past few seconds. The posterior probabilities are used
+// to recursively update prior probabilities.
+class VadCircularBuffer {
+ public:
+  static VadCircularBuffer* Create(int buffer_size);
+  ~VadCircularBuffer();
+
+  // Whether the buffer has wrapped around.
+  bool is_full() const { return is_full_; }
+  // Get the oldest entry in the buffer.
+  double Oldest() const;
+  // Insert new value into the buffer.
+  void Insert(double value);
+  // Reset buffer, forget the past, start fresh.
+  void Reset();
+
+  // The mean value of the elements in the buffer. The return value is zero if
+  // the buffer is empty, i.e. no value has been inserted.
+  double Mean();
+  // Remove transients. If the values exceed `val_threshold` for a period
+  // shorter than or equal to `width_threshold`, then that period is considered
+  // transient and set to zero.
+  int RemoveTransient(int width_threshold, double val_threshold);
+
+ private:
+  explicit VadCircularBuffer(int buffer_size);
+  // Get previous values. `index = 0` corresponds to the most recent
+  // insertion. `index = 1` is the one before the most recent insertion, and
+  // so on.
+  int Get(int index, double* value) const;
+  // Set a given position to `value`. `index` is interpreted as above.
+  int Set(int index, double value);
+  // Return the number of valid elements in the buffer.
+  int BufferLevel();
+
+  // Convert an index (interpreted as in Get()) to the corresponding linear
+  // index into `buffer_`.
+  int ConvertToLinearIndex(int* index) const;
+
+  std::unique_ptr<double[]> buffer_;
+  bool is_full_;
+  int index_;
+  int buffer_size_;
+  double sum_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
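[Editor's note] The interface above composes as follows: values are pushed with Insert(), Mean() averages only the stored window, and RemoveTransient() zeroes a short burst once the newest sample has fallen back below the threshold. A minimal usage sketch, not part of the patch; the window size, thresholds, and function name are illustrative:

    #include <memory>

    #include "modules/audio_processing/vad/vad_circular_buffer.h"

    // Track the mean of the last 100 posteriors (1 s of 10 ms chunks) and
    // suppress isolated spikes no wider than 3 samples.
    void SmoothPosteriors(const double* posteriors, int n) {
      std::unique_ptr<webrtc::VadCircularBuffer> buffer(
          webrtc::VadCircularBuffer::Create(100));
      for (int i = 0; i < n; ++i) {
        buffer->Insert(posteriors[i]);
        // Only takes effect when the newest value is below the threshold and
        // the preceding run above it is at most 3 samples wide.
        buffer->RemoveTransient(/*width_threshold=*/3, /*val_threshold=*/0.5);
      }
      double mean = buffer->Mean();  // Averages the stored window only.
      (void)mean;
    }
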
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc
new file mode 100644
index 0000000000..efbd70d9d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/vad_circular_buffer.h"
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const int kWidthThreshold = 7;
+static const double kValThreshold = 1.0;
+static const int kLongBuffSize = 100;
+static const int kShortBuffSize = 10;
+
+static void InsertSequentially(int k, VadCircularBuffer* circular_buffer) {
+  double mean_val;
+  for (int n = 1; n <= k; n++) {
+    EXPECT_TRUE(!circular_buffer->is_full());
+    circular_buffer->Insert(n);
+    mean_val = circular_buffer->Mean();
+    EXPECT_EQ((n + 1.0) / 2., mean_val);
+  }
+}
+
+static void Insert(double value,
+                   int num_insertion,
+                   VadCircularBuffer* circular_buffer) {
+  for (int n = 0; n < num_insertion; n++)
+    circular_buffer->Insert(value);
+}
+
+static void InsertZeros(int num_zeros, VadCircularBuffer* circular_buffer) {
+  Insert(0.0, num_zeros, circular_buffer);
+}
+
+TEST(VadCircularBufferTest, GeneralTest) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kShortBuffSize));
+  double mean_val;
+
+  // Mean should return zero if nothing is inserted.
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(0.0, mean_val);
+  InsertSequentially(kShortBuffSize, circular_buffer.get());
+
+  // Should be full.
+  EXPECT_TRUE(circular_buffer->is_full());
+  // Correct update after being full.
+  for (int n = 1; n < kShortBuffSize; n++) {
+    circular_buffer->Insert(n);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val);
+    EXPECT_TRUE(circular_buffer->is_full());
+  }
+
+  // Check reset. This should be like starting fresh.
+  circular_buffer->Reset();
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(0, mean_val);
+  InsertSequentially(kShortBuffSize, circular_buffer.get());
+  EXPECT_TRUE(circular_buffer->is_full());
+}
+
+TEST(VadCircularBufferTest, TransientsRemoval) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kLongBuffSize));
+  // Let the first transient be in wrap-around.
+  InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get());
+
+  double push_val = kValThreshold;
+  double mean_val;
+  for (int k = kWidthThreshold; k >= 1; k--) {
+    Insert(push_val, k, circular_buffer.get());
+    circular_buffer->Insert(0);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val);
+    circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ(0, mean_val);
+  }
+}
+
+TEST(VadCircularBufferTest, TransientDetection) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kLongBuffSize));
+  // Let the first transient be in wrap-around.
+  int num_insertion = kLongBuffSize - kWidthThreshold / 2;
+  InsertZeros(num_insertion, circular_buffer.get());
+
+  double push_val = 2;
+  // This is longer than a transient and shouldn't be removed.
+  int num_non_zero_elements = kWidthThreshold + 1;
+  Insert(push_val, num_non_zero_elements, circular_buffer.get());
+
+  double mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+  circular_buffer->Insert(0);
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+
+  // A transient right after a non-transient should be removed without
+  // changing the mean.
+  num_insertion = 3;
+  Insert(push_val, num_insertion, circular_buffer.get());
+  circular_buffer->Insert(0);
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+
+  // The last input is above the threshold, so even though the sequence is
+  // short it shouldn't be considered a transient.
+  Insert(push_val, num_insertion, circular_buffer.get());
+  num_non_zero_elements += num_insertion;
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build
new file mode 100644
index 0000000000..0e76427c6e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build
@@ -0,0 +1,239 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+ 
DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and 
CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vad_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc new file mode 100644 index 0000000000..02023d6a72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/voice_activity_detector.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const size_t kNumChannels = 1; + +const double kDefaultVoiceValue = 1.0; +const double kNeutralProbability = 0.5; +const double kLowProbability = 0.01; + +} // namespace + +VoiceActivityDetector::VoiceActivityDetector() + : last_voice_probability_(kDefaultVoiceValue), + standalone_vad_(StandaloneVad::Create()) {} + +VoiceActivityDetector::~VoiceActivityDetector() = default; + +// Because ISAC has a different chunk length, it updates +// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data. +// Otherwise it clears them. +void VoiceActivityDetector::ProcessChunk(const int16_t* audio, + size_t length, + int sample_rate_hz) { + RTC_DCHECK_EQ(length, sample_rate_hz / 100); + // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio. + // Resample to the required rate. + const int16_t* resampled_ptr = audio; + if (sample_rate_hz != kSampleRateHz) { + RTC_CHECK_EQ( + resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), + 0); + resampler_.Push(audio, length, resampled_, kLength10Ms, length); + resampled_ptr = resampled_; + } + RTC_DCHECK_EQ(length, kLength10Ms); + + // Each chunk needs to be passed into `standalone_vad_`, because internally it + // buffers the audio and processes it all at once when GetActivity() is + // called. + RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); + + audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); + + chunkwise_voice_probabilities_.resize(features_.num_frames); + chunkwise_rms_.resize(features_.num_frames); + std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), + chunkwise_rms_.begin()); + if (features_.num_frames > 0) { + if (features_.silence) { + // The other features are invalid, so set the voice probabilities to an + // arbitrary low value. 
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kLowProbability);
+    } else {
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kNeutralProbability);
+      RTC_CHECK_GE(
+          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+                                       chunkwise_voice_probabilities_.size()),
+          0);
+      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
+                       features_, &chunkwise_voice_probabilities_[0]),
+                   0);
+    }
+    last_voice_probability_ = chunkwise_voice_probabilities_.back();
+  }
+}
+
+}  // namespace webrtc
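[Editor's note] The call contract implemented above — exactly 10 ms of mono audio per ProcessChunk() call, internal resampling to 16 kHz, and chunkwise results that may arrive in bursts because the standalone VAD buffers several chunks before producing activities — looks like this from the caller's side. A sketch for illustration only; the 16 kHz input rate and the function name are assumptions:

    #include <cstdint>
    #include <vector>

    #include "modules/audio_processing/vad/voice_activity_detector.h"

    void RunVad(const std::vector<int16_t>& samples) {
      constexpr int kRateHz = 16000;                // Assumed input rate.
      constexpr size_t kChunkSize = kRateHz / 100;  // 10 ms per call.
      webrtc::VoiceActivityDetector vad;
      for (size_t i = 0; i + kChunkSize <= samples.size(); i += kChunkSize) {
        vad.ProcessChunk(&samples[i], kChunkSize, kRateHz);
        // Often empty for a given chunk; several probabilities can show up
        // at once on a later call.
        for (double p : vad.chunkwise_voice_probabilities()) {
          (void)p;  // p pairs index-wise with vad.chunkwise_rms().
        }
      }
    }
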
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h
new file mode 100644
index 0000000000..92b9a8c208
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/resampler/include/resampler.h"
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+#include "modules/audio_processing/vad/standalone_vad.h"
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+namespace webrtc {
+
+// A Voice Activity Detector (VAD) that combines the voice probability from
+// the StandaloneVad and PitchBasedVad to get a more robust estimation.
+class VoiceActivityDetector {
+ public:
+  VoiceActivityDetector();
+  ~VoiceActivityDetector();
+
+  // Processes each audio chunk and estimates the voice probability.
+  // TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
+  // `sample_rate_hz`.
+  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
+
+  // Returns a vector of voice probabilities for each chunk. It can be empty
+  // for some chunks, but it catches up afterwards, returning multiple values
+  // at once.
+  const std::vector<double>& chunkwise_voice_probabilities() const {
+    return chunkwise_voice_probabilities_;
+  }
+
+  // Returns a vector of RMS values for each chunk. It has the same length as
+  // chunkwise_voice_probabilities().
+  const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
+
+  // Returns the last voice probability, regardless of the internal
+  // implementation, although it has a few chunks of delay.
+  float last_voice_probability() const { return last_voice_probability_; }
+
+ private:
+  // TODO(aluebs): Change these to float.
+  std::vector<double> chunkwise_voice_probabilities_;
+  std::vector<double> chunkwise_rms_;
+
+  float last_voice_probability_;
+
+  Resampler resampler_;
+  VadAudioProc audio_processing_;
+
+  std::unique_ptr<StandaloneVad> standalone_vad_;
+  PitchBasedVad pitch_based_vad_;
+
+  int16_t resampled_[kLength10Ms];
+  AudioFeatures features_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
new file mode 100644
index 0000000000..80f21c8db0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace {
+
+const int kStartTimeSec = 16;
+const float kMeanSpeechProbability = 0.3f;
+const float kMaxNoiseProbability = 0.1f;
+const size_t kNumChunks = 300u;
+const size_t kNumChunksPerIsacBlock = 3;
+
+void GenerateNoise(std::vector<int16_t>* data) {
+  for (size_t i = 0; i < data->size(); ++i) {
+    // std::rand() returns values in [0, RAND_MAX]; the implicit conversion
+    // to int16_t wraps, which is good enough for generating noise.
+    (*data)[i] = std::rand();
+  }
+}
+
+}  // namespace
+
+TEST(VoiceActivityDetectorTest, ConstructorSetsDefaultValues) {
+  const float kDefaultVoiceValue = 1.f;
+
+  VoiceActivityDetector vad;
+
+  std::vector<double> p = vad.chunkwise_voice_probabilities();
+  std::vector<double> rms = vad.chunkwise_rms();
+
+  EXPECT_EQ(p.size(), 0u);
+  EXPECT_EQ(rms.size(), 0u);
+
+  EXPECT_FLOAT_EQ(vad.last_voice_probability(), kDefaultVoiceValue);
+}
+
+TEST(VoiceActivityDetectorTest, Speech16kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 16000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio16kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Speech32kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 32000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio32kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise16kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    // Before the VAD has enough data to process an ISAC block, it returns
+    // the default value, 1.f, which would ruin the `max_probability` value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise32kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(2 * kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz);
+
+    // Before the VAD has enough data to process an ISAC block, it returns
+    // the default value, 1.f, which would ruin the `max_probability` value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h b/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
new file mode 100644
index 0000000000..ef4ad7e21e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for active segments. Generated by MakeGmmTables.m.
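[Editor's note] The tables that follow parameterize a 12-mixture, 3-dimensional GMM: per-mixture inverse covariances, means, and weights. How gmm.cc consumes them is outside this hunk; the sketch below is an editorial illustration of the standard evaluation, where the function name and the interpretation of the weights as log-domain coefficients (suggested by their negative values) are assumptions:

    #include <cmath>

    // Likelihood of x under a full-covariance GMM given tables shaped like
    // kVoiceGmmCovarInverse / kVoiceGmmMean / kVoiceGmmWeights below.
    double GmmLikelihood(const double x[3],
                         const double covar_inverse[][3][3],
                         const double mean[][3],
                         const double log_weight[],
                         int num_mixtures) {
      double likelihood = 0.0;
      for (int k = 0; k < num_mixtures; ++k) {
        double d[3];
        for (int i = 0; i < 3; ++i) d[i] = x[i] - mean[k][i];
        double q = 0.0;  // Mahalanobis term d' * C^-1 * d.
        for (int i = 0; i < 3; ++i) {
          for (int j = 0; j < 3; ++j) {
            q += d[i] * covar_inverse[k][i][j] * d[j];
          }
        }
        likelihood += std::exp(log_weight[k] - 0.5 * q);
      }
      return likelihood;
    }
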
+ +#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ +#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ + +static const int kVoiceGmmNumMixtures = 12; +static const int kVoiceGmmDim = 3; + +static const double + kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = { + {{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03}, + {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04}, + {4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}}, + {{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03}, + {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05}, + {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}}, + {{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03}, + {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05}, + {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}}, + {{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02}, + {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05}, + {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}}, + {{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02}, + {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05}, + {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}}, + {{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02}, + {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06}, + {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}}, + {{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02}, + {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06}, + {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}}, + {{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04}, + {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06}, + {7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}}, + {{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02}, + {1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05}, + {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}}, + {{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04}, + {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06}, + {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}}, + {{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03}, + {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05}, + {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}}, + {{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02}, + {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05}, + {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}}; + +static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = { + {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02}, + {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02}, + {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02}, + {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02}, + {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02}, + {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02}, + {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02}, + {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02}, + 
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02}, + {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02}, + {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02}, + {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}}; + +static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = { + -1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01, + -1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01, + -1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01, + -1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00}; +#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/BUILD.gn b/third_party/libwebrtc/modules/congestion_controller/BUILD.gn new file mode 100644 index 0000000000..33f5508137 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/BUILD.gn @@ -0,0 +1,68 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +config("bwe_test_logging") { + if (rtc_enable_bwe_test_logging) { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=1" ] + } else { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0" ] + } +} + +rtc_library("congestion_controller") { + visibility = [ "*" ] + configs += [ ":bwe_test_logging" ] + sources = [ + "include/receive_side_congestion_controller.h", + "receive_side_congestion_controller.cc", + "remb_throttler.cc", + "remb_throttler.h", + ] + + deps = [ + "../../api/transport:network_control", + "../../api/units:data_rate", + "../../api/units:time_delta", + "../../api/units:timestamp", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base/synchronization:mutex", + "../pacing", + "../remote_bitrate_estimator", + "../rtp_rtcp:rtp_rtcp_format", + ] +} + +if (rtc_include_tests && !build_with_chromium) { + rtc_library("congestion_controller_unittests") { + testonly = true + + sources = [ + "receive_side_congestion_controller_unittest.cc", + "remb_throttler_unittest.cc", + ] + deps = [ + ":congestion_controller", + "../../api/test/network_emulation", + "../../api/test/network_emulation:create_cross_traffic", + "../../api/units:data_rate", + "../../api/units:time_delta", + "../../api/units:timestamp", + "../../system_wrappers", + "../../test:test_support", + "../../test/scenario", + "../pacing", + "goog_cc:estimators", + "goog_cc:goog_cc_unittests", + "pcc:pcc_unittests", + "rtp:congestion_controller_unittests", + ] + } +} diff --git a/third_party/libwebrtc/modules/congestion_controller/DEPS b/third_party/libwebrtc/modules/congestion_controller/DEPS new file mode 100644 index 0000000000..2ed9952e22 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/DEPS @@ -0,0 +1,5 @@ +include_rules = [ + "+logging/rtc_event_log", + "+system_wrappers", + "+video", +] diff --git a/third_party/libwebrtc/modules/congestion_controller/OWNERS b/third_party/libwebrtc/modules/congestion_controller/OWNERS new file mode 100644 index 0000000000..9a836bad06 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/OWNERS @@ -0,0 +1,7 @@ +danilchap@webrtc.org 
+linderborg@webrtc.org +stefan@webrtc.org +terelius@webrtc.org +mflodman@webrtc.org +yinwa@webrtc.org +perkj@webrtc.org diff --git a/third_party/libwebrtc/modules/congestion_controller/congestion_controller_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/congestion_controller_gn/moz.build new file mode 100644 index 0000000000..9633f1ef48 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/congestion_controller_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller.cc", + "/third_party/libwebrtc/modules/congestion_controller/remb_throttler.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + 
DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("congestion_controller_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/BUILD.gn b/third_party/libwebrtc/modules/congestion_controller/goog_cc/BUILD.gn new file mode 100644 index 0000000000..150201e1bd --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/BUILD.gn @@ -0,0 +1,369 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +config("bwe_test_logging") { + if (rtc_enable_bwe_test_logging) { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=1" ] + } else { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0" ] + } +} + +rtc_library("goog_cc") { + configs += [ ":bwe_test_logging" ] + sources = [ + "goog_cc_network_control.cc", + "goog_cc_network_control.h", + ] + + deps = [ + ":alr_detector", + ":delay_based_bwe", + ":estimators", + ":loss_based_bwe_v2", + ":probe_controller", + ":pushback_controller", + ":send_side_bwe", + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + "../../../api/rtc_event_log", + "../../../api/transport:field_trial_based_config", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:rtc_event_bwe", + "../../../logging:rtc_event_pacing", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base/experiments:alr_experiment", + "../../../rtc_base/experiments:field_trial_parser", + "../../../rtc_base/experiments:rate_control_settings", + "../../../system_wrappers", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("link_capacity_estimator") { + sources = [ + "link_capacity_estimator.cc", + "link_capacity_estimator.h", + ] + deps = [ + "../../../api/units:data_rate", + "../../../rtc_base:safe_minmax", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("pushback_controller") { + sources = [ + "congestion_window_pushback_controller.cc", + "congestion_window_pushback_controller.h", + ] + deps = [ + "../../../api:field_trials_view", + "../../../api/transport:network_control", + "../../../api/units:data_size", + "../../../rtc_base:checks", + "../../../rtc_base/experiments:rate_control_settings", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("alr_detector") { + sources = [ + "alr_detector.cc", + "alr_detector.h", + ] + deps = [ + "../../../api:field_trials_view", + "../../../api/rtc_event_log", + "../../../api/transport:field_trial_based_config", + "../../../logging:rtc_event_pacing", + "../../../rtc_base:checks", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:timeutils", + "../../../rtc_base/experiments:alr_experiment", + "../../../rtc_base/experiments:field_trial_parser", + "../../pacing:interval_budget", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} +rtc_library("estimators") { + configs += [ ":bwe_test_logging" ] + sources = [ + "acknowledged_bitrate_estimator.cc", + "acknowledged_bitrate_estimator.h", + "acknowledged_bitrate_estimator_interface.cc", + "acknowledged_bitrate_estimator_interface.h", + "bitrate_estimator.cc", + "bitrate_estimator.h", + "delay_increase_detector_interface.h", + "probe_bitrate_estimator.cc", + "probe_bitrate_estimator.h", + "robust_throughput_estimator.cc", + "robust_throughput_estimator.h", + "trendline_estimator.cc", + "trendline_estimator.h", + ] + + deps = [ + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + 
"../../../api/rtc_event_log", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:rtc_event_bwe", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:rtc_numerics", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base/experiments:field_trial_parser", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("loss_based_bwe_v2") { + sources = [ + "loss_based_bwe_v2.cc", + "loss_based_bwe_v2.h", + ] + deps = [ + "../../../api:array_view", + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../rtc_base:logging", + "../../../rtc_base/experiments:field_trial_parser", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/algorithm:container", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("loss_based_bwe_v1") { + configs += [ ":bwe_test_logging" ] + sources = [ + "loss_based_bandwidth_estimation.cc", + "loss_based_bandwidth_estimation.h", + ] + deps = [ + "../../../api:field_trials_view", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../rtc_base:checks", + "../../../rtc_base/experiments:field_trial_parser", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_library("send_side_bwe") { + configs += [ ":bwe_test_logging" ] + sources = [ + "send_side_bandwidth_estimation.cc", + "send_side_bandwidth_estimation.h", + ] + deps = [ + ":loss_based_bwe_v1", + ":loss_based_bwe_v2", + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + "../../../api/rtc_event_log", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:rtc_event_bwe", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base/experiments:field_trial_parser", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("delay_based_bwe") { + configs += [ ":bwe_test_logging" ] + sources = [ + "delay_based_bwe.cc", + "delay_based_bwe.h", + "inter_arrival_delta.cc", + "inter_arrival_delta.h", + ] + + deps = [ + ":estimators", + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + "../../../api/rtc_event_log", + "../../../api/transport:network_control", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:rtc_event_bwe", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:race_checker", + "../../../rtc_base/experiments:field_trial_parser", + "../../../system_wrappers:metrics", + "../../pacing", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + 
"//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("probe_controller") { + sources = [ + "probe_controller.cc", + "probe_controller.h", + ] + + deps = [ + "../../../api:field_trials_view", + "../../../api/rtc_event_log", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:rtc_event_bwe", + "../../../logging:rtc_event_pacing", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:safe_conversions", + "../../../rtc_base/experiments:field_trial_parser", + "../../../system_wrappers:metrics", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +if (rtc_include_tests) { + rtc_library("test_goog_cc_printer") { + testonly = true + sources = [ + "test/goog_cc_printer.cc", + "test/goog_cc_printer.h", + ] + deps = [ + ":alr_detector", + ":delay_based_bwe", + ":estimators", + ":goog_cc", + "../../../api/rtc_event_log", + "../../../api/transport:goog_cc", + "../../../api/transport:network_control", + "../../../api/units:timestamp", + "../../../rtc_base:checks", + "../../../test/logging:log_writer", + "../../remote_bitrate_estimator", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } + if (!build_with_chromium) { + rtc_library("goog_cc_unittests") { + testonly = true + + sources = [ + "acknowledged_bitrate_estimator_unittest.cc", + "alr_detector_unittest.cc", + "congestion_window_pushback_controller_unittest.cc", + "delay_based_bwe_unittest.cc", + "delay_based_bwe_unittest_helper.cc", + "delay_based_bwe_unittest_helper.h", + "goog_cc_network_control_unittest.cc", + "loss_based_bwe_v2_test.cc", + "probe_bitrate_estimator_unittest.cc", + "probe_controller_unittest.cc", + "robust_throughput_estimator_unittest.cc", + "send_side_bandwidth_estimation_unittest.cc", + "trendline_estimator_unittest.cc", + ] + deps = [ + ":alr_detector", + ":delay_based_bwe", + ":estimators", + ":goog_cc", + ":loss_based_bwe_v2", + ":probe_controller", + ":pushback_controller", + ":send_side_bwe", + "../../../api:field_trials_view", + "../../../api:network_state_predictor_api", + "../../../api/rtc_event_log", + "../../../api/test/network_emulation", + "../../../api/test/network_emulation:create_cross_traffic", + "../../../api/transport:field_trial_based_config", + "../../../api/transport:goog_cc", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../api/units:timestamp", + "../../../logging:mocks", + "../../../logging:rtc_event_bwe", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:random", + "../../../rtc_base:rtc_base_tests_utils", + "../../../rtc_base:stringutils", + "../../../rtc_base/experiments:alr_experiment", + "../../../system_wrappers", + "../../../test:explicit_key_value_config", + "../../../test:field_trial", + "../../../test:test_support", + "../../../test/scenario", + "../../pacing", + "//testing/gmock", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings:strings" ] + } + } +} diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.cc 
b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.cc
new file mode 100644
index 0000000000..08b42a8168
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+AcknowledgedBitrateEstimator::AcknowledgedBitrateEstimator(
+    const FieldTrialsView* key_value_config)
+    : AcknowledgedBitrateEstimator(
+          key_value_config,
+          std::make_unique<BitrateEstimator>(key_value_config)) {}
+
+AcknowledgedBitrateEstimator::~AcknowledgedBitrateEstimator() {}
+
+AcknowledgedBitrateEstimator::AcknowledgedBitrateEstimator(
+    const FieldTrialsView* key_value_config,
+    std::unique_ptr<BitrateEstimator> bitrate_estimator)
+    : in_alr_(false), bitrate_estimator_(std::move(bitrate_estimator)) {}
+
+void AcknowledgedBitrateEstimator::IncomingPacketFeedbackVector(
+    const std::vector<PacketResult>& packet_feedback_vector) {
+  RTC_DCHECK(std::is_sorted(packet_feedback_vector.begin(),
+                            packet_feedback_vector.end(),
+                            PacketResult::ReceiveTimeOrder()));
+  for (const auto& packet : packet_feedback_vector) {
+    if (alr_ended_time_ && packet.sent_packet.send_time > *alr_ended_time_) {
+      bitrate_estimator_->ExpectFastRateChange();
+      alr_ended_time_.reset();
+    }
+    DataSize acknowledged_estimate = packet.sent_packet.size;
+    acknowledged_estimate += packet.sent_packet.prior_unacked_data;
+    bitrate_estimator_->Update(packet.receive_time, acknowledged_estimate,
+                               in_alr_);
+  }
+}
+
+absl::optional<DataRate> AcknowledgedBitrateEstimator::bitrate() const {
+  return bitrate_estimator_->bitrate();
+}
+
+absl::optional<DataRate> AcknowledgedBitrateEstimator::PeekRate() const {
+  return bitrate_estimator_->PeekRate();
+}
+
+void AcknowledgedBitrateEstimator::SetAlrEndedTime(Timestamp alr_ended_time) {
+  alr_ended_time_.emplace(alr_ended_time);
+}
+
+void AcknowledgedBitrateEstimator::SetAlr(bool in_alr) {
+  in_alr_ = in_alr;
+}
+
+}  // namespace webrtc
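[Editor's note] The estimator above leaves feedback ordering to the caller (the DCHECK only verifies it) and credits each packet's prior_unacked_data toward the acknowledged size. A caller-side sketch, not part of the patch; the function name and the way the feedback vector and ALR flag are produced are assumed context:

    #include <algorithm>
    #include <vector>

    #include "absl/types/optional.h"
    #include "api/transport/network_types.h"
    #include "api/units/data_rate.h"
    #include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h"

    void OnTransportFeedback(webrtc::AcknowledgedBitrateEstimator& estimator,
                             std::vector<webrtc::PacketResult> feedback,
                             bool in_alr) {
      // IncomingPacketFeedbackVector() DCHECKs this ordering, so sort first.
      std::sort(feedback.begin(), feedback.end(),
                webrtc::PacketResult::ReceiveTimeOrder());
      estimator.SetAlr(in_alr);
      estimator.IncomingPacketFeedbackVector(feedback);
      absl::optional<webrtc::DataRate> rate = estimator.bitrate();
      if (rate.has_value()) {
        // `rate` reflects acknowledged payload plus prior unacked data.
        (void)*rate;
      }
    }
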
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h
new file mode 100644
index 0000000000..d10846ab3a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/field_trials_view.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h"
+#include "modules/congestion_controller/goog_cc/bitrate_estimator.h"
+
+namespace webrtc {
+
+class AcknowledgedBitrateEstimator
+    : public AcknowledgedBitrateEstimatorInterface {
+ public:
+  AcknowledgedBitrateEstimator(
+      const FieldTrialsView* key_value_config,
+      std::unique_ptr<BitrateEstimator> bitrate_estimator);
+
+  explicit AcknowledgedBitrateEstimator(
+      const FieldTrialsView* key_value_config);
+  ~AcknowledgedBitrateEstimator() override;
+
+  void IncomingPacketFeedbackVector(
+      const std::vector<PacketResult>& packet_feedback_vector) override;
+  absl::optional<DataRate> bitrate() const override;
+  absl::optional<DataRate> PeekRate() const override;
+  void SetAlr(bool in_alr) override;
+  void SetAlrEndedTime(Timestamp alr_ended_time) override;
+
+ private:
+  absl::optional<Timestamp> alr_ended_time_;
+  bool in_alr_;
+  std::unique_ptr<BitrateEstimator> bitrate_estimator_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.cc
new file mode 100644
index 0000000000..c043353a7a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h" + +#include + +#include "api/units/time_delta.h" +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h" +#include "modules/congestion_controller/goog_cc/robust_throughput_estimator.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +constexpr char RobustThroughputEstimatorSettings::kKey[]; + +RobustThroughputEstimatorSettings::RobustThroughputEstimatorSettings( + const FieldTrialsView* key_value_config) { + Parser()->Parse( + key_value_config->Lookup(RobustThroughputEstimatorSettings::kKey)); + if (window_packets < 10 || 1000 < window_packets) { + RTC_LOG(LS_WARNING) << "Window size must be between 10 and 1000 packets"; + window_packets = 20; + } + if (max_window_packets < 10 || 1000 < max_window_packets) { + RTC_LOG(LS_WARNING) + << "Max window size must be between 10 and 1000 packets"; + max_window_packets = 500; + } + max_window_packets = std::max(max_window_packets, window_packets); + + if (required_packets < 10 || 1000 < required_packets) { + RTC_LOG(LS_WARNING) << "Required number of initial packets must be between " + "10 and 1000 packets"; + required_packets = 10; + } + required_packets = std::min(required_packets, window_packets); + + if (min_window_duration < TimeDelta::Millis(100) || + TimeDelta::Millis(3000) < min_window_duration) { + RTC_LOG(LS_WARNING) << "Window duration must be between 100 and 3000 ms"; + min_window_duration = TimeDelta::Millis(750); + } + if (max_window_duration < TimeDelta::Seconds(1) || + TimeDelta::Seconds(15) < max_window_duration) { + RTC_LOG(LS_WARNING) << "Max window duration must be between 1 and 15 s"; + max_window_duration = TimeDelta::Seconds(5); + } + min_window_duration = std::min(min_window_duration, max_window_duration); + + if (unacked_weight < 0.0 || 1.0 < unacked_weight) { + RTC_LOG(LS_WARNING) + << "Weight for prior unacked size must be between 0 and 1."; + unacked_weight = 1.0; + } +} + +std::unique_ptr +RobustThroughputEstimatorSettings::Parser() { + return StructParametersParser::Create( + "enabled", &enabled, // + "window_packets", &window_packets, // + "max_window_packets", &max_window_packets, // + "window_duration", &min_window_duration, // + "max_window_duration", &max_window_duration, // + "required_packets", &required_packets, // + "unacked_weight", &unacked_weight); +} + +AcknowledgedBitrateEstimatorInterface:: + ~AcknowledgedBitrateEstimatorInterface() {} + +std::unique_ptr +AcknowledgedBitrateEstimatorInterface::Create( + const FieldTrialsView* key_value_config) { + RobustThroughputEstimatorSettings simplified_estimator_settings( + key_value_config); + if (simplified_estimator_settings.enabled) { + return std::make_unique( + simplified_estimator_settings); + } + return std::make_unique(key_value_config); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h new file mode 100644 index 0000000000..515af1efc9 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_INTERFACE_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_INTERFACE_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/field_trials_view.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+
+namespace webrtc {
+
+struct RobustThroughputEstimatorSettings {
+  static constexpr char kKey[] = "WebRTC-Bwe-RobustThroughputEstimatorSettings";
+
+  RobustThroughputEstimatorSettings() = delete;
+  explicit RobustThroughputEstimatorSettings(
+      const FieldTrialsView* key_value_config);
+
+  bool enabled = false;  // Set to true to use RobustThroughputEstimator.
+
+  // The estimator keeps the smallest window containing at least
+  // `window_packets` packets and at least the packets received during the
+  // last `min_window_duration` milliseconds.
+  // (This means that it may store more than `window_packets` packets at high
+  // bitrates, and a longer duration than `min_window_duration` at low
+  // bitrates.)
+  // However, it will never store more than `max_window_packets` packets (for
+  // performance reasons), and never a longer duration than
+  // `max_window_duration` (to avoid very old packets influencing the
+  // estimate, for example when sending is paused).
+  unsigned window_packets = 20;
+  unsigned max_window_packets = 500;
+  TimeDelta min_window_duration = TimeDelta::Seconds(1);
+  TimeDelta max_window_duration = TimeDelta::Seconds(5);
+
+  // The estimator window requires at least `required_packets` packets
+  // to produce an estimate.
+  unsigned required_packets = 10;
+
+  // If audio packets aren't included in allocation (i.e. the
+  // estimated available bandwidth is divided only among the video
+  // streams), then `unacked_weight` should be set to 0.
+  // If audio packets are included in allocation, but not in bandwidth
+  // estimation (i.e. they don't have transport-wide sequence numbers,
+  // but we nevertheless divide the estimated available bandwidth among
+  // both audio and video streams), then `unacked_weight` should be set to 1.
+  // If all packets have transport-wide sequence numbers, then the value
+  // of `unacked_weight` doesn't matter.
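+  // Example field-trial string (hypothetical values; the keys are the ones
+  // registered in Parser() in the .cc file):
+  //   WebRTC-Bwe-RobustThroughputEstimatorSettings/
+  //       enabled:true,window_packets:30,unacked_weight:0/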
+ double unacked_weight = 1.0; + + std::unique_ptr Parser(); +}; + +class AcknowledgedBitrateEstimatorInterface { + public: + static std::unique_ptr Create( + const FieldTrialsView* key_value_config); + virtual ~AcknowledgedBitrateEstimatorInterface(); + + virtual void IncomingPacketFeedbackVector( + const std::vector& packet_feedback_vector) = 0; + virtual absl::optional bitrate() const = 0; + virtual absl::optional PeekRate() const = 0; + virtual void SetAlr(bool in_alr) = 0; + virtual void SetAlrEndedTime(Timestamp alr_ended_time) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_ACKNOWLEDGED_BITRATE_ESTIMATOR_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_unittest.cc new file mode 100644 index 0000000000..e5b733b119 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_unittest.cc @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h" + +#include +#include + +#include "api/transport/field_trial_based_config.h" +#include "rtc_base/fake_clock.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::InSequence; +using ::testing::NiceMock; +using ::testing::Return; + +namespace webrtc { + +namespace { + +constexpr int64_t kFirstArrivalTimeMs = 10; +constexpr int64_t kFirstSendTimeMs = 10; +constexpr uint16_t kSequenceNumber = 1; +constexpr size_t kPayloadSize = 10; + +class MockBitrateEstimator : public BitrateEstimator { + public: + using BitrateEstimator::BitrateEstimator; + MOCK_METHOD(void, + Update, + (Timestamp at_time, DataSize data_size, bool in_alr), + (override)); + MOCK_METHOD(absl::optional, bitrate, (), (const, override)); + MOCK_METHOD(void, ExpectFastRateChange, (), (override)); +}; + +struct AcknowledgedBitrateEstimatorTestStates { + FieldTrialBasedConfig field_trial_config; + std::unique_ptr acknowledged_bitrate_estimator; + MockBitrateEstimator* mock_bitrate_estimator; +}; + +AcknowledgedBitrateEstimatorTestStates CreateTestStates() { + AcknowledgedBitrateEstimatorTestStates states; + auto mock_bitrate_estimator = + std::make_unique(&states.field_trial_config); + states.mock_bitrate_estimator = mock_bitrate_estimator.get(); + states.acknowledged_bitrate_estimator = + std::make_unique( + &states.field_trial_config, std::move(mock_bitrate_estimator)); + return states; +} + +std::vector CreateFeedbackVector() { + std::vector packet_feedback_vector(2); + packet_feedback_vector[0].receive_time = + Timestamp::Millis(kFirstArrivalTimeMs); + packet_feedback_vector[0].sent_packet.send_time = + Timestamp::Millis(kFirstSendTimeMs); + packet_feedback_vector[0].sent_packet.sequence_number = kSequenceNumber; + packet_feedback_vector[0].sent_packet.size = DataSize::Bytes(kPayloadSize); + packet_feedback_vector[1].receive_time = + Timestamp::Millis(kFirstArrivalTimeMs + 10); + packet_feedback_vector[1].sent_packet.send_time = + 
Timestamp::Millis(kFirstSendTimeMs + 10);
+  packet_feedback_vector[1].sent_packet.sequence_number = kSequenceNumber;
+  packet_feedback_vector[1].sent_packet.size =
+      DataSize::Bytes(kPayloadSize + 10);
+  return packet_feedback_vector;
+}
+
+}  // anonymous namespace
+
+TEST(TestAcknowledgedBitrateEstimator, UpdateBandwidth) {
+  auto states = CreateTestStates();
+  auto packet_feedback_vector = CreateFeedbackVector();
+  {
+    InSequence dummy;
+    EXPECT_CALL(*states.mock_bitrate_estimator,
+                Update(packet_feedback_vector[0].receive_time,
+                       packet_feedback_vector[0].sent_packet.size,
+                       /*in_alr*/ false))
+        .Times(1);
+    EXPECT_CALL(*states.mock_bitrate_estimator,
+                Update(packet_feedback_vector[1].receive_time,
+                       packet_feedback_vector[1].sent_packet.size,
+                       /*in_alr*/ false))
+        .Times(1);
+  }
+  states.acknowledged_bitrate_estimator->IncomingPacketFeedbackVector(
+      packet_feedback_vector);
+}
+
+TEST(TestAcknowledgedBitrateEstimator, ExpectFastRateChangeWhenLeftAlr) {
+  auto states = CreateTestStates();
+  auto packet_feedback_vector = CreateFeedbackVector();
+  {
+    InSequence dummy;
+    EXPECT_CALL(*states.mock_bitrate_estimator,
+                Update(packet_feedback_vector[0].receive_time,
+                       packet_feedback_vector[0].sent_packet.size,
+                       /*in_alr*/ false))
+        .Times(1);
+    EXPECT_CALL(*states.mock_bitrate_estimator, ExpectFastRateChange())
+        .Times(1);
+    EXPECT_CALL(*states.mock_bitrate_estimator,
+                Update(packet_feedback_vector[1].receive_time,
+                       packet_feedback_vector[1].sent_packet.size,
+                       /*in_alr*/ false))
+        .Times(1);
+  }
+  states.acknowledged_bitrate_estimator->SetAlrEndedTime(
+      Timestamp::Millis(kFirstArrivalTimeMs + 1));
+  states.acknowledged_bitrate_estimator->IncomingPacketFeedbackVector(
+      packet_feedback_vector);
+}
+
+TEST(TestAcknowledgedBitrateEstimator, ReturnBitrate) {
+  auto states = CreateTestStates();
+  absl::optional<DataRate> return_value = DataRate::KilobitsPerSec(42);
+  EXPECT_CALL(*states.mock_bitrate_estimator, bitrate())
+      .Times(1)
+      .WillOnce(Return(return_value));
+  EXPECT_EQ(return_value, states.acknowledged_bitrate_estimator->bitrate());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.cc
new file mode 100644
index 0000000000..f1e649b7cd
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/alr_detector.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+#include "api/rtc_event_log/rtc_event.h"
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "logging/rtc_event_log/events/rtc_event_alr_state.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+
+namespace {
+AlrDetectorConfig GetConfigFromTrials(const FieldTrialsView* key_value_config) {
+  RTC_CHECK(AlrExperimentSettings::MaxOneFieldTrialEnabled(*key_value_config));
+  absl::optional<AlrExperimentSettings> experiment_settings =
+      AlrExperimentSettings::CreateFromFieldTrial(
+          *key_value_config,
+          AlrExperimentSettings::kScreenshareProbingBweExperimentName);
+  if (!experiment_settings) {
+    experiment_settings = AlrExperimentSettings::CreateFromFieldTrial(
+        *key_value_config,
+        AlrExperimentSettings::kStrictPacingAndProbingExperimentName);
+  }
+  AlrDetectorConfig conf;
+  if (experiment_settings) {
+    conf.bandwidth_usage_ratio =
+        experiment_settings->alr_bandwidth_usage_percent / 100.0;
+    conf.start_budget_level_ratio =
+        experiment_settings->alr_start_budget_level_percent / 100.0;
+    conf.stop_budget_level_ratio =
+        experiment_settings->alr_stop_budget_level_percent / 100.0;
+  }
+  conf.Parser()->Parse(
+      key_value_config->Lookup("WebRTC-AlrDetectorParameters"));
+  return conf;
+}
+}  // namespace
+
+std::unique_ptr<StructParametersParser> AlrDetectorConfig::Parser() {
+  return StructParametersParser::Create(   //
+      "bw_usage", &bandwidth_usage_ratio,  //
+      "start", &start_budget_level_ratio,  //
+      "stop", &stop_budget_level_ratio);
+}
+
+AlrDetector::AlrDetector(AlrDetectorConfig config, RtcEventLog* event_log)
+    : conf_(config), alr_budget_(0, true), event_log_(event_log) {}
+
+AlrDetector::AlrDetector(const FieldTrialsView* key_value_config)
+    : AlrDetector(GetConfigFromTrials(key_value_config), nullptr) {}
+
+AlrDetector::AlrDetector(const FieldTrialsView* key_value_config,
+                         RtcEventLog* event_log)
+    : AlrDetector(GetConfigFromTrials(key_value_config), event_log) {}
+AlrDetector::~AlrDetector() {}
+
+void AlrDetector::OnBytesSent(size_t bytes_sent, int64_t send_time_ms) {
+  if (!last_send_time_ms_.has_value()) {
+    last_send_time_ms_ = send_time_ms;
+    // Since the duration for sending the bytes is unknown, return without
+    // updating the ALR state.
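+    // (The next call will then derive delta_time_ms from this stored
+    // timestamp, so budget accounting effectively starts with the second
+    // send report.)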
+    return;
+  }
+  int64_t delta_time_ms = send_time_ms - *last_send_time_ms_;
+  last_send_time_ms_ = send_time_ms;
+
+  alr_budget_.UseBudget(bytes_sent);
+  alr_budget_.IncreaseBudget(delta_time_ms);
+  bool state_changed = false;
+  if (alr_budget_.budget_ratio() > conf_.start_budget_level_ratio &&
+      !alr_started_time_ms_) {
+    alr_started_time_ms_.emplace(rtc::TimeMillis());
+    state_changed = true;
+  } else if (alr_budget_.budget_ratio() < conf_.stop_budget_level_ratio &&
+             alr_started_time_ms_) {
+    state_changed = true;
+    alr_started_time_ms_.reset();
+  }
+  if (event_log_ && state_changed) {
+    event_log_->Log(
+        std::make_unique<RtcEventAlrState>(alr_started_time_ms_.has_value()));
+  }
+}
+
+void AlrDetector::SetEstimatedBitrate(int bitrate_bps) {
+  RTC_DCHECK(bitrate_bps);
+  int target_rate_kbps =
+      static_cast<double>(bitrate_bps) * conf_.bandwidth_usage_ratio / 1000;
+  alr_budget_.set_target_rate_kbps(target_rate_kbps);
+}
+
+absl::optional<int64_t> AlrDetector::GetApplicationLimitedRegionStartTime()
+    const {
+  return alr_started_time_ms_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.h
new file mode 100644
index 0000000000..5e7a3e1075
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_ALR_DETECTOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_ALR_DETECTOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/field_trials_view.h"
+#include "modules/pacing/interval_budget.h"
+#include "rtc_base/experiments/alr_experiment.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+
+namespace webrtc {
+
+class RtcEventLog;
+
+struct AlrDetectorConfig {
+  // Sent traffic ratio as a function of network capacity used to determine
+  // the application-limited region. The ALR region starts when bandwidth
+  // usage drops below kAlrStartUsageRatio and ends when it rises above
+  // kAlrEndUsageRatio. NOTE: This is intentionally conservative at the moment
+  // until BW adjustment for the application-limited region is fine-tuned.
+  double bandwidth_usage_ratio = 0.65;
+  double start_budget_level_ratio = 0.80;
+  double stop_budget_level_ratio = 0.50;
+  std::unique_ptr<StructParametersParser> Parser();
+};
+// The application-limited region detector uses signals of elapsed time and
+// bytes sent to estimate whether network traffic is currently limited by the
+// application's ability to generate traffic.
+//
+// AlrDetector provides a signal that can be used to adjust the bandwidth
+// estimate.
+// Note: This class is not thread-safe.
+class AlrDetector {
+ public:
+  AlrDetector(AlrDetectorConfig config, RtcEventLog* event_log);
+  explicit AlrDetector(const FieldTrialsView* key_value_config);
+  AlrDetector(const FieldTrialsView* key_value_config, RtcEventLog* event_log);
+  ~AlrDetector();
+
+  void OnBytesSent(size_t bytes_sent, int64_t send_time_ms);
+
+  // Set the current estimated bandwidth.
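+  // Illustrative numbers (not from the source): with the default
+  // bandwidth_usage_ratio of 0.65 and an estimate of 300000 bps, the budget
+  // target becomes 0.65 * 300000 / 1000 = 195 kbps; sending well below that
+  // rate lets the budget ratio climb toward start_budget_level_ratio.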
+ void SetEstimatedBitrate(int bitrate_bps); + + // Returns time in milliseconds when the current application-limited region + // started or empty result if the sender is currently not application-limited. + absl::optional GetApplicationLimitedRegionStartTime() const; + + private: + friend class GoogCcStatePrinter; + const AlrDetectorConfig conf_; + + absl::optional last_send_time_ms_; + + IntervalBudget alr_budget_; + absl::optional alr_started_time_ms_; + + RtcEventLog* event_log_; +}; +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_ALR_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_gn/moz.build new file mode 100644 index 0000000000..f6520bf358 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = 
"1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("alr_detector_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_unittest.cc new file mode 100644 index 0000000000..eac19d0081 --- /dev/null +++ 
b/third_party/libwebrtc/modules/congestion_controller/goog_cc/alr_detector_unittest.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/alr_detector.h"
+
+#include "api/transport/field_trial_based_config.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/alr_experiment.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace {
+
+constexpr int kEstimatedBitrateBps = 300000;
+
+}  // namespace
+
+namespace webrtc {
+namespace {
+class SimulateOutgoingTrafficIn {
+ public:
+  explicit SimulateOutgoingTrafficIn(AlrDetector* alr_detector,
+                                     int64_t* timestamp_ms)
+      : alr_detector_(alr_detector), timestamp_ms_(timestamp_ms) {
+    RTC_CHECK(alr_detector_);
+  }
+
+  SimulateOutgoingTrafficIn& ForTimeMs(int time_ms) {
+    interval_ms_ = time_ms;
+    ProduceTraffic();
+    return *this;
+  }
+
+  SimulateOutgoingTrafficIn& AtPercentOfEstimatedBitrate(int usage_percentage) {
+    usage_percentage_.emplace(usage_percentage);
+    ProduceTraffic();
+    return *this;
+  }
+
+ private:
+  void ProduceTraffic() {
+    if (!interval_ms_ || !usage_percentage_)
+      return;
+    const int kTimeStepMs = 10;
+    for (int t = 0; t < *interval_ms_; t += kTimeStepMs) {
+      *timestamp_ms_ += kTimeStepMs;
+      alr_detector_->OnBytesSent(kEstimatedBitrateBps * *usage_percentage_ *
+                                     kTimeStepMs / (8 * 100 * 1000),
+                                 *timestamp_ms_);
+    }
+    int remainder_ms = *interval_ms_ % kTimeStepMs;
+    if (remainder_ms > 0) {
+      *timestamp_ms_ += kTimeStepMs;
+      alr_detector_->OnBytesSent(kEstimatedBitrateBps * *usage_percentage_ *
+                                     remainder_ms / (8 * 100 * 1000),
+                                 *timestamp_ms_);
+    }
+  }
+  AlrDetector* const alr_detector_;
+  int64_t* timestamp_ms_;
+  absl::optional<int> interval_ms_;
+  absl::optional<int> usage_percentage_;
+};
+}  // namespace
+
+TEST(AlrDetectorTest, AlrDetection) {
+  FieldTrialBasedConfig field_trials;
+  int64_t timestamp_ms = 1000;
+  AlrDetector alr_detector(&field_trials);
+  alr_detector.SetEstimatedBitrate(kEstimatedBitrateBps);
+
+  // Start in non-ALR state.
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // Stay in non-ALR state when usage is close to 100%.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1000)
+      .AtPercentOfEstimatedBitrate(90);
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // Verify that ALR starts when the bitrate drops below 20%.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1500)
+      .AtPercentOfEstimatedBitrate(20);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // Verify that ALR ends when usage is above 65%.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(4000)
+      .AtPercentOfEstimatedBitrate(100);
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+}
+
+TEST(AlrDetectorTest, ShortSpike) {
+  FieldTrialBasedConfig field_trials;
+  int64_t timestamp_ms = 1000;
+  AlrDetector alr_detector(&field_trials);
+  alr_detector.SetEstimatedBitrate(kEstimatedBitrateBps);
+  // Start in non-ALR state.
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // Verify that ALR starts when the bitrate drops below 20%.
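+  // (Illustrative: assuming IntervalBudget's roughly 500 ms window, the 80%
+  // surplus at 20% usage fills the budget past the 0.80 start ratio well
+  // within one second.)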
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1000)
+      .AtPercentOfEstimatedBitrate(20);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // Verify that we stay in the ALR region even after a short bitrate spike.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(100)
+      .AtPercentOfEstimatedBitrate(150);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // ALR ends when usage is above 65%.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(3000)
+      .AtPercentOfEstimatedBitrate(100);
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+}
+
+TEST(AlrDetectorTest, BandwidthEstimateChanges) {
+  FieldTrialBasedConfig field_trials;
+  int64_t timestamp_ms = 1000;
+  AlrDetector alr_detector(&field_trials);
+  alr_detector.SetEstimatedBitrate(kEstimatedBitrateBps);
+
+  // Start in non-ALR state.
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // ALR starts when the bitrate drops below 20%.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1000)
+      .AtPercentOfEstimatedBitrate(20);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // When the bandwidth estimate drops, the detector should stay in ALR mode
+  // and exit it shortly afterwards as the sender continues sending the same
+  // amount of traffic. This is necessary to ensure that ProbeController can
+  // still react to the BWE drop by initiating a new probe.
+  alr_detector.SetEstimatedBitrate(kEstimatedBitrateBps / 5);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1000)
+      .AtPercentOfEstimatedBitrate(50);
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+}
+
+TEST(AlrDetectorTest, ParseControlFieldTrial) {
+  webrtc::test::ScopedFieldTrials scoped_field_trial(
+      "WebRTC-ProbingScreenshareBwe/Control/");
+  absl::optional<AlrExperimentSettings> parsed_params =
+      AlrExperimentSettings::CreateFromFieldTrial(
+          FieldTrialBasedConfig(), "WebRTC-ProbingScreenshareBwe");
+  EXPECT_FALSE(static_cast<bool>(parsed_params));
+}
+
+TEST(AlrDetectorTest, ParseActiveFieldTrial) {
+  webrtc::test::ScopedFieldTrials scoped_field_trial(
+      "WebRTC-ProbingScreenshareBwe/1.1,2875,85,20,-20,1/");
+  absl::optional<AlrExperimentSettings> parsed_params =
+      AlrExperimentSettings::CreateFromFieldTrial(
+          FieldTrialBasedConfig(), "WebRTC-ProbingScreenshareBwe");
+  ASSERT_TRUE(static_cast<bool>(parsed_params));
+  EXPECT_EQ(1.1f, parsed_params->pacing_factor);
+  EXPECT_EQ(2875, parsed_params->max_paced_queue_time);
+  EXPECT_EQ(85, parsed_params->alr_bandwidth_usage_percent);
+  EXPECT_EQ(20, parsed_params->alr_start_budget_level_percent);
+  EXPECT_EQ(-20, parsed_params->alr_stop_budget_level_percent);
+  EXPECT_EQ(1, parsed_params->group_id);
+}
+
+TEST(AlrDetectorTest, ParseAlrSpecificFieldTrial) {
+  webrtc::test::ScopedFieldTrials scoped_field_trial(
+      "WebRTC-AlrDetectorParameters/"
+      "bw_usage:90%,start:0%,stop:-10%/");
+  FieldTrialBasedConfig field_trials;
+  AlrDetector alr_detector(&field_trials);
+  int64_t timestamp_ms = 1000;
+  alr_detector.SetEstimatedBitrate(kEstimatedBitrateBps);
+
+  // Start in non-ALR state.
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // ALR does not start at 100% utilization.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(1000)
+      .AtPercentOfEstimatedBitrate(100);
+  EXPECT_FALSE(alr_detector.GetApplicationLimitedRegionStartTime());
+
+  // ALR does start at 85% utilization.
+  // It should take about 2 s to reach a budget level of 0%: the 5% headroom
+  // at 85% usage repays the 10% deficit accumulated during the previous
+  // second at 100% usage.
+  SimulateOutgoingTrafficIn(&alr_detector, &timestamp_ms)
+      .ForTimeMs(2100)
+      .AtPercentOfEstimatedBitrate(85);
+  EXPECT_TRUE(alr_detector.GetApplicationLimitedRegionStartTime());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.cc
new file mode 100644
index 0000000000..9c68e48886
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/bitrate_estimator.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+#include "api/units/data_rate.h"
+#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int kInitialRateWindowMs = 500;
+constexpr int kRateWindowMs = 150;
+constexpr int kMinRateWindowMs = 150;
+constexpr int kMaxRateWindowMs = 1000;
+
+const char kBweThroughputWindowConfig[] = "WebRTC-BweThroughputWindowConfig";
+
+}  // namespace
+
+BitrateEstimator::BitrateEstimator(const FieldTrialsView* key_value_config)
+    : sum_(0),
+      initial_window_ms_("initial_window_ms",
+                         kInitialRateWindowMs,
+                         kMinRateWindowMs,
+                         kMaxRateWindowMs),
+      noninitial_window_ms_("window_ms",
+                            kRateWindowMs,
+                            kMinRateWindowMs,
+                            kMaxRateWindowMs),
+      uncertainty_scale_("scale", 10.0),
+      uncertainty_scale_in_alr_("scale_alr", uncertainty_scale_),
+      small_sample_uncertainty_scale_("scale_small", uncertainty_scale_),
+      small_sample_threshold_("small_thresh", DataSize::Zero()),
+      uncertainty_symmetry_cap_("symmetry_cap", DataRate::Zero()),
+      estimate_floor_("floor", DataRate::Zero()),
+      current_window_ms_(0),
+      prev_time_ms_(-1),
+      bitrate_estimate_kbps_(-1.0f),
+      bitrate_estimate_var_(50.0f) {
+  // E.g. WebRTC-BweThroughputWindowConfig/initial_window_ms:350,window_ms:250/
+  ParseFieldTrial(
+      {&initial_window_ms_, &noninitial_window_ms_, &uncertainty_scale_,
+       &uncertainty_scale_in_alr_, &small_sample_uncertainty_scale_,
+       &small_sample_threshold_, &uncertainty_symmetry_cap_, &estimate_floor_},
+      key_value_config->Lookup(kBweThroughputWindowConfig));
+}
+
+BitrateEstimator::~BitrateEstimator() = default;
+
+void BitrateEstimator::Update(Timestamp at_time, DataSize amount, bool in_alr) {
+  int rate_window_ms = noninitial_window_ms_.Get();
+  // We use a larger window at the beginning to get a more stable sample that
+  // we can use to initialize the estimate.
+  if (bitrate_estimate_kbps_ < 0.f)
+    rate_window_ms = initial_window_ms_.Get();
+  bool is_small_sample = false;
+  float bitrate_sample_kbps = UpdateWindow(at_time.ms(), amount.bytes(),
+                                           rate_window_ms, &is_small_sample);
+  if (bitrate_sample_kbps < 0.0f)
+    return;
+  if (bitrate_estimate_kbps_ < 0.0f) {
+    // This is the very first sample we get. Use it to initialize the estimate.
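+    // (No uncertainty weighting is applied to this bootstrap sample: the
+    // Bayesian update below needs an existing estimate to compare against.)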
+ bitrate_estimate_kbps_ = bitrate_sample_kbps; + return; + } + // Optionally use higher uncertainty for very small samples to avoid dropping + // estimate and for samples obtained in ALR. + float scale = uncertainty_scale_; + if (is_small_sample && bitrate_sample_kbps < bitrate_estimate_kbps_) { + scale = small_sample_uncertainty_scale_; + } else if (in_alr && bitrate_sample_kbps < bitrate_estimate_kbps_) { + // Optionally use higher uncertainty for samples obtained during ALR. + scale = uncertainty_scale_in_alr_; + } + // Define the sample uncertainty as a function of how far away it is from the + // current estimate. With low values of uncertainty_symmetry_cap_ we add more + // uncertainty to increases than to decreases. For higher values we approach + // symmetry. + float sample_uncertainty = + scale * std::abs(bitrate_estimate_kbps_ - bitrate_sample_kbps) / + (bitrate_estimate_kbps_ + + std::min(bitrate_sample_kbps, + uncertainty_symmetry_cap_.Get().kbps())); + + float sample_var = sample_uncertainty * sample_uncertainty; + // Update a bayesian estimate of the rate, weighting it lower if the sample + // uncertainty is large. + // The bitrate estimate uncertainty is increased with each update to model + // that the bitrate changes over time. + float pred_bitrate_estimate_var = bitrate_estimate_var_ + 5.f; + bitrate_estimate_kbps_ = (sample_var * bitrate_estimate_kbps_ + + pred_bitrate_estimate_var * bitrate_sample_kbps) / + (sample_var + pred_bitrate_estimate_var); + bitrate_estimate_kbps_ = + std::max(bitrate_estimate_kbps_, estimate_floor_.Get().kbps()); + bitrate_estimate_var_ = sample_var * pred_bitrate_estimate_var / + (sample_var + pred_bitrate_estimate_var); + BWE_TEST_LOGGING_PLOT(1, "acknowledged_bitrate", at_time.ms(), + bitrate_estimate_kbps_ * 1000); +} + +float BitrateEstimator::UpdateWindow(int64_t now_ms, + int bytes, + int rate_window_ms, + bool* is_small_sample) { + RTC_DCHECK(is_small_sample != nullptr); + // Reset if time moves backwards. + if (now_ms < prev_time_ms_) { + prev_time_ms_ = -1; + sum_ = 0; + current_window_ms_ = 0; + } + if (prev_time_ms_ >= 0) { + current_window_ms_ += now_ms - prev_time_ms_; + // Reset if nothing has been received for more than a full window. + if (now_ms - prev_time_ms_ > rate_window_ms) { + sum_ = 0; + current_window_ms_ %= rate_window_ms; + } + } + prev_time_ms_ = now_ms; + float bitrate_sample = -1.0f; + if (current_window_ms_ >= rate_window_ms) { + *is_small_sample = sum_ < small_sample_threshold_->bytes(); + bitrate_sample = 8.0f * sum_ / static_cast(rate_window_ms); + current_window_ms_ -= rate_window_ms; + sum_ = 0; + } + sum_ += bytes; + return bitrate_sample; +} + +absl::optional BitrateEstimator::bitrate() const { + if (bitrate_estimate_kbps_ < 0.f) + return absl::nullopt; + return DataRate::KilobitsPerSec(bitrate_estimate_kbps_); +} + +absl::optional BitrateEstimator::PeekRate() const { + if (current_window_ms_ > 0) + return DataSize::Bytes(sum_) / TimeDelta::Millis(current_window_ms_); + return absl::nullopt; +} + +void BitrateEstimator::ExpectFastRateChange() { + // By setting the bitrate-estimate variance to a higher value we allow the + // bitrate to change fast for the next few samples. 
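+  // Illustrative numbers (not from the source): if bitrate_estimate_var_ was
+  // 50, raising it to 250 makes pred_bitrate_estimate_var 255 on the next
+  // Update(), so a sample with sample_var 100 is weighted 255 / 355 ~= 0.72
+  // instead of 55 / 155 ~= 0.35.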
+ bitrate_estimate_var_ += 200; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.h new file mode 100644 index 0000000000..a6f985800e --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_BITRATE_ESTIMATOR_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_BITRATE_ESTIMATOR_H_ + +#include + +#include "absl/types/optional.h" +#include "api/field_trials_view.h" +#include "api/units/data_rate.h" +#include "api/units/timestamp.h" +#include "rtc_base/experiments/field_trial_parser.h" + +namespace webrtc { + +// Computes a bayesian estimate of the throughput given acks containing +// the arrival time and payload size. Samples which are far from the current +// estimate or are based on few packets are given a smaller weight, as they +// are considered to be more likely to have been caused by, e.g., delay spikes +// unrelated to congestion. +class BitrateEstimator { + public: + explicit BitrateEstimator(const FieldTrialsView* key_value_config); + virtual ~BitrateEstimator(); + virtual void Update(Timestamp at_time, DataSize amount, bool in_alr); + + virtual absl::optional bitrate() const; + absl::optional PeekRate() const; + + virtual void ExpectFastRateChange(); + + private: + float UpdateWindow(int64_t now_ms, + int bytes, + int rate_window_ms, + bool* is_small_sample); + int sum_; + FieldTrialConstrained initial_window_ms_; + FieldTrialConstrained noninitial_window_ms_; + FieldTrialParameter uncertainty_scale_; + FieldTrialParameter uncertainty_scale_in_alr_; + FieldTrialParameter small_sample_uncertainty_scale_; + FieldTrialParameter small_sample_threshold_; + FieldTrialParameter uncertainty_symmetry_cap_; + FieldTrialParameter estimate_floor_; + int64_t current_window_ms_; + int64_t prev_time_ms_; + float bitrate_estimate_kbps_; + float bitrate_estimate_var_; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_BITRATE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.cc new file mode 100644 index 0000000000..2f188f30ca --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h" + +#include +#include + +#include +#include + +#include "absl/strings/match.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/rate_control_settings.h" + +namespace webrtc { + +CongestionWindowPushbackController::CongestionWindowPushbackController( + const FieldTrialsView* key_value_config) + : add_pacing_( + absl::StartsWith(key_value_config->Lookup( + "WebRTC-AddPacingToCongestionWindowPushback"), + "Enabled")), + min_pushback_target_bitrate_bps_( + RateControlSettings::ParseFromKeyValueConfig(key_value_config) + .CongestionWindowMinPushbackTargetBitrateBps()), + current_data_window_( + RateControlSettings::ParseFromKeyValueConfig(key_value_config) + .CongestionWindowInitialDataWindow()) {} + +void CongestionWindowPushbackController::UpdateOutstandingData( + int64_t outstanding_bytes) { + outstanding_bytes_ = outstanding_bytes; +} +void CongestionWindowPushbackController::UpdatePacingQueue( + int64_t pacing_bytes) { + pacing_bytes_ = pacing_bytes; +} + +void CongestionWindowPushbackController::SetDataWindow(DataSize data_window) { + current_data_window_ = data_window; +} + +uint32_t CongestionWindowPushbackController::UpdateTargetBitrate( + uint32_t bitrate_bps) { + if (!current_data_window_ || current_data_window_->IsZero()) + return bitrate_bps; + int64_t total_bytes = outstanding_bytes_; + if (add_pacing_) + total_bytes += pacing_bytes_; + double fill_ratio = + total_bytes / static_cast(current_data_window_->bytes()); + if (fill_ratio > 1.5) { + encoding_rate_ratio_ *= 0.9; + } else if (fill_ratio > 1) { + encoding_rate_ratio_ *= 0.95; + } else if (fill_ratio < 0.1) { + encoding_rate_ratio_ = 1.0; + } else { + encoding_rate_ratio_ *= 1.05; + encoding_rate_ratio_ = std::min(encoding_rate_ratio_, 1.0); + } + uint32_t adjusted_target_bitrate_bps = + static_cast(bitrate_bps * encoding_rate_ratio_); + + // Do not adjust below the minimum pushback bitrate but do obey if the + // original estimate is below it. + bitrate_bps = adjusted_target_bitrate_bps < min_pushback_target_bitrate_bps_ + ? std::min(bitrate_bps, min_pushback_target_bitrate_bps_) + : adjusted_target_bitrate_bps; + return bitrate_bps; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h new file mode 100644 index 0000000000..ea9ed97c3d --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_CONGESTION_WINDOW_PUSHBACK_CONTROLLER_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_CONGESTION_WINDOW_PUSHBACK_CONTROLLER_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/field_trials_view.h" +#include "api/units/data_size.h" + +namespace webrtc { + +// This class enables pushback from congestion window directly to video encoder. 
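+// As a numeric illustration (hypothetical values): with a 50 kB data window
+// and 100 kB of outstanding data, UpdateTargetBitrate() computes a fill
+// ratio of 2.0 and scales the target by 0.9 per call until the window
+// drains below 1.5x.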
+// When the congestion window is filling up, the video encoder target bitrate +// will be reduced accordingly to accommodate the network changes. To avoid +// pausing video too frequently, a minimum encoder target bitrate threshold is +// used to prevent video pause due to a full congestion window. +class CongestionWindowPushbackController { + public: + explicit CongestionWindowPushbackController( + const FieldTrialsView* key_value_config); + void UpdateOutstandingData(int64_t outstanding_bytes); + void UpdatePacingQueue(int64_t pacing_bytes); + uint32_t UpdateTargetBitrate(uint32_t bitrate_bps); + void SetDataWindow(DataSize data_window); + + private: + const bool add_pacing_; + const uint32_t min_pushback_target_bitrate_bps_; + absl::optional current_data_window_; + int64_t outstanding_bytes_ = 0; + int64_t pacing_bytes_ = 0; + double encoding_rate_ratio_ = 1.0; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_CONGESTION_WINDOW_PUSHBACK_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller_unittest.cc new file mode 100644 index 0000000000..62dde02323 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller_unittest.cc @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h" + +#include + +#include "api/transport/field_trial_based_config.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; + +namespace webrtc { +namespace test { + +class CongestionWindowPushbackControllerTest : public ::testing::Test { + public: + CongestionWindowPushbackControllerTest() { + cwnd_controller_.reset( + new CongestionWindowPushbackController(&field_trial_config_)); + } + + protected: + FieldTrialBasedConfig field_trial_config_; + + std::unique_ptr cwnd_controller_; +}; + +TEST_F(CongestionWindowPushbackControllerTest, FullCongestionWindow) { + cwnd_controller_->UpdateOutstandingData(100000); + cwnd_controller_->SetDataWindow(DataSize::Bytes(50000)); + + uint32_t bitrate_bps = 80000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(72000u, bitrate_bps); + + cwnd_controller_->SetDataWindow(DataSize::Bytes(50000)); + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(static_cast(72000 * 0.9 * 0.9), bitrate_bps); +} + +TEST_F(CongestionWindowPushbackControllerTest, NormalCongestionWindow) { + cwnd_controller_->UpdateOutstandingData(199999); + cwnd_controller_->SetDataWindow(DataSize::Bytes(200000)); + + uint32_t bitrate_bps = 80000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(80000u, bitrate_bps); +} + +TEST_F(CongestionWindowPushbackControllerTest, LowBitrate) { + cwnd_controller_->UpdateOutstandingData(100000); + cwnd_controller_->SetDataWindow(DataSize::Bytes(50000)); + + uint32_t bitrate_bps = 35000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(static_cast(35000 * 0.9), bitrate_bps); + + cwnd_controller_->SetDataWindow(DataSize::Bytes(20000)); + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(30000u, bitrate_bps); +} + +TEST_F(CongestionWindowPushbackControllerTest, NoPushbackOnDataWindowUnset) { + cwnd_controller_->UpdateOutstandingData(1e8); // Large number + + uint32_t bitrate_bps = 80000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_EQ(80000u, bitrate_bps); +} + +TEST_F(CongestionWindowPushbackControllerTest, PushbackOnInititialDataWindow) { + test::ScopedFieldTrials trials("WebRTC-CongestionWindow/InitWin:100000/"); + cwnd_controller_.reset( + new CongestionWindowPushbackController(&field_trial_config_)); + cwnd_controller_->UpdateOutstandingData(1e8); // Large number + + uint32_t bitrate_bps = 80000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_GT(80000u, bitrate_bps); +} + +TEST_F(CongestionWindowPushbackControllerTest, PushbackDropFrame) { + test::ScopedFieldTrials trials("WebRTC-CongestionWindow/DropFrame:true/"); + cwnd_controller_.reset( + new CongestionWindowPushbackController(&field_trial_config_)); + cwnd_controller_->UpdateOutstandingData(1e8); // Large number + cwnd_controller_->SetDataWindow(DataSize::Bytes(50000)); + + uint32_t bitrate_bps = 80000; + bitrate_bps = cwnd_controller_->UpdateTargetBitrate(bitrate_bps); + EXPECT_GT(80000u, bitrate_bps); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.cc new file mode 100644 index 0000000000..07ac599148 --- /dev/null +++ 
b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.cc @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/congestion_controller/goog_cc/delay_based_bwe.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/strings/match.h" +#include "api/rtc_event_log/rtc_event.h" +#include "api/rtc_event_log/rtc_event_log.h" +#include "api/units/time_delta.h" +#include "logging/rtc_event_log/events/rtc_event_bwe_update_delay_based.h" +#include "modules/congestion_controller/goog_cc/trendline_estimator.h" +#include "modules/remote_bitrate_estimator/include/bwe_defines.h" +#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { +constexpr TimeDelta kStreamTimeOut = TimeDelta::Seconds(2); +constexpr TimeDelta kSendTimeGroupLength = TimeDelta::Millis(5); + +// This ssrc is used to fulfill the current API but will be removed +// after the API has been changed. +constexpr uint32_t kFixedSsrc = 0; +} // namespace + +constexpr char BweSeparateAudioPacketsSettings::kKey[]; + +BweSeparateAudioPacketsSettings::BweSeparateAudioPacketsSettings( + const FieldTrialsView* key_value_config) { + Parser()->Parse( + key_value_config->Lookup(BweSeparateAudioPacketsSettings::kKey)); +} + +std::unique_ptr +BweSeparateAudioPacketsSettings::Parser() { + return StructParametersParser::Create( // + "enabled", &enabled, // + "packet_threshold", &packet_threshold, // + "time_threshold", &time_threshold); +} + +DelayBasedBwe::Result::Result() + : updated(false), + probe(false), + target_bitrate(DataRate::Zero()), + recovered_from_overuse(false) {} + +DelayBasedBwe::DelayBasedBwe(const FieldTrialsView* key_value_config, + RtcEventLog* event_log, + NetworkStatePredictor* network_state_predictor) + : event_log_(event_log), + key_value_config_(key_value_config), + separate_audio_(key_value_config), + audio_packets_since_last_video_(0), + last_video_packet_recv_time_(Timestamp::MinusInfinity()), + network_state_predictor_(network_state_predictor), + video_delay_detector_( + new TrendlineEstimator(key_value_config_, network_state_predictor_)), + audio_delay_detector_( + new TrendlineEstimator(key_value_config_, network_state_predictor_)), + active_delay_detector_(video_delay_detector_.get()), + last_seen_packet_(Timestamp::MinusInfinity()), + uma_recorded_(false), + rate_control_(key_value_config, /*send_side=*/true), + prev_bitrate_(DataRate::Zero()), + prev_state_(BandwidthUsage::kBwNormal) { + RTC_LOG(LS_INFO) + << "Initialized DelayBasedBwe with separate audio overuse detection" + << separate_audio_.Parser()->Encode(); +} + +DelayBasedBwe::~DelayBasedBwe() {} + +DelayBasedBwe::Result DelayBasedBwe::IncomingPacketFeedbackVector( + const TransportPacketsFeedback& msg, + absl::optional acked_bitrate, + absl::optional probe_bitrate, + absl::optional network_estimate, + bool in_alr) { + RTC_DCHECK_RUNS_SERIALIZED(&network_race_); + + auto packet_feedback_vector = msg.SortedByReceiveTime(); + // TODO(holmer): An empty feedback vector here 
likely means that + // all acks were too late and that the send time history had + // timed out. We should reduce the rate when this occurs. + if (packet_feedback_vector.empty()) { + RTC_LOG(LS_WARNING) << "Very late feedback received."; + return DelayBasedBwe::Result(); + } + + if (!uma_recorded_) { + RTC_HISTOGRAM_ENUMERATION(kBweTypeHistogram, + BweNames::kSendSideTransportSeqNum, + BweNames::kBweNamesMax); + uma_recorded_ = true; + } + bool delayed_feedback = true; + bool recovered_from_overuse = false; + BandwidthUsage prev_detector_state = active_delay_detector_->State(); + for (const auto& packet_feedback : packet_feedback_vector) { + delayed_feedback = false; + IncomingPacketFeedback(packet_feedback, msg.feedback_time); + if (prev_detector_state == BandwidthUsage::kBwUnderusing && + active_delay_detector_->State() == BandwidthUsage::kBwNormal) { + recovered_from_overuse = true; + } + prev_detector_state = active_delay_detector_->State(); + } + + if (delayed_feedback) { + // TODO(bugs.webrtc.org/10125): Design a better mechanism to safe-guard + // against building very large network queues. + return Result(); + } + rate_control_.SetInApplicationLimitedRegion(in_alr); + rate_control_.SetNetworkStateEstimate(network_estimate); + return MaybeUpdateEstimate(acked_bitrate, probe_bitrate, + std::move(network_estimate), + recovered_from_overuse, in_alr, msg.feedback_time); +} + +void DelayBasedBwe::IncomingPacketFeedback(const PacketResult& packet_feedback, + Timestamp at_time) { + // Reset if the stream has timed out. + if (last_seen_packet_.IsInfinite() || + at_time - last_seen_packet_ > kStreamTimeOut) { + video_inter_arrival_delta_ = + std::make_unique(kSendTimeGroupLength); + audio_inter_arrival_delta_ = + std::make_unique(kSendTimeGroupLength); + + video_delay_detector_.reset( + new TrendlineEstimator(key_value_config_, network_state_predictor_)); + audio_delay_detector_.reset( + new TrendlineEstimator(key_value_config_, network_state_predictor_)); + active_delay_detector_ = video_delay_detector_.get(); + } + last_seen_packet_ = at_time; + + // As an alternative to ignoring small packets, we can separate audio and + // video packets for overuse detection. + DelayIncreaseDetectorInterface* delay_detector_for_packet = + video_delay_detector_.get(); + if (separate_audio_.enabled) { + if (packet_feedback.sent_packet.audio) { + delay_detector_for_packet = audio_delay_detector_.get(); + audio_packets_since_last_video_++; + if (audio_packets_since_last_video_ > separate_audio_.packet_threshold && + packet_feedback.receive_time - last_video_packet_recv_time_ > + separate_audio_.time_threshold) { + active_delay_detector_ = audio_delay_detector_.get(); + } + } else { + audio_packets_since_last_video_ = 0; + last_video_packet_recv_time_ = + std::max(last_video_packet_recv_time_, packet_feedback.receive_time); + active_delay_detector_ = video_delay_detector_.get(); + } + } + DataSize packet_size = packet_feedback.sent_packet.size; + + TimeDelta send_delta = TimeDelta::Zero(); + TimeDelta recv_delta = TimeDelta::Zero(); + int size_delta = 0; + + InterArrivalDelta* inter_arrival_for_packet = + (separate_audio_.enabled && packet_feedback.sent_packet.audio) + ? 
audio_inter_arrival_delta_.get() + : video_inter_arrival_delta_.get(); + bool calculated_deltas = inter_arrival_for_packet->ComputeDeltas( + packet_feedback.sent_packet.send_time, packet_feedback.receive_time, + at_time, packet_size.bytes(), &send_delta, &recv_delta, &size_delta); + + delay_detector_for_packet->Update(recv_delta.ms(), + send_delta.ms(), + packet_feedback.sent_packet.send_time.ms(), + packet_feedback.receive_time.ms(), + packet_size.bytes(), calculated_deltas); +} + +DataRate DelayBasedBwe::TriggerOveruse(Timestamp at_time, + absl::optional link_capacity) { + RateControlInput input(BandwidthUsage::kBwOverusing, link_capacity); + return rate_control_.Update(&input, at_time); +} + +DelayBasedBwe::Result DelayBasedBwe::MaybeUpdateEstimate( + absl::optional acked_bitrate, + absl::optional probe_bitrate, + absl::optional state_estimate, + bool recovered_from_overuse, + bool in_alr, + Timestamp at_time) { + Result result; + + // Currently overusing the bandwidth. + if (active_delay_detector_->State() == BandwidthUsage::kBwOverusing) { + if (acked_bitrate && + rate_control_.TimeToReduceFurther(at_time, *acked_bitrate)) { + result.updated = + UpdateEstimate(at_time, acked_bitrate, &result.target_bitrate); + } else if (!acked_bitrate && rate_control_.ValidEstimate() && + rate_control_.InitialTimeToReduceFurther(at_time)) { + // Overusing before we have a measured acknowledged bitrate. Reduce send + // rate by 50% every 200 ms. + // TODO(tschumim): Improve this and/or the acknowledged bitrate estimator + // so that we (almost) always have a bitrate estimate. + rate_control_.SetEstimate(rate_control_.LatestEstimate() / 2, at_time); + result.updated = true; + result.probe = false; + result.target_bitrate = rate_control_.LatestEstimate(); + } + } else { + if (probe_bitrate) { + result.probe = true; + result.updated = true; + rate_control_.SetEstimate(*probe_bitrate, at_time); + result.target_bitrate = rate_control_.LatestEstimate(); + } else { + result.updated = + UpdateEstimate(at_time, acked_bitrate, &result.target_bitrate); + result.recovered_from_overuse = recovered_from_overuse; + } + } + BandwidthUsage detector_state = active_delay_detector_->State(); + if ((result.updated && prev_bitrate_ != result.target_bitrate) || + detector_state != prev_state_) { + DataRate bitrate = result.updated ? result.target_bitrate : prev_bitrate_; + + BWE_TEST_LOGGING_PLOT(1, "target_bitrate_bps", at_time.ms(), bitrate.bps()); + + if (event_log_) { + event_log_->Log(std::make_unique( + bitrate.bps(), detector_state)); + } + + prev_bitrate_ = bitrate; + prev_state_ = detector_state; + } + + result.delay_detector_state = detector_state; + return result; +} + +bool DelayBasedBwe::UpdateEstimate(Timestamp at_time, + absl::optional acked_bitrate, + DataRate* target_rate) { + const RateControlInput input(active_delay_detector_->State(), acked_bitrate); + *target_rate = rate_control_.Update(&input, at_time); + return rate_control_.ValidEstimate(); +} + +void DelayBasedBwe::OnRttUpdate(TimeDelta avg_rtt) { + rate_control_.SetRtt(avg_rtt); +} + +bool DelayBasedBwe::LatestEstimate(std::vector* ssrcs, + DataRate* bitrate) const { + // Currently accessed from both the process thread (see + // ModuleRtpRtcpImpl::Process()) and the configuration thread (see + // Call::GetStats()). Should in the future only be accessed from a single + // thread. 
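+  //
+  // Illustrative caller-side sketch, not upstream code (`stats` here is a
+  // hypothetical aggregate for the values this accessor reports):
+  //
+  //   std::vector<uint32_t> ssrcs;
+  //   DataRate rate = DataRate::Zero();
+  //   if (delay_based_bwe.LatestEstimate(&ssrcs, &rate)) {
+  //     // `ssrcs` currently only ever holds the placeholder kFixedSsrc.
+  //     stats.delay_based_bps = rate.bps();
+  //   }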
+ RTC_DCHECK(ssrcs); + RTC_DCHECK(bitrate); + if (!rate_control_.ValidEstimate()) + return false; + + *ssrcs = {kFixedSsrc}; + *bitrate = rate_control_.LatestEstimate(); + return true; +} + +void DelayBasedBwe::SetStartBitrate(DataRate start_bitrate) { + RTC_LOG(LS_INFO) << "BWE Setting start bitrate to: " + << ToString(start_bitrate); + rate_control_.SetStartBitrate(start_bitrate); +} + +void DelayBasedBwe::SetMinBitrate(DataRate min_bitrate) { + // Called from both the configuration thread and the network thread. Shouldn't + // be called from the network thread in the future. + rate_control_.SetMinBitrate(min_bitrate); +} + +TimeDelta DelayBasedBwe::GetExpectedBwePeriod() const { + return rate_control_.GetExpectedBandwidthPeriod(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.h new file mode 100644 index 0000000000..e91a1dff54 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_H_ + +#include +#include + +#include +#include + +#include "absl/types/optional.h" +#include "api/field_trials_view.h" +#include "api/network_state_predictor.h" +#include "api/transport/network_types.h" +#include "modules/congestion_controller/goog_cc/delay_increase_detector_interface.h" +#include "modules/congestion_controller/goog_cc/inter_arrival_delta.h" +#include "modules/congestion_controller/goog_cc/probe_bitrate_estimator.h" +#include "modules/remote_bitrate_estimator/aimd_rate_control.h" +#include "modules/remote_bitrate_estimator/inter_arrival.h" +#include "rtc_base/experiments/struct_parameters_parser.h" +#include "rtc_base/race_checker.h" + +namespace webrtc { +class RtcEventLog; + +struct BweSeparateAudioPacketsSettings { + static constexpr char kKey[] = "WebRTC-Bwe-SeparateAudioPackets"; + + BweSeparateAudioPacketsSettings() = default; + explicit BweSeparateAudioPacketsSettings( + const FieldTrialsView* key_value_config); + + bool enabled = false; + int packet_threshold = 10; + TimeDelta time_threshold = TimeDelta::Seconds(1); + + std::unique_ptr Parser(); +}; + +class DelayBasedBwe { + public: + struct Result { + Result(); + ~Result() = default; + bool updated; + bool probe; + DataRate target_bitrate = DataRate::Zero(); + bool recovered_from_overuse; + BandwidthUsage delay_detector_state; + }; + + explicit DelayBasedBwe(const FieldTrialsView* key_value_config, + RtcEventLog* event_log, + NetworkStatePredictor* network_state_predictor); + + DelayBasedBwe() = delete; + DelayBasedBwe(const DelayBasedBwe&) = delete; + DelayBasedBwe& operator=(const DelayBasedBwe&) = delete; + + virtual ~DelayBasedBwe(); + + Result IncomingPacketFeedbackVector( + const TransportPacketsFeedback& msg, + absl::optional acked_bitrate, + absl::optional probe_bitrate, + absl::optional network_estimate, + bool in_alr); + void OnRttUpdate(TimeDelta avg_rtt); + bool 
LatestEstimate(std::vector* ssrcs, DataRate* bitrate) const; + void SetStartBitrate(DataRate start_bitrate); + void SetMinBitrate(DataRate min_bitrate); + TimeDelta GetExpectedBwePeriod() const; + DataRate TriggerOveruse(Timestamp at_time, + absl::optional link_capacity); + DataRate last_estimate() const { return prev_bitrate_; } + BandwidthUsage last_state() const { return prev_state_; } + + private: + friend class GoogCcStatePrinter; + void IncomingPacketFeedback(const PacketResult& packet_feedback, + Timestamp at_time); + Result MaybeUpdateEstimate( + absl::optional acked_bitrate, + absl::optional probe_bitrate, + absl::optional state_estimate, + bool recovered_from_overuse, + bool in_alr, + Timestamp at_time); + // Updates the current remote rate estimate and returns true if a valid + // estimate exists. + bool UpdateEstimate(Timestamp at_time, + absl::optional acked_bitrate, + DataRate* target_rate); + + rtc::RaceChecker network_race_; + RtcEventLog* const event_log_; + const FieldTrialsView* const key_value_config_; + + // Alternatively, run two separate overuse detectors for audio and video, + // and fall back to the audio one if we haven't seen a video packet in a + // while. + BweSeparateAudioPacketsSettings separate_audio_; + int64_t audio_packets_since_last_video_; + Timestamp last_video_packet_recv_time_; + + NetworkStatePredictor* network_state_predictor_; + std::unique_ptr video_inter_arrival_; + std::unique_ptr video_inter_arrival_delta_; + std::unique_ptr video_delay_detector_; + std::unique_ptr audio_inter_arrival_; + std::unique_ptr audio_inter_arrival_delta_; + std::unique_ptr audio_delay_detector_; + DelayIncreaseDetectorInterface* active_delay_detector_; + + Timestamp last_seen_packet_; + bool uma_recorded_; + AimdRateControl rate_control_; + DataRate prev_bitrate_; + BandwidthUsage prev_state_; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_gn/moz.build new file mode 100644 index 0000000000..15f5a583ef --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + 
DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("delay_based_bwe_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest.cc new file mode 100644 index 0000000000..b7dc6aae47 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest.cc @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/congestion_controller/goog_cc/delay_based_bwe.h" + +#include + +#include "api/transport/network_types.h" +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h" +#include "modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kNumProbesCluster0 = 5; +constexpr int kNumProbesCluster1 = 8; +const PacedPacketInfo kPacingInfo0(0, kNumProbesCluster0, 2000); +const PacedPacketInfo kPacingInfo1(1, kNumProbesCluster1, 4000); +constexpr float kTargetUtilizationFraction = 0.95f; +} // namespace + +TEST_F(DelayBasedBweTest, ProbeDetection) { + int64_t now_ms = clock_.TimeInMilliseconds(); + + // First burst sent at 8 * 1000 / 10 = 800 kbps. + for (int i = 0; i < kNumProbesCluster0; ++i) { + clock_.AdvanceTimeMilliseconds(10); + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, now_ms, 1000, kPacingInfo0); + } + EXPECT_TRUE(bitrate_observer_.updated()); + + // Second burst sent at 8 * 1000 / 5 = 1600 kbps. + for (int i = 0; i < kNumProbesCluster1; ++i) { + clock_.AdvanceTimeMilliseconds(5); + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, now_ms, 1000, kPacingInfo1); + } + + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_GT(bitrate_observer_.latest_bitrate(), 1500000u); +} + +TEST_F(DelayBasedBweTest, ProbeDetectionNonPacedPackets) { + int64_t now_ms = clock_.TimeInMilliseconds(); + // First burst sent at 8 * 1000 / 10 = 800 kbps, but with every other packet + // not being paced which could mess things up. + for (int i = 0; i < kNumProbesCluster0; ++i) { + clock_.AdvanceTimeMilliseconds(5); + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, now_ms, 1000, kPacingInfo0); + // Non-paced packet, arriving 5 ms after. + clock_.AdvanceTimeMilliseconds(5); + IncomingFeedback(now_ms, now_ms, 100, PacedPacketInfo()); + } + + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_GT(bitrate_observer_.latest_bitrate(), 800000u); +} + +TEST_F(DelayBasedBweTest, ProbeDetectionFasterArrival) { + int64_t now_ms = clock_.TimeInMilliseconds(); + // First burst sent at 8 * 1000 / 10 = 800 kbps. + // Arriving at 8 * 1000 / 5 = 1600 kbps. + int64_t send_time_ms = 0; + for (int i = 0; i < kNumProbesCluster0; ++i) { + clock_.AdvanceTimeMilliseconds(1); + send_time_ms += 10; + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, send_time_ms, 1000, kPacingInfo0); + } + + EXPECT_FALSE(bitrate_observer_.updated()); +} + +TEST_F(DelayBasedBweTest, ProbeDetectionSlowerArrival) { + int64_t now_ms = clock_.TimeInMilliseconds(); + // First burst sent at 8 * 1000 / 5 = 1600 kbps. + // Arriving at 8 * 1000 / 7 = 1142 kbps. + // Since the receive rate is significantly below the send rate, we expect to + // use 95% of the estimated capacity. + int64_t send_time_ms = 0; + for (int i = 0; i < kNumProbesCluster1; ++i) { + clock_.AdvanceTimeMilliseconds(7); + send_time_ms += 5; + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, send_time_ms, 1000, kPacingInfo1); + } + + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_NEAR(bitrate_observer_.latest_bitrate(), + kTargetUtilizationFraction * 1140000u, 10000u); +} + +TEST_F(DelayBasedBweTest, ProbeDetectionSlowerArrivalHighBitrate) { + int64_t now_ms = clock_.TimeInMilliseconds(); + // Burst sent at 8 * 1000 / 1 = 8000 kbps. + // Arriving at 8 * 1000 / 2 = 4000 kbps. 
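+  // (Worked numbers, for clarity: each feedback below is for a 1000 byte
+  // packet, i.e. 8 * 1000 = 8000 bits, so sending one packet per 1 ms gives
+  // 8000 kbps and receiving one per 2 ms gives 4000 kbps; the EXPECT_NEAR
+  // at the end then checks kTargetUtilizationFraction * 4000000 bps.)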
+ // Since the receive rate is significantly below the send rate, we expect to + // use 95% of the estimated capacity. + int64_t send_time_ms = 0; + for (int i = 0; i < kNumProbesCluster1; ++i) { + clock_.AdvanceTimeMilliseconds(2); + send_time_ms += 1; + now_ms = clock_.TimeInMilliseconds(); + IncomingFeedback(now_ms, send_time_ms, 1000, kPacingInfo1); + } + + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_NEAR(bitrate_observer_.latest_bitrate(), + kTargetUtilizationFraction * 4000000u, 10000u); +} + +TEST_F(DelayBasedBweTest, GetExpectedBwePeriodMs) { + auto default_interval = bitrate_estimator_->GetExpectedBwePeriod(); + EXPECT_GT(default_interval.ms(), 0); + CapacityDropTestHelper(1, true, 333, 0); + auto interval = bitrate_estimator_->GetExpectedBwePeriod(); + EXPECT_GT(interval.ms(), 0); + EXPECT_NE(interval.ms(), default_interval.ms()); +} + +TEST_F(DelayBasedBweTest, InitialBehavior) { + InitialBehaviorTestHelper(730000); +} + +TEST_F(DelayBasedBweTest, RateIncreaseReordering) { + RateIncreaseReorderingTestHelper(730000); +} +TEST_F(DelayBasedBweTest, RateIncreaseRtpTimestamps) { + RateIncreaseRtpTimestampsTestHelper(622); +} + +TEST_F(DelayBasedBweTest, CapacityDropOneStream) { + CapacityDropTestHelper(1, false, 300, 0); +} + +TEST_F(DelayBasedBweTest, CapacityDropPosOffsetChange) { + CapacityDropTestHelper(1, false, 867, 30000); +} + +TEST_F(DelayBasedBweTest, CapacityDropNegOffsetChange) { + CapacityDropTestHelper(1, false, 933, -30000); +} + +TEST_F(DelayBasedBweTest, CapacityDropOneStreamWrap) { + CapacityDropTestHelper(1, true, 333, 0); +} + +TEST_F(DelayBasedBweTest, TestTimestampGrouping) { + TestTimestampGroupingTestHelper(); +} + +TEST_F(DelayBasedBweTest, TestShortTimeoutAndWrap) { + // Simulate a client leaving and rejoining the call after 35 seconds. This + // will make abs send time wrap, so if streams aren't timed out properly + // the next 30 seconds of packets will be out of order. + TestWrappingHelper(35); +} + +TEST_F(DelayBasedBweTest, TestLongTimeoutAndWrap) { + // Simulate a client leaving and rejoining the call after some multiple of + // 64 seconds later. This will cause a zero difference in abs send times due + // to the wrap, but a big difference in arrival time, if streams aren't + // properly timed out. + TestWrappingHelper(10 * 64); +} + +TEST_F(DelayBasedBweTest, TestInitialOveruse) { + const DataRate kStartBitrate = DataRate::KilobitsPerSec(300); + const DataRate kInitialCapacity = DataRate::KilobitsPerSec(200); + const uint32_t kDummySsrc = 0; + // High FPS to ensure that we send a lot of packets in a short time. + const int kFps = 90; + + stream_generator_->AddStream(new test::RtpStream(kFps, kStartBitrate.bps())); + stream_generator_->set_capacity_bps(kInitialCapacity.bps()); + + // Needed to initialize the AimdRateControl. + bitrate_estimator_->SetStartBitrate(kStartBitrate); + + // Produce 30 frames (in 1/3 second) and give them to the estimator. + int64_t bitrate_bps = kStartBitrate.bps(); + bool seen_overuse = false; + for (int i = 0; i < 30; ++i) { + bool overuse = GenerateAndProcessFrame(kDummySsrc, bitrate_bps); + // The purpose of this test is to ensure that we back down even if we don't + // have any acknowledged bitrate estimate yet. Hence, if the test works + // as expected, we should not have a measured bitrate yet. 
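+    // The rule exercised here (from DelayBasedBwe::MaybeUpdateEstimate):
+    // while overusing with no acknowledged bitrate, the estimate is halved,
+    //   rate_control_.SetEstimate(rate_control_.LatestEstimate() / 2, at_time);
+    // so the 300 kbps start rate should land near 150 kbps, matching the
+    // EXPECT_NEAR(..., kStartBitrate.bps() / 2, 15000) checks below.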
+    EXPECT_FALSE(acknowledged_bitrate_estimator_->bitrate().has_value());
+    if (overuse) {
+      EXPECT_TRUE(bitrate_observer_.updated());
+      EXPECT_NEAR(bitrate_observer_.latest_bitrate(), kStartBitrate.bps() / 2,
+                  15000);
+      bitrate_bps = bitrate_observer_.latest_bitrate();
+      seen_overuse = true;
+      break;
+    } else if (bitrate_observer_.updated()) {
+      bitrate_bps = bitrate_observer_.latest_bitrate();
+      bitrate_observer_.Reset();
+    }
+  }
+  EXPECT_TRUE(seen_overuse);
+  EXPECT_NEAR(bitrate_observer_.latest_bitrate(), kStartBitrate.bps() / 2,
+              15000);
+}
+
+TEST_F(DelayBasedBweTest, TestTimestampPrecisionHandling) {
+  // This test does some basic checks to make sure that timestamps with higher
+  // than millisecond precision are handled properly and do not cause any
+  // problems in the estimator. Specifically, as previously reported in
+  // webrtc:14023 and described in more detail there, rounding to the nearest
+  // millisecond caused a discrepancy in the accumulated delay, which led to
+  // false-positive overuse detection.
+  // Technical details of the test:
+  // Send times(ms): 0.000, 9.750, 20.000, 29.750, 40.000, 49.750, ...
+  // Recv times(ms): 0.500, 10.000, 20.500, 30.000, 40.500, 50.000, ...
+  // Send deltas(ms): 9.750, 10.250, 9.750, 10.250, 9.750, ...
+  // Recv deltas(ms): 9.500, 10.500, 9.500, 10.500, 9.500, ...
+  // There is no delay building up between the send times and the receive
+  // times, therefore this case should never lead to an overuse detection.
+  // However, if the time deltas were accidentally rounded to the nearest
+  // millisecond, then all the send deltas would be equal to 10 ms while some
+  // recv deltas would round up to 11 ms, which would create a false illusion
+  // of delay build-up.
+  uint32_t last_bitrate = bitrate_observer_.latest_bitrate();
+  for (int i = 0; i < 1000; ++i) {
+    clock_.AdvanceTimeMicroseconds(500);
+    IncomingFeedback(clock_.CurrentTime(),
+                     clock_.CurrentTime() - TimeDelta::Micros(500), 1000,
+                     PacedPacketInfo());
+    clock_.AdvanceTimeMicroseconds(9500);
+    IncomingFeedback(clock_.CurrentTime(),
+                     clock_.CurrentTime() - TimeDelta::Micros(250), 1000,
+                     PacedPacketInfo());
+    clock_.AdvanceTimeMicroseconds(10000);
+
+    // The bitrate should never decrease in this test.
+    EXPECT_LE(last_bitrate, bitrate_observer_.latest_bitrate());
+    last_bitrate = bitrate_observer_.latest_bitrate();
+  }
+}
+
+class DelayBasedBweTestWithBackoffTimeoutExperiment : public DelayBasedBweTest {
+ public:
+  DelayBasedBweTestWithBackoffTimeoutExperiment()
+      : DelayBasedBweTest(
+            "WebRTC-BweAimdRateControlConfig/initial_backoff_interval:200ms/") {
+  }
+};
+
+// This test subsumes and improves DelayBasedBweTest.TestInitialOveruse above.
+TEST_F(DelayBasedBweTestWithBackoffTimeoutExperiment, TestInitialOveruse) {
+  const DataRate kStartBitrate = DataRate::KilobitsPerSec(300);
+  const DataRate kInitialCapacity = DataRate::KilobitsPerSec(200);
+  const uint32_t kDummySsrc = 0;
+  // High FPS to ensure that we send a lot of packets in a short time.
+  const int kFps = 90;
+
+  stream_generator_->AddStream(new test::RtpStream(kFps, kStartBitrate.bps()));
+  stream_generator_->set_capacity_bps(kInitialCapacity.bps());
+
+  // Needed to initialize the AimdRateControl.
+  bitrate_estimator_->SetStartBitrate(kStartBitrate);
+
+  // Produce 30 frames (in 1/3 second) and give them to the estimator.
+ int64_t bitrate_bps = kStartBitrate.bps(); + bool seen_overuse = false; + for (int frames = 0; frames < 30 && !seen_overuse; ++frames) { + bool overuse = GenerateAndProcessFrame(kDummySsrc, bitrate_bps); + // The purpose of this test is to ensure that we back down even if we don't + // have any acknowledged bitrate estimate yet. Hence, if the test works + // as expected, we should not have a measured bitrate yet. + EXPECT_FALSE(acknowledged_bitrate_estimator_->bitrate().has_value()); + if (overuse) { + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_NEAR(bitrate_observer_.latest_bitrate(), kStartBitrate.bps() / 2, + 15000); + bitrate_bps = bitrate_observer_.latest_bitrate(); + seen_overuse = true; + } else if (bitrate_observer_.updated()) { + bitrate_bps = bitrate_observer_.latest_bitrate(); + bitrate_observer_.Reset(); + } + } + EXPECT_TRUE(seen_overuse); + // Continue generating an additional 15 frames (equivalent to 167 ms) and + // verify that we don't back down further. + for (int frames = 0; frames < 15 && seen_overuse; ++frames) { + bool overuse = GenerateAndProcessFrame(kDummySsrc, bitrate_bps); + EXPECT_FALSE(overuse); + if (bitrate_observer_.updated()) { + bitrate_bps = bitrate_observer_.latest_bitrate(); + EXPECT_GE(bitrate_bps, kStartBitrate.bps() / 2 - 15000); + EXPECT_LE(bitrate_bps, kInitialCapacity.bps() + 15000); + bitrate_observer_.Reset(); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.cc new file mode 100644 index 0000000000..8618a7814e --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.cc @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h" + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/congestion_controller/goog_cc/delay_based_bwe.h" +#include "rtc_base/checks.h" + +namespace webrtc { +constexpr size_t kMtu = 1200; +constexpr uint32_t kAcceptedBitrateErrorBps = 50000; + +// Number of packets needed before we have a valid estimate. +constexpr int kNumInitialPackets = 2; + +constexpr int kInitialProbingPackets = 5; + +namespace test { + +void TestBitrateObserver::OnReceiveBitrateChanged(uint32_t bitrate) { + latest_bitrate_ = bitrate; + updated_ = true; +} + +RtpStream::RtpStream(int fps, int bitrate_bps) + : fps_(fps), bitrate_bps_(bitrate_bps), next_rtp_time_(0) { + RTC_CHECK_GT(fps_, 0); +} + +// Generates a new frame for this stream. If called too soon after the +// previous frame, no frame will be generated. The frame is split into +// packets. 
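+// (Worked example of the arithmetic in this function, assuming the default
+// test stream of fps = 30 and bitrate_bps = 300000 with kMtu = 1200:
+// bits_per_frame = (300000 + 15) / 30 = 10000,
+// n_packets = max((10000 + 4800) / 9600, 1) = 1, and
+// payload_size = (10000 + 4) / 8 = 1250 bytes, i.e. one 1250-byte packet per
+// frame, with frames (1000000 + 15) / 30 = 33333 us (~33 ms) apart.)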
+int64_t RtpStream::GenerateFrame(int64_t time_now_us, + std::vector* packets) { + if (time_now_us < next_rtp_time_) { + return next_rtp_time_; + } + RTC_CHECK(packets != NULL); + size_t bits_per_frame = (bitrate_bps_ + fps_ / 2) / fps_; + size_t n_packets = + std::max((bits_per_frame + 4 * kMtu) / (8 * kMtu), 1u); + size_t payload_size = (bits_per_frame + 4 * n_packets) / (8 * n_packets); + for (size_t i = 0; i < n_packets; ++i) { + PacketResult packet; + packet.sent_packet.send_time = + Timestamp::Micros(time_now_us + kSendSideOffsetUs); + packet.sent_packet.size = DataSize::Bytes(payload_size); + packets->push_back(packet); + } + next_rtp_time_ = time_now_us + (1000000 + fps_ / 2) / fps_; + return next_rtp_time_; +} + +// The send-side time when the next frame can be generated. +int64_t RtpStream::next_rtp_time() const { + return next_rtp_time_; +} + +void RtpStream::set_bitrate_bps(int bitrate_bps) { + ASSERT_GE(bitrate_bps, 0); + bitrate_bps_ = bitrate_bps; +} + +int RtpStream::bitrate_bps() const { + return bitrate_bps_; +} + +bool RtpStream::Compare(const std::unique_ptr& lhs, + const std::unique_ptr& rhs) { + return lhs->next_rtp_time_ < rhs->next_rtp_time_; +} + +StreamGenerator::StreamGenerator(int capacity, int64_t time_now) + : capacity_(capacity), prev_arrival_time_us_(time_now) {} + +StreamGenerator::~StreamGenerator() = default; + +// Add a new stream. +void StreamGenerator::AddStream(RtpStream* stream) { + streams_.push_back(std::unique_ptr(stream)); +} + +// Set the link capacity. +void StreamGenerator::set_capacity_bps(int capacity_bps) { + ASSERT_GT(capacity_bps, 0); + capacity_ = capacity_bps; +} + +// Divides `bitrate_bps` among all streams. The allocated bitrate per stream +// is decided by the current allocation ratios. +void StreamGenerator::SetBitrateBps(int bitrate_bps) { + ASSERT_GE(streams_.size(), 0u); + int total_bitrate_before = 0; + for (const auto& stream : streams_) { + total_bitrate_before += stream->bitrate_bps(); + } + int64_t bitrate_before = 0; + int total_bitrate_after = 0; + for (const auto& stream : streams_) { + bitrate_before += stream->bitrate_bps(); + int64_t bitrate_after = + (bitrate_before * bitrate_bps + total_bitrate_before / 2) / + total_bitrate_before; + stream->set_bitrate_bps(bitrate_after - total_bitrate_after); + total_bitrate_after += stream->bitrate_bps(); + } + ASSERT_EQ(bitrate_before, total_bitrate_before); + EXPECT_EQ(total_bitrate_after, bitrate_bps); +} + +// TODO(holmer): Break out the channel simulation part from this class to make +// it possible to simulate different types of channels. 
+int64_t StreamGenerator::GenerateFrame(std::vector* packets, + int64_t time_now_us) { + RTC_CHECK(packets != NULL); + RTC_CHECK(packets->empty()); + RTC_CHECK_GT(capacity_, 0); + auto it = + std::min_element(streams_.begin(), streams_.end(), RtpStream::Compare); + (*it)->GenerateFrame(time_now_us, packets); + for (PacketResult& packet : *packets) { + int capacity_bpus = capacity_ / 1000; + int64_t required_network_time_us = + (8 * 1000 * packet.sent_packet.size.bytes() + capacity_bpus / 2) / + capacity_bpus; + prev_arrival_time_us_ = + std::max(time_now_us + required_network_time_us, + prev_arrival_time_us_ + required_network_time_us); + packet.receive_time = Timestamp::Micros(prev_arrival_time_us_); + } + it = std::min_element(streams_.begin(), streams_.end(), RtpStream::Compare); + return std::max((*it)->next_rtp_time(), time_now_us); +} +} // namespace test + +DelayBasedBweTest::DelayBasedBweTest() : DelayBasedBweTest("") {} + +DelayBasedBweTest::DelayBasedBweTest(absl::string_view field_trial_string) + : field_trial( + std::make_unique(field_trial_string)), + clock_(100000000), + acknowledged_bitrate_estimator_( + AcknowledgedBitrateEstimatorInterface::Create(&field_trial_config_)), + probe_bitrate_estimator_(new ProbeBitrateEstimator(nullptr)), + bitrate_estimator_( + new DelayBasedBwe(&field_trial_config_, nullptr, nullptr)), + stream_generator_(new test::StreamGenerator(1e6, // Capacity. + clock_.TimeInMicroseconds())), + arrival_time_offset_ms_(0), + first_update_(true) {} + +DelayBasedBweTest::~DelayBasedBweTest() {} + +void DelayBasedBweTest::AddDefaultStream() { + stream_generator_->AddStream(new test::RtpStream(30, 3e5)); +} + +const uint32_t DelayBasedBweTest::kDefaultSsrc = 0; + +void DelayBasedBweTest::IncomingFeedback(int64_t arrival_time_ms, + int64_t send_time_ms, + size_t payload_size) { + IncomingFeedback(arrival_time_ms, send_time_ms, payload_size, + PacedPacketInfo()); +} + +void DelayBasedBweTest::IncomingFeedback(int64_t arrival_time_ms, + int64_t send_time_ms, + size_t payload_size, + const PacedPacketInfo& pacing_info) { + RTC_CHECK_GE(arrival_time_ms + arrival_time_offset_ms_, 0); + IncomingFeedback(Timestamp::Millis(arrival_time_ms + arrival_time_offset_ms_), + Timestamp::Millis(send_time_ms), payload_size, pacing_info); +} + +void DelayBasedBweTest::IncomingFeedback(Timestamp receive_time, + Timestamp send_time, + size_t payload_size, + const PacedPacketInfo& pacing_info) { + PacketResult packet; + packet.receive_time = receive_time; + packet.sent_packet.send_time = send_time; + packet.sent_packet.size = DataSize::Bytes(payload_size); + packet.sent_packet.pacing_info = pacing_info; + if (packet.sent_packet.pacing_info.probe_cluster_id != + PacedPacketInfo::kNotAProbe) + probe_bitrate_estimator_->HandleProbeAndEstimateBitrate(packet); + + TransportPacketsFeedback msg; + msg.feedback_time = Timestamp::Millis(clock_.TimeInMilliseconds()); + msg.packet_feedbacks.push_back(packet); + acknowledged_bitrate_estimator_->IncomingPacketFeedbackVector( + msg.SortedByReceiveTime()); + DelayBasedBwe::Result result = + bitrate_estimator_->IncomingPacketFeedbackVector( + msg, acknowledged_bitrate_estimator_->bitrate(), + probe_bitrate_estimator_->FetchAndResetLastEstimatedBitrate(), + /*network_estimate*/ absl::nullopt, /*in_alr*/ false); + if (result.updated) { + bitrate_observer_.OnReceiveBitrateChanged(result.target_bitrate.bps()); + } +} + +// Generates a frame of packets belonging to a stream at a given bitrate and +// with a given ssrc. 
The stream is pushed through a very simple simulated +// network, and is then given to the receive-side bandwidth estimator. +// Returns true if an over-use was seen, false otherwise. +// The StreamGenerator::updated() should be used to check for any changes in +// target bitrate after the call to this function. +bool DelayBasedBweTest::GenerateAndProcessFrame(uint32_t ssrc, + uint32_t bitrate_bps) { + stream_generator_->SetBitrateBps(bitrate_bps); + std::vector packets; + + int64_t next_time_us = + stream_generator_->GenerateFrame(&packets, clock_.TimeInMicroseconds()); + if (packets.empty()) + return false; + + bool overuse = false; + bitrate_observer_.Reset(); + clock_.AdvanceTimeMicroseconds(packets.back().receive_time.us() - + clock_.TimeInMicroseconds()); + for (auto& packet : packets) { + RTC_CHECK_GE(packet.receive_time.ms() + arrival_time_offset_ms_, 0); + packet.receive_time += TimeDelta::Millis(arrival_time_offset_ms_); + + if (packet.sent_packet.pacing_info.probe_cluster_id != + PacedPacketInfo::kNotAProbe) + probe_bitrate_estimator_->HandleProbeAndEstimateBitrate(packet); + } + + acknowledged_bitrate_estimator_->IncomingPacketFeedbackVector(packets); + TransportPacketsFeedback msg; + msg.packet_feedbacks = packets; + msg.feedback_time = Timestamp::Millis(clock_.TimeInMilliseconds()); + + DelayBasedBwe::Result result = + bitrate_estimator_->IncomingPacketFeedbackVector( + msg, acknowledged_bitrate_estimator_->bitrate(), + probe_bitrate_estimator_->FetchAndResetLastEstimatedBitrate(), + /*network_estimate*/ absl::nullopt, /*in_alr*/ false); + if (result.updated) { + bitrate_observer_.OnReceiveBitrateChanged(result.target_bitrate.bps()); + if (!first_update_ && result.target_bitrate.bps() < bitrate_bps) + overuse = true; + first_update_ = false; + } + + clock_.AdvanceTimeMicroseconds(next_time_us - clock_.TimeInMicroseconds()); + return overuse; +} + +// Run the bandwidth estimator with a stream of `number_of_frames` frames, or +// until it reaches `target_bitrate`. +// Can for instance be used to run the estimator for some time to get it +// into a steady state. +uint32_t DelayBasedBweTest::SteadyStateRun(uint32_t ssrc, + int max_number_of_frames, + uint32_t start_bitrate, + uint32_t min_bitrate, + uint32_t max_bitrate, + uint32_t target_bitrate) { + uint32_t bitrate_bps = start_bitrate; + bool bitrate_update_seen = false; + // Produce `number_of_frames` frames and give them to the estimator. + for (int i = 0; i < max_number_of_frames; ++i) { + bool overuse = GenerateAndProcessFrame(ssrc, bitrate_bps); + if (overuse) { + EXPECT_LT(bitrate_observer_.latest_bitrate(), max_bitrate); + EXPECT_GT(bitrate_observer_.latest_bitrate(), min_bitrate); + bitrate_bps = bitrate_observer_.latest_bitrate(); + bitrate_update_seen = true; + } else if (bitrate_observer_.updated()) { + bitrate_bps = bitrate_observer_.latest_bitrate(); + bitrate_observer_.Reset(); + } + if (bitrate_update_seen && bitrate_bps > target_bitrate) { + break; + } + } + EXPECT_TRUE(bitrate_update_seen); + return bitrate_bps; +} + +void DelayBasedBweTest::InitialBehaviorTestHelper( + uint32_t expected_converge_bitrate) { + const int kFramerate = 50; // 50 fps to avoid rounding errors. 
+  const int kFrameIntervalMs = 1000 / kFramerate;
+  const PacedPacketInfo kPacingInfo(0, 5, 5000);
+  DataRate bitrate = DataRate::Zero();
+  int64_t send_time_ms = 0;
+  std::vector<uint32_t> ssrcs;
+  EXPECT_FALSE(bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate));
+  EXPECT_EQ(0u, ssrcs.size());
+  clock_.AdvanceTimeMilliseconds(1000);
+  EXPECT_FALSE(bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate));
+  EXPECT_FALSE(bitrate_observer_.updated());
+  bitrate_observer_.Reset();
+  clock_.AdvanceTimeMilliseconds(1000);
+  // Inserting packets for 5 seconds to get a valid estimate.
+  for (int i = 0; i < 5 * kFramerate + 1 + kNumInitialPackets; ++i) {
+    // NOTE!!! If the following line is moved under the if case then this test
+    // won't work on Windows release bots.
+    PacedPacketInfo pacing_info =
+        i < kInitialProbingPackets ? kPacingInfo : PacedPacketInfo();
+
+    if (i == kNumInitialPackets) {
+      EXPECT_FALSE(bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate));
+      EXPECT_EQ(0u, ssrcs.size());
+      EXPECT_FALSE(bitrate_observer_.updated());
+      bitrate_observer_.Reset();
+    }
+    IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, kMtu,
+                     pacing_info);
+    clock_.AdvanceTimeMilliseconds(1000 / kFramerate);
+    send_time_ms += kFrameIntervalMs;
+  }
+  EXPECT_TRUE(bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate));
+  ASSERT_EQ(1u, ssrcs.size());
+  EXPECT_EQ(kDefaultSsrc, ssrcs.front());
+  EXPECT_NEAR(expected_converge_bitrate, bitrate.bps(),
+              kAcceptedBitrateErrorBps);
+  EXPECT_TRUE(bitrate_observer_.updated());
+  bitrate_observer_.Reset();
+  EXPECT_EQ(bitrate_observer_.latest_bitrate(), bitrate.bps());
+}
+
+void DelayBasedBweTest::RateIncreaseReorderingTestHelper(
+    uint32_t expected_bitrate_bps) {
+  const int kFramerate = 50;  // 50 fps to avoid rounding errors.
+  const int kFrameIntervalMs = 1000 / kFramerate;
+  const PacedPacketInfo kPacingInfo(0, 5, 5000);
+  int64_t send_time_ms = 0;
+  // Inserting packets for five seconds to get a valid estimate.
+  for (int i = 0; i < 5 * kFramerate + 1 + kNumInitialPackets; ++i) {
+    // NOTE!!! If the following line is moved under the if case then this test
+    // won't work on Windows release bots.
+    PacedPacketInfo pacing_info =
+        i < kInitialProbingPackets ? kPacingInfo : PacedPacketInfo();
+
+    // TODO(sprang): Remove this hack once the single stream estimator is gone,
+    // as it doesn't do anything in Process().
+    if (i == kNumInitialPackets) {
+      // Process after we have enough frames to get a valid input rate estimate.
+
+      EXPECT_FALSE(bitrate_observer_.updated());  // No valid estimate.
+    }
+    IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, kMtu,
+                     pacing_info);
+    clock_.AdvanceTimeMilliseconds(kFrameIntervalMs);
+    send_time_ms += kFrameIntervalMs;
+  }
+  EXPECT_TRUE(bitrate_observer_.updated());
+  EXPECT_NEAR(expected_bitrate_bps, bitrate_observer_.latest_bitrate(),
+              kAcceptedBitrateErrorBps);
+  for (int i = 0; i < 10; ++i) {
+    clock_.AdvanceTimeMilliseconds(2 * kFrameIntervalMs);
+    send_time_ms += 2 * kFrameIntervalMs;
+    IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, 1000);
+    IncomingFeedback(clock_.TimeInMilliseconds(),
+                     send_time_ms - kFrameIntervalMs, 1000);
+  }
+  EXPECT_TRUE(bitrate_observer_.updated());
+  EXPECT_NEAR(expected_bitrate_bps, bitrate_observer_.latest_bitrate(),
+              kAcceptedBitrateErrorBps);
+}
+
+// Make sure we initially increase the bitrate as expected.
+void DelayBasedBweTest::RateIncreaseRtpTimestampsTestHelper( + int expected_iterations) { + // This threshold corresponds approximately to increasing linearly with + // bitrate(i) = 1.04 * bitrate(i-1) + 1000 + // until bitrate(i) > 500000, with bitrate(1) ~= 30000. + uint32_t bitrate_bps = 30000; + int iterations = 0; + AddDefaultStream(); + // Feed the estimator with a stream of packets and verify that it reaches + // 500 kbps at the expected time. + while (bitrate_bps < 5e5) { + bool overuse = GenerateAndProcessFrame(kDefaultSsrc, bitrate_bps); + if (overuse) { + EXPECT_GT(bitrate_observer_.latest_bitrate(), bitrate_bps); + bitrate_bps = bitrate_observer_.latest_bitrate(); + bitrate_observer_.Reset(); + } else if (bitrate_observer_.updated()) { + bitrate_bps = bitrate_observer_.latest_bitrate(); + bitrate_observer_.Reset(); + } + ++iterations; + } + ASSERT_EQ(expected_iterations, iterations); +} + +void DelayBasedBweTest::CapacityDropTestHelper( + int number_of_streams, + bool wrap_time_stamp, + uint32_t expected_bitrate_drop_delta, + int64_t receiver_clock_offset_change_ms) { + const int kFramerate = 30; + const int kStartBitrate = 900e3; + const int kMinExpectedBitrate = 800e3; + const int kMaxExpectedBitrate = 1100e3; + const uint32_t kInitialCapacityBps = 1000e3; + const uint32_t kReducedCapacityBps = 500e3; + + int steady_state_time = 0; + if (number_of_streams <= 1) { + steady_state_time = 10; + AddDefaultStream(); + } else { + steady_state_time = 10 * number_of_streams; + int bitrate_sum = 0; + int kBitrateDenom = number_of_streams * (number_of_streams - 1); + for (int i = 0; i < number_of_streams; i++) { + // First stream gets half available bitrate, while the rest share the + // remaining half i.e.: 1/2 = Sum[n/(N*(N-1))] for n=1..N-1 (rounded up) + int bitrate = kStartBitrate / 2; + if (i > 0) { + bitrate = (kStartBitrate * i + kBitrateDenom / 2) / kBitrateDenom; + } + stream_generator_->AddStream(new test::RtpStream(kFramerate, bitrate)); + bitrate_sum += bitrate; + } + ASSERT_EQ(bitrate_sum, kStartBitrate); + } + + // Run in steady state to make the estimator converge. + stream_generator_->set_capacity_bps(kInitialCapacityBps); + uint32_t bitrate_bps = SteadyStateRun( + kDefaultSsrc, steady_state_time * kFramerate, kStartBitrate, + kMinExpectedBitrate, kMaxExpectedBitrate, kInitialCapacityBps); + EXPECT_NEAR(kInitialCapacityBps, bitrate_bps, 180000u); + bitrate_observer_.Reset(); + + // Add an offset to make sure the BWE can handle it. + arrival_time_offset_ms_ += receiver_clock_offset_change_ms; + + // Reduce the capacity and verify the decrease time. + stream_generator_->set_capacity_bps(kReducedCapacityBps); + int64_t overuse_start_time = clock_.TimeInMilliseconds(); + int64_t bitrate_drop_time = -1; + for (int i = 0; i < 100 * number_of_streams; ++i) { + GenerateAndProcessFrame(kDefaultSsrc, bitrate_bps); + if (bitrate_drop_time == -1 && + bitrate_observer_.latest_bitrate() <= kReducedCapacityBps) { + bitrate_drop_time = clock_.TimeInMilliseconds(); + } + if (bitrate_observer_.updated()) + bitrate_bps = bitrate_observer_.latest_bitrate(); + } + + EXPECT_NEAR(expected_bitrate_drop_delta, + bitrate_drop_time - overuse_start_time, 33); +} + +void DelayBasedBweTest::TestTimestampGroupingTestHelper() { + const int kFramerate = 50; // 50 fps to avoid rounding errors. + const int kFrameIntervalMs = 1000 / kFramerate; + int64_t send_time_ms = 0; + // Initial set of frames to increase the bitrate. 
6 seconds to have enough + // time for the first estimate to be generated and for Process() to be called. + for (int i = 0; i <= 6 * kFramerate; ++i) { + IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, 1000); + + clock_.AdvanceTimeMilliseconds(kFrameIntervalMs); + send_time_ms += kFrameIntervalMs; + } + EXPECT_TRUE(bitrate_observer_.updated()); + EXPECT_GE(bitrate_observer_.latest_bitrate(), 400000u); + + // Insert batches of frames which were sent very close in time. Also simulate + // capacity over-use to see that we back off correctly. + const int kTimestampGroupLength = 15; + for (int i = 0; i < 100; ++i) { + for (int j = 0; j < kTimestampGroupLength; ++j) { + // Insert `kTimestampGroupLength` frames with just 1 timestamp ticks in + // between. Should be treated as part of the same group by the estimator. + IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, 100); + clock_.AdvanceTimeMilliseconds(kFrameIntervalMs / kTimestampGroupLength); + send_time_ms += 1; + } + // Increase time until next batch to simulate over-use. + clock_.AdvanceTimeMilliseconds(10); + send_time_ms += kFrameIntervalMs - kTimestampGroupLength; + } + EXPECT_TRUE(bitrate_observer_.updated()); + // Should have reduced the estimate. + EXPECT_LT(bitrate_observer_.latest_bitrate(), 400000u); +} + +void DelayBasedBweTest::TestWrappingHelper(int silence_time_s) { + const int kFramerate = 100; + const int kFrameIntervalMs = 1000 / kFramerate; + int64_t send_time_ms = 0; + + for (size_t i = 0; i < 3000; ++i) { + IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, 1000); + clock_.AdvanceTimeMilliseconds(kFrameIntervalMs); + send_time_ms += kFrameIntervalMs; + } + DataRate bitrate_before = DataRate::Zero(); + std::vector ssrcs; + bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate_before); + + clock_.AdvanceTimeMilliseconds(silence_time_s * 1000); + send_time_ms += silence_time_s * 1000; + + for (size_t i = 0; i < 24; ++i) { + IncomingFeedback(clock_.TimeInMilliseconds(), send_time_ms, 1000); + clock_.AdvanceTimeMilliseconds(2 * kFrameIntervalMs); + send_time_ms += kFrameIntervalMs; + } + DataRate bitrate_after = DataRate::Zero(); + bitrate_estimator_->LatestEstimate(&ssrcs, &bitrate_after); + EXPECT_LT(bitrate_after, bitrate_before); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h new file mode 100644 index 0000000000..d56fe892d5 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_based_bwe_unittest_helper.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_UNITTEST_HELPER_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_UNITTEST_HELPER_H_ + +#include +#include + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "api/transport/field_trial_based_config.h" +#include "api/transport/network_types.h" +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.h" +#include "modules/congestion_controller/goog_cc/delay_based_bwe.h" +#include "system_wrappers/include/clock.h" +#include "test/field_trial.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +class TestBitrateObserver { + public: + TestBitrateObserver() : updated_(false), latest_bitrate_(0) {} + ~TestBitrateObserver() {} + + void OnReceiveBitrateChanged(uint32_t bitrate); + + void Reset() { updated_ = false; } + + bool updated() const { return updated_; } + + uint32_t latest_bitrate() const { return latest_bitrate_; } + + private: + bool updated_; + uint32_t latest_bitrate_; +}; + +class RtpStream { + public: + enum { kSendSideOffsetUs = 1000000 }; + + RtpStream(int fps, int bitrate_bps); + + RtpStream(const RtpStream&) = delete; + RtpStream& operator=(const RtpStream&) = delete; + + // Generates a new frame for this stream. If called too soon after the + // previous frame, no frame will be generated. The frame is split into + // packets. + int64_t GenerateFrame(int64_t time_now_us, + std::vector* packets); + + // The send-side time when the next frame can be generated. + int64_t next_rtp_time() const; + + void set_bitrate_bps(int bitrate_bps); + + int bitrate_bps() const; + + static bool Compare(const std::unique_ptr& lhs, + const std::unique_ptr& rhs); + + private: + int fps_; + int bitrate_bps_; + int64_t next_rtp_time_; +}; + +class StreamGenerator { + public: + StreamGenerator(int capacity, int64_t time_now); + ~StreamGenerator(); + + StreamGenerator(const StreamGenerator&) = delete; + StreamGenerator& operator=(const StreamGenerator&) = delete; + + // Add a new stream. + void AddStream(RtpStream* stream); + + // Set the link capacity. + void set_capacity_bps(int capacity_bps); + + // Divides `bitrate_bps` among all streams. The allocated bitrate per stream + // is decided by the initial allocation ratios. + void SetBitrateBps(int bitrate_bps); + + // Set the RTP timestamp offset for the stream identified by `ssrc`. + void set_rtp_timestamp_offset(uint32_t ssrc, uint32_t offset); + + // TODO(holmer): Break out the channel simulation part from this class to make + // it possible to simulate different types of channels. + int64_t GenerateFrame(std::vector* packets, + int64_t time_now_us); + + private: + // Capacity of the simulated channel in bits per second. + int capacity_; + // The time when the last packet arrived. + int64_t prev_arrival_time_us_; + // All streams being transmitted on this simulated channel. + std::vector> streams_; +}; +} // namespace test + +class DelayBasedBweTest : public ::testing::Test { + public: + DelayBasedBweTest(); + explicit DelayBasedBweTest(absl::string_view field_trial_string); + ~DelayBasedBweTest() override; + + protected: + void AddDefaultStream(); + + // Helpers to insert a single packet into the delay-based BWE. 
+ void IncomingFeedback(int64_t arrival_time_ms, + int64_t send_time_ms, + size_t payload_size); + void IncomingFeedback(int64_t arrival_time_ms, + int64_t send_time_ms, + size_t payload_size, + const PacedPacketInfo& pacing_info); + void IncomingFeedback(Timestamp receive_time, + Timestamp send_time, + size_t payload_size, + const PacedPacketInfo& pacing_info); + + // Generates a frame of packets belonging to a stream at a given bitrate and + // with a given ssrc. The stream is pushed through a very simple simulated + // network, and is then given to the receive-side bandwidth estimator. + // Returns true if an over-use was seen, false otherwise. + // The StreamGenerator::updated() should be used to check for any changes in + // target bitrate after the call to this function. + bool GenerateAndProcessFrame(uint32_t ssrc, uint32_t bitrate_bps); + + // Run the bandwidth estimator with a stream of `number_of_frames` frames, or + // until it reaches `target_bitrate`. + // Can for instance be used to run the estimator for some time to get it + // into a steady state. + uint32_t SteadyStateRun(uint32_t ssrc, + int number_of_frames, + uint32_t start_bitrate, + uint32_t min_bitrate, + uint32_t max_bitrate, + uint32_t target_bitrate); + + void TestTimestampGroupingTestHelper(); + + void TestWrappingHelper(int silence_time_s); + + void InitialBehaviorTestHelper(uint32_t expected_converge_bitrate); + void RateIncreaseReorderingTestHelper(uint32_t expected_bitrate); + void RateIncreaseRtpTimestampsTestHelper(int expected_iterations); + void CapacityDropTestHelper(int number_of_streams, + bool wrap_time_stamp, + uint32_t expected_bitrate_drop_delta, + int64_t receiver_clock_offset_change_ms); + + static const uint32_t kDefaultSsrc; + FieldTrialBasedConfig field_trial_config_; + + std::unique_ptr + field_trial; // Must be initialized first. + SimulatedClock clock_; // Time at the receiver. + test::TestBitrateObserver bitrate_observer_; + std::unique_ptr + acknowledged_bitrate_estimator_; + const std::unique_ptr probe_bitrate_estimator_; + std::unique_ptr bitrate_estimator_; + std::unique_ptr stream_generator_; + int64_t arrival_time_offset_ms_; + bool first_update_; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_BASED_BWE_UNITTEST_HELPER_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_increase_detector_interface.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_increase_detector_interface.h new file mode 100644 index 0000000000..fc12cff7d5 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/delay_increase_detector_interface.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_INCREASE_DETECTOR_INTERFACE_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_INCREASE_DETECTOR_INTERFACE_H_ + +#include + +#include "api/network_state_predictor.h" + +namespace webrtc { + +class DelayIncreaseDetectorInterface { + public: + DelayIncreaseDetectorInterface() {} + virtual ~DelayIncreaseDetectorInterface() {} + + DelayIncreaseDetectorInterface(const DelayIncreaseDetectorInterface&) = + delete; + DelayIncreaseDetectorInterface& operator=( + const DelayIncreaseDetectorInterface&) = delete; + + // Update the detector with a new sample. The deltas should represent deltas + // between timestamp groups as defined by the InterArrival class. + virtual void Update(double recv_delta_ms, + double send_delta_ms, + int64_t send_time_ms, + int64_t arrival_time_ms, + size_t packet_size, + bool calculated_deltas) = 0; + + virtual BandwidthUsage State() const = 0; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_DELAY_INCREASE_DETECTOR_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/estimators_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/estimators_gn/moz.build new file mode 100644 index 0000000000..b15062af41 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/estimators_gn/moz.build @@ -0,0 +1,238 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/bitrate_estimator.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.cc", + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] 
= True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if 
CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("estimators_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_gn/moz.build new file mode 100644 index 0000000000..973d4bf32f --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True 
+ DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("goog_cc_gn") diff --git 
a/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.cc new file mode 100644 index 0000000000..3a9de8c4dc --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.cc @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/congestion_controller/goog_cc/goog_cc_network_control.h" + +#include <inttypes.h> +#include <stdio.h> + +#include <algorithm> +#include <cstdint> +#include <memory> +#include <numeric> +#include <string> +#include <utility> +#include <vector> + +#include "absl/strings/match.h" +#include "api/network_state_predictor.h" +#include "api/units/data_rate.h" +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" +#include "logging/rtc_event_log/events/rtc_event_remote_estimate.h" +#include "modules/congestion_controller/goog_cc/alr_detector.h" +#include "modules/congestion_controller/goog_cc/loss_based_bwe_v2.h" +#include "modules/congestion_controller/goog_cc/probe_controller.h" +#include "modules/remote_bitrate_estimator/include/bwe_defines.h" +#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { +// From RTCPSender video report interval. +constexpr TimeDelta kLossUpdateInterval = TimeDelta::Millis(1000); + +// Pacing-rate relative to our target send rate. +// Multiplicative factor that is applied to the target bitrate to calculate +// the number of bytes that can be transmitted per interval. +// Increasing this factor will result in lower delays in cases of bitrate +// overshoots from the encoder. +constexpr float kDefaultPaceMultiplier = 2.5f; + +// If the probe result is far below the current throughput estimate +// it's unlikely that the probe is accurate, so we don't want to drop too far. +// However, if we actually are overusing, we want to drop to something slightly +// below the current throughput estimate to drain the network queues. 
+constexpr double kProbeDropThroughputFraction = 0.85; + +bool IsEnabled(const FieldTrialsView* config, absl::string_view key) { + return absl::StartsWith(config->Lookup(key), "Enabled"); +} + +bool IsNotDisabled(const FieldTrialsView* config, absl::string_view key) { + return !absl::StartsWith(config->Lookup(key), "Disabled"); +} + +BandwidthLimitedCause GetBandwidthLimitedCause( + LossBasedState loss_based_state, + BandwidthUsage bandwidth_usage, + bool not_probe_if_delay_increased) { + if (not_probe_if_delay_increased && + (bandwidth_usage == BandwidthUsage::kBwOverusing || + bandwidth_usage == BandwidthUsage::kBwUnderusing)) { + return BandwidthLimitedCause::kDelayBasedLimitedDelayIncreased; + } + switch (loss_based_state) { + case LossBasedState::kDecreasing: + return BandwidthLimitedCause::kLossLimitedBweDecreasing; + case LossBasedState::kIncreasing: + return BandwidthLimitedCause::kLossLimitedBweIncreasing; + default: + return BandwidthLimitedCause::kDelayBasedLimited; + } +} + +} // namespace + +GoogCcNetworkController::GoogCcNetworkController(NetworkControllerConfig config, + GoogCcConfig goog_cc_config) + : key_value_config_(config.key_value_config ? config.key_value_config + : &trial_based_config_), + event_log_(config.event_log), + packet_feedback_only_(goog_cc_config.feedback_only), + safe_reset_on_route_change_("Enabled"), + safe_reset_acknowledged_rate_("ack"), + use_min_allocatable_as_lower_bound_( + IsNotDisabled(key_value_config_, "WebRTC-Bwe-MinAllocAsLowerBound")), + ignore_probes_lower_than_network_estimate_(IsNotDisabled( + key_value_config_, + "WebRTC-Bwe-IgnoreProbesLowerThanNetworkStateEstimate")), + limit_probes_lower_than_throughput_estimate_( + IsEnabled(key_value_config_, + "WebRTC-Bwe-LimitProbesLowerThanThroughputEstimate")), + rate_control_settings_( + RateControlSettings::ParseFromKeyValueConfig(key_value_config_)), + pace_at_max_of_bwe_and_lower_link_capacity_( + IsEnabled(key_value_config_, + "WebRTC-Bwe-PaceAtMaxOfBweAndLowerLinkCapacity")), + probe_controller_( + new ProbeController(key_value_config_, config.event_log)), + congestion_window_pushback_controller_( + rate_control_settings_.UseCongestionWindowPushback() + ? 
std::make_unique<CongestionWindowPushbackController>( + key_value_config_) + : nullptr), + bandwidth_estimation_( + std::make_unique<SendSideBandwidthEstimation>(key_value_config_, + event_log_)), + alr_detector_( + std::make_unique<AlrDetector>(key_value_config_, config.event_log)), + probe_bitrate_estimator_(new ProbeBitrateEstimator(config.event_log)), + network_estimator_(std::move(goog_cc_config.network_state_estimator)), + network_state_predictor_( + std::move(goog_cc_config.network_state_predictor)), + delay_based_bwe_(new DelayBasedBwe(key_value_config_, + event_log_, + network_state_predictor_.get())), + acknowledged_bitrate_estimator_( + AcknowledgedBitrateEstimatorInterface::Create(key_value_config_)), + initial_config_(config), + last_loss_based_target_rate_(*config.constraints.starting_rate), + last_pushback_target_rate_(last_loss_based_target_rate_), + last_stable_target_rate_(last_loss_based_target_rate_), + pacing_factor_(config.stream_based_config.pacing_factor.value_or( + kDefaultPaceMultiplier)), + min_total_allocated_bitrate_( + config.stream_based_config.min_total_allocated_bitrate.value_or( + DataRate::Zero())), + max_padding_rate_(config.stream_based_config.max_padding_rate.value_or( + DataRate::Zero())) { + RTC_DCHECK(config.constraints.at_time.IsFinite()); + ParseFieldTrial( + {&safe_reset_on_route_change_, &safe_reset_acknowledged_rate_}, + key_value_config_->Lookup("WebRTC-Bwe-SafeResetOnRouteChange")); + if (delay_based_bwe_) + delay_based_bwe_->SetMinBitrate(kCongestionControllerMinBitrate); +} + +GoogCcNetworkController::~GoogCcNetworkController() {} + +NetworkControlUpdate GoogCcNetworkController::OnNetworkAvailability( + NetworkAvailability msg) { + NetworkControlUpdate update; + update.probe_cluster_configs = probe_controller_->OnNetworkAvailability(msg); + return update; +} + +NetworkControlUpdate GoogCcNetworkController::OnNetworkRouteChange( + NetworkRouteChange msg) { + if (safe_reset_on_route_change_) { + absl::optional<DataRate> estimated_bitrate; + if (safe_reset_acknowledged_rate_) { + estimated_bitrate = acknowledged_bitrate_estimator_->bitrate(); + if (!estimated_bitrate) + estimated_bitrate = acknowledged_bitrate_estimator_->PeekRate(); + } else { + estimated_bitrate = bandwidth_estimation_->target_rate(); + } + if (estimated_bitrate) { + if (msg.constraints.starting_rate) { + msg.constraints.starting_rate = + std::min(*msg.constraints.starting_rate, *estimated_bitrate); + } else { + msg.constraints.starting_rate = estimated_bitrate; + } + } + } + + acknowledged_bitrate_estimator_ = + AcknowledgedBitrateEstimatorInterface::Create(key_value_config_); + probe_bitrate_estimator_.reset(new ProbeBitrateEstimator(event_log_)); + if (network_estimator_) + network_estimator_->OnRouteChange(msg); + delay_based_bwe_.reset(new DelayBasedBwe(key_value_config_, event_log_, + network_state_predictor_.get())); + bandwidth_estimation_->OnRouteChange(); + probe_controller_->Reset(msg.at_time); + NetworkControlUpdate update; + update.probe_cluster_configs = ResetConstraints(msg.constraints); + MaybeTriggerOnNetworkChanged(&update, msg.at_time); + return update; +} + +NetworkControlUpdate GoogCcNetworkController::OnProcessInterval( + ProcessInterval msg) { + NetworkControlUpdate update; + if (initial_config_) { + update.probe_cluster_configs = + ResetConstraints(initial_config_->constraints); + update.pacer_config = GetPacingRates(msg.at_time); + + if (initial_config_->stream_based_config.requests_alr_probing) { + probe_controller_->EnablePeriodicAlrProbing( + *initial_config_->stream_based_config.requests_alr_probing); + } + absl::optional<DataRate> 
total_bitrate = + initial_config_->stream_based_config.max_total_allocated_bitrate; + if (total_bitrate) { + auto probes = probe_controller_->OnMaxTotalAllocatedBitrate( + *total_bitrate, msg.at_time); + update.probe_cluster_configs.insert(update.probe_cluster_configs.end(), + probes.begin(), probes.end()); + } + initial_config_.reset(); + } + if (congestion_window_pushback_controller_ && msg.pacer_queue) { + congestion_window_pushback_controller_->UpdatePacingQueue( + msg.pacer_queue->bytes()); + } + bandwidth_estimation_->UpdateEstimate(msg.at_time); + absl::optional<int64_t> start_time_ms = + alr_detector_->GetApplicationLimitedRegionStartTime(); + probe_controller_->SetAlrStartTimeMs(start_time_ms); + + auto probes = probe_controller_->Process(msg.at_time); + update.probe_cluster_configs.insert(update.probe_cluster_configs.end(), + probes.begin(), probes.end()); + + if (rate_control_settings_.UseCongestionWindow() && + last_packet_received_time_.IsFinite() && !feedback_max_rtts_.empty()) { + UpdateCongestionWindowSize(); + } + if (congestion_window_pushback_controller_ && current_data_window_) { + congestion_window_pushback_controller_->SetDataWindow( + *current_data_window_); + } else { + update.congestion_window = current_data_window_; + } + MaybeTriggerOnNetworkChanged(&update, msg.at_time); + return update; +} + +NetworkControlUpdate GoogCcNetworkController::OnRemoteBitrateReport( + RemoteBitrateReport msg) { + if (packet_feedback_only_) { + RTC_LOG(LS_ERROR) << "Received REMB for packet feedback only GoogCC"; + return NetworkControlUpdate(); + } + bandwidth_estimation_->UpdateReceiverEstimate(msg.receive_time, + msg.bandwidth); + BWE_TEST_LOGGING_PLOT(1, "REMB_kbps", msg.receive_time.ms(), + msg.bandwidth.bps() / 1000); + return NetworkControlUpdate(); +} + +NetworkControlUpdate GoogCcNetworkController::OnRoundTripTimeUpdate( + RoundTripTimeUpdate msg) { + if (packet_feedback_only_ || msg.smoothed) + return NetworkControlUpdate(); + RTC_DCHECK(!msg.round_trip_time.IsZero()); + if (delay_based_bwe_) + delay_based_bwe_->OnRttUpdate(msg.round_trip_time); + bandwidth_estimation_->UpdateRtt(msg.round_trip_time, msg.receive_time); + return NetworkControlUpdate(); +} + +NetworkControlUpdate GoogCcNetworkController::OnSentPacket( + SentPacket sent_packet) { + alr_detector_->OnBytesSent(sent_packet.size.bytes(), + sent_packet.send_time.ms()); + acknowledged_bitrate_estimator_->SetAlr( + alr_detector_->GetApplicationLimitedRegionStartTime().has_value()); + + if (!first_packet_sent_) { + first_packet_sent_ = true; + // Initialize feedback time to send time to allow estimation of RTT until + // first feedback is received. 
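+ // (Illustrative note, not upstream commentary: the zero-RTT sample seeded + // at send time below gives the estimator a finite baseline for timeout + // logic before any real feedback has arrived.)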
+ bandwidth_estimation_->UpdatePropagationRtt(sent_packet.send_time, + TimeDelta::Zero()); + } + bandwidth_estimation_->OnSentPacket(sent_packet); + + if (congestion_window_pushback_controller_) { + congestion_window_pushback_controller_->UpdateOutstandingData( + sent_packet.data_in_flight.bytes()); + NetworkControlUpdate update; + MaybeTriggerOnNetworkChanged(&update, sent_packet.send_time); + return update; + } else { + return NetworkControlUpdate(); + } +} + +NetworkControlUpdate GoogCcNetworkController::OnReceivedPacket( + ReceivedPacket received_packet) { + last_packet_received_time_ = received_packet.receive_time; + return NetworkControlUpdate(); +} + +NetworkControlUpdate GoogCcNetworkController::OnStreamsConfig( + StreamsConfig msg) { + NetworkControlUpdate update; + if (msg.requests_alr_probing) { + probe_controller_->EnablePeriodicAlrProbing(*msg.requests_alr_probing); + } + if (msg.max_total_allocated_bitrate) { + update.probe_cluster_configs = + probe_controller_->OnMaxTotalAllocatedBitrate( + *msg.max_total_allocated_bitrate, msg.at_time); + } + + bool pacing_changed = false; + if (msg.pacing_factor && *msg.pacing_factor != pacing_factor_) { + pacing_factor_ = *msg.pacing_factor; + pacing_changed = true; + } + if (msg.min_total_allocated_bitrate && + *msg.min_total_allocated_bitrate != min_total_allocated_bitrate_) { + min_total_allocated_bitrate_ = *msg.min_total_allocated_bitrate; + pacing_changed = true; + + if (use_min_allocatable_as_lower_bound_) { + ClampConstraints(); + delay_based_bwe_->SetMinBitrate(min_data_rate_); + bandwidth_estimation_->SetMinMaxBitrate(min_data_rate_, max_data_rate_); + } + } + if (msg.max_padding_rate && *msg.max_padding_rate != max_padding_rate_) { + max_padding_rate_ = *msg.max_padding_rate; + pacing_changed = true; + } + + if (pacing_changed) + update.pacer_config = GetPacingRates(msg.at_time); + return update; +} + +NetworkControlUpdate GoogCcNetworkController::OnTargetRateConstraints( + TargetRateConstraints constraints) { + NetworkControlUpdate update; + update.probe_cluster_configs = ResetConstraints(constraints); + MaybeTriggerOnNetworkChanged(&update, constraints.at_time); + return update; +} + +void GoogCcNetworkController::ClampConstraints() { + // TODO(holmer): We should make sure the default bitrates are set to 10 kbps, + // and that we don't try to set the min bitrate to 0 from any applications. + // The congestion controller should allow a min bitrate of 0. 
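+ // In effect (illustrative restatement of the clamp below): min_data_rate_ + // becomes max(min_target_rate_, kCongestionControllerMinBitrate, and, if + // enabled, min_total_allocated_bitrate_); max_data_rate_ and starting_rate_ + // are then raised to at least min_data_rate_.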
min_data_rate_ = std::max(min_target_rate_, kCongestionControllerMinBitrate); + if (use_min_allocatable_as_lower_bound_) { + min_data_rate_ = std::max(min_data_rate_, min_total_allocated_bitrate_); + } + if (max_data_rate_ < min_data_rate_) { + RTC_LOG(LS_WARNING) << "max bitrate smaller than min bitrate"; + max_data_rate_ = min_data_rate_; + } + if (starting_rate_ && starting_rate_ < min_data_rate_) { + RTC_LOG(LS_WARNING) << "start bitrate smaller than min bitrate"; + starting_rate_ = min_data_rate_; + } +} + +std::vector<ProbeClusterConfig> GoogCcNetworkController::ResetConstraints( + TargetRateConstraints new_constraints) { + min_target_rate_ = new_constraints.min_data_rate.value_or(DataRate::Zero()); + max_data_rate_ = + new_constraints.max_data_rate.value_or(DataRate::PlusInfinity()); + starting_rate_ = new_constraints.starting_rate; + ClampConstraints(); + + bandwidth_estimation_->SetBitrates(starting_rate_, min_data_rate_, + max_data_rate_, new_constraints.at_time); + + if (starting_rate_) + delay_based_bwe_->SetStartBitrate(*starting_rate_); + delay_based_bwe_->SetMinBitrate(min_data_rate_); + + return probe_controller_->SetBitrates( + min_data_rate_, starting_rate_.value_or(DataRate::Zero()), max_data_rate_, + new_constraints.at_time); +} + +NetworkControlUpdate GoogCcNetworkController::OnTransportLossReport( + TransportLossReport msg) { + if (packet_feedback_only_) + return NetworkControlUpdate(); + int64_t total_packets_delta = + msg.packets_received_delta + msg.packets_lost_delta; + bandwidth_estimation_->UpdatePacketsLost( + msg.packets_lost_delta, total_packets_delta, msg.receive_time); + return NetworkControlUpdate(); +} + +void GoogCcNetworkController::UpdateCongestionWindowSize() { + TimeDelta min_feedback_max_rtt = TimeDelta::Millis( + *std::min_element(feedback_max_rtts_.begin(), feedback_max_rtts_.end())); + + const DataSize kMinCwnd = DataSize::Bytes(2 * 1500); + TimeDelta time_window = + min_feedback_max_rtt + + TimeDelta::Millis( + rate_control_settings_.GetCongestionWindowAdditionalTimeMs()); + + DataSize data_window = last_loss_based_target_rate_ * time_window; + if (current_data_window_) { + data_window = + std::max(kMinCwnd, (data_window + current_data_window_.value()) / 2); + } else { + data_window = std::max(kMinCwnd, data_window); + } + current_data_window_ = data_window; +} + +NetworkControlUpdate GoogCcNetworkController::OnTransportPacketsFeedback( + TransportPacketsFeedback report) { + if (report.packet_feedbacks.empty()) { + // TODO(bugs.webrtc.org/10125): Design a better mechanism to safe-guard + // against building very large network queues. 
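+ // (Illustrative: an empty feedback report is simply dropped here, so none + // of the estimator updates below run for it.)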
return NetworkControlUpdate(); + } + + if (congestion_window_pushback_controller_) { + congestion_window_pushback_controller_->UpdateOutstandingData( + report.data_in_flight.bytes()); + } + TimeDelta max_feedback_rtt = TimeDelta::MinusInfinity(); + TimeDelta min_propagation_rtt = TimeDelta::PlusInfinity(); + Timestamp max_recv_time = Timestamp::MinusInfinity(); + + std::vector<PacketResult> feedbacks = report.ReceivedWithSendInfo(); + for (const auto& feedback : feedbacks) + max_recv_time = std::max(max_recv_time, feedback.receive_time); + + for (const auto& feedback : feedbacks) { + TimeDelta feedback_rtt = + report.feedback_time - feedback.sent_packet.send_time; + TimeDelta min_pending_time = max_recv_time - feedback.receive_time; + TimeDelta propagation_rtt = feedback_rtt - min_pending_time; + max_feedback_rtt = std::max(max_feedback_rtt, feedback_rtt); + min_propagation_rtt = std::min(min_propagation_rtt, propagation_rtt); + } + + if (max_feedback_rtt.IsFinite()) { + feedback_max_rtts_.push_back(max_feedback_rtt.ms()); + const size_t kMaxFeedbackRttWindow = 32; + if (feedback_max_rtts_.size() > kMaxFeedbackRttWindow) + feedback_max_rtts_.pop_front(); + // TODO(srte): Use time since last unacknowledged packet. + bandwidth_estimation_->UpdatePropagationRtt(report.feedback_time, + min_propagation_rtt); + } + if (packet_feedback_only_) { + if (!feedback_max_rtts_.empty()) { + int64_t sum_rtt_ms = + std::accumulate(feedback_max_rtts_.begin(), feedback_max_rtts_.end(), + static_cast<int64_t>(0)); + int64_t mean_rtt_ms = sum_rtt_ms / feedback_max_rtts_.size(); + if (delay_based_bwe_) + delay_based_bwe_->OnRttUpdate(TimeDelta::Millis(mean_rtt_ms)); + } + + TimeDelta feedback_min_rtt = TimeDelta::PlusInfinity(); + for (const auto& packet_feedback : feedbacks) { + TimeDelta pending_time = packet_feedback.receive_time - max_recv_time; + TimeDelta rtt = report.feedback_time - + packet_feedback.sent_packet.send_time - pending_time; + // Value used for predicting NACK round trip time in FEC controller. 
feedback_min_rtt = std::min(rtt, feedback_min_rtt); + } + if (feedback_min_rtt.IsFinite()) { + bandwidth_estimation_->UpdateRtt(feedback_min_rtt, report.feedback_time); + } + + expected_packets_since_last_loss_update_ += + report.PacketsWithFeedback().size(); + for (const auto& packet_feedback : report.PacketsWithFeedback()) { + if (!packet_feedback.IsReceived()) + lost_packets_since_last_loss_update_ += 1; + } + if (report.feedback_time > next_loss_update_) { + next_loss_update_ = report.feedback_time + kLossUpdateInterval; + bandwidth_estimation_->UpdatePacketsLost( + lost_packets_since_last_loss_update_, + expected_packets_since_last_loss_update_, report.feedback_time); + expected_packets_since_last_loss_update_ = 0; + lost_packets_since_last_loss_update_ = 0; + } + } + absl::optional<Timestamp> alr_start_time = + alr_detector_->GetApplicationLimitedRegionStartTime(); + + if (previously_in_alr_ && !alr_start_time.has_value()) { + int64_t now_ms = report.feedback_time.ms(); + acknowledged_bitrate_estimator_->SetAlrEndedTime(report.feedback_time); + probe_controller_->SetAlrEndedTimeMs(now_ms); + } + previously_in_alr_ = alr_start_time.has_value(); + acknowledged_bitrate_estimator_->IncomingPacketFeedbackVector( + report.SortedByReceiveTime()); + auto acknowledged_bitrate = acknowledged_bitrate_estimator_->bitrate(); + bandwidth_estimation_->SetAcknowledgedRate(acknowledged_bitrate, + report.feedback_time); + for (const auto& feedback : report.SortedByReceiveTime()) { + if (feedback.sent_packet.pacing_info.probe_cluster_id != + PacedPacketInfo::kNotAProbe) { + probe_bitrate_estimator_->HandleProbeAndEstimateBitrate(feedback); + } + } + + if (network_estimator_) { + network_estimator_->OnTransportPacketsFeedback(report); + auto prev_estimate = estimate_; + estimate_ = network_estimator_->GetCurrentEstimate(); + // TODO(srte): Make OnTransportPacketsFeedback signal whether the state + // changed to avoid the need for this check. + if (estimate_ && (!prev_estimate || estimate_->last_feed_time != + prev_estimate->last_feed_time)) { + event_log_->Log(std::make_unique<RtcEventRemoteEstimate>( + estimate_->link_capacity_lower, estimate_->link_capacity_upper)); + probe_controller_->SetNetworkStateEstimate(*estimate_); + } + } + absl::optional<DataRate> probe_bitrate = + probe_bitrate_estimator_->FetchAndResetLastEstimatedBitrate(); + if (ignore_probes_lower_than_network_estimate_ && probe_bitrate && + estimate_ && *probe_bitrate < delay_based_bwe_->last_estimate() && + *probe_bitrate < estimate_->link_capacity_lower) { + probe_bitrate.reset(); + } + if (limit_probes_lower_than_throughput_estimate_ && probe_bitrate && + acknowledged_bitrate) { + // Limit the backoff to something slightly below the acknowledged + // bitrate. ("Slightly below" because we want to drain the queues + // if we are actually overusing.) + // The acknowledged bitrate shouldn't normally be higher than the delay + // based estimate, but it could happen e.g. due to packet bursts or + // encoder overshoot. We use std::min to ensure that a probe result + // below the current BWE never causes an increase. 
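+ // Worked example (illustrative): with acknowledged_bitrate = 1000 kbps and + // kProbeDropThroughputFraction = 0.85, a 600 kbps probe result is raised to + // min(delay based estimate, 850 kbps), i.e. a probe can pull the estimate + // at most ~15% below the measured throughput.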
+ DataRate limit = + std::min(delay_based_bwe_->last_estimate(), + *acknowledged_bitrate * kProbeDropThroughputFraction); + probe_bitrate = std::max(*probe_bitrate, limit); + } + + NetworkControlUpdate update; + bool recovered_from_overuse = false; + + DelayBasedBwe::Result result; + result = delay_based_bwe_->IncomingPacketFeedbackVector( + report, acknowledged_bitrate, probe_bitrate, estimate_, + alr_start_time.has_value()); + + if (result.updated) { + if (result.probe) { + bandwidth_estimation_->SetSendBitrate(result.target_bitrate, + report.feedback_time); + } + // Since SetSendBitrate now resets the delay-based estimate, we have to + // call UpdateDelayBasedEstimate after SetSendBitrate. + bandwidth_estimation_->UpdateDelayBasedEstimate(report.feedback_time, + result.target_bitrate); + } + bandwidth_estimation_->UpdateLossBasedEstimator( + report, result.delay_detector_state, probe_bitrate, + estimate_ ? estimate_->link_capacity_upper : DataRate::PlusInfinity()); + if (result.updated) { + // Update the estimate in the ProbeController, in case we want to probe. + MaybeTriggerOnNetworkChanged(&update, report.feedback_time); + } + + recovered_from_overuse = result.recovered_from_overuse; + + if (recovered_from_overuse) { + probe_controller_->SetAlrStartTimeMs(alr_start_time); + auto probes = probe_controller_->RequestProbe(report.feedback_time); + update.probe_cluster_configs.insert(update.probe_cluster_configs.end(), + probes.begin(), probes.end()); + } + + // No valid RTT could be because send-side BWE isn't used, in which case + // we don't try to limit the outstanding packets. + if (rate_control_settings_.UseCongestionWindow() && + max_feedback_rtt.IsFinite()) { + UpdateCongestionWindowSize(); + } + if (congestion_window_pushback_controller_ && current_data_window_) { + congestion_window_pushback_controller_->SetDataWindow( + *current_data_window_); + } else { + update.congestion_window = current_data_window_; + } + + return update; +} + +NetworkControlUpdate GoogCcNetworkController::OnNetworkStateEstimate( + NetworkStateEstimate msg) { + estimate_ = msg; + return NetworkControlUpdate(); +} + +NetworkControlUpdate GoogCcNetworkController::GetNetworkState( + Timestamp at_time) const { + NetworkControlUpdate update; + update.target_rate = TargetTransferRate(); + update.target_rate->network_estimate.at_time = at_time; + update.target_rate->network_estimate.loss_rate_ratio = + last_estimated_fraction_loss_.value_or(0) / 255.0; + update.target_rate->network_estimate.round_trip_time = + last_estimated_round_trip_time_; + update.target_rate->network_estimate.bwe_period = + delay_based_bwe_->GetExpectedBwePeriod(); + + update.target_rate->at_time = at_time; + update.target_rate->target_rate = last_pushback_target_rate_; + update.target_rate->stable_target_rate = + bandwidth_estimation_->GetEstimatedLinkCapacity(); + update.pacer_config = GetPacingRates(at_time); + update.congestion_window = current_data_window_; + return update; +} + +void GoogCcNetworkController::MaybeTriggerOnNetworkChanged( + NetworkControlUpdate* update, + Timestamp at_time) { + uint8_t fraction_loss = bandwidth_estimation_->fraction_loss(); + TimeDelta round_trip_time = bandwidth_estimation_->round_trip_time(); + DataRate loss_based_target_rate = bandwidth_estimation_->target_rate(); + DataRate pushback_target_rate = loss_based_target_rate; + + BWE_TEST_LOGGING_PLOT(1, "fraction_loss_%", at_time.ms(), + (fraction_loss * 100) / 256); + BWE_TEST_LOGGING_PLOT(1, "rtt_ms", at_time.ms(), round_trip_time.ms()); + 
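// (Illustrative: these BWE_TEST_LOGGING_PLOT macros compile to no-ops unless + // BWE_TEST_LOGGING_COMPILE_TIME_ENABLE is set, as in this build config.) +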
BWE_TEST_LOGGING_PLOT(1, "Target_bitrate_kbps", at_time.ms(), + loss_based_target_rate.kbps()); + + double cwnd_reduce_ratio = 0.0; + if (congestion_window_pushback_controller_) { + int64_t pushback_rate = + congestion_window_pushback_controller_->UpdateTargetBitrate( + loss_based_target_rate.bps()); + pushback_rate = std::max(bandwidth_estimation_->GetMinBitrate(), + pushback_rate); + pushback_target_rate = DataRate::BitsPerSec(pushback_rate); + if (rate_control_settings_.UseCongestionWindowDropFrameOnly()) { + cwnd_reduce_ratio = static_cast(loss_based_target_rate.bps() - + pushback_target_rate.bps()) / + loss_based_target_rate.bps(); + } + } + DataRate stable_target_rate = + bandwidth_estimation_->GetEstimatedLinkCapacity(); + stable_target_rate = std::min(stable_target_rate, pushback_target_rate); + + if ((loss_based_target_rate != last_loss_based_target_rate_) || + (fraction_loss != last_estimated_fraction_loss_) || + (round_trip_time != last_estimated_round_trip_time_) || + (pushback_target_rate != last_pushback_target_rate_) || + (stable_target_rate != last_stable_target_rate_)) { + last_loss_based_target_rate_ = loss_based_target_rate; + last_pushback_target_rate_ = pushback_target_rate; + last_estimated_fraction_loss_ = fraction_loss; + last_estimated_round_trip_time_ = round_trip_time; + last_stable_target_rate_ = stable_target_rate; + + alr_detector_->SetEstimatedBitrate(loss_based_target_rate.bps()); + + TimeDelta bwe_period = delay_based_bwe_->GetExpectedBwePeriod(); + + TargetTransferRate target_rate_msg; + target_rate_msg.at_time = at_time; + if (rate_control_settings_.UseCongestionWindowDropFrameOnly()) { + target_rate_msg.target_rate = loss_based_target_rate; + target_rate_msg.cwnd_reduce_ratio = cwnd_reduce_ratio; + } else { + target_rate_msg.target_rate = pushback_target_rate; + } + target_rate_msg.stable_target_rate = stable_target_rate; + target_rate_msg.network_estimate.at_time = at_time; + target_rate_msg.network_estimate.round_trip_time = round_trip_time; + target_rate_msg.network_estimate.loss_rate_ratio = fraction_loss / 255.0f; + target_rate_msg.network_estimate.bwe_period = bwe_period; + + update->target_rate = target_rate_msg; + + auto probes = probe_controller_->SetEstimatedBitrate( + loss_based_target_rate, + GetBandwidthLimitedCause( + bandwidth_estimation_->loss_based_state(), + delay_based_bwe_->last_state(), + probe_controller_->DontProbeIfDelayIncreased()), + at_time); + update->probe_cluster_configs.insert(update->probe_cluster_configs.end(), + probes.begin(), probes.end()); + update->pacer_config = GetPacingRates(at_time); + RTC_LOG(LS_VERBOSE) << "bwe " << at_time.ms() << " pushback_target_bps=" + << last_pushback_target_rate_.bps() + << " estimate_bps=" << loss_based_target_rate.bps(); + } +} + +PacerConfig GoogCcNetworkController::GetPacingRates(Timestamp at_time) const { + // Pacing rate is based on target rate before congestion window pushback, + // because we don't want to build queues in the pacer when pushback occurs. 
+ DataRate pacing_rate = DataRate::Zero(); + if (pace_at_max_of_bwe_and_lower_link_capacity_ && estimate_) { + pacing_rate = + std::max({min_total_allocated_bitrate_, estimate_->link_capacity_lower, + last_loss_based_target_rate_}) * + pacing_factor_; + } else { + pacing_rate = + std::max(min_total_allocated_bitrate_, last_loss_based_target_rate_) * + pacing_factor_; + } + DataRate padding_rate = + std::min(max_padding_rate_, last_pushback_target_rate_); + PacerConfig msg; + msg.at_time = at_time; + msg.time_window = TimeDelta::Seconds(1); + msg.data_window = pacing_rate * msg.time_window; + msg.pad_window = padding_rate * msg.time_window; + return msg; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.h new file mode 100644 index 0000000000..37a064e37c --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_GOOG_CC_NETWORK_CONTROL_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_GOOG_CC_NETWORK_CONTROL_H_ + +#include <stdint.h> + +#include <deque> +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "api/field_trials_view.h" +#include "api/network_state_predictor.h" +#include "api/rtc_event_log/rtc_event_log.h" +#include "api/transport/field_trial_based_config.h" +#include "api/transport/network_control.h" +#include "api/transport/network_types.h" +#include "api/units/data_rate.h" +#include "api/units/data_size.h" +#include "api/units/timestamp.h" +#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h" +#include "modules/congestion_controller/goog_cc/alr_detector.h" +#include "modules/congestion_controller/goog_cc/congestion_window_pushback_controller.h" +#include "modules/congestion_controller/goog_cc/delay_based_bwe.h" +#include "modules/congestion_controller/goog_cc/probe_controller.h" +#include "modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/experiments/rate_control_settings.h" + +namespace webrtc { +struct GoogCcConfig { + std::unique_ptr<NetworkStateEstimator> network_state_estimator = nullptr; + std::unique_ptr<NetworkStatePredictor> network_state_predictor = nullptr; + bool feedback_only = false; +}; + +class GoogCcNetworkController : public NetworkControllerInterface { + public: + GoogCcNetworkController(NetworkControllerConfig config, + GoogCcConfig goog_cc_config); + + GoogCcNetworkController() = delete; + GoogCcNetworkController(const GoogCcNetworkController&) = delete; + GoogCcNetworkController& operator=(const GoogCcNetworkController&) = delete; + + ~GoogCcNetworkController() override; + + // NetworkControllerInterface + NetworkControlUpdate OnNetworkAvailability(NetworkAvailability msg) override; + NetworkControlUpdate OnNetworkRouteChange(NetworkRouteChange msg) override; + NetworkControlUpdate OnProcessInterval(ProcessInterval msg) override; + NetworkControlUpdate OnRemoteBitrateReport(RemoteBitrateReport 
msg) override; + NetworkControlUpdate OnRoundTripTimeUpdate(RoundTripTimeUpdate msg) override; + NetworkControlUpdate OnSentPacket(SentPacket msg) override; + NetworkControlUpdate OnReceivedPacket(ReceivedPacket msg) override; + NetworkControlUpdate OnStreamsConfig(StreamsConfig msg) override; + NetworkControlUpdate OnTargetRateConstraints( + TargetRateConstraints msg) override; + NetworkControlUpdate OnTransportLossReport(TransportLossReport msg) override; + NetworkControlUpdate OnTransportPacketsFeedback( + TransportPacketsFeedback msg) override; + NetworkControlUpdate OnNetworkStateEstimate( + NetworkStateEstimate msg) override; + + NetworkControlUpdate GetNetworkState(Timestamp at_time) const; + + private: + friend class GoogCcStatePrinter; + std::vector<ProbeClusterConfig> ResetConstraints( + TargetRateConstraints new_constraints); + void ClampConstraints(); + void MaybeTriggerOnNetworkChanged(NetworkControlUpdate* update, + Timestamp at_time); + void UpdateCongestionWindowSize(); + PacerConfig GetPacingRates(Timestamp at_time) const; + const FieldTrialBasedConfig trial_based_config_; + + const FieldTrialsView* const key_value_config_; + RtcEventLog* const event_log_; + const bool packet_feedback_only_; + FieldTrialFlag safe_reset_on_route_change_; + FieldTrialFlag safe_reset_acknowledged_rate_; + const bool use_min_allocatable_as_lower_bound_; + const bool ignore_probes_lower_than_network_estimate_; + const bool limit_probes_lower_than_throughput_estimate_; + const RateControlSettings rate_control_settings_; + const bool pace_at_max_of_bwe_and_lower_link_capacity_; + + const std::unique_ptr<ProbeController> probe_controller_; + const std::unique_ptr<CongestionWindowPushbackController> + congestion_window_pushback_controller_; + + std::unique_ptr<SendSideBandwidthEstimation> bandwidth_estimation_; + std::unique_ptr<AlrDetector> alr_detector_; + std::unique_ptr<ProbeBitrateEstimator> probe_bitrate_estimator_; + std::unique_ptr<NetworkStateEstimator> network_estimator_; + std::unique_ptr<NetworkStatePredictor> network_state_predictor_; + std::unique_ptr<DelayBasedBwe> delay_based_bwe_; + std::unique_ptr<AcknowledgedBitrateEstimatorInterface> + acknowledged_bitrate_estimator_; + + absl::optional<NetworkControllerConfig> initial_config_; + + DataRate min_target_rate_ = DataRate::Zero(); + DataRate min_data_rate_ = DataRate::Zero(); + DataRate max_data_rate_ = DataRate::PlusInfinity(); + absl::optional<DataRate> starting_rate_; + + bool first_packet_sent_ = false; + + absl::optional<NetworkStateEstimate> estimate_; + + Timestamp next_loss_update_ = Timestamp::MinusInfinity(); + int lost_packets_since_last_loss_update_ = 0; + int expected_packets_since_last_loss_update_ = 0; + + std::deque<int64_t> feedback_max_rtts_; + + DataRate last_loss_based_target_rate_; + DataRate last_pushback_target_rate_; + DataRate last_stable_target_rate_; + + absl::optional<uint8_t> last_estimated_fraction_loss_ = 0; + TimeDelta last_estimated_round_trip_time_ = TimeDelta::PlusInfinity(); + Timestamp last_packet_received_time_ = Timestamp::MinusInfinity(); + + double pacing_factor_; + DataRate min_total_allocated_bitrate_; + DataRate max_padding_rate_; + + bool previously_in_alr_ = false; + + absl::optional<DataSize> current_data_window_; +}; + +} // namespace webrtc + +#endif // MODULES_CONGESTION_CONTROLLER_GOOG_CC_GOOG_CC_NETWORK_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control_unittest.cc new file mode 100644 index 0000000000..7e051f505b --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/goog_cc_network_control_unittest.cc @@ -0,0 +1,934 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <queue> + +#include "absl/strings/string_view.h" +#include "api/test/network_emulation/create_cross_traffic.h" +#include "api/test/network_emulation/cross_traffic.h" +#include "api/transport/goog_cc_factory.h" +#include "api/transport/network_types.h" +#include "api/units/data_rate.h" +#include "logging/rtc_event_log/mock/mock_rtc_event_log.h" +#include "test/field_trial.h" +#include "test/gtest.h" +#include "test/scenario/scenario.h" + +using ::testing::NiceMock; + +namespace webrtc { +namespace test { +namespace { +// Count dips from a constant high bandwidth level within a short window. +int CountBandwidthDips(std::queue<DataRate> bandwidth_history, + DataRate threshold) { + if (bandwidth_history.empty()) + return true; + DataRate first = bandwidth_history.front(); + bandwidth_history.pop(); + + int dips = 0; + bool state_high = true; + while (!bandwidth_history.empty()) { + if (bandwidth_history.front() + threshold < first && state_high) { + ++dips; + state_high = false; + } else if (bandwidth_history.front() == first) { + state_high = true; + } else if (bandwidth_history.front() > first) { + // If this is toggling we will catch it later when front becomes first. + state_high = false; + } + bandwidth_history.pop(); + } + return dips; +} +GoogCcNetworkControllerFactory CreateFeedbackOnlyFactory() { + GoogCcFactoryConfig config; + config.feedback_only = true; + return GoogCcNetworkControllerFactory(std::move(config)); +} + +const uint32_t kInitialBitrateKbps = 60; +const DataRate kInitialBitrate = DataRate::KilobitsPerSec(kInitialBitrateKbps); +const float kDefaultPacingRate = 2.5f; + +CallClient* CreateVideoSendingClient( + Scenario* s, + CallClientConfig config, + std::vector<EmulatedNetworkNode*> send_link, + std::vector<EmulatedNetworkNode*> return_link) { + auto* client = s->CreateClient("send", std::move(config)); + auto* route = s->CreateRoutes(client, send_link, + s->CreateClient("return", CallClientConfig()), + return_link); + s->CreateVideoStream(route->forward(), VideoStreamConfig()); + return client; +} + +NetworkRouteChange CreateRouteChange( + Timestamp time, + absl::optional<DataRate> start_rate = absl::nullopt, + absl::optional<DataRate> min_rate = absl::nullopt, + absl::optional<DataRate> max_rate = absl::nullopt) { + NetworkRouteChange route_change; + route_change.at_time = time; + route_change.constraints.at_time = time; + route_change.constraints.min_data_rate = min_rate; + route_change.constraints.max_data_rate = max_rate; + route_change.constraints.starting_rate = start_rate; + return route_change; +} + +PacketResult CreatePacketResult(Timestamp arrival_time, + Timestamp send_time, + size_t payload_size, + PacedPacketInfo pacing_info) { + PacketResult packet_result; + packet_result.sent_packet = SentPacket(); + packet_result.sent_packet.send_time = send_time; + packet_result.sent_packet.size = DataSize::Bytes(payload_size); + packet_result.sent_packet.pacing_info = pacing_info; + packet_result.receive_time = arrival_time; + return packet_result; +} + +// Simulate sending packets and receiving transport feedback during +// `runtime_ms`. 
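+ // (Illustrative summary: each iteration sends one 1000-byte packet, feeds + // its feedback back with `delay` ms of extra build-up per packet, advances + // time by 50 ms and runs a process interval; the last reported target rate + // is returned.)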
+absl::optional<DataRate> PacketTransmissionAndFeedbackBlock( + NetworkControllerInterface* controller, + int64_t runtime_ms, + int64_t delay, + Timestamp& current_time) { + NetworkControlUpdate update; + absl::optional<DataRate> target_bitrate; + int64_t delay_buildup = 0; + int64_t start_time_ms = current_time.ms(); + while (current_time.ms() - start_time_ms < runtime_ms) { + constexpr size_t kPayloadSize = 1000; + PacketResult packet = + CreatePacketResult(current_time + TimeDelta::Millis(delay_buildup), + current_time, kPayloadSize, PacedPacketInfo()); + delay_buildup += delay; + update = controller->OnSentPacket(packet.sent_packet); + if (update.target_rate) { + target_bitrate = update.target_rate->target_rate; + } + TransportPacketsFeedback feedback; + feedback.feedback_time = packet.receive_time; + feedback.packet_feedbacks.push_back(packet); + update = controller->OnTransportPacketsFeedback(feedback); + if (update.target_rate) { + target_bitrate = update.target_rate->target_rate; + } + current_time += TimeDelta::Millis(50); + update = controller->OnProcessInterval({.at_time = current_time}); + if (update.target_rate) { + target_bitrate = update.target_rate->target_rate; + } + } + return target_bitrate; +} + +// Scenarios: + +void UpdatesTargetRateBasedOnLinkCapacity(absl::string_view test_name = "") { + auto factory = CreateFeedbackOnlyFactory(); + Scenario s("googcc_unit/target_capacity" + std::string(test_name), false); + CallClientConfig config; + config.transport.cc_factory = &factory; + config.transport.rates.min_rate = DataRate::KilobitsPerSec(10); + config.transport.rates.max_rate = DataRate::KilobitsPerSec(1500); + config.transport.rates.start_rate = DataRate::KilobitsPerSec(300); + auto send_net = s.CreateMutableSimulationNode([](NetworkSimulationConfig* c) { + c->bandwidth = DataRate::KilobitsPerSec(500); + c->delay = TimeDelta::Millis(100); + c->loss_rate = 0.0; + }); + auto ret_net = s.CreateMutableSimulationNode( + [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); }); + StatesPrinter* truth = s.CreatePrinter( + "send.truth.txt", TimeDelta::PlusInfinity(), {send_net->ConfigPrinter()}); + + auto* client = CreateVideoSendingClient(&s, config, {send_net->node()}, + {ret_net->node()}); + + truth->PrintRow(); + s.RunFor(TimeDelta::Seconds(25)); + truth->PrintRow(); + EXPECT_NEAR(client->target_rate().kbps(), 450, 100); + + send_net->UpdateConfig([](NetworkSimulationConfig* c) { + c->bandwidth = DataRate::KilobitsPerSec(800); + c->delay = TimeDelta::Millis(100); + }); + + truth->PrintRow(); + s.RunFor(TimeDelta::Seconds(20)); + truth->PrintRow(); + EXPECT_NEAR(client->target_rate().kbps(), 750, 150); + + send_net->UpdateConfig([](NetworkSimulationConfig* c) { + c->bandwidth = DataRate::KilobitsPerSec(100); + c->delay = TimeDelta::Millis(200); + }); + ret_net->UpdateConfig( + [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(200); }); + + truth->PrintRow(); + s.RunFor(TimeDelta::Seconds(50)); + truth->PrintRow(); + EXPECT_NEAR(client->target_rate().kbps(), 90, 25); +} + +DataRate RunRembDipScenario(absl::string_view test_name) { + Scenario s(test_name); + NetworkSimulationConfig net_conf; + net_conf.bandwidth = DataRate::KilobitsPerSec(2000); + net_conf.delay = TimeDelta::Millis(50); + auto* client = s.CreateClient("send", [&](CallClientConfig* c) { + c->transport.rates.start_rate = DataRate::KilobitsPerSec(1000); + }); + auto send_net = {s.CreateSimulationNode(net_conf)}; + auto ret_net = {s.CreateSimulationNode(net_conf)}; + auto* route = s.CreateRoutes( + client, 
send_net, s.CreateClient("return", CallClientConfig()), ret_net); + s.CreateVideoStream(route->forward(), VideoStreamConfig()); + + s.RunFor(TimeDelta::Seconds(10)); + EXPECT_GT(client->send_bandwidth().kbps(), 1500); + + DataRate RembLimit = DataRate::KilobitsPerSec(250); + client->SetRemoteBitrate(RembLimit); + s.RunFor(TimeDelta::Seconds(1)); + EXPECT_EQ(client->send_bandwidth(), RembLimit); + + DataRate RembLimitLifted = DataRate::KilobitsPerSec(10000); + client->SetRemoteBitrate(RembLimitLifted); + s.RunFor(TimeDelta::Seconds(10)); + + return client->send_bandwidth(); +} + +} // namespace + +class NetworkControllerTestFixture { + public: + NetworkControllerTestFixture() : factory_() {} + + std::unique_ptr CreateController() { + NetworkControllerConfig config = InitialConfig(); + std::unique_ptr controller = + factory_.Create(config); + return controller; + } + + private: + NetworkControllerConfig InitialConfig( + int starting_bandwidth_kbps = kInitialBitrateKbps, + int min_data_rate_kbps = 0, + int max_data_rate_kbps = 5 * kInitialBitrateKbps) { + NetworkControllerConfig config; + config.constraints.at_time = Timestamp::Zero(); + config.constraints.min_data_rate = + DataRate::KilobitsPerSec(min_data_rate_kbps); + config.constraints.max_data_rate = + DataRate::KilobitsPerSec(max_data_rate_kbps); + config.constraints.starting_rate = + DataRate::KilobitsPerSec(starting_bandwidth_kbps); + config.event_log = &event_log_; + return config; + } + + NiceMock event_log_; + GoogCcNetworkControllerFactory factory_; +}; + +TEST(GoogCcNetworkControllerTest, InitializeTargetRateOnFirstProcessInterval) { + NetworkControllerTestFixture fixture; + std::unique_ptr controller = + fixture.CreateController(); + + NetworkControlUpdate update = + controller->OnProcessInterval({.at_time = Timestamp::Millis(123456)}); + + EXPECT_EQ(update.target_rate->target_rate, kInitialBitrate); + EXPECT_EQ(update.pacer_config->data_rate(), + kInitialBitrate * kDefaultPacingRate); + EXPECT_EQ(update.probe_cluster_configs[0].target_data_rate, + kInitialBitrate * 3); + EXPECT_EQ(update.probe_cluster_configs[1].target_data_rate, + kInitialBitrate * 5); +} + +TEST(GoogCcNetworkControllerTest, ReactsToChangedNetworkConditions) { + NetworkControllerTestFixture fixture; + std::unique_ptr controller = + fixture.CreateController(); + Timestamp current_time = Timestamp::Millis(123); + NetworkControlUpdate update = + controller->OnProcessInterval({.at_time = current_time}); + update = controller->OnRemoteBitrateReport( + {.receive_time = current_time, .bandwidth = kInitialBitrate * 2}); + + current_time += TimeDelta::Millis(25); + update = controller->OnProcessInterval({.at_time = current_time}); + EXPECT_EQ(update.target_rate->target_rate, kInitialBitrate * 2); + EXPECT_EQ(update.pacer_config->data_rate(), + kInitialBitrate * 2 * kDefaultPacingRate); + + update = controller->OnRemoteBitrateReport( + {.receive_time = current_time, .bandwidth = kInitialBitrate}); + current_time += TimeDelta::Millis(25); + update = controller->OnProcessInterval({.at_time = current_time}); + EXPECT_EQ(update.target_rate->target_rate, kInitialBitrate); + EXPECT_EQ(update.pacer_config->data_rate(), + kInitialBitrate * kDefaultPacingRate); +} + +TEST(GoogCcNetworkControllerTest, OnNetworkRouteChanged) { + NetworkControllerTestFixture fixture; + std::unique_ptr controller = + fixture.CreateController(); + Timestamp current_time = Timestamp::Millis(123); + DataRate new_bitrate = DataRate::BitsPerSec(200000); + NetworkControlUpdate update = 
controller->OnNetworkRouteChange( + CreateRouteChange(current_time, new_bitrate)); + EXPECT_EQ(update.target_rate->target_rate, new_bitrate); + EXPECT_EQ(update.pacer_config->data_rate(), new_bitrate * kDefaultPacingRate); + EXPECT_EQ(update.probe_cluster_configs.size(), 2u); + + // If the bitrate is reset to -1, the new starting bitrate will be + // the minimum default bitrate. + const DataRate kDefaultMinBitrate = DataRate::KilobitsPerSec(5); + update = controller->OnNetworkRouteChange(CreateRouteChange(current_time)); + EXPECT_EQ(update.target_rate->target_rate, kDefaultMinBitrate); + EXPECT_NEAR(update.pacer_config->data_rate().bps(), + kDefaultMinBitrate.bps() * kDefaultPacingRate, 10); + EXPECT_EQ(update.probe_cluster_configs.size(), 2u); +} + +TEST(GoogCcNetworkControllerTest, ProbeOnRouteChange) { + NetworkControllerTestFixture fixture; + std::unique_ptr<NetworkControllerInterface> controller = + fixture.CreateController(); + Timestamp current_time = Timestamp::Millis(123); + NetworkControlUpdate update = controller->OnNetworkRouteChange( + CreateRouteChange(current_time, 2 * kInitialBitrate, DataRate::Zero(), + 20 * kInitialBitrate)); + + EXPECT_TRUE(update.pacer_config.has_value()); + EXPECT_EQ(update.target_rate->target_rate, kInitialBitrate * 2); + EXPECT_EQ(update.probe_cluster_configs.size(), 2u); + EXPECT_EQ(update.probe_cluster_configs[0].target_data_rate, + kInitialBitrate * 6); + EXPECT_EQ(update.probe_cluster_configs[1].target_data_rate, + kInitialBitrate * 12); + + update = controller->OnProcessInterval({.at_time = current_time}); +} + +// Bandwidth estimation is updated when feedbacks are received. +// Feedbacks which show an increasing delay cause the estimation to be reduced. +TEST(GoogCcNetworkControllerTest, UpdatesDelayBasedEstimate) { + NetworkControllerTestFixture fixture; + std::unique_ptr<NetworkControllerInterface> controller = + fixture.CreateController(); + const int64_t kRunTimeMs = 6000; + Timestamp current_time = Timestamp::Millis(123); + + // The test must run and insert packets/feedback long enough that the + // BWE computes a valid estimate. This is first done in an environment which + // simulates no bandwidth limitation, and therefore not built-up delay. + absl::optional<DataRate> target_bitrate_before_delay = + PacketTransmissionAndFeedbackBlock(controller.get(), kRunTimeMs, 0, + current_time); + ASSERT_TRUE(target_bitrate_before_delay.has_value()); + + // Repeat, but this time with a building delay, and make sure that the + // estimation is adjusted downwards. + absl::optional<DataRate> target_bitrate_after_delay = + PacketTransmissionAndFeedbackBlock(controller.get(), kRunTimeMs, 50, + current_time); + EXPECT_LT(*target_bitrate_after_delay, *target_bitrate_before_delay); +} + +TEST(GoogCcNetworkControllerTest, PaceAtMaxOfLowerLinkCapacityAndBwe) { + ScopedFieldTrials trial( + "WebRTC-Bwe-PaceAtMaxOfBweAndLowerLinkCapacity/Enabled/"); + NetworkControllerTestFixture fixture; + std::unique_ptr<NetworkControllerInterface> controller = + fixture.CreateController(); + Timestamp current_time = Timestamp::Millis(123); + NetworkControlUpdate update = + controller->OnProcessInterval({.at_time = current_time}); + current_time += TimeDelta::Millis(100); + NetworkStateEstimate network_estimate = {.link_capacity_lower = + 10 * kInitialBitrate}; + update = controller->OnNetworkStateEstimate(network_estimate); + // OnNetworkStateEstimate does not trigger processing a new estimate. So add a + // dummy loss report to trigger a BWE update in the next process interval. 
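+ // (Any loss report with a nonzero packet count would serve here; the exact + // values below are not otherwise significant for the pacing check.)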
+  TransportLossReport loss_report;
+  loss_report.start_time = current_time;
+  loss_report.end_time = current_time;
+  loss_report.receive_time = current_time;
+  loss_report.packets_received_delta = 50;
+  loss_report.packets_lost_delta = 1;
+  update = controller->OnTransportLossReport(loss_report);
+  update = controller->OnProcessInterval({.at_time = current_time});
+  ASSERT_TRUE(update.pacer_config);
+  ASSERT_TRUE(update.target_rate);
+  ASSERT_LT(update.target_rate->target_rate,
+            network_estimate.link_capacity_lower);
+  EXPECT_EQ(update.pacer_config->data_rate().kbps(),
+            network_estimate.link_capacity_lower.kbps() * kDefaultPacingRate);
+
+  current_time += TimeDelta::Millis(100);
+  // Set a low link capacity estimate and verify that the pacing rate is set
+  // relative to the loss-based/delay-based estimate.
+  network_estimate = {.link_capacity_lower = 0.5 * kInitialBitrate};
+  update = controller->OnNetworkStateEstimate(network_estimate);
+  // Again, we need to inject a dummy loss report to trigger an update of the
+  // BWE in the next process interval.
+  loss_report.start_time = current_time;
+  loss_report.end_time = current_time;
+  loss_report.receive_time = current_time;
+  loss_report.packets_received_delta = 50;
+  loss_report.packets_lost_delta = 0;
+  update = controller->OnTransportLossReport(loss_report);
+  update = controller->OnProcessInterval({.at_time = current_time});
+  ASSERT_TRUE(update.target_rate);
+  ASSERT_GT(update.target_rate->target_rate,
+            network_estimate.link_capacity_lower);
+  EXPECT_EQ(update.pacer_config->data_rate().kbps(),
+            update.target_rate->target_rate.kbps() * kDefaultPacingRate);
+}
+
+// Test that congestion window pushback on network delay happens.
+TEST(GoogCcScenario, CongestionWindowPushbackOnNetworkDelay) {
+  auto factory = CreateFeedbackOnlyFactory();
+  ScopedFieldTrials trial(
+      "WebRTC-CongestionWindow/QueueSize:800,MinBitrate:30000/");
+  Scenario s("googcc_unit/cwnd_on_delay", false);
+  auto send_net =
+      s.CreateMutableSimulationNode([=](NetworkSimulationConfig* c) {
+        c->bandwidth = DataRate::KilobitsPerSec(1000);
+        c->delay = TimeDelta::Millis(100);
+      });
+  auto ret_net = s.CreateSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); });
+  CallClientConfig config;
+  config.transport.cc_factory = &factory;
+  // Start high so bandwidth drop has max effect.
+  config.transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  config.transport.rates.max_rate = DataRate::KilobitsPerSec(2000);
+  config.transport.rates.min_rate = DataRate::KilobitsPerSec(10);
+
+  auto* client = CreateVideoSendingClient(&s, std::move(config),
+                                          {send_net->node()}, {ret_net});
+
+  s.RunFor(TimeDelta::Seconds(10));
+  send_net->PauseTransmissionUntil(s.Now() + TimeDelta::Seconds(10));
+  s.RunFor(TimeDelta::Seconds(3));
+
+  // After 3 seconds without feedback from any sent packets, we expect that the
+  // target rate is reduced to the minimum pushback threshold
+  // kDefaultMinPushbackTargetBitrateBps, which is defined as 30 kbps in
+  // congestion_window_pushback_controller.
+  EXPECT_LT(client->target_rate().kbps(), 40);
+}
+
+// Test that congestion window pushback drops frames, rather than reducing the
+// target rate, when the DropFrame option is enabled.
+TEST(GoogCcScenario, CongestionWindowPushbackDropFrameOnNetworkDelay) {
+  auto factory = CreateFeedbackOnlyFactory();
+  ScopedFieldTrials trial(
+      "WebRTC-CongestionWindow/QueueSize:800,MinBitrate:30000,DropFrame:true/");
+  Scenario s("googcc_unit/cwnd_on_delay", false);
+  auto send_net =
+      s.CreateMutableSimulationNode([=](NetworkSimulationConfig* c) {
+        c->bandwidth = DataRate::KilobitsPerSec(1000);
+        c->delay = TimeDelta::Millis(100);
+      });
+  auto ret_net = s.CreateSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); });
+  CallClientConfig config;
+  config.transport.cc_factory = &factory;
+  // Start high so bandwidth drop has max effect.
+  config.transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  config.transport.rates.max_rate = DataRate::KilobitsPerSec(2000);
+  config.transport.rates.min_rate = DataRate::KilobitsPerSec(10);
+
+  auto* client = CreateVideoSendingClient(&s, std::move(config),
+                                          {send_net->node()}, {ret_net});
+
+  s.RunFor(TimeDelta::Seconds(10));
+  send_net->PauseTransmissionUntil(s.Now() + TimeDelta::Seconds(10));
+  s.RunFor(TimeDelta::Seconds(3));
+
+  // As DropFrame is set, after 3 seconds without feedback from any sent
+  // packets, we expect that the target rate is not reduced by the congestion
+  // window.
+  EXPECT_GT(client->target_rate().kbps(), 300);
+}
+
+TEST(GoogCcScenario, PaddingRateLimitedByCongestionWindowInTrial) {
+  ScopedFieldTrials trial(
+      "WebRTC-CongestionWindow/QueueSize:200,MinBitrate:30000/");
+
+  Scenario s("googcc_unit/padding_limited", false);
+  auto send_net =
+      s.CreateMutableSimulationNode([=](NetworkSimulationConfig* c) {
+        c->bandwidth = DataRate::KilobitsPerSec(1000);
+        c->delay = TimeDelta::Millis(100);
+      });
+  auto ret_net = s.CreateSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); });
+  CallClientConfig config;
+  // Start high so bandwidth drop has max effect.
+  config.transport.rates.start_rate = DataRate::KilobitsPerSec(1000);
+  config.transport.rates.max_rate = DataRate::KilobitsPerSec(2000);
+  auto* client = s.CreateClient("send", config);
+  auto* route =
+      s.CreateRoutes(client, {send_net->node()},
+                     s.CreateClient("return", CallClientConfig()), {ret_net});
+  VideoStreamConfig video;
+  video.stream.pad_to_rate = config.transport.rates.max_rate;
+  s.CreateVideoStream(route->forward(), video);
+
+  // Run for a few seconds to allow the controller to stabilize.
+  s.RunFor(TimeDelta::Seconds(10));
+
+  // Check that the padding rate matches the target rate.
+  EXPECT_NEAR(client->padding_rate().kbps(), client->target_rate().kbps(), 1);
+
+  // Check this is also the case when congestion window pushback kicks in.
+  send_net->PauseTransmissionUntil(s.Now() + TimeDelta::Seconds(1));
+  EXPECT_NEAR(client->padding_rate().kbps(), client->target_rate().kbps(), 1);
+}
+
+TEST(GoogCcScenario, LimitsToFloorIfRttIsHighInTrial) {
+  // The field trial limits the maximum RTT to 2 seconds; a higher RTT means
+  // that the controller backs off until it reaches the minimum configured
+  // bitrate. This allows the RTT to recover faster than the regular control
+  // mechanism would achieve.
+  const DataRate kBandwidthFloor = DataRate::KilobitsPerSec(50);
+  ScopedFieldTrials trial("WebRTC-Bwe-MaxRttLimit/limit:2s,floor:" +
+                          std::to_string(kBandwidthFloor.kbps()) + "kbps/");
+  // In the test case, we limit the capacity and add a cross traffic packet
+  // burst that blocks media from being sent. This causes the RTT to quickly
+  // increase above the threshold in the trial.
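+  // For scale (an illustrative calculation added here, not a comment from
+  // the upstream test): with the 100 kbps link and 10 s bloat duration
+  // configured below, the injected burst works out to
+  // 10 s * 100 kbit/s / (1000 bytes * 8 bit/byte) = 125 packets, which is
+  // what kBloatPacketCount evaluates to.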
+  const DataRate kLinkCapacity = DataRate::KilobitsPerSec(100);
+  const TimeDelta kBufferBloatDuration = TimeDelta::Seconds(10);
+  Scenario s("googcc_unit/limit_trial", false);
+  auto send_net = s.CreateSimulationNode([=](NetworkSimulationConfig* c) {
+    c->bandwidth = kLinkCapacity;
+    c->delay = TimeDelta::Millis(100);
+  });
+  auto ret_net = s.CreateSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); });
+  CallClientConfig config;
+  config.transport.rates.start_rate = kLinkCapacity;
+
+  auto* client = CreateVideoSendingClient(&s, config, {send_net}, {ret_net});
+  // Run for a few seconds to allow the controller to stabilize.
+  s.RunFor(TimeDelta::Seconds(10));
+  const DataSize kBloatPacketSize = DataSize::Bytes(1000);
+  const int kBloatPacketCount =
+      static_cast<int>(kBufferBloatDuration * kLinkCapacity / kBloatPacketSize);
+  // This will cause the RTT to be large for a while.
+  s.TriggerPacketBurst({send_net}, kBloatPacketCount, kBloatPacketSize.bytes());
+  // Wait to allow the high RTT to be detected and acted upon.
+  s.RunFor(TimeDelta::Seconds(6));
+  // By now the target rate should have dropped to the minimum configured rate.
+  EXPECT_NEAR(client->target_rate().kbps(), kBandwidthFloor.kbps(), 5);
+}
+
+TEST(GoogCcScenario, UpdatesTargetRateBasedOnLinkCapacity) {
+  UpdatesTargetRateBasedOnLinkCapacity();
+}
+
+TEST(GoogCcScenario, StableEstimateDoesNotVaryInSteadyState) {
+  auto factory = CreateFeedbackOnlyFactory();
+  Scenario s("googcc_unit/stable_target", false);
+  CallClientConfig config;
+  config.transport.cc_factory = &factory;
+  NetworkSimulationConfig net_conf;
+  net_conf.bandwidth = DataRate::KilobitsPerSec(500);
+  net_conf.delay = TimeDelta::Millis(100);
+  auto send_net = s.CreateSimulationNode(net_conf);
+  auto ret_net = s.CreateSimulationNode(net_conf);
+
+  auto* client = CreateVideoSendingClient(&s, config, {send_net}, {ret_net});
+  // Run for a while to allow the estimate to stabilize.
+  s.RunFor(TimeDelta::Seconds(30));
+  DataRate min_stable_target = DataRate::PlusInfinity();
+  DataRate max_stable_target = DataRate::MinusInfinity();
+  DataRate min_target = DataRate::PlusInfinity();
+  DataRate max_target = DataRate::MinusInfinity();
+
+  // Measure variation in steady state.
+  for (int i = 0; i < 20; ++i) {
+    auto stable_target_rate = client->stable_target_rate();
+    auto target_rate = client->target_rate();
+    EXPECT_LE(stable_target_rate, target_rate);
+
+    min_stable_target = std::min(min_stable_target, stable_target_rate);
+    max_stable_target = std::max(max_stable_target, stable_target_rate);
+    min_target = std::min(min_target, target_rate);
+    max_target = std::max(max_target, target_rate);
+    s.RunFor(TimeDelta::Seconds(1));
+  }
+  // We should expect drops by at least 15% (the default backoff).
+  EXPECT_LT(min_target / max_target, 0.85);
+  // We should expect the stable target to be more stable than the immediate
+  // one.
+  EXPECT_GE(min_stable_target / max_stable_target, min_target / max_target);
+}
+
+TEST(GoogCcScenario, LossBasedControlUpdatesTargetRateBasedOnLinkCapacity) {
+  ScopedFieldTrials trial("WebRTC-Bwe-LossBasedControl/Enabled/");
+  // TODO(srte): Should the behavior be unaffected at low loss rates?
+  UpdatesTargetRateBasedOnLinkCapacity("_loss_based");
+}
+
+TEST(GoogCcScenario, LossBasedControlDoesModestBackoffToHighLoss) {
+  ScopedFieldTrials trial("WebRTC-Bwe-LossBasedControl/Enabled/");
+  Scenario s("googcc_unit/high_loss_channel", false);
+  CallClientConfig config;
+  config.transport.rates.min_rate = DataRate::KilobitsPerSec(10);
+  config.transport.rates.max_rate = DataRate::KilobitsPerSec(1500);
+  config.transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  auto send_net = s.CreateSimulationNode([](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(2000);
+    c->delay = TimeDelta::Millis(200);
+    c->loss_rate = 0.1;
+  });
+  auto ret_net = s.CreateSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(200); });
+
+  auto* client = CreateVideoSendingClient(&s, config, {send_net}, {ret_net});
+
+  s.RunFor(TimeDelta::Seconds(120));
+  // Without the LossBasedControl trial, bandwidth drops to ~10 kbps.
+  EXPECT_GT(client->target_rate().kbps(), 100);
+}
+
+DataRate AverageBitrateAfterCrossInducedLoss(absl::string_view name) {
+  Scenario s(name, false);
+  NetworkSimulationConfig net_conf;
+  net_conf.bandwidth = DataRate::KilobitsPerSec(1000);
+  net_conf.delay = TimeDelta::Millis(100);
+  // A short queue length means that we'll induce loss when sudden TCP traffic
+  // spikes are induced. This corresponds to roughly 200 ms for a packet size
+  // of 1000 bytes. Such limited buffers are common on, for instance, wifi
+  // routers.
+  net_conf.packet_queue_length_limit = 25;
+
+  auto send_net = {s.CreateSimulationNode(net_conf)};
+  auto ret_net = {s.CreateSimulationNode(net_conf)};
+
+  auto* client = s.CreateClient("send", CallClientConfig());
+  auto* callee = s.CreateClient("return", CallClientConfig());
+  auto* route = s.CreateRoutes(client, send_net, callee, ret_net);
+  // TODO(srte): Make this work with RTX enabled or remove it.
+  auto* video = s.CreateVideoStream(route->forward(), [](VideoStreamConfig* c) {
+    c->stream.use_rtx = false;
+  });
+  s.RunFor(TimeDelta::Seconds(10));
+  for (int i = 0; i < 4; ++i) {
+    // Sends TCP cross traffic inducing loss.
+    auto* tcp_traffic = s.net()->StartCrossTraffic(CreateFakeTcpCrossTraffic(
+        s.net()->CreateRoute(send_net), s.net()->CreateRoute(ret_net),
+        FakeTcpConfig()));
+    s.RunFor(TimeDelta::Seconds(2));
+    // Allow the congestion controller to recover.
+    s.net()->StopCrossTraffic(tcp_traffic);
+    s.RunFor(TimeDelta::Seconds(20));
+  }
+
+  // Query the video stats from within the expected runtime environment
+  // (i.e. the TQ that belongs to the CallClient, not the Scenario TQ that
+  // we're currently on).
+ VideoReceiveStreamInterface::Stats video_receive_stats; + auto* video_stream = video->receive(); + callee->SendTask([&video_stream, &video_receive_stats]() { + video_receive_stats = video_stream->GetStats(); + }); + return DataSize::Bytes( + video_receive_stats.rtp_stats.packet_counter.TotalBytes()) / + s.TimeSinceStart(); +} + +TEST(GoogCcScenario, MaintainsLowRateInSafeResetTrial) { + const DataRate kLinkCapacity = DataRate::KilobitsPerSec(200); + const DataRate kStartRate = DataRate::KilobitsPerSec(300); + + ScopedFieldTrials trial("WebRTC-Bwe-SafeResetOnRouteChange/Enabled/"); + Scenario s("googcc_unit/safe_reset_low"); + auto* send_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) { + c->bandwidth = kLinkCapacity; + c->delay = TimeDelta::Millis(10); + }); + auto* client = s.CreateClient("send", [&](CallClientConfig* c) { + c->transport.rates.start_rate = kStartRate; + }); + auto* route = s.CreateRoutes( + client, {send_net}, s.CreateClient("return", CallClientConfig()), + {s.CreateSimulationNode(NetworkSimulationConfig())}); + s.CreateVideoStream(route->forward(), VideoStreamConfig()); + // Allow the controller to stabilize. + s.RunFor(TimeDelta::Millis(500)); + EXPECT_NEAR(client->send_bandwidth().kbps(), kLinkCapacity.kbps(), 50); + s.ChangeRoute(route->forward(), {send_net}); + // Allow new settings to propagate. + s.RunFor(TimeDelta::Millis(100)); + // Under the trial, the target should be unchanged for low rates. + EXPECT_NEAR(client->send_bandwidth().kbps(), kLinkCapacity.kbps(), 50); +} + +TEST(GoogCcScenario, CutsHighRateInSafeResetTrial) { + const DataRate kLinkCapacity = DataRate::KilobitsPerSec(1000); + const DataRate kStartRate = DataRate::KilobitsPerSec(300); + + ScopedFieldTrials trial("WebRTC-Bwe-SafeResetOnRouteChange/Enabled/"); + Scenario s("googcc_unit/safe_reset_high_cut"); + auto send_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) { + c->bandwidth = kLinkCapacity; + c->delay = TimeDelta::Millis(50); + }); + auto* client = s.CreateClient("send", [&](CallClientConfig* c) { + c->transport.rates.start_rate = kStartRate; + }); + auto* route = s.CreateRoutes( + client, {send_net}, s.CreateClient("return", CallClientConfig()), + {s.CreateSimulationNode(NetworkSimulationConfig())}); + s.CreateVideoStream(route->forward(), VideoStreamConfig()); + // Allow the controller to stabilize. + s.RunFor(TimeDelta::Millis(500)); + EXPECT_NEAR(client->send_bandwidth().kbps(), kLinkCapacity.kbps(), 300); + s.ChangeRoute(route->forward(), {send_net}); + // Allow new settings to propagate. + s.RunFor(TimeDelta::Millis(50)); + // Under the trial, the target should be reset from high values. 
+ EXPECT_NEAR(client->send_bandwidth().kbps(), kStartRate.kbps(), 30); +} + +TEST(GoogCcScenario, DetectsHighRateInSafeResetTrial) { + ScopedFieldTrials trial("WebRTC-Bwe-SafeResetOnRouteChange/Enabled,ack/"); + const DataRate kInitialLinkCapacity = DataRate::KilobitsPerSec(200); + const DataRate kNewLinkCapacity = DataRate::KilobitsPerSec(800); + const DataRate kStartRate = DataRate::KilobitsPerSec(300); + + Scenario s("googcc_unit/safe_reset_high_detect"); + auto* initial_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) { + c->bandwidth = kInitialLinkCapacity; + c->delay = TimeDelta::Millis(50); + }); + auto* new_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) { + c->bandwidth = kNewLinkCapacity; + c->delay = TimeDelta::Millis(50); + }); + auto* client = s.CreateClient("send", [&](CallClientConfig* c) { + c->transport.rates.start_rate = kStartRate; + }); + auto* route = s.CreateRoutes( + client, {initial_net}, s.CreateClient("return", CallClientConfig()), + {s.CreateSimulationNode(NetworkSimulationConfig())}); + s.CreateVideoStream(route->forward(), VideoStreamConfig()); + // Allow the controller to stabilize. + s.RunFor(TimeDelta::Millis(2000)); + EXPECT_NEAR(client->send_bandwidth().kbps(), kInitialLinkCapacity.kbps(), 50); + s.ChangeRoute(route->forward(), {new_net}); + // Allow new settings to propagate, but not probes to be received. + s.RunFor(TimeDelta::Millis(50)); + // Under the field trial, the target rate should be unchanged since it's lower + // than the starting rate. + EXPECT_NEAR(client->send_bandwidth().kbps(), kInitialLinkCapacity.kbps(), 50); + // However, probing should have made us detect the higher rate. + // NOTE: This test causes high loss rate, and the loss-based estimator reduces + // the bitrate, making the test fail if we wait longer than one second here. + s.RunFor(TimeDelta::Millis(1000)); + EXPECT_GT(client->send_bandwidth().kbps(), kNewLinkCapacity.kbps() - 300); +} + +TEST(GoogCcScenario, TargetRateReducedOnPacingBufferBuildupInTrial) { + // Configure strict pacing to ensure build-up. + ScopedFieldTrials trial( + "WebRTC-CongestionWindow/QueueSize:100,MinBitrate:30000/" + "WebRTC-Video-Pacing/factor:1.0/" + "WebRTC-AddPacingToCongestionWindowPushback/Enabled/"); + + const DataRate kLinkCapacity = DataRate::KilobitsPerSec(1000); + const DataRate kStartRate = DataRate::KilobitsPerSec(1000); + + Scenario s("googcc_unit/pacing_buffer_buildup"); + auto* net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) { + c->bandwidth = kLinkCapacity; + c->delay = TimeDelta::Millis(50); + }); + auto* client = s.CreateClient("send", [&](CallClientConfig* c) { + c->transport.rates.start_rate = kStartRate; + }); + auto* route = s.CreateRoutes( + client, {net}, s.CreateClient("return", CallClientConfig()), + {s.CreateSimulationNode(NetworkSimulationConfig())}); + s.CreateVideoStream(route->forward(), VideoStreamConfig()); + // Allow some time for the buffer to build up. + s.RunFor(TimeDelta::Seconds(5)); + + // Without trial, pacer delay reaches ~250 ms. 
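+  // With the trial enabled, pushback should cut the target rate before the
+  // pacer queue grows; the 150 ms bound below reads as a margin well under
+  // the ~250 ms no-trial baseline (an interpretation added here, not an
+  // upstream comment).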
+  EXPECT_LT(client->GetStats().pacer_delay_ms, 150);
+}
+
+TEST(GoogCcScenario, NoBandwidthTogglingInLossControlTrial) {
+  ScopedFieldTrials trial("WebRTC-Bwe-LossBasedControl/Enabled/");
+  Scenario s("googcc_unit/no_toggling");
+  auto* send_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(2000);
+    c->loss_rate = 0.2;
+    c->delay = TimeDelta::Millis(10);
+  });
+
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  });
+  auto* route = s.CreateRoutes(
+      client, {send_net}, s.CreateClient("return", CallClientConfig()),
+      {s.CreateSimulationNode(NetworkSimulationConfig())});
+  s.CreateVideoStream(route->forward(), VideoStreamConfig());
+  // Allow the controller to initialize.
+  s.RunFor(TimeDelta::Millis(250));
+
+  std::queue<DataRate> bandwidth_history;
+  const TimeDelta step = TimeDelta::Millis(50);
+  for (TimeDelta time = TimeDelta::Zero(); time < TimeDelta::Millis(2000);
+       time += step) {
+    s.RunFor(step);
+    const TimeDelta window = TimeDelta::Millis(500);
+    if (bandwidth_history.size() >= window / step)
+      bandwidth_history.pop();
+    bandwidth_history.push(client->send_bandwidth());
+    EXPECT_LT(
+        CountBandwidthDips(bandwidth_history, DataRate::KilobitsPerSec(100)),
+        2);
+  }
+}
+
+TEST(GoogCcScenario, NoRttBackoffCollapseWhenVideoStops) {
+  ScopedFieldTrials trial("WebRTC-Bwe-MaxRttLimit/limit:2s/");
+  Scenario s("googcc_unit/rttbackoff_video_stop");
+  auto* send_net = s.CreateSimulationNode([&](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(2000);
+    c->delay = TimeDelta::Millis(100);
+  });
+
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = DataRate::KilobitsPerSec(1000);
+  });
+  auto* route = s.CreateRoutes(
+      client, {send_net}, s.CreateClient("return", CallClientConfig()),
+      {s.CreateSimulationNode(NetworkSimulationConfig())});
+  auto* video = s.CreateVideoStream(route->forward(), VideoStreamConfig());
+  // Allow the controller to initialize, then stop video.
+  s.RunFor(TimeDelta::Seconds(1));
+  video->send()->Stop();
+  s.RunFor(TimeDelta::Seconds(4));
+  EXPECT_GT(client->send_bandwidth().kbps(), 1000);
+}
+
+TEST(GoogCcScenario, NoCrashOnVeryLateFeedback) {
+  Scenario s;
+  auto ret_net = s.CreateMutableSimulationNode(NetworkSimulationConfig());
+  auto* route = s.CreateRoutes(
+      s.CreateClient("send", CallClientConfig()),
+      {s.CreateSimulationNode(NetworkSimulationConfig())},
+      s.CreateClient("return", CallClientConfig()), {ret_net->node()});
+  auto* video = s.CreateVideoStream(route->forward(), VideoStreamConfig());
+  s.RunFor(TimeDelta::Seconds(5));
+  // Delay feedback by several minutes. This will cause removal of the send
+  // time history for the packets as long as kSendTimeHistoryWindow is
+  // configured for a shorter time span.
+  ret_net->PauseTransmissionUntil(s.Now() + TimeDelta::Seconds(300));
+  // Stop the video stream while waiting, to save test execution time.
+  video->send()->Stop();
+  s.RunFor(TimeDelta::Seconds(299));
+  // Start again to cause new packets to be added to the history, which causes
+  // old packets to be removed.
+  video->send()->Start();
+  // Run until the lost packets are received. We expect that this will run
+  // without causing any runtime failures.
+  s.RunFor(TimeDelta::Seconds(2));
+}
+
+TEST(GoogCcScenario, IsFairToTCP) {
+  Scenario s("googcc_unit/tcp_fairness");
+  NetworkSimulationConfig net_conf;
+  net_conf.bandwidth = DataRate::KilobitsPerSec(1000);
+  net_conf.delay = TimeDelta::Millis(50);
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = DataRate::KilobitsPerSec(1000);
+  });
+  auto send_net = {s.CreateSimulationNode(net_conf)};
+  auto ret_net = {s.CreateSimulationNode(net_conf)};
+  auto* route = s.CreateRoutes(
+      client, send_net, s.CreateClient("return", CallClientConfig()), ret_net);
+  s.CreateVideoStream(route->forward(), VideoStreamConfig());
+  s.net()->StartCrossTraffic(CreateFakeTcpCrossTraffic(
+      s.net()->CreateRoute(send_net), s.net()->CreateRoute(ret_net),
+      FakeTcpConfig()));
+  s.RunFor(TimeDelta::Seconds(10));
+
+  // Currently only testing for the upper limit as we in practice back out
+  // quite a lot in this scenario. If this behavior is fixed, we should add a
+  // lower bound to ensure it stays fixed.
+  EXPECT_LT(client->send_bandwidth().kbps(), 750);
+}
+
+TEST(GoogCcScenario, FastRampupOnRembCapLifted) {
+  DataRate final_estimate =
+      RunRembDipScenario("googcc_unit/default_fast_rampup_on_remb_cap_lifted");
+  EXPECT_GT(final_estimate.kbps(), 1500);
+}
+
+TEST(GoogCcScenario, FallbackToLossBasedBweWithoutPacketFeedback) {
+  const DataRate kLinkCapacity = DataRate::KilobitsPerSec(1000);
+  const DataRate kStartRate = DataRate::KilobitsPerSec(1000);
+
+  Scenario s("googcc_unit/high_loss_channel", false);
+  auto* net = s.CreateMutableSimulationNode([&](NetworkSimulationConfig* c) {
+    c->bandwidth = kLinkCapacity;
+    c->delay = TimeDelta::Millis(100);
+  });
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = kStartRate;
+  });
+  auto* route = s.CreateRoutes(
+      client, {net->node()}, s.CreateClient("return", CallClientConfig()),
+      {s.CreateSimulationNode(NetworkSimulationConfig())});
+
+  // Create a config without packet feedback.
+  VideoStreamConfig video_config;
+  video_config.stream.packet_feedback = false;
+  s.CreateVideoStream(route->forward(), video_config);
+
+  s.RunFor(TimeDelta::Seconds(20));
+  // Bandwidth does not back off because the network is normal.
+  EXPECT_GE(client->target_rate().kbps(), 500);
+
+  // Update the network to create a high loss ratio.
+  net->UpdateConfig([](NetworkSimulationConfig* c) {
+    c->loss_rate = 0.15;
+  });
+  s.RunFor(TimeDelta::Seconds(20));
+
+  // Bandwidth decreases thanks to loss-based BWE v0.
+  EXPECT_LE(client->target_rate().kbps(), 300);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.cc
new file mode 100644
index 0000000000..2d50d08e6a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/inter_arrival_delta.h"
+
+#include <algorithm>
+
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+static constexpr TimeDelta kBurstDeltaThreshold = TimeDelta::Millis(5);
+static constexpr TimeDelta kMaxBurstDuration = TimeDelta::Millis(100);
+constexpr TimeDelta InterArrivalDelta::kArrivalTimeOffsetThreshold;
+
+InterArrivalDelta::InterArrivalDelta(TimeDelta send_time_group_length)
+    : send_time_group_length_(send_time_group_length),
+      current_timestamp_group_(),
+      prev_timestamp_group_(),
+      num_consecutive_reordered_packets_(0) {}
+
+bool InterArrivalDelta::ComputeDeltas(Timestamp send_time,
+                                      Timestamp arrival_time,
+                                      Timestamp system_time,
+                                      size_t packet_size,
+                                      TimeDelta* send_time_delta,
+                                      TimeDelta* arrival_time_delta,
+                                      int* packet_size_delta) {
+  bool calculated_deltas = false;
+  if (current_timestamp_group_.IsFirstPacket()) {
+    // We don't have enough data to update the filter, so we store it until we
+    // have two frames of data to process.
+    current_timestamp_group_.send_time = send_time;
+    current_timestamp_group_.first_send_time = send_time;
+    current_timestamp_group_.first_arrival = arrival_time;
+  } else if (current_timestamp_group_.first_send_time > send_time) {
+    // Reordered packet.
+    return false;
+  } else if (NewTimestampGroup(arrival_time, send_time)) {
+    // First packet of a later send burst; the previous packet group's sample
+    // is ready.
+    if (prev_timestamp_group_.complete_time.IsFinite()) {
+      *send_time_delta =
+          current_timestamp_group_.send_time - prev_timestamp_group_.send_time;
+      *arrival_time_delta = current_timestamp_group_.complete_time -
+                            prev_timestamp_group_.complete_time;
+
+      TimeDelta system_time_delta = current_timestamp_group_.last_system_time -
+                                    prev_timestamp_group_.last_system_time;
+
+      if (*arrival_time_delta - system_time_delta >=
+          kArrivalTimeOffsetThreshold) {
+        RTC_LOG(LS_WARNING)
+            << "The arrival time clock offset has changed (diff = "
+            << arrival_time_delta->ms() - system_time_delta.ms()
+            << " ms), resetting.";
+        Reset();
+        return false;
+      }
+      if (*arrival_time_delta < TimeDelta::Zero()) {
+        // The group of packets has been reordered since receiving its local
+        // arrival timestamp.
+        ++num_consecutive_reordered_packets_;
+        if (num_consecutive_reordered_packets_ >= kReorderedResetThreshold) {
+          RTC_LOG(LS_WARNING)
+              << "Packets between send burst arrived out of order, resetting:"
+              << " arrival_time_delta_ms=" << arrival_time_delta->ms()
+              << ", send_time_delta_ms=" << send_time_delta->ms();
+          Reset();
+        }
+        return false;
+      } else {
+        num_consecutive_reordered_packets_ = 0;
+      }
+      *packet_size_delta = static_cast<int>(current_timestamp_group_.size) -
+                           static_cast<int>(prev_timestamp_group_.size);
+      calculated_deltas = true;
+    }
+    prev_timestamp_group_ = current_timestamp_group_;
+    // The new timestamp is now the current frame.
+    current_timestamp_group_.first_send_time = send_time;
+    current_timestamp_group_.send_time = send_time;
+    current_timestamp_group_.first_arrival = arrival_time;
+    current_timestamp_group_.size = 0;
+  } else {
+    current_timestamp_group_.send_time =
+        std::max(current_timestamp_group_.send_time, send_time);
+  }
+  // Accumulate the frame size.
+ current_timestamp_group_.size += packet_size; + current_timestamp_group_.complete_time = arrival_time; + current_timestamp_group_.last_system_time = system_time; + + return calculated_deltas; +} + +// Assumes that `timestamp` is not reordered compared to +// `current_timestamp_group_`. +bool InterArrivalDelta::NewTimestampGroup(Timestamp arrival_time, + Timestamp send_time) const { + if (current_timestamp_group_.IsFirstPacket()) { + return false; + } else if (BelongsToBurst(arrival_time, send_time)) { + return false; + } else { + return send_time - current_timestamp_group_.first_send_time > + send_time_group_length_; + } +} + +bool InterArrivalDelta::BelongsToBurst(Timestamp arrival_time, + Timestamp send_time) const { + RTC_DCHECK(current_timestamp_group_.complete_time.IsFinite()); + TimeDelta arrival_time_delta = + arrival_time - current_timestamp_group_.complete_time; + TimeDelta send_time_delta = send_time - current_timestamp_group_.send_time; + if (send_time_delta.IsZero()) + return true; + TimeDelta propagation_delta = arrival_time_delta - send_time_delta; + if (propagation_delta < TimeDelta::Zero() && + arrival_time_delta <= kBurstDeltaThreshold && + arrival_time - current_timestamp_group_.first_arrival < kMaxBurstDuration) + return true; + return false; +} + +void InterArrivalDelta::Reset() { + num_consecutive_reordered_packets_ = 0; + current_timestamp_group_ = SendTimeGroup(); + prev_timestamp_group_ = SendTimeGroup(); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.h new file mode 100644 index 0000000000..4046590eeb --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/inter_arrival_delta.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_INTER_ARRIVAL_DELTA_H_ +#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_INTER_ARRIVAL_DELTA_H_ + +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" + +namespace webrtc { + +// Helper class to compute the inter-arrival time delta and the size delta +// between two send bursts. This code is branched from +// modules/remote_bitrate_estimator/inter_arrival. +class InterArrivalDelta { + public: + // After this many packet groups received out of order InterArrival will + // reset, assuming that clocks have made a jump. + static constexpr int kReorderedResetThreshold = 3; + static constexpr TimeDelta kArrivalTimeOffsetThreshold = + TimeDelta::Seconds(3); + + // A send time group is defined as all packets with a send time which are at + // most send_time_group_length older than the first timestamp in that + // group. + explicit InterArrivalDelta(TimeDelta send_time_group_length); + + InterArrivalDelta() = delete; + InterArrivalDelta(const InterArrivalDelta&) = delete; + InterArrivalDelta& operator=(const InterArrivalDelta&) = delete; + + // This function returns true if a delta was computed, or false if the current + // group is still incomplete or if only one group has been completed. + // `send_time` is the send time. 
+  // `arrival_time` is the time at which the packet arrived.
+  // `packet_size` is the size of the packet.
+  // `send_time_delta` (output) is the computed send-time delta.
+  // `arrival_time_delta` (output) is the computed arrival-time delta.
+  // `packet_size_delta` (output) is the computed size delta.
+  bool ComputeDeltas(Timestamp send_time,
+                     Timestamp arrival_time,
+                     Timestamp system_time,
+                     size_t packet_size,
+                     TimeDelta* send_time_delta,
+                     TimeDelta* arrival_time_delta,
+                     int* packet_size_delta);
+
+ private:
+  struct SendTimeGroup {
+    SendTimeGroup()
+        : size(0),
+          first_send_time(Timestamp::MinusInfinity()),
+          send_time(Timestamp::MinusInfinity()),
+          first_arrival(Timestamp::MinusInfinity()),
+          complete_time(Timestamp::MinusInfinity()),
+          last_system_time(Timestamp::MinusInfinity()) {}
+
+    bool IsFirstPacket() const { return complete_time.IsInfinite(); }
+
+    size_t size;
+    Timestamp first_send_time;
+    Timestamp send_time;
+    Timestamp first_arrival;
+    Timestamp complete_time;
+    Timestamp last_system_time;
+  };
+
+  // Returns true if the last packet was the end of the current batch and the
+  // packet with `send_time` is the first of a new batch.
+  bool NewTimestampGroup(Timestamp arrival_time, Timestamp send_time) const;
+
+  bool BelongsToBurst(Timestamp arrival_time, Timestamp send_time) const;
+
+  void Reset();
+
+  const TimeDelta send_time_group_length_;
+  SendTimeGroup current_timestamp_group_;
+  SendTimeGroup prev_timestamp_group_;
+  int num_consecutive_reordered_packets_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_INTER_ARRIVAL_DELTA_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.cc
new file mode 100644
index 0000000000..9fd537a422
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/congestion_controller/goog_cc/link_capacity_estimator.h"
+
+#include <algorithm>
+
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+LinkCapacityEstimator::LinkCapacityEstimator() {}
+
+DataRate LinkCapacityEstimator::UpperBound() const {
+  if (estimate_kbps_.has_value())
+    return DataRate::KilobitsPerSec(estimate_kbps_.value() +
+                                    3 * deviation_estimate_kbps());
+  return DataRate::Infinity();
+}
+
+DataRate LinkCapacityEstimator::LowerBound() const {
+  if (estimate_kbps_.has_value())
+    return DataRate::KilobitsPerSec(
+        std::max(0.0, estimate_kbps_.value() - 3 * deviation_estimate_kbps()));
+  return DataRate::Zero();
+}
+
+void LinkCapacityEstimator::Reset() {
+  estimate_kbps_.reset();
+}
+
+void LinkCapacityEstimator::OnOveruseDetected(DataRate acknowledged_rate) {
+  Update(acknowledged_rate, 0.05);
+}
+
+void LinkCapacityEstimator::OnProbeRate(DataRate probe_rate) {
+  Update(probe_rate, 0.5);
+}
+
+void LinkCapacityEstimator::Update(DataRate capacity_sample, double alpha) {
+  double sample_kbps = capacity_sample.kbps();
+  if (!estimate_kbps_.has_value()) {
+    estimate_kbps_ = sample_kbps;
+  } else {
+    estimate_kbps_ = (1 - alpha) * estimate_kbps_.value() + alpha * sample_kbps;
+  }
+  // Estimate the variance of the link capacity estimate and normalize the
+  // variance with the link capacity estimate.
+  const double norm = std::max(estimate_kbps_.value(), 1.0);
+  double error_kbps = estimate_kbps_.value() - sample_kbps;
+  deviation_kbps_ =
+      (1 - alpha) * deviation_kbps_ + alpha * error_kbps * error_kbps / norm;
+  // 0.4 ~= 14 kbit/s at 500 kbit/s
+  // 2.5f ~= 35 kbit/s at 500 kbit/s
+  deviation_kbps_ = rtc::SafeClamp(deviation_kbps_, 0.4f, 2.5f);
+}
+
+bool LinkCapacityEstimator::has_estimate() const {
+  return estimate_kbps_.has_value();
+}
+
+DataRate LinkCapacityEstimator::estimate() const {
+  return DataRate::KilobitsPerSec(*estimate_kbps_);
+}
+
+double LinkCapacityEstimator::deviation_estimate_kbps() const {
+  // Calculate the max bit rate std dev given the normalized
+  // variance and the current throughput bitrate. The standard deviation will
+  // only be used if estimate_kbps_ has a value.
+  return sqrt(deviation_kbps_ * estimate_kbps_.value());
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.h
new file mode 100644
index 0000000000..aa23491d9d
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_LINK_CAPACITY_ESTIMATOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_LINK_CAPACITY_ESTIMATOR_H_
+
+#include "absl/types/optional.h"
+#include "api/units/data_rate.h"
+
+namespace webrtc {
+class LinkCapacityEstimator {
+ public:
+  LinkCapacityEstimator();
+  DataRate UpperBound() const;
+  DataRate LowerBound() const;
+  void Reset();
+  void OnOveruseDetected(DataRate acknowledged_rate);
+  void OnProbeRate(DataRate probe_rate);
+  bool has_estimate() const;
+  DataRate estimate() const;
+
+ private:
+  friend class GoogCcStatePrinter;
+  void Update(DataRate capacity_sample, double alpha);
+
+  double deviation_estimate_kbps() const;
+  absl::optional<double> estimate_kbps_;
+  double deviation_kbps_ = 0.4;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_LINK_CAPACITY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator_gn/moz.build
new file mode 100644
index 0000000000..829ff47d87
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/congestion_controller/goog_cc/link_capacity_estimator.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("link_capacity_estimator_gn") diff --git 
a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc
new file mode 100644
index 0000000000..7524c84d92
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "absl/strings/match.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+
+namespace webrtc {
+namespace {
+const char kBweLossBasedControl[] = "WebRTC-Bwe-LossBasedControl";
+
+// Expecting RTCP feedback to be sent with roughly 1s intervals, a 5s gap
+// indicates a channel outage.
+constexpr TimeDelta kMaxRtcpFeedbackInterval = TimeDelta::Millis(5000);
+
+// Increase more slowly when the RTT is high.
+double GetIncreaseFactor(const LossBasedControlConfig& config, TimeDelta rtt) {
+  // Clamp the RTT to the configured range.
+  if (rtt < config.increase_low_rtt) {
+    rtt = config.increase_low_rtt;
+  } else if (rtt > config.increase_high_rtt) {
+    rtt = config.increase_high_rtt;
+  }
+  auto rtt_range = config.increase_high_rtt.Get() - config.increase_low_rtt;
+  if (rtt_range <= TimeDelta::Zero()) {
+    RTC_DCHECK_NOTREACHED();  // Only on misconfiguration.
+    return config.min_increase_factor;
+  }
+  auto rtt_offset = rtt - config.increase_low_rtt;
+  auto relative_offset = std::max(0.0, std::min(rtt_offset / rtt_range, 1.0));
+  auto factor_range = config.max_increase_factor - config.min_increase_factor;
+  return config.min_increase_factor + (1 - relative_offset) * factor_range;
+}
+
+double LossFromBitrate(DataRate bitrate,
+                       DataRate loss_bandwidth_balance,
+                       double exponent) {
+  if (loss_bandwidth_balance >= bitrate)
+    return 1.0;
+  return pow(loss_bandwidth_balance / bitrate, exponent);
+}
+
+DataRate BitrateFromLoss(double loss,
+                         DataRate loss_bandwidth_balance,
+                         double exponent) {
+  if (exponent <= 0) {
+    RTC_DCHECK_NOTREACHED();
+    return DataRate::Infinity();
+  }
+  if (loss < 1e-5)
+    return DataRate::Infinity();
+  return loss_bandwidth_balance * pow(loss, -1.0 / exponent);
+}
+
+double ExponentialUpdate(TimeDelta window, TimeDelta interval) {
+  // Use the convention that exponential window length (which is really
+  // infinite) is the time it takes to dampen to 1/e.
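+  // Illustrative numbers (added here, not an upstream comment): with the
+  // default loss_window of 800 ms, a feedback interval of 800 ms gives an
+  // update weight of 1 - exp(-1) ~= 0.63, while a 100 ms interval gives
+  // 1 - exp(-100/800) ~= 0.12, so sparse reports move the average more per
+  // update.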
+  if (window <= TimeDelta::Zero()) {
+    RTC_DCHECK_NOTREACHED();
+    return 1.0f;
+  }
+  return 1.0f - exp(interval / window * -1.0);
+}
+
+bool IsEnabled(const webrtc::FieldTrialsView& key_value_config,
+               absl::string_view name) {
+  return absl::StartsWith(key_value_config.Lookup(name), "Enabled");
+}
+
+}  // namespace
+
+LossBasedControlConfig::LossBasedControlConfig(
+    const FieldTrialsView* key_value_config)
+    : enabled(IsEnabled(*key_value_config, kBweLossBasedControl)),
+      min_increase_factor("min_incr", 1.02),
+      max_increase_factor("max_incr", 1.08),
+      increase_low_rtt("incr_low_rtt", TimeDelta::Millis(200)),
+      increase_high_rtt("incr_high_rtt", TimeDelta::Millis(800)),
+      decrease_factor("decr", 0.99),
+      loss_window("loss_win", TimeDelta::Millis(800)),
+      loss_max_window("loss_max_win", TimeDelta::Millis(800)),
+      acknowledged_rate_max_window("ackrate_max_win", TimeDelta::Millis(800)),
+      increase_offset("incr_offset", DataRate::BitsPerSec(1000)),
+      loss_bandwidth_balance_increase("balance_incr",
+                                      DataRate::KilobitsPerSec(0.5)),
+      loss_bandwidth_balance_decrease("balance_decr",
+                                      DataRate::KilobitsPerSec(4)),
+      loss_bandwidth_balance_reset("balance_reset",
+                                   DataRate::KilobitsPerSec(0.1)),
+      loss_bandwidth_balance_exponent("exponent", 0.5),
+      allow_resets("resets", false),
+      decrease_interval("decr_intvl", TimeDelta::Millis(300)),
+      loss_report_timeout("timeout", TimeDelta::Millis(6000)) {
+  ParseFieldTrial(
+      {&min_increase_factor, &max_increase_factor, &increase_low_rtt,
+       &increase_high_rtt, &decrease_factor, &loss_window, &loss_max_window,
+       &acknowledged_rate_max_window, &increase_offset,
+       &loss_bandwidth_balance_increase, &loss_bandwidth_balance_decrease,
+       &loss_bandwidth_balance_reset, &loss_bandwidth_balance_exponent,
+       &allow_resets, &decrease_interval, &loss_report_timeout},
+      key_value_config->Lookup(kBweLossBasedControl));
+}
+LossBasedControlConfig::LossBasedControlConfig(const LossBasedControlConfig&) =
+    default;
+LossBasedControlConfig::~LossBasedControlConfig() = default;
+
+LossBasedBandwidthEstimation::LossBasedBandwidthEstimation(
+    const FieldTrialsView* key_value_config)
+    : config_(key_value_config),
+      average_loss_(0),
+      average_loss_max_(0),
+      loss_based_bitrate_(DataRate::Zero()),
+      acknowledged_bitrate_max_(DataRate::Zero()),
+      acknowledged_bitrate_last_update_(Timestamp::MinusInfinity()),
+      time_last_decrease_(Timestamp::MinusInfinity()),
+      has_decreased_since_last_loss_report_(false),
+      last_loss_packet_report_(Timestamp::MinusInfinity()),
+      last_loss_ratio_(0) {}
+
+void LossBasedBandwidthEstimation::UpdateLossStatistics(
+    const std::vector<PacketResult>& packet_results,
+    Timestamp at_time) {
+  if (packet_results.empty()) {
+    RTC_DCHECK_NOTREACHED();
+    return;
+  }
+  int loss_count = 0;
+  for (const auto& pkt : packet_results) {
+    loss_count += !pkt.IsReceived() ? 1 : 0;
+  }
+  last_loss_ratio_ = static_cast<double>(loss_count) / packet_results.size();
+  const TimeDelta time_passed = last_loss_packet_report_.IsFinite()
+                                    ? at_time - last_loss_packet_report_
+                                    : TimeDelta::Seconds(1);
+  last_loss_packet_report_ = at_time;
+  has_decreased_since_last_loss_report_ = false;
+
+  average_loss_ += ExponentialUpdate(config_.loss_window, time_passed) *
+                   (last_loss_ratio_ - average_loss_);
+  if (average_loss_ > average_loss_max_) {
+    average_loss_max_ = average_loss_;
+  } else {
+    average_loss_max_ +=
+        ExponentialUpdate(config_.loss_max_window, time_passed) *
+        (average_loss_ - average_loss_max_);
+  }
+}
+
+void LossBasedBandwidthEstimation::UpdateAcknowledgedBitrate(
+    DataRate acknowledged_bitrate,
+    Timestamp at_time) {
+  const TimeDelta time_passed =
+      acknowledged_bitrate_last_update_.IsFinite()
+          ? at_time - acknowledged_bitrate_last_update_
+          : TimeDelta::Seconds(1);
+  acknowledged_bitrate_last_update_ = at_time;
+  if (acknowledged_bitrate > acknowledged_bitrate_max_) {
+    acknowledged_bitrate_max_ = acknowledged_bitrate;
+  } else {
+    acknowledged_bitrate_max_ -=
+        ExponentialUpdate(config_.acknowledged_rate_max_window, time_passed) *
+        (acknowledged_bitrate_max_ - acknowledged_bitrate);
+  }
+}
+
+DataRate LossBasedBandwidthEstimation::Update(Timestamp at_time,
+                                              DataRate min_bitrate,
+                                              DataRate wanted_bitrate,
+                                              TimeDelta last_round_trip_time) {
+  if (loss_based_bitrate_.IsZero()) {
+    loss_based_bitrate_ = wanted_bitrate;
+  }
+
+  // Only increase if loss has been low for some time.
+  const double loss_estimate_for_increase = average_loss_max_;
+  // Avoid multiple decreases from averaging over one loss spike.
+  const double loss_estimate_for_decrease =
+      std::min(average_loss_, last_loss_ratio_);
+  const bool allow_decrease =
+      !has_decreased_since_last_loss_report_ &&
+      (at_time - time_last_decrease_ >=
+       last_round_trip_time + config_.decrease_interval);
+  // If packet loss reports are too old, don't increase the bitrate.
+  const bool loss_report_valid =
+      at_time - last_loss_packet_report_ < 1.2 * kMaxRtcpFeedbackInterval;
+
+  if (loss_report_valid && config_.allow_resets &&
+      loss_estimate_for_increase < loss_reset_threshold()) {
+    loss_based_bitrate_ = wanted_bitrate;
+  } else if (loss_report_valid &&
+             loss_estimate_for_increase < loss_increase_threshold()) {
+    // Increase the bitrate by an RTT-adaptive ratio.
+    DataRate new_increased_bitrate =
+        min_bitrate * GetIncreaseFactor(config_, last_round_trip_time) +
+        config_.increase_offset;
+    // The bitrate that would make the loss "just high enough".
+    const DataRate new_increased_bitrate_cap = BitrateFromLoss(
+        loss_estimate_for_increase, config_.loss_bandwidth_balance_increase,
+        config_.loss_bandwidth_balance_exponent);
+    new_increased_bitrate =
+        std::min(new_increased_bitrate, new_increased_bitrate_cap);
+    loss_based_bitrate_ = std::max(new_increased_bitrate, loss_based_bitrate_);
+  } else if (loss_estimate_for_decrease > loss_decrease_threshold() &&
+             allow_decrease) {
+    // The bitrate that would make the loss "just acceptable".
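+    // Illustrative numbers (added here, not an upstream comment): with the
+    // default balance_decr of 4 kbps and exponent 0.5, a 10% loss estimate
+    // yields a floor of 4 kbps * 0.1^(-1/0.5) = 4 kbps * 100 = 400 kbps.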
+ const DataRate new_decreased_bitrate_floor = BitrateFromLoss( + loss_estimate_for_decrease, config_.loss_bandwidth_balance_decrease, + config_.loss_bandwidth_balance_exponent); + DataRate new_decreased_bitrate = + std::max(decreased_bitrate(), new_decreased_bitrate_floor); + if (new_decreased_bitrate < loss_based_bitrate_) { + time_last_decrease_ = at_time; + has_decreased_since_last_loss_report_ = true; + loss_based_bitrate_ = new_decreased_bitrate; + } + } + return loss_based_bitrate_; +} + +void LossBasedBandwidthEstimation::Initialize(DataRate bitrate) { + loss_based_bitrate_ = bitrate; + average_loss_ = 0; + average_loss_max_ = 0; +} + +double LossBasedBandwidthEstimation::loss_reset_threshold() const { + return LossFromBitrate(loss_based_bitrate_, + config_.loss_bandwidth_balance_reset, + config_.loss_bandwidth_balance_exponent); +} + +double LossBasedBandwidthEstimation::loss_increase_threshold() const { + return LossFromBitrate(loss_based_bitrate_, + config_.loss_bandwidth_balance_increase, + config_.loss_bandwidth_balance_exponent); +} + +double LossBasedBandwidthEstimation::loss_decrease_threshold() const { + return LossFromBitrate(loss_based_bitrate_, + config_.loss_bandwidth_balance_decrease, + config_.loss_bandwidth_balance_exponent); +} + +DataRate LossBasedBandwidthEstimation::decreased_bitrate() const { + return config_.decrease_factor * acknowledged_bitrate_max_; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h new file mode 100644 index 0000000000..9f69caba89 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BANDWIDTH_ESTIMATION_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BANDWIDTH_ESTIMATION_H_
+
+#include <vector>
+
+#include "api/field_trials_view.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+
+namespace webrtc {
+
+struct LossBasedControlConfig {
+  explicit LossBasedControlConfig(const FieldTrialsView* key_value_config);
+  LossBasedControlConfig(const LossBasedControlConfig&);
+  LossBasedControlConfig& operator=(const LossBasedControlConfig&) = default;
+  ~LossBasedControlConfig();
+  bool enabled;
+  FieldTrialParameter<double> min_increase_factor;
+  FieldTrialParameter<double> max_increase_factor;
+  FieldTrialParameter<TimeDelta> increase_low_rtt;
+  FieldTrialParameter<TimeDelta> increase_high_rtt;
+  FieldTrialParameter<double> decrease_factor;
+  FieldTrialParameter<TimeDelta> loss_window;
+  FieldTrialParameter<TimeDelta> loss_max_window;
+  FieldTrialParameter<TimeDelta> acknowledged_rate_max_window;
+  FieldTrialParameter<DataRate> increase_offset;
+  FieldTrialParameter<DataRate> loss_bandwidth_balance_increase;
+  FieldTrialParameter<DataRate> loss_bandwidth_balance_decrease;
+  FieldTrialParameter<DataRate> loss_bandwidth_balance_reset;
+  FieldTrialParameter<double> loss_bandwidth_balance_exponent;
+  FieldTrialParameter<bool> allow_resets;
+  FieldTrialParameter<TimeDelta> decrease_interval;
+  FieldTrialParameter<TimeDelta> loss_report_timeout;
+};
+
+// Estimates an upper BWE limit based on loss.
+// It requires knowledge about lost packets and acknowledged bitrate.
+// I.e., this class requires transport feedback.
+class LossBasedBandwidthEstimation {
+ public:
+  explicit LossBasedBandwidthEstimation(
+      const FieldTrialsView* key_value_config);
+  // Returns the new estimate.
+  DataRate Update(Timestamp at_time,
+                  DataRate min_bitrate,
+                  DataRate wanted_bitrate,
+                  TimeDelta last_round_trip_time);
+  void UpdateAcknowledgedBitrate(DataRate acknowledged_bitrate,
+                                 Timestamp at_time);
+  void Initialize(DataRate bitrate);
+  bool Enabled() const { return config_.enabled; }
+  // Returns true if LossBasedBandwidthEstimation is enabled and has
+  // received loss statistics. I.e., this class requires transport feedback.
+  // Returns true if LossBasedBandwidthEstimation is enabled and has received
+  // loss statistics, i.e., this class requires transport feedback.
+  bool InUse() const {
+    return Enabled() && last_loss_packet_report_.IsFinite();
+  }
+  void UpdateLossStatistics(const std::vector<PacketResult>& packet_results,
+                            Timestamp at_time);
+  DataRate GetEstimate() const { return loss_based_bitrate_; }
+
+ private:
+  friend class GoogCcStatePrinter;
+  void Reset(DataRate bitrate);
+  double loss_increase_threshold() const;
+  double loss_decrease_threshold() const;
+  double loss_reset_threshold() const;
+
+  DataRate decreased_bitrate() const;
+
+  const LossBasedControlConfig config_;
+  double average_loss_;
+  double average_loss_max_;
+  DataRate loss_based_bitrate_;
+  DataRate acknowledged_bitrate_max_;
+  Timestamp acknowledged_bitrate_last_update_;
+  Timestamp time_last_decrease_;
+  bool has_decreased_since_last_loss_report_;
+  Timestamp last_loss_packet_report_;
+  double last_loss_ratio_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BANDWIDTH_ESTIMATION_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v1_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v1_gn/moz.build
new file mode 100644
index 0000000000..aae4b82520
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v1_gn/moz.build
@@ -0,0 +1,226 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+SOURCES += [
+    "/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+
DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if 
CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("loss_based_bwe_v1_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.cc new file mode 100644 index 0000000000..b6efdeee9e --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.cc @@ -0,0 +1,1080 @@ +/* + * Copyright 2021 The WebRTC project authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/congestion_controller/goog_cc/loss_based_bwe_v2.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/field_trials_view.h" +#include "api/network_state_predictor.h" +#include "api/transport/network_types.h" +#include "api/units/data_rate.h" +#include "api/units/data_size.h" +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" +#include "modules/remote_bitrate_estimator/include/bwe_defines.h" +#include "rtc_base/experiments/field_trial_list.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { + +bool IsValid(DataRate datarate) { + return datarate.IsFinite(); +} + +bool IsValid(absl::optional datarate) { + return datarate.has_value() && IsValid(datarate.value()); +} + +bool IsValid(Timestamp timestamp) { + return timestamp.IsFinite(); +} + +struct PacketResultsSummary { + int num_packets = 0; + int num_lost_packets = 0; + DataSize total_size = DataSize::Zero(); + Timestamp first_send_time = Timestamp::PlusInfinity(); + Timestamp last_send_time = Timestamp::MinusInfinity(); +}; + +// Returns a `PacketResultsSummary` where `first_send_time` is `PlusInfinity, +// and `last_send_time` is `MinusInfinity`, if `packet_results` is empty. 
+// Returns a `PacketResultsSummary` where `first_send_time` is
+// `Timestamp::PlusInfinity()` and `last_send_time` is
+// `Timestamp::MinusInfinity()` if `packet_results` is empty.
+PacketResultsSummary GetPacketResultsSummary(
+    rtc::ArrayView<const PacketResult> packet_results) {
+  PacketResultsSummary packet_results_summary;
+
+  packet_results_summary.num_packets = packet_results.size();
+  for (const PacketResult& packet : packet_results) {
+    if (!packet.IsReceived()) {
+      packet_results_summary.num_lost_packets++;
+    }
+    packet_results_summary.total_size += packet.sent_packet.size;
+    packet_results_summary.first_send_time = std::min(
+        packet_results_summary.first_send_time, packet.sent_packet.send_time);
+    packet_results_summary.last_send_time = std::max(
+        packet_results_summary.last_send_time, packet.sent_packet.send_time);
+  }
+
+  return packet_results_summary;
+}
+
+double GetLossProbability(double inherent_loss,
+                          DataRate loss_limited_bandwidth,
+                          DataRate sending_rate) {
+  if (inherent_loss < 0.0 || inherent_loss > 1.0) {
+    RTC_LOG(LS_WARNING) << "The inherent loss must be in [0,1]: "
+                        << inherent_loss;
+    inherent_loss = std::min(std::max(inherent_loss, 0.0), 1.0);
+  }
+  if (!sending_rate.IsFinite()) {
+    RTC_LOG(LS_WARNING) << "The sending rate must be finite: "
+                        << ToString(sending_rate);
+  }
+  if (!loss_limited_bandwidth.IsFinite()) {
+    RTC_LOG(LS_WARNING) << "The loss limited bandwidth must be finite: "
+                        << ToString(loss_limited_bandwidth);
+  }
+
+  double loss_probability = inherent_loss;
+  if (IsValid(sending_rate) && IsValid(loss_limited_bandwidth) &&
+      (sending_rate > loss_limited_bandwidth)) {
+    loss_probability += (1 - inherent_loss) *
+                        (sending_rate - loss_limited_bandwidth) / sending_rate;
+  }
+  return std::min(std::max(loss_probability, 1.0e-6), 1.0 - 1.0e-6);
+}
+
+}  // namespace
+
+LossBasedBweV2::LossBasedBweV2(const FieldTrialsView* key_value_config)
+    : config_(CreateConfig(key_value_config)) {
+  if (!config_.has_value()) {
+    RTC_LOG(LS_VERBOSE) << "The configuration does not specify that the "
+                           "estimator should be enabled, disabling it.";
+    return;
+  }
+  if (!IsConfigValid()) {
+    RTC_LOG(LS_WARNING)
+        << "The configuration is not valid, disabling the estimator.";
+    config_.reset();
+    return;
+  }
+
+  current_estimate_.inherent_loss = config_->initial_inherent_loss_estimate;
+  observations_.resize(config_->observation_window_size);
+  temporal_weights_.resize(config_->observation_window_size);
+  instant_upper_bound_temporal_weights_.resize(
+      config_->observation_window_size);
+  CalculateTemporalWeights();
+}
+
+bool LossBasedBweV2::IsEnabled() const {
+  return config_.has_value();
+}
+
+bool LossBasedBweV2::IsReady() const {
+  return IsEnabled() && IsValid(current_estimate_.loss_limited_bandwidth) &&
+         num_observations_ > 0;
+}
+
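+// GetLossBasedResult() below combines three limits: the loss limited
+// bandwidth found by the candidate search, the instant upper bound derived
+// from the recently reported loss ratio, and the delay based estimate; the
+// returned estimate is the minimum of whichever of them are currently valid.
+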
+LossBasedBweV2::Result LossBasedBweV2::GetLossBasedResult() const {
+  Result result;
+  result.state = current_state_;
+  if (!IsReady()) {
+    if (!IsEnabled()) {
+      RTC_LOG(LS_WARNING)
+          << "The estimator must be enabled before it can be used.";
+    } else {
+      if (!IsValid(current_estimate_.loss_limited_bandwidth)) {
+        RTC_LOG(LS_WARNING)
+            << "The estimator must be initialized before it can be used.";
+      }
+      if (num_observations_ <= 0) {
+        RTC_LOG(LS_WARNING) << "The estimator must receive enough loss "
+                               "statistics before it can be used.";
+      }
+    }
+    result.bandwidth_estimate = IsValid(delay_based_estimate_)
+                                    ? delay_based_estimate_
+                                    : DataRate::PlusInfinity();
+    return result;
+  }
+
+  if (IsValid(delay_based_estimate_)) {
+    result.bandwidth_estimate =
+        std::min({current_estimate_.loss_limited_bandwidth,
+                  GetInstantUpperBound(), delay_based_estimate_});
+  } else {
+    result.bandwidth_estimate = std::min(
+        current_estimate_.loss_limited_bandwidth, GetInstantUpperBound());
+  }
+  return result;
+}
+
+void LossBasedBweV2::SetAcknowledgedBitrate(DataRate acknowledged_bitrate) {
+  if (IsValid(acknowledged_bitrate)) {
+    acknowledged_bitrate_ = acknowledged_bitrate;
+  } else {
+    RTC_LOG(LS_WARNING) << "The acknowledged bitrate must be finite: "
+                        << ToString(acknowledged_bitrate);
+  }
+}
+
+void LossBasedBweV2::SetBandwidthEstimate(DataRate bandwidth_estimate) {
+  if (IsValid(bandwidth_estimate)) {
+    current_estimate_.loss_limited_bandwidth = bandwidth_estimate;
+  } else {
+    RTC_LOG(LS_WARNING) << "The bandwidth estimate must be finite: "
+                        << ToString(bandwidth_estimate);
+  }
+}
+
+void LossBasedBweV2::SetMinMaxBitrate(DataRate min_bitrate,
+                                      DataRate max_bitrate) {
+  if (IsValid(min_bitrate)) {
+    min_bitrate_ = min_bitrate;
+  } else {
+    RTC_LOG(LS_WARNING) << "The min bitrate must be finite: "
+                        << ToString(min_bitrate);
+  }
+
+  if (IsValid(max_bitrate)) {
+    max_bitrate_ = max_bitrate;
+  } else {
+    RTC_LOG(LS_WARNING) << "The max bitrate must be finite: "
+                        << ToString(max_bitrate);
+  }
+}
+
+void LossBasedBweV2::SetProbeBitrate(absl::optional<DataRate> probe_bitrate) {
+  if (probe_bitrate.has_value() && IsValid(probe_bitrate.value())) {
+    if (!IsValid(probe_bitrate_) || probe_bitrate_ > probe_bitrate.value()) {
+      probe_bitrate_ = probe_bitrate.value();
+    }
+  }
+}
+
+void LossBasedBweV2::UpdateBandwidthEstimate(
+    rtc::ArrayView<const PacketResult> packet_results,
+    DataRate delay_based_estimate,
+    BandwidthUsage delay_detector_state,
+    absl::optional<DataRate> probe_bitrate,
+    DataRate upper_link_capacity) {
+  delay_based_estimate_ = delay_based_estimate;
+  upper_link_capacity_ = upper_link_capacity;
+  if (!IsEnabled()) {
+    RTC_LOG(LS_WARNING)
+        << "The estimator must be enabled before it can be used.";
+    return;
+  }
+  SetProbeBitrate(probe_bitrate);
+  if (packet_results.empty()) {
+    RTC_LOG(LS_VERBOSE)
+        << "The estimate cannot be updated without any loss statistics.";
+    return;
+  }
+
+  if (!PushBackObservation(packet_results, delay_detector_state)) {
+    return;
+  }
+
+  if (!IsValid(current_estimate_.loss_limited_bandwidth)) {
+    RTC_LOG(LS_VERBOSE)
+        << "The estimator must be initialized before it can be used.";
+    return;
+  }
+
+  ChannelParameters best_candidate = current_estimate_;
+  double objective_max = std::numeric_limits<double>::lowest();
+  for (ChannelParameters candidate : GetCandidates()) {
+    NewtonsMethodUpdate(candidate);
+
+    const double candidate_objective = GetObjective(candidate);
+    if (candidate_objective > objective_max) {
+      objective_max = candidate_objective;
+      best_candidate = candidate;
+    }
+  }
+  if (best_candidate.loss_limited_bandwidth <
+      current_estimate_.loss_limited_bandwidth) {
+    last_time_estimate_reduced_ = last_send_time_most_recent_observation_;
+  }
+
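+  // The loop above is the core of the estimator: each candidate bandwidth
+  // (by default 1.02x, 1.0x and 0.95x of the current estimate, plus
+  // optionally the acknowledged rate and the delay based estimate) has its
+  // inherent loss refined by Newton's method, and the candidate whose
+  // (bandwidth, inherent loss) pair best explains the observed loss history
+  // wins.
+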
+  // Do not increase the estimate if the average loss is greater than the
+  // current inherent loss.
+  if (GetAverageReportedLossRatio() > best_candidate.inherent_loss &&
+      config_->not_increase_if_inherent_loss_less_than_average_loss &&
+      current_estimate_.loss_limited_bandwidth <
+          best_candidate.loss_limited_bandwidth) {
+    best_candidate.loss_limited_bandwidth =
+        current_estimate_.loss_limited_bandwidth;
+  }
+
+  if (IsBandwidthLimitedDueToLoss()) {
+    // Bound the estimate increase if:
+    // 1. The estimate was increased less than `delayed_increase_window` ago,
+    //    and
+    // 2. The best candidate is greater than
+    //    `bandwidth_limit_in_current_window_`.
+    if (recovering_after_loss_timestamp_.IsFinite() &&
+        recovering_after_loss_timestamp_ + config_->delayed_increase_window >
+            last_send_time_most_recent_observation_ &&
+        best_candidate.loss_limited_bandwidth >
+            bandwidth_limit_in_current_window_) {
+      best_candidate.loss_limited_bandwidth =
+          bandwidth_limit_in_current_window_;
+    }
+
+    bool increasing_when_loss_limited =
+        IsEstimateIncreasingWhenLossLimited(best_candidate);
+    // Bound the best candidate by the acked bitrate unless there is a recent
+    // probe result.
+    if (increasing_when_loss_limited && !IsValid(probe_bitrate_) &&
+        IsValid(acknowledged_bitrate_)) {
+      best_candidate.loss_limited_bandwidth =
+          IsValid(best_candidate.loss_limited_bandwidth)
+              ? std::min(best_candidate.loss_limited_bandwidth,
+                         config_->bandwidth_rampup_upper_bound_factor *
+                             (*acknowledged_bitrate_))
+              : config_->bandwidth_rampup_upper_bound_factor *
+                    (*acknowledged_bitrate_);
+    }
+
+    // Use probe bitrate as the estimate as probe bitrate is trusted to be
+    // correct. After being used, the probe bitrate is reset.
+    if (config_->probe_integration_enabled && IsValid(probe_bitrate_)) {
+      best_candidate.loss_limited_bandwidth =
+          std::min(probe_bitrate_, best_candidate.loss_limited_bandwidth);
+      probe_bitrate_ = DataRate::MinusInfinity();
+    }
+  }
+
+  if (IsEstimateIncreasingWhenLossLimited(best_candidate) &&
+      best_candidate.loss_limited_bandwidth < delay_based_estimate) {
+    current_state_ = LossBasedState::kIncreasing;
+  } else if (best_candidate.loss_limited_bandwidth < delay_based_estimate_) {
+    current_state_ = LossBasedState::kDecreasing;
+  } else if (best_candidate.loss_limited_bandwidth >= delay_based_estimate_) {
+    current_state_ = LossBasedState::kDelayBasedEstimate;
+  }
+  current_estimate_ = best_candidate;
+
+  if (IsBandwidthLimitedDueToLoss() &&
+      (recovering_after_loss_timestamp_.IsInfinite() ||
+       recovering_after_loss_timestamp_ + config_->delayed_increase_window <
+           last_send_time_most_recent_observation_)) {
+    bandwidth_limit_in_current_window_ =
+        std::max(kCongestionControllerMinBitrate,
+                 current_estimate_.loss_limited_bandwidth *
+                     config_->max_increase_factor);
+    recovering_after_loss_timestamp_ = last_send_time_most_recent_observation_;
+  }
+}
+
+bool LossBasedBweV2::IsEstimateIncreasingWhenLossLimited(
+    const ChannelParameters& best_candidate) {
+  return (current_estimate_.loss_limited_bandwidth <
+              best_candidate.loss_limited_bandwidth ||
+          (current_estimate_.loss_limited_bandwidth ==
+               best_candidate.loss_limited_bandwidth &&
+           current_state_ == LossBasedState::kIncreasing)) &&
+         IsBandwidthLimitedDueToLoss();
+}
+
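+// State transitions above, in short: kIncreasing while the estimate is loss
+// limited and the winning candidate keeps growing below the delay based
+// estimate, kDecreasing while the loss limited bandwidth sits below the
+// delay based estimate, and kDelayBasedEstimate once the loss limited
+// bandwidth reaches or exceeds it.
+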
+// Returns a `LossBasedBweV2::Config` iff the `key_value_config` specifies a
+// configuration for the `LossBasedBweV2` which is explicitly enabled.
+absl::optional<LossBasedBweV2::Config> LossBasedBweV2::CreateConfig(
+    const FieldTrialsView* key_value_config) {
+  FieldTrialParameter<bool> enabled("Enabled", true);
+  FieldTrialParameter<double> bandwidth_rampup_upper_bound_factor(
+      "BwRampupUpperBoundFactor", 1000000.0);
+  FieldTrialParameter<double> rampup_acceleration_max_factor(
+      "BwRampupAccelMaxFactor", 0.0);
+  FieldTrialParameter<TimeDelta> rampup_acceleration_maxout_time(
+      "BwRampupAccelMaxoutTime", TimeDelta::Seconds(60));
+  FieldTrialList<double> candidate_factors("CandidateFactors",
+                                           {1.02, 1.0, 0.95});
+  FieldTrialParameter<double> higher_bandwidth_bias_factor("HigherBwBiasFactor",
+                                                           0.0002);
+  FieldTrialParameter<double> higher_log_bandwidth_bias_factor(
+      "HigherLogBwBiasFactor", 0.02);
+  FieldTrialParameter<double> inherent_loss_lower_bound(
+      "InherentLossLowerBound", 1.0e-3);
+  FieldTrialParameter<double> loss_threshold_of_high_bandwidth_preference(
+      "LossThresholdOfHighBandwidthPreference", 0.15);
+  FieldTrialParameter<double> bandwidth_preference_smoothing_factor(
+      "BandwidthPreferenceSmoothingFactor", 0.002);
+  FieldTrialParameter<DataRate> inherent_loss_upper_bound_bandwidth_balance(
+      "InherentLossUpperBoundBwBalance", DataRate::KilobitsPerSec(75.0));
+  FieldTrialParameter<double> inherent_loss_upper_bound_offset(
+      "InherentLossUpperBoundOffset", 0.05);
+  FieldTrialParameter<double> initial_inherent_loss_estimate(
+      "InitialInherentLossEstimate", 0.01);
+  FieldTrialParameter<int> newton_iterations("NewtonIterations", 1);
+  FieldTrialParameter<double> newton_step_size("NewtonStepSize", 0.75);
+  FieldTrialParameter<bool> append_acknowledged_rate_candidate(
+      "AckedRateCandidate", true);
+  FieldTrialParameter<bool> append_delay_based_estimate_candidate(
+      "DelayBasedCandidate", true);
+  FieldTrialParameter<TimeDelta> observation_duration_lower_bound(
+      "ObservationDurationLowerBound", TimeDelta::Millis(250));
+  FieldTrialParameter<int> observation_window_size("ObservationWindowSize",
+                                                   20);
+  FieldTrialParameter<double> sending_rate_smoothing_factor(
+      "SendingRateSmoothingFactor", 0.0);
+  FieldTrialParameter<double> instant_upper_bound_temporal_weight_factor(
+      "InstantUpperBoundTemporalWeightFactor", 0.9);
+  FieldTrialParameter<DataRate> instant_upper_bound_bandwidth_balance(
+      "InstantUpperBoundBwBalance", DataRate::KilobitsPerSec(75.0));
+  FieldTrialParameter<double> instant_upper_bound_loss_offset(
+      "InstantUpperBoundLossOffset", 0.05);
+  FieldTrialParameter<double> temporal_weight_factor("TemporalWeightFactor",
+                                                     0.9);
+  FieldTrialParameter<double> bandwidth_backoff_lower_bound_factor(
+      "BwBackoffLowerBoundFactor", 1.0);
+  FieldTrialParameter<bool> trendline_integration_enabled(
+      "TrendlineIntegrationEnabled", false);
+  FieldTrialParameter<int> trendline_observations_window_size(
+      "TrendlineObservationsWindowSize", 20);
+  FieldTrialParameter<double> max_increase_factor("MaxIncreaseFactor", 1.3);
+  FieldTrialParameter<TimeDelta> delayed_increase_window(
+      "DelayedIncreaseWindow", TimeDelta::Millis(300));
+  FieldTrialParameter<bool> use_acked_bitrate_only_when_overusing(
+      "UseAckedBitrateOnlyWhenOverusing", false);
+  FieldTrialParameter<bool>
+      not_increase_if_inherent_loss_less_than_average_loss(
+          "NotIncreaseIfInherentLossLessThanAverageLoss", true);
+  FieldTrialParameter<double> high_loss_rate_threshold("HighLossRateThreshold",
+                                                       1.0);
+  FieldTrialParameter<DataRate> bandwidth_cap_at_high_loss_rate(
+      "BandwidthCapAtHighLossRate", DataRate::KilobitsPerSec(500.0));
+  FieldTrialParameter<double> slope_of_bwe_high_loss_func(
+      "SlopeOfBweHighLossFunc", 1000);
+  FieldTrialParameter<bool> probe_integration_enabled(
+      "ProbeIntegrationEnabled", false);
+  FieldTrialParameter<bool> bound_by_upper_link_capacity_when_loss_limited(
+      "BoundByUpperLinkCapacityWhenLossLimited", true);
+  if (key_value_config) {
+    ParseFieldTrial({&enabled,
+                     &bandwidth_rampup_upper_bound_factor,
+                     &rampup_acceleration_max_factor,
+                     &rampup_acceleration_maxout_time,
+                     &candidate_factors,
+                     &higher_bandwidth_bias_factor,
+                     &higher_log_bandwidth_bias_factor,
+                     &inherent_loss_lower_bound,
+                     &loss_threshold_of_high_bandwidth_preference,
+                     &bandwidth_preference_smoothing_factor,
+                     &inherent_loss_upper_bound_bandwidth_balance,
+                     &inherent_loss_upper_bound_offset,
+                     &initial_inherent_loss_estimate,
+                     &newton_iterations,
+                     &newton_step_size,
+                     &append_acknowledged_rate_candidate,
+                     &append_delay_based_estimate_candidate,
+                     &observation_duration_lower_bound,
+                     &observation_window_size,
+                     &sending_rate_smoothing_factor,
+                     &instant_upper_bound_temporal_weight_factor,
+                     &instant_upper_bound_bandwidth_balance,
+                     &instant_upper_bound_loss_offset,
+                     &temporal_weight_factor,
+                     &bandwidth_backoff_lower_bound_factor,
+                     &trendline_integration_enabled,
+                     &trendline_observations_window_size,
+                     &max_increase_factor,
+                     &delayed_increase_window,
+                     &use_acked_bitrate_only_when_overusing,
+                     &not_increase_if_inherent_loss_less_than_average_loss,
+                     &probe_integration_enabled,
+                     &high_loss_rate_threshold,
+                     &bandwidth_cap_at_high_loss_rate,
+                     &slope_of_bwe_high_loss_func,
+                     &bound_by_upper_link_capacity_when_loss_limited},
+                    key_value_config->Lookup("WebRTC-Bwe-LossBasedBweV2"));
+  }
+
+  absl::optional<Config> config;
+  if (!enabled.Get()) {
+    return config;
+  }
+  config.emplace(Config());
+  config->bandwidth_rampup_upper_bound_factor =
+      bandwidth_rampup_upper_bound_factor.Get();
+  config->rampup_acceleration_max_factor = rampup_acceleration_max_factor.Get();
+  config->rampup_acceleration_maxout_time =
+      rampup_acceleration_maxout_time.Get();
+  config->candidate_factors = candidate_factors.Get();
+  config->higher_bandwidth_bias_factor = higher_bandwidth_bias_factor.Get();
+  config->higher_log_bandwidth_bias_factor =
+      higher_log_bandwidth_bias_factor.Get();
+  config->inherent_loss_lower_bound = inherent_loss_lower_bound.Get();
+  config->loss_threshold_of_high_bandwidth_preference =
+      loss_threshold_of_high_bandwidth_preference.Get();
+  config->bandwidth_preference_smoothing_factor =
+      bandwidth_preference_smoothing_factor.Get();
+  config->inherent_loss_upper_bound_bandwidth_balance =
+      inherent_loss_upper_bound_bandwidth_balance.Get();
+  config->inherent_loss_upper_bound_offset =
+      inherent_loss_upper_bound_offset.Get();
+  config->initial_inherent_loss_estimate = initial_inherent_loss_estimate.Get();
+  config->newton_iterations = newton_iterations.Get();
+  config->newton_step_size = newton_step_size.Get();
+  config->append_acknowledged_rate_candidate =
+      append_acknowledged_rate_candidate.Get();
+  config->append_delay_based_estimate_candidate =
+      append_delay_based_estimate_candidate.Get();
+  config->observation_duration_lower_bound =
+      observation_duration_lower_bound.Get();
+  config->observation_window_size = observation_window_size.Get();
+  config->sending_rate_smoothing_factor = sending_rate_smoothing_factor.Get();
+  config->instant_upper_bound_temporal_weight_factor =
+      instant_upper_bound_temporal_weight_factor.Get();
+  config->instant_upper_bound_bandwidth_balance =
+      instant_upper_bound_bandwidth_balance.Get();
+  config->instant_upper_bound_loss_offset =
+      instant_upper_bound_loss_offset.Get();
+  config->temporal_weight_factor = temporal_weight_factor.Get();
+  config->bandwidth_backoff_lower_bound_factor =
+      bandwidth_backoff_lower_bound_factor.Get();
+  config->trendline_integration_enabled = trendline_integration_enabled.Get();
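+  // The trial string consumed by ParseFieldTrial() above has the usual
+  // WebRTC key:value form, e.g. (illustrative values, not recommendations):
+  //   WebRTC-Bwe-LossBasedBweV2/
+  //       Enabled:true,CandidateFactors:1.1|1.0|0.95,NewtonIterations:2/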
+  config->trendline_observations_window_size =
+      trendline_observations_window_size.Get();
+  config->max_increase_factor = max_increase_factor.Get();
+  config->delayed_increase_window = delayed_increase_window.Get();
+  config->use_acked_bitrate_only_when_overusing =
+      use_acked_bitrate_only_when_overusing.Get();
+  config->not_increase_if_inherent_loss_less_than_average_loss =
+      not_increase_if_inherent_loss_less_than_average_loss.Get();
+  config->high_loss_rate_threshold = high_loss_rate_threshold.Get();
+  config->bandwidth_cap_at_high_loss_rate =
+      bandwidth_cap_at_high_loss_rate.Get();
+  config->slope_of_bwe_high_loss_func = slope_of_bwe_high_loss_func.Get();
+  config->probe_integration_enabled = probe_integration_enabled.Get();
+  config->bound_by_upper_link_capacity_when_loss_limited =
+      bound_by_upper_link_capacity_when_loss_limited.Get();
+
+  return config;
+}
+
+bool LossBasedBweV2::IsConfigValid() const {
+  if (!config_.has_value()) {
+    return false;
+  }
+
+  bool valid = true;
+
+  if (config_->bandwidth_rampup_upper_bound_factor <= 1.0) {
+    RTC_LOG(LS_WARNING)
+        << "The bandwidth rampup upper bound factor must be greater than 1: "
+        << config_->bandwidth_rampup_upper_bound_factor;
+    valid = false;
+  }
+  if (config_->rampup_acceleration_max_factor < 0.0) {
+    RTC_LOG(LS_WARNING)
+        << "The rampup acceleration max factor must be non-negative: "
+        << config_->rampup_acceleration_max_factor;
+    valid = false;
+  }
+  if (config_->rampup_acceleration_maxout_time <= TimeDelta::Zero()) {
+    RTC_LOG(LS_WARNING)
+        << "The rampup acceleration maxout time must be above zero: "
+        << config_->rampup_acceleration_maxout_time.seconds();
+    valid = false;
+  }
+  for (double candidate_factor : config_->candidate_factors) {
+    if (candidate_factor <= 0.0) {
+      RTC_LOG(LS_WARNING) << "All candidate factors must be greater than zero: "
+                          << candidate_factor;
+      valid = false;
+    }
+  }
+
+  // Ensure that the configuration allows generation of at least one candidate
+  // other than the current estimate.
+  if (!config_->append_acknowledged_rate_candidate &&
+      !config_->append_delay_based_estimate_candidate &&
+      !absl::c_any_of(config_->candidate_factors,
+                      [](double cf) { return cf != 1.0; })) {
+    RTC_LOG(LS_WARNING)
+        << "The configuration does not allow generating candidates. Specify "
+           "a candidate factor other than 1.0, allow the acknowledged rate "
+           "to be a candidate, and/or allow the delay based estimate to be a "
+           "candidate.";
+    valid = false;
+  }
Specify " + "a candidate factor other than 1.0, allow the acknowledged rate " + "to be a candidate, and/or allow the delay based estimate to be a " + "candidate."; + valid = false; + } + + if (config_->higher_bandwidth_bias_factor < 0.0) { + RTC_LOG(LS_WARNING) + << "The higher bandwidth bias factor must be non-negative: " + << config_->higher_bandwidth_bias_factor; + valid = false; + } + if (config_->inherent_loss_lower_bound < 0.0 || + config_->inherent_loss_lower_bound >= 1.0) { + RTC_LOG(LS_WARNING) << "The inherent loss lower bound must be in [0, 1): " + << config_->inherent_loss_lower_bound; + valid = false; + } + if (config_->loss_threshold_of_high_bandwidth_preference < 0.0 || + config_->loss_threshold_of_high_bandwidth_preference >= 1.0) { + RTC_LOG(LS_WARNING) + << "The loss threshold of high bandwidth preference must be in [0, 1): " + << config_->loss_threshold_of_high_bandwidth_preference; + valid = false; + } + if (config_->bandwidth_preference_smoothing_factor <= 0.0 || + config_->bandwidth_preference_smoothing_factor > 1.0) { + RTC_LOG(LS_WARNING) + << "The bandwidth preference smoothing factor must be in (0, 1]: " + << config_->bandwidth_preference_smoothing_factor; + valid = false; + } + if (config_->inherent_loss_upper_bound_bandwidth_balance <= + DataRate::Zero()) { + RTC_LOG(LS_WARNING) + << "The inherent loss upper bound bandwidth balance " + "must be positive: " + << ToString(config_->inherent_loss_upper_bound_bandwidth_balance); + valid = false; + } + if (config_->inherent_loss_upper_bound_offset < + config_->inherent_loss_lower_bound || + config_->inherent_loss_upper_bound_offset >= 1.0) { + RTC_LOG(LS_WARNING) << "The inherent loss upper bound must be greater " + "than or equal to the inherent " + "loss lower bound, which is " + << config_->inherent_loss_lower_bound + << ", and less than 1: " + << config_->inherent_loss_upper_bound_offset; + valid = false; + } + if (config_->initial_inherent_loss_estimate < 0.0 || + config_->initial_inherent_loss_estimate >= 1.0) { + RTC_LOG(LS_WARNING) + << "The initial inherent loss estimate must be in [0, 1): " + << config_->initial_inherent_loss_estimate; + valid = false; + } + if (config_->newton_iterations <= 0) { + RTC_LOG(LS_WARNING) << "The number of Newton iterations must be positive: " + << config_->newton_iterations; + valid = false; + } + if (config_->newton_step_size <= 0.0) { + RTC_LOG(LS_WARNING) << "The Newton step size must be positive: " + << config_->newton_step_size; + valid = false; + } + if (config_->observation_duration_lower_bound <= TimeDelta::Zero()) { + RTC_LOG(LS_WARNING) + << "The observation duration lower bound must be positive: " + << ToString(config_->observation_duration_lower_bound); + valid = false; + } + if (config_->observation_window_size < 2) { + RTC_LOG(LS_WARNING) << "The observation window size must be at least 2: " + << config_->observation_window_size; + valid = false; + } + if (config_->sending_rate_smoothing_factor < 0.0 || + config_->sending_rate_smoothing_factor >= 1.0) { + RTC_LOG(LS_WARNING) + << "The sending rate smoothing factor must be in [0, 1): " + << config_->sending_rate_smoothing_factor; + valid = false; + } + if (config_->instant_upper_bound_temporal_weight_factor <= 0.0 || + config_->instant_upper_bound_temporal_weight_factor > 1.0) { + RTC_LOG(LS_WARNING) + << "The instant upper bound temporal weight factor must be in (0, 1]" + << config_->instant_upper_bound_temporal_weight_factor; + valid = false; + } + if (config_->instant_upper_bound_bandwidth_balance <= 
DataRate::Zero()) { + RTC_LOG(LS_WARNING) + << "The instant upper bound bandwidth balance must be positive: " + << ToString(config_->instant_upper_bound_bandwidth_balance); + valid = false; + } + if (config_->instant_upper_bound_loss_offset < 0.0 || + config_->instant_upper_bound_loss_offset >= 1.0) { + RTC_LOG(LS_WARNING) + << "The instant upper bound loss offset must be in [0, 1): " + << config_->instant_upper_bound_loss_offset; + valid = false; + } + if (config_->temporal_weight_factor <= 0.0 || + config_->temporal_weight_factor > 1.0) { + RTC_LOG(LS_WARNING) << "The temporal weight factor must be in (0, 1]: " + << config_->temporal_weight_factor; + valid = false; + } + if (config_->bandwidth_backoff_lower_bound_factor > 1.0) { + RTC_LOG(LS_WARNING) + << "The bandwidth backoff lower bound factor must not be greater than " + "1: " + << config_->bandwidth_backoff_lower_bound_factor; + valid = false; + } + if (config_->trendline_observations_window_size < 1) { + RTC_LOG(LS_WARNING) << "The trendline window size must be at least 1: " + << config_->trendline_observations_window_size; + valid = false; + } + if (config_->max_increase_factor <= 0.0) { + RTC_LOG(LS_WARNING) << "The maximum increase factor must be positive: " + << config_->max_increase_factor; + valid = false; + } + if (config_->delayed_increase_window <= TimeDelta::Zero()) { + RTC_LOG(LS_WARNING) << "The delayed increase window must be positive: " + << config_->delayed_increase_window.ms(); + valid = false; + } + if (config_->high_loss_rate_threshold <= 0.0 || + config_->high_loss_rate_threshold > 1.0) { + RTC_LOG(LS_WARNING) << "The high loss rate threshold must be in (0, 1]: " + << config_->high_loss_rate_threshold; + valid = false; + } + return valid; +} + +double LossBasedBweV2::GetAverageReportedLossRatio() const { + if (num_observations_ <= 0) { + return 0.0; + } + + double num_packets = 0; + double num_lost_packets = 0; + for (const Observation& observation : observations_) { + if (!observation.IsInitialized()) { + continue; + } + + double instant_temporal_weight = + instant_upper_bound_temporal_weights_[(num_observations_ - 1) - + observation.id]; + num_packets += instant_temporal_weight * observation.num_packets; + num_lost_packets += instant_temporal_weight * observation.num_lost_packets; + } + + return num_lost_packets / num_packets; +} + +DataRate LossBasedBweV2::GetCandidateBandwidthUpperBound() const { + DataRate candidate_bandwidth_upper_bound = max_bitrate_; + if (IsBandwidthLimitedDueToLoss() && + IsValid(bandwidth_limit_in_current_window_)) { + candidate_bandwidth_upper_bound = bandwidth_limit_in_current_window_; + } + + if (config_->trendline_integration_enabled) { + candidate_bandwidth_upper_bound = + std::min(GetInstantUpperBound(), candidate_bandwidth_upper_bound); + if (IsValid(delay_based_estimate_)) { + candidate_bandwidth_upper_bound = + std::min(delay_based_estimate_, candidate_bandwidth_upper_bound); + } + } + + if (!acknowledged_bitrate_.has_value()) + return candidate_bandwidth_upper_bound; + + if (config_->rampup_acceleration_max_factor > 0.0) { + const TimeDelta time_since_bandwidth_reduced = std::min( + config_->rampup_acceleration_maxout_time, + std::max(TimeDelta::Zero(), last_send_time_most_recent_observation_ - + last_time_estimate_reduced_)); + const double rampup_acceleration = config_->rampup_acceleration_max_factor * + time_since_bandwidth_reduced / + config_->rampup_acceleration_maxout_time; + + candidate_bandwidth_upper_bound += + rampup_acceleration * (*acknowledged_bitrate_); + } + 
return candidate_bandwidth_upper_bound;
+}
+
+std::vector<LossBasedBweV2::ChannelParameters> LossBasedBweV2::GetCandidates()
+    const {
+  std::vector<DataRate> bandwidths;
+  bool can_increase_bitrate = TrendlineEsimateAllowBitrateIncrease();
+  for (double candidate_factor : config_->candidate_factors) {
+    if (!can_increase_bitrate && candidate_factor > 1.0) {
+      continue;
+    }
+    bandwidths.push_back(candidate_factor *
+                         current_estimate_.loss_limited_bandwidth);
+  }
+
+  if (acknowledged_bitrate_.has_value() &&
+      config_->append_acknowledged_rate_candidate &&
+      TrendlineEsimateAllowEmergencyBackoff()) {
+    bandwidths.push_back(*acknowledged_bitrate_ *
+                         config_->bandwidth_backoff_lower_bound_factor);
+  }
+
+  if (IsValid(delay_based_estimate_) &&
+      config_->append_delay_based_estimate_candidate) {
+    if (can_increase_bitrate &&
+        delay_based_estimate_ > current_estimate_.loss_limited_bandwidth) {
+      bandwidths.push_back(delay_based_estimate_);
+    }
+  }
+
+  const DataRate candidate_bandwidth_upper_bound =
+      GetCandidateBandwidthUpperBound();
+
+  std::vector<ChannelParameters> candidates;
+  candidates.resize(bandwidths.size());
+  for (size_t i = 0; i < bandwidths.size(); ++i) {
+    ChannelParameters candidate = current_estimate_;
+    if (config_->trendline_integration_enabled) {
+      candidate.loss_limited_bandwidth =
+          std::min(bandwidths[i], candidate_bandwidth_upper_bound);
+    } else {
+      candidate.loss_limited_bandwidth = std::min(
+          bandwidths[i], std::max(current_estimate_.loss_limited_bandwidth,
+                                  candidate_bandwidth_upper_bound));
+    }
+    candidate.inherent_loss = GetFeasibleInherentLoss(candidate);
+    candidates[i] = candidate;
+  }
+  return candidates;
+}
+
+LossBasedBweV2::Derivatives LossBasedBweV2::GetDerivatives(
+    const ChannelParameters& channel_parameters) const {
+  Derivatives derivatives;
+
+  for (const Observation& observation : observations_) {
+    if (!observation.IsInitialized()) {
+      continue;
+    }
+
+    double loss_probability = GetLossProbability(
+        channel_parameters.inherent_loss,
+        channel_parameters.loss_limited_bandwidth, observation.sending_rate);
+
+    double temporal_weight =
+        temporal_weights_[(num_observations_ - 1) - observation.id];
+
+    derivatives.first +=
+        temporal_weight *
+        ((observation.num_lost_packets / loss_probability) -
+         (observation.num_received_packets / (1.0 - loss_probability)));
+    derivatives.second -=
+        temporal_weight *
+        ((observation.num_lost_packets / std::pow(loss_probability, 2)) +
+         (observation.num_received_packets /
+          std::pow(1.0 - loss_probability, 2)));
+  }
+
+  if (derivatives.second >= 0.0) {
+    RTC_LOG(LS_ERROR) << "The second derivative is mathematically guaranteed "
+                         "to be negative but is "
+                      << derivatives.second << ".";
+    derivatives.second = -1.0e-6;
+  }
+
+  return derivatives;
+}
+
+double LossBasedBweV2::GetFeasibleInherentLoss(
+    const ChannelParameters& channel_parameters) const {
+  return std::min(
+      std::max(channel_parameters.inherent_loss,
+               config_->inherent_loss_lower_bound),
+      GetInherentLossUpperBound(channel_parameters.loss_limited_bandwidth));
+}
+
+double LossBasedBweV2::GetInherentLossUpperBound(DataRate bandwidth) const {
+  if (bandwidth.IsZero()) {
+    return 1.0;
+  }
+
+  double inherent_loss_upper_bound =
+      config_->inherent_loss_upper_bound_offset +
+      config_->inherent_loss_upper_bound_bandwidth_balance / bandwidth;
+  return std::min(inherent_loss_upper_bound, 1.0);
+}
+
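+// A worked example for the bound above, using the default field trial values
+// (offset 0.05, bandwidth balance 75 kbps): at a loss limited bandwidth of
+// 1000 kbps the inherent loss upper bound is 0.05 + 75 / 1000 = 0.125, and
+// it shrinks toward the offset as the bandwidth grows.
+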
+double LossBasedBweV2::AdjustBiasFactor(double loss_rate,
+                                        double bias_factor) const {
+  return bias_factor *
+         (config_->loss_threshold_of_high_bandwidth_preference - loss_rate) /
+         (config_->bandwidth_preference_smoothing_factor +
+          std::abs(config_->loss_threshold_of_high_bandwidth_preference -
+                   loss_rate));
+}
+
+double LossBasedBweV2::GetHighBandwidthBias(DataRate bandwidth) const {
+  if (IsValid(bandwidth)) {
+    const double average_reported_loss_ratio = GetAverageReportedLossRatio();
+    return AdjustBiasFactor(average_reported_loss_ratio,
+                            config_->higher_bandwidth_bias_factor) *
+               bandwidth.kbps() +
+           AdjustBiasFactor(average_reported_loss_ratio,
+                            config_->higher_log_bandwidth_bias_factor) *
+               std::log(1.0 + bandwidth.kbps());
+  }
+  return 0.0;
+}
+
+double LossBasedBweV2::GetObjective(
+    const ChannelParameters& channel_parameters) const {
+  double objective = 0.0;
+
+  const double high_bandwidth_bias =
+      GetHighBandwidthBias(channel_parameters.loss_limited_bandwidth);
+
+  for (const Observation& observation : observations_) {
+    if (!observation.IsInitialized()) {
+      continue;
+    }
+
+    double loss_probability = GetLossProbability(
+        channel_parameters.inherent_loss,
+        channel_parameters.loss_limited_bandwidth, observation.sending_rate);
+
+    double temporal_weight =
+        temporal_weights_[(num_observations_ - 1) - observation.id];
+
+    objective +=
+        temporal_weight *
+        ((observation.num_lost_packets * std::log(loss_probability)) +
+         (observation.num_received_packets *
+          std::log(1.0 - loss_probability)));
+    objective +=
+        temporal_weight * high_bandwidth_bias * observation.num_packets;
+  }
+
+  return objective;
+}
+
+DataRate LossBasedBweV2::GetSendingRate(
+    DataRate instantaneous_sending_rate) const {
+  if (num_observations_ <= 0) {
+    return instantaneous_sending_rate;
+  }
+
+  const int most_recent_observation_idx =
+      (num_observations_ - 1) % config_->observation_window_size;
+  const Observation& most_recent_observation =
+      observations_[most_recent_observation_idx];
+  DataRate sending_rate_previous_observation =
+      most_recent_observation.sending_rate;
+
+  return config_->sending_rate_smoothing_factor *
+             sending_rate_previous_observation +
+         (1.0 - config_->sending_rate_smoothing_factor) *
+             instantaneous_sending_rate;
+}
+
+DataRate LossBasedBweV2::GetInstantUpperBound() const {
+  return cached_instant_upper_bound_.value_or(max_bitrate_);
+}
+
+void LossBasedBweV2::CalculateInstantUpperBound() {
+  DataRate instant_limit = max_bitrate_;
+  const double average_reported_loss_ratio = GetAverageReportedLossRatio();
+  if (average_reported_loss_ratio > config_->instant_upper_bound_loss_offset) {
+    instant_limit = config_->instant_upper_bound_bandwidth_balance /
+                    (average_reported_loss_ratio -
+                     config_->instant_upper_bound_loss_offset);
+    if (average_reported_loss_ratio > config_->high_loss_rate_threshold) {
+      instant_limit = std::min(
+          instant_limit, DataRate::KilobitsPerSec(std::max(
+                             static_cast<double>(min_bitrate_.kbps()),
+                             config_->bandwidth_cap_at_high_loss_rate.kbps() -
+                                 config_->slope_of_bwe_high_loss_func *
+                                     average_reported_loss_ratio)));
+    }
+  }
+
+  if (IsBandwidthLimitedDueToLoss()) {
+    if (IsValid(upper_link_capacity_) &&
+        config_->bound_by_upper_link_capacity_when_loss_limited) {
+      instant_limit = std::min(instant_limit, upper_link_capacity_);
+    }
+  }
+  cached_instant_upper_bound_ = instant_limit;
+}
+
+void LossBasedBweV2::CalculateTemporalWeights() {
+  for (int i = 0; i < config_->observation_window_size; ++i) {
+    temporal_weights_[i] = std::pow(config_->temporal_weight_factor, i);
+    instant_upper_bound_temporal_weights_[i] =
+        std::pow(config_->instant_upper_bound_temporal_weight_factor, i);
+  }
+}
+
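+// NewtonsMethodUpdate() below performs a few damped Newton steps on the
+// inherent loss only: x <- x - step_size * f'(x) / f''(x), where f is the
+// temporally weighted log-likelihood from GetObjective(). Because f''(x) is
+// negative (GetDerivatives() clamps it to stay negative), each step moves
+// the inherent loss toward a maximum of the likelihood.
+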
ChannelParameters& channel_parameters) const { + if (num_observations_ <= 0) { + return; + } + + for (int i = 0; i < config_->newton_iterations; ++i) { + const Derivatives derivatives = GetDerivatives(channel_parameters); + channel_parameters.inherent_loss -= + config_->newton_step_size * derivatives.first / derivatives.second; + channel_parameters.inherent_loss = + GetFeasibleInherentLoss(channel_parameters); + } +} + +bool LossBasedBweV2::TrendlineEsimateAllowBitrateIncrease() const { + if (!config_->trendline_integration_enabled) { + return true; + } + + for (const auto& detector_state : delay_detector_states_) { + if (detector_state == BandwidthUsage::kBwOverusing || + detector_state == BandwidthUsage::kBwUnderusing) { + return false; + } + } + return true; +} + +bool LossBasedBweV2::TrendlineEsimateAllowEmergencyBackoff() const { + if (!config_->trendline_integration_enabled) { + return true; + } + + if (!config_->use_acked_bitrate_only_when_overusing) { + return true; + } + + for (const auto& detector_state : delay_detector_states_) { + if (detector_state == BandwidthUsage::kBwOverusing) { + return true; + } + } + + return false; +} + +bool LossBasedBweV2::PushBackObservation( + rtc::ArrayView packet_results, + BandwidthUsage delay_detector_state) { + delay_detector_states_.push_front(delay_detector_state); + if (static_cast(delay_detector_states_.size()) > + config_->trendline_observations_window_size) { + delay_detector_states_.pop_back(); + } + + if (packet_results.empty()) { + return false; + } + + PacketResultsSummary packet_results_summary = + GetPacketResultsSummary(packet_results); + + partial_observation_.num_packets += packet_results_summary.num_packets; + partial_observation_.num_lost_packets += + packet_results_summary.num_lost_packets; + partial_observation_.size += packet_results_summary.total_size; + + // This is the first packet report we have received. + if (!IsValid(last_send_time_most_recent_observation_)) { + last_send_time_most_recent_observation_ = + packet_results_summary.first_send_time; + } + + const Timestamp last_send_time = packet_results_summary.last_send_time; + const TimeDelta observation_duration = + last_send_time - last_send_time_most_recent_observation_; + // Too small to be meaningful. 
+  // Too small to be meaningful.
+  if (observation_duration <= TimeDelta::Zero() ||
+      (observation_duration < config_->observation_duration_lower_bound &&
+       (delay_detector_state != BandwidthUsage::kBwOverusing ||
+        !config_->trendline_integration_enabled))) {
+    return false;
+  }
+
+  last_send_time_most_recent_observation_ = last_send_time;
+
+  Observation observation;
+  observation.num_packets = partial_observation_.num_packets;
+  observation.num_lost_packets = partial_observation_.num_lost_packets;
+  observation.num_received_packets =
+      observation.num_packets - observation.num_lost_packets;
+  observation.sending_rate =
+      GetSendingRate(partial_observation_.size / observation_duration);
+  observation.id = num_observations_++;
+  observations_[observation.id % config_->observation_window_size] =
+      observation;
+
+  partial_observation_ = PartialObservation();
+
+  CalculateInstantUpperBound();
+  return true;
+}
+
+bool LossBasedBweV2::IsBandwidthLimitedDueToLoss() const {
+  return current_state_ != LossBasedState::kDelayBasedEstimate;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.h
new file mode 100644
index 0000000000..9ff9cb74c6
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.h
@@ -0,0 +1,203 @@
+/*
+ *  Copyright 2021 The WebRTC project authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BWE_V2_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BWE_V2_H_
+
+#include <cstddef>
+#include <deque>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/field_trials_view.h"
+#include "api/network_state_predictor.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+
+namespace webrtc {
+
+// State of the loss based estimate, which can be either increasing/decreasing
+// when network is loss limited, or equal to the delay based estimate.
+enum class LossBasedState {
+  kIncreasing = 0,
+  kDecreasing = 1,
+  kDelayBasedEstimate = 2
+};
+
+class LossBasedBweV2 {
+ public:
+  struct Result {
+    ~Result() = default;
+    DataRate bandwidth_estimate = DataRate::Zero();
+    LossBasedState state = LossBasedState::kDelayBasedEstimate;
+  };
+  // Creates a disabled `LossBasedBweV2` if the
+  // `key_value_config` is not valid.
+  explicit LossBasedBweV2(const FieldTrialsView* key_value_config);
+
+  LossBasedBweV2(const LossBasedBweV2&) = delete;
+  LossBasedBweV2& operator=(const LossBasedBweV2&) = delete;
+
+  ~LossBasedBweV2() = default;
+
+  bool IsEnabled() const;
+  // Returns true iff a BWE can be calculated, i.e., the estimator has been
+  // initialized with a BWE and then has received enough `PacketResult`s.
+  bool IsReady() const;
+
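+  // A minimal usage sketch (illustrative only: `field_trials` and `feedback`
+  // are assumed to exist, and all rates are made-up values):
+  //
+  //   LossBasedBweV2 bwe(&field_trials);
+  //   bwe.SetMinMaxBitrate(DataRate::KilobitsPerSec(30),
+  //                        DataRate::KilobitsPerSec(5000));
+  //   bwe.SetBandwidthEstimate(DataRate::KilobitsPerSec(600));
+  //   bwe.UpdateBandwidthEstimate(feedback, delay_based_estimate,
+  //                               BandwidthUsage::kBwNormal, absl::nullopt,
+  //                               DataRate::PlusInfinity());
+  //   if (bwe.IsReady()) {
+  //     DataRate estimate = bwe.GetLossBasedResult().bandwidth_estimate;
+  //   }
+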
+  // Returns `DataRate::PlusInfinity` if no BWE can be calculated.
+  Result GetLossBasedResult() const;
+
+  void SetAcknowledgedBitrate(DataRate acknowledged_bitrate);
+  void SetBandwidthEstimate(DataRate bandwidth_estimate);
+  void SetMinMaxBitrate(DataRate min_bitrate, DataRate max_bitrate);
+  void UpdateBandwidthEstimate(
+      rtc::ArrayView<const PacketResult> packet_results,
+      DataRate delay_based_estimate,
+      BandwidthUsage delay_detector_state,
+      absl::optional<DataRate> probe_bitrate,
+      DataRate upper_link_capacity);
+
+ private:
+  struct ChannelParameters {
+    double inherent_loss = 0.0;
+    DataRate loss_limited_bandwidth = DataRate::MinusInfinity();
+  };
+
+  struct Config {
+    double bandwidth_rampup_upper_bound_factor = 0.0;
+    double rampup_acceleration_max_factor = 0.0;
+    TimeDelta rampup_acceleration_maxout_time = TimeDelta::Zero();
+    std::vector<double> candidate_factors;
+    double higher_bandwidth_bias_factor = 0.0;
+    double higher_log_bandwidth_bias_factor = 0.0;
+    double inherent_loss_lower_bound = 0.0;
+    double loss_threshold_of_high_bandwidth_preference = 0.0;
+    double bandwidth_preference_smoothing_factor = 0.0;
+    DataRate inherent_loss_upper_bound_bandwidth_balance =
+        DataRate::MinusInfinity();
+    double inherent_loss_upper_bound_offset = 0.0;
+    double initial_inherent_loss_estimate = 0.0;
+    int newton_iterations = 0;
+    double newton_step_size = 0.0;
+    bool append_acknowledged_rate_candidate = true;
+    bool append_delay_based_estimate_candidate = false;
+    TimeDelta observation_duration_lower_bound = TimeDelta::Zero();
+    int observation_window_size = 0;
+    double sending_rate_smoothing_factor = 0.0;
+    double instant_upper_bound_temporal_weight_factor = 0.0;
+    DataRate instant_upper_bound_bandwidth_balance = DataRate::MinusInfinity();
+    double instant_upper_bound_loss_offset = 0.0;
+    double temporal_weight_factor = 0.0;
+    double bandwidth_backoff_lower_bound_factor = 0.0;
+    bool trendline_integration_enabled = false;
+    int trendline_observations_window_size = 0;
+    double max_increase_factor = 0.0;
+    TimeDelta delayed_increase_window = TimeDelta::Zero();
+    bool use_acked_bitrate_only_when_overusing = false;
+    bool not_increase_if_inherent_loss_less_than_average_loss = false;
+    double high_loss_rate_threshold = 1.0;
+    DataRate bandwidth_cap_at_high_loss_rate = DataRate::MinusInfinity();
+    double slope_of_bwe_high_loss_func = 1000.0;
+    bool probe_integration_enabled = false;
+    bool bound_by_upper_link_capacity_when_loss_limited = false;
+  };
+
+  struct Derivatives {
+    double first = 0.0;
+    double second = 0.0;
+  };
+
+  struct Observation {
+    bool IsInitialized() const { return id != -1; }
+
+    int num_packets = 0;
+    int num_lost_packets = 0;
+    int num_received_packets = 0;
+    DataRate sending_rate = DataRate::MinusInfinity();
+    int id = -1;
+  };
+
+  struct PartialObservation {
+    int num_packets = 0;
+    int num_lost_packets = 0;
+    DataSize size = DataSize::Zero();
+  };
+
+  static absl::optional<Config> CreateConfig(
+      const FieldTrialsView* key_value_config);
+  bool IsConfigValid() const;
+
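+  // The average below is a temporally weighted loss ratio: with weights
+  // w_i = InstantUpperBoundTemporalWeightFactor^i (i = 0 for the newest
+  // observation), it computes
+  //   sum_i(w_i * lost_i) / sum_i(w_i * packets_i).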
+  // Returns `0.0` if not enough loss statistics have been received.
+  double GetAverageReportedLossRatio() const;
+  std::vector<ChannelParameters> GetCandidates() const;
+  DataRate GetCandidateBandwidthUpperBound() const;
+  Derivatives GetDerivatives(
+      const ChannelParameters& channel_parameters) const;
+  double GetFeasibleInherentLoss(
+      const ChannelParameters& channel_parameters) const;
+  double GetInherentLossUpperBound(DataRate bandwidth) const;
+  double AdjustBiasFactor(double loss_rate, double bias_factor) const;
+  double GetHighBandwidthBias(DataRate bandwidth) const;
+  double GetObjective(const ChannelParameters& channel_parameters) const;
+  DataRate GetSendingRate(DataRate instantaneous_sending_rate) const;
+  DataRate GetInstantUpperBound() const;
+  void CalculateInstantUpperBound();
+
+  void CalculateTemporalWeights();
+  void NewtonsMethodUpdate(ChannelParameters& channel_parameters) const;
+
+  // Returns false if there exists a kBwOverusing or kBwUnderusing in the
+  // window.
+  bool TrendlineEsimateAllowBitrateIncrease() const;
+
+  // Returns true if there exists an overusing state in the window.
+  bool TrendlineEsimateAllowEmergencyBackoff() const;
+
+  // Returns false if no observation was created.
+  bool PushBackObservation(rtc::ArrayView<const PacketResult> packet_results,
+                           BandwidthUsage delay_detector_state);
+  void UpdateTrendlineEstimator(
+      const std::vector<PacketResult>& packet_feedbacks,
+      Timestamp at_time);
+  void UpdateDelayDetector(BandwidthUsage delay_detector_state);
+  bool IsEstimateIncreasingWhenLossLimited(
+      const ChannelParameters& best_candidate);
+  bool IsBandwidthLimitedDueToLoss() const;
+  void SetProbeBitrate(absl::optional<DataRate> probe_bitrate);
+
+  absl::optional<DataRate> acknowledged_bitrate_;
+  absl::optional<Config> config_;
+  ChannelParameters current_estimate_;
+  int num_observations_ = 0;
+  std::vector<Observation> observations_;
+  PartialObservation partial_observation_;
+  Timestamp last_send_time_most_recent_observation_ =
+      Timestamp::PlusInfinity();
+  Timestamp last_time_estimate_reduced_ = Timestamp::MinusInfinity();
+  absl::optional<DataRate> cached_instant_upper_bound_;
+  std::vector<double> instant_upper_bound_temporal_weights_;
+  std::vector<double> temporal_weights_;
+  std::deque<BandwidthUsage> delay_detector_states_;
+  Timestamp recovering_after_loss_timestamp_ = Timestamp::MinusInfinity();
+  DataRate bandwidth_limit_in_current_window_ = DataRate::PlusInfinity();
+  DataRate min_bitrate_ = DataRate::KilobitsPerSec(1);
+  DataRate max_bitrate_ = DataRate::PlusInfinity();
+  LossBasedState current_state_ = LossBasedState::kDelayBasedEstimate;
+  DataRate probe_bitrate_ = DataRate::PlusInfinity();
+  DataRate delay_based_estimate_ = DataRate::PlusInfinity();
+  DataRate upper_link_capacity_ = DataRate::PlusInfinity();
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_LOSS_BASED_BWE_V2_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_gn/moz.build
new file mode 100644
index 0000000000..5728e0c4b2
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("loss_based_bwe_v2_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_test.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_test.cc new file mode 100644 index 0000000000..c303c29d68 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/loss_based_bwe_v2_test.cc @@ -0,0 +1,1526 @@ +/* + * Copyright 2021 The WebRTC project authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/congestion_controller/goog_cc/loss_based_bwe_v2.h"
+
+#include <string>
+#include <vector>
+
+#include "api/network_state_predictor.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/explicit_key_value_config.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+using ::webrtc::test::ExplicitKeyValueConfig;
+
+constexpr TimeDelta kObservationDurationLowerBound = TimeDelta::Millis(200);
+constexpr TimeDelta kDelayedIncreaseWindow = TimeDelta::Millis(300);
+constexpr double kMaxIncreaseFactor = 1.5;
+
+class LossBasedBweV2Test : public ::testing::TestWithParam<bool> {
+ protected:
+  std::string Config(bool enabled,
+                     bool valid,
+                     bool trendline_integration_enabled) {
+    char buffer[1024];
+    rtc::SimpleStringBuilder config_string(buffer);
+
+    config_string << "WebRTC-Bwe-LossBasedBweV2/";
+
+    if (enabled) {
+      config_string << "Enabled:true";
+    } else {
+      config_string << "Enabled:false";
+    }
+
+    if (valid) {
+      config_string << ",BwRampupUpperBoundFactor:1.2";
+    } else {
+      config_string << ",BwRampupUpperBoundFactor:0.0";
+    }
+
+    if (trendline_integration_enabled) {
+      config_string << ",TrendlineIntegrationEnabled:true";
+    } else {
+      config_string << ",TrendlineIntegrationEnabled:false";
+    }
+
+    config_string
+        << ",CandidateFactors:1.1|1.0|0.95,HigherBwBiasFactor:0.01,"
+           "DelayBasedCandidate:true,"
+           "InherentLossLowerBound:0.001,InherentLossUpperBoundBwBalance:"
+           "14kbps,"
+           "InherentLossUpperBoundOffset:0.9,InitialInherentLossEstimate:0.01,"
+           "NewtonIterations:2,NewtonStepSize:0.4,ObservationWindowSize:15,"
+           "SendingRateSmoothingFactor:0.01,"
+           "InstantUpperBoundTemporalWeightFactor:0.97,"
+           "InstantUpperBoundBwBalance:90kbps,"
+           "InstantUpperBoundLossOffset:0.1,TemporalWeightFactor:0.98";
+
+    config_string.AppendFormat(
+        ",ObservationDurationLowerBound:%dms",
+        static_cast<int>(kObservationDurationLowerBound.ms()));
+    config_string.AppendFormat(",MaxIncreaseFactor:%f", kMaxIncreaseFactor);
+    config_string.AppendFormat(",DelayedIncreaseWindow:%dms",
+                               static_cast<int>(kDelayedIncreaseWindow.ms()));
+
+    config_string << "/";
+
+    return config_string.str();
+  }
+
+  std::vector<PacketResult> CreatePacketResultsWithReceivedPackets(
+      Timestamp first_packet_timestamp) {
+    std::vector<PacketResult> enough_feedback(2);
+    enough_feedback[0].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[1].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[0].sent_packet.send_time = first_packet_timestamp;
+    enough_feedback[1].sent_packet.send_time =
+        first_packet_timestamp + kObservationDurationLowerBound;
+    enough_feedback[0].receive_time =
+        first_packet_timestamp + kObservationDurationLowerBound;
+    enough_feedback[1].receive_time =
+        first_packet_timestamp + 2 * kObservationDurationLowerBound;
+    return enough_feedback;
+  }
+
+  std::vector<PacketResult> CreatePacketResultsWith10pLossRate(
+      Timestamp first_packet_timestamp) {
+    std::vector<PacketResult> enough_feedback(10);
+    enough_feedback[0].sent_packet.size = DataSize::Bytes(15'000);
+    for (unsigned i = 0; i < enough_feedback.size(); ++i) {
+      enough_feedback[i].sent_packet.size = DataSize::Bytes(15'000);
+      enough_feedback[i].sent_packet.send_time =
+          first_packet_timestamp +
+          static_cast<int>(i) * kObservationDurationLowerBound;
+      enough_feedback[i].receive_time =
+          first_packet_timestamp +
+          static_cast<int>(i + 1) * kObservationDurationLowerBound;
+    }
+    enough_feedback[9].receive_time = Timestamp::PlusInfinity();
+    return enough_feedback;
+  }
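+
+  // In these helpers, a lost packet is encoded by setting its receive_time to
+  // Timestamp::PlusInfinity(). As an illustrative sketch (not used by the
+  // tests themselves), a loss ratio over such a feedback vector could be
+  // computed as:
+  //
+  //   double LossRatio(const std::vector<PacketResult>& feedback) {
+  //     int lost = 0;
+  //     for (const PacketResult& result : feedback) {
+  //       if (result.receive_time.IsPlusInfinity()) ++lost;
+  //     }
+  //     return feedback.empty()
+  //                ? 0.0
+  //                : static_cast<double>(lost) / feedback.size();
+  //   }
+  //
+  // Under this encoding, CreatePacketResultsWith10pLossRate above marks 1 of
+  // its 10 packets as lost, and the 50p/100p helpers below mark 1 of 2 and
+  // 2 of 2, respectively.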
+
+  std::vector<PacketResult> CreatePacketResultsWith50pLossRate(
+      Timestamp first_packet_timestamp) {
+    std::vector<PacketResult> enough_feedback(2);
+    enough_feedback[0].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[1].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[0].sent_packet.send_time = first_packet_timestamp;
+    enough_feedback[1].sent_packet.send_time =
+        first_packet_timestamp + kObservationDurationLowerBound;
+    enough_feedback[0].receive_time =
+        first_packet_timestamp + kObservationDurationLowerBound;
+    enough_feedback[1].receive_time = Timestamp::PlusInfinity();
+    return enough_feedback;
+  }
+
+  std::vector<PacketResult> CreatePacketResultsWith100pLossRate(
+      Timestamp first_packet_timestamp) {
+    std::vector<PacketResult> enough_feedback(2);
+    enough_feedback[0].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[1].sent_packet.size = DataSize::Bytes(15'000);
+    enough_feedback[0].sent_packet.send_time = first_packet_timestamp;
+    enough_feedback[1].sent_packet.send_time =
+        first_packet_timestamp + kObservationDurationLowerBound;
+    enough_feedback[0].receive_time = Timestamp::PlusInfinity();
+    enough_feedback[1].receive_time = Timestamp::PlusInfinity();
+    return enough_feedback;
+  }
+};
+
+TEST_P(LossBasedBweV2Test, EnabledWhenGivenValidConfigurationValues) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  EXPECT_TRUE(loss_based_bandwidth_estimator.IsEnabled());
+}
+
+TEST_P(LossBasedBweV2Test, DisabledWhenGivenDisabledConfiguration) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/false, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsEnabled());
+}
+
+TEST_P(LossBasedBweV2Test, DisabledWhenGivenNonValidConfigurationValues) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/false,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsEnabled());
+}
+
+TEST_P(LossBasedBweV2Test, DisabledWhenGivenNonPositiveCandidateFactor) {
+  ExplicitKeyValueConfig key_value_config_negative_candidate_factor(
+      "WebRTC-Bwe-LossBasedBweV2/Enabled:true,CandidateFactors:-1.3|1.1/");
+  LossBasedBweV2 loss_based_bandwidth_estimator_1(
+      &key_value_config_negative_candidate_factor);
+  EXPECT_FALSE(loss_based_bandwidth_estimator_1.IsEnabled());
+
+  ExplicitKeyValueConfig key_value_config_zero_candidate_factor(
+      "WebRTC-Bwe-LossBasedBweV2/Enabled:true,CandidateFactors:0.0|1.1/");
+  LossBasedBweV2 loss_based_bandwidth_estimator_2(
+      &key_value_config_zero_candidate_factor);
+  EXPECT_FALSE(loss_based_bandwidth_estimator_2.IsEnabled());
+}
+
+TEST_P(LossBasedBweV2Test,
+       DisabledWhenGivenConfigurationThatDoesNotAllowGeneratingCandidates) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.0,AckedRateCandidate:false,"
+      "DelayBasedCandidate:false/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsEnabled());
+}
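+
+// A note on the Config() helper above: it assembles a single-line WebRTC
+// field trial string of the form (wrapped here for readability)
+//
+//   "WebRTC-Bwe-LossBasedBweV2/Enabled:true,BwRampupUpperBoundFactor:1.2,
+//    TrendlineIntegrationEnabled:false,CandidateFactors:1.1|1.0|0.95,...,
+//    ObservationDurationLowerBound:200ms,MaxIncreaseFactor:1.500000,
+//    DelayedIncreaseWindow:300ms/"
+//
+// which ExplicitKeyValueConfig then exposes to the estimator as key/value
+// pairs.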
+
+TEST_P(LossBasedBweV2Test, ReturnsDelayBasedEstimateWhenDisabled) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/false, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      /*packet_results=*/{},
+      /*delay_based_estimate=*/DataRate::KilobitsPerSec(100),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(100));
+}
+
+TEST_P(LossBasedBweV2Test,
+       ReturnsDelayBasedEstimateWhenWhenGivenNonValidConfigurationValues) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/false,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      /*packet_results=*/{},
+      /*delay_based_estimate=*/DataRate::KilobitsPerSec(100),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(100));
+}
+
+TEST_P(LossBasedBweV2Test,
+       BandwidthEstimateGivenInitializationAndThenFeedback) {
+  std::vector<PacketResult> enough_feedback =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_TRUE(loss_based_bandwidth_estimator.IsReady());
+  EXPECT_TRUE(loss_based_bandwidth_estimator.GetLossBasedResult()
+                  .bandwidth_estimate.IsFinite());
+}
+
+TEST_P(LossBasedBweV2Test, NoBandwidthEstimateGivenNoInitialization) {
+  std::vector<PacketResult> enough_feedback =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsReady());
+  EXPECT_TRUE(loss_based_bandwidth_estimator.GetLossBasedResult()
+                  .bandwidth_estimate.IsPlusInfinity());
+}
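+
+// A property worth keeping in mind for the tests above and below: IsReady()
+// only holds once the estimator has both been seeded via
+// SetBandwidthEstimate() and accumulated at least one complete observation
+// from packet feedback.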
+
+TEST_P(LossBasedBweV2Test, NoBandwidthEstimateGivenNotEnoughFeedback) {
+  // Create packet results where the observation duration is less than the
+  // lower bound.
+  PacketResult not_enough_feedback[2];
+  not_enough_feedback[0].sent_packet.size = DataSize::Bytes(15'000);
+  not_enough_feedback[1].sent_packet.size = DataSize::Bytes(15'000);
+  not_enough_feedback[0].sent_packet.send_time = Timestamp::Zero();
+  not_enough_feedback[1].sent_packet.send_time =
+      Timestamp::Zero() + kObservationDurationLowerBound / 2;
+  not_enough_feedback[0].receive_time =
+      Timestamp::Zero() + kObservationDurationLowerBound / 2;
+  not_enough_feedback[1].receive_time =
+      Timestamp::Zero() + kObservationDurationLowerBound;
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsReady());
+  EXPECT_TRUE(loss_based_bandwidth_estimator.GetLossBasedResult()
+                  .bandwidth_estimate.IsPlusInfinity());
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      not_enough_feedback, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_FALSE(loss_based_bandwidth_estimator.IsReady());
+  EXPECT_TRUE(loss_based_bandwidth_estimator.GetLossBasedResult()
+                  .bandwidth_estimate.IsPlusInfinity());
+}
+
+TEST_P(LossBasedBweV2Test,
+       SetValueIsTheEstimateUntilAdditionalFeedbackHasBeenReceived) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_NE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_NE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+}
+
+TEST_P(LossBasedBweV2Test,
+       SetAcknowledgedBitrateOnlyAffectsTheBweWhenAdditionalFeedbackIsGiven) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator_1(&key_value_config);
+  LossBasedBweV2 loss_based_bandwidth_estimator_2(&key_value_config);
+
+  loss_based_bandwidth_estimator_1.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator_2.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator_1.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  loss_based_bandwidth_estimator_2.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator_1.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(660));
+
+  loss_based_bandwidth_estimator_1.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(900));
+
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator_1.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(660));
+
+  loss_based_bandwidth_estimator_1.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  loss_based_bandwidth_estimator_2.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_NE(
+      loss_based_bandwidth_estimator_1.GetLossBasedResult().bandwidth_estimate,
+      loss_based_bandwidth_estimator_2.GetLossBasedResult().bandwidth_estimate);
+}
+
+TEST_P(LossBasedBweV2Test,
+       BandwidthEstimateIsCappedToBeTcpFairGivenTooHighLossRate) {
+  std::vector<PacketResult> enough_feedback_no_received_packets =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_no_received_packets,
+      /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(100));
+}
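+
+// For reference: the 100kbps expectation above is consistent with an instant
+// upper bound of the form InstantUpperBoundBwBalance / (loss_ratio -
+// InstantUpperBoundLossOffset); with the Config() values of 90kbps and 0.1,
+// a 100% loss rate gives 90 / (1.0 - 0.1) = 100kbps.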
+
+TEST_P(LossBasedBweV2Test, BandwidthEstimateNotIncreaseWhenNetworkUnderusing) {
+  if (!GetParam()) {
+    GTEST_SKIP() << "This test should run only if "
+                    "trendline_integration_enabled is enabled";
+  }
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwUnderusing, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_LE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_LE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+}
+
+// When the network is normal, the estimate can increase but never exceed the
+// delay based estimate.
+TEST_P(LossBasedBweV2Test,
+       BandwidthEstimateCappedByDelayBasedEstimateWhenNetworkNormal) {
+  // Create two packet results: the network is in the normal state, 100% of
+  // packets are received, and there is no delay increase.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // If the delay based estimate is infinity, the loss based estimate
+  // increases and is not bounded by the delay based estimate.
+  EXPECT_GT(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::KilobitsPerSec(500),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // If the delay based estimate is finite, the loss based estimate is
+  // bounded by the delay based estimate.
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(500));
+}
+
+// When loss based bwe receives a strong signal of overusing and an increase
+// in loss rate, it should use the acked bitrate for emergency backoff.
+TEST_P(LossBasedBweV2Test, UseAckedBitrateForEmegencyBackOff) {
+  // Create two packet results: the first has a 50% loss rate, the second a
+  // 100% loss rate.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith50pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  DataRate acked_bitrate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_bitrate);
+  // Update the estimate when the network is overusing at a 50% loss rate.
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwOverusing,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // Update the estimate again when the network is continuously overusing, now
+  // at a 100% loss rate.
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwOverusing,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // The estimated bitrate is now backed off based on the acked bitrate.
+  EXPECT_LE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      acked_bitrate);
+}
+
+// When receiving the same packet feedback, loss based bwe ignores the
+// feedback and returns the current estimate.
+TEST_P(LossBasedBweV2Test, NoBweChangeIfObservationDurationUnchanged) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(300));
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_1 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  // Use the same feedback and check if the estimate is unchanged.
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+  EXPECT_EQ(estimate_2, estimate_1);
+}
+
+// When receiving feedback of packets that were sent within an observation
+// duration, and the network is in the normal state, loss based bwe returns
+// the current estimate.
+TEST_P(LossBasedBweV2Test,
+       NoBweChangeIfObservationDurationIsSmallAndNetworkNormal) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound - TimeDelta::Millis(1));
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_1 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+  EXPECT_EQ(estimate_2, estimate_1);
+}
+
+// When receiving feedback of packets that were sent within an observation
+// duration, and the network is in the underusing state, loss based bwe
+// returns the current estimate.
+TEST_P(LossBasedBweV2Test,
+       NoBweIncreaseIfObservationDurationIsSmallAndNetworkUnderusing) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound - TimeDelta::Millis(1));
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_1 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwUnderusing, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+  EXPECT_LE(estimate_2, estimate_1);
+}
+
+// When receiving feedback of packets that were sent within an observation
+// duration, the network is overusing, and trendline integration is enabled,
+// loss based bwe updates its estimate.
+TEST_P(LossBasedBweV2Test,
+       UpdateEstimateIfObservationDurationIsSmallAndNetworkOverusing) {
+  if (!GetParam()) {
+    GTEST_SKIP() << "This test should run only if "
+                    "trendline_integration_enabled is enabled";
+  }
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith50pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound - TimeDelta::Millis(1));
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(300));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_1 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwOverusing,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+  EXPECT_LT(estimate_2, estimate_1);
+}
+
+TEST_P(LossBasedBweV2Test,
+       IncreaseToDelayBasedEstimateIfNoLossOrDelayIncrease) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          2 * kObservationDurationLowerBound);
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      delay_based_estimate);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      delay_based_estimate);
+}
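+
+// The next test exercises the back-off recovery path: after the estimator
+// backs off because of loss, the following estimate may grow by at most
+// MaxIncreaseFactor per update while recovering. With MaxIncreaseFactor:1.5
+// below, the post-recovery estimate is expected to be exactly 1.5x the
+// backed-off estimate.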
+
+TEST_P(LossBasedBweV2Test,
+       IncreaseByMaxIncreaseFactorAfterLossBasedBweBacksOff) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:1.5,BwRampupUpperBoundFactor:"
+      "2.0,NotIncreaseIfInherentLossLessThanAverageLoss:false/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  LossBasedBweV2::Result result_at_loss =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+
+  // Network recovers after loss.
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  LossBasedBweV2::Result result_after_recovery =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+  EXPECT_EQ(result_after_recovery.bandwidth_estimate,
+            result_at_loss.bandwidth_estimate * 1.5);
+}
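+
+// The following two tests track the LossBasedState machine: a loss-induced
+// back-off leaves the estimator in kDecreasing, and once feedback shows the
+// network recovering it transitions to kDelayBasedEstimate, provided the
+// delay based estimate is finite; with an infinite delay based estimate that
+// transition is not taken.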
+
+TEST_P(LossBasedBweV2Test,
+       LossBasedStateIsDelayBasedEstimateAfterNetworkRecovering) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:100|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:100,"
+      "BwRampupUpperBoundFactor:"
+      "2.0,NotIncreaseIfInherentLossLessThanAverageLoss:false/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(600);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  ASSERT_EQ(loss_based_bandwidth_estimator.GetLossBasedResult().state,
+            LossBasedState::kDecreasing);
+
+  // Network recovers after loss.
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(loss_based_bandwidth_estimator.GetLossBasedResult().state,
+            LossBasedState::kDelayBasedEstimate);
+
+  // The network continues recovering.
+  std::vector<PacketResult> enough_feedback_3 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound * 2);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_3, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_EQ(loss_based_bandwidth_estimator.GetLossBasedResult().state,
+            LossBasedState::kDelayBasedEstimate);
+}
+
+TEST_P(LossBasedBweV2Test,
+       LossBasedStateIsNotDelayBasedEstimateIfDelayBasedEsimtateInfinite) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:100|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:100,"
+      "BwRampupUpperBoundFactor:"
+      "2.0/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::PlusInfinity();
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  ASSERT_EQ(loss_based_bandwidth_estimator.GetLossBasedResult().state,
+            LossBasedState::kDecreasing);
+
+  // Network recovers after loss.
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  EXPECT_NE(loss_based_bandwidth_estimator.GetLossBasedResult().state,
+            LossBasedState::kDelayBasedEstimate);
+}
+
+// After loss based bwe backs off, the next estimate is capped by
+// a factor of acked bitrate.
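+// With BwRampupUpperBoundFactor:1.2 and an acknowledged bitrate of 50kbps,
+// that cap works out to 50 * 1.2 = 60kbps, which is exactly what the test
+// below asserts.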
+TEST_P(LossBasedBweV2Test,
+       IncreaseByFactorOfAckedBitrateAfterLossBasedBweBacksOff) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,LossThresholdOfHighBandwidthPreference:0.99,"
+      "BwRampupUpperBoundFactor:1.2,"
+      "InherentLossUpperBoundOffset:0.9,ObservationDurationLowerBound:200ms/");
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(300));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Change the acked bitrate to make sure that the estimate is bounded by a
+  // factor of acked bitrate.
+  DataRate acked_bitrate = DataRate::KilobitsPerSec(50);
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_bitrate);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // The estimate is capped by acked_bitrate * BwRampupUpperBoundFactor.
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+  EXPECT_EQ(estimate_2, acked_bitrate * 1.2);
+}
+
+// After loss based bwe backs off, the estimate is bounded during the delayed
+// window.
+TEST_P(LossBasedBweV2Test,
+       EstimateBitrateIsBoundedDuringDelayedWindowAfterLossBasedBweBacksOff) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWith50pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kDelayedIncreaseWindow - TimeDelta::Millis(2));
+  std::vector<PacketResult> enough_feedback_3 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kDelayedIncreaseWindow - TimeDelta::Millis(1));
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(300));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // Increase the acknowledged bitrate to make sure that the estimate is not
+  // capped too low.
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(5000));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // The estimate is capped by current_estimate * kMaxIncreaseFactor because
+  // it recently backed off.
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_3, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // The latest estimate is the same as the previous estimate since the sent
+  // packets were sent within the DelayedIncreaseWindow.
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      estimate_2);
+}
+
+// The estimate is not bounded after the delayed increase window.
+TEST_P(LossBasedBweV2Test, KeepIncreasingEstimateAfterDelayedIncreaseWindow) {
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kDelayedIncreaseWindow - TimeDelta::Millis(1));
+  std::vector<PacketResult> enough_feedback_3 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kDelayedIncreaseWindow + TimeDelta::Millis(1));
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(300));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // Increase the acknowledged bitrate to make sure that the estimate is not
+  // capped too low.
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(
+      DataRate::KilobitsPerSec(5000));
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // The estimate is capped by current_estimate * kMaxIncreaseFactor because it
+  // recently backed off.
+  DataRate estimate_2 =
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate;
+
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_3, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+  // The estimate can continue increasing after the DelayedIncreaseWindow.
+  EXPECT_GE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      estimate_2);
+}
+
+TEST_P(LossBasedBweV2Test, NotIncreaseIfInherentLossLessThanAverageLoss) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,"
+      "NotIncreaseIfInherentLossLessThanAverageLoss:true/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  std::vector<PacketResult> enough_feedback_10p_loss_1 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  std::vector<PacketResult> enough_feedback_10p_loss_2 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_2, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Do not increase the bitrate because the inherent loss is less than the
+  // average loss.
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+}
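+
+// The two tests below probe the high bandwidth preference logic: with a
+// large HigherBwBiasFactor, candidate selection favors the higher bandwidth
+// candidate only while the average loss rate stays below
+// LossThresholdOfHighBandwidthPreference; above that threshold the lower
+// bandwidth candidate wins.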
+
+TEST_P(LossBasedBweV2Test,
+       SelectHighBandwidthCandidateIfLossRateIsLessThanThreshold) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|0.8,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000,"
+      "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0."
+      "20,NotIncreaseIfInherentLossLessThanAverageLoss:false/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  std::vector<PacketResult> enough_feedback_10p_loss_1 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  std::vector<PacketResult> enough_feedback_10p_loss_2 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_2, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Because LossThresholdOfHighBandwidthPreference is 20% and the average
+  // loss is 10%, the bandwidth estimate should increase.
+  EXPECT_GT(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+}
+
+TEST_P(LossBasedBweV2Test,
+       SelectLowBandwidthCandidateIfLossRateIsIsHigherThanThreshold) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|0.8,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000,"
+      "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0."
+      "05/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  std::vector<PacketResult> enough_feedback_10p_loss_1 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  std::vector<PacketResult> enough_feedback_10p_loss_2 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_2, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Because LossThresholdOfHighBandwidthPreference is 5% and the average
+  // loss is 10%, the bandwidth estimate should decrease.
+  EXPECT_LT(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(600));
+}
+
+TEST_P(LossBasedBweV2Test, UseProbeResultWhenRecoveringFromLoss) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:1000,"
+      "BwRampupUpperBoundFactor:2.0,ProbeIntegrationEnabled:true/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Network recovers after loss.
+  DataRate probe_estimate = DataRate::KilobitsPerSec(300);
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      probe_estimate, /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  LossBasedBweV2::Result result_after_recovery =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+  EXPECT_EQ(result_after_recovery.bandwidth_estimate, probe_estimate);
+}
+
+// If BoundByUpperLinkCapacityWhenLossLimited is enabled, the estimate is
+// bounded by the upper link capacity when bandwidth is loss limited.
+TEST_P(LossBasedBweV2Test, BoundEstimateByUpperLinkCapacityWhenLossLimited) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:1000,"
+      "BwRampupUpperBoundFactor:2.0,BoundByUpperLinkCapacityWhenLossLimited:"
+      "true/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Network recovers after loss.
+  DataRate upper_link_capacity = DataRate::KilobitsPerSec(10);
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt, upper_link_capacity);
+
+  LossBasedBweV2::Result result_after_recovery =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+  EXPECT_EQ(result_after_recovery.bandwidth_estimate, upper_link_capacity);
+}
+
+// If BoundByUpperLinkCapacityWhenLossLimited is enabled, the estimate is not
+// bounded by the upper link capacity when bandwidth is not loss limited.
+TEST_P(LossBasedBweV2Test,
+       NotBoundEstimateByUpperLinkCapacityWhenNotLossLimited) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:1000,"
+      "BwRampupUpperBoundFactor:2.0,BoundByUpperLinkCapacityWhenLossLimited:"
+      "true/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create a normal network without loss.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  DataRate upper_link_capacity = DataRate::KilobitsPerSec(10);
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt, upper_link_capacity);
+
+  LossBasedBweV2::Result loss_based_result =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+  EXPECT_GT(loss_based_result.bandwidth_estimate, upper_link_capacity);
+}
+
+// If BoundByUpperLinkCapacityWhenLossLimited is disabled, the estimate is not
+// bounded by the upper link capacity.
+TEST_P(LossBasedBweV2Test, NotBoundEstimateByUpperLinkCapacity) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.2|1|0.5,AckedRateCandidate:true,"
+      "ObservationWindowSize:2,ObservationDurationLowerBound:200ms,"
+      "InstantUpperBoundBwBalance:10000kbps,"
+      "DelayBasedCandidate:true,MaxIncreaseFactor:1000,"
+      "BwRampupUpperBoundFactor:2.0,BoundByUpperLinkCapacityWhenLossLimited:"
+      "false/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  DataRate acked_rate = DataRate::KilobitsPerSec(300);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+  loss_based_bandwidth_estimator.SetAcknowledgedBitrate(acked_rate);
+
+  // Create some loss to create the loss limited scenario.
+  std::vector<PacketResult> enough_feedback_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_1, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Network recovers after loss.
+  DataRate upper_link_capacity = DataRate::KilobitsPerSec(10);
+  std::vector<PacketResult> enough_feedback_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_2, delay_based_estimate, BandwidthUsage::kBwNormal,
+      /*probe_estimate=*/absl::nullopt, upper_link_capacity);
+
+  LossBasedBweV2::Result result_after_recovery =
+      loss_based_bandwidth_estimator.GetLossBasedResult();
+  EXPECT_GT(result_after_recovery.bandwidth_estimate, upper_link_capacity);
+}
+
+TEST_P(LossBasedBweV2Test,
+       StricterBoundUsingHighLossRateThresholdAt10pLossRate) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.0,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000,"
+      "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0."
+      "05,HighLossRateThreshold:0.09/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetMinMaxBitrate(
+      /*min_bitrate=*/DataRate::KilobitsPerSec(10),
+      /*max_bitrate=*/DataRate::KilobitsPerSec(1000000));
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  std::vector<PacketResult> enough_feedback_10p_loss_1 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  std::vector<PacketResult> enough_feedback_10p_loss_2 =
+      CreatePacketResultsWith10pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_10p_loss_2, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // At a 10% loss rate, which exceeds the configured HighLossRateThreshold
+  // (0.09), a stricter instant upper bound applies and the estimate ends up
+  // capped at 400 kbps.
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(400));
+}
+
+TEST_P(LossBasedBweV2Test,
+       StricterBoundUsingHighLossRateThresholdAt50pLossRate) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.0,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000,"
+      "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0."
+ "05,HighLossRateThreshold:0.3/"); + LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config); + loss_based_bandwidth_estimator.SetMinMaxBitrate( + /*min_bitrate=*/DataRate::KilobitsPerSec(10), + /*max_bitrate=*/DataRate::KilobitsPerSec(1000000)); + DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000); + loss_based_bandwidth_estimator.SetBandwidthEstimate( + DataRate::KilobitsPerSec(600)); + + std::vector enough_feedback_50p_loss_1 = + CreatePacketResultsWith50pLossRate( + /*first_packet_timestamp=*/Timestamp::Zero()); + loss_based_bandwidth_estimator.UpdateBandwidthEstimate( + enough_feedback_50p_loss_1, delay_based_estimate, + BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt, + /*upper_link_capacity=*/DataRate::PlusInfinity()); + + std::vector enough_feedback_50p_loss_2 = + CreatePacketResultsWith50pLossRate( + /*first_packet_timestamp=*/Timestamp::Zero() + + kObservationDurationLowerBound); + loss_based_bandwidth_estimator.UpdateBandwidthEstimate( + enough_feedback_50p_loss_2, delay_based_estimate, + BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt, + /*upper_link_capacity=*/DataRate::PlusInfinity()); + + // At 50% loss rate and high loss rate threshold to be 30%, cap the estimate + // to be the min bitrate. + EXPECT_EQ( + loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate, + DataRate::KilobitsPerSec(10)); +} + +TEST_P(LossBasedBweV2Test, + StricterBoundUsingHighLossRateThresholdAt100pLossRate) { + ExplicitKeyValueConfig key_value_config( + "WebRTC-Bwe-LossBasedBweV2/" + "Enabled:true,CandidateFactors:1.0,AckedRateCandidate:false," + "ObservationWindowSize:2," + "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps," + "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000," + "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0." + "05,HighLossRateThreshold:0.3/"); + LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config); + loss_based_bandwidth_estimator.SetMinMaxBitrate( + /*min_bitrate=*/DataRate::KilobitsPerSec(10), + /*max_bitrate=*/DataRate::KilobitsPerSec(1000000)); + DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000); + loss_based_bandwidth_estimator.SetBandwidthEstimate( + DataRate::KilobitsPerSec(600)); + + std::vector enough_feedback_100p_loss_1 = + CreatePacketResultsWith100pLossRate( + /*first_packet_timestamp=*/Timestamp::Zero()); + loss_based_bandwidth_estimator.UpdateBandwidthEstimate( + enough_feedback_100p_loss_1, delay_based_estimate, + BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt, + /*upper_link_capacity=*/DataRate::PlusInfinity()); + + std::vector enough_feedback_100p_loss_2 = + CreatePacketResultsWith100pLossRate( + /*first_packet_timestamp=*/Timestamp::Zero() + + kObservationDurationLowerBound); + loss_based_bandwidth_estimator.UpdateBandwidthEstimate( + enough_feedback_100p_loss_2, delay_based_estimate, + BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt, + /*upper_link_capacity=*/DataRate::PlusInfinity()); + + // At 100% loss rate and high loss rate threshold to be 30%, cap the estimate + // to be the min bitrate. 
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(10));
+}
+
+TEST_P(LossBasedBweV2Test, EstimateRecoversAfterHighLoss) {
+  ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-LossBasedBweV2/"
+      "Enabled:true,CandidateFactors:1.1|1.0|0.9,AckedRateCandidate:false,"
+      "ObservationWindowSize:2,"
+      "DelayBasedCandidate:true,InstantUpperBoundBwBalance:100kbps,"
+      "ObservationDurationLowerBound:200ms,HigherBwBiasFactor:1000,"
+      "HigherLogBwBiasFactor:1000,LossThresholdOfHighBandwidthPreference:0."
+      "05,HighLossRateThreshold:0.3/");
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetMinMaxBitrate(
+      /*min_bitrate=*/DataRate::KilobitsPerSec(10),
+      /*max_bitrate=*/DataRate::KilobitsPerSec(1000000));
+  DataRate delay_based_estimate = DataRate::KilobitsPerSec(5000);
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(600));
+
+  std::vector<PacketResult> enough_feedback_100p_loss_1 =
+      CreatePacketResultsWith100pLossRate(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_100p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // Make sure that the estimate is set to the min bitrate because of the 100%
+  // loss rate.
+  EXPECT_EQ(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(10));
+
+  // Create some feedback with 0% loss to simulate the network recovering.
+  std::vector<PacketResult> enough_feedback_0p_loss_1 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_0p_loss_1, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  std::vector<PacketResult> enough_feedback_0p_loss_2 =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero() +
+          kObservationDurationLowerBound * 2);
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback_0p_loss_2, delay_based_estimate,
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  // The estimate increases as the network recovers.
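+  // (With CandidateFactors including 1.1, each loss-free observation window
+  // lets an upward candidate win, so the estimate should climb back above
+  // the 10 kbps floor.)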
+  EXPECT_GT(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(10));
+}
+
+TEST_P(LossBasedBweV2Test, EstimateIsNotHigherThanMaxBitrate) {
+  ExplicitKeyValueConfig key_value_config(
+      Config(/*enabled=*/true, /*valid=*/true,
+             /*trendline_integration_enabled=*/GetParam()));
+  LossBasedBweV2 loss_based_bandwidth_estimator(&key_value_config);
+  loss_based_bandwidth_estimator.SetMinMaxBitrate(
+      /*min_bitrate=*/DataRate::KilobitsPerSec(10),
+      /*max_bitrate=*/DataRate::KilobitsPerSec(1000));
+  loss_based_bandwidth_estimator.SetBandwidthEstimate(
+      DataRate::KilobitsPerSec(1000));
+  std::vector<PacketResult> enough_feedback =
+      CreatePacketResultsWithReceivedPackets(
+          /*first_packet_timestamp=*/Timestamp::Zero());
+  loss_based_bandwidth_estimator.UpdateBandwidthEstimate(
+      enough_feedback, /*delay_based_estimate=*/DataRate::PlusInfinity(),
+      BandwidthUsage::kBwNormal, /*probe_estimate=*/absl::nullopt,
+      /*upper_link_capacity=*/DataRate::PlusInfinity());
+
+  EXPECT_LE(
+      loss_based_bandwidth_estimator.GetLossBasedResult().bandwidth_estimate,
+      DataRate::KilobitsPerSec(1000));
+}
+
+INSTANTIATE_TEST_SUITE_P(LossBasedBweV2Tests,
+                         LossBasedBweV2Test,
+                         ::testing::Bool());
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.cc
new file mode 100644
index 0000000000..a94f653157
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.cc
@@ -0,0 +1,201 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/probe_bitrate_estimator.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "logging/rtc_event_log/events/rtc_event_probe_result_failure.h"
+#include "logging/rtc_event_log/events/rtc_event_probe_result_success.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+namespace {
+// The minimum fraction of probes we need to receive feedback about in order
+// to have a valid estimate.
+constexpr double kMinReceivedProbesRatio = .80;
+
+// The minimum fraction of bytes we need to receive feedback about in order
+// to have a valid estimate.
+constexpr double kMinReceivedBytesRatio = .80;
+
+// The maximum |receive rate| / |send rate| ratio for a valid estimate.
+constexpr float kMaxValidRatio = 2.0f;
+
+// The minimum |receive rate| / |send rate| ratio assuming that the link is
+// not saturated, i.e. we assume that we will receive at least
+// kMinRatioForUnsaturatedLink * |send rate| if |send rate| is less than the
+// link capacity.
+constexpr float kMinRatioForUnsaturatedLink = 0.9f;
+
+// The target utilization of the link. If we knew the true link capacity, we
+// would like to send at 95% of that rate.
+constexpr float kTargetUtilizationFraction = 0.95f;
+
+// The maximum time period over which the cluster history is retained.
+// This is also the maximum time period beyond which a probing burst is not
+// expected to last.
+constexpr TimeDelta kMaxClusterHistory = TimeDelta::Seconds(1);
+
+// The maximum time interval between the first and the last probe in a cluster
+// on the sender side as well as on the receive side.
+constexpr TimeDelta kMaxProbeInterval = TimeDelta::Seconds(1);
+
+}  // namespace
+
+ProbeBitrateEstimator::ProbeBitrateEstimator(RtcEventLog* event_log)
+    : event_log_(event_log) {}
+
+ProbeBitrateEstimator::~ProbeBitrateEstimator() = default;
+
+absl::optional<DataRate> ProbeBitrateEstimator::HandleProbeAndEstimateBitrate(
+    const PacketResult& packet_feedback) {
+  int cluster_id = packet_feedback.sent_packet.pacing_info.probe_cluster_id;
+  RTC_DCHECK_NE(cluster_id, PacedPacketInfo::kNotAProbe);
+
+  EraseOldClusters(packet_feedback.receive_time);
+
+  AggregatedCluster* cluster = &clusters_[cluster_id];
+
+  if (packet_feedback.sent_packet.send_time < cluster->first_send) {
+    cluster->first_send = packet_feedback.sent_packet.send_time;
+  }
+  if (packet_feedback.sent_packet.send_time > cluster->last_send) {
+    cluster->last_send = packet_feedback.sent_packet.send_time;
+    cluster->size_last_send = packet_feedback.sent_packet.size;
+  }
+  if (packet_feedback.receive_time < cluster->first_receive) {
+    cluster->first_receive = packet_feedback.receive_time;
+    cluster->size_first_receive = packet_feedback.sent_packet.size;
+  }
+  if (packet_feedback.receive_time > cluster->last_receive) {
+    cluster->last_receive = packet_feedback.receive_time;
+  }
+  cluster->size_total += packet_feedback.sent_packet.size;
+  cluster->num_probes += 1;
+
+  RTC_DCHECK_GT(
+      packet_feedback.sent_packet.pacing_info.probe_cluster_min_probes, 0);
+  RTC_DCHECK_GT(
+      packet_feedback.sent_packet.pacing_info.probe_cluster_min_bytes, 0);
+
+  int min_probes =
+      packet_feedback.sent_packet.pacing_info.probe_cluster_min_probes *
+      kMinReceivedProbesRatio;
+  DataSize min_size =
+      DataSize::Bytes(
+          packet_feedback.sent_packet.pacing_info.probe_cluster_min_bytes) *
+      kMinReceivedBytesRatio;
+  if (cluster->num_probes < min_probes || cluster->size_total < min_size)
+    return absl::nullopt;
+
+  TimeDelta send_interval = cluster->last_send - cluster->first_send;
+  TimeDelta receive_interval = cluster->last_receive - cluster->first_receive;
+
+  if (send_interval <= TimeDelta::Zero() || send_interval > kMaxProbeInterval ||
+      receive_interval <= TimeDelta::Zero() ||
+      receive_interval > kMaxProbeInterval) {
+    RTC_LOG(LS_INFO) << "Probing unsuccessful, invalid send/receive interval"
+                        " [cluster id: "
+                     << cluster_id
+                     << "] [send interval: " << ToString(send_interval)
+                     << "]"
+                        " [receive interval: "
+                     << ToString(receive_interval) << "]";
+    if (event_log_) {
+      event_log_->Log(std::make_unique<RtcEventProbeResultFailure>(
+          cluster_id, ProbeFailureReason::kInvalidSendReceiveInterval));
+    }
+    return absl::nullopt;
+  }
+  // Since the `send_interval` does not include the time it takes to actually
+  // send the last packet, the size of the last sent packet should not be
+  // included when calculating the send bitrate.
+  RTC_DCHECK_GT(cluster->size_total, cluster->size_last_send);
+  DataSize send_size = cluster->size_total - cluster->size_last_send;
+  DataRate send_rate = send_size / send_interval;
+
+  // Since the `receive_interval` does not include the time it takes to
+  // actually receive the first packet, the size of the first received packet
+  // should not be included when calculating the receive bitrate.
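+  // For illustration: with four 1000-byte probes sent at 0/10/20/30 ms and
+  // received at 10/20/30/40 ms, both rates come out as
+  // (4000 - 1000) bytes / 30 ms = 800 kbps.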
+  RTC_DCHECK_GT(cluster->size_total, cluster->size_first_receive);
+  DataSize receive_size = cluster->size_total - cluster->size_first_receive;
+  DataRate receive_rate = receive_size / receive_interval;
+
+  double ratio = receive_rate / send_rate;
+  if (ratio > kMaxValidRatio) {
+    RTC_LOG(LS_INFO) << "Probing unsuccessful, receive/send ratio too high"
+                        " [cluster id: "
+                     << cluster_id << "] [send: " << ToString(send_size)
+                     << " / " << ToString(send_interval) << " = "
+                     << ToString(send_rate)
+                     << "]"
+                        " [receive: "
+                     << ToString(receive_size) << " / "
+                     << ToString(receive_interval) << " = "
+                     << ToString(receive_rate)
+                     << " ]"
+                        " [ratio: "
+                     << ToString(receive_rate) << " / " << ToString(send_rate)
+                     << " = " << ratio << " > kMaxValidRatio ("
+                     << kMaxValidRatio << ")]";
+    if (event_log_) {
+      event_log_->Log(std::make_unique<RtcEventProbeResultFailure>(
+          cluster_id, ProbeFailureReason::kInvalidSendReceiveRatio));
+    }
+    return absl::nullopt;
+  }
+  RTC_LOG(LS_INFO) << "Probing successful"
+                      " [cluster id: "
+                   << cluster_id << "] [send: " << ToString(send_size) << " / "
+                   << ToString(send_interval) << " = " << ToString(send_rate)
+                   << " ]"
+                      " [receive: "
+                   << ToString(receive_size) << " / "
+                   << ToString(receive_interval) << " = "
+                   << ToString(receive_rate) << "]";
+
+  DataRate res = std::min(send_rate, receive_rate);
+  // If we're receiving at significantly lower bitrate than we were sending at,
+  // it suggests that we've found the true capacity of the link. In this case,
+  // set the target bitrate slightly lower to not immediately overuse.
+  if (receive_rate < kMinRatioForUnsaturatedLink * send_rate) {
+    RTC_DCHECK_GT(send_rate, receive_rate);
+    res = kTargetUtilizationFraction * receive_rate;
+  }
+  if (event_log_) {
+    event_log_->Log(
+        std::make_unique<RtcEventProbeResultSuccess>(cluster_id, res.bps()));
+  }
+  estimated_data_rate_ = res;
+  return estimated_data_rate_;
+}
+
+absl::optional<DataRate>
+ProbeBitrateEstimator::FetchAndResetLastEstimatedBitrate() {
+  absl::optional<DataRate> estimated_data_rate = estimated_data_rate_;
+  estimated_data_rate_.reset();
+  return estimated_data_rate;
+}
+
+void ProbeBitrateEstimator::EraseOldClusters(Timestamp timestamp) {
+  for (auto it = clusters_.begin(); it != clusters_.end();) {
+    if (it->second.last_receive + kMaxClusterHistory < timestamp) {
+      it = clusters_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.h
new file mode 100644
index 0000000000..d5a523b7f3
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_BITRATE_ESTIMATOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_BITRATE_ESTIMATOR_H_
+
+#include <limits>
+#include <map>
+
+#include "absl/types/optional.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+
+namespace webrtc {
+class RtcEventLog;
+
+class ProbeBitrateEstimator {
+ public:
+  explicit ProbeBitrateEstimator(RtcEventLog* event_log);
+  ~ProbeBitrateEstimator();
+
+  // Should be called for every probe packet we receive feedback about.
+  // Returns the estimated bitrate if the probe completes a valid cluster.
+  absl::optional<DataRate> HandleProbeAndEstimateBitrate(
+      const PacketResult& packet_feedback);
+
+  absl::optional<DataRate> FetchAndResetLastEstimatedBitrate();
+
+ private:
+  struct AggregatedCluster {
+    int num_probes = 0;
+    Timestamp first_send = Timestamp::PlusInfinity();
+    Timestamp last_send = Timestamp::MinusInfinity();
+    Timestamp first_receive = Timestamp::PlusInfinity();
+    Timestamp last_receive = Timestamp::MinusInfinity();
+    DataSize size_last_send = DataSize::Zero();
+    DataSize size_first_receive = DataSize::Zero();
+    DataSize size_total = DataSize::Zero();
+  };
+
+  // Erases old cluster data that was seen before `timestamp`.
+  void EraseOldClusters(Timestamp timestamp);
+
+  std::map<int, AggregatedCluster> clusters_;
+  RtcEventLog* const event_log_;
+  absl::optional<DataRate> estimated_data_rate_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_BITRATE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator_unittest.cc
new file mode 100644
index 0000000000..6b4146d2bf
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_bitrate_estimator_unittest.cc
@@ -0,0 +1,228 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/probe_bitrate_estimator.h"
+
+#include <vector>
+
+#include "api/transport/network_types.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+constexpr int kDefaultMinProbes = 5;
+constexpr int kDefaultMinBytes = 5000;
+constexpr float kTargetUtilizationFraction = 0.95f;
+}  // anonymous namespace
+
+class TestProbeBitrateEstimator : public ::testing::Test {
+ public:
+  TestProbeBitrateEstimator() : probe_bitrate_estimator_(nullptr) {}
+
+  // TODO(philipel): Use PacedPacketInfo when ProbeBitrateEstimator is rewritten
+  // to use that information.
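+  // Feeds one synthetic probe feedback into the estimator: a packet of
+  // `size_bytes` in cluster `probe_cluster_id`, with send/arrival times
+  // offset from a fixed reference time. The estimator's result (if any) is
+  // stored in `measured_data_rate_`.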
+  void AddPacketFeedback(int probe_cluster_id,
+                         size_t size_bytes,
+                         int64_t send_time_ms,
+                         int64_t arrival_time_ms,
+                         int min_probes = kDefaultMinProbes,
+                         int min_bytes = kDefaultMinBytes) {
+    const Timestamp kReferenceTime = Timestamp::Seconds(1000);
+    PacketResult feedback;
+    feedback.sent_packet.send_time =
+        kReferenceTime + TimeDelta::Millis(send_time_ms);
+    feedback.sent_packet.size = DataSize::Bytes(size_bytes);
+    feedback.sent_packet.pacing_info =
+        PacedPacketInfo(probe_cluster_id, min_probes, min_bytes);
+    feedback.receive_time = kReferenceTime + TimeDelta::Millis(arrival_time_ms);
+    measured_data_rate_ =
+        probe_bitrate_estimator_.HandleProbeAndEstimateBitrate(feedback);
+  }
+
+ protected:
+  absl::optional<DataRate> measured_data_rate_;
+  ProbeBitrateEstimator probe_bitrate_estimator_;
+};
+
+TEST_F(TestProbeBitrateEstimator, OneCluster) {
+  AddPacketFeedback(0, 1000, 0, 10);
+  AddPacketFeedback(0, 1000, 10, 20);
+  AddPacketFeedback(0, 1000, 20, 30);
+  AddPacketFeedback(0, 1000, 30, 40);
+
+  EXPECT_NEAR(measured_data_rate_->bps(), 800000, 10);
+}
+
+TEST_F(TestProbeBitrateEstimator, OneClusterTooFewProbes) {
+  AddPacketFeedback(0, 2000, 0, 10);
+  AddPacketFeedback(0, 2000, 10, 20);
+  AddPacketFeedback(0, 2000, 20, 30);
+
+  EXPECT_FALSE(measured_data_rate_);
+}
+
+TEST_F(TestProbeBitrateEstimator, OneClusterTooFewBytes) {
+  const int kMinBytes = 6000;
+  AddPacketFeedback(0, 800, 0, 10, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 800, 10, 20, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 800, 20, 30, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 800, 30, 40, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 800, 40, 50, kDefaultMinProbes, kMinBytes);
+
+  EXPECT_FALSE(measured_data_rate_);
+}
+
+TEST_F(TestProbeBitrateEstimator, SmallCluster) {
+  const int kMinBytes = 1000;
+  AddPacketFeedback(0, 150, 0, 10, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 150, 10, 20, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 150, 20, 30, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 150, 30, 40, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 150, 40, 50, kDefaultMinProbes, kMinBytes);
+  AddPacketFeedback(0, 150, 50, 60, kDefaultMinProbes, kMinBytes);
+  EXPECT_NEAR(measured_data_rate_->bps(), 120000, 10);
+}
+
+TEST_F(TestProbeBitrateEstimator, LargeCluster) {
+  const int kMinProbes = 30;
+  const int kMinBytes = 312500;
+
+  int64_t send_time = 0;
+  int64_t receive_time = 5;
+  for (int i = 0; i < 25; ++i) {
+    AddPacketFeedback(0, 12500, send_time, receive_time, kMinProbes, kMinBytes);
+    ++send_time;
+    ++receive_time;
+  }
+  EXPECT_NEAR(measured_data_rate_->bps(), 100000000, 10);
+}
+
+TEST_F(TestProbeBitrateEstimator, FastReceive) {
+  AddPacketFeedback(0, 1000, 0, 15);
+  AddPacketFeedback(0, 1000, 10, 30);
+  AddPacketFeedback(0, 1000, 20, 35);
+  AddPacketFeedback(0, 1000, 30, 40);
+
+  EXPECT_NEAR(measured_data_rate_->bps(), 800000, 10);
+}
+
+TEST_F(TestProbeBitrateEstimator, TooFastReceive) {
+  AddPacketFeedback(0, 1000, 0, 19);
+  AddPacketFeedback(0, 1000, 10, 22);
+  AddPacketFeedback(0, 1000, 20, 25);
+  AddPacketFeedback(0, 1000, 40, 27);
+
+  EXPECT_FALSE(measured_data_rate_);
+}
+
+TEST_F(TestProbeBitrateEstimator, SlowReceive) {
+  AddPacketFeedback(0, 1000, 0, 10);
+  AddPacketFeedback(0, 1000, 10, 40);
+  AddPacketFeedback(0, 1000, 20, 70);
+  AddPacketFeedback(0, 1000, 30, 85);
+  // Expected send rate = 800 kbps, expected receive rate = 320 kbps.
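+  // (Send: 3000 bytes / 30 ms = 800 kbps; receive: 3000 bytes / 75 ms =
+  // 320 kbps, which is then scaled by kTargetUtilizationFraction below.)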
+ + EXPECT_NEAR(measured_data_rate_->bps(), kTargetUtilizationFraction * 320000, + 10); +} + +TEST_F(TestProbeBitrateEstimator, BurstReceive) { + AddPacketFeedback(0, 1000, 0, 50); + AddPacketFeedback(0, 1000, 10, 50); + AddPacketFeedback(0, 1000, 20, 50); + AddPacketFeedback(0, 1000, 40, 50); + + EXPECT_FALSE(measured_data_rate_); +} + +TEST_F(TestProbeBitrateEstimator, MultipleClusters) { + AddPacketFeedback(0, 1000, 0, 10); + AddPacketFeedback(0, 1000, 10, 20); + AddPacketFeedback(0, 1000, 20, 30); + AddPacketFeedback(0, 1000, 40, 60); + // Expected send rate = 600 kbps, expected receive rate = 480 kbps. + EXPECT_NEAR(measured_data_rate_->bps(), kTargetUtilizationFraction * 480000, + 10); + + AddPacketFeedback(0, 1000, 50, 60); + // Expected send rate = 640 kbps, expected receive rate = 640 kbps. + EXPECT_NEAR(measured_data_rate_->bps(), 640000, 10); + + AddPacketFeedback(1, 1000, 60, 70); + AddPacketFeedback(1, 1000, 65, 77); + AddPacketFeedback(1, 1000, 70, 84); + AddPacketFeedback(1, 1000, 75, 90); + // Expected send rate = 1600 kbps, expected receive rate = 1200 kbps. + + EXPECT_NEAR(measured_data_rate_->bps(), kTargetUtilizationFraction * 1200000, + 10); +} + +TEST_F(TestProbeBitrateEstimator, IgnoreOldClusters) { + AddPacketFeedback(0, 1000, 0, 10); + AddPacketFeedback(0, 1000, 10, 20); + AddPacketFeedback(0, 1000, 20, 30); + + AddPacketFeedback(1, 1000, 60, 70); + AddPacketFeedback(1, 1000, 65, 77); + AddPacketFeedback(1, 1000, 70, 84); + AddPacketFeedback(1, 1000, 75, 90); + // Expected send rate = 1600 kbps, expected receive rate = 1200 kbps. + + EXPECT_NEAR(measured_data_rate_->bps(), kTargetUtilizationFraction * 1200000, + 10); + + // Coming in 6s later + AddPacketFeedback(0, 1000, 40 + 6000, 60 + 6000); + + EXPECT_FALSE(measured_data_rate_); +} + +TEST_F(TestProbeBitrateEstimator, IgnoreSizeLastSendPacket) { + AddPacketFeedback(0, 1000, 0, 10); + AddPacketFeedback(0, 1000, 10, 20); + AddPacketFeedback(0, 1000, 20, 30); + AddPacketFeedback(0, 1000, 30, 40); + AddPacketFeedback(0, 1500, 40, 50); + // Expected send rate = 800 kbps, expected receive rate = 900 kbps. + + EXPECT_NEAR(measured_data_rate_->bps(), 800000, 10); +} + +TEST_F(TestProbeBitrateEstimator, IgnoreSizeFirstReceivePacket) { + AddPacketFeedback(0, 1500, 0, 10); + AddPacketFeedback(0, 1000, 10, 20); + AddPacketFeedback(0, 1000, 20, 30); + AddPacketFeedback(0, 1000, 30, 40); + // Expected send rate = 933 kbps, expected receive rate = 800 kbps. 
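+  // (Send: (4500 - 1000) bytes / 30 ms; receive: (4500 - 1500) bytes / 30 ms.
+  // The receive rate is below 90% of the send rate, so the result is scaled
+  // by kTargetUtilizationFraction.)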
+
+  EXPECT_NEAR(measured_data_rate_->bps(), kTargetUtilizationFraction * 800000,
+              10);
+}
+
+TEST_F(TestProbeBitrateEstimator, NoLastEstimatedBitrateBps) {
+  EXPECT_FALSE(probe_bitrate_estimator_.FetchAndResetLastEstimatedBitrate());
+}
+
+TEST_F(TestProbeBitrateEstimator, FetchLastEstimatedBitrateBps) {
+  AddPacketFeedback(0, 1000, 0, 10);
+  AddPacketFeedback(0, 1000, 10, 20);
+  AddPacketFeedback(0, 1000, 20, 30);
+  AddPacketFeedback(0, 1000, 30, 40);
+
+  auto estimated_bitrate =
+      probe_bitrate_estimator_.FetchAndResetLastEstimatedBitrate();
+  EXPECT_TRUE(estimated_bitrate);
+  EXPECT_NEAR(estimated_bitrate->bps(), 800000, 10);
+  EXPECT_FALSE(probe_bitrate_estimator_.FetchAndResetLastEstimatedBitrate());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.cc
new file mode 100644
index 0000000000..1af943c4cb
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.cc
@@ -0,0 +1,558 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/probe_controller.h"
+
+#include <algorithm>
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+#include "absl/strings/match.h"
+#include "absl/types/optional.h"
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "logging/rtc_event_log/events/rtc_event_probe_cluster_created.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+// Maximum waiting time from the time of initiating probing to getting
+// the measured results back.
+constexpr TimeDelta kMaxWaitingTimeForProbingResult = TimeDelta::Seconds(1);
+
+// Default probing bitrate limit. Applied only when the application didn't
+// specify max bitrate.
+constexpr DataRate kDefaultMaxProbingBitrate = DataRate::KilobitsPerSec(5000);
+
+// If the bitrate drops to a factor `kBitrateDropThreshold` or lower
+// and we recover within `kBitrateDropTimeout`, then we'll send
+// a probe at a fraction `kProbeFractionAfterDrop` of the original bitrate.
+constexpr double kBitrateDropThreshold = 0.66;
+constexpr TimeDelta kBitrateDropTimeout = TimeDelta::Seconds(5);
+constexpr double kProbeFractionAfterDrop = 0.85;
+
+// Timeout for probing after leaving ALR. If the bitrate drops significantly
+// (as determined by the delay based estimator) and we leave ALR, then we will
+// send a probe if we recover within `kAlrEndedTimeout`.
+constexpr TimeDelta kAlrEndedTimeout = TimeDelta::Seconds(3);
+
+// This is a limit on how often probing can be done when there is a BW
+// drop detected in ALR.
+constexpr TimeDelta kMinTimeBetweenAlrProbes = TimeDelta::Seconds(5);
+
+// The expected uncertainty of probe result (as a fraction of the target probe
+// bitrate). Used to avoid probing if the probe bitrate is close to our current
+// estimate.
+constexpr double kProbeUncertainty = 0.05;
+
+// Use probing to recover faster after large bitrate estimate drops.
+constexpr char kBweRapidRecoveryExperiment[] =
+    "WebRTC-BweRapidRecoveryExperiment";
+
+void MaybeLogProbeClusterCreated(RtcEventLog* event_log,
+                                 const ProbeClusterConfig& probe) {
+  RTC_DCHECK(event_log);
+  if (!event_log) {
+    return;
+  }
+
+  DataSize min_data_size = probe.target_data_rate * probe.target_duration;
+  event_log->Log(std::make_unique<RtcEventProbeClusterCreated>(
+      probe.id, probe.target_data_rate.bps(), probe.target_probe_count,
+      min_data_size.bytes()));
+}
+
+}  // namespace
+
+ProbeControllerConfig::ProbeControllerConfig(
+    const FieldTrialsView* key_value_config)
+    : first_exponential_probe_scale("p1", 3.0),
+      second_exponential_probe_scale("p2", 6.0),
+      further_exponential_probe_scale("step_size", 2),
+      further_probe_threshold("further_probe_threshold", 0.7),
+      alr_probing_interval("alr_interval", TimeDelta::Seconds(5)),
+      alr_probe_scale("alr_scale", 2),
+      network_state_estimate_probing_interval("network_state_interval",
+                                              TimeDelta::PlusInfinity()),
+      probe_if_estimate_lower_than_network_state_estimate_ratio(
+          "est_lower_than_network_ratio",
+          0),
+      estimate_lower_than_network_state_estimate_probing_interval(
+          "est_lower_than_network_interval",
+          TimeDelta::Seconds(3)),
+      network_state_probe_scale("network_state_scale", 1.0),
+      network_state_probe_duration("network_state_probe_duration",
+                                   TimeDelta::Millis(15)),
+
+      probe_on_max_allocated_bitrate_change("probe_max_allocation", true),
+      first_allocation_probe_scale("alloc_p1", 1),
+      second_allocation_probe_scale("alloc_p2", 2),
+      allocation_allow_further_probing("alloc_probe_further", false),
+      allocation_probe_max("alloc_probe_max", DataRate::PlusInfinity()),
+      min_probe_packets_sent("min_probe_packets_sent", 5),
+      min_probe_duration("min_probe_duration", TimeDelta::Millis(15)),
+      limit_probe_target_rate_to_loss_bwe("limit_probe_target_rate_to_loss_bwe",
+                                          false),
+      loss_limited_probe_scale("loss_limited_scale", 1.5),
+      skip_if_estimate_larger_than_fraction_of_max(
+          "skip_if_est_larger_than_fraction_of_max",
+          0.0),
+      not_probe_if_delay_increased("not_probe_if_delay_increased", false) {
+  ParseFieldTrial({&first_exponential_probe_scale,
+                   &second_exponential_probe_scale,
+                   &further_exponential_probe_scale,
+                   &further_probe_threshold,
+                   &alr_probing_interval,
+                   &alr_probe_scale,
+                   &probe_on_max_allocated_bitrate_change,
+                   &first_allocation_probe_scale,
+                   &second_allocation_probe_scale,
+                   &allocation_allow_further_probing,
+                   &min_probe_duration,
+                   &network_state_estimate_probing_interval,
+                   &probe_if_estimate_lower_than_network_state_estimate_ratio,
+                   &estimate_lower_than_network_state_estimate_probing_interval,
+                   &network_state_probe_scale,
+                   &network_state_probe_duration,
+                   &min_probe_packets_sent,
+                   &limit_probe_target_rate_to_loss_bwe,
+                   &loss_limited_probe_scale,
+                   &skip_if_estimate_larger_than_fraction_of_max,
+                   &not_probe_if_delay_increased},
+                  key_value_config->Lookup("WebRTC-Bwe-ProbingConfiguration"));
+
+  // Specialized keys overriding subsets of WebRTC-Bwe-ProbingConfiguration
+  ParseFieldTrial(
+      {&first_exponential_probe_scale, &second_exponential_probe_scale},
+      key_value_config->Lookup("WebRTC-Bwe-InitialProbing"));
+  ParseFieldTrial({&further_exponential_probe_scale, &further_probe_threshold},
+                  key_value_config->Lookup("WebRTC-Bwe-ExponentialProbing"));
+  ParseFieldTrial(
+      {&alr_probing_interval, &alr_probe_scale, &loss_limited_probe_scale},
+      key_value_config->Lookup("WebRTC-Bwe-AlrProbing"));
+  ParseFieldTrial(
+      {&first_allocation_probe_scale, &second_allocation_probe_scale,
+       &allocation_allow_further_probing,
+       &allocation_probe_max},
+      key_value_config->Lookup("WebRTC-Bwe-AllocationProbing"));
+  ParseFieldTrial({&min_probe_packets_sent, &min_probe_duration},
+                  key_value_config->Lookup("WebRTC-Bwe-ProbingBehavior"));
+}
+
+ProbeControllerConfig::ProbeControllerConfig(const ProbeControllerConfig&) =
+    default;
+ProbeControllerConfig::~ProbeControllerConfig() = default;
+
+ProbeController::ProbeController(const FieldTrialsView* key_value_config,
+                                 RtcEventLog* event_log)
+    : enable_periodic_alr_probing_(false),
+      in_rapid_recovery_experiment_(absl::StartsWith(
+          key_value_config->Lookup(kBweRapidRecoveryExperiment),
+          "Enabled")),
+      event_log_(event_log),
+      config_(ProbeControllerConfig(key_value_config)) {
+  Reset(Timestamp::Zero());
+}
+
+ProbeController::~ProbeController() {}
+
+std::vector<ProbeClusterConfig> ProbeController::SetBitrates(
+    DataRate min_bitrate,
+    DataRate start_bitrate,
+    DataRate max_bitrate,
+    Timestamp at_time) {
+  if (start_bitrate > DataRate::Zero()) {
+    start_bitrate_ = start_bitrate;
+    estimated_bitrate_ = start_bitrate;
+  } else if (start_bitrate_.IsZero()) {
+    start_bitrate_ = min_bitrate;
+  }
+
+  // The reason we use the variable `old_max_bitrate` is because we
+  // need to set `max_bitrate_` before we call InitiateProbing.
+  DataRate old_max_bitrate = max_bitrate_;
+  max_bitrate_ =
+      max_bitrate.IsFinite() ? max_bitrate : kDefaultMaxProbingBitrate;
+
+  switch (state_) {
+    case State::kInit:
+      if (network_available_)
+        return InitiateExponentialProbing(at_time);
+      break;
+
+    case State::kWaitingForProbingResult:
+      break;
+
+    case State::kProbingComplete:
+      // If the new max bitrate is higher than both the old max bitrate and the
+      // estimate then initiate probing.
+      if (!estimated_bitrate_.IsZero() && old_max_bitrate < max_bitrate_ &&
+          estimated_bitrate_ < max_bitrate_) {
+        return InitiateProbing(at_time, {max_bitrate_}, false);
+      }
+      break;
+  }
+  return std::vector<ProbeClusterConfig>();
+}
+
+std::vector<ProbeClusterConfig> ProbeController::OnMaxTotalAllocatedBitrate(
+    DataRate max_total_allocated_bitrate,
+    Timestamp at_time) {
+  const bool in_alr = alr_start_time_.has_value();
+  const bool allow_allocation_probe = in_alr;
+
+  if (config_.probe_on_max_allocated_bitrate_change &&
+      state_ == State::kProbingComplete &&
+      max_total_allocated_bitrate != max_total_allocated_bitrate_ &&
+      estimated_bitrate_ < max_bitrate_ &&
+      estimated_bitrate_ < max_total_allocated_bitrate &&
+      allow_allocation_probe) {
+    max_total_allocated_bitrate_ = max_total_allocated_bitrate;
+
+    if (!config_.first_allocation_probe_scale)
+      return std::vector<ProbeClusterConfig>();
+
+    DataRate first_probe_rate = max_total_allocated_bitrate *
+                                config_.first_allocation_probe_scale.Value();
+    DataRate probe_cap = config_.allocation_probe_max.Get();
+    first_probe_rate = std::min(first_probe_rate, probe_cap);
+    std::vector<DataRate> probes = {first_probe_rate};
+    if (config_.second_allocation_probe_scale) {
+      DataRate second_probe_rate =
+          max_total_allocated_bitrate *
+          config_.second_allocation_probe_scale.Value();
+      second_probe_rate = std::min(second_probe_rate, probe_cap);
+      if (second_probe_rate > first_probe_rate)
+        probes.push_back(second_probe_rate);
+    }
+    return InitiateProbing(at_time, probes,
+                           config_.allocation_allow_further_probing.Get());
+  }
+  max_total_allocated_bitrate_ = max_total_allocated_bitrate;
+  return std::vector<ProbeClusterConfig>();
+}
+
+std::vector<ProbeClusterConfig> ProbeController::OnNetworkAvailability(
+    NetworkAvailability msg) {
+  network_available_ = msg.network_available;
+
+  if (!network_available_ && state_ == State::kWaitingForProbingResult) {
+    state_ = State::kProbingComplete;
+    min_bitrate_to_probe_further_ = DataRate::PlusInfinity();
+  }
+
+  if (network_available_ && state_ == State::kInit && !start_bitrate_.IsZero())
+    return InitiateExponentialProbing(msg.at_time);
+  return std::vector<ProbeClusterConfig>();
+}
+
+std::vector<ProbeClusterConfig> ProbeController::InitiateExponentialProbing(
+    Timestamp at_time) {
+  RTC_DCHECK(network_available_);
+  RTC_DCHECK(state_ == State::kInit);
+  RTC_DCHECK_GT(start_bitrate_, DataRate::Zero());
+
+  // When probing at 1.8 Mbps (6x 300 kbps), this represents a threshold of
+  // 1.2 Mbps to continue probing.
+  std::vector<DataRate> probes = {config_.first_exponential_probe_scale *
+                                  start_bitrate_};
+  if (config_.second_exponential_probe_scale &&
+      config_.second_exponential_probe_scale.GetOptional().value() > 0) {
+    probes.push_back(config_.second_exponential_probe_scale.Value() *
+                     start_bitrate_);
+  }
+  return InitiateProbing(at_time, probes, true);
+}
+
+std::vector<ProbeClusterConfig> ProbeController::SetEstimatedBitrate(
+    DataRate bitrate,
+    BandwidthLimitedCause bandwidth_limited_cause,
+    Timestamp at_time) {
+  bandwidth_limited_cause_ = bandwidth_limited_cause;
+  if (bitrate < kBitrateDropThreshold * estimated_bitrate_) {
+    time_of_last_large_drop_ = at_time;
+    bitrate_before_last_large_drop_ = estimated_bitrate_;
+  }
+  estimated_bitrate_ = bitrate;
+
+  if (state_ == State::kWaitingForProbingResult) {
+    // Continue probing if probing results indicate channel has greater
+    // capacity.
+    DataRate network_state_estimate_probe_further_limit =
+        config_.network_state_estimate_probing_interval->IsFinite() &&
+                network_estimate_
+            ? network_estimate_->link_capacity_upper *
+                  config_.further_probe_threshold
+            : DataRate::PlusInfinity();
+    RTC_LOG(LS_INFO) << "Measured bitrate: " << bitrate
+                     << " Minimum to probe further: "
+                     << min_bitrate_to_probe_further_ << " upper limit: "
+                     << network_state_estimate_probe_further_limit;
+
+    if (bitrate > min_bitrate_to_probe_further_ &&
+        bitrate <= network_state_estimate_probe_further_limit) {
+      return InitiateProbing(
+          at_time, {config_.further_exponential_probe_scale * bitrate}, true);
+    }
+  }
+  return {};
+}
+
+void ProbeController::EnablePeriodicAlrProbing(bool enable) {
+  enable_periodic_alr_probing_ = enable;
+}
+
+void ProbeController::SetAlrStartTimeMs(
+    absl::optional<int64_t> alr_start_time_ms) {
+  if (alr_start_time_ms) {
+    alr_start_time_ = Timestamp::Millis(*alr_start_time_ms);
+  } else {
+    alr_start_time_ = absl::nullopt;
+  }
+}
+
+void ProbeController::SetAlrEndedTimeMs(int64_t alr_end_time_ms) {
+  alr_end_time_.emplace(Timestamp::Millis(alr_end_time_ms));
+}
+
+std::vector<ProbeClusterConfig> ProbeController::RequestProbe(
+    Timestamp at_time) {
+  // Called once we have returned to normal state after a large drop in
+  // estimated bandwidth. The current response is to initiate a single probe
+  // session (if not already probing) at the previous bitrate.
+  //
+  // If the probe session fails, the assumption is that this drop was a
+  // real one from a competing flow or a network change.
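+  // (The probe targets kProbeFractionAfterDrop, i.e. 85%, of the bitrate
+  // observed just before the drop; see InitiateProbing.)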
+  bool in_alr = alr_start_time_.has_value();
+  bool alr_ended_recently =
+      (alr_end_time_.has_value() &&
+       at_time - alr_end_time_.value() < kAlrEndedTimeout);
+  if (in_alr || alr_ended_recently || in_rapid_recovery_experiment_) {
+    if (state_ == State::kProbingComplete) {
+      DataRate suggested_probe =
+          kProbeFractionAfterDrop * bitrate_before_last_large_drop_;
+      DataRate min_expected_probe_result =
+          (1 - kProbeUncertainty) * suggested_probe;
+      TimeDelta time_since_drop = at_time - time_of_last_large_drop_;
+      TimeDelta time_since_probe = at_time - last_bwe_drop_probing_time_;
+      if (min_expected_probe_result > estimated_bitrate_ &&
+          time_since_drop < kBitrateDropTimeout &&
+          time_since_probe > kMinTimeBetweenAlrProbes) {
+        RTC_LOG(LS_INFO) << "Detected big bandwidth drop, start probing.";
+        // Track how often we probe in response to bandwidth drop in ALR.
+        RTC_HISTOGRAM_COUNTS_10000(
+            "WebRTC.BWE.BweDropProbingIntervalInS",
+            (at_time - last_bwe_drop_probing_time_).seconds());
+        last_bwe_drop_probing_time_ = at_time;
+        return InitiateProbing(at_time, {suggested_probe}, false);
+      }
+    }
+  }
+  return std::vector<ProbeClusterConfig>();
+}
+
+void ProbeController::SetNetworkStateEstimate(
+    webrtc::NetworkStateEstimate estimate) {
+  network_estimate_ = estimate;
+}
+
+void ProbeController::Reset(Timestamp at_time) {
+  network_available_ = true;
+  bandwidth_limited_cause_ = BandwidthLimitedCause::kDelayBasedLimited;
+  state_ = State::kInit;
+  min_bitrate_to_probe_further_ = DataRate::PlusInfinity();
+  time_last_probing_initiated_ = Timestamp::Zero();
+  estimated_bitrate_ = DataRate::Zero();
+  network_estimate_ = absl::nullopt;
+  start_bitrate_ = DataRate::Zero();
+  max_bitrate_ = kDefaultMaxProbingBitrate;
+  Timestamp now = at_time;
+  last_bwe_drop_probing_time_ = now;
+  alr_end_time_.reset();
+  time_of_last_large_drop_ = now;
+  bitrate_before_last_large_drop_ = DataRate::Zero();
+  max_total_allocated_bitrate_ = DataRate::Zero();
+}
+
+bool ProbeController::TimeForAlrProbe(Timestamp at_time) const {
+  if (enable_periodic_alr_probing_ && alr_start_time_) {
+    Timestamp next_probe_time =
+        std::max(*alr_start_time_, time_last_probing_initiated_) +
+        config_.alr_probing_interval;
+    return at_time >= next_probe_time;
+  }
+  return false;
+}
+
+bool ProbeController::TimeForNetworkStateProbe(Timestamp at_time) const {
+  if (!network_estimate_ ||
+      network_estimate_->link_capacity_upper.IsInfinite()) {
+    return false;
+  }
+
+  bool probe_due_to_low_estimate =
+      bandwidth_limited_cause_ == BandwidthLimitedCause::kDelayBasedLimited &&
+      estimated_bitrate_ <
+          config_.probe_if_estimate_lower_than_network_state_estimate_ratio *
+              network_estimate_->link_capacity_upper;
+  if (probe_due_to_low_estimate &&
+      config_.estimate_lower_than_network_state_estimate_probing_interval
+          ->IsFinite()) {
+    Timestamp next_probe_time =
+        time_last_probing_initiated_ +
+        config_.estimate_lower_than_network_state_estimate_probing_interval;
+    return at_time >= next_probe_time;
+  }
+
+  bool periodic_probe =
+      estimated_bitrate_ < network_estimate_->link_capacity_upper;
+  if (periodic_probe &&
+      config_.network_state_estimate_probing_interval->IsFinite()) {
+    Timestamp next_probe_time = time_last_probing_initiated_ +
+                                config_.network_state_estimate_probing_interval;
+    return at_time >= next_probe_time;
+  }
+
+  return false;
+}
+
+std::vector<ProbeClusterConfig> ProbeController::Process(Timestamp at_time) {
+  if (at_time - time_last_probing_initiated_ >
+      kMaxWaitingTimeForProbingResult) {
+    if (state_ == State::kWaitingForProbingResult) {
+      RTC_LOG(LS_INFO) << "kWaitingForProbingResult: timeout";
"kWaitingForProbingResult: timeout"; + state_ = State::kProbingComplete; + min_bitrate_to_probe_further_ = DataRate::PlusInfinity(); + } + } + if (estimated_bitrate_.IsZero() || state_ != State::kProbingComplete) { + return {}; + } + if (TimeForAlrProbe(at_time) || TimeForNetworkStateProbe(at_time)) { + return InitiateProbing( + at_time, {estimated_bitrate_ * config_.alr_probe_scale}, true); + } + return std::vector(); +} + +std::vector ProbeController::InitiateProbing( + Timestamp now, + std::vector bitrates_to_probe, + bool probe_further) { + if (config_.skip_if_estimate_larger_than_fraction_of_max > 0) { + DataRate network_estimate = network_estimate_ + ? network_estimate_->link_capacity_upper + : DataRate::PlusInfinity(); + DataRate max_probe_rate = + max_total_allocated_bitrate_.IsZero() + ? max_bitrate_ + : std::min(max_total_allocated_bitrate_, max_bitrate_); + if (std::min(network_estimate, estimated_bitrate_) > + config_.skip_if_estimate_larger_than_fraction_of_max * max_probe_rate) { + state_ = State::kProbingComplete; + min_bitrate_to_probe_further_ = DataRate::PlusInfinity(); + return {}; + } + } + + DataRate max_probe_bitrate = max_bitrate_; + if (max_total_allocated_bitrate_ > DataRate::Zero()) { + // If a max allocated bitrate has been configured, allow probing up to 2x + // that rate. This allows some overhead to account for bursty streams, + // which otherwise would have to ramp up when the overshoot is already in + // progress. + // It also avoids minor quality reduction caused by probes often being + // received at slightly less than the target probe bitrate. + max_probe_bitrate = + std::min(max_probe_bitrate, max_total_allocated_bitrate_ * 2); + } + + DataRate estimate_capped_bitrate = DataRate::PlusInfinity(); + if (config_.limit_probe_target_rate_to_loss_bwe) { + switch (bandwidth_limited_cause_) { + case BandwidthLimitedCause::kLossLimitedBweDecreasing: + // If bandwidth estimate is decreasing because of packet loss, do not + // send probes. 
+        return {};
+      case BandwidthLimitedCause::kLossLimitedBweIncreasing:
+        estimate_capped_bitrate =
+            std::min(max_probe_bitrate,
+                     estimated_bitrate_ * config_.loss_limited_probe_scale);
+        break;
+      case BandwidthLimitedCause::kDelayBasedLimited:
+        break;
+      default:
+        break;
+    }
+  }
+  if (config_.not_probe_if_delay_increased &&
+      bandwidth_limited_cause_ ==
+          BandwidthLimitedCause::kDelayBasedLimitedDelayIncreased) {
+    return {};
+  }
+
+  if (config_.network_state_estimate_probing_interval->IsFinite() &&
+      network_estimate_ && network_estimate_->link_capacity_upper.IsFinite()) {
+    if (network_estimate_->link_capacity_upper.IsZero()) {
+      RTC_LOG(LS_INFO) << "Not sending probe, network state estimate is zero";
+      return {};
+    }
+    estimate_capped_bitrate =
+        std::min({estimate_capped_bitrate, max_probe_bitrate,
+                  network_estimate_->link_capacity_upper *
+                      config_.network_state_probe_scale});
+  }
+
+  std::vector<ProbeClusterConfig> pending_probes;
+  for (DataRate bitrate : bitrates_to_probe) {
+    RTC_DCHECK(!bitrate.IsZero());
+
+    bitrate = std::min(bitrate, estimate_capped_bitrate);
+    if (bitrate > max_probe_bitrate) {
+      bitrate = max_probe_bitrate;
+      probe_further = false;
+    }
+
+    ProbeClusterConfig config;
+    config.at_time = now;
+    config.target_data_rate = bitrate;
+    if (network_estimate_ &&
+        config_.network_state_estimate_probing_interval->IsFinite()) {
+      config.target_duration = config_.network_state_probe_duration;
+    } else {
+      config.target_duration = config_.min_probe_duration;
+    }
+
+    config.target_probe_count = config_.min_probe_packets_sent;
+    config.id = next_probe_cluster_id_;
+    next_probe_cluster_id_++;
+    MaybeLogProbeClusterCreated(event_log_, config);
+    pending_probes.push_back(config);
+  }
+  time_last_probing_initiated_ = now;
+  if (probe_further) {
+    state_ = State::kWaitingForProbingResult;
+    // Don't expect probe results to be larger than a fraction of the actual
+    // probe rate.
+    min_bitrate_to_probe_further_ =
+        std::min(estimate_capped_bitrate, (*(bitrates_to_probe.end() - 1))) *
+        config_.further_probe_threshold;
+  } else {
+    state_ = State::kProbingComplete;
+    min_bitrate_to_probe_further_ = DataRate::PlusInfinity();
+  }
+  return pending_probes;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.h
new file mode 100644
index 0000000000..aa8b526ab0
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.h
@@ -0,0 +1,196 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_CONTROLLER_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_CONTROLLER_H_
+
+#include <stdint.h>
+
+#include <initializer_list>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/types/optional.h"
+#include "api/field_trials_view.h"
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "api/transport/network_control.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+
+namespace webrtc {
+
+struct ProbeControllerConfig {
+  explicit ProbeControllerConfig(const FieldTrialsView* key_value_config);
+  ProbeControllerConfig(const ProbeControllerConfig&);
+  ProbeControllerConfig& operator=(const ProbeControllerConfig&) = default;
+  ~ProbeControllerConfig();
+
+  // These parameters configure the initial probes. First we send one or two
+  // probes of sizes p1 * start_bitrate_ and p2 * start_bitrate_.
+  // Then whenever we get a bitrate estimate of at least
+  // further_probe_threshold times the size of the last sent probe, we'll send
+  // another one of size step_size times the new estimate.
+  FieldTrialParameter<double> first_exponential_probe_scale;
+  FieldTrialOptional<double> second_exponential_probe_scale;
+  FieldTrialParameter<double> further_exponential_probe_scale;
+  FieldTrialParameter<double> further_probe_threshold;
+
+  // Configures how often we send ALR probes and how big they are.
+  FieldTrialParameter<TimeDelta> alr_probing_interval;
+  FieldTrialParameter<double> alr_probe_scale;
+
+  // Configures how often we send probes if NetworkStateEstimate is available.
+  FieldTrialParameter<TimeDelta> network_state_estimate_probing_interval;
+  // Periodically probe as long as the ratio between the current estimate and
+  // the NetworkStateEstimate is lower than this.
+  FieldTrialParameter<double>
+      probe_if_estimate_lower_than_network_state_estimate_ratio;
+  FieldTrialParameter<TimeDelta>
+      estimate_lower_than_network_state_estimate_probing_interval;
+  FieldTrialParameter<double> network_state_probe_scale;
+  // Overrides min_probe_duration if network_state_estimate_probing_interval
+  // is set and a network state estimate is known.
+  FieldTrialParameter<TimeDelta> network_state_probe_duration;
+
+  // Configures the probes emitted by changes to the allocated bitrate.
+  FieldTrialParameter<bool> probe_on_max_allocated_bitrate_change;
+  FieldTrialOptional<double> first_allocation_probe_scale;
+  FieldTrialOptional<double> second_allocation_probe_scale;
+  FieldTrialFlag allocation_allow_further_probing;
+  FieldTrialParameter<DataRate> allocation_probe_max;
+
+  // The minimum number of probing packets used.
+  FieldTrialParameter<int> min_probe_packets_sent;
+  // The minimum probing duration.
+  FieldTrialParameter<TimeDelta> min_probe_duration;
+  // Periodically probe when the bandwidth estimate is loss limited.
+  FieldTrialParameter<bool> limit_probe_target_rate_to_loss_bwe;
+  FieldTrialParameter<double> loss_limited_probe_scale;
+  // Don't send a probe if min(estimate, network state estimate) is larger
+  // than this fraction of the set max bitrate.
+  FieldTrialParameter<double> skip_if_estimate_larger_than_fraction_of_max;
+  // Do not send probes if the network is either overusing or underusing.
+  FieldTrialParameter<bool> not_probe_if_delay_increased;
+};
+
+// Reason that the bandwidth estimate is limited. The bandwidth estimate can
+// be limited by either delay-based bwe, or loss-based bwe when it increases
+// or decreases the estimate.
+enum class BandwidthLimitedCause {
+  kLossLimitedBweIncreasing = 0,
+  kLossLimitedBweDecreasing = 1,
+  kDelayBasedLimited = 2,
+  kDelayBasedLimitedDelayIncreased = 3,
+};
+
+// This class controls initiation of probing to estimate initial channel
+// capacity. There is also support for probing during a session when max
+// bitrate is adjusted by an application.
+class ProbeController {
+ public:
+  explicit ProbeController(const FieldTrialsView* key_value_config,
+                           RtcEventLog* event_log);
+  ~ProbeController();
+
+  ProbeController(const ProbeController&) = delete;
+  ProbeController& operator=(const ProbeController&) = delete;
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> SetBitrates(
+      DataRate min_bitrate,
+      DataRate start_bitrate,
+      DataRate max_bitrate,
+      Timestamp at_time);
+
+  // The total bitrate, as opposed to the max bitrate, is the sum of the
+  // configured bitrates for all active streams.
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig>
+  OnMaxTotalAllocatedBitrate(DataRate max_total_allocated_bitrate,
+                             Timestamp at_time);
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> OnNetworkAvailability(
+      NetworkAvailability msg);
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> SetEstimatedBitrate(
+      DataRate bitrate,
+      BandwidthLimitedCause bandwidth_limited_cause,
+      Timestamp at_time);
+
+  void EnablePeriodicAlrProbing(bool enable);
+
+  void SetAlrStartTimeMs(absl::optional<int64_t> alr_start_time);
+  void SetAlrEndedTimeMs(int64_t alr_end_time);
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> RequestProbe(
+      Timestamp at_time);
+
+  void SetNetworkStateEstimate(webrtc::NetworkStateEstimate estimate);
+
+  // Resets the ProbeController to a state equivalent to as if it was just
+  // created EXCEPT for `enable_periodic_alr_probing_`.
+  void Reset(Timestamp at_time);
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> Process(
+      Timestamp at_time);
+
+  // Gets the value of the field trial not_probe_if_delay_increased.
+  bool DontProbeIfDelayIncreased() {
+    return config_.not_probe_if_delay_increased;
+  }
+
+ private:
+  enum class State {
+    // Initial state where no probing has been triggered yet.
+    kInit,
+    // Waiting for probing results to continue further probing.
+    kWaitingForProbingResult,
+    // Probing is complete.
+    kProbingComplete,
+  };
+
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig>
+  InitiateExponentialProbing(Timestamp at_time);
+  ABSL_MUST_USE_RESULT std::vector<ProbeClusterConfig> InitiateProbing(
+      Timestamp now,
+      std::vector<DataRate> bitrates_to_probe,
+      bool probe_further);
+  bool TimeForAlrProbe(Timestamp at_time) const;
+  bool TimeForNetworkStateProbe(Timestamp at_time) const;
+
+  bool network_available_;
+  BandwidthLimitedCause bandwidth_limited_cause_ =
+      BandwidthLimitedCause::kDelayBasedLimited;
+  State state_;
+  DataRate min_bitrate_to_probe_further_ = DataRate::PlusInfinity();
+  Timestamp time_last_probing_initiated_ = Timestamp::MinusInfinity();
+  DataRate estimated_bitrate_ = DataRate::Zero();
+  absl::optional<NetworkStateEstimate> network_estimate_;
+  DataRate start_bitrate_ = DataRate::Zero();
+  DataRate max_bitrate_ = DataRate::PlusInfinity();
+  Timestamp last_bwe_drop_probing_time_ = Timestamp::Zero();
+  absl::optional<Timestamp> alr_start_time_;
+  absl::optional<Timestamp> alr_end_time_;
+  bool enable_periodic_alr_probing_;
+  Timestamp time_of_last_large_drop_ = Timestamp::MinusInfinity();
+  DataRate bitrate_before_last_large_drop_ = DataRate::Zero();
+  DataRate max_total_allocated_bitrate_ = DataRate::Zero();
+
+  const bool in_rapid_recovery_experiment_;
+  RtcEventLog* event_log_;
+
+  int32_t next_probe_cluster_id_ = 1;
+
+  ProbeControllerConfig config_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_PROBE_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_gn/moz.build
new file mode 100644
index 0000000000..4f4f573cd9
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("probe_controller_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_unittest.cc new file mode 100644 index 0000000000..e6a5c8ceef --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/probe_controller_unittest.cc @@ -0,0 +1,1131 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+#include "modules/congestion_controller/goog_cc/probe_controller.h"
+
+#include <memory>
+
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/clock.h"
+#include "test/explicit_key_value_config.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::NiceMock;
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+constexpr DataRate kMinBitrate = DataRate::BitsPerSec(100);
+constexpr DataRate kStartBitrate = DataRate::BitsPerSec(300);
+constexpr DataRate kMaxBitrate = DataRate::BitsPerSec(10000);
+
+constexpr TimeDelta kExponentialProbingTimeout = TimeDelta::Seconds(5);
+
+constexpr TimeDelta kAlrProbeInterval = TimeDelta::Seconds(5);
+constexpr TimeDelta kAlrEndedTimeout = TimeDelta::Seconds(3);
+constexpr TimeDelta kBitrateDropTimeout = TimeDelta::Seconds(5);
+}  // namespace
+
+class ProbeControllerFixture {
+ public:
+  explicit ProbeControllerFixture(absl::string_view field_trials = "")
+      : field_trial_config_(field_trials), clock_(100000000L) {}
+
+  std::unique_ptr<ProbeController> CreateController() {
+    return std::make_unique<ProbeController>(&field_trial_config_,
+                                             &mock_rtc_event_log);
+  }
+
+  Timestamp CurrentTime() { return clock_.CurrentTime(); }
+  void AdvanceTime(TimeDelta delta) { clock_.AdvanceTime(delta); }
+
+  ExplicitKeyValueConfig field_trial_config_;
+  SimulatedClock clock_;
+  NiceMock<MockRtcEventLog> mock_rtc_event_log;
+};
+
+TEST(ProbeControllerTest, InitiatesProbingAtStart) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_GE(probes.size(), 2u);
+}
+
+TEST(ProbeControllerTest, SetsDefaultTargetDurationAndTargetProbeCount) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  std::vector<ProbeClusterConfig> probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_GE(probes.size(), 2u);
+
+  EXPECT_EQ(probes[0].target_duration, TimeDelta::Millis(15));
+  EXPECT_EQ(probes[0].target_probe_count, 5);
+}
+
+TEST(ProbeControllerTest,
+     FieldTrialsOverrideDefaultTargetDurationAndTargetProbeCount) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingBehavior/"
+      "min_probe_packets_sent:2,min_probe_duration:123ms/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  std::vector<ProbeClusterConfig> probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_GE(probes.size(), 2u);
+
+  EXPECT_EQ(probes[0].target_duration, TimeDelta::Millis(123));
+  EXPECT_EQ(probes[0].target_probe_count, 2);
+}
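+
+// Editorial note (illustrative, not part of the upstream patch): the field
+// trial strings used throughout these tests follow the generic WebRTC
+// "Trial-Name/key1:value1,key2:value2/" format, consumed through the
+// FieldTrialsView passed to ProbeController. For example, the trial in the
+// test above maps to:
+//
+//   min_probe_packets_sent:2  -> ProbeClusterConfig::target_probe_count == 2
+//   min_probe_duration:123ms  -> ProbeClusterConfig::target_duration == 123ms
+//
+// ExplicitKeyValueConfig in the fixture parses exactly such strings, so each
+// test can exercise one parsed configuration without touching global state.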
+
+TEST(ProbeControllerTest, ProbeOnlyWhenNetworkIsUp) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->OnNetworkAvailability(
+      {.at_time = fixture.CurrentTime(), .network_available = false});
+  probes = probe_controller->SetBitrates(kMinBitrate, kStartBitrate,
+                                         kMaxBitrate, fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+  probes = probe_controller->OnNetworkAvailability(
+      {.at_time = fixture.CurrentTime(), .network_available = true});
+  EXPECT_GE(probes.size(), 2u);
+}
+
+TEST(ProbeControllerTest, CanConfigureInitialProbeRateFactor) {
+  ProbeControllerFixture fixture("WebRTC-Bwe-ProbingConfiguration/p1:2,p2:3/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  EXPECT_EQ(probes[0].target_data_rate, kStartBitrate * 2);
+  EXPECT_EQ(probes[1].target_data_rate, kStartBitrate * 3);
+}
+
+TEST(ProbeControllerTest, DisableSecondInitialProbeIfRateFactorZero) {
+  ProbeControllerFixture fixture("WebRTC-Bwe-ProbingConfiguration/p1:2,p2:0/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, kStartBitrate * 2);
+}
+
+TEST(ProbeControllerTest, InitiatesProbingOnMaxBitrateIncrease) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  // Long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate + DataRate::BitsPerSec(100),
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), kMaxBitrate.bps() + 100);
+}
+
+TEST(ProbeControllerTest, ProbesOnMaxAllocatedBitrateIncreaseOnlyWhenInAlr) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate - DataRate::BitsPerSec(1),
+      BandwidthLimitedCause::kDelayBasedLimited, fixture.CurrentTime());
+
+  // Wait long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  // Probe when in alr.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(
+      kMaxBitrate + DataRate::BitsPerSec(1), fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  EXPECT_EQ(probes.at(0).target_data_rate, kMaxBitrate);
+
+  // Do not probe when not in alr.
+  probe_controller->SetAlrStartTimeMs(absl::nullopt);
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(
+      kMaxBitrate + DataRate::BitsPerSec(2), fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, CanDisableProbingOnMaxTotalAllocatedBitrateIncrease) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "probe_max_allocation:false/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate - DataRate::BitsPerSec(1),
+      BandwidthLimitedCause::kDelayBasedLimited, fixture.CurrentTime());
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+
+  // Do not probe, since probe_max_allocation:false.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(
+      kMaxBitrate + DataRate::BitsPerSec(1), fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, InitiatesProbingOnMaxBitrateIncreaseAtMaxBitrate) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  // Long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate + DataRate::BitsPerSec(100),
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate,
+            kMaxBitrate + DataRate::BitsPerSec(100));
+}
+
+TEST(ProbeControllerTest, TestExponentialProbing) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+
+  // Repeated probe should only be sent when estimated bitrate climbs above
+  // 0.7 * 6 * kStartBitrate = 1260.
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(1000), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(1800), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 2 * 1800);
+}
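+
+// Editorial note (illustrative, not part of the upstream patch): the numbers
+// in TestExponentialProbing follow from the default probe configuration as
+// exercised above. With kStartBitrate = 300 bps, the second initial probe
+// targets 6 * 300 = 1800 bps, and further probing requires the estimate to
+// climb above 0.7 * 1800 = 1260 bps:
+//
+//   estimate = 1000 bps  -> 1000 < 1260, no further probe
+//   estimate = 1800 bps  -> 1800 > 1260, next probe at 2 * 1800 = 3600 bps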
+
+TEST(ProbeControllerTest, TestExponentialProbingTimeout) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  // Advance far enough to cause a time out in waiting for probing result.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(1800), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, RequestProbeInAlr) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_GE(probes.size(), 2u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(250), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probes = probe_controller->RequestProbe(fixture.CurrentTime());
+
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 0.85 * 500);
+}
+
+TEST(ProbeControllerTest, RequestProbeWhenAlrEndedRecently) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  probe_controller->SetAlrStartTimeMs(absl::nullopt);
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(250), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probe_controller->SetAlrEndedTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrEndedTimeout - TimeDelta::Millis(1));
+  probes = probe_controller->RequestProbe(fixture.CurrentTime());
+
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 0.85 * 500);
+}
+
+TEST(ProbeControllerTest, RequestProbeWhenAlrNotEndedRecently) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  probe_controller->SetAlrStartTimeMs(absl::nullopt);
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(250), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probe_controller->SetAlrEndedTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrEndedTimeout + TimeDelta::Millis(1));
+  probes = probe_controller->RequestProbe(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, RequestProbeWhenBweDropNotRecent) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(250), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  fixture.AdvanceTime(kBitrateDropTimeout + TimeDelta::Millis(1));
+  probes = probe_controller->RequestProbe(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, PeriodicProbing) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  Timestamp start_time = fixture.CurrentTime();
+
+  // Expect the controller to send a new probe after 5s has passed.
+  probe_controller->SetAlrStartTimeMs(start_time.ms());
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 1000);
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  // The following probe should be sent at 10s into ALR.
+  probe_controller->SetAlrStartTimeMs(start_time.ms());
+  fixture.AdvanceTime(TimeDelta::Seconds(4));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  probe_controller->SetAlrStartTimeMs(start_time.ms());
+  fixture.AdvanceTime(TimeDelta::Seconds(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
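+
+// Editorial note (illustrative, not part of the upstream patch): a compact
+// timeline of the PeriodicProbing expectations above, with ALR starting at
+// t = 0 and the estimate pinned at 500 bps:
+//
+//   t = 5s   -> periodic ALR probe at 2 * 500 = 1000 bps
+//   t = 9s   -> no probe (the 5s interval since the last probe has not
+//               elapsed yet)
+//   t = 10s  -> next periodic ALR probe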
+
+TEST(ProbeControllerTest, PeriodicProbingAfterReset) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  Timestamp alr_start_time = fixture.CurrentTime();
+
+  probe_controller->SetAlrStartTimeMs(alr_start_time.ms());
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probe_controller->Reset(fixture.CurrentTime());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(10));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  // Since bitrates are not yet set, no probe is sent even though we are in
+  // ALR mode.
+  EXPECT_TRUE(probes.empty());
+
+  probes = probe_controller->SetBitrates(kMinBitrate, kStartBitrate,
+                                         kMaxBitrate, fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+
+  // Make sure we use `kStartBitrate` as the estimated bitrate
+  // until SetEstimatedBitrate is called with an updated estimate.
+  fixture.AdvanceTime(TimeDelta::Seconds(10));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, kStartBitrate * 2);
+}
+
+TEST(ProbeControllerTest, TestExponentialProbingOverflow) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  const DataRate kMbpsMultiplier = DataRate::KilobitsPerSec(1000);
+  auto probes = probe_controller->SetBitrates(kMinBitrate, 10 * kMbpsMultiplier,
+                                              100 * kMbpsMultiplier,
+                                              fixture.CurrentTime());
+  // Verify that probe bitrate is capped at the specified max bitrate.
+  probes = probe_controller->SetEstimatedBitrate(
+      60 * kMbpsMultiplier, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, 100 * kMbpsMultiplier);
+  // Verify that repeated probes aren't sent.
+  probes = probe_controller->SetEstimatedBitrate(
+      100 * kMbpsMultiplier, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, TestAllocatedBitrateCap) {
+  ProbeControllerFixture fixture;
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  const DataRate kMbpsMultiplier = DataRate::KilobitsPerSec(1000);
+  const DataRate kMaxBitrate = 100 * kMbpsMultiplier;
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, 10 * kMbpsMultiplier, kMaxBitrate, fixture.CurrentTime());
+
+  // Configure ALR for periodic probing.
+  probe_controller->EnablePeriodicAlrProbing(true);
+  Timestamp alr_start_time = fixture.CurrentTime();
+  probe_controller->SetAlrStartTimeMs(alr_start_time.ms());
+
+  DataRate estimated_bitrate = kMaxBitrate / 10;
+  probes = probe_controller->SetEstimatedBitrate(
+      estimated_bitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  // Set a max allocated bitrate below the current estimate.
+  DataRate max_allocated = estimated_bitrate - 1 * kMbpsMultiplier;
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(max_allocated,
+                                                        fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());  // No probe since lower than current max.
+
+  // Probes, such as ALR probes, are capped at 2x the max allocation limit.
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, 2 * max_allocated);
+
+  // Remove allocation limit.
+  EXPECT_TRUE(
+      probe_controller
+          ->OnMaxTotalAllocatedBitrate(DataRate::Zero(), fixture.CurrentTime())
+          .empty());
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, estimated_bitrate * 2);
+}
+
+TEST(ProbeControllerTest, ConfigurableProbingFieldTrial) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "p1:2,p2:5,step_size:3,further_probe_threshold:0.8,"
+      "alloc_p1:2,alloc_p2,min_probe_packets_sent:2/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(kMinBitrate, kStartBitrate,
+                                              DataRate::KilobitsPerSec(5000),
+                                              fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 2u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 600);
+  EXPECT_EQ(probes[0].target_probe_count, 2);
+  EXPECT_EQ(probes[1].target_data_rate.bps(), 1500);
+  EXPECT_EQ(probes[1].target_probe_count, 2);
+
+  // Repeated probe should only be sent when estimated bitrate climbs above
+  // 0.8 * 5 * kStartBitrate = 1200.
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(1100), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 0u);
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(1250), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 3 * 1250);
+
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(
+      DataRate::KilobitsPerSec(200), fixture.CurrentTime());
+  EXPECT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate.bps(), 400'000);
+}
+
+TEST(ProbeControllerTest, LimitAlrProbeWhenLossBasedBweLimited) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  // Expect the controller to send a new probe after 5s has passed.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500),
+      BandwidthLimitedCause::kLossLimitedBweIncreasing, fixture.CurrentTime());
+  fixture.AdvanceTime(TimeDelta::Seconds(6));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, 1.5 * DataRate::BitsPerSec(500));
+
+  probes = probe_controller->SetEstimatedBitrate(
+      1.5 * DataRate::BitsPerSec(500),
+      BandwidthLimitedCause::kDelayBasedLimited, fixture.CurrentTime());
+  fixture.AdvanceTime(TimeDelta::Seconds(6));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  EXPECT_GT(probes[0].target_data_rate, 1.5 * 1.5 * DataRate::BitsPerSec(500));
+}
+
+TEST(ProbeControllerTest, PeriodicProbeAtUpperNetworkStateEstimate) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/network_state_interval:5s/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(5000), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  // Expect the controller to send a new probe after 5s has passed.
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = DataRate::KilobitsPerSec(6);
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+}
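+
+// Editorial note (illustrative, not part of the upstream patch): with
+// network_state_interval:5s the controller re-probes every 5 seconds at the
+// upper link capacity reported via SetNetworkStateEstimate(). In the test
+// above the estimate is 5000 bps while link_capacity_upper is 6 kbps, so
+// each periodic probe targets exactly link_capacity_upper rather than a
+// multiple of the current estimate.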
+
+TEST(ProbeControllerTest,
+     LimitProbeAtUpperNetworkStateEstimateIfLossBasedLimited) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  // Expect the controller to send a new probe after 5s has passed.
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = DataRate::BitsPerSec(700);
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::BitsPerSec(500),
+      BandwidthLimitedCause::kLossLimitedBweIncreasing, fixture.CurrentTime());
+  // Expect the controller to send a new probe after 5s has passed.
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  EXPECT_EQ(probes[0].target_data_rate, DataRate::BitsPerSec(700));
+}
+
+TEST(ProbeControllerTest, AlrProbesLimitedByNetworkStateEstimate) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/network_state_interval:5s/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::KilobitsPerSec(6), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, kMaxBitrate);
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = DataRate::BitsPerSec(8000);
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+}
+
+TEST(ProbeControllerTest, CanSetLongerProbeDurationAfterNetworkStateEstimate) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,network_state_probe_duration:100ms/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      DataRate::KilobitsPerSec(5), BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  EXPECT_LT(probes[0].target_duration, TimeDelta::Millis(100));
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = DataRate::KilobitsPerSec(6);
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_duration, TimeDelta::Millis(100));
+}
+
+TEST(ProbeControllerTest, ProbeInAlrIfLossBasedIncreasing) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probe_controller->EnablePeriodicAlrProbing(true);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kLossLimitedBweIncreasing,
+      fixture.CurrentTime());
+
+  // Wait long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  // Probe when in alr.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes.at(0).target_data_rate, 1.5 * kStartBitrate);
+}
+
+TEST(ProbeControllerTest, ProbeFurtherInAlrIfLossBasedIncreasing) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probe_controller->EnablePeriodicAlrProbing(true);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kLossLimitedBweIncreasing,
+      fixture.CurrentTime());
+
+  // Wait long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  // Probe when in alr.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  ASSERT_EQ(probes.at(0).target_data_rate, 1.5 * kStartBitrate);
+
+  probes = probe_controller->SetEstimatedBitrate(
+      1.5 * kStartBitrate, BandwidthLimitedCause::kLossLimitedBweIncreasing,
+      fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_EQ(probes[0].target_data_rate, 1.5 * 1.5 * kStartBitrate);
+}
+
+TEST(ProbeControllerTest, NotProbeWhenInAlrIfLossBasedDecreases) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probe_controller->EnablePeriodicAlrProbing(true);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kLossLimitedBweDecreasing,
+      fixture.CurrentTime());
+
+  // Wait long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  // Do not probe in ALR when the loss based estimate decreases.
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
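+
+// Editorial note (summary of the expectations above, not upstream text): the
+// BandwidthLimitedCause reported with each estimate gates ALR probing when
+// limit_probe_target_rate_to_loss_bwe is enabled:
+//
+//   kLossLimitedBweIncreasing -> probe, but cap the target at 1.5x the
+//                                current loss based estimate
+//   kLossLimitedBweDecreasing -> suppress ALR probes entirely
+//   kDelayBasedLimited        -> probe normally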
+
+TEST(ProbeControllerTest, NotProbeIfLossBasedIncreasingOutsideAlr) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probe_controller->EnablePeriodicAlrProbing(true);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kLossLimitedBweIncreasing,
+      fixture.CurrentTime());
+
+  // Wait long enough to time out exponential probing.
+  fixture.AdvanceTime(kExponentialProbingTimeout);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  probe_controller->SetAlrStartTimeMs(absl::nullopt);
+  fixture.AdvanceTime(kAlrProbeInterval + TimeDelta::Millis(1));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, ProbeFurtherWhenLossBasedIsSameAsDelayBasedEstimate) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = 5 * kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  DataRate probe_target_rate = probes[0].target_data_rate;
+  EXPECT_LT(probe_target_rate, state_estimate.link_capacity_upper);
+  // Expect that more probes are sent if BWE is the same as delay based
+  // estimate.
+  probes = probe_controller->SetEstimatedBitrate(
+      probe_target_rate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  EXPECT_EQ(probes[0].target_data_rate, 2 * probe_target_rate);
+}
+
+TEST(ProbeControllerTest, ProbeIfEstimateLowerThanNetworkStateEstimate) {
+  // Periodic probe every 1 second if estimate is lower than 50% of the
+  // NetworkStateEstimate.
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/est_lower_than_network_interval:1s,"
+      "est_lower_than_network_ratio:0.5,limit_probe_"
+      "target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  state_estimate.link_capacity_upper = kStartBitrate * 3;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_EQ(probes.size(), 1u);
+  EXPECT_GT(probes[0].target_data_rate, kStartBitrate);
+
+  // If the network state estimate has not increased, send another probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+
+  // Stop probing if the estimate increases. We might probe further here
+  // though.
+  probes = probe_controller->SetEstimatedBitrate(
+      2 * kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  // No more periodic probes.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, DontProbeFurtherWhenLossLimited) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = 3 * kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+  EXPECT_LT(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+  // Expect that no more probes are sent immediately if BWE is loss limited.
+  probes = probe_controller->SetEstimatedBitrate(
+      probes[0].target_data_rate,
+      BandwidthLimitedCause::kLossLimitedBweDecreasing, fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, ProbeFurtherWhenDelayBasedLimited) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = 3 * kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+  EXPECT_LT(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+  // Since the probe was successful, expect to continue probing.
+  probes = probe_controller->SetEstimatedBitrate(
+      probes[0].target_data_rate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+  EXPECT_EQ(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+}
+
+TEST(ProbeControllerTest,
+     ProbeFurtherIfNetworkStateEstimateIncreaseAfterProbeSent) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = 1.2 * probes[0].target_data_rate / 2;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  // No immediate further probing since probe result is low.
+  probes = probe_controller->SetEstimatedBitrate(
+      probes[0].target_data_rate / 2, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  EXPECT_LE(probes[0].target_data_rate, state_estimate.link_capacity_upper);
+  // If the network state estimate increases above the threshold to probe
+  // further, and the probe succeeds, expect a new probe.
+  state_estimate.link_capacity_upper = 3 * kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  probes = probe_controller->SetEstimatedBitrate(
+      probes[0].target_data_rate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+
+  // But no more probes if estimate is close to the link capacity.
+  probes = probe_controller->SetEstimatedBitrate(
+      state_estimate.link_capacity_upper * 0.9,
+      BandwidthLimitedCause::kDelayBasedLimited, fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, SkipAlrProbeIfEstimateLargerThanMaxProbe) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "skip_if_est_larger_than_fraction_of_max:0.9/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  fixture.AdvanceTime(TimeDelta::Seconds(10));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  // But if the max rate increases, a new probe is sent.
+  probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, 2 * kMaxBitrate, fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+}
+
+TEST(ProbeControllerTest,
+     SkipAlrProbeIfEstimateLargerThanFractionOfMaxAllocated) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "skip_if_est_larger_than_fraction_of_max:1.0/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  probe_controller->EnablePeriodicAlrProbing(true);
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate / 2, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(10));
+  probe_controller->SetAlrStartTimeMs(fixture.CurrentTime().ms());
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(kMaxBitrate / 2,
+                                                        fixture.CurrentTime());
+  // No probes since total allocated is not higher than the current estimate.
+  EXPECT_TRUE(probes.empty());
+  fixture.AdvanceTime(TimeDelta::Seconds(2));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  // But if the max allocated bitrate increases, a new probe is sent.
+  probes = probe_controller->OnMaxTotalAllocatedBitrate(
+      kMaxBitrate / 2 + DataRate::BitsPerSec(1), fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+}
+
+TEST(ProbeControllerTest, SkipNetworkStateProbeIfEstimateLargerThanMaxProbe) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:2s,skip_if_est_larger_than_fraction_of_max:0.9/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  probe_controller->SetNetworkStateEstimate(
+      {.link_capacity_upper = 2 * kMaxBitrate});
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(10));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, SendsProbeIfNetworkStateEstimateLowerThanMaxProbe) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:2s,skip_if_est_larger_than_fraction_of_max:0.9,"
+      "/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+  probe_controller->SetNetworkStateEstimate(
+      {.link_capacity_upper = 2 * kMaxBitrate});
+  probes = probe_controller->SetEstimatedBitrate(
+      kMaxBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+
+  // Need to wait at least two seconds before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(2100));
+
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+  probe_controller->SetNetworkStateEstimate(
+      {.link_capacity_upper = 2 * kStartBitrate});
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_FALSE(probes.empty());
+}
+
+TEST(ProbeControllerTest, DontSendProbeIfNetworkStateEstimateIsZero) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,limit_probe_target_rate_to_loss_bwe:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimited,
+      fixture.CurrentTime());
+  probe_controller->SetNetworkStateEstimate(
+      {.link_capacity_upper = kStartBitrate});
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  probe_controller->SetNetworkStateEstimate(
+      {.link_capacity_upper = DataRate::Zero()});
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+  fixture.AdvanceTime(TimeDelta::Seconds(6));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+
+TEST(ProbeControllerTest, DontProbeIfDelayIncreased) {
+  ProbeControllerFixture fixture(
+      "WebRTC-Bwe-ProbingConfiguration/"
+      "network_state_interval:5s,not_probe_if_delay_increased:true/");
+  std::unique_ptr<ProbeController> probe_controller =
+      fixture.CreateController();
+
+  auto probes = probe_controller->SetBitrates(
+      kMinBitrate, kStartBitrate, kMaxBitrate, fixture.CurrentTime());
+  ASSERT_FALSE(probes.empty());
+
+  // Need to wait at least one second before process can trigger a new probe.
+  fixture.AdvanceTime(TimeDelta::Millis(1100));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  NetworkStateEstimate state_estimate;
+  state_estimate.link_capacity_upper = 3 * kStartBitrate;
+  probe_controller->SetNetworkStateEstimate(state_estimate);
+  probes = probe_controller->SetEstimatedBitrate(
+      kStartBitrate, BandwidthLimitedCause::kDelayBasedLimitedDelayIncreased,
+      fixture.CurrentTime());
+  ASSERT_TRUE(probes.empty());
+
+  fixture.AdvanceTime(TimeDelta::Seconds(5));
+  probes = probe_controller->Process(fixture.CurrentTime());
+  EXPECT_TRUE(probes.empty());
+}
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/pushback_controller_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/pushback_controller_gn/moz.build
new file mode 100644
index 0000000000..e189363d3e
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/pushback_controller_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/goog_cc/congestion_window_pushback_controller.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] 
== "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("pushback_controller_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.cc new file mode 100644 index 0000000000..792a93d41e --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.cc @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/congestion_controller/goog_cc/robust_throughput_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <utility>
+
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+RobustThroughputEstimator::RobustThroughputEstimator(
+    const RobustThroughputEstimatorSettings& settings)
+    : settings_(settings),
+      latest_discarded_send_time_(Timestamp::MinusInfinity()) {
+  RTC_DCHECK(settings.enabled);
+}
+
+RobustThroughputEstimator::~RobustThroughputEstimator() {}
+
+bool RobustThroughputEstimator::FirstPacketOutsideWindow() {
+  if (window_.empty())
+    return false;
+  if (window_.size() > settings_.max_window_packets)
+    return true;
+  TimeDelta current_window_duration =
+      window_.back().receive_time - window_.front().receive_time;
+  if (current_window_duration > settings_.max_window_duration)
+    return true;
+  if (window_.size() > settings_.window_packets &&
+      current_window_duration > settings_.min_window_duration) {
+    return true;
+  }
+  return false;
+}
+
+void RobustThroughputEstimator::IncomingPacketFeedbackVector(
+    const std::vector<PacketResult>& packet_feedback_vector) {
+  RTC_DCHECK(std::is_sorted(packet_feedback_vector.begin(),
+                            packet_feedback_vector.end(),
+                            PacketResult::ReceiveTimeOrder()));
+  for (const auto& packet : packet_feedback_vector) {
+    // Ignore packets without valid send or receive times.
+    // (This should not happen in production since lost packets are filtered
+    // out before passing the feedback vector to the throughput estimator.
+    // However, explicitly handling this case makes the estimator more robust
+    // and avoids a hard-to-detect bad state.)
+    if (packet.receive_time.IsInfinite() ||
+        packet.sent_packet.send_time.IsInfinite()) {
+      continue;
+    }
+
+    // Insert the new packet.
+    window_.push_back(packet);
+    window_.back().sent_packet.prior_unacked_data =
+        window_.back().sent_packet.prior_unacked_data *
+        settings_.unacked_weight;
+    // In most cases, receive timestamps should already be in order, but in
+    // the rare case where feedback packets have been reordered, we do some
+    // swaps to ensure that the window is sorted.
+    for (size_t i = window_.size() - 1;
+         i > 0 && window_[i].receive_time < window_[i - 1].receive_time; i--) {
+      std::swap(window_[i], window_[i - 1]);
+    }
+  }
+
+  // Remove old packets.
+  while (FirstPacketOutsideWindow()) {
+    latest_discarded_send_time_ = std::max(
+        latest_discarded_send_time_, window_.front().sent_packet.send_time);
+    window_.pop_front();
+  }
+}
+
+absl::optional<DataRate> RobustThroughputEstimator::bitrate() const {
+  if (window_.empty() || window_.size() < settings_.required_packets)
+    return absl::nullopt;
+
+  TimeDelta largest_recv_gap(TimeDelta::Zero());
+  TimeDelta second_largest_recv_gap(TimeDelta::Zero());
+  for (size_t i = 1; i < window_.size(); i++) {
+    // Find receive time gaps.
+    TimeDelta gap = window_[i].receive_time - window_[i - 1].receive_time;
+    if (gap > largest_recv_gap) {
+      second_largest_recv_gap = largest_recv_gap;
+      largest_recv_gap = gap;
+    } else if (gap > second_largest_recv_gap) {
+      second_largest_recv_gap = gap;
+    }
+  }
+
+  Timestamp first_send_time = Timestamp::PlusInfinity();
+  Timestamp last_send_time = Timestamp::MinusInfinity();
+  Timestamp first_recv_time = Timestamp::PlusInfinity();
+  Timestamp last_recv_time = Timestamp::MinusInfinity();
+  DataSize recv_size = DataSize::Bytes(0);
+  DataSize send_size = DataSize::Bytes(0);
+  DataSize first_recv_size = DataSize::Bytes(0);
+  DataSize last_send_size = DataSize::Bytes(0);
+  size_t num_sent_packets_in_window = 0;
+  for (const auto& packet : window_) {
+    if (packet.receive_time < first_recv_time) {
+      first_recv_time = packet.receive_time;
+      first_recv_size =
+          packet.sent_packet.size + packet.sent_packet.prior_unacked_data;
+    }
+    last_recv_time = std::max(last_recv_time, packet.receive_time);
+    recv_size += packet.sent_packet.size;
+    recv_size += packet.sent_packet.prior_unacked_data;
+
+    if (packet.sent_packet.send_time < latest_discarded_send_time_) {
+      // If we have dropped packets from the window that were sent after
+      // this packet, then this packet was reordered. Exclude it from
+      // the send rate computation (since the send time may be very far
+      // in the past, leading to underestimation of the send rate).
+      // However, ignoring packets creates a risk that we end up without
+      // any packets left to compute a send rate.
+      continue;
+    }
+    if (packet.sent_packet.send_time > last_send_time) {
+      last_send_time = packet.sent_packet.send_time;
+      last_send_size =
+          packet.sent_packet.size + packet.sent_packet.prior_unacked_data;
+    }
+    first_send_time = std::min(first_send_time, packet.sent_packet.send_time);
+
+    send_size += packet.sent_packet.size;
+    send_size += packet.sent_packet.prior_unacked_data;
+    ++num_sent_packets_in_window;
+  }
+
+  // Suppose a packet of size S is sent every T milliseconds.
+  // A window of N packets would contain N*S bytes, but the time difference
+  // between the first and the last packet would only be (N-1)*T. Thus, we
+  // need to remove the size of one packet to get the correct rate of S/T.
+  // Which packet to remove (if the packets have varying sizes)
+  // depends on the network model.
+  // Suppose that 2 packets with sizes s1 and s2 are received at times t1
+  // and t2, respectively. If the packets were transmitted back to back over
+  // a bottleneck with rate capacity r, then we'd expect t2 = t1 + s2 / r.
+  // Thus, r = s2 / (t2 - t1), so the size of the first packet doesn't affect
+  // the difference between t1 and t2.
+  // Analogously, if the first packet is sent at time t1 and the sender
+  // paces the packets at rate r, then the second packet can be sent at time
+  // t2 = t1 + s1 / r. Thus, the send rate estimate r = s1 / (t2 - t1) doesn't
+  // depend on the size of the last packet.
+  recv_size -= first_recv_size;
+  send_size -= last_send_size;
+
+  // Remove the largest gap by replacing it with the second largest gap.
+  // This is to ensure that spurious "delay spikes" (i.e. when the
+  // network stops transmitting packets for a short period, followed
+  // by a burst of delayed packets) don't cause the estimate to drop.
+  // This could cause an overestimation, which we guard against by
+  // never returning an estimate above the send rate.
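+  // Illustrative example: if packets arrive at t = 0, 10, 20, 520 and 530 ms,
+  // the largest gap is 500 ms and the second largest is 10 ms. The corrected
+  // duration below becomes (530 - 0) - 500 + 10 = 40 ms, so a short network
+  // stall followed by a burst does not collapse the throughput estimate.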
+  RTC_DCHECK(first_recv_time.IsFinite());
+  RTC_DCHECK(last_recv_time.IsFinite());
+  TimeDelta recv_duration = (last_recv_time - first_recv_time) -
+                            largest_recv_gap + second_largest_recv_gap;
+  recv_duration = std::max(recv_duration, TimeDelta::Millis(1));
+
+  if (num_sent_packets_in_window < settings_.required_packets) {
+    // Too few send times to calculate a reliable send rate.
+    return recv_size / recv_duration;
+  }
+
+  RTC_DCHECK(first_send_time.IsFinite());
+  RTC_DCHECK(last_send_time.IsFinite());
+  TimeDelta send_duration = last_send_time - first_send_time;
+  send_duration = std::max(send_duration, TimeDelta::Millis(1));
+
+  return std::min(send_size / send_duration, recv_size / recv_duration);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.h
new file mode 100644
index 0000000000..9d89856496
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_ROBUST_THROUGHPUT_ESTIMATOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_ROBUST_THROUGHPUT_ESTIMATOR_H_
+
+#include <deque>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/timestamp.h"
+#include "modules/congestion_controller/goog_cc/acknowledged_bitrate_estimator_interface.h"
+
+namespace webrtc {
+
+class RobustThroughputEstimator : public AcknowledgedBitrateEstimatorInterface {
+ public:
+  explicit RobustThroughputEstimator(
+      const RobustThroughputEstimatorSettings& settings);
+  ~RobustThroughputEstimator() override;
+
+  void IncomingPacketFeedbackVector(
+      const std::vector<PacketResult>& packet_feedback_vector) override;
+
+  absl::optional<DataRate> bitrate() const override;
+
+  absl::optional<DataRate> PeekRate() const override { return bitrate(); }
+  void SetAlr(bool /*in_alr*/) override {}
+  void SetAlrEndedTime(Timestamp /*alr_ended_time*/) override {}
+
+ private:
+  bool FirstPacketOutsideWindow();
+
+  const RobustThroughputEstimatorSettings settings_;
+  std::deque<PacketResult> window_;
+  Timestamp latest_discarded_send_time_ = Timestamp::MinusInfinity();
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_ROBUST_THROUGHPUT_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator_unittest.cc
new file mode 100644
index 0000000000..95ac525640
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/robust_throughput_estimator_unittest.cc
@@ -0,0 +1,427 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/robust_throughput_estimator.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "test/explicit_key_value_config.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+RobustThroughputEstimatorSettings CreateRobustThroughputEstimatorSettings(
+    absl::string_view field_trial_string) {
+  test::ExplicitKeyValueConfig trials(field_trial_string);
+  RobustThroughputEstimatorSettings settings(&trials);
+  return settings;
+}
+
+class FeedbackGenerator {
+ public:
+  std::vector<PacketResult> CreateFeedbackVector(size_t number_of_packets,
+                                                 DataSize packet_size,
+                                                 DataRate send_rate,
+                                                 DataRate recv_rate) {
+    std::vector<PacketResult> packet_feedback_vector(number_of_packets);
+    for (size_t i = 0; i < number_of_packets; i++) {
+      packet_feedback_vector[i].sent_packet.send_time = send_clock_;
+      packet_feedback_vector[i].sent_packet.sequence_number = sequence_number_;
+      packet_feedback_vector[i].sent_packet.size = packet_size;
+      send_clock_ += packet_size / send_rate;
+      recv_clock_ += packet_size / recv_rate;
+      sequence_number_ += 1;
+      packet_feedback_vector[i].receive_time = recv_clock_;
+    }
+    return packet_feedback_vector;
+  }
+
+  Timestamp CurrentReceiveClock() { return recv_clock_; }
+
+  void AdvanceReceiveClock(TimeDelta delta) { recv_clock_ += delta; }
+
+  void AdvanceSendClock(TimeDelta delta) { send_clock_ += delta; }
+
+ private:
+  Timestamp send_clock_ = Timestamp::Millis(100000);
+  Timestamp recv_clock_ = Timestamp::Millis(10000);
+  uint16_t sequence_number_ = 100;
+};
+
+TEST(RobustThroughputEstimatorTest, InitialEstimate) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  // No estimate until the estimator has enough data.
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(9, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  EXPECT_FALSE(throughput_estimator.bitrate().has_value());
+
+  // Estimate once `required_packets` packets have been received.
+  packet_feedback = feedback_generator.CreateFeedbackVector(
+      1, DataSize::Bytes(1000), send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  EXPECT_EQ(throughput, send_rate);
+
+  // Estimate remains stable when send and receive rates are stable.
+  packet_feedback = feedback_generator.CreateFeedbackVector(
+      15, DataSize::Bytes(1000), send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  throughput = throughput_estimator.bitrate();
+  EXPECT_EQ(throughput, send_rate);
+}
+
+TEST(RobustThroughputEstimatorTest, EstimateAdapts) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+
+  // 1 second, 800kbps, estimate is stable.
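+  // (Rates in these tests are given in bytes per second:
+  // 100000 bytes/s * 8 = 800 kbps, matching the comment above.)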
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+  for (int i = 0; i < 10; ++i) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+
+  // 1 second, 1600kbps, estimate increases
+  send_rate = DataRate::BytesPerSec(200000);
+  recv_rate = DataRate::BytesPerSec(200000);
+  for (int i = 0; i < 20; ++i) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    ASSERT_TRUE(throughput.has_value());
+    EXPECT_GE(throughput.value(), DataRate::BytesPerSec(100000));
+    EXPECT_LE(throughput.value(), send_rate);
+  }
+
+  // 1 second, 1600kbps, estimate is stable
+  for (int i = 0; i < 20; ++i) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+
+  // 1 second, 400kbps, estimate decreases
+  send_rate = DataRate::BytesPerSec(50000);
+  recv_rate = DataRate::BytesPerSec(50000);
+  for (int i = 0; i < 5; ++i) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    ASSERT_TRUE(throughput.has_value());
+    EXPECT_LE(throughput.value(), DataRate::BytesPerSec(200000));
+    EXPECT_GE(throughput.value(), send_rate);
+  }
+
+  // 1 second, 400kbps, estimate is stable
+  send_rate = DataRate::BytesPerSec(50000);
+  recv_rate = DataRate::BytesPerSec(50000);
+  for (int i = 0; i < 5; ++i) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+}
+
+TEST(RobustThroughputEstimatorTest, CappedByReceiveRate) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(25000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  ASSERT_TRUE(throughput.has_value());
+  EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+              recv_rate.bytes_per_sec<double>(),
+              0.05 * recv_rate.bytes_per_sec<double>());  // Allow 5% error
+}
+
+TEST(RobustThroughputEstimatorTest, CappedBySendRate) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(50000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  ASSERT_TRUE(throughput.has_value());
+  EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+              send_rate.bytes_per_sec<double>(),
+              0.05 * send_rate.bytes_per_sec<double>());  // Allow 5% error
+}
+
+TEST(RobustThroughputEstimatorTest, DelaySpike) {
+  FeedbackGenerator feedback_generator;
+  // This test uses a 500ms window to amplify the effect
+  // of a delay spike.
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true,window_duration:500ms/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  EXPECT_EQ(throughput, send_rate);
+
+  // Delay spike. 25 packets sent, but none received.
+  feedback_generator.AdvanceReceiveClock(TimeDelta::Millis(250));
+
+  // Deliver all of the packets during the next 50 ms. (During this time,
+  // we'll have sent an additional 5 packets, so we need to receive 30
+  // packets at 1000 bytes each in 50 ms, i.e. 600000 bytes per second).
+  recv_rate = DataRate::BytesPerSec(600000);
+  // Estimate should not drop.
+  for (int i = 0; i < 30; ++i) {
+    packet_feedback = feedback_generator.CreateFeedbackVector(
+        1, DataSize::Bytes(1000), send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    throughput = throughput_estimator.bitrate();
+    ASSERT_TRUE(throughput.has_value());
+    EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+                send_rate.bytes_per_sec<double>(),
+                0.05 * send_rate.bytes_per_sec<double>());  // Allow 5% error
+  }
+
+  // Delivery at normal rate. When the packets received before the gap
+  // have left the estimator's window, the receive rate will be high, but the
+  // estimate should be capped by the send rate.
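+  // (Illustrative: while the backlog drains, the raw receive rate is several
+  // times the send rate, so it is the std::min(send, recv) in bitrate() that
+  // keeps the estimate near 100000 bytes/s here.)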
+  recv_rate = DataRate::BytesPerSec(100000);
+  for (int i = 0; i < 20; ++i) {
+    packet_feedback = feedback_generator.CreateFeedbackVector(
+        5, DataSize::Bytes(1000), send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    throughput = throughput_estimator.bitrate();
+    ASSERT_TRUE(throughput.has_value());
+    EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+                send_rate.bytes_per_sec<double>(),
+                0.05 * send_rate.bytes_per_sec<double>());  // Allow 5% error
+  }
+}
+
+TEST(RobustThroughputEstimatorTest, HighLoss) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+
+  // 50% loss
+  for (size_t i = 0; i < packet_feedback.size(); i++) {
+    if (i % 2 == 1) {
+      packet_feedback[i].receive_time = Timestamp::PlusInfinity();
+    }
+  }
+
+  std::sort(packet_feedback.begin(), packet_feedback.end(),
+            PacketResult::ReceiveTimeOrder());
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  ASSERT_TRUE(throughput.has_value());
+  EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+              send_rate.bytes_per_sec<double>() / 2,
+              0.05 * send_rate.bytes_per_sec<double>() / 2);  // Allow 5% error
+}
+
+TEST(RobustThroughputEstimatorTest, ReorderedFeedback) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  EXPECT_EQ(throughput, send_rate);
+
+  std::vector<PacketResult> delayed_feedback =
+      feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  packet_feedback = feedback_generator.CreateFeedbackVector(
+      10, DataSize::Bytes(1000), send_rate, recv_rate);
+
+  // Since we're missing some feedback, it's expected that the
+  // estimate will drop.
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  throughput = throughput_estimator.bitrate();
+  ASSERT_TRUE(throughput.has_value());
+  EXPECT_LT(throughput.value(), send_rate);
+
+  // But it should completely recover as soon as we get the feedback.
+  throughput_estimator.IncomingPacketFeedbackVector(delayed_feedback);
+  throughput = throughput_estimator.bitrate();
+  EXPECT_EQ(throughput, send_rate);
+
+  // It should then remain stable (as if the feedback had not been reordered).
+  for (int i = 0; i < 10; ++i) {
+    packet_feedback = feedback_generator.CreateFeedbackVector(
+        15, DataSize::Bytes(1000), send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+}
+
+TEST(RobustThroughputEstimatorTest, DeepReordering) {
+  FeedbackGenerator feedback_generator;
+  // This test uses a 500ms window to amplify the
+  // effect of reordering.
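+  // (With a 500 ms window, a packet delayed by about a second is older than
+  // everything else in the window; latest_discarded_send_time_ in the
+  // estimator keeps such a packet out of the send rate computation.)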
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true,window_duration:500ms/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> delayed_packets =
+      feedback_generator.CreateFeedbackVector(1, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+
+  for (int i = 0; i < 10; i++) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+
+  // The delayed packet arrives ~1 second after it should have.
+  // Since the window is 500 ms, the delayed packet was sent ~500
+  // ms before the second oldest packet. However, the send rate
+  // should not drop.
+  delayed_packets.front().receive_time =
+      feedback_generator.CurrentReceiveClock();
+  throughput_estimator.IncomingPacketFeedbackVector(delayed_packets);
+  auto throughput = throughput_estimator.bitrate();
+  ASSERT_TRUE(throughput.has_value());
+  EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+              send_rate.bytes_per_sec<double>(),
+              0.05 * send_rate.bytes_per_sec<double>());  // Allow 5% error
+
+  // Throughput should stay stable.
+  for (int i = 0; i < 10; i++) {
+    std::vector<PacketResult> packet_feedback =
+        feedback_generator.CreateFeedbackVector(10, DataSize::Bytes(1000),
+                                                send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    auto throughput = throughput_estimator.bitrate();
+    ASSERT_TRUE(throughput.has_value());
+    EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+                send_rate.bytes_per_sec<double>(),
+                0.05 * send_rate.bytes_per_sec<double>());  // Allow 5% error
+  }
+}
+
+TEST(RobustThroughputEstimatorTest, StreamPausedAndResumed) {
+  FeedbackGenerator feedback_generator;
+  RobustThroughputEstimator throughput_estimator(
+      CreateRobustThroughputEstimatorSettings(
+          "WebRTC-Bwe-RobustThroughputEstimatorSettings/"
+          "enabled:true/"));
+  DataRate send_rate(DataRate::BytesPerSec(100000));
+  DataRate recv_rate(DataRate::BytesPerSec(100000));
+
+  std::vector<PacketResult> packet_feedback =
+      feedback_generator.CreateFeedbackVector(20, DataSize::Bytes(1000),
+                                              send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  auto throughput = throughput_estimator.bitrate();
+  EXPECT_TRUE(throughput.has_value());
+  double expected_bytes_per_sec = 100 * 1000.0;
+  EXPECT_NEAR(throughput.value().bytes_per_sec<double>(),
+              expected_bytes_per_sec,
+              0.05 * expected_bytes_per_sec);  // Allow 5% error
+
+  // No packets sent or feedback received for 60s.
+  feedback_generator.AdvanceSendClock(TimeDelta::Seconds(60));
+  feedback_generator.AdvanceReceiveClock(TimeDelta::Seconds(60));
+
+  // Resume sending packets at the same rate as before. The estimate
+  // will initially be invalid, due to lack of recent data.
+  packet_feedback = feedback_generator.CreateFeedbackVector(
+      5, DataSize::Bytes(1000), send_rate, recv_rate);
+  throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+  throughput = throughput_estimator.bitrate();
+  EXPECT_FALSE(throughput.has_value());
+
+  // But it should be back to the normal level once we have enough data.
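+  // (Assuming the default of required_packets = 10: the 5 packets above plus
+  // the first batch of 5 below refill the window far enough for bitrate() to
+  // return a value again.)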
+  for (int i = 0; i < 4; ++i) {
+    packet_feedback = feedback_generator.CreateFeedbackVector(
+        5, DataSize::Bytes(1000), send_rate, recv_rate);
+    throughput_estimator.IncomingPacketFeedbackVector(packet_feedback);
+    throughput = throughput_estimator.bitrate();
+    EXPECT_EQ(throughput, send_rate);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.cc
new file mode 100644
index 0000000000..1e4db1ffaf
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.cc
@@ -0,0 +1,695 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "absl/strings/match.h"
+#include "api/field_trials_view.h"
+#include "api/network_state_predictor.h"
+#include "api/rtc_event_log/rtc_event.h"
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "logging/rtc_event_log/events/rtc_event_bwe_update_loss_based.h"
+#include "modules/congestion_controller/goog_cc/loss_based_bwe_v2.h"
+#include "modules/remote_bitrate_estimator/include/bwe_defines.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+constexpr TimeDelta kBweIncreaseInterval = TimeDelta::Millis(1000);
+constexpr TimeDelta kBweDecreaseInterval = TimeDelta::Millis(300);
+constexpr TimeDelta kStartPhase = TimeDelta::Millis(2000);
+constexpr TimeDelta kBweConverganceTime = TimeDelta::Millis(20000);
+constexpr int kLimitNumPackets = 20;
+constexpr DataRate kDefaultMaxBitrate = DataRate::BitsPerSec(1000000000);
+constexpr TimeDelta kLowBitrateLogPeriod = TimeDelta::Millis(10000);
+constexpr TimeDelta kRtcEventLogPeriod = TimeDelta::Millis(5000);
+// Expecting that RTCP feedback is sent uniformly within [0.5, 1.5]s intervals.
+constexpr TimeDelta kMaxRtcpFeedbackInterval = TimeDelta::Millis(5000);
+
+constexpr float kDefaultLowLossThreshold = 0.02f;
+constexpr float kDefaultHighLossThreshold = 0.1f;
+constexpr DataRate kDefaultBitrateThreshold = DataRate::Zero();
+
+struct UmaRampUpMetric {
+  const char* metric_name;
+  int bitrate_kbps;
+};
+
+const UmaRampUpMetric kUmaRampupMetrics[] = {
+    {"WebRTC.BWE.RampUpTimeTo500kbpsInMs", 500},
+    {"WebRTC.BWE.RampUpTimeTo1000kbpsInMs", 1000},
+    {"WebRTC.BWE.RampUpTimeTo2000kbpsInMs", 2000}};
+const size_t kNumUmaRampupMetrics =
+    sizeof(kUmaRampupMetrics) / sizeof(kUmaRampupMetrics[0]);
+
+const char kBweLosExperiment[] = "WebRTC-BweLossExperiment";
+
+bool BweLossExperimentIsEnabled() {
+  std::string experiment_string =
+      webrtc::field_trial::FindFullName(kBweLosExperiment);
+  // The experiment is enabled iff the field trial string begins with
+  // "Enabled".
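+  // For example, a field trial string of "Enabled-0.02,0.1,0" enables the
+  // experiment and carries the parameters parsed by
+  // ReadBweLossExperimentParameters() below.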
+  return absl::StartsWith(experiment_string, "Enabled");
+}
+
+bool ReadBweLossExperimentParameters(float* low_loss_threshold,
+                                     float* high_loss_threshold,
+                                     uint32_t* bitrate_threshold_kbps) {
+  RTC_DCHECK(low_loss_threshold);
+  RTC_DCHECK(high_loss_threshold);
+  RTC_DCHECK(bitrate_threshold_kbps);
+  std::string experiment_string =
+      webrtc::field_trial::FindFullName(kBweLosExperiment);
+  int parsed_values =
+      sscanf(experiment_string.c_str(), "Enabled-%f,%f,%u", low_loss_threshold,
+             high_loss_threshold, bitrate_threshold_kbps);
+  if (parsed_values == 3) {
+    RTC_CHECK_GT(*low_loss_threshold, 0.0f)
+        << "Loss threshold must be greater than 0.";
+    RTC_CHECK_LE(*low_loss_threshold, 1.0f)
+        << "Loss threshold must be less than or equal to 1.";
+    RTC_CHECK_GT(*high_loss_threshold, 0.0f)
+        << "Loss threshold must be greater than 0.";
+    RTC_CHECK_LE(*high_loss_threshold, 1.0f)
+        << "Loss threshold must be less than or equal to 1.";
+    RTC_CHECK_LE(*low_loss_threshold, *high_loss_threshold)
+        << "The low loss threshold must be less than or equal to the high "
+           "loss threshold.";
+    RTC_CHECK_GE(*bitrate_threshold_kbps, 0)
+        << "Bitrate threshold can't be negative.";
+    RTC_CHECK_LT(*bitrate_threshold_kbps,
+                 std::numeric_limits<int>::max() / 1000)
+        << "Bitrate must be small enough to avoid overflows.";
+    return true;
+  }
+  RTC_LOG(LS_WARNING) << "Failed to parse parameters for BweLossExperiment "
+                         "experiment from field trial string. Using default.";
+  *low_loss_threshold = kDefaultLowLossThreshold;
+  *high_loss_threshold = kDefaultHighLossThreshold;
+  *bitrate_threshold_kbps = kDefaultBitrateThreshold.kbps();
+  return false;
+}
+}  // namespace
+
+LinkCapacityTracker::LinkCapacityTracker()
+    : tracking_rate("rate", TimeDelta::Seconds(10)) {
+  ParseFieldTrial({&tracking_rate},
+                  field_trial::FindFullName("WebRTC-Bwe-LinkCapacity"));
+}
+
+LinkCapacityTracker::~LinkCapacityTracker() {}
+
+void LinkCapacityTracker::UpdateDelayBasedEstimate(
+    Timestamp at_time,
+    DataRate delay_based_bitrate) {
+  if (delay_based_bitrate < last_delay_based_estimate_) {
+    capacity_estimate_bps_ =
+        std::min(capacity_estimate_bps_, delay_based_bitrate.bps<double>());
+    last_link_capacity_update_ = at_time;
+  }
+  last_delay_based_estimate_ = delay_based_bitrate;
+}
+
+void LinkCapacityTracker::OnStartingRate(DataRate start_rate) {
+  if (last_link_capacity_update_.IsInfinite())
+    capacity_estimate_bps_ = start_rate.bps<double>();
+}
+
+void LinkCapacityTracker::OnRateUpdate(absl::optional<DataRate> acknowledged,
+                                       DataRate target,
+                                       Timestamp at_time) {
+  if (!acknowledged)
+    return;
+  DataRate acknowledged_target = std::min(*acknowledged, target);
+  if (acknowledged_target.bps<double>() > capacity_estimate_bps_) {
+    TimeDelta delta = at_time - last_link_capacity_update_;
+    double alpha = delta.IsFinite() ? exp(-(delta / tracking_rate.Get())) : 0;
+    capacity_estimate_bps_ = alpha * capacity_estimate_bps_ +
+                             (1 - alpha) * acknowledged_target.bps<double>();
+  }
+  last_link_capacity_update_ = at_time;
+}
+
+void LinkCapacityTracker::OnRttBackoff(DataRate backoff_rate,
+                                       Timestamp at_time) {
+  capacity_estimate_bps_ =
+      std::min(capacity_estimate_bps_, backoff_rate.bps<double>());
+  last_link_capacity_update_ = at_time;
+}
+
+DataRate LinkCapacityTracker::estimate() const {
+  return DataRate::BitsPerSec(capacity_estimate_bps_);
+}
+
+RttBasedBackoff::RttBasedBackoff(const FieldTrialsView* key_value_config)
+    : disabled_("Disabled"),
+      configured_limit_("limit", TimeDelta::Seconds(3)),
+      drop_fraction_("fraction", 0.8),
+      drop_interval_("interval", TimeDelta::Seconds(1)),
+      bandwidth_floor_("floor", DataRate::KilobitsPerSec(5)),
+      rtt_limit_(TimeDelta::PlusInfinity()),
+      // By initializing this to plus infinity, we make sure that we never
+      // trigger rtt backoff unless packet feedback is enabled.
+      last_propagation_rtt_update_(Timestamp::PlusInfinity()),
+      last_propagation_rtt_(TimeDelta::Zero()),
+      last_packet_sent_(Timestamp::MinusInfinity()) {
+  ParseFieldTrial({&disabled_, &configured_limit_, &drop_fraction_,
+                   &drop_interval_, &bandwidth_floor_},
+                  key_value_config->Lookup("WebRTC-Bwe-MaxRttLimit"));
+  if (!disabled_) {
+    rtt_limit_ = configured_limit_.Get();
+  }
+}
+
+void RttBasedBackoff::UpdatePropagationRtt(Timestamp at_time,
+                                           TimeDelta propagation_rtt) {
+  last_propagation_rtt_update_ = at_time;
+  last_propagation_rtt_ = propagation_rtt;
+}
+
+TimeDelta RttBasedBackoff::CorrectedRtt(Timestamp at_time) const {
+  TimeDelta time_since_rtt = at_time - last_propagation_rtt_update_;
+  TimeDelta timeout_correction = time_since_rtt;
+  // Avoid timeout when no packets are being sent.
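+  // (Illustrative: if the last propagation RTT update is 10 s old but the
+  // last packet was sent only 9 s ago, just 10 - 9 = 1 s counts towards the
+  // timeout correction computed below.)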
+ TimeDelta time_since_packet_sent = at_time - last_packet_sent_; + timeout_correction = + std::max(time_since_rtt - time_since_packet_sent, TimeDelta::Zero()); + return timeout_correction + last_propagation_rtt_; +} + +RttBasedBackoff::~RttBasedBackoff() = default; + +SendSideBandwidthEstimation::SendSideBandwidthEstimation( + const FieldTrialsView* key_value_config, + RtcEventLog* event_log) + : rtt_backoff_(key_value_config), + lost_packets_since_last_loss_update_(0), + expected_packets_since_last_loss_update_(0), + current_target_(DataRate::Zero()), + last_logged_target_(DataRate::Zero()), + min_bitrate_configured_(kCongestionControllerMinBitrate), + max_bitrate_configured_(kDefaultMaxBitrate), + last_low_bitrate_log_(Timestamp::MinusInfinity()), + has_decreased_since_last_fraction_loss_(false), + last_loss_feedback_(Timestamp::MinusInfinity()), + last_loss_packet_report_(Timestamp::MinusInfinity()), + last_fraction_loss_(0), + last_logged_fraction_loss_(0), + last_round_trip_time_(TimeDelta::Zero()), + receiver_limit_(DataRate::PlusInfinity()), + delay_based_limit_(DataRate::PlusInfinity()), + time_last_decrease_(Timestamp::MinusInfinity()), + first_report_time_(Timestamp::MinusInfinity()), + initially_lost_packets_(0), + bitrate_at_2_seconds_(DataRate::Zero()), + uma_update_state_(kNoUpdate), + uma_rtt_state_(kNoUpdate), + rampup_uma_stats_updated_(kNumUmaRampupMetrics, false), + event_log_(event_log), + last_rtc_event_log_(Timestamp::MinusInfinity()), + low_loss_threshold_(kDefaultLowLossThreshold), + high_loss_threshold_(kDefaultHighLossThreshold), + bitrate_threshold_(kDefaultBitrateThreshold), + loss_based_bandwidth_estimator_v1_(key_value_config), + loss_based_bandwidth_estimator_v2_(key_value_config), + loss_based_state_(LossBasedState::kDelayBasedEstimate), + disable_receiver_limit_caps_only_("Disabled") { + RTC_DCHECK(event_log); + if (BweLossExperimentIsEnabled()) { + uint32_t bitrate_threshold_kbps; + if (ReadBweLossExperimentParameters(&low_loss_threshold_, + &high_loss_threshold_, + &bitrate_threshold_kbps)) { + RTC_LOG(LS_INFO) << "Enabled BweLossExperiment with parameters " + << low_loss_threshold_ << ", " << high_loss_threshold_ + << ", " << bitrate_threshold_kbps; + bitrate_threshold_ = DataRate::KilobitsPerSec(bitrate_threshold_kbps); + } + } + ParseFieldTrial({&disable_receiver_limit_caps_only_}, + key_value_config->Lookup("WebRTC-Bwe-ReceiverLimitCapsOnly")); + if (LossBasedBandwidthEstimatorV2Enabled()) { + loss_based_bandwidth_estimator_v2_.SetMinMaxBitrate( + min_bitrate_configured_, max_bitrate_configured_); + } +} + +SendSideBandwidthEstimation::~SendSideBandwidthEstimation() {} + +void SendSideBandwidthEstimation::OnRouteChange() { + lost_packets_since_last_loss_update_ = 0; + expected_packets_since_last_loss_update_ = 0; + current_target_ = DataRate::Zero(); + min_bitrate_configured_ = kCongestionControllerMinBitrate; + max_bitrate_configured_ = kDefaultMaxBitrate; + last_low_bitrate_log_ = Timestamp::MinusInfinity(); + has_decreased_since_last_fraction_loss_ = false; + last_loss_feedback_ = Timestamp::MinusInfinity(); + last_loss_packet_report_ = Timestamp::MinusInfinity(); + last_fraction_loss_ = 0; + last_logged_fraction_loss_ = 0; + last_round_trip_time_ = TimeDelta::Zero(); + receiver_limit_ = DataRate::PlusInfinity(); + delay_based_limit_ = DataRate::PlusInfinity(); + time_last_decrease_ = Timestamp::MinusInfinity(); + first_report_time_ = Timestamp::MinusInfinity(); + initially_lost_packets_ = 0; + bitrate_at_2_seconds_ = DataRate::Zero(); + 
uma_update_state_ = kNoUpdate;
+  uma_rtt_state_ = kNoUpdate;
+  last_rtc_event_log_ = Timestamp::MinusInfinity();
+}
+
+void SendSideBandwidthEstimation::SetBitrates(
+    absl::optional<DataRate> send_bitrate,
+    DataRate min_bitrate,
+    DataRate max_bitrate,
+    Timestamp at_time) {
+  SetMinMaxBitrate(min_bitrate, max_bitrate);
+  if (send_bitrate) {
+    link_capacity_.OnStartingRate(*send_bitrate);
+    SetSendBitrate(*send_bitrate, at_time);
+  }
+}
+
+void SendSideBandwidthEstimation::SetSendBitrate(DataRate bitrate,
+                                                 Timestamp at_time) {
+  RTC_DCHECK_GT(bitrate, DataRate::Zero());
+  // Reset to avoid being capped by the estimate.
+  delay_based_limit_ = DataRate::PlusInfinity();
+  UpdateTargetBitrate(bitrate, at_time);
+  // Clear last sent bitrate history so the new value can be used directly
+  // and not capped.
+  min_bitrate_history_.clear();
+}
+
+void SendSideBandwidthEstimation::SetMinMaxBitrate(DataRate min_bitrate,
+                                                   DataRate max_bitrate) {
+  min_bitrate_configured_ =
+      std::max(min_bitrate, kCongestionControllerMinBitrate);
+  if (max_bitrate > DataRate::Zero() && max_bitrate.IsFinite()) {
+    max_bitrate_configured_ = std::max(min_bitrate_configured_, max_bitrate);
+  } else {
+    max_bitrate_configured_ = kDefaultMaxBitrate;
+  }
+  loss_based_bandwidth_estimator_v2_.SetMinMaxBitrate(min_bitrate_configured_,
+                                                      max_bitrate_configured_);
+}
+
+int SendSideBandwidthEstimation::GetMinBitrate() const {
+  return min_bitrate_configured_.bps<int>();
+}
+
+DataRate SendSideBandwidthEstimation::target_rate() const {
+  DataRate target = current_target_;
+  if (!disable_receiver_limit_caps_only_)
+    target = std::min(target, receiver_limit_);
+  return std::max(min_bitrate_configured_, target);
+}
+
+LossBasedState SendSideBandwidthEstimation::loss_based_state() const {
+  return loss_based_state_;
+}
+
+DataRate SendSideBandwidthEstimation::GetEstimatedLinkCapacity() const {
+  return link_capacity_.estimate();
+}
+
+void SendSideBandwidthEstimation::UpdateReceiverEstimate(Timestamp at_time,
+                                                         DataRate bandwidth) {
+  // TODO(srte): Ensure caller passes PlusInfinity, not zero, to represent no
+  // limitation.
+  receiver_limit_ = bandwidth.IsZero() ? DataRate::PlusInfinity() : bandwidth;
+  ApplyTargetLimits(at_time);
+}
+
+void SendSideBandwidthEstimation::UpdateDelayBasedEstimate(Timestamp at_time,
+                                                           DataRate bitrate) {
+  link_capacity_.UpdateDelayBasedEstimate(at_time, bitrate);
+  // TODO(srte): Ensure caller passes PlusInfinity, not zero, to represent no
+  // limitation.
+  delay_based_limit_ = bitrate.IsZero() ? DataRate::PlusInfinity() : bitrate;
+  ApplyTargetLimits(at_time);
+}
+
+void SendSideBandwidthEstimation::SetAcknowledgedRate(
+    absl::optional<DataRate> acknowledged_rate,
+    Timestamp at_time) {
+  acknowledged_rate_ = acknowledged_rate;
+  if (!acknowledged_rate.has_value()) {
+    return;
+  }
+  if (LossBasedBandwidthEstimatorV1Enabled()) {
+    loss_based_bandwidth_estimator_v1_.UpdateAcknowledgedBitrate(
+        *acknowledged_rate, at_time);
+  }
+  if (LossBasedBandwidthEstimatorV2Enabled()) {
+    loss_based_bandwidth_estimator_v2_.SetAcknowledgedBitrate(
+        *acknowledged_rate);
+  }
+}
+
+void SendSideBandwidthEstimation::UpdateLossBasedEstimator(
+    const TransportPacketsFeedback& report,
+    BandwidthUsage delay_detector_state,
+    absl::optional<DataRate> probe_bitrate,
+    DataRate upper_link_capacity) {
+  if (LossBasedBandwidthEstimatorV1Enabled()) {
+    loss_based_bandwidth_estimator_v1_.UpdateLossStatistics(
+        report.packet_feedbacks, report.feedback_time);
+  }
+  if (LossBasedBandwidthEstimatorV2Enabled()) {
+    loss_based_bandwidth_estimator_v2_.UpdateBandwidthEstimate(
+        report.packet_feedbacks, delay_based_limit_, delay_detector_state,
+        probe_bitrate, upper_link_capacity);
+    UpdateEstimate(report.feedback_time);
+  }
+}
+
+void SendSideBandwidthEstimation::UpdatePacketsLost(int64_t packets_lost,
+                                                    int64_t number_of_packets,
+                                                    Timestamp at_time) {
+  last_loss_feedback_ = at_time;
+  if (first_report_time_.IsInfinite())
+    first_report_time_ = at_time;
+
+  // Check sequence number diff and weight loss report
+  if (number_of_packets > 0) {
+    int64_t expected =
+        expected_packets_since_last_loss_update_ + number_of_packets;
+
+    // Don't generate a loss rate until it can be based on enough packets.
+    if (expected < kLimitNumPackets) {
+      // Accumulate reports.
+      expected_packets_since_last_loss_update_ = expected;
+      lost_packets_since_last_loss_update_ += packets_lost;
+      return;
+    }
+
+    has_decreased_since_last_fraction_loss_ = false;
+    int64_t lost_q8 =
+        std::max<int64_t>(lost_packets_since_last_loss_update_ + packets_lost,
+                          0)
+        << 8;
+    last_fraction_loss_ = std::min<uint8_t>(lost_q8 / expected, 255);
+
+    // Reset accumulators.
+    lost_packets_since_last_loss_update_ = 0;
+    expected_packets_since_last_loss_update_ = 0;
+    last_loss_packet_report_ = at_time;
+    UpdateEstimate(at_time);
+  }
+
+  UpdateUmaStatsPacketsLost(at_time, packets_lost);
+}
+
+void SendSideBandwidthEstimation::UpdateUmaStatsPacketsLost(Timestamp at_time,
+                                                            int packets_lost) {
+  DataRate bitrate_kbps =
+      DataRate::KilobitsPerSec((current_target_.bps() + 500) / 1000);
+  for (size_t i = 0; i < kNumUmaRampupMetrics; ++i) {
+    if (!rampup_uma_stats_updated_[i] &&
+        bitrate_kbps.kbps() >= kUmaRampupMetrics[i].bitrate_kbps) {
+      RTC_HISTOGRAMS_COUNTS_100000(i, kUmaRampupMetrics[i].metric_name,
+                                   (at_time - first_report_time_).ms());
+      rampup_uma_stats_updated_[i] = true;
+    }
+  }
+  if (IsInStartPhase(at_time)) {
+    initially_lost_packets_ += packets_lost;
+  } else if (uma_update_state_ == kNoUpdate) {
+    uma_update_state_ = kFirstDone;
+    bitrate_at_2_seconds_ = bitrate_kbps;
+    RTC_HISTOGRAM_COUNTS("WebRTC.BWE.InitiallyLostPackets",
+                         initially_lost_packets_, 0, 100, 50);
+    RTC_HISTOGRAM_COUNTS("WebRTC.BWE.InitialBandwidthEstimate",
+                         bitrate_at_2_seconds_.kbps(), 0, 2000, 50);
+  } else if (uma_update_state_ == kFirstDone &&
+             at_time - first_report_time_ >= kBweConverganceTime) {
+    uma_update_state_ = kDone;
+    int bitrate_diff_kbps = std::max(
+        bitrate_at_2_seconds_.kbps<int>() - bitrate_kbps.kbps<int>(), 0);
+    RTC_HISTOGRAM_COUNTS("WebRTC.BWE.InitialVsConvergedDiff", bitrate_diff_kbps,
+                         0, 2000, 50);
+  }
+}
+
+void SendSideBandwidthEstimation::UpdateRtt(TimeDelta rtt, Timestamp at_time) {
+  // Update RTT if we were able to compute an RTT based on this RTCP.
+  // FlexFEC doesn't send RTCP SR, which means we won't be able to compute RTT.
+  if (rtt > TimeDelta::Zero())
+    last_round_trip_time_ = rtt;
+
+  if (!IsInStartPhase(at_time) && uma_rtt_state_ == kNoUpdate) {
+    uma_rtt_state_ = kDone;
+    RTC_HISTOGRAM_COUNTS("WebRTC.BWE.InitialRtt", rtt.ms(), 0, 2000, 50);
+  }
+}
+
+void SendSideBandwidthEstimation::UpdateEstimate(Timestamp at_time) {
+  if (rtt_backoff_.CorrectedRtt(at_time) > rtt_backoff_.rtt_limit_) {
+    if (at_time - time_last_decrease_ >= rtt_backoff_.drop_interval_ &&
+        current_target_ > rtt_backoff_.bandwidth_floor_) {
+      time_last_decrease_ = at_time;
+      DataRate new_bitrate =
+          std::max(current_target_ * rtt_backoff_.drop_fraction_,
+                   rtt_backoff_.bandwidth_floor_.Get());
+      link_capacity_.OnRttBackoff(new_bitrate, at_time);
+      UpdateTargetBitrate(new_bitrate, at_time);
+      return;
+    }
+    // TODO(srte): This is likely redundant in most cases.
+    ApplyTargetLimits(at_time);
+    return;
+  }
+
+  // We trust the REMB and/or delay-based estimate during the first 2 seconds
+  // if we haven't had any packet loss reported, to allow startup bitrate
+  // probing.
+  if (last_fraction_loss_ == 0 && IsInStartPhase(at_time)) {
+    DataRate new_bitrate = current_target_;
+    // TODO(srte): We should not allow the new_bitrate to be larger than the
+    // receiver limit here.
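+    // (Note that the std::max calls below can only raise the target towards
+    // the REMB or delay-based estimate during this start phase; the TODO
+    // above refers to this behaviour.)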
+    if (receiver_limit_.IsFinite())
+      new_bitrate = std::max(receiver_limit_, new_bitrate);
+    if (delay_based_limit_.IsFinite())
+      new_bitrate = std::max(delay_based_limit_, new_bitrate);
+    if (LossBasedBandwidthEstimatorV1Enabled()) {
+      loss_based_bandwidth_estimator_v1_.Initialize(new_bitrate);
+    }
+    if (LossBasedBandwidthEstimatorV2Enabled()) {
+      loss_based_bandwidth_estimator_v2_.SetBandwidthEstimate(new_bitrate);
+    }
+
+    if (new_bitrate != current_target_) {
+      min_bitrate_history_.clear();
+      if (LossBasedBandwidthEstimatorV1Enabled()) {
+        min_bitrate_history_.push_back(std::make_pair(at_time, new_bitrate));
+      } else {
+        min_bitrate_history_.push_back(
+            std::make_pair(at_time, current_target_));
+      }
+      UpdateTargetBitrate(new_bitrate, at_time);
+      return;
+    }
+  }
+  UpdateMinHistory(at_time);
+  if (last_loss_packet_report_.IsInfinite()) {
+    // No feedback received.
+    // TODO(srte): This is likely redundant in most cases.
+    ApplyTargetLimits(at_time);
+    return;
+  }
+
+  if (LossBasedBandwidthEstimatorV1ReadyForUse()) {
+    DataRate new_bitrate = loss_based_bandwidth_estimator_v1_.Update(
+        at_time, min_bitrate_history_.front().second, delay_based_limit_,
+        last_round_trip_time_);
+    UpdateTargetBitrate(new_bitrate, at_time);
+    return;
+  }
+
+  if (LossBasedBandwidthEstimatorV2ReadyForUse()) {
+    LossBasedBweV2::Result result =
+        loss_based_bandwidth_estimator_v2_.GetLossBasedResult();
+    loss_based_state_ = result.state;
+    UpdateTargetBitrate(result.bandwidth_estimate, at_time);
+    return;
+  }
+
+  TimeDelta time_since_loss_packet_report = at_time - last_loss_packet_report_;
+  if (time_since_loss_packet_report < 1.2 * kMaxRtcpFeedbackInterval) {
+    // We only care about loss above a given bitrate threshold.
+    float loss = last_fraction_loss_ / 256.0f;
+    // We only make decisions based on loss when the bitrate is above a
+    // threshold. This is a crude way of handling loss which is uncorrelated
+    // to congestion.
+    if (current_target_ < bitrate_threshold_ || loss <= low_loss_threshold_) {
+      // Loss < 2%: Increase rate by 8% of the min bitrate in the last
+      // kBweIncreaseInterval.
+      // Note that by remembering the bitrate over the last second one can
+      // ramp up one second faster than if only allowed to start ramping
+      // at 8% per second rate now. E.g.:
+      //   If sending a constant 100kbps it can ramp up immediately to 108kbps
+      //   whenever a receiver report is received with lower packet loss.
+      //   If instead one would do: current_bitrate_ *= 1.08^(delta time),
+      //   it would take over one second since the lower packet loss to achieve
+      //   108kbps.
+      DataRate new_bitrate = DataRate::BitsPerSec(
+          min_bitrate_history_.front().second.bps() * 1.08 + 0.5);
+
+      // Add 1 kbps extra, just to make sure that we do not get stuck
+      // (gives a little extra increase at low rates, negligible at higher
+      // rates).
+      new_bitrate += DataRate::BitsPerSec(1000);
+      UpdateTargetBitrate(new_bitrate, at_time);
+      return;
+    } else if (current_target_ > bitrate_threshold_) {
+      if (loss <= high_loss_threshold_) {
+        // Loss between 2% - 10%: Do nothing.
+      } else {
+        // Loss > 10%: Limit the rate decreases to once a kBweDecreaseInterval
+        // + rtt.
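+        // (Illustrative: at 20% loss, last_fraction_loss_ = 51 and the
+        // factor (512 - 51) / 512 in the formula below cuts the rate by
+        // roughly 10%, i.e. half of the loss rate.)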
+        if (!has_decreased_since_last_fraction_loss_ &&
+            (at_time - time_last_decrease_) >=
+                (kBweDecreaseInterval + last_round_trip_time_)) {
+          time_last_decrease_ = at_time;
+
+          // Reduce rate:
+          //   newRate = rate * (1 - 0.5*lossRate);
+          //   where packetLoss = 256*lossRate;
+          DataRate new_bitrate = DataRate::BitsPerSec(
+              (current_target_.bps() *
+               static_cast<double>(512 - last_fraction_loss_)) /
+              512.0);
+          has_decreased_since_last_fraction_loss_ = true;
+          UpdateTargetBitrate(new_bitrate, at_time);
+          return;
+        }
+      }
+    }
+  }
+  // TODO(srte): This is likely redundant in most cases.
+  ApplyTargetLimits(at_time);
+}
+
+void SendSideBandwidthEstimation::UpdatePropagationRtt(
+    Timestamp at_time,
+    TimeDelta propagation_rtt) {
+  rtt_backoff_.UpdatePropagationRtt(at_time, propagation_rtt);
+}
+
+void SendSideBandwidthEstimation::OnSentPacket(const SentPacket& sent_packet) {
+  // Only feedback-triggering packets will be reported here.
+  rtt_backoff_.last_packet_sent_ = sent_packet.send_time;
+}
+
+bool SendSideBandwidthEstimation::IsInStartPhase(Timestamp at_time) const {
+  return first_report_time_.IsInfinite() ||
+         at_time - first_report_time_ < kStartPhase;
+}
+
+void SendSideBandwidthEstimation::UpdateMinHistory(Timestamp at_time) {
+  // Remove old data points from history.
+  // Since history precision is in ms, add one so it is able to increase
+  // bitrate if it is off by as little as 0.5ms.
+  while (!min_bitrate_history_.empty() &&
+         at_time - min_bitrate_history_.front().first + TimeDelta::Millis(1) >
+             kBweIncreaseInterval) {
+    min_bitrate_history_.pop_front();
+  }
+
+  // Typical minimum sliding-window algorithm: Pop values higher than current
+  // bitrate before pushing it.
+  while (!min_bitrate_history_.empty() &&
+         current_target_ <= min_bitrate_history_.back().second) {
+    min_bitrate_history_.pop_back();
+  }
+
+  min_bitrate_history_.push_back(std::make_pair(at_time, current_target_));
+}
+
+DataRate SendSideBandwidthEstimation::GetUpperLimit() const {
+  DataRate upper_limit = delay_based_limit_;
+  if (disable_receiver_limit_caps_only_)
+    upper_limit = std::min(upper_limit, receiver_limit_);
+  return std::min(upper_limit, max_bitrate_configured_);
+}
+
+void SendSideBandwidthEstimation::MaybeLogLowBitrateWarning(
+    DataRate bitrate,
+    Timestamp at_time) {
+  if (at_time - last_low_bitrate_log_ > kLowBitrateLogPeriod) {
+    RTC_LOG(LS_WARNING) << "Estimated available bandwidth " << ToString(bitrate)
+                        << " is below configured min bitrate "
+                        << ToString(min_bitrate_configured_) << ".";
+    last_low_bitrate_log_ = at_time;
+  }
+}
+
+void SendSideBandwidthEstimation::MaybeLogLossBasedEvent(Timestamp at_time) {
+  if (current_target_ != last_logged_target_ ||
+      last_fraction_loss_ != last_logged_fraction_loss_ ||
+      at_time - last_rtc_event_log_ > kRtcEventLogPeriod) {
+    event_log_->Log(std::make_unique<RtcEventBweUpdateLossBased>(
+        current_target_.bps(), last_fraction_loss_,
+        expected_packets_since_last_loss_update_));
+    last_logged_fraction_loss_ = last_fraction_loss_;
+    last_logged_target_ = current_target_;
+    last_rtc_event_log_ = at_time;
+  }
+}
+
+void SendSideBandwidthEstimation::UpdateTargetBitrate(DataRate new_bitrate,
+                                                      Timestamp at_time) {
+  new_bitrate = std::min(new_bitrate, GetUpperLimit());
+  if (new_bitrate < min_bitrate_configured_) {
+    MaybeLogLowBitrateWarning(new_bitrate, at_time);
+    new_bitrate = min_bitrate_configured_;
+  }
+  current_target_ = new_bitrate;
+  MaybeLogLossBasedEvent(at_time);
+  link_capacity_.OnRateUpdate(acknowledged_rate_, current_target_, at_time);
+}
+
+void SendSideBandwidthEstimation::ApplyTargetLimits(Timestamp at_time) {
+  UpdateTargetBitrate(current_target_, at_time);
+}
+
+bool SendSideBandwidthEstimation::LossBasedBandwidthEstimatorV1Enabled() const {
+  return loss_based_bandwidth_estimator_v1_.Enabled() &&
+         !LossBasedBandwidthEstimatorV2Enabled();
+}
+
+bool SendSideBandwidthEstimation::LossBasedBandwidthEstimatorV1ReadyForUse()
+    const {
+  return LossBasedBandwidthEstimatorV1Enabled() &&
+         loss_based_bandwidth_estimator_v1_.InUse();
+}
+
+bool SendSideBandwidthEstimation::LossBasedBandwidthEstimatorV2Enabled() const {
+  return loss_based_bandwidth_estimator_v2_.IsEnabled();
+}
+
+bool SendSideBandwidthEstimation::LossBasedBandwidthEstimatorV2ReadyForUse()
+    const {
+  return LossBasedBandwidthEstimatorV2Enabled() &&
+         loss_based_bandwidth_estimator_v2_.IsReady();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h
new file mode 100644
index 0000000000..77510236d3
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h
@@ -0,0 +1,210 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ *  FEC and NACK added bitrate is handled outside class
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_SEND_SIDE_BANDWIDTH_ESTIMATION_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_SEND_SIDE_BANDWIDTH_ESTIMATION_H_
+
+#include <stdint.h>
+
+#include <deque>
+#include <utility>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/field_trials_view.h"
+#include "api/network_state_predictor.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.h"
+#include "modules/congestion_controller/goog_cc/loss_based_bwe_v2.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+
+namespace webrtc {
+
+class RtcEventLog;
+
+class LinkCapacityTracker {
+ public:
+  LinkCapacityTracker();
+  ~LinkCapacityTracker();
+  // Call when a new delay-based estimate is available.
+  void UpdateDelayBasedEstimate(Timestamp at_time,
+                                DataRate delay_based_bitrate);
+  void OnStartingRate(DataRate start_rate);
+  void OnRateUpdate(absl::optional<DataRate> acknowledged,
+                    DataRate target,
+                    Timestamp at_time);
+  void OnRttBackoff(DataRate backoff_rate, Timestamp at_time);
+  DataRate estimate() const;
+
+ private:
+  FieldTrialParameter<TimeDelta> tracking_rate;
+  double capacity_estimate_bps_ = 0;
+  Timestamp last_link_capacity_update_ = Timestamp::MinusInfinity();
+  DataRate last_delay_based_estimate_ = DataRate::PlusInfinity();
+};
+
+class RttBasedBackoff {
+ public:
+  explicit RttBasedBackoff(const FieldTrialsView* key_value_config);
+  ~RttBasedBackoff();
+  void UpdatePropagationRtt(Timestamp at_time, TimeDelta propagation_rtt);
+  TimeDelta CorrectedRtt(Timestamp at_time) const;
+
+  FieldTrialFlag disabled_;
+  FieldTrialParameter<TimeDelta> configured_limit_;
+  FieldTrialParameter<double> drop_fraction_;
+  FieldTrialParameter<TimeDelta> drop_interval_;
+  FieldTrialParameter<DataRate> bandwidth_floor_;
+
+ public:
+  TimeDelta rtt_limit_;
+  Timestamp last_propagation_rtt_update_;
+  TimeDelta last_propagation_rtt_;
+  Timestamp last_packet_sent_;
+};
+
+class SendSideBandwidthEstimation {
+ public:
+  SendSideBandwidthEstimation() = delete;
+  SendSideBandwidthEstimation(const FieldTrialsView* key_value_config,
+                              RtcEventLog* event_log);
+  ~SendSideBandwidthEstimation();
+
+  void OnRouteChange();
+
+  DataRate target_rate() const;
+  LossBasedState loss_based_state() const;
+  uint8_t fraction_loss() const { return last_fraction_loss_; }
+  TimeDelta round_trip_time() const { return last_round_trip_time_; }
+
+  DataRate GetEstimatedLinkCapacity() const;
+  // Call periodically to update estimate.
+  void UpdateEstimate(Timestamp at_time);
+  void OnSentPacket(const SentPacket& sent_packet);
+  void UpdatePropagationRtt(Timestamp at_time, TimeDelta propagation_rtt);
+
+  // Call when we receive a RTCP message with TMMBR or REMB.
+  void UpdateReceiverEstimate(Timestamp at_time, DataRate bandwidth);
+
+  // Call when a new delay-based estimate is available.
+  void UpdateDelayBasedEstimate(Timestamp at_time, DataRate bitrate);
+
+  // Call when we receive a RTCP message with a ReceiveBlock.
+  void UpdatePacketsLost(int64_t packets_lost,
+                         int64_t number_of_packets,
+                         Timestamp at_time);
+
+  // Call when we receive a RTCP message with a ReceiveBlock.
+  void UpdateRtt(TimeDelta rtt, Timestamp at_time);
+
+  void SetBitrates(absl::optional<DataRate> send_bitrate,
+                   DataRate min_bitrate,
+                   DataRate max_bitrate,
+                   Timestamp at_time);
+  void SetSendBitrate(DataRate bitrate, Timestamp at_time);
+  void SetMinMaxBitrate(DataRate min_bitrate, DataRate max_bitrate);
+  int GetMinBitrate() const;
+  void SetAcknowledgedRate(absl::optional<DataRate> acknowledged_rate,
+                           Timestamp at_time);
+  void UpdateLossBasedEstimator(const TransportPacketsFeedback& report,
+                                BandwidthUsage delay_detector_state,
+                                absl::optional<DataRate> probe_bitrate,
+                                DataRate upper_link_capacity);
+
+ private:
+  friend class GoogCcStatePrinter;
+
+  enum UmaState { kNoUpdate, kFirstDone, kDone };
+
+  bool IsInStartPhase(Timestamp at_time) const;
+
+  void UpdateUmaStatsPacketsLost(Timestamp at_time, int packets_lost);
+
+  // Updates history of min bitrates.
+  // After this method returns min_bitrate_history_.front().second contains the
+  // min bitrate used during last kBweIncreaseIntervalMs.
+  void UpdateMinHistory(Timestamp at_time);
+
+  // Gets the upper limit for the target bitrate. This is the minimum of the
+  // delay based limit, the receiver limit and the loss based controller limit.
+  DataRate GetUpperLimit() const;
+  // Prints a warning if `bitrate` is below the configured min bitrate and a
+  // sufficiently long time has passed since the last warning.
+  void MaybeLogLowBitrateWarning(DataRate bitrate, Timestamp at_time);
+  // Stores an update to the event log if the loss rate has changed, the target
+  // has changed, or sufficient time has passed since last stored event.
+  void MaybeLogLossBasedEvent(Timestamp at_time);
+
+  // Caps `bitrate` to [min_bitrate_configured_, max_bitrate_configured_],
+  // sets `current_target_` to the capped value and updates the event log.
+  void UpdateTargetBitrate(DataRate bitrate, Timestamp at_time);
+  // Applies lower and upper bounds to the current target rate.
+  // TODO(srte): This seems to be called even when limits haven't changed, that
+  // should be cleaned up.
+  void ApplyTargetLimits(Timestamp at_time);
+
+  bool LossBasedBandwidthEstimatorV1Enabled() const;
+  bool LossBasedBandwidthEstimatorV2Enabled() const;
+
+  bool LossBasedBandwidthEstimatorV1ReadyForUse() const;
+  bool LossBasedBandwidthEstimatorV2ReadyForUse() const;
+
+  RttBasedBackoff rtt_backoff_;
+  LinkCapacityTracker link_capacity_;
+
+  std::deque<std::pair<Timestamp, DataRate> > min_bitrate_history_;
+
+  // incoming filters
+  int lost_packets_since_last_loss_update_;
+  int expected_packets_since_last_loss_update_;
+
+  absl::optional<DataRate> acknowledged_rate_;
+  DataRate current_target_;
+  DataRate last_logged_target_;
+  DataRate min_bitrate_configured_;
+  DataRate max_bitrate_configured_;
+  Timestamp last_low_bitrate_log_;
+
+  bool has_decreased_since_last_fraction_loss_;
+  Timestamp last_loss_feedback_;
+  Timestamp last_loss_packet_report_;
+  uint8_t last_fraction_loss_;
+  uint8_t last_logged_fraction_loss_;
+  TimeDelta last_round_trip_time_;
+
+  // The max bitrate as set by the receiver in the call. This is typically
+  // signalled using the REMB RTCP message and is used when we don't have any
+  // send side delay based estimate.
+  DataRate receiver_limit_;
+  DataRate delay_based_limit_;
+  Timestamp time_last_decrease_;
+  Timestamp first_report_time_;
+  int initially_lost_packets_;
+  DataRate bitrate_at_2_seconds_;
+  UmaState uma_update_state_;
+  UmaState uma_rtt_state_;
+  std::vector<bool> rampup_uma_stats_updated_;
+  RtcEventLog* const event_log_;
+  Timestamp last_rtc_event_log_;
+  float low_loss_threshold_;
+  float high_loss_threshold_;
+  DataRate bitrate_threshold_;
+  LossBasedBandwidthEstimation loss_based_bandwidth_estimator_v1_;
+  LossBasedBweV2 loss_based_bandwidth_estimator_v2_;
+  LossBasedState loss_based_state_;
+  FieldTrialFlag disable_receiver_limit_caps_only_;
+};
+}  // namespace webrtc
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_SEND_SIDE_BANDWIDTH_ESTIMATION_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation_unittest.cc
new file mode 100644
index 0000000000..17d1aa1ada
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation_unittest.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.h"
+
+#include "api/rtc_event_log/rtc_event.h"
+#include "logging/rtc_event_log/events/rtc_event_bwe_update_loss_based.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "test/explicit_key_value_config.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+MATCHER(LossBasedBweUpdateWithBitrateOnly, "") {
+  if (arg->GetType() != RtcEvent::Type::BweUpdateLossBased) {
+    return false;
+  }
+  auto bwe_event = static_cast<RtcEventBweUpdateLossBased*>(arg);
+  return bwe_event->bitrate_bps() > 0 && bwe_event->fraction_loss() == 0;
+}
+
+MATCHER(LossBasedBweUpdateWithBitrateAndLossFraction, "") {
+  if (arg->GetType() != RtcEvent::Type::BweUpdateLossBased) {
+    return false;
+  }
+  auto bwe_event = static_cast<RtcEventBweUpdateLossBased*>(arg);
+  return bwe_event->bitrate_bps() > 0 && bwe_event->fraction_loss() > 0;
+}
+
+void TestProbing(bool use_delay_based) {
+  ::testing::NiceMock<MockRtcEventLog> event_log;
+  test::ExplicitKeyValueConfig key_value_config("");
+  SendSideBandwidthEstimation bwe(&key_value_config, &event_log);
+  int64_t now_ms = 0;
+  bwe.SetMinMaxBitrate(DataRate::BitsPerSec(100000),
+                       DataRate::BitsPerSec(1500000));
+  bwe.SetSendBitrate(DataRate::BitsPerSec(200000), Timestamp::Millis(now_ms));
+
+  const int kRembBps = 1000000;
+  const int kSecondRembBps = kRembBps + 500000;
+
+  bwe.UpdatePacketsLost(/*packets_lost=*/0, /*number_of_packets=*/1,
+                        Timestamp::Millis(now_ms));
+  bwe.UpdateRtt(TimeDelta::Millis(50), Timestamp::Millis(now_ms));
+
+  // Initial REMB applies immediately.
+  if (use_delay_based) {
+    bwe.UpdateDelayBasedEstimate(Timestamp::Millis(now_ms),
+                                 DataRate::BitsPerSec(kRembBps));
+  } else {
+    bwe.UpdateReceiverEstimate(Timestamp::Millis(now_ms),
+                               DataRate::BitsPerSec(kRembBps));
+  }
+  bwe.UpdateEstimate(Timestamp::Millis(now_ms));
+  EXPECT_EQ(kRembBps, bwe.target_rate().bps());
+
+  // Second REMB doesn't apply immediately.
+  now_ms += 2001;
+  if (use_delay_based) {
+    bwe.UpdateDelayBasedEstimate(Timestamp::Millis(now_ms),
+                                 DataRate::BitsPerSec(kSecondRembBps));
+  } else {
+    bwe.UpdateReceiverEstimate(Timestamp::Millis(now_ms),
+                               DataRate::BitsPerSec(kSecondRembBps));
+  }
+  bwe.UpdateEstimate(Timestamp::Millis(now_ms));
+  EXPECT_EQ(kRembBps, bwe.target_rate().bps());
+}
+
+TEST(SendSideBweTest, InitialRembWithProbing) {
+  TestProbing(false);
+}
+
+TEST(SendSideBweTest, InitialDelayBasedBweWithProbing) {
+  TestProbing(true);
+}
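For illustration only (not upstream code): the tests below feed UpdatePacketsLost(50, 100) and expect fraction_loss() == 128, i.e. the loss ratio in Q8. A hedged sketch of that conversion, with `FractionLossQ8` as a hypothetical helper name:

#include <algorithm>
#include <cstdint>

// Sketch: convert a cumulative loss report to the Q8 fraction exposed by
// fraction_loss(). 50 lost of 100 expected -> (50 << 8) / 100 == 128.
uint8_t FractionLossQ8(int64_t packets_lost, int64_t packets_expected) {
  if (packets_expected <= 0 || packets_lost <= 0)
    return 0;  // Negative loss (e.g. duplicates) clamps to zero, which is
               // what the FractionLossIsNotOverflowed test below relies on.
  return static_cast<uint8_t>(
      std::min<int64_t>((packets_lost << 8) / packets_expected, 255));
}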
+TEST(SendSideBweTest, DoesntReapplyBitrateDecreaseWithoutFollowingRemb) {
+  MockRtcEventLog event_log;
+  EXPECT_CALL(event_log, LogProxy(LossBasedBweUpdateWithBitrateOnly()))
+      .Times(1);
+  EXPECT_CALL(event_log,
+              LogProxy(LossBasedBweUpdateWithBitrateAndLossFraction()))
+      .Times(1);
+  test::ExplicitKeyValueConfig key_value_config("");
+  SendSideBandwidthEstimation bwe(&key_value_config, &event_log);
+  static const int kMinBitrateBps = 100000;
+  static const int kInitialBitrateBps = 1000000;
+  int64_t now_ms = 1000;
+  bwe.SetMinMaxBitrate(DataRate::BitsPerSec(kMinBitrateBps),
+                       DataRate::BitsPerSec(1500000));
+  bwe.SetSendBitrate(DataRate::BitsPerSec(kInitialBitrateBps),
+                     Timestamp::Millis(now_ms));
+
+  static const uint8_t kFractionLoss = 128;
+  static const int64_t kRttMs = 50;
+  now_ms += 10000;
+
+  EXPECT_EQ(kInitialBitrateBps, bwe.target_rate().bps());
+  EXPECT_EQ(0, bwe.fraction_loss());
+  EXPECT_EQ(0, bwe.round_trip_time().ms());
+
+  // Signal heavy loss to go down in bitrate.
+  bwe.UpdatePacketsLost(/*packets_lost=*/50, /*number_of_packets=*/100,
+                        Timestamp::Millis(now_ms));
+  bwe.UpdateRtt(TimeDelta::Millis(kRttMs), Timestamp::Millis(now_ms));
+
+  // Trigger an update one second later so the decrease is not rate limited.
+  now_ms += 1000;
+  bwe.UpdateEstimate(Timestamp::Millis(now_ms));
+  EXPECT_LT(bwe.target_rate().bps(), kInitialBitrateBps);
+  // Verify that the obtained bitrate isn't hitting the min bitrate, or this
+  // test doesn't make sense. If this ever happens, update the thresholds or
+  // loss rates so that it doesn't hit min bitrate after one bitrate update.
+  EXPECT_GT(bwe.target_rate().bps(), kMinBitrateBps);
+  EXPECT_EQ(kFractionLoss, bwe.fraction_loss());
+  EXPECT_EQ(kRttMs, bwe.round_trip_time().ms());
+
+  // Triggering an update shouldn't apply further downgrade nor upgrade since
+  // there's no intermediate receiver block received indicating whether this is
+  // currently good or not.
+  int last_bitrate_bps = bwe.target_rate().bps();
+  // Trigger an update one second later so it is not rate limited (but it
+  // still shouldn't update).
+  now_ms += 1000;
+  bwe.UpdateEstimate(Timestamp::Millis(now_ms));
+
+  EXPECT_EQ(last_bitrate_bps, bwe.target_rate().bps());
+  // The old loss rate should still be applied though.
+  EXPECT_EQ(kFractionLoss, bwe.fraction_loss());
+  EXPECT_EQ(kRttMs, bwe.round_trip_time().ms());
+}
+
+TEST(SendSideBweTest, SettingSendBitrateOverridesDelayBasedEstimate) {
+  ::testing::NiceMock<MockRtcEventLog> event_log;
+  test::ExplicitKeyValueConfig key_value_config("");
+  SendSideBandwidthEstimation bwe(&key_value_config, &event_log);
+  static const int kMinBitrateBps = 10000;
+  static const int kMaxBitrateBps = 10000000;
+  static const int kInitialBitrateBps = 300000;
+  static const int kDelayBasedBitrateBps = 350000;
+  static const int kForcedHighBitrate = 2500000;
+
+  int64_t now_ms = 0;
+
+  bwe.SetMinMaxBitrate(DataRate::BitsPerSec(kMinBitrateBps),
+                       DataRate::BitsPerSec(kMaxBitrateBps));
+  bwe.SetSendBitrate(DataRate::BitsPerSec(kInitialBitrateBps),
+                     Timestamp::Millis(now_ms));
+
+  bwe.UpdateDelayBasedEstimate(Timestamp::Millis(now_ms),
+                               DataRate::BitsPerSec(kDelayBasedBitrateBps));
+  bwe.UpdateEstimate(Timestamp::Millis(now_ms));
+  EXPECT_GE(bwe.target_rate().bps(), kInitialBitrateBps);
+  EXPECT_LE(bwe.target_rate().bps(), kDelayBasedBitrateBps);
+
+  bwe.SetSendBitrate(DataRate::BitsPerSec(kForcedHighBitrate),
+                     Timestamp::Millis(now_ms));
+  EXPECT_EQ(bwe.target_rate().bps(), kForcedHighBitrate);
+}
+
+TEST(RttBasedBackoff, DefaultEnabled) {
+  test::ExplicitKeyValueConfig key_value_config("");
+  RttBasedBackoff rtt_backoff(&key_value_config);
+  EXPECT_TRUE(rtt_backoff.rtt_limit_.IsFinite());
+}
+
+TEST(RttBasedBackoff, CanBeDisabled) {
+  test::ExplicitKeyValueConfig key_value_config(
+      "WebRTC-Bwe-MaxRttLimit/Disabled/");
+  RttBasedBackoff rtt_backoff(&key_value_config);
+  EXPECT_TRUE(rtt_backoff.rtt_limit_.IsPlusInfinity());
+}
+
+TEST(SendSideBweTest, FractionLossIsNotOverflowed) {
+  MockRtcEventLog event_log;
+  test::ExplicitKeyValueConfig key_value_config("");
+  SendSideBandwidthEstimation bwe(&key_value_config, &event_log);
+  static const int kMinBitrateBps = 100000;
+  static const int kInitialBitrateBps = 1000000;
+  int64_t now_ms = 1000;
+  bwe.SetMinMaxBitrate(DataRate::BitsPerSec(kMinBitrateBps),
+                       DataRate::BitsPerSec(1500000));
+  bwe.SetSendBitrate(DataRate::BitsPerSec(kInitialBitrateBps),
+                     Timestamp::Millis(now_ms));
+
+  now_ms += 10000;
+
+  EXPECT_EQ(kInitialBitrateBps, bwe.target_rate().bps());
+  EXPECT_EQ(0, bwe.fraction_loss());
+  // Signal negative loss.
+  bwe.UpdatePacketsLost(/*packets_lost=*/-1, /*number_of_packets=*/100,
+                        Timestamp::Millis(now_ms));
+  EXPECT_EQ(0, bwe.fraction_loss());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bwe_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bwe_gn/moz.build
new file mode 100644
index 0000000000..c93e412e83
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bwe_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["BWE_TEST_LOGGING_COMPILE_TIME_ENABLE"] = "0"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+SOURCES += [
+    "/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bandwidth_estimation.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "rt"
+    ]
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+    OS_LIBS += [
+        "crypt32",
+        "iphlpapi",
+        "secur32",
+        "winmm"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    CXXFLAGS += [
+        "-mfpu=neon"
+    ]
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("send_side_bwe_gn")
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.cc
new file mode 100644
index 0000000000..6a8849ed6d
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.cc
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/congestion_controller/goog_cc/test/goog_cc_printer.h"
+
+#include <math.h>
+
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/congestion_controller/goog_cc/alr_detector.h"
+#include "modules/congestion_controller/goog_cc/delay_based_bwe.h"
+#include "modules/congestion_controller/goog_cc/trendline_estimator.h"
+#include "modules/remote_bitrate_estimator/aimd_rate_control.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+void WriteTypedValue(RtcEventLogOutput* out, int value) {
+  LogWriteFormat(out, "%i", value);
+}
+void WriteTypedValue(RtcEventLogOutput* out, double value) {
+  LogWriteFormat(out, "%.6f", value);
+}
+void WriteTypedValue(RtcEventLogOutput* out, absl::optional<DataRate> value) {
+  LogWriteFormat(out, "%.0f", value ? value->bytes_per_sec() : NAN);
+}
+void WriteTypedValue(RtcEventLogOutput* out, absl::optional<DataSize> value) {
+  LogWriteFormat(out, "%.0f", value ? value->bytes() : NAN);
+}
+void WriteTypedValue(RtcEventLogOutput* out, absl::optional<TimeDelta> value) {
+  LogWriteFormat(out, "%.3f", value ? value->seconds() : NAN);
+}
+void WriteTypedValue(RtcEventLogOutput* out, absl::optional<Timestamp> value) {
+  LogWriteFormat(out, "%.3f", value ? value->seconds() : NAN);
+}
+
+template <typename F>
+class TypedFieldLogger : public FieldLogger {
+ public:
+  TypedFieldLogger(absl::string_view name, F&& getter)
+      : name_(name), getter_(std::forward<F>(getter)) {}
+  const std::string& name() const override { return name_; }
+  void WriteValue(RtcEventLogOutput* out) override {
+    WriteTypedValue(out, getter_());
+  }
+
+ private:
+  std::string name_;
+  F getter_;
+};
+
+template <typename F>
+FieldLogger* Log(absl::string_view name, F&& getter) {
+  return new TypedFieldLogger<F>(name, std::forward<F>(getter));
+}
+
+}  // namespace
+GoogCcStatePrinter::GoogCcStatePrinter() {
+  for (auto* logger : CreateLoggers()) {
+    loggers_.emplace_back(logger);
+  }
+}
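For illustration only (not upstream code): the pattern above lets a lambda's return type pick the matching WriteTypedValue overload at compile time, while the FieldLogger base erases the type so all loggers fit in one container. A minimal, self-contained analogue using printf; all names here are hypothetical:

#include <cstdio>
#include <memory>
#include <utility>
#include <vector>

// Overloads selected by the getter's return type.
void PrintTypedValue(int value) { std::printf("%i\n", value); }
void PrintTypedValue(double value) { std::printf("%.6f\n", value); }

struct Field {
  virtual ~Field() = default;
  virtual void Print() = 0;
};

template <typename F>
struct TypedField : Field {
  explicit TypedField(F getter) : getter_(std::move(getter)) {}
  void Print() override { PrintTypedValue(getter_()); }  // Overload resolution
  F getter_;                                             // happens here.
};

template <typename F>
std::unique_ptr<Field> MakeField(F getter) {
  return std::make_unique<TypedField<F>>(std::move(getter));
}

int main() {
  std::vector<std::unique_ptr<Field>> fields;
  fields.push_back(MakeField([] { return 42; }));    // int overload
  fields.push_back(MakeField([] { return 0.25; }));  // double overload
  for (auto& f : fields) f->Print();
}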
+
+std::deque<FieldLogger*> GoogCcStatePrinter::CreateLoggers() {
+  auto stable_estimate = [this] {
+    return DataRate::KilobitsPerSec(
+        controller_->delay_based_bwe_->rate_control_.link_capacity_
+            .estimate_kbps_.value_or(-INFINITY));
+  };
+  auto rate_control_state = [this] {
+    return static_cast<int>(
+        controller_->delay_based_bwe_->rate_control_.rate_control_state_);
+  };
+  auto trend = [this] {
+    return reinterpret_cast<TrendlineEstimator*>(
+        controller_->delay_based_bwe_->active_delay_detector_);
+  };
+  auto acknowledged_rate = [this] {
+    return controller_->acknowledged_bitrate_estimator_->bitrate();
+  };
+  auto loss_cont = [&] {
+    return &controller_->bandwidth_estimation_
+                ->loss_based_bandwidth_estimator_v1_;
+  };
+  std::deque<FieldLogger*> loggers({
+      Log("time", [=] { return target_.at_time; }),
+      Log("rtt", [=] { return target_.network_estimate.round_trip_time; }),
+      Log("target", [=] { return target_.target_rate; }),
+      Log("stable_target", [=] { return target_.stable_target_rate; }),
+      Log("pacing", [=] { return pacing_.data_rate(); }),
+      Log("padding", [=] { return pacing_.pad_rate(); }),
+      Log("window", [=] { return congestion_window_; }),
+      Log("rate_control_state", [=] { return rate_control_state(); }),
+      Log("stable_estimate", [=] { return stable_estimate(); }),
+      Log("trendline", [=] { return trend()->prev_trend_; }),
+      Log("trendline_modified_offset",
+          [=] { return trend()->prev_modified_trend_; }),
+      Log("trendline_offset_threshold", [=] { return trend()->threshold_; }),
+      Log("acknowledged_rate", [=] { return acknowledged_rate(); }),
+      Log("est_capacity", [=] { return est_.link_capacity; }),
+      Log("est_capacity_dev", [=] { return est_.link_capacity_std_dev; }),
+      Log("est_capacity_min", [=] { return est_.link_capacity_min; }),
+      Log("est_cross_traffic", [=] { return est_.cross_traffic_ratio; }),
+      Log("est_cross_delay", [=] { return est_.cross_delay_rate; }),
+      Log("est_spike_delay", [=] { return est_.spike_delay_rate; }),
+      Log("est_pre_buffer", [=] { return est_.pre_link_buffer_delay; }),
+      Log("est_post_buffer", [=] { return est_.post_link_buffer_delay; }),
+      Log("est_propagation", [=] { return est_.propagation_delay; }),
+      Log("loss_ratio", [=] { return loss_cont()->last_loss_ratio_; }),
+      Log("loss_average", [=] { return loss_cont()->average_loss_; }),
+      Log("loss_average_max", [=] { return loss_cont()->average_loss_max_; }),
+      Log("loss_thres_inc",
+          [=] { return loss_cont()->loss_increase_threshold(); }),
+      Log("loss_thres_dec",
+          [=] { return loss_cont()->loss_decrease_threshold(); }),
+      Log("loss_dec_rate", [=] { return loss_cont()->decreased_bitrate(); }),
+      Log("loss_based_rate", [=] { return loss_cont()->loss_based_bitrate_; }),
+      Log("loss_ack_rate",
+          [=] { return loss_cont()->acknowledged_bitrate_max_; }),
+      Log("data_window", [=] { return controller_->current_data_window_; }),
+      Log("pushback_target",
+          [=] { return controller_->last_pushback_target_rate_; }),
+  });
+  return loggers;
+}
+GoogCcStatePrinter::~GoogCcStatePrinter() = default;
+
+void GoogCcStatePrinter::PrintHeaders(RtcEventLogOutput* log) {
+  int ix = 0;
+  for (const auto& logger : loggers_) {
+    if (ix++)
+      log->Write(" ");
+    log->Write(logger->name());
+  }
+  log->Write("\n");
+  log->Flush();
+}
+
+void GoogCcStatePrinter::PrintState(RtcEventLogOutput* log,
+                                    GoogCcNetworkController* controller,
+                                    Timestamp at_time) {
+  controller_ = controller;
+  auto state_update = controller_->GetNetworkState(at_time);
+  target_ = state_update.target_rate.value();
+  pacing_ = state_update.pacer_config.value();
+  if (state_update.congestion_window)
+    congestion_window_ = *state_update.congestion_window;
+  if (controller_->network_estimator_) {
+    est_ = controller_->network_estimator_->GetCurrentEstimate().value_or(
+        NetworkStateEstimate());
+  }
+
+  int ix = 0;
+  for (const auto& logger : loggers_) {
+    if (ix++)
+      log->Write(" ");
+    logger->WriteValue(log);
+  }
+
+  log->Write("\n");
+  log->Flush();
+}
+
+GoogCcDebugFactory::GoogCcDebugFactory()
+    : GoogCcDebugFactory(GoogCcFactoryConfig()) {}
+
+GoogCcDebugFactory::GoogCcDebugFactory(GoogCcFactoryConfig config)
+    : GoogCcNetworkControllerFactory(std::move(config)) {}
+
+std::unique_ptr<NetworkControllerInterface> GoogCcDebugFactory::Create(
+    NetworkControllerConfig config) {
+  RTC_CHECK(controller_ == nullptr);
+  auto controller = GoogCcNetworkControllerFactory::Create(config);
+  controller_ = static_cast<GoogCcNetworkController*>(controller.get());
+  return controller;
+}
+
+void GoogCcDebugFactory::PrintState(const Timestamp at_time) {
+  if (controller_ && log_writer_) {
+    printer_.PrintState(log_writer_.get(), controller_, at_time);
+  }
+}
+
+void GoogCcDebugFactory::AttachWriter(
+    std::unique_ptr<RtcEventLogOutput> log_writer) {
+  if (log_writer) {
+    log_writer_ = std::move(log_writer);
+    printer_.PrintHeaders(log_writer_.get());
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.h
new file mode 100644
index 0000000000..16fa657e71
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/test/goog_cc_printer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_TEST_GOOG_CC_PRINTER_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_TEST_GOOG_CC_PRINTER_H_
+
+#include <deque>
+#include <memory>
+#include <string>
+
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "api/transport/goog_cc_factory.h"
+#include "api/transport/network_control.h"
+#include "api/transport/network_types.h"
+#include "api/units/timestamp.h"
+#include "modules/congestion_controller/goog_cc/goog_cc_network_control.h"
+#include "test/logging/log_writer.h"
+
+namespace webrtc {
+
+class FieldLogger {
+ public:
+  virtual ~FieldLogger() = default;
+  virtual const std::string& name() const = 0;
+  virtual void WriteValue(RtcEventLogOutput* out) = 0;
+};
+
+class GoogCcStatePrinter {
+ public:
+  GoogCcStatePrinter();
+  GoogCcStatePrinter(const GoogCcStatePrinter&) = delete;
+  GoogCcStatePrinter& operator=(const GoogCcStatePrinter&) = delete;
+  ~GoogCcStatePrinter();
+
+  void PrintHeaders(RtcEventLogOutput* log);
+  void PrintState(RtcEventLogOutput* log,
+                  GoogCcNetworkController* controller,
+                  Timestamp at_time);
+
+ private:
+  std::deque<FieldLogger*> CreateLoggers();
+  std::deque<std::unique_ptr<FieldLogger>> loggers_;
+
+  GoogCcNetworkController* controller_ = nullptr;
+  TargetTransferRate target_;
+  PacerConfig pacing_;
+  DataSize congestion_window_ = DataSize::PlusInfinity();
+  NetworkStateEstimate est_;
+};
+
+class GoogCcDebugFactory : public GoogCcNetworkControllerFactory {
+ public:
+  GoogCcDebugFactory();
+  explicit GoogCcDebugFactory(GoogCcFactoryConfig config);
+  std::unique_ptr<NetworkControllerInterface> Create(
+      NetworkControllerConfig config) override;
+
+  void PrintState(Timestamp at_time);
+
+  void AttachWriter(std::unique_ptr<RtcEventLogOutput> log_writer);
+
+ private:
+  GoogCcStatePrinter printer_;
+  GoogCcNetworkController* controller_ = nullptr;
+  std::unique_ptr<RtcEventLogOutput> log_writer_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_TEST_GOOG_CC_PRINTER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.cc
new file mode 100644
index 0000000000..88182d4f80
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.cc
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/trendline_estimator.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <string>
+
+#include "absl/strings/match.h"
+#include "absl/types/optional.h"
+#include "api/network_state_predictor.h"
+#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace {
+
+// Parameters for linear least squares fit of regression line to noisy data.
+constexpr double kDefaultTrendlineSmoothingCoeff = 0.9;
+constexpr double kDefaultTrendlineThresholdGain = 4.0;
+const char kBweWindowSizeInPacketsExperiment[] =
+    "WebRTC-BweWindowSizeInPackets";
+
+size_t ReadTrendlineFilterWindowSize(const FieldTrialsView* key_value_config) {
+  std::string experiment_string =
+      key_value_config->Lookup(kBweWindowSizeInPacketsExperiment);
+  size_t window_size;
+  int parsed_values =
+      sscanf(experiment_string.c_str(), "Enabled-%zu", &window_size);
+  if (parsed_values == 1) {
+    if (window_size > 1)
+      return window_size;
+    RTC_LOG(LS_WARNING) << "Window size must be greater than 1.";
+  }
+  RTC_LOG(LS_WARNING) << "Failed to parse parameters for BweWindowSizeInPackets"
+                         " experiment from field trial string. Using default.";
+  return TrendlineEstimatorSettings::kDefaultTrendlineWindowSize;
+}
+
+absl::optional<double> LinearFitSlope(
+    const std::deque<TrendlineEstimator::PacketTiming>& packets) {
+  RTC_DCHECK(packets.size() >= 2);
+  // Compute the "center of mass".
+  double sum_x = 0;
+  double sum_y = 0;
+  for (const auto& packet : packets) {
+    sum_x += packet.arrival_time_ms;
+    sum_y += packet.smoothed_delay_ms;
+  }
+  double x_avg = sum_x / packets.size();
+  double y_avg = sum_y / packets.size();
+  // Compute the slope k = \sum (x_i-x_avg)(y_i-y_avg) / \sum (x_i-x_avg)^2
+  double numerator = 0;
+  double denominator = 0;
+  for (const auto& packet : packets) {
+    double x = packet.arrival_time_ms;
+    double y = packet.smoothed_delay_ms;
+    numerator += (x - x_avg) * (y - y_avg);
+    denominator += (x - x_avg) * (x - x_avg);
+  }
+  if (denominator == 0)
+    return absl::nullopt;
+  return numerator / denominator;
+}
+
+absl::optional<double> ComputeSlopeCap(
+    const std::deque<TrendlineEstimator::PacketTiming>& packets,
+    const TrendlineEstimatorSettings& settings) {
+  RTC_DCHECK(1 <= settings.beginning_packets &&
+             settings.beginning_packets < packets.size());
+  RTC_DCHECK(1 <= settings.end_packets &&
+             settings.end_packets < packets.size());
+  RTC_DCHECK(settings.beginning_packets + settings.end_packets <=
+             packets.size());
+  TrendlineEstimator::PacketTiming early = packets[0];
+  for (size_t i = 1; i < settings.beginning_packets; ++i) {
+    if (packets[i].raw_delay_ms < early.raw_delay_ms)
+      early = packets[i];
+  }
+  size_t late_start = packets.size() - settings.end_packets;
+  TrendlineEstimator::PacketTiming late = packets[late_start];
+  for (size_t i = late_start + 1; i < packets.size(); ++i) {
+    if (packets[i].raw_delay_ms < late.raw_delay_ms)
+      late = packets[i];
+  }
+  if (late.arrival_time_ms - early.arrival_time_ms < 1) {
+    return absl::nullopt;
+  }
+  return (late.raw_delay_ms - early.raw_delay_ms) /
+             (late.arrival_time_ms - early.arrival_time_ms) +
+         settings.cap_uncertainty;
+}
+
+constexpr double kMaxAdaptOffsetMs = 15.0;
+constexpr double kOverUsingTimeThreshold = 10;
+constexpr int kMinNumDeltas = 60;
+constexpr int kDeltaCounterMax = 1000;
+
+}  // namespace
+
+constexpr char TrendlineEstimatorSettings::kKey[];
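For illustration only (not upstream code): LinearFitSlope above computes the ordinary least-squares slope k = sum((x_i - x_avg)(y_i - y_avg)) / sum((x_i - x_avg)^2). A freestanding sketch over plain (x, y) pairs, with `Slope` as a hypothetical name:

#include <optional>
#include <utility>
#include <vector>

// Sketch: least-squares slope of y over x; nullopt when all x coincide
// (zero denominator), mirroring LinearFitSlope above.
std::optional<double> Slope(const std::vector<std::pair<double, double>>& pts) {
  double sum_x = 0, sum_y = 0;
  for (const auto& p : pts) { sum_x += p.first; sum_y += p.second; }
  const double x_avg = sum_x / pts.size();
  const double y_avg = sum_y / pts.size();
  double num = 0, den = 0;
  for (const auto& p : pts) {
    num += (p.first - x_avg) * (p.second - y_avg);
    den += (p.first - x_avg) * (p.first - x_avg);
  }
  if (den == 0) return std::nullopt;
  return num / den;
}
// E.g. points (0,0), (20,2), (40,4) give slope 0.1: delay grows 2 ms for
// every 20 ms of arrival time.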
+
+TrendlineEstimatorSettings::TrendlineEstimatorSettings(
+    const FieldTrialsView* key_value_config) {
+  if (absl::StartsWith(
+          key_value_config->Lookup(kBweWindowSizeInPacketsExperiment),
+          "Enabled")) {
+    window_size = ReadTrendlineFilterWindowSize(key_value_config);
+  }
+  Parser()->Parse(key_value_config->Lookup(TrendlineEstimatorSettings::kKey));
+  if (window_size < 10 || 200 < window_size) {
+    RTC_LOG(LS_WARNING) << "Window size must be between 10 and 200 packets";
+    window_size = kDefaultTrendlineWindowSize;
+  }
+  if (enable_cap) {
+    if (beginning_packets < 1 || end_packets < 1 ||
+        beginning_packets > window_size || end_packets > window_size) {
+      RTC_LOG(LS_WARNING) << "Size of beginning and end must be between 1 and "
+                          << window_size;
+      enable_cap = false;
+      beginning_packets = end_packets = 0;
+      cap_uncertainty = 0.0;
+    }
+    if (beginning_packets + end_packets > window_size) {
+      RTC_LOG(LS_WARNING)
+          << "Size of beginning plus end can't exceed the window size";
+      enable_cap = false;
+      beginning_packets = end_packets = 0;
+      cap_uncertainty = 0.0;
+    }
+    if (cap_uncertainty < 0.0 || 0.025 < cap_uncertainty) {
+      RTC_LOG(LS_WARNING) << "Cap uncertainty must be between 0 and 0.025";
+      cap_uncertainty = 0.0;
+    }
+  }
+}
+
+std::unique_ptr<StructParametersParser> TrendlineEstimatorSettings::Parser() {
+  return StructParametersParser::Create("sort", &enable_sort,  //
+                                        "cap", &enable_cap,  //
+                                        "beginning_packets",
+                                        &beginning_packets,  //
+                                        "end_packets", &end_packets,  //
+                                        "cap_uncertainty", &cap_uncertainty,  //
+                                        "window_size", &window_size);
+}
+
+TrendlineEstimator::TrendlineEstimator(
+    const FieldTrialsView* key_value_config,
+    NetworkStatePredictor* network_state_predictor)
+    : settings_(key_value_config),
+      smoothing_coef_(kDefaultTrendlineSmoothingCoeff),
+      threshold_gain_(kDefaultTrendlineThresholdGain),
+      num_of_deltas_(0),
+      first_arrival_time_ms_(-1),
+      accumulated_delay_(0),
+      smoothed_delay_(0),
+      delay_hist_(),
+      k_up_(0.0087),
+      k_down_(0.039),
+      overusing_time_threshold_(kOverUsingTimeThreshold),
+      threshold_(12.5),
+      prev_modified_trend_(NAN),
+      last_update_ms_(-1),
+      prev_trend_(0.0),
+      time_over_using_(-1),
+      overuse_counter_(0),
+      hypothesis_(BandwidthUsage::kBwNormal),
+      hypothesis_predicted_(BandwidthUsage::kBwNormal),
+      network_state_predictor_(network_state_predictor) {
+  RTC_LOG(LS_INFO)
+      << "Using Trendline filter for delay change estimation with settings "
+      << settings_.Parser()->Encode() << " and "
+      << (network_state_predictor_ ? "injected" : "no")
+      << " network state predictor";
+}
+
+TrendlineEstimator::~TrendlineEstimator() {}
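For illustration only (not upstream code): the constructor above fixes smoothing_coef_ = 0.9, and UpdateTrendline below smooths the accumulated delay with a first-order IIR filter before fitting the line. A one-function sketch of that step:

// Sketch: s <- alpha * s + (1 - alpha) * d, with alpha =
// kDefaultTrendlineSmoothingCoeff = 0.9. Each new accumulated delay d moves
// the smoothed value only 10% of the way toward d, damping jitter.
double Smooth(double smoothed, double d, double alpha = 0.9) {
  return alpha * smoothed + (1 - alpha) * d;
}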
+
+void TrendlineEstimator::UpdateTrendline(double recv_delta_ms,
+                                         double send_delta_ms,
+                                         int64_t send_time_ms,
+                                         int64_t arrival_time_ms,
+                                         size_t packet_size) {
+  const double delta_ms = recv_delta_ms - send_delta_ms;
+  ++num_of_deltas_;
+  num_of_deltas_ = std::min(num_of_deltas_, kDeltaCounterMax);
+  if (first_arrival_time_ms_ == -1)
+    first_arrival_time_ms_ = arrival_time_ms;
+
+  // Exponential backoff filter.
+  accumulated_delay_ += delta_ms;
+  BWE_TEST_LOGGING_PLOT(1, "accumulated_delay_ms", arrival_time_ms,
+                        accumulated_delay_);
+  smoothed_delay_ = smoothing_coef_ * smoothed_delay_ +
+                    (1 - smoothing_coef_) * accumulated_delay_;
+  BWE_TEST_LOGGING_PLOT(1, "smoothed_delay_ms", arrival_time_ms,
+                        smoothed_delay_);
+
+  // Maintain packet window
+  delay_hist_.emplace_back(
+      static_cast<double>(arrival_time_ms - first_arrival_time_ms_),
+      smoothed_delay_, accumulated_delay_);
+  if (settings_.enable_sort) {
+    for (size_t i = delay_hist_.size() - 1;
+         i > 0 &&
+         delay_hist_[i].arrival_time_ms < delay_hist_[i - 1].arrival_time_ms;
+         --i) {
+      std::swap(delay_hist_[i], delay_hist_[i - 1]);
+    }
+  }
+  if (delay_hist_.size() > settings_.window_size)
+    delay_hist_.pop_front();
+
+  // Simple linear regression.
+  double trend = prev_trend_;
+  if (delay_hist_.size() == settings_.window_size) {
+    // Update trend_ if it is possible to fit a line to the data. The delay
+    // trend can be seen as an estimate of (send_rate - capacity)/capacity.
+    // 0 < trend < 1   ->  the delay increases, queues are filling up
+    //   trend == 0    ->  the delay does not change
+    //   trend < 0     ->  the delay decreases, queues are being emptied
+    trend = LinearFitSlope(delay_hist_).value_or(trend);
+    if (settings_.enable_cap) {
+      absl::optional<double> cap = ComputeSlopeCap(delay_hist_, settings_);
+      // We only use the cap to filter out overuse detections, not
+      // to detect additional underuses.
+      if (trend >= 0 && cap.has_value() && trend > cap.value()) {
+        trend = cap.value();
+      }
+    }
+  }
+  BWE_TEST_LOGGING_PLOT(1, "trendline_slope", arrival_time_ms, trend);
+
+  Detect(trend, send_delta_ms, arrival_time_ms);
+}
+
+void TrendlineEstimator::Update(double recv_delta_ms,
+                                double send_delta_ms,
+                                int64_t send_time_ms,
+                                int64_t arrival_time_ms,
+                                size_t packet_size,
+                                bool calculated_deltas) {
+  if (calculated_deltas) {
+    UpdateTrendline(recv_delta_ms, send_delta_ms, send_time_ms, arrival_time_ms,
+                    packet_size);
+  }
+  if (network_state_predictor_) {
+    hypothesis_predicted_ = network_state_predictor_->Update(
+        send_time_ms, arrival_time_ms, hypothesis_);
+  }
+}
+
+BandwidthUsage TrendlineEstimator::State() const {
+  return network_state_predictor_ ? hypothesis_predicted_ : hypothesis_;
+}
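For illustration only (not upstream code): Detect(), which follows, does not compare the raw slope against the threshold directly; it first amplifies it by the sample count (capped at kMinNumDeltas = 60) and the threshold gain 4.0. A hedged sketch of that gating:

#include <algorithm>

// Sketch: the raw slope (ms of queuing delay per ms of arrival time) is
// scaled by min(num_of_deltas, 60) * 4.0 before thresholding.
double ModifiedTrend(double trend, int num_of_deltas) {
  constexpr int kMinNumDeltas = 60;
  constexpr double kThresholdGain = 4.0;
  return std::min(num_of_deltas, kMinNumDeltas) * trend * kThresholdGain;
}
// With a full window and trend 0.05, the detector compares
// 60 * 0.05 * 4.0 = 12 against threshold_ (initially 12.5).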
+
+void TrendlineEstimator::Detect(double trend, double ts_delta, int64_t now_ms) {
+  if (num_of_deltas_ < 2) {
+    hypothesis_ = BandwidthUsage::kBwNormal;
+    return;
+  }
+  const double modified_trend =
+      std::min(num_of_deltas_, kMinNumDeltas) * trend * threshold_gain_;
+  prev_modified_trend_ = modified_trend;
+  BWE_TEST_LOGGING_PLOT(1, "T", now_ms, modified_trend);
+  BWE_TEST_LOGGING_PLOT(1, "threshold", now_ms, threshold_);
+  if (modified_trend > threshold_) {
+    if (time_over_using_ == -1) {
+      // Initialize the timer. Assume that we've been
+      // over-using half of the time since the previous
+      // sample.
+      time_over_using_ = ts_delta / 2;
+    } else {
+      // Increment timer
+      time_over_using_ += ts_delta;
+    }
+    overuse_counter_++;
+    if (time_over_using_ > overusing_time_threshold_ && overuse_counter_ > 1) {
+      if (trend >= prev_trend_) {
+        time_over_using_ = 0;
+        overuse_counter_ = 0;
+        hypothesis_ = BandwidthUsage::kBwOverusing;
+      }
+    }
+  } else if (modified_trend < -threshold_) {
+    time_over_using_ = -1;
+    overuse_counter_ = 0;
+    hypothesis_ = BandwidthUsage::kBwUnderusing;
+  } else {
+    time_over_using_ = -1;
+    overuse_counter_ = 0;
+    hypothesis_ = BandwidthUsage::kBwNormal;
+  }
+  prev_trend_ = trend;
+  UpdateThreshold(modified_trend, now_ms);
+}
+
+void TrendlineEstimator::UpdateThreshold(double modified_trend,
+                                         int64_t now_ms) {
+  if (last_update_ms_ == -1)
+    last_update_ms_ = now_ms;
+
+  if (fabs(modified_trend) > threshold_ + kMaxAdaptOffsetMs) {
+    // Avoid adapting the threshold to big latency spikes, caused e.g.,
+    // by a sudden capacity drop.
+    last_update_ms_ = now_ms;
+    return;
+  }
+
+  const double k = fabs(modified_trend) < threshold_ ? k_down_ : k_up_;
+  const int64_t kMaxTimeDeltaMs = 100;
+  int64_t time_delta_ms = std::min(now_ms - last_update_ms_, kMaxTimeDeltaMs);
+  threshold_ += k * (fabs(modified_trend) - threshold_) * time_delta_ms;
+  threshold_ = rtc::SafeClamp(threshold_, 6.f, 600.f);
+  last_update_ms_ = now_ms;
+}
+
+}  // namespace webrtc
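For illustration only (not upstream code): UpdateThreshold above adapts the detection threshold toward |modified trend| with asymmetric gains (k_down = 0.039 inside, k_up = 0.0087 outside) and ignores large spikes. A standalone sketch, with `AdaptThreshold` as a hypothetical name:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch: move the threshold toward |modified_trend|, scaled by elapsed
// time (capped at 100 ms) and clamped to [6, 600], as above.
double AdaptThreshold(double threshold, double modified_trend,
                      int64_t delta_ms) {
  const double k = std::fabs(modified_trend) < threshold ? 0.039 : 0.0087;
  const int64_t capped_ms = std::min<int64_t>(delta_ms, 100);
  threshold += k * (std::fabs(modified_trend) - threshold) * capped_ms;
  return std::clamp(threshold, 6.0, 600.0);
}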
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.h b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.h
new file mode 100644
index 0000000000..ffda25df74
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_CONGESTION_CONTROLLER_GOOG_CC_TRENDLINE_ESTIMATOR_H_
+#define MODULES_CONGESTION_CONTROLLER_GOOG_CC_TRENDLINE_ESTIMATOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <deque>
+#include <memory>
+#include <utility>
+
+#include "api/field_trials_view.h"
+#include "api/network_state_predictor.h"
+#include "modules/congestion_controller/goog_cc/delay_increase_detector_interface.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+
+namespace webrtc {
+
+struct TrendlineEstimatorSettings {
+  static constexpr char kKey[] = "WebRTC-Bwe-TrendlineEstimatorSettings";
+  static constexpr unsigned kDefaultTrendlineWindowSize = 20;
+
+  TrendlineEstimatorSettings() = delete;
+  explicit TrendlineEstimatorSettings(const FieldTrialsView* key_value_config);
+
+  // Sort the packets in the window. Should be redundant,
+  // and costs almost nothing when it is.
+  bool enable_sort = false;
+
+  // Cap the trendline slope based on the minimum delay seen
+  // in the beginning_packets and end_packets respectively.
+  bool enable_cap = false;
+  unsigned beginning_packets = 7;
+  unsigned end_packets = 7;
+  double cap_uncertainty = 0.0;
+
+  // Size (in packets) of the window.
+  unsigned window_size = kDefaultTrendlineWindowSize;
+
+  std::unique_ptr<StructParametersParser> Parser();
+};
+
+class TrendlineEstimator : public DelayIncreaseDetectorInterface {
+ public:
+  TrendlineEstimator(const FieldTrialsView* key_value_config,
+                     NetworkStatePredictor* network_state_predictor);
+
+  ~TrendlineEstimator() override;
+
+  TrendlineEstimator(const TrendlineEstimator&) = delete;
+  TrendlineEstimator& operator=(const TrendlineEstimator&) = delete;
+
+  // Update the estimator with a new sample. The deltas should represent deltas
+  // between timestamp groups as defined by the InterArrival class.
+  void Update(double recv_delta_ms,
+              double send_delta_ms,
+              int64_t send_time_ms,
+              int64_t arrival_time_ms,
+              size_t packet_size,
+              bool calculated_deltas) override;
+
+  void UpdateTrendline(double recv_delta_ms,
+                       double send_delta_ms,
+                       int64_t send_time_ms,
+                       int64_t arrival_time_ms,
+                       size_t packet_size);
+
+  BandwidthUsage State() const override;
+
+  struct PacketTiming {
+    PacketTiming(double arrival_time_ms,
+                 double smoothed_delay_ms,
+                 double raw_delay_ms)
+        : arrival_time_ms(arrival_time_ms),
+          smoothed_delay_ms(smoothed_delay_ms),
+          raw_delay_ms(raw_delay_ms) {}
+    double arrival_time_ms;
+    double smoothed_delay_ms;
+    double raw_delay_ms;
+  };
+
+ private:
+  friend class GoogCcStatePrinter;
+  void Detect(double trend, double ts_delta, int64_t now_ms);
+
+  void UpdateThreshold(double modified_offset, int64_t now_ms);
+
+  // Parameters.
+  TrendlineEstimatorSettings settings_;
+  const double smoothing_coef_;
+  const double threshold_gain_;
+  // Used by the existing threshold.
+  int num_of_deltas_;
+  // Keep the arrival times small by using the change from the first packet.
+  int64_t first_arrival_time_ms_;
+  // Exponential backoff filtering.
+  double accumulated_delay_;
+  double smoothed_delay_;
+  // Linear least squares regression.
+  std::deque<PacketTiming> delay_hist_;
+
+  const double k_up_;
+  const double k_down_;
+  double overusing_time_threshold_;
+  double threshold_;
+  double prev_modified_trend_;
+  int64_t last_update_ms_;
+  double prev_trend_;
+  double time_over_using_;
+  int overuse_counter_;
+  BandwidthUsage hypothesis_;
+  BandwidthUsage hypothesis_predicted_;
+  NetworkStatePredictor* network_state_predictor_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_GOOG_CC_TRENDLINE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator_unittest.cc
new file mode 100644
index 0000000000..b0195abdf5
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/goog_cc/trendline_estimator_unittest.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/goog_cc/trendline_estimator.h"
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/transport/field_trial_based_config.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+class PacketTimeGenerator {
+ public:
+  PacketTimeGenerator(int64_t initial_clock, double time_between_packets)
+      : initial_clock_(initial_clock),
+        time_between_packets_(time_between_packets),
+        packets_(0) {}
+  int64_t operator()() {
+    return initial_clock_ + time_between_packets_ * packets_++;
+  }
+
+ private:
+  const int64_t initial_clock_;
+  const double time_between_packets_;
+  size_t packets_;
+};
+
+class TrendlineEstimatorTest : public testing::Test {
+ public:
+  TrendlineEstimatorTest()
+      : send_times(kPacketCount),
+        recv_times(kPacketCount),
+        packet_sizes(kPacketCount),
+        config(),
+        estimator(&config, nullptr),
+        count(1) {
+    std::fill(packet_sizes.begin(), packet_sizes.end(), kPacketSizeBytes);
+  }
+
+  void RunTestUntilStateChange() {
+    RTC_DCHECK_EQ(send_times.size(), kPacketCount);
+    RTC_DCHECK_EQ(recv_times.size(), kPacketCount);
+    RTC_DCHECK_EQ(packet_sizes.size(), kPacketCount);
+    RTC_DCHECK_GE(count, 1);
+    RTC_DCHECK_LT(count, kPacketCount);
+
+    auto initial_state = estimator.State();
+    for (; count < kPacketCount; count++) {
+      double recv_delta = recv_times[count] - recv_times[count - 1];
+      double send_delta = send_times[count] - send_times[count - 1];
+      estimator.Update(recv_delta, send_delta, send_times[count],
+                       recv_times[count], packet_sizes[count], true);
+      if (estimator.State() != initial_state) {
+        return;
+      }
+    }
+  }
+
+ protected:
+  const size_t kPacketCount = 25;
+  const size_t kPacketSizeBytes = 1200;
+  std::vector<int64_t> send_times;
+  std::vector<int64_t> recv_times;
+  std::vector<size_t> packet_sizes;
+  const FieldTrialBasedConfig config;
+  TrendlineEstimator estimator;
+  size_t count;
+};
+}  // namespace
+
+TEST_F(TrendlineEstimatorTest, Normal) {
+  PacketTimeGenerator send_time_generator(123456789 /*initial clock*/,
+                                          20 /*20 ms between sent packets*/);
+  std::generate(send_times.begin(), send_times.end(), send_time_generator);
+
+  PacketTimeGenerator recv_time_generator(987654321 /*initial clock*/,
+                                          20 /*delivered at the same pace*/);
+  std::generate(recv_times.begin(), recv_times.end(), recv_time_generator);
+
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwNormal);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwNormal);
+  EXPECT_EQ(count, kPacketCount);  // All packets processed
+}
+
+TEST_F(TrendlineEstimatorTest, Overusing) {
+  PacketTimeGenerator send_time_generator(123456789 /*initial clock*/,
+                                          20 /*20 ms between sent packets*/);
+  std::generate(send_times.begin(), send_times.end(), send_time_generator);
+
+  PacketTimeGenerator recv_time_generator(987654321 /*initial clock*/,
+                                          1.1 * 20 /*10% slower delivery*/);
+  std::generate(recv_times.begin(), recv_times.end(), recv_time_generator);
+
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwNormal);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwOverusing);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwOverusing);
+  EXPECT_EQ(count, kPacketCount);  // All packets processed
+}
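For illustration only (not upstream code): a quick numeric check of the Overusing setup above. Packets are sent every 20 ms but delivered every 22 ms, so one-way delay grows 2 ms per packet and the fitted delay-over-arrival-time slope is 2/22, clearly positive:

int main() {
  const double send_spacing_ms = 20.0;
  const double recv_spacing_ms = 1.1 * 20.0;  // 10% slower delivery.
  const double delay_growth_per_packet = recv_spacing_ms - send_spacing_ms;
  const double slope = delay_growth_per_packet / recv_spacing_ms;  // ~0.091
  return slope > 0 ? 0 : 1;  // Positive slope -> overuse direction.
}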
+
+TEST_F(TrendlineEstimatorTest, Underusing) {
+  PacketTimeGenerator send_time_generator(123456789 /*initial clock*/,
+                                          20 /*20 ms between sent packets*/);
+  std::generate(send_times.begin(), send_times.end(), send_time_generator);
+
+  PacketTimeGenerator recv_time_generator(987654321 /*initial clock*/,
+                                          0.85 * 20 /*15% faster delivery*/);
+  std::generate(recv_times.begin(), recv_times.end(), recv_time_generator);
+
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwNormal);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwUnderusing);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwUnderusing);
+  EXPECT_EQ(count, kPacketCount);  // All packets processed
+}
+
+TEST_F(TrendlineEstimatorTest, IncludesSmallPacketsByDefault) {
+  PacketTimeGenerator send_time_generator(123456789 /*initial clock*/,
+                                          20 /*20 ms between sent packets*/);
+  std::generate(send_times.begin(), send_times.end(), send_time_generator);
+
+  PacketTimeGenerator recv_time_generator(987654321 /*initial clock*/,
+                                          1.1 * 20 /*10% slower delivery*/);
+  std::generate(recv_times.begin(), recv_times.end(), recv_time_generator);
+
+  std::fill(packet_sizes.begin(), packet_sizes.end(), 100);
+
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwNormal);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwOverusing);
+  RunTestUntilStateChange();
+  EXPECT_EQ(estimator.State(), BandwidthUsage::kBwOverusing);
+  EXPECT_EQ(count, kPacketCount);  // All packets processed
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/include/receive_side_congestion_controller.h b/third_party/libwebrtc/modules/congestion_controller/include/receive_side_congestion_controller.h
new file mode 100644
index 0000000000..7696396016
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/include/receive_side_congestion_controller.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_INCLUDE_RECEIVE_SIDE_CONGESTION_CONTROLLER_H_
+#define MODULES_CONGESTION_CONTROLLER_INCLUDE_RECEIVE_SIDE_CONGESTION_CONTROLLER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/transport/network_control.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "modules/congestion_controller/remb_throttler.h"
+#include "modules/pacing/packet_router.h"
+#include "modules/remote_bitrate_estimator/remote_estimator_proxy.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+class RemoteBitrateEstimator;
+
+// This class represents the congestion control state for receive streams. For
+// send side bandwidth estimation, it simply relays feedback for each received
+// RTP packet back to the sender. For receive side bandwidth estimation, it
+// runs the estimation locally and sends the results back to the sender.
+class ReceiveSideCongestionController : public CallStatsObserver {
+ public:
+  ReceiveSideCongestionController(
+      Clock* clock,
+      RemoteEstimatorProxy::TransportFeedbackSender feedback_sender,
+      RembThrottler::RembSender remb_sender,
+      NetworkStateEstimator* network_state_estimator);
+
+  ~ReceiveSideCongestionController() override {}
+
+  virtual void OnReceivedPacket(int64_t arrival_time_ms,
+                                size_t payload_size,
+                                const RTPHeader& header);
+
+  void SetSendPeriodicFeedback(bool send_periodic_feedback);
+
+  // Implements CallStatsObserver.
+  void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) override;
+
+  // This is the send bitrate, used to control the rate of feedback messages.
+  void OnBitrateChanged(int bitrate_bps);
+
+  // Ensures the remote party is notified of the receive bitrate no larger than
+  // `bitrate` using RTCP REMB.
+  void SetMaxDesiredReceiveBitrate(DataRate bitrate);
+
+  void SetTransportOverhead(DataSize overhead_per_packet);
+
+  // Returns latest receive side bandwidth estimation.
+  // Returns zero if receive side bandwidth estimation is unavailable.
+  DataRate LatestReceiveSideEstimate() const;
+
+  // Removes stream from receive side bandwidth estimation.
+  // Noop if receive side bwe is not used or stream doesn't participate in it.
+  void RemoveStream(uint32_t ssrc);
+
+  // Runs periodic tasks if it is time to run them. Returns the delay until
+  // the next time `MaybeProcess` needs to run.
+  TimeDelta MaybeProcess();
+
+ private:
+  void PickEstimatorFromHeader(const RTPHeader& header)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  void PickEstimator() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  Clock& clock_;
+  RembThrottler remb_throttler_;
+  RemoteEstimatorProxy remote_estimator_proxy_;
+
+  mutable Mutex mutex_;
+  std::unique_ptr<RemoteBitrateEstimator> rbe_ RTC_GUARDED_BY(mutex_);
+  bool using_absolute_send_time_ RTC_GUARDED_BY(mutex_);
+  uint32_t packets_since_absolute_send_time_ RTC_GUARDED_BY(mutex_);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_INCLUDE_RECEIVE_SIDE_CONGESTION_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/BUILD.gn b/third_party/libwebrtc/modules/congestion_controller/pcc/BUILD.gn
new file mode 100644
index 0000000000..85b12b3771
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/BUILD.gn
@@ -0,0 +1,123 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_library("pcc") {
+  sources = [
+    "pcc_factory.cc",
+    "pcc_factory.h",
+  ]
+  deps = [
+    ":pcc_controller",
+    "../../../api/transport:network_control",
+    "../../../api/units:time_delta",
+  ]
+}
+
+rtc_library("pcc_controller") {
+  sources = [
+    "pcc_network_controller.cc",
+    "pcc_network_controller.h",
+  ]
+  deps = [
+    ":bitrate_controller",
+    ":monitor_interval",
+    ":rtt_tracker",
+    "../../../api/transport:network_control",
+    "../../../api/units:data_rate",
+    "../../../api/units:data_size",
+    "../../../api/units:time_delta",
+    "../../../api/units:timestamp",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:random",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+rtc_library("monitor_interval") {
+  sources = [
+    "monitor_interval.cc",
+    "monitor_interval.h",
+  ]
+  deps = [
+    "../../../api/transport:network_control",
+    "../../../api/units:data_rate",
+    "../../../api/units:data_size",
+    "../../../api/units:time_delta",
+    "../../../api/units:timestamp",
+    "../../../rtc_base:logging",
+  ]
+}
+
+rtc_library("rtt_tracker") {
+  sources = [
+    "rtt_tracker.cc",
+    "rtt_tracker.h",
+  ]
+  deps = [
+    "../../../api/transport:network_control",
+    "../../../api/units:time_delta",
+    "../../../api/units:timestamp",
+  ]
+}
+
+rtc_library("utility_function") {
+  sources = [
+    "utility_function.cc",
+    "utility_function.h",
+  ]
+  deps = [
+    ":monitor_interval",
+    "../../../api/transport:network_control",
+    "../../../api/units:data_rate",
+    "../../../rtc_base:checks",
+  ]
+}
+
+rtc_library("bitrate_controller") {
+  sources = [
+    "bitrate_controller.cc",
+    "bitrate_controller.h",
+  ]
+  deps = [
+    ":monitor_interval",
+    ":utility_function",
+    "../../../api/transport:network_control",
+    "../../../api/units:data_rate",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+if (rtc_include_tests && !build_with_chromium) {
+  rtc_library("pcc_unittests") {
+    testonly = true
+    sources = [
+      "bitrate_controller_unittest.cc",
+      "monitor_interval_unittest.cc",
+      "pcc_network_controller_unittest.cc",
+      "rtt_tracker_unittest.cc",
+      "utility_function_unittest.cc",
+    ]
+    deps = [
+      ":bitrate_controller",
+      ":monitor_interval",
+      ":pcc",
+      ":pcc_controller",
+      ":rtt_tracker",
+      ":utility_function",
+      "../../../api/transport:network_control",
+      "../../../api/units:data_rate",
+      "../../../api/units:data_size",
+      "../../../api/units:time_delta",
+      "../../../api/units:timestamp",
+      "../../../test:test_support",
+      "../../../test/scenario",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.cc
new file mode 100644
index 0000000000..16b8e6966f
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/bitrate_controller.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+
+namespace webrtc {
+namespace pcc {
+
+PccBitrateController::PccBitrateController(double initial_conversion_factor,
+                                           double initial_dynamic_boundary,
+                                           double dynamic_boundary_increment,
+                                           double rtt_gradient_coefficient,
+                                           double loss_coefficient,
+                                           double throughput_coefficient,
+                                           double throughput_power,
+                                           double rtt_gradient_threshold,
+                                           double delay_gradient_negative_bound)
+    : PccBitrateController(initial_conversion_factor,
+                           initial_dynamic_boundary,
+                           dynamic_boundary_increment,
+                           std::make_unique<ModifiedVivaceUtilityFunction>(
+                               rtt_gradient_coefficient,
+                               loss_coefficient,
+                               throughput_coefficient,
+                               throughput_power,
+                               rtt_gradient_threshold,
+                               delay_gradient_negative_bound)) {}
+
+PccBitrateController::PccBitrateController(
+    double initial_conversion_factor,
+    double initial_dynamic_boundary,
+    double dynamic_boundary_increment,
+    std::unique_ptr<PccUtilityFunctionInterface> utility_function)
+    : consecutive_boundary_adjustments_number_(0),
+      initial_dynamic_boundary_(initial_dynamic_boundary),
+      dynamic_boundary_increment_(dynamic_boundary_increment),
+      utility_function_(std::move(utility_function)),
+      step_size_adjustments_number_(0),
+      initial_conversion_factor_(initial_conversion_factor) {}
+
+PccBitrateController::~PccBitrateController() = default;
+
+double PccBitrateController::ComputeStepSize(double utility_gradient) {
+  // Computes number of consecutive step size adjustments.
+  if (utility_gradient > 0) {
+    step_size_adjustments_number_ =
+        std::max<int64_t>(step_size_adjustments_number_ + 1, 1);
+  } else if (utility_gradient < 0) {
+    step_size_adjustments_number_ =
+        std::min<int64_t>(step_size_adjustments_number_ - 1, -1);
+  } else {
+    step_size_adjustments_number_ = 0;
+  }
+  // Computes step size amplifier.
+  int64_t step_size_amplifier = 1;
+  if (std::abs(step_size_adjustments_number_) <= 3) {
+    step_size_amplifier =
+        std::max<int64_t>(std::abs(step_size_adjustments_number_), 1);
+  } else {
+    step_size_amplifier = 2 * std::abs(step_size_adjustments_number_) - 3;
+  }
+  return step_size_amplifier * initial_conversion_factor_;
+}
+
+double PccBitrateController::ApplyDynamicBoundary(double rate_change,
+                                                  double bitrate) {
+  double rate_change_abs = std::abs(rate_change);
+  int64_t rate_change_sign = (rate_change > 0) ? 1 : -1;
+  if (consecutive_boundary_adjustments_number_ * rate_change_sign < 0) {
+    consecutive_boundary_adjustments_number_ = 0;
+  }
+  double dynamic_change_boundary =
+      initial_dynamic_boundary_ +
+      std::abs(consecutive_boundary_adjustments_number_) *
+          dynamic_boundary_increment_;
+  double boundary = bitrate * dynamic_change_boundary;
+  if (rate_change_abs > boundary) {
+    consecutive_boundary_adjustments_number_ += rate_change_sign;
+    return boundary * rate_change_sign;
+  }
+  // Rate change smaller than boundary. Reset boundary to the smallest possible
+  // that would allow the change.
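+  // (Worked example, illustrative: with initial_dynamic_boundary_ = 0.1,
+  // dynamic_boundary_increment_ = 0.1 and two earlier adjustments in the same
+  // direction, the boundary is bitrate * (0.1 + 2 * 0.1) = 0.3 * bitrate; a
+  // smaller proposed change reaches the loop below, which also unwinds the
+  // consecutive-adjustment counter.)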
+  while (rate_change_abs <= boundary &&
+         consecutive_boundary_adjustments_number_ * rate_change_sign > 0) {
+    consecutive_boundary_adjustments_number_ -= rate_change_sign;
+    dynamic_change_boundary =
+        initial_dynamic_boundary_ +
+        std::abs(consecutive_boundary_adjustments_number_) *
+            dynamic_boundary_increment_;
+    boundary = bitrate * dynamic_change_boundary;
+  }
+  consecutive_boundary_adjustments_number_ += rate_change_sign;
+  return rate_change;
+}
+
+absl::optional<DataRate>
+PccBitrateController::ComputeRateUpdateForSlowStartMode(
+    const PccMonitorInterval& monitor_interval) {
+  double utility_value = utility_function_->Compute(monitor_interval);
+  if (previous_utility_.has_value() && utility_value <= previous_utility_) {
+    return absl::nullopt;
+  }
+  previous_utility_ = utility_value;
+  return monitor_interval.GetTargetSendingRate();
+}
+
+DataRate PccBitrateController::ComputeRateUpdateForOnlineLearningMode(
+    const std::vector<PccMonitorInterval>& intervals,
+    DataRate bandwidth_estimate) {
+  double first_utility = utility_function_->Compute(intervals[0]);
+  double second_utility = utility_function_->Compute(intervals[1]);
+  double first_bitrate_bps = intervals[0].GetTargetSendingRate().bps();
+  double second_bitrate_bps = intervals[1].GetTargetSendingRate().bps();
+  double gradient = (first_utility - second_utility) /
+                    (first_bitrate_bps - second_bitrate_bps);
+  double rate_change_bps = gradient * ComputeStepSize(gradient);  // delta_r
+  rate_change_bps =
+      ApplyDynamicBoundary(rate_change_bps, bandwidth_estimate.bps());
+  return DataRate::BitsPerSec(
+      std::max(0.0, bandwidth_estimate.bps() + rate_change_bps));
+}
+
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.h b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.h
new file mode 100644
index 0000000000..fadeea1b55
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_BITRATE_CONTROLLER_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_BITRATE_CONTROLLER_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/units/data_rate.h"
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+#include "modules/congestion_controller/pcc/utility_function.h"
+
+namespace webrtc {
+namespace pcc {
+
+class PccBitrateController {
+ public:
+  PccBitrateController(double initial_conversion_factor,
+                       double initial_dynamic_boundary,
+                       double dynamic_boundary_increment,
+                       double rtt_gradient_coefficient,
+                       double loss_coefficient,
+                       double throughput_coefficient,
+                       double throughput_power,
+                       double rtt_gradient_threshold,
+                       double delay_gradient_negative_bound);
+
+  PccBitrateController(
+      double initial_conversion_factor,
+      double initial_dynamic_boundary,
+      double dynamic_boundary_increment,
+      std::unique_ptr<PccUtilityFunctionInterface> utility_function);
+
+  absl::optional<DataRate> ComputeRateUpdateForSlowStartMode(
+      const PccMonitorInterval& monitor_interval);
+
+  DataRate ComputeRateUpdateForOnlineLearningMode(
+      const std::vector<PccMonitorInterval>& block,
+      DataRate bandwidth_estimate);
+
+  ~PccBitrateController();
+
+ private:
+  double ApplyDynamicBoundary(double rate_change, double bitrate);
+  double ComputeStepSize(double utility_gradient);
+
+  // Dynamic boundary variables:
+  int64_t consecutive_boundary_adjustments_number_;
+  const double initial_dynamic_boundary_;
+  const double dynamic_boundary_increment_;
+
+  const std::unique_ptr<PccUtilityFunctionInterface> utility_function_;
+  // Step size variables:
+  int64_t step_size_adjustments_number_;
+  const double initial_conversion_factor_;
+
+  absl::optional<double> previous_utility_;
+};
+
+}  // namespace pcc
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_BITRATE_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller_unittest.cc
new file mode 100644
index 0000000000..957d99b1de
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/bitrate_controller_unittest.cc
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/bitrate_controller.h"
+
+#include <memory>
+#include <utility>
+
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace pcc {
+namespace test {
+namespace {
+constexpr double kInitialConversionFactor = 1;
+constexpr double kInitialDynamicBoundary = 0.05;
+constexpr double kDynamicBoundaryIncrement = 0.1;
+
+constexpr double kDelayGradientCoefficient = 900;
+constexpr double kLossCoefficient = 11.35;
+constexpr double kThroughputCoefficient = 500 * 1000;
+constexpr double kThroughputPower = 0.99;
+constexpr double kDelayGradientThreshold = 0.01;
+constexpr double kDelayGradientNegativeBound = 10;
+
+const DataRate kTargetSendingRate = DataRate::KilobitsPerSec(300);
+const double kEpsilon = 0.05;
+const Timestamp kStartTime = Timestamp::Micros(0);
+const TimeDelta kPacketsDelta = TimeDelta::Millis(1);
+const TimeDelta kIntervalDuration = TimeDelta::Millis(1000);
+const TimeDelta kDefaultRtt = TimeDelta::Millis(1000);
+const DataSize kDefaultDataSize = DataSize::Bytes(100);
+
+std::vector<PacketResult> CreatePacketResults(
+    const std::vector<Timestamp>& packets_send_times,
+    const std::vector<Timestamp>& packets_received_times = {},
+    const std::vector<DataSize>& packets_sizes = {}) {
+  std::vector<PacketResult> packet_results;
+  PacketResult packet_result;
+  SentPacket sent_packet;
+  for (size_t i = 0; i < packets_send_times.size(); ++i) {
+    sent_packet.send_time = packets_send_times[i];
+    if (packets_sizes.empty()) {
+      sent_packet.size = kDefaultDataSize;
+    } else {
+      sent_packet.size = packets_sizes[i];
+    }
+    packet_result.sent_packet = sent_packet;
+    if (packets_received_times.empty()) {
+      packet_result.receive_time = packets_send_times[i] + kDefaultRtt;
+    } else {
+      packet_result.receive_time = packets_received_times[i];
+    }
+    packet_results.push_back(packet_result);
+  }
+  return packet_results;
+}
+
+class MockUtilityFunction : public PccUtilityFunctionInterface {
+ public:
+  MOCK_METHOD(double,
+              Compute,
+              (const PccMonitorInterval& monitor_interval),
+              (const, override));
+};
+
+}  // namespace
+
+TEST(PccBitrateControllerTest, IncreaseRateWhenNoChangesForTestBitrates) {
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, kDelayGradientCoefficient, kLossCoefficient,
+      kThroughputCoefficient, kThroughputPower, kDelayGradientThreshold,
+      kDelayGradientNegativeBound);
+  VivaceUtilityFunction utility_function(
+      kDelayGradientCoefficient, kLossCoefficient, kThroughputCoefficient,
+      kThroughputPower, kDelayGradientThreshold, kDelayGradientNegativeBound);
+  std::vector<PccMonitorInterval> monitor_block{
+      PccMonitorInterval(kTargetSendingRate * (1 + kEpsilon), kStartTime,
+                         kIntervalDuration),
+      PccMonitorInterval(kTargetSendingRate * (1 - kEpsilon),
+                         kStartTime + kIntervalDuration, kIntervalDuration)};
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + kPacketsDelta,
+                           kStartTime + kIntervalDuration + kPacketsDelta,
+                           kStartTime + 3 * kIntervalDuration},
+                          {}, {}));
+  monitor_block[1].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + kPacketsDelta,
+                           kStartTime + kIntervalDuration + kPacketsDelta,
+                           kStartTime + 3 * kIntervalDuration},
+                          {}, {}));
+  // For both monitor intervals there was no change in the RTT gradient or in
+  // the packet loss. Since the only difference is the sending rate, the
+  // congestion controller should choose the higher sending rate.
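+  // (Illustrative: with identical feedback in both intervals, the Vivace
+  // utility differs only through its throughput term, so the utility gradient
+  // with respect to the sending rate is positive and gradient ascent must
+  // move the rate upward.)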
+  EXPECT_GT(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps());
+}
+
+TEST(PccBitrateControllerTest, NoChangesWhenUtilityFunctionDoesntChange) {
+  std::unique_ptr<MockUtilityFunction> mock_utility_function =
+      std::make_unique<MockUtilityFunction>();
+  EXPECT_CALL(*mock_utility_function, Compute(::testing::_))
+      .Times(2)
+      .WillOnce(::testing::Return(100))
+      .WillOnce(::testing::Return(100));
+
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, std::move(mock_utility_function));
+  std::vector<PccMonitorInterval> monitor_block{
+      PccMonitorInterval(kTargetSendingRate * (1 + kEpsilon), kStartTime,
+                         kIntervalDuration),
+      PccMonitorInterval(kTargetSendingRate * (1 - kEpsilon),
+                         kStartTime + kIntervalDuration, kIntervalDuration)};
+  // To complete collecting feedback within monitor intervals.
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  monitor_block[1].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  // The mocked utility function returns the same value for both monitor
+  // intervals, so the utility gradient is zero and the sending rate should
+  // not change.
+  EXPECT_EQ(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps());
+}
+
+TEST(PccBitrateControllerTest, NoBoundaryWhenSmallGradient) {
+  std::unique_ptr<MockUtilityFunction> mock_utility_function =
+      std::make_unique<MockUtilityFunction>();
+  constexpr double kFirstMonitorIntervalUtility = 0;
+  const double kSecondMonitorIntervalUtility =
+      2 * kTargetSendingRate.bps() * kEpsilon;
+
+  EXPECT_CALL(*mock_utility_function, Compute(::testing::_))
+      .Times(2)
+      .WillOnce(::testing::Return(kFirstMonitorIntervalUtility))
+      .WillOnce(::testing::Return(kSecondMonitorIntervalUtility));
+
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, std::move(mock_utility_function));
+  std::vector<PccMonitorInterval> monitor_block{
+      PccMonitorInterval(kTargetSendingRate * (1 + kEpsilon), kStartTime,
+                         kIntervalDuration),
+      PccMonitorInterval(kTargetSendingRate * (1 - kEpsilon),
+                         kStartTime + kIntervalDuration, kIntervalDuration)};
+  // To complete collecting feedback within monitor intervals.
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  monitor_block[1].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+
+  double gradient =
+      (kFirstMonitorIntervalUtility - kSecondMonitorIntervalUtility) /
+      (kTargetSendingRate.bps() * 2 * kEpsilon);
+  // When the gradient is small we don't hit the dynamic boundary.
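+  // (Illustrative arithmetic: the mocked utilities give gradient = -1 and the
+  // first step size is kInitialConversionFactor, so the proposed change of
+  // |gradient| * kInitialConversionFactor = 1 bps is far below the dynamic
+  // boundary of kInitialDynamicBoundary * 300 kbps = 15 kbps.)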
+  EXPECT_EQ(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps() + kInitialConversionFactor * gradient);
+}
+
+TEST(PccBitrateControllerTest, FaceBoundaryWhenLargeGradient) {
+  std::unique_ptr<MockUtilityFunction> mock_utility_function =
+      std::make_unique<MockUtilityFunction>();
+  constexpr double kFirstMonitorIntervalUtility = 0;
+  const double kSecondMonitorIntervalUtility =
+      10 * kInitialDynamicBoundary * kTargetSendingRate.bps() * 2 *
+      kTargetSendingRate.bps() * kEpsilon;
+
+  EXPECT_CALL(*mock_utility_function, Compute(::testing::_))
+      .Times(4)
+      .WillOnce(::testing::Return(kFirstMonitorIntervalUtility))
+      .WillOnce(::testing::Return(kSecondMonitorIntervalUtility))
+      .WillOnce(::testing::Return(kFirstMonitorIntervalUtility))
+      .WillOnce(::testing::Return(kSecondMonitorIntervalUtility));
+
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, std::move(mock_utility_function));
+  std::vector<PccMonitorInterval> monitor_block{
+      PccMonitorInterval(kTargetSendingRate * (1 + kEpsilon), kStartTime,
+                         kIntervalDuration),
+      PccMonitorInterval(kTargetSendingRate * (1 - kEpsilon),
+                         kStartTime + kIntervalDuration, kIntervalDuration)};
+  // To complete collecting feedback within monitor intervals.
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  monitor_block[1].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  // The utility function gradient is too big and we hit the dynamic boundary.
+  EXPECT_EQ(bitrate_controller.ComputeRateUpdateForOnlineLearningMode(
+                monitor_block, kTargetSendingRate),
+            kTargetSendingRate * (1 - kInitialDynamicBoundary));
+  // The second time we hit the dynamic boundary in the same direction, so the
+  // boundary should increase.
+  EXPECT_EQ(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps() *
+                (1 - kInitialDynamicBoundary - kDynamicBoundaryIncrement));
+}
+
+TEST(PccBitrateControllerTest, SlowStartMode) {
+  std::unique_ptr<MockUtilityFunction> mock_utility_function =
+      std::make_unique<MockUtilityFunction>();
+  constexpr double kFirstUtilityFunction = 1000;
+  EXPECT_CALL(*mock_utility_function, Compute(::testing::_))
+      .Times(4)
+      // For the first 3 calls we expect to stay in SLOW_START mode and double
+      // the sending rate, since the utility function keeps increasing. On the
+      // last call the utility function decreases, so we should not double the
+      // sending rate and should exit SLOW_START mode.
+      .WillOnce(::testing::Return(kFirstUtilityFunction))
+      .WillOnce(::testing::Return(kFirstUtilityFunction + 1))
+      .WillOnce(::testing::Return(kFirstUtilityFunction + 2))
+      .WillOnce(::testing::Return(kFirstUtilityFunction + 1));
+
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, std::move(mock_utility_function));
+  std::vector<PccMonitorInterval> monitor_block{PccMonitorInterval(
+      2 * kTargetSendingRate, kStartTime, kIntervalDuration)};
+  // To complete collecting feedback within monitor intervals.
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  EXPECT_EQ(
+      bitrate_controller.ComputeRateUpdateForSlowStartMode(monitor_block[0]),
+      kTargetSendingRate * 2);
+  EXPECT_EQ(
+      bitrate_controller.ComputeRateUpdateForSlowStartMode(monitor_block[0]),
+      kTargetSendingRate * 2);
+  EXPECT_EQ(
+      bitrate_controller.ComputeRateUpdateForSlowStartMode(monitor_block[0]),
+      kTargetSendingRate * 2);
+  EXPECT_EQ(
+      bitrate_controller.ComputeRateUpdateForSlowStartMode(monitor_block[0]),
+      absl::nullopt);
+}
+
+TEST(PccBitrateControllerTest, StepSizeIncrease) {
+  std::unique_ptr<MockUtilityFunction> mock_utility_function =
+      std::make_unique<MockUtilityFunction>();
+  constexpr double kFirstMiUtilityFunction = 0;
+  const double kSecondMiUtilityFunction =
+      2 * kTargetSendingRate.bps() * kEpsilon;
+
+  EXPECT_CALL(*mock_utility_function, Compute(::testing::_))
+      .Times(4)
+      .WillOnce(::testing::Return(kFirstMiUtilityFunction))
+      .WillOnce(::testing::Return(kSecondMiUtilityFunction))
+      .WillOnce(::testing::Return(kFirstMiUtilityFunction))
+      .WillOnce(::testing::Return(kSecondMiUtilityFunction));
+  std::vector<PccMonitorInterval> monitor_block{
+      PccMonitorInterval(kTargetSendingRate * (1 + kEpsilon), kStartTime,
+                         kIntervalDuration),
+      PccMonitorInterval(kTargetSendingRate * (1 - kEpsilon),
+                         kStartTime + kIntervalDuration, kIntervalDuration)};
+  // To complete collecting feedback within monitor intervals.
+  monitor_block[0].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+  monitor_block[1].OnPacketsFeedback(
+      CreatePacketResults({kStartTime + 3 * kIntervalDuration}, {}, {}));
+
+  double gradient = (kFirstMiUtilityFunction - kSecondMiUtilityFunction) /
+                    (kTargetSendingRate.bps() * 2 * kEpsilon);
+  PccBitrateController bitrate_controller(
+      kInitialConversionFactor, kInitialDynamicBoundary,
+      kDynamicBoundaryIncrement, std::move(mock_utility_function));
+  // If we keep moving in the same direction, the step size should increase.
+  EXPECT_EQ(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps() + kInitialConversionFactor * gradient);
+  EXPECT_EQ(bitrate_controller
+                .ComputeRateUpdateForOnlineLearningMode(monitor_block,
+                                                        kTargetSendingRate)
+                .bps(),
+            kTargetSendingRate.bps() + 2 * kInitialConversionFactor * gradient);
+}
+
+}  // namespace test
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.cc
new file mode 100644
index 0000000000..de1e2d5e69
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+
+#include <stddef.h>
+
+#include <cmath>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace pcc {
+
+PccMonitorInterval::PccMonitorInterval(DataRate target_sending_rate,
+                                       Timestamp start_time,
+                                       TimeDelta duration)
+    : target_sending_rate_(target_sending_rate),
+      start_time_(start_time),
+      interval_duration_(duration),
+      received_packets_size_(DataSize::Zero()),
+      feedback_collection_done_(false) {}
+
+PccMonitorInterval::~PccMonitorInterval() = default;
+
+PccMonitorInterval::PccMonitorInterval(const PccMonitorInterval& other) =
+    default;
+
+void PccMonitorInterval::OnPacketsFeedback(
+    const std::vector<PacketResult>& packets_results) {
+  for (const PacketResult& packet_result : packets_results) {
+    if (packet_result.sent_packet.send_time <= start_time_) {
+      continue;
+    }
+    // Here we assume that if some packets are reordered with packets sent
+    // after the end of the monitor interval, then they are lost. (Otherwise
+    // it is not clear how long we should wait for packet feedback to arrive.)
+    if (packet_result.sent_packet.send_time >
+        start_time_ + interval_duration_) {
+      feedback_collection_done_ = true;
+      return;
+    }
+    if (!packet_result.IsReceived()) {
+      lost_packets_sent_time_.push_back(packet_result.sent_packet.send_time);
+    } else {
+      received_packets_.push_back(
+          {packet_result.receive_time - packet_result.sent_packet.send_time,
+           packet_result.sent_packet.send_time});
+      received_packets_size_ += packet_result.sent_packet.size;
+    }
+  }
+}
+
+// For the formula used in computations see formula for "slope" in the second
+// method:
+// https://www.johndcook.com/blog/2008/10/20/comparing-two-ways-to-fit-a-line-to-data/
+double PccMonitorInterval::ComputeDelayGradient(
+    double delay_gradient_threshold) const {
+  // Early return to prevent division by 0 in case all packets are sent at the
+  // same time.
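+  // (Illustrative restatement: with x_i = send-time offset of packet i from
+  // the first packet and y_i = its one-way delay, the loops below evaluate
+  // the least-squares slope
+  //   sum_i (x_i - mean(x)) * y_i / sum_i (x_i - mean(x))^2,
+  // which equals the textbook slope formula because the centered x values
+  // sum to zero.)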
+  if (received_packets_.empty() || received_packets_.front().sent_time ==
+                                       received_packets_.back().sent_time) {
+    return 0;
+  }
+  double sum_times = 0;
+  for (const ReceivedPacket& packet : received_packets_) {
+    double time_delta_us =
+        (packet.sent_time - received_packets_[0].sent_time).us();
+    sum_times += time_delta_us;
+  }
+  double sum_squared_scaled_time_deltas = 0;
+  double sum_scaled_time_delta_dot_delay = 0;
+  for (const ReceivedPacket& packet : received_packets_) {
+    double time_delta_us =
+        (packet.sent_time - received_packets_[0].sent_time).us();
+    double delay = packet.delay.us();
+    double scaled_time_delta_us =
+        time_delta_us - sum_times / received_packets_.size();
+    sum_squared_scaled_time_deltas +=
+        scaled_time_delta_us * scaled_time_delta_us;
+    sum_scaled_time_delta_dot_delay += scaled_time_delta_us * delay;
+  }
+  double rtt_gradient =
+      sum_scaled_time_delta_dot_delay / sum_squared_scaled_time_deltas;
+  if (std::abs(rtt_gradient) < delay_gradient_threshold)
+    rtt_gradient = 0;
+  return rtt_gradient;
+}
+
+bool PccMonitorInterval::IsFeedbackCollectionDone() const {
+  return feedback_collection_done_;
+}
+
+Timestamp PccMonitorInterval::GetEndTime() const {
+  return start_time_ + interval_duration_;
+}
+
+double PccMonitorInterval::GetLossRate() const {
+  size_t packets_lost = lost_packets_sent_time_.size();
+  size_t packets_received = received_packets_.size();
+  if (packets_lost == 0)
+    return 0;
+  return static_cast<double>(packets_lost) / (packets_lost + packets_received);
+}
+
+DataRate PccMonitorInterval::GetTargetSendingRate() const {
+  return target_sending_rate_;
+}
+
+DataRate PccMonitorInterval::GetTransmittedPacketsRate() const {
+  if (received_packets_.empty()) {
+    return target_sending_rate_;
+  }
+  Timestamp receive_time_of_first_packet =
+      received_packets_.front().sent_time + received_packets_.front().delay;
+  Timestamp receive_time_of_last_packet =
+      received_packets_.back().sent_time + received_packets_.back().delay;
+  if (receive_time_of_first_packet == receive_time_of_last_packet) {
+    RTC_LOG(LS_WARNING)
+        << "All packets in monitor interval were received at the same time.";
+    return target_sending_rate_;
+  }
+  return received_packets_size_ /
+         (receive_time_of_last_packet - receive_time_of_first_packet);
+}
+
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.h b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.h
new file mode 100644
index 0000000000..51bd0f068a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_MONITOR_INTERVAL_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_MONITOR_INTERVAL_H_
+
+#include <vector>
+
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+
+namespace webrtc {
+namespace pcc {
+
+// PCC divides time into consecutive monitor intervals which are used to test
+// consequences for performance of sending at a certain rate.
+class PccMonitorInterval {
+ public:
+  PccMonitorInterval(DataRate target_sending_rate,
+                     Timestamp start_time,
+                     TimeDelta duration);
+  ~PccMonitorInterval();
+  PccMonitorInterval(const PccMonitorInterval& other);
+  void OnPacketsFeedback(const std::vector<PacketResult>& packets_results);
+  // Returns true if complete information about the packets was received.
+  // Note that this only happens once feedback arrives for the first packet
+  // sent after the end of the monitor interval. If no such event occurs, we
+  // simply stay in the same state.
+  bool IsFeedbackCollectionDone() const;
+  Timestamp GetEndTime() const;
+
+  double GetLossRate() const;
+  // Estimates the gradient using linear regression on the 2-dimensional
+  // dataset (sampled packets delay, time of sampling).
+  double ComputeDelayGradient(double delay_gradient_threshold) const;
+  DataRate GetTargetSendingRate() const;
+  // How fast the receiving side gets packets.
+  DataRate GetTransmittedPacketsRate() const;
+
+ private:
+  struct ReceivedPacket {
+    TimeDelta delay;
+    Timestamp sent_time;
+  };
+  // Target bitrate used to generate and pace the outgoing packets.
+  // The actually sent bitrate might not match the target exactly.
+  DataRate target_sending_rate_;
+  // The start time is not included in the interval, while the end time is.
+  Timestamp start_time_;
+  TimeDelta interval_duration_;
+  // The vectors below are updated while receiving feedback.
+  std::vector<ReceivedPacket> received_packets_;
+  std::vector<Timestamp> lost_packets_sent_time_;
+  DataSize received_packets_size_;
+  bool feedback_collection_done_;
+};
+
+}  // namespace pcc
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_MONITOR_INTERVAL_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval_unittest.cc
new file mode 100644
index 0000000000..aaff57bd2a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/monitor_interval_unittest.cc
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+
+#include <vector>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace pcc {
+namespace test {
+namespace {
+const DataRate kTargetSendingRate = DataRate::KilobitsPerSec(300);
+const Timestamp kStartTime = Timestamp::Micros(0);
+const TimeDelta kPacketsDelta = TimeDelta::Millis(1);
+const TimeDelta kIntervalDuration = TimeDelta::Millis(100);
+const TimeDelta kDefaultDelay = TimeDelta::Millis(100);
+const DataSize kDefaultPacketSize = DataSize::Bytes(100);
+constexpr double kDelayGradientThreshold = 0.01;
+
+std::vector<PacketResult> CreatePacketResults(
+    const std::vector<Timestamp>& packets_send_times,
+    const std::vector<Timestamp>& packets_received_times = {},
+    const std::vector<DataSize>& packets_sizes = {}) {
+  std::vector<PacketResult> packet_results;
+  for (size_t i = 0; i < packets_send_times.size(); ++i) {
+    SentPacket sent_packet;
+    sent_packet.send_time = packets_send_times[i];
+    if (packets_sizes.empty()) {
+      sent_packet.size = kDefaultPacketSize;
+    } else {
+      sent_packet.size = packets_sizes[i];
+    }
+    PacketResult packet_result;
+    packet_result.sent_packet = sent_packet;
+    if (packets_received_times.empty()) {
+      packet_result.receive_time = packets_send_times[i] + kDefaultDelay;
+    } else {
+      packet_result.receive_time = packets_received_times[i];
+    }
+    packet_results.push_back(packet_result);
+  }
+  return packet_results;
+}
+
+}  // namespace
+
+TEST(PccMonitorIntervalTest, InitialValuesAreEqualToOnesSetInConstructor) {
+  PccMonitorInterval interval{kTargetSendingRate, kStartTime,
+                              kIntervalDuration};
+  EXPECT_EQ(interval.IsFeedbackCollectionDone(), false);
+  EXPECT_EQ(interval.GetEndTime(), kStartTime + kIntervalDuration);
+  EXPECT_EQ(interval.GetTargetSendingRate(), kTargetSendingRate);
+}
+
+TEST(PccMonitorIntervalTest, IndicatesDoneWhenFeedbackReceivedAfterInterval) {
+  PccMonitorInterval interval{kTargetSendingRate, kStartTime,
+                              kIntervalDuration};
+  interval.OnPacketsFeedback(CreatePacketResults({kStartTime}));
+  EXPECT_EQ(interval.IsFeedbackCollectionDone(), false);
+  interval.OnPacketsFeedback(
+      CreatePacketResults({kStartTime, kStartTime + kIntervalDuration}));
+  EXPECT_EQ(interval.IsFeedbackCollectionDone(), false);
+  interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kIntervalDuration, kStartTime + 2 * kIntervalDuration}));
+  EXPECT_EQ(interval.IsFeedbackCollectionDone(), true);
+}
+
+TEST(PccMonitorIntervalTest, LossRateIsOneThirdIfLostOnePacketOutOfThree) {
+  PccMonitorInterval interval{kTargetSendingRate, kStartTime,
+                              kIntervalDuration};
+  std::vector<Timestamp> start_times = {
+      kStartTime, kStartTime + 0.1 * kIntervalDuration,
+      kStartTime + 0.5 * kIntervalDuration, kStartTime + kIntervalDuration,
+      kStartTime + 2 * kIntervalDuration};
+  std::vector<Timestamp> end_times = {
+      kStartTime + 2 * kIntervalDuration, kStartTime + 2 * kIntervalDuration,
+      Timestamp::PlusInfinity(), kStartTime + 2 * kIntervalDuration,
+      kStartTime + 4 * kIntervalDuration};
+  std::vector<DataSize> packet_sizes = {
+      kDefaultPacketSize, 2 * kDefaultPacketSize, 3 * kDefaultPacketSize,
+      4 * kDefaultPacketSize, 5 * kDefaultPacketSize};
+  std::vector<PacketResult> packet_results =
+      CreatePacketResults(start_times, end_times, packet_sizes);
+  interval.OnPacketsFeedback(packet_results);
+  EXPECT_EQ(interval.IsFeedbackCollectionDone(), true);
+
+  EXPECT_DOUBLE_EQ(interval.GetLossRate(), 1. / 3);
+}
+
+TEST(PccMonitorIntervalTest, DelayGradientIsZeroIfNoChangeInPacketDelay) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kPacketsDelta,
+       kStartTime + 3 * kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay + 2 * kPacketsDelta,
+       Timestamp::PlusInfinity()},
+      {}));
+  // Delay gradient should be zero, because both received packets have the
+  // same one-way delay.
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), 0);
+}
+
+TEST(PccMonitorIntervalTest,
+     DelayGradientIsZeroWhenOnePacketSentInMonitorInterval) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, kStartTime + 3 * kIntervalDuration}, {}));
+  // Only one received packet belongs to the monitor interval, so the delay
+  // gradient should be zero in this case.
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), 0);
+}
+
+TEST(PccMonitorIntervalTest, DelayGradientIsOne) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kPacketsDelta,
+       kStartTime + 3 * kPacketsDelta, kStartTime + 3 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + 4 * kPacketsDelta + kDefaultDelay,
+       kStartTime + 3 * kIntervalDuration},
+      {}));
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), 1);
+}
+
+TEST(PccMonitorIntervalTest, DelayGradientIsMinusOne) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kPacketsDelta,
+       kStartTime + 5 * kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay, kStartTime + 3 * kIntervalDuration},
+      {}));
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), -1);
+}
+
+TEST(PccMonitorIntervalTest,
+     DelayGradientIsZeroIfItSmallerWhenGradientThreshold) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + kPacketsDelta,
+       kStartTime + 102 * kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay + kPacketsDelta,
+       kStartTime + 3 * kIntervalDuration},
+      {}));
+  // Delay gradient is less than 0.01, hence should be treated as zero.
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), 0);
+}
+
+TEST(PccMonitorIntervalTest,
+     DelayGradientIsZeroWhenAllPacketsSentAtTheSameTime) {
+  PccMonitorInterval monitor_interval(kTargetSendingRate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + kPacketsDelta,
+       kStartTime + kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay + kPacketsDelta,
+       kStartTime + 3 * kIntervalDuration},
+      {}));
+  // If all packets were sent at the same time, then the delay gradient should
+  // be zero.
+  EXPECT_DOUBLE_EQ(
+      monitor_interval.ComputeDelayGradient(kDelayGradientThreshold), 0);
+}
+
+}  // namespace test
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.cc
new file mode 100644
index 0000000000..c35c6e8ab2
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/pcc_factory.h"
+
+#include <memory>
+
+#include "modules/congestion_controller/pcc/pcc_network_controller.h"
+
+namespace webrtc {
+
+PccNetworkControllerFactory::PccNetworkControllerFactory() {}
+
+std::unique_ptr<NetworkControllerInterface> PccNetworkControllerFactory::Create(
+    NetworkControllerConfig config) {
+  return std::make_unique<pcc::PccNetworkController>(config);
+}
+
+TimeDelta PccNetworkControllerFactory::GetProcessInterval() const {
+  return TimeDelta::PlusInfinity();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.h b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.h
new file mode 100644
index 0000000000..bb70d7a499
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_factory.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_PCC_FACTORY_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_PCC_FACTORY_H_
+
+#include <memory>
+
+#include "api/transport/network_control.h"
+#include "api/units/time_delta.h"
+
+namespace webrtc {
+
+class PccNetworkControllerFactory : public NetworkControllerFactoryInterface {
+ public:
+  PccNetworkControllerFactory();
+  std::unique_ptr<NetworkControllerInterface> Create(
+      NetworkControllerConfig config) override;
+  TimeDelta GetProcessInterval() const override;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_PCC_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.cc
new file mode 100644
index 0000000000..8653470955
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.cc
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/pcc_network_controller.h"
+
+#include <algorithm>
+
+#include "absl/types/optional.h"
+#include "api/units/data_size.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace pcc {
+namespace {
+constexpr int64_t kInitialRttMs = 200;
+constexpr int64_t kInitialBandwidthKbps = 300;
+constexpr double kMonitorIntervalDurationRatio = 1;
+constexpr double kDefaultSamplingStep = 0.05;
+constexpr double kTimeoutRatio = 2;
+constexpr double kAlphaForRtt = 0.9;
+constexpr double kSlowStartModeIncrease = 1.5;
+
+constexpr double kAlphaForPacketInterval = 0.9;
+constexpr int64_t kMinPacketsNumberPerInterval = 20;
+const TimeDelta kMinDurationOfMonitorInterval = TimeDelta::Millis(50);
+const TimeDelta kStartupDuration = TimeDelta::Millis(500);
+constexpr double kMinRateChangeBps = 4000;
+constexpr DataRate kMinRateHaveMultiplicativeRateChange = DataRate::BitsPerSec(
+    static_cast<int64_t>(kMinRateChangeBps / kDefaultSamplingStep));
+
+// Bitrate controller constants.
+constexpr double kInitialConversionFactor = 5;
+constexpr double kInitialDynamicBoundary = 0.1;
+constexpr double kDynamicBoundaryIncrement = 0.1;
+// Utility function parameters.
+constexpr double kRttGradientCoefficientBps = 0.005;
+constexpr double kLossCoefficientBps = 10;
+constexpr double kThroughputCoefficient = 0.001;
+constexpr double kThroughputPower = 0.9;
+constexpr double kRttGradientThreshold = 0.01;
+constexpr double kDelayGradientNegativeBound = 0.1;
+
+constexpr int64_t kNumberOfPacketsToKeep = 20;
+const uint64_t kRandomSeed = 100;
+}  // namespace
+
+PccNetworkController::PccNetworkController(NetworkControllerConfig config)
+    : start_time_(Timestamp::PlusInfinity()),
+      last_sent_packet_time_(Timestamp::PlusInfinity()),
+      smoothed_packets_sending_interval_(TimeDelta::Zero()),
+      mode_(Mode::kStartup),
+      default_bandwidth_(DataRate::KilobitsPerSec(kInitialBandwidthKbps)),
+      bandwidth_estimate_(default_bandwidth_),
+      rtt_tracker_(TimeDelta::Millis(kInitialRttMs), kAlphaForRtt),
+      monitor_interval_timeout_(TimeDelta::Millis(kInitialRttMs) *
+                                kTimeoutRatio),
+      monitor_interval_length_strategy_(MonitorIntervalLengthStrategy::kFixed),
+      monitor_interval_duration_ratio_(kMonitorIntervalDurationRatio),
+      sampling_step_(kDefaultSamplingStep),
+      monitor_interval_timeout_ratio_(kTimeoutRatio),
+      min_packets_number_per_interval_(kMinPacketsNumberPerInterval),
+      bitrate_controller_(kInitialConversionFactor,
+                          kInitialDynamicBoundary,
+                          kDynamicBoundaryIncrement,
+                          kRttGradientCoefficientBps,
+                          kLossCoefficientBps,
+                          kThroughputCoefficient,
+                          kThroughputPower,
+                          kRttGradientThreshold,
+                          kDelayGradientNegativeBound),
+      monitor_intervals_duration_(TimeDelta::Zero()),
+      complete_feedback_monitor_interval_number_(0),
+      random_generator_(kRandomSeed) {
+  if (config.constraints.starting_rate) {
+    default_bandwidth_ = *config.constraints.starting_rate;
+    bandwidth_estimate_ = default_bandwidth_;
+  }
+}
+
+PccNetworkController::~PccNetworkController() {}
+
+NetworkControlUpdate PccNetworkController::CreateRateUpdate(
+    Timestamp at_time) const {
+  DataRate sending_rate = DataRate::Zero();
+  if (monitor_intervals_.empty() ||
+      (monitor_intervals_.size() >= monitor_intervals_bitrates_.size() &&
+       at_time >= monitor_intervals_.back().GetEndTime())) {
+    sending_rate = bandwidth_estimate_;
+  } else {
+    sending_rate = monitor_intervals_.back().GetTargetSendingRate();
+  }
+  // Set up config when sending rate is computed.
+  NetworkControlUpdate update;
+
+  // Set up target rate to encoder.
+  TargetTransferRate target_rate_msg;
+  target_rate_msg.at_time = at_time;
+  target_rate_msg.network_estimate.at_time = at_time;
+  target_rate_msg.network_estimate.round_trip_time = rtt_tracker_.GetRtt();
+  // TODO(koloskova): Add correct estimate.
+  target_rate_msg.network_estimate.loss_rate_ratio = 0;
+  target_rate_msg.network_estimate.bwe_period =
+      monitor_interval_duration_ratio_ * rtt_tracker_.GetRtt();
+
+  target_rate_msg.target_rate = sending_rate;
+  update.target_rate = target_rate_msg;
+
+  // Set up pacing/padding target rate.
+  PacerConfig pacer_config;
+  pacer_config.at_time = at_time;
+  pacer_config.time_window = TimeDelta::Millis(1);
+  pacer_config.data_window = sending_rate * pacer_config.time_window;
+  pacer_config.pad_window = sending_rate * pacer_config.time_window;
+
+  update.pacer_config = pacer_config;
+  return update;
+}
+
+NetworkControlUpdate PccNetworkController::OnSentPacket(SentPacket msg) {
+  // Start new monitor interval if previous has finished.
+  // Monitor interval is initialized in OnProcessInterval function.
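+  // (Illustrative summary of the branches below: the very first sent packet
+  // opens a single startup monitor interval at the current estimate; later
+  // packets start the next queued interval once the previous one ends; an
+  // expired timeout drops the estimate to min(0.5 * estimate, observed
+  // receiving rate).)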
+  if (start_time_.IsInfinite()) {
+    start_time_ = msg.send_time;
+    monitor_intervals_duration_ = kStartupDuration;
+    monitor_intervals_bitrates_ = {bandwidth_estimate_};
+    monitor_intervals_.emplace_back(bandwidth_estimate_, msg.send_time,
+                                    monitor_intervals_duration_);
+    complete_feedback_monitor_interval_number_ = 0;
+  }
+  if (last_sent_packet_time_.IsFinite()) {
+    smoothed_packets_sending_interval_ =
+        (msg.send_time - last_sent_packet_time_) * kAlphaForPacketInterval +
+        (1 - kAlphaForPacketInterval) * smoothed_packets_sending_interval_;
+  }
+  last_sent_packet_time_ = msg.send_time;
+  if (!monitor_intervals_.empty() &&
+      msg.send_time >= monitor_intervals_.back().GetEndTime() &&
+      monitor_intervals_bitrates_.size() > monitor_intervals_.size()) {
+    // Start new monitor interval.
+    monitor_intervals_.emplace_back(
+        monitor_intervals_bitrates_[monitor_intervals_.size()], msg.send_time,
+        monitor_intervals_duration_);
+  }
+  if (IsTimeoutExpired(msg.send_time)) {
+    DataSize received_size = DataSize::Zero();
+    for (size_t i = 1; i < last_received_packets_.size(); ++i) {
+      received_size += last_received_packets_[i].sent_packet.size;
+    }
+    TimeDelta sending_time = TimeDelta::Zero();
+    if (last_received_packets_.size() > 0)
+      sending_time = last_received_packets_.back().receive_time -
+                     last_received_packets_.front().receive_time;
+    DataRate receiving_rate = bandwidth_estimate_;
+    if (sending_time > TimeDelta::Zero())
+      receiving_rate = received_size / sending_time;
+    bandwidth_estimate_ = std::min(bandwidth_estimate_ * 0.5, receiving_rate);
+    if (mode_ == Mode::kSlowStart)
+      mode_ = Mode::kOnlineLearning;
+  }
+  if (mode_ == Mode::kStartup &&
+      msg.send_time - start_time_ >= kStartupDuration) {
+    DataSize received_size = DataSize::Zero();
+    for (size_t i = 1; i < last_received_packets_.size(); ++i) {
+      received_size += last_received_packets_[i].sent_packet.size;
+    }
+    TimeDelta sending_time = TimeDelta::Zero();
+    if (last_received_packets_.size() > 0)
+      sending_time = last_received_packets_.back().receive_time -
+                     last_received_packets_.front().receive_time;
+    DataRate receiving_rate = bandwidth_estimate_;
+    if (sending_time > TimeDelta::Zero())
+      receiving_rate = received_size / sending_time;
+    bandwidth_estimate_ = receiving_rate;
+    monitor_intervals_.clear();
+    mode_ = Mode::kSlowStart;
+    monitor_intervals_duration_ = ComputeMonitorIntervalsDuration();
+    monitor_intervals_bitrates_ = {bandwidth_estimate_};
+    monitor_intervals_.emplace_back(bandwidth_estimate_, msg.send_time,
+                                    monitor_intervals_duration_);
+    bandwidth_estimate_ = bandwidth_estimate_ * (1 / kSlowStartModeIncrease);
+    complete_feedback_monitor_interval_number_ = 0;
+    return CreateRateUpdate(msg.send_time);
+  }
+  if (IsFeedbackCollectionDone() || IsTimeoutExpired(msg.send_time)) {
+    // Creating new monitor intervals.
+    monitor_intervals_.clear();
+    monitor_interval_timeout_ =
+        rtt_tracker_.GetRtt() * monitor_interval_timeout_ratio_;
+    monitor_intervals_duration_ = ComputeMonitorIntervalsDuration();
+    complete_feedback_monitor_interval_number_ = 0;
+    // Compute bitrates and start first monitor interval.
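+    // (Illustrative: in online-learning mode PCC probes the two rates
+    // r * (1 +/- sampling_step_) around the current estimate r in a random
+    // order, falling back to fixed +/- kMinRateChangeBps offsets at very low
+    // rates; slow start instead tests a single increased rate.)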
+    if (mode_ == Mode::kSlowStart) {
+      monitor_intervals_bitrates_ = {kSlowStartModeIncrease *
+                                     bandwidth_estimate_};
+      monitor_intervals_.emplace_back(
+          kSlowStartModeIncrease * bandwidth_estimate_, msg.send_time,
+          monitor_intervals_duration_);
+    } else {
+      RTC_DCHECK(mode_ == Mode::kOnlineLearning || mode_ == Mode::kDoubleCheck);
+      monitor_intervals_.clear();
+      int64_t sign = 2 * (random_generator_.Rand(0, 1) % 2) - 1;
+      RTC_DCHECK_GE(sign, -1);
+      RTC_DCHECK_LE(sign, 1);
+      if (bandwidth_estimate_ >= kMinRateHaveMultiplicativeRateChange) {
+        monitor_intervals_bitrates_ = {
+            bandwidth_estimate_ * (1 + sign * sampling_step_),
+            bandwidth_estimate_ * (1 - sign * sampling_step_)};
+      } else {
+        monitor_intervals_bitrates_ = {
+            DataRate::BitsPerSec(std::max<double>(
+                bandwidth_estimate_.bps() + sign * kMinRateChangeBps, 0)),
+            DataRate::BitsPerSec(std::max<double>(
+                bandwidth_estimate_.bps() - sign * kMinRateChangeBps, 0))};
+      }
+      monitor_intervals_.emplace_back(monitor_intervals_bitrates_[0],
+                                      msg.send_time,
+                                      monitor_intervals_duration_);
+    }
+  }
+  return CreateRateUpdate(msg.send_time);
+}
+
+TimeDelta PccNetworkController::ComputeMonitorIntervalsDuration() const {
+  TimeDelta monitor_intervals_duration = TimeDelta::Zero();
+  if (monitor_interval_length_strategy_ ==
+      MonitorIntervalLengthStrategy::kAdaptive) {
+    monitor_intervals_duration = std::max(
+        rtt_tracker_.GetRtt() * monitor_interval_duration_ratio_,
+        smoothed_packets_sending_interval_ * min_packets_number_per_interval_);
+  } else {
+    RTC_DCHECK(monitor_interval_length_strategy_ ==
+               MonitorIntervalLengthStrategy::kFixed);
+    monitor_intervals_duration =
+        smoothed_packets_sending_interval_ * min_packets_number_per_interval_;
+  }
+  monitor_intervals_duration =
+      std::max(kMinDurationOfMonitorInterval, monitor_intervals_duration);
+  return monitor_intervals_duration;
+}
+
+bool PccNetworkController::IsTimeoutExpired(Timestamp current_time) const {
+  if (complete_feedback_monitor_interval_number_ >= monitor_intervals_.size()) {
+    return false;
+  }
+  return current_time -
+             monitor_intervals_[complete_feedback_monitor_interval_number_]
+                 .GetEndTime() >=
+         monitor_interval_timeout_;
+}
+
+bool PccNetworkController::IsFeedbackCollectionDone() const {
+  return complete_feedback_monitor_interval_number_ >=
+         monitor_intervals_bitrates_.size();
+}
+
+NetworkControlUpdate PccNetworkController::OnTransportPacketsFeedback(
+    TransportPacketsFeedback msg) {
+  if (msg.packet_feedbacks.empty())
+    return NetworkControlUpdate();
+  // Save packets to last_received_packets_ array.
+  for (const PacketResult& packet_result : msg.ReceivedWithSendInfo()) {
+    last_received_packets_.push_back(packet_result);
+  }
+  while (last_received_packets_.size() > kNumberOfPacketsToKeep) {
+    last_received_packets_.pop_front();
+  }
+  rtt_tracker_.OnPacketsFeedback(msg.PacketsWithFeedback(), msg.feedback_time);
+  // Skip the rate update when online-learning mode has just started but the
+  // corresponding monitor intervals have not started yet.
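+  // (This situation arises when a timeout has just switched the controller
+  // from slow start to online learning: the bitrate list still holds a single
+  // entry until OnSentPacket creates the next pair of probing intervals.)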
+  if (mode_ == Mode::kOnlineLearning &&
+      monitor_intervals_bitrates_.size() < 2) {
+    return NetworkControlUpdate();
+  }
+  if (!IsFeedbackCollectionDone() && !monitor_intervals_.empty()) {
+    while (complete_feedback_monitor_interval_number_ <
+           monitor_intervals_.size()) {
+      monitor_intervals_[complete_feedback_monitor_interval_number_]
+          .OnPacketsFeedback(msg.PacketsWithFeedback());
+      if (!monitor_intervals_[complete_feedback_monitor_interval_number_]
+               .IsFeedbackCollectionDone())
+        break;
+      ++complete_feedback_monitor_interval_number_;
+    }
+  }
+  if (IsFeedbackCollectionDone()) {
+    if (mode_ == Mode::kDoubleCheck) {
+      mode_ = Mode::kOnlineLearning;
+    } else if (NeedDoubleCheckMeasurments()) {
+      mode_ = Mode::kDoubleCheck;
+    }
+    if (mode_ != Mode::kDoubleCheck)
+      UpdateSendingRateAndMode();
+  }
+  return NetworkControlUpdate();
+}
+
+bool PccNetworkController::NeedDoubleCheckMeasurments() const {
+  if (mode_ == Mode::kSlowStart) {
+    return false;
+  }
+  double first_loss_rate = monitor_intervals_[0].GetLossRate();
+  double second_loss_rate = monitor_intervals_[1].GetLossRate();
+  DataRate first_bitrate = monitor_intervals_[0].GetTargetSendingRate();
+  DataRate second_bitrate = monitor_intervals_[1].GetTargetSendingRate();
+  if ((first_bitrate.bps() - second_bitrate.bps()) *
+          (first_loss_rate - second_loss_rate) <
+      0) {
+    return true;
+  }
+  return false;
+}
+
+void PccNetworkController::UpdateSendingRateAndMode() {
+  if (monitor_intervals_.empty() || !IsFeedbackCollectionDone()) {
+    return;
+  }
+  if (mode_ == Mode::kSlowStart) {
+    DataRate old_bandwidth_estimate = bandwidth_estimate_;
+    bandwidth_estimate_ =
+        bitrate_controller_
            .ComputeRateUpdateForSlowStartMode(monitor_intervals_[0])
+            .value_or(bandwidth_estimate_);
+    if (bandwidth_estimate_ <= old_bandwidth_estimate)
+      mode_ = Mode::kOnlineLearning;
+  } else {
+    RTC_DCHECK(mode_ == Mode::kOnlineLearning);
+    bandwidth_estimate_ =
+        bitrate_controller_.ComputeRateUpdateForOnlineLearningMode(
+            monitor_intervals_, bandwidth_estimate_);
+  }
+}
+
+NetworkControlUpdate PccNetworkController::OnNetworkAvailability(
+    NetworkAvailability msg) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnNetworkRouteChange(
+    NetworkRouteChange msg) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnProcessInterval(
+    ProcessInterval msg) {
+  return CreateRateUpdate(msg.at_time);
+}
+
+NetworkControlUpdate PccNetworkController::OnTargetRateConstraints(
+    TargetRateConstraints msg) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnRemoteBitrateReport(
+    RemoteBitrateReport) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnRoundTripTimeUpdate(
+    RoundTripTimeUpdate) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnTransportLossReport(
+    TransportLossReport) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnStreamsConfig(StreamsConfig msg) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnReceivedPacket(
+    ReceivedPacket msg) {
+  return NetworkControlUpdate();
+}
+
+NetworkControlUpdate PccNetworkController::OnNetworkStateEstimate(
+    NetworkStateEstimate msg) {
+  return NetworkControlUpdate();
+}
+
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.h b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.h
new file mode 100644
index 0000000000..e5f65dd7d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_PCC_NETWORK_CONTROLLER_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_PCC_NETWORK_CONTROLLER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <deque>
+#include <vector>
+
+#include "api/transport/network_control.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "modules/congestion_controller/pcc/bitrate_controller.h"
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+#include "modules/congestion_controller/pcc/rtt_tracker.h"
+#include "rtc_base/random.h"
+
+namespace webrtc {
+namespace pcc {
+
+// PCC (Performance-oriented Congestion Control) Vivace is a congestion
+// control algorithm based on online (convex) optimization in machine learning.
+// It divides time into consecutive Monitor Intervals (MI) to test sending
+// rates r(1 + eps), r(1 - eps) for the current sending rate r.
+// At the end of each MI it computes a utility function that transforms the
+// performance statistics into a numerical value, then updates the current
+// sending rate using gradient ascent to maximize the utility function.
+class PccNetworkController : public NetworkControllerInterface {
+ public:
+  enum class Mode {
+    kStartup,
+    // The slow start phase of PCC doubles the sending rate each monitor
+    // interval.
+    kSlowStart,
+    // After the first decrease in the utility function, PCC exits slow start
+    // and enters the online-learning phase.
+    kOnlineLearning,
+    // If sending at the lower rate resulted in higher packet loss, the
+    // measurements are unreliable and we need to double-check them.
+    kDoubleCheck
+  };
+
+  enum class MonitorIntervalLengthStrategy {
+    // Adaptive: the monitor interval length is proportional to the packets'
+    // RTT.
+    kAdaptive,
+    // Fixed: the monitor interval length equals the time needed to send a
+    // predefined number of packets (kMinPacketsNumberPerInterval).
+    kFixed
+  };
+
+  explicit PccNetworkController(NetworkControllerConfig config);
+  ~PccNetworkController() override;
+
+  // NetworkControllerInterface
+  NetworkControlUpdate OnNetworkAvailability(NetworkAvailability msg) override;
+  NetworkControlUpdate OnNetworkRouteChange(NetworkRouteChange msg) override;
+  NetworkControlUpdate OnProcessInterval(ProcessInterval msg) override;
+  NetworkControlUpdate OnSentPacket(SentPacket msg) override;
+  NetworkControlUpdate OnTargetRateConstraints(
+      TargetRateConstraints msg) override;
+  NetworkControlUpdate OnTransportPacketsFeedback(
+      TransportPacketsFeedback msg) override;
+
+  // Part of the remote bitrate estimation API, not implemented for PCC.
+  NetworkControlUpdate OnStreamsConfig(StreamsConfig msg) override;
+  NetworkControlUpdate OnRemoteBitrateReport(RemoteBitrateReport msg) override;
+  NetworkControlUpdate OnRoundTripTimeUpdate(RoundTripTimeUpdate msg) override;
+  NetworkControlUpdate OnTransportLossReport(TransportLossReport msg) override;
+  NetworkControlUpdate OnReceivedPacket(ReceivedPacket msg) override;
+  NetworkControlUpdate OnNetworkStateEstimate(
+      NetworkStateEstimate msg) override;
+
+ private:
+  void UpdateSendingRateAndMode();
+  NetworkControlUpdate CreateRateUpdate(Timestamp at_time) const;
+  TimeDelta ComputeMonitorIntervalsDuration() const;
+  bool NeedDoubleCheckMeasurments() const;
+  bool IsTimeoutExpired(Timestamp current_time) const;
+  bool IsFeedbackCollectionDone() const;
+
+  Timestamp start_time_;
+  Timestamp last_sent_packet_time_;
+  TimeDelta smoothed_packets_sending_interval_;
+  Mode mode_;
+
+  // Default value used for initializing bandwidth.
+  DataRate default_bandwidth_;
+  // Current estimate r.
+  DataRate bandwidth_estimate_;
+
+  RttTracker rtt_tracker_;
+  TimeDelta monitor_interval_timeout_;
+  const MonitorIntervalLengthStrategy monitor_interval_length_strategy_;
+  const double monitor_interval_duration_ratio_;
+  const double sampling_step_;  // Epsilon.
+  const double monitor_interval_timeout_ratio_;
+  const int64_t min_packets_number_per_interval_;
+
+  PccBitrateController bitrate_controller_;
+
+  std::vector<PccMonitorInterval> monitor_intervals_;
+  std::vector<DataRate> monitor_intervals_bitrates_;
+  TimeDelta monitor_intervals_duration_;
+  size_t complete_feedback_monitor_interval_number_;
+
+  webrtc::Random random_generator_;
+  std::deque<PacketResult> last_received_packets_;
+};
+
+}  // namespace pcc
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_PCC_NETWORK_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller_unittest.cc
new file mode 100644
index 0000000000..c98680c785
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/pcc_network_controller_unittest.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/pcc_network_controller.h"
+
+#include <memory>
+
+#include "modules/congestion_controller/pcc/pcc_factory.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/scenario/scenario.h"
+
+using ::testing::AllOf;
+using ::testing::Field;
+using ::testing::Ge;
+using ::testing::Le;
+using ::testing::Matcher;
+using ::testing::Property;
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const DataRate kInitialBitrate = DataRate::KilobitsPerSec(60);
+const Timestamp kDefaultStartTime = Timestamp::Millis(10000000);
+
+constexpr double kDataRateMargin = 0.20;
+constexpr double kMinDataRateFactor = 1 - kDataRateMargin;
+constexpr double kMaxDataRateFactor = 1 + kDataRateMargin;
+inline Matcher<TargetTransferRate> TargetRateCloseTo(DataRate rate) {
+  DataRate min_data_rate = rate * kMinDataRateFactor;
+  DataRate max_data_rate = rate * kMaxDataRateFactor;
+  return Field(&TargetTransferRate::target_rate,
+               AllOf(Ge(min_data_rate), Le(max_data_rate)));
+}
+
+NetworkControllerConfig InitialConfig(
+    int starting_bandwidth_kbps = kInitialBitrate.kbps(),
+    int min_data_rate_kbps = 0,
+    int max_data_rate_kbps = 5 * kInitialBitrate.kbps()) {
+  NetworkControllerConfig config;
+  config.constraints.at_time = kDefaultStartTime;
+  config.constraints.min_data_rate =
+      DataRate::KilobitsPerSec(min_data_rate_kbps);
+  config.constraints.max_data_rate =
+      DataRate::KilobitsPerSec(max_data_rate_kbps);
+  config.constraints.starting_rate =
+      DataRate::KilobitsPerSec(starting_bandwidth_kbps);
+  return config;
+}
+
+ProcessInterval InitialProcessInterval() {
+  ProcessInterval process_interval;
+  process_interval.at_time = kDefaultStartTime;
+  return process_interval;
+}
+
+}  // namespace
+
+TEST(PccNetworkControllerTest, SendsConfigurationOnFirstProcess) {
+  std::unique_ptr<pcc::PccNetworkController> controller_;
+  controller_.reset(new pcc::PccNetworkController(InitialConfig()));
+
+  NetworkControlUpdate update =
+      controller_->OnProcessInterval(InitialProcessInterval());
+  EXPECT_THAT(*update.target_rate, TargetRateCloseTo(kInitialBitrate));
+  EXPECT_THAT(*update.pacer_config,
+              Property(&PacerConfig::data_rate, Ge(kInitialBitrate)));
+}
+
+TEST(PccNetworkControllerTest, UpdatesTargetSendRate) {
+  PccNetworkControllerFactory factory;
+  Scenario s("pcc_unit/updates_rate", false);
+  CallClientConfig config;
+  config.transport.cc_factory = &factory;
+  config.transport.rates.min_rate = DataRate::KilobitsPerSec(10);
+  config.transport.rates.max_rate = DataRate::KilobitsPerSec(1500);
+  config.transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  auto send_net = s.CreateMutableSimulationNode([](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(500);
+    c->delay = TimeDelta::Millis(100);
+  });
+  auto ret_net = s.CreateMutableSimulationNode(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(100); });
+
+  auto* client = s.CreateClient("send", config);
+  auto* route = s.CreateRoutes(client, {send_net->node()},
+                               s.CreateClient("return", CallClientConfig()),
+                               {ret_net->node()});
+  VideoStreamConfig video;
+  video.stream.use_rtx = false;
+  s.CreateVideoStream(route->forward(), video);
+  s.RunFor(TimeDelta::Seconds(30));
+  EXPECT_NEAR(client->target_rate().kbps(), 450, 100);
+  send_net->UpdateConfig([](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(800);
+    c->delay = TimeDelta::Millis(100);
+  });
+  s.RunFor(TimeDelta::Seconds(20));
+  EXPECT_NEAR(client->target_rate().kbps(), 750, 150);
+  send_net->UpdateConfig([](NetworkSimulationConfig* c) {
+    c->bandwidth = DataRate::KilobitsPerSec(200);
+    c->delay = TimeDelta::Millis(200);
+  });
+  ret_net->UpdateConfig(
+      [](NetworkSimulationConfig* c) { c->delay = TimeDelta::Millis(200); });
+  s.RunFor(TimeDelta::Seconds(35));
+  EXPECT_NEAR(client->target_rate().kbps(), 170, 50);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.cc
new file mode 100644
index 0000000000..af9dc8f11b
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/rtt_tracker.h"
+
+#include <algorithm>
+
+namespace webrtc {
+namespace pcc {
+
+RttTracker::RttTracker(TimeDelta initial_rtt, double alpha)
+    : rtt_estimate_(initial_rtt), alpha_(alpha) {}
+
+void RttTracker::OnPacketsFeedback(
+    const std::vector<PacketResult>& packet_feedbacks,
+    Timestamp feedback_received_time) {
+  TimeDelta packet_rtt = TimeDelta::MinusInfinity();
+  for (const PacketResult& packet_result : packet_feedbacks) {
+    if (!packet_result.IsReceived())
+      continue;
+    packet_rtt = std::max(
+        packet_rtt,
+        feedback_received_time - packet_result.sent_packet.send_time);
+  }
+  if (packet_rtt.IsFinite())
+    rtt_estimate_ = (1 - alpha_) * rtt_estimate_ + alpha_ * packet_rtt;
+}
+
+TimeDelta RttTracker::GetRtt() const {
+  return rtt_estimate_;
+}
+
+}  // namespace pcc
+}  // namespace webrtc
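Each finite RTT sample therefore moves the estimate a fraction alpha of the way toward the sample (an exponentially weighted moving average). As a worked example with alpha = 0.9 (the value used by the unit test below), an initial estimate of 10 us, and repeated samples of 100 us: rtt_1 = 0.1 * 10 + 0.9 * 100 = 91 us, rtt_2 = 0.1 * 91 + 0.9 * 100 = 99.1 us, rtt_3 = 99.91 us, which is already within the 1 us tolerance that the ChangeInRtt test allows.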
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.h b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.h
new file mode 100644
index 0000000000..94033cd511
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_RTT_TRACKER_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_RTT_TRACKER_H_
+
+#include <vector>
+
+#include "api/transport/network_types.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+
+namespace webrtc {
+namespace pcc {
+
+class RttTracker {
+ public:
+  RttTracker(TimeDelta initial_rtt, double alpha);
+  // Updates the RTT estimate from the received packets in the feedback.
+  void OnPacketsFeedback(const std::vector<PacketResult>& packet_feedbacks,
+                         Timestamp feedback_received_time);
+  TimeDelta GetRtt() const;
+
+ private:
+  TimeDelta rtt_estimate_;
+  double alpha_;
+};
+
+}  // namespace pcc
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_RTT_TRACKER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker_unittest.cc
new file mode 100644
index 0000000000..7d90e86822
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/rtt_tracker_unittest.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/rtt_tracker.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace pcc {
+namespace test {
+namespace {
+const TimeDelta kInitialRtt = TimeDelta::Micros(10);
+constexpr double kAlpha = 0.9;
+const Timestamp kStartTime = Timestamp::Seconds(0);
+
+PacketResult GetPacketWithRtt(TimeDelta rtt) {
+  SentPacket packet;
+  packet.send_time = kStartTime;
+  PacketResult packet_result;
+  packet_result.sent_packet = packet;
+  if (rtt.IsFinite()) {
+    packet_result.receive_time = kStartTime + rtt;
+  } else {
+    packet_result.receive_time = Timestamp::PlusInfinity();
+  }
+  return packet_result;
+}
+}  // namespace
+
+TEST(PccRttTrackerTest, InitialValue) {
+  RttTracker tracker{kInitialRtt, kAlpha};
+  EXPECT_EQ(kInitialRtt, tracker.GetRtt());
+  for (int i = 0; i < 100; ++i) {
+    tracker.OnPacketsFeedback({GetPacketWithRtt(kInitialRtt)},
+                              kStartTime + kInitialRtt);
+  }
+  EXPECT_EQ(kInitialRtt, tracker.GetRtt());
+}
+
+TEST(PccRttTrackerTest, DoNothingWhenPacketIsLost) {
+  RttTracker tracker{kInitialRtt, kAlpha};
+  tracker.OnPacketsFeedback({GetPacketWithRtt(TimeDelta::PlusInfinity())},
+                            kStartTime + kInitialRtt);
+  EXPECT_EQ(tracker.GetRtt(), kInitialRtt);
+}
+
+TEST(PccRttTrackerTest, ChangeInRtt) {
+  RttTracker tracker{kInitialRtt, kAlpha};
+  const TimeDelta kNewRtt = TimeDelta::Micros(100);
+  tracker.OnPacketsFeedback({GetPacketWithRtt(kNewRtt)}, kStartTime + kNewRtt);
+  EXPECT_GT(tracker.GetRtt(), kInitialRtt);
+  EXPECT_LE(tracker.GetRtt(), kNewRtt);
+  for (int i = 0; i < 100; ++i) {
+    tracker.OnPacketsFeedback({GetPacketWithRtt(kNewRtt)},
+                              kStartTime + kNewRtt);
+  }
+  const TimeDelta absolute_error = TimeDelta::Micros(1);
+  EXPECT_NEAR(tracker.GetRtt().us(), kNewRtt.us(), absolute_error.us());
+  EXPECT_LE(tracker.GetRtt(), kNewRtt);
+}
+
+}  // namespace test
+}  // namespace pcc
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.cc
new file mode 100644
index 0000000000..006a2fccd9
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/utility_function.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/units/data_rate.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace pcc {
+
+VivaceUtilityFunction::VivaceUtilityFunction(
+    double delay_gradient_coefficient,
+    double loss_coefficient,
+    double throughput_coefficient,
+    double throughput_power,
+    double delay_gradient_threshold,
+    double delay_gradient_negative_bound)
+    : delay_gradient_coefficient_(delay_gradient_coefficient),
+      loss_coefficient_(loss_coefficient),
+      throughput_power_(throughput_power),
+      throughput_coefficient_(throughput_coefficient),
+      delay_gradient_threshold_(delay_gradient_threshold),
+      delay_gradient_negative_bound_(delay_gradient_negative_bound) {
+  RTC_DCHECK_GE(delay_gradient_negative_bound_, 0);
+}
+
+double VivaceUtilityFunction::Compute(
+    const PccMonitorInterval& monitor_interval) const {
+  RTC_DCHECK(monitor_interval.IsFeedbackCollectionDone());
+  double bitrate = monitor_interval.GetTargetSendingRate().bps();
+  double loss_rate = monitor_interval.GetLossRate();
+  double rtt_gradient =
+      monitor_interval.ComputeDelayGradient(delay_gradient_threshold_);
+  rtt_gradient = std::max(rtt_gradient, -delay_gradient_negative_bound_);
+  return (throughput_coefficient_ * std::pow(bitrate, throughput_power_)) -
+         (delay_gradient_coefficient_ * bitrate * rtt_gradient) -
+         (loss_coefficient_ * bitrate * loss_rate);
+}
+
+VivaceUtilityFunction::~VivaceUtilityFunction() = default;
+
+ModifiedVivaceUtilityFunction::ModifiedVivaceUtilityFunction(
+    double delay_gradient_coefficient,
+    double loss_coefficient,
+    double throughput_coefficient,
+    double throughput_power,
+    double delay_gradient_threshold,
+    double delay_gradient_negative_bound)
+    : delay_gradient_coefficient_(delay_gradient_coefficient),
+      loss_coefficient_(loss_coefficient),
+      throughput_power_(throughput_power),
+      throughput_coefficient_(throughput_coefficient),
+      delay_gradient_threshold_(delay_gradient_threshold),
+      delay_gradient_negative_bound_(delay_gradient_negative_bound) {
+  RTC_DCHECK_GE(delay_gradient_negative_bound_, 0);
+}
+
+double ModifiedVivaceUtilityFunction::Compute(
+    const PccMonitorInterval& monitor_interval) const {
+  RTC_DCHECK(monitor_interval.IsFeedbackCollectionDone());
+  double bitrate = monitor_interval.GetTargetSendingRate().bps();
+  double loss_rate = monitor_interval.GetLossRate();
+  double rtt_gradient =
+      monitor_interval.ComputeDelayGradient(delay_gradient_threshold_);
+  rtt_gradient = std::max(rtt_gradient, -delay_gradient_negative_bound_);
+  return (throughput_coefficient_ * std::pow(bitrate, throughput_power_) *
+          bitrate) -
+         (delay_gradient_coefficient_ * bitrate * bitrate * rtt_gradient) -
+         (loss_coefficient_ * bitrate * bitrate * loss_rate);
+}
+
+ModifiedVivaceUtilityFunction::~ModifiedVivaceUtilityFunction() = default;
+
+}  // namespace pcc
+}  // namespace webrtc
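Written out, the two Compute() implementations above evaluate the following, where $r$ is the target sending rate in bps, $L$ the loss rate, $g$ the delay gradient clamped from below at $-g_{\min}$ (delay_gradient_negative_bound), and $k_t$, $p$, $k_d$, $k_l$ the throughput coefficient, throughput power, delay-gradient coefficient, and loss coefficient:

$$U_{\text{Vivace}}(r) = k_t\, r^{p} - k_d\, r \cdot \max(g, -g_{\min}) - k_l\, r \cdot L$$

$$U_{\text{Modified}}(r) = r \cdot U_{\text{Vivace}}(r)$$

so the modified utility, and hence its gradient and the resulting rate updates, scales with the sending rate, as described in utility_function.h below.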
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.h b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.h
new file mode 100644
index 0000000000..98bb0744c1
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_PCC_UTILITY_FUNCTION_H_
+#define MODULES_CONGESTION_CONTROLLER_PCC_UTILITY_FUNCTION_H_
+
+#include "modules/congestion_controller/pcc/monitor_interval.h"
+
+namespace webrtc {
+namespace pcc {
+
+// The utility function is used by PCC to transform the performance statistics
+// (sending rate, loss rate, packet latency) gathered over one monitor
+// interval into a numerical value.
+// https://www.usenix.org/conference/nsdi18/presentation/dong
+class PccUtilityFunctionInterface {
+ public:
+  virtual double Compute(const PccMonitorInterval& monitor_interval) const = 0;
+  virtual ~PccUtilityFunctionInterface() = default;
+};
+
+// The Vivace utility function was suggested in the paper "PCC Vivace:
+// Online-Learning Congestion Control" by Mo Dong et al.
+class VivaceUtilityFunction : public PccUtilityFunctionInterface {
+ public:
+  VivaceUtilityFunction(double delay_gradient_coefficient,
+                        double loss_coefficient,
+                        double throughput_coefficient,
+                        double throughput_power,
+                        double delay_gradient_threshold,
+                        double delay_gradient_negative_bound);
+  double Compute(const PccMonitorInterval& monitor_interval) const override;
+  ~VivaceUtilityFunction() override;
+
+ private:
+  const double delay_gradient_coefficient_;
+  const double loss_coefficient_;
+  const double throughput_power_;
+  const double throughput_coefficient_;
+  const double delay_gradient_threshold_;
+  const double delay_gradient_negative_bound_;
+};
+
+// This utility function was obtained by tuning the Vivace utility function.
+// The main difference is that the gradient of the modified utility function
+// (as well as the rate updates) scales proportionally to the sending rate,
+// which leads to better performance in the single-sender case.
+class ModifiedVivaceUtilityFunction : public PccUtilityFunctionInterface {
+ public:
+  ModifiedVivaceUtilityFunction(double delay_gradient_coefficient,
+                                double loss_coefficient,
+                                double throughput_coefficient,
+                                double throughput_power,
+                                double delay_gradient_threshold,
+                                double delay_gradient_negative_bound);
+  double Compute(const PccMonitorInterval& monitor_interval) const override;
+  ~ModifiedVivaceUtilityFunction() override;
+
+ private:
+  const double delay_gradient_coefficient_;
+  const double loss_coefficient_;
+  const double throughput_power_;
+  const double throughput_coefficient_;
+  const double delay_gradient_threshold_;
+  const double delay_gradient_negative_bound_;
+};
+
+}  // namespace pcc
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_PCC_UTILITY_FUNCTION_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function_unittest.cc
new file mode 100644
index 0000000000..19b2d15920
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/pcc/utility_function_unittest.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/pcc/utility_function.h"
+
+#include <cmath>
+
+#include <cstddef>
+#include <vector>
+
+#include "api/transport/network_types.h"
+#include "api/units/data_rate.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace pcc {
+namespace test {
+namespace {
+constexpr double kLossCoefficient = 11.35;
+constexpr double kThroughputPower = 0.9;
+constexpr double kThroughputCoefficient = 1;
+constexpr double kDelayGradientNegativeBound = 10;
+
+const Timestamp kStartTime = Timestamp::Micros(0);
+const TimeDelta kPacketsDelta = TimeDelta::Millis(1);
+const TimeDelta kIntervalDuration = TimeDelta::Millis(100);
+const DataRate kSendingBitrate = DataRate::BitsPerSec(1000);
+
+const DataSize kDefaultDataSize = DataSize::Bytes(100);
+const TimeDelta kDefaultDelay = TimeDelta::Millis(100);
+
+std::vector<PacketResult> CreatePacketResults(
+    const std::vector<Timestamp>& packets_send_times,
+    const std::vector<Timestamp>& packets_received_times = {},
+    const std::vector<DataSize>& packets_sizes = {}) {
+  std::vector<PacketResult> packet_results;
+  PacketResult packet_result;
+  SentPacket sent_packet;
+  for (size_t i = 0; i < packets_send_times.size(); ++i) {
+    sent_packet.send_time = packets_send_times[i];
+    if (packets_sizes.empty()) {
+      sent_packet.size = kDefaultDataSize;
+    } else {
+      sent_packet.size = packets_sizes[i];
+    }
+    packet_result.sent_packet = sent_packet;
+    if (packets_received_times.empty()) {
+      packet_result.receive_time = packets_send_times[i] + kDefaultDelay;
+    } else {
+      packet_result.receive_time = packets_received_times[i];
+    }
+    packet_results.push_back(packet_result);
+  }
+  return packet_results;
+}
+
+}  // namespace
+
+TEST(PccVivaceUtilityFunctionTest,
+     UtilityIsThroughputTermIfAllRestCoefficientsAreZero) {
+  VivaceUtilityFunction utility_function(0, 0, kThroughputCoefficient,
+                                         kThroughputPower, 0,
+                                         kDelayGradientNegativeBound);
+  PccMonitorInterval monitor_interval(kSendingBitrate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kPacketsDelta,
+       kStartTime + 3 * kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kPacketsDelta + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay + 3 * kPacketsDelta,
+       Timestamp::PlusInfinity()},
+      {kDefaultDataSize, kDefaultDataSize, kDefaultDataSize,
+       kDefaultDataSize}));
+  EXPECT_DOUBLE_EQ(utility_function.Compute(monitor_interval),
+                   kThroughputCoefficient *
+                       std::pow(kSendingBitrate.bps(), kThroughputPower));
+}
+
+TEST(PccVivaceUtilityFunctionTest,
+     LossTermIsNonZeroIfLossCoefficientIsNonZero) {
+  VivaceUtilityFunction utility_function(
+      0, kLossCoefficient, kThroughputCoefficient, kThroughputPower, 0,
+      kDelayGradientNegativeBound);
+  PccMonitorInterval monitor_interval(kSendingBitrate, kStartTime,
+                                      kIntervalDuration);
+  monitor_interval.OnPacketsFeedback(CreatePacketResults(
+      {kStartTime + kPacketsDelta, kStartTime + 2 * kPacketsDelta,
+       kStartTime + 5 * kPacketsDelta, kStartTime + 2 * kIntervalDuration},
+      {kStartTime + kDefaultDelay, Timestamp::PlusInfinity(),
+       kStartTime + kDefaultDelay, kStartTime + 3 * kIntervalDuration},
+      {}));
+  // The second packet was lost.
+ EXPECT_DOUBLE_EQ(utility_function.Compute(monitor_interval), + kThroughputCoefficient * + std::pow(kSendingBitrate.bps(), kThroughputPower) - + kLossCoefficient * kSendingBitrate.bps() * + monitor_interval.GetLossRate()); +} + +} // namespace test +} // namespace pcc +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller.cc b/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller.cc new file mode 100644 index 0000000000..e43b020f6e --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller.cc @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/congestion_controller/include/receive_side_congestion_controller.h" + +#include "api/units/data_rate.h" +#include "modules/pacing/packet_router.h" +#include "modules/remote_bitrate_estimator/include/bwe_defines.h" +#include "modules/remote_bitrate_estimator/remote_bitrate_estimator_abs_send_time.h" +#include "modules/remote_bitrate_estimator/remote_bitrate_estimator_single_stream.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { +static const uint32_t kTimeOffsetSwitchThreshold = 30; +} // namespace + +void ReceiveSideCongestionController::OnRttUpdate(int64_t avg_rtt_ms, + int64_t max_rtt_ms) { + MutexLock lock(&mutex_); + rbe_->OnRttUpdate(avg_rtt_ms, max_rtt_ms); +} + +void ReceiveSideCongestionController::RemoveStream(uint32_t ssrc) { + MutexLock lock(&mutex_); + rbe_->RemoveStream(ssrc); +} + +DataRate ReceiveSideCongestionController::LatestReceiveSideEstimate() const { + MutexLock lock(&mutex_); + return rbe_->LatestEstimate(); +} + +void ReceiveSideCongestionController::PickEstimatorFromHeader( + const RTPHeader& header) { + if (header.extension.hasAbsoluteSendTime) { + // If we see AST in header, switch RBE strategy immediately. + if (!using_absolute_send_time_) { + RTC_LOG(LS_INFO) + << "WrappingBitrateEstimator: Switching to absolute send time RBE."; + using_absolute_send_time_ = true; + PickEstimator(); + } + packets_since_absolute_send_time_ = 0; + } else { + // When we don't see AST, wait for a few packets before going back to TOF. + if (using_absolute_send_time_) { + ++packets_since_absolute_send_time_; + if (packets_since_absolute_send_time_ >= kTimeOffsetSwitchThreshold) { + RTC_LOG(LS_INFO) + << "WrappingBitrateEstimator: Switching to transmission " + "time offset RBE."; + using_absolute_send_time_ = false; + PickEstimator(); + } + } + } +} + +// Instantiate RBE for Time Offset or Absolute Send Time extensions. 
+void ReceiveSideCongestionController::PickEstimator() {
+  if (using_absolute_send_time_) {
+    rbe_ = std::make_unique<RemoteBitrateEstimatorAbsSendTime>(
+        &remb_throttler_, &clock_);
+  } else {
+    rbe_ = std::make_unique<RemoteBitrateEstimatorSingleStream>(
+        &remb_throttler_, &clock_);
+  }
+}
+
+ReceiveSideCongestionController::ReceiveSideCongestionController(
+    Clock* clock,
+    RemoteEstimatorProxy::TransportFeedbackSender feedback_sender,
+    RembThrottler::RembSender remb_sender,
+    NetworkStateEstimator* network_state_estimator)
+    : clock_(*clock),
+      remb_throttler_(std::move(remb_sender), clock),
+      remote_estimator_proxy_(std::move(feedback_sender),
+                              network_state_estimator),
+      rbe_(new RemoteBitrateEstimatorSingleStream(&remb_throttler_, clock)),
+      using_absolute_send_time_(false),
+      packets_since_absolute_send_time_(0) {}
+
+void ReceiveSideCongestionController::OnReceivedPacket(
+    int64_t arrival_time_ms,
+    size_t payload_size,
+    const RTPHeader& header) {
+  remote_estimator_proxy_.IncomingPacket(arrival_time_ms, payload_size,
+                                         header);
+  if (!header.extension.hasTransportSequenceNumber) {
+    // Receive-side BWE.
+    MutexLock lock(&mutex_);
+    PickEstimatorFromHeader(header);
+    rbe_->IncomingPacket(arrival_time_ms, payload_size, header);
+  }
+}
+
+void ReceiveSideCongestionController::SetSendPeriodicFeedback(
+    bool send_periodic_feedback) {
+  remote_estimator_proxy_.SetSendPeriodicFeedback(send_periodic_feedback);
+}
+
+void ReceiveSideCongestionController::OnBitrateChanged(int bitrate_bps) {
+  remote_estimator_proxy_.OnBitrateChanged(bitrate_bps);
+}
+
+TimeDelta ReceiveSideCongestionController::MaybeProcess() {
+  Timestamp now = clock_.CurrentTime();
+  mutex_.Lock();
+  TimeDelta time_until_rbe = rbe_->Process();
+  mutex_.Unlock();
+  TimeDelta time_until_rep = remote_estimator_proxy_.Process(now);
+  TimeDelta time_until = std::min(time_until_rbe, time_until_rep);
+  return std::max(time_until, TimeDelta::Zero());
+}
+
+void ReceiveSideCongestionController::SetMaxDesiredReceiveBitrate(
+    DataRate bitrate) {
+  remb_throttler_.SetMaxDesiredReceiveBitrate(bitrate);
+}
+
+void ReceiveSideCongestionController::SetTransportOverhead(
+    DataSize overhead_per_packet) {
+  remote_estimator_proxy_.SetTransportOverhead(overhead_per_packet);
+}
+
+}  // namespace webrtc
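The header-based estimator selection above is effectively a small state machine: a single packet carrying abs-send-time switches to the AST estimator immediately, while kTimeOffsetSwitchThreshold (30) consecutive packets without the extension are required before falling back. A minimal standalone sketch of that policy (a hypothetical helper, not part of the class):

// Returns true if the abs-send-time estimator should be used for the next
// packet; `packets_without_ast` is caller-owned state.
bool UseAbsSendTimeEstimator(bool packet_has_ast, bool currently_ast,
                             int& packets_without_ast) {
  if (packet_has_ast) {
    packets_without_ast = 0;
    return true;  // Switch to (or stay on) the AST-based estimator at once.
  }
  if (currently_ast && ++packets_without_ast >= 30)
    return false;  // Enough AST-less packets seen: fall back to time offset.
  return currently_ast;
}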
diff --git a/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller_unittest.cc
new file mode 100644
index 0000000000..f2fd6d11d7
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/receive_side_congestion_controller_unittest.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/include/receive_side_congestion_controller.h"
+
+#include "api/test/network_emulation/create_cross_traffic.h"
+#include "api/test/network_emulation/cross_traffic.h"
+#include "modules/pacing/packet_router.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/scenario/scenario.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::ElementsAre;
+using ::testing::MockFunction;
+
+namespace webrtc {
+
+namespace {
+
+// Converts a time `t` to the 24-bit 6.18 fixed-point resolution used by the
+// absolute send time header extension, rounded upwards. `denom` is the value
+// to divide `t` by to get whole seconds, e.g. `denom` = 1000 if `t` is in
+// milliseconds.
+uint32_t AbsSendTime(int64_t t, int64_t denom) {
+  return (((t << 18) + (denom >> 1)) / denom) & 0x00fffffful;
+}
+
+const uint32_t kInitialBitrateBps = 60000;
+
+}  // namespace
+
+namespace test {
+
+TEST(ReceiveSideCongestionControllerTest, SendsRembWithAbsSendTime) {
+  MockFunction<void(std::vector<std::unique_ptr<rtcp::RtcpPacket>>)>
+      feedback_sender;
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  SimulatedClock clock_(123456);
+
+  ReceiveSideCongestionController controller(
+      &clock_, feedback_sender.AsStdFunction(), remb_sender.AsStdFunction(),
+      nullptr);
+
+  size_t payload_size = 1000;
+  RTPHeader header;
+  header.ssrc = 0x11eb21c;
+  header.extension.hasAbsoluteSendTime = true;
+
+  EXPECT_CALL(remb_sender, Call(_, ElementsAre(header.ssrc))).Times(AtLeast(1));
+
+  for (int i = 0; i < 10; ++i) {
+    clock_.AdvanceTimeMilliseconds((1000 * payload_size) / kInitialBitrateBps);
+    int64_t now_ms = clock_.TimeInMilliseconds();
+    header.extension.absoluteSendTime = AbsSendTime(now_ms, 1000);
+    controller.OnReceivedPacket(now_ms, payload_size, header);
+  }
+}
+
+TEST(ReceiveSideCongestionControllerTest,
+     SendsRembAfterSetMaxDesiredReceiveBitrate) {
+  MockFunction<void(std::vector<std::unique_ptr<rtcp::RtcpPacket>>)>
+      feedback_sender;
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  SimulatedClock clock_(123456);
+
+  ReceiveSideCongestionController controller(
+      &clock_, feedback_sender.AsStdFunction(), remb_sender.AsStdFunction(),
+      nullptr);
+  EXPECT_CALL(remb_sender, Call(123, _));
+  controller.SetMaxDesiredReceiveBitrate(DataRate::BitsPerSec(123));
+}
+
+TEST(ReceiveSideCongestionControllerTest, ConvergesToCapacity) {
+  Scenario s("receive_cc_unit/converge");
+  NetworkSimulationConfig net_conf;
+  net_conf.bandwidth = DataRate::KilobitsPerSec(1000);
+  net_conf.delay = TimeDelta::Millis(50);
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = DataRate::KilobitsPerSec(300);
+  });
+
+  auto* route = s.CreateRoutes(client, {s.CreateSimulationNode(net_conf)},
+                               s.CreateClient("return", CallClientConfig()),
+                               {s.CreateSimulationNode(net_conf)});
+  VideoStreamConfig video;
+  video.stream.packet_feedback = false;
+  s.CreateVideoStream(route->forward(), video);
+  s.RunFor(TimeDelta::Seconds(30));
+  EXPECT_NEAR(client->send_bandwidth().kbps(), 900, 150);
+}
+
+TEST(ReceiveSideCongestionControllerTest, IsFairToTCP) {
+  Scenario s("receive_cc_unit/tcp_fairness");
+  NetworkSimulationConfig net_conf;
+  net_conf.bandwidth = DataRate::KilobitsPerSec(1000);
+  net_conf.delay = TimeDelta::Millis(50);
+  auto* client = s.CreateClient("send", [&](CallClientConfig* c) {
+    c->transport.rates.start_rate = DataRate::KilobitsPerSec(1000);
+  });
+  auto send_net = {s.CreateSimulationNode(net_conf)};
+  auto ret_net = {s.CreateSimulationNode(net_conf)};
+  auto* route = s.CreateRoutes(
+      client,
+      send_net, s.CreateClient("return", CallClientConfig()), ret_net);
+  VideoStreamConfig video;
+  video.stream.packet_feedback = false;
+  s.CreateVideoStream(route->forward(), video);
+  s.net()->StartCrossTraffic(CreateFakeTcpCrossTraffic(
+      s.net()->CreateRoute(send_net), s.net()->CreateRoute(ret_net),
+      FakeTcpConfig()));
+  s.RunFor(TimeDelta::Seconds(30));
+  // For some reason we get outcompeted by TCP here; this should probably be
+  // fixed, and a lower bound should be added to the test.
+  EXPECT_LT(client->send_bandwidth().kbps(), 750);
+}
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/remb_throttler.cc b/third_party/libwebrtc/modules/congestion_controller/remb_throttler.cc
new file mode 100644
index 0000000000..fcc30af9a8
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/remb_throttler.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/remb_throttler.h"
+
+#include <algorithm>
+#include <utility>
+
+namespace webrtc {
+
+namespace {
+constexpr TimeDelta kRembSendInterval = TimeDelta::Millis(200);
+}  // namespace
+
+RembThrottler::RembThrottler(RembSender remb_sender, Clock* clock)
+    : remb_sender_(std::move(remb_sender)),
+      clock_(clock),
+      last_remb_time_(Timestamp::MinusInfinity()),
+      last_send_remb_bitrate_(DataRate::PlusInfinity()),
+      max_remb_bitrate_(DataRate::PlusInfinity()) {}
+
+void RembThrottler::OnReceiveBitrateChanged(const std::vector<uint32_t>& ssrcs,
+                                            uint32_t bitrate_bps) {
+  DataRate receive_bitrate = DataRate::BitsPerSec(bitrate_bps);
+  Timestamp now = clock_->CurrentTime();
+  {
+    MutexLock lock(&mutex_);
+    // Percent threshold: within the send interval, a new REMB is sent
+    // immediately only if the estimate dropped by more than ~3% below the
+    // last value sent.
+    const int64_t kSendThresholdPercent = 103;
+    if (receive_bitrate * kSendThresholdPercent / 100 >
+            last_send_remb_bitrate_ &&
+        now < last_remb_time_ + kRembSendInterval) {
+      return;
+    }
+    last_remb_time_ = now;
+    last_send_remb_bitrate_ = receive_bitrate;
+    receive_bitrate = std::min(last_send_remb_bitrate_, max_remb_bitrate_);
+  }
+  remb_sender_(receive_bitrate.bps(), ssrcs);
+}
+
+void RembThrottler::SetMaxDesiredReceiveBitrate(DataRate bitrate) {
+  Timestamp now = clock_->CurrentTime();
+  {
+    MutexLock lock(&mutex_);
+    max_remb_bitrate_ = bitrate;
+    if (now - last_remb_time_ < kRembSendInterval &&
+        !last_send_remb_bitrate_.IsZero() &&
+        last_send_remb_bitrate_ <= max_remb_bitrate_) {
+      return;
+    }
+  }
+  remb_sender_(bitrate.bps(), /*ssrcs=*/{});
+}
+
+}  // namespace webrtc
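Concretely, with the 103% threshold and kRembSendInterval = 200 ms: if the last REMB carried 1000 kbps, a new estimate of 980 kbps arriving 50 ms later is suppressed (980 * 1.03 = 1009.4 > 1000), a new estimate of 960 kbps is sent immediately (960 * 1.03 = 988.8 <= 1000), and any new estimate is sent once 200 ms have elapsed since the last REMB.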
diff --git a/third_party/libwebrtc/modules/congestion_controller/remb_throttler.h b/third_party/libwebrtc/modules/congestion_controller/remb_throttler.h
new file mode 100644
index 0000000000..85292cbc09
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/remb_throttler.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_CONGESTION_CONTROLLER_REMB_THROTTLER_H_
+#define MODULES_CONGESTION_CONTROLLER_REMB_THROTTLER_H_
+
+#include <functional>
+#include <vector>
+
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "modules/remote_bitrate_estimator/include/remote_bitrate_estimator.h"
+#include "rtc_base/synchronization/mutex.h"
+
+namespace webrtc {
+
+// RembThrottler is a helper class used for throttling RTCP REMB messages.
+// Small changes to the received BWE are throttled to at most one REMB per
+// 200 ms.
+class RembThrottler : public RemoteBitrateObserver {
+ public:
+  using RembSender =
+      std::function<void(int64_t bitrate_bps, std::vector<uint32_t> ssrcs)>;
+  RembThrottler(RembSender remb_sender, Clock* clock);
+
+  // Ensures the remote party is notified of the receive bitrate, capped at
+  // `bitrate`, via an RTCP REMB message.
+  void SetMaxDesiredReceiveBitrate(DataRate bitrate);
+
+  // Implements RemoteBitrateObserver.
+  // Called every time there is a new bitrate estimate for a receive channel
+  // group. This call will trigger a new RTCP REMB packet if the bitrate
+  // estimate has decreased or if no RTCP REMB packet has been sent for
+  // a certain time interval.
+  void OnReceiveBitrateChanged(const std::vector<uint32_t>& ssrcs,
+                               uint32_t bitrate_bps) override;
+
+ private:
+  const RembSender remb_sender_;
+  Clock* const clock_;
+  mutable Mutex mutex_;
+  Timestamp last_remb_time_ RTC_GUARDED_BY(mutex_);
+  DataRate last_send_remb_bitrate_ RTC_GUARDED_BY(mutex_);
+  DataRate max_remb_bitrate_ RTC_GUARDED_BY(mutex_);
+};
+
+}  // namespace webrtc
+#endif  // MODULES_CONGESTION_CONTROLLER_REMB_THROTTLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/remb_throttler_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/remb_throttler_unittest.cc
new file mode 100644
index 0000000000..3f8df8a7bb
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/remb_throttler_unittest.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/congestion_controller/remb_throttler.h"
+
+#include <vector>
+
+#include "api/units/data_rate.h"
+#include "api/units/time_delta.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+using ::testing::_;
+using ::testing::MockFunction;
+
+TEST(RembThrottlerTest, CallRembSenderOnFirstReceiveBitrateChange) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+
+  EXPECT_CALL(remb_sender, Call(12345, std::vector<uint32_t>({1, 2, 3})));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/12345);
+}
+
+TEST(RembThrottlerTest, ThrottlesSmallReceiveBitrateDecrease) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+
+  EXPECT_CALL(remb_sender, Call);
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/12346);
+  clock.AdvanceTime(TimeDelta::Millis(100));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/12345);
+
+  EXPECT_CALL(remb_sender, Call(12345, _));
+  clock.AdvanceTime(TimeDelta::Millis(101));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/12345);
+}
+
+TEST(RembThrottlerTest, DoNotThrottleLargeReceiveBitrateDecrease) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+
+  EXPECT_CALL(remb_sender, Call(2345, _));
+  EXPECT_CALL(remb_sender, Call(1234, _));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/2345);
+  clock.AdvanceTime(TimeDelta::Millis(1));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/1234);
+}
+
+TEST(RembThrottlerTest, ThrottlesReceiveBitrateIncrease) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+
+  EXPECT_CALL(remb_sender, Call);
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/1234);
+  clock.AdvanceTime(TimeDelta::Millis(100));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/2345);
+
+  // An update more than 200 ms after the previous callback is not throttled.
+  EXPECT_CALL(remb_sender, Call(2345, _));
+  clock.AdvanceTime(TimeDelta::Millis(101));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/2345);
+}
+
+TEST(RembThrottlerTest, CallRembSenderOnSetMaxDesiredReceiveBitrate) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+  EXPECT_CALL(remb_sender, Call(1234, _));
+  remb_throttler.SetMaxDesiredReceiveBitrate(DataRate::BitsPerSec(1234));
+}
+
+TEST(RembThrottlerTest, CallRembSenderWithMinOfMaxDesiredAndOnReceivedBitrate) {
+  SimulatedClock clock(Timestamp::Zero());
+  MockFunction<void(int64_t, std::vector<uint32_t>)> remb_sender;
+  RembThrottler remb_throttler(remb_sender.AsStdFunction(), &clock);
+
+  EXPECT_CALL(remb_sender, Call(1234, _));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/1234);
+  clock.AdvanceTime(TimeDelta::Millis(1));
+  remb_throttler.SetMaxDesiredReceiveBitrate(DataRate::BitsPerSec(4567));
+
+  clock.AdvanceTime(TimeDelta::Millis(200));
+  EXPECT_CALL(remb_sender, Call(4567, _));
+  remb_throttler.OnReceiveBitrateChanged({1, 2, 3}, /*bitrate_bps=*/5678);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/BUILD.gn b/third_party/libwebrtc/modules/congestion_controller/rtp/BUILD.gn
new file mode 100644
index 0000000000..cd13332b7f
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/BUILD.gn
@@ -0,0 +1,100 @@
+# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") + +config("bwe_test_logging") { + if (rtc_enable_bwe_test_logging) { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=1" ] + } else { + defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0" ] + } +} + +rtc_library("control_handler") { + visibility = [ "*" ] + sources = [ + "control_handler.cc", + "control_handler.h", + ] + + deps = [ + "../../../api:sequence_checker", + "../../../api/transport:network_control", + "../../../api/units:data_rate", + "../../../api/units:data_size", + "../../../api/units:time_delta", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base/system:no_unique_address", + "../../../system_wrappers:field_trial", + "../../pacing", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} +rtc_library("transport_feedback") { + visibility = [ "*" ] + sources = [ + "transport_feedback_adapter.cc", + "transport_feedback_adapter.h", + "transport_feedback_demuxer.cc", + "transport_feedback_demuxer.h", + ] + + deps = [ + "../..:module_api_public", + "../../../api:sequence_checker", + "../../../api/transport:network_control", + "../../../api/units:data_size", + "../../../api/units:timestamp", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:network_route", + "../../../rtc_base:rtc_numerics", + "../../../rtc_base/network:sent_packet", + "../../../rtc_base/synchronization:mutex", + "../../../rtc_base/system:no_unique_address", + "../../../system_wrappers", + "../../../system_wrappers:field_trial", + "../../rtp_rtcp:rtp_rtcp_format", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/algorithm:container", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +if (rtc_include_tests) { + rtc_library("congestion_controller_unittests") { + testonly = true + + sources = [ + "transport_feedback_adapter_unittest.cc", + "transport_feedback_demuxer_unittest.cc", + ] + deps = [ + ":transport_feedback", + "../:congestion_controller", + "../../../api/transport:network_control", + "../../../logging:mocks", + "../../../rtc_base:checks", + "../../../rtc_base:safe_conversions", + "../../../rtc_base/network:sent_packet", + "../../../system_wrappers", + "../../../test:field_trial", + "../../../test:test_support", + "../../pacing", + "../../remote_bitrate_estimator", + "../../rtp_rtcp:rtp_rtcp_format", + "//testing/gmock", + ] + } +} diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.cc b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.cc new file mode 100644 index 0000000000..ffa373aeba --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/congestion_controller/rtp/control_handler.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "api/units/data_rate.h"
+#include "modules/pacing/pacing_controller.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+// By default, the pacer emergency-stops the encoder when its queue reaches a
+// high level; a field trial can disable this behavior.
+bool IsPacerEmergencyStopDisabled() {
+  return field_trial::IsEnabled("WebRTC-DisablePacerEmergencyStop");
+}
+
+}  // namespace
+
+CongestionControlHandler::CongestionControlHandler()
+    : disable_pacer_emergency_stop_(IsPacerEmergencyStopDisabled()) {
+  sequenced_checker_.Detach();
+}
+
+CongestionControlHandler::~CongestionControlHandler() {}
+
+void CongestionControlHandler::SetTargetRate(
+    TargetTransferRate new_target_rate) {
+  RTC_DCHECK_RUN_ON(&sequenced_checker_);
+  RTC_CHECK(new_target_rate.at_time.IsFinite());
+  last_incoming_ = new_target_rate;
+}
+
+void CongestionControlHandler::SetNetworkAvailability(bool network_available) {
+  RTC_DCHECK_RUN_ON(&sequenced_checker_);
+  network_available_ = network_available;
+}
+
+void CongestionControlHandler::SetPacerQueue(TimeDelta expected_queue_time) {
+  RTC_DCHECK_RUN_ON(&sequenced_checker_);
+  pacer_expected_queue_ms_ = expected_queue_time.ms();
+}
+
+absl::optional<TargetTransferRate> CongestionControlHandler::GetUpdate() {
+  RTC_DCHECK_RUN_ON(&sequenced_checker_);
+  if (!last_incoming_.has_value())
+    return absl::nullopt;
+  TargetTransferRate new_outgoing = *last_incoming_;
+  DataRate log_target_rate = new_outgoing.target_rate;
+  bool pause_encoding = false;
+  if (!network_available_) {
+    pause_encoding = true;
+  } else if (!disable_pacer_emergency_stop_ &&
+             pacer_expected_queue_ms_ >
+                 PacingController::kMaxExpectedQueueLength.ms()) {
+    pause_encoding = true;
+  }
+  if (pause_encoding)
+    new_outgoing.target_rate = DataRate::Zero();
+  if (!last_reported_ ||
+      last_reported_->target_rate != new_outgoing.target_rate ||
+      (!new_outgoing.target_rate.IsZero() &&
+       (last_reported_->network_estimate.loss_rate_ratio !=
+            new_outgoing.network_estimate.loss_rate_ratio ||
+        last_reported_->network_estimate.round_trip_time !=
+            new_outgoing.network_estimate.round_trip_time))) {
+    if (encoder_paused_in_last_report_ != pause_encoding)
+      RTC_LOG(LS_INFO) << "Bitrate estimate state changed, BWE: "
+                       << ToString(log_target_rate) << ".";
+    encoder_paused_in_last_report_ = pause_encoding;
+    last_reported_ = new_outgoing;
+    return new_outgoing;
+  }
+  return absl::nullopt;
+}
+
+}  // namespace webrtc
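A minimal sketch of how a send-side transport controller might drive this class. The wiring is hypothetical (the real caller is RtpTransportControllerSend) and `pacer_queue_time` stands in for whatever expected-queue-time value the pacer exposes:

#include "absl/types/optional.h"
#include "api/transport/network_types.h"
#include "api/units/time_delta.h"
#include "modules/congestion_controller/rtp/control_handler.h"

// Pushes the latest controller state into the handler and returns the rate
// to forward to the encoder, if anything changed. All calls must happen on
// the same task queue; the handler enforces this with a sequence checker.
absl::optional<webrtc::TargetTransferRate> PushUpdate(
    webrtc::CongestionControlHandler& handler,
    webrtc::TargetTransferRate target_rate,
    webrtc::TimeDelta pacer_queue_time,
    bool network_available) {
  handler.SetNetworkAvailability(network_available);
  handler.SetPacerQueue(pacer_queue_time);
  handler.SetTargetRate(target_rate);
  // Non-empty only when the rate, RTT, or loss estimate changed; the rate is
  // zeroed (pausing the encoder) while the network is down or the pacer
  // queue exceeds PacingController::kMaxExpectedQueueLength.
  return handler.GetUpdate();
}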
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.h b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.h
new file mode 100644
index 0000000000..d8e7263a02
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_RTP_CONTROL_HANDLER_H_
+#define MODULES_CONGESTION_CONTROLLER_RTP_CONTROL_HANDLER_H_
+
+#include <stdint.h>
+
+#include "absl/types/optional.h"
+#include "api/sequence_checker.h"
+#include "api/transport/network_types.h"
+#include "api/units/data_size.h"
+#include "api/units/time_delta.h"
+#include "rtc_base/system/no_unique_address.h"
+
+namespace webrtc {
+// This class observes the network controller state and routes calls to the
+// proper handler. It also keeps cached values for safe asynchronous use.
+// This ensures that things running on the worker queue cannot access state
+// in RtpTransportControllerSend, which would risk a data race on destruction
+// unless members were properly ordered.
+class CongestionControlHandler {
+ public:
+  CongestionControlHandler();
+  ~CongestionControlHandler();
+
+  CongestionControlHandler(const CongestionControlHandler&) = delete;
+  CongestionControlHandler& operator=(const CongestionControlHandler&) = delete;
+
+  void SetTargetRate(TargetTransferRate new_target_rate);
+  void SetNetworkAvailability(bool network_available);
+  void SetPacerQueue(TimeDelta expected_queue_time);
+  absl::optional<TargetTransferRate> GetUpdate();
+
+ private:
+  absl::optional<TargetTransferRate> last_incoming_;
+  absl::optional<TargetTransferRate> last_reported_;
+  bool network_available_ = true;
+  bool encoder_paused_in_last_report_ = false;
+
+  const bool disable_pacer_emergency_stop_;
+  int64_t pacer_expected_queue_ms_ = 0;
+
+  RTC_NO_UNIQUE_ADDRESS SequenceChecker sequenced_checker_;
+};
+}  // namespace webrtc
+#endif  // MODULES_CONGESTION_CONTROLLER_RTP_CONTROL_HANDLER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler_gn/moz.build
new file mode 100644
index 0000000000..6ee156d2f8
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("control_handler_gn") diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.cc b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.cc new file mode 100644 index 0000000000..e83d09d263 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.cc @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/congestion_controller/rtp/transport_feedback_adapter.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "api/units/timestamp.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+constexpr TimeDelta kSendTimeHistoryWindow = TimeDelta::Seconds(60);
+
+void InFlightBytesTracker::AddInFlightPacketBytes(
+    const PacketFeedback& packet) {
+  RTC_DCHECK(packet.sent.send_time.IsFinite());
+  auto it = in_flight_data_.find(packet.network_route);
+  if (it != in_flight_data_.end()) {
+    it->second += packet.sent.size;
+  } else {
+    in_flight_data_.insert({packet.network_route, packet.sent.size});
+  }
+}
+
+void InFlightBytesTracker::RemoveInFlightPacketBytes(
+    const PacketFeedback& packet) {
+  if (packet.sent.send_time.IsInfinite())
+    return;
+  auto it = in_flight_data_.find(packet.network_route);
+  if (it != in_flight_data_.end()) {
+    RTC_DCHECK_GE(it->second, packet.sent.size);
+    it->second -= packet.sent.size;
+    if (it->second.IsZero())
+      in_flight_data_.erase(it);
+  }
+}
+
+DataSize InFlightBytesTracker::GetOutstandingData(
+    const rtc::NetworkRoute& network_route) const {
+  auto it = in_flight_data_.find(network_route);
+  if (it != in_flight_data_.end()) {
+    return it->second;
+  } else {
+    return DataSize::Zero();
+  }
+}
+
+// Comparator for consistent map with NetworkRoute as key.
+bool InFlightBytesTracker::NetworkRouteComparator::operator()(
+    const rtc::NetworkRoute& a,
+    const rtc::NetworkRoute& b) const {
+  if (a.local.network_id() != b.local.network_id())
+    return a.local.network_id() < b.local.network_id();
+  if (a.remote.network_id() != b.remote.network_id())
+    return a.remote.network_id() < b.remote.network_id();
+
+  if (a.local.adapter_id() != b.local.adapter_id())
+    return a.local.adapter_id() < b.local.adapter_id();
+  if (a.remote.adapter_id() != b.remote.adapter_id())
+    return a.remote.adapter_id() < b.remote.adapter_id();
+
+  if (a.local.uses_turn() != b.local.uses_turn())
+    return a.local.uses_turn() < b.local.uses_turn();
+  if (a.remote.uses_turn() != b.remote.uses_turn())
+    return a.remote.uses_turn() < b.remote.uses_turn();
+
+  return a.connected < b.connected;
+}
+
+TransportFeedbackAdapter::TransportFeedbackAdapter() = default;
+
+void TransportFeedbackAdapter::AddPacket(const RtpPacketSendInfo& packet_info,
+                                         size_t overhead_bytes,
+                                         Timestamp creation_time) {
+  PacketFeedback packet;
+  packet.creation_time = creation_time;
+  packet.sent.sequence_number =
+      seq_num_unwrapper_.Unwrap(packet_info.transport_sequence_number);
+  packet.sent.size = DataSize::Bytes(packet_info.length + overhead_bytes);
+  packet.sent.audio = packet_info.packet_type == RtpPacketMediaType::kAudio;
+  packet.network_route = network_route_;
+  packet.sent.pacing_info = packet_info.pacing_info;
+
+  while (!history_.empty() &&
+         creation_time - history_.begin()->second.creation_time >
+             kSendTimeHistoryWindow) {
+    // TODO(sprang): Warn if erasing (too many) old items?
+    if (history_.begin()->second.sent.sequence_number > last_ack_seq_num_)
+      in_flight_.RemoveInFlightPacketBytes(history_.begin()->second);
+    history_.erase(history_.begin());
+  }
+  history_.insert(std::make_pair(packet.sent.sequence_number, packet));
+}
+
+absl::optional<SentPacket> TransportFeedbackAdapter::ProcessSentPacket(
+    const rtc::SentPacket& sent_packet) {
+  auto send_time = Timestamp::Millis(sent_packet.send_time_ms);
+  // TODO(srte): Only use one way to indicate that packet feedback is used.
+  if (sent_packet.info.included_in_feedback || sent_packet.packet_id != -1) {
+    int64_t unwrapped_seq_num =
+        seq_num_unwrapper_.Unwrap(sent_packet.packet_id);
+    auto it = history_.find(unwrapped_seq_num);
+    if (it != history_.end()) {
+      bool packet_retransmit = it->second.sent.send_time.IsFinite();
+      it->second.sent.send_time = send_time;
+      last_send_time_ = std::max(last_send_time_, send_time);
+      // TODO(srte): Don't do this on retransmit.
+      if (!pending_untracked_size_.IsZero()) {
+        if (send_time < last_untracked_send_time_)
+          RTC_LOG(LS_WARNING)
+              << "appending acknowledged data for out of order packet. (Diff: "
+              << ToString(last_untracked_send_time_ - send_time) << " ms.)";
+        it->second.sent.prior_unacked_data += pending_untracked_size_;
+        pending_untracked_size_ = DataSize::Zero();
+      }
+      if (!packet_retransmit) {
+        if (it->second.sent.sequence_number > last_ack_seq_num_)
+          in_flight_.AddInFlightPacketBytes(it->second);
+        it->second.sent.data_in_flight = GetOutstandingData();
+        return it->second.sent;
+      }
+    }
+  } else if (sent_packet.info.included_in_allocation) {
+    if (send_time < last_send_time_) {
+      RTC_LOG(LS_WARNING) << "ignoring untracked data for out of order packet.";
+    }
+    pending_untracked_size_ +=
+        DataSize::Bytes(sent_packet.info.packet_size_bytes);
+    last_untracked_send_time_ = std::max(last_untracked_send_time_, send_time);
+  }
+  return absl::nullopt;
+}
+
+absl::optional<TransportPacketsFeedback>
+TransportFeedbackAdapter::ProcessTransportFeedback(
+    const rtcp::TransportFeedback& feedback,
+    Timestamp feedback_receive_time) {
+  if (feedback.GetPacketStatusCount() == 0) {
+    RTC_LOG(LS_INFO) << "Empty transport feedback packet received.";
+    return absl::nullopt;
+  }
+
+  TransportPacketsFeedback msg;
+  msg.feedback_time = feedback_receive_time;
+
+  msg.prior_in_flight = in_flight_.GetOutstandingData(network_route_);
+  msg.packet_feedbacks =
+      ProcessTransportFeedbackInner(feedback, feedback_receive_time);
+  if (msg.packet_feedbacks.empty())
+    return absl::nullopt;
+
+  auto it = history_.find(last_ack_seq_num_);
+  if (it != history_.end()) {
+    msg.first_unacked_send_time = it->second.sent.send_time;
+  }
+  msg.data_in_flight = in_flight_.GetOutstandingData(network_route_);
+
+  return msg;
+}
+
+void TransportFeedbackAdapter::SetNetworkRoute(
+    const rtc::NetworkRoute& network_route) {
+  network_route_ = network_route;
+}
+
+DataSize TransportFeedbackAdapter::GetOutstandingData() const {
+  return in_flight_.GetOutstandingData(network_route_);
+}
+
+std::vector<PacketResult>
+TransportFeedbackAdapter::ProcessTransportFeedbackInner(
+    const rtcp::TransportFeedback& feedback,
+    Timestamp feedback_receive_time) {
+  // Add timestamp deltas to a local time base selected on first packet arrival.
+  // This won't be the true time base, but makes it easier to manually inspect
+  // time stamps.
+  if (last_timestamp_.IsInfinite()) {
+    current_offset_ = feedback_receive_time;
+  } else {
+    // TODO(srte): We shouldn't need to do rounding here.
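+    // GetBaseDelta() returns the (possibly wrapped) difference between this
+    // feedback's base time and the previous one; accumulating the deltas onto
+    // current_offset_ keeps the local receive timeline consistent across
+    // feedback messages.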
+    const TimeDelta delta = feedback.GetBaseDelta(last_timestamp_)
+                                .RoundDownTo(TimeDelta::Millis(1));
+    // Protect against assigning current_offset_ a negative value.
+    if (delta < Timestamp::Zero() - current_offset_) {
+      RTC_LOG(LS_WARNING) << "Unexpected feedback timestamp received.";
+      current_offset_ = feedback_receive_time;
+    } else {
+      current_offset_ += delta;
+    }
+  }
+  last_timestamp_ = feedback.BaseTime();
+
+  std::vector<PacketResult> packet_result_vector;
+  packet_result_vector.reserve(feedback.GetPacketStatusCount());
+
+  size_t failed_lookups = 0;
+  size_t ignored = 0;
+
+  feedback.ForAllPackets([&](uint16_t sequence_number,
+                             TimeDelta delta_since_base) {
+    int64_t seq_num = seq_num_unwrapper_.Unwrap(sequence_number);
+
+    if (seq_num > last_ack_seq_num_) {
+      // Starts at history_.begin() if last_ack_seq_num_ < 0, since any valid
+      // sequence number is >= 0.
+      for (auto it = history_.upper_bound(last_ack_seq_num_);
+           it != history_.upper_bound(seq_num); ++it) {
+        in_flight_.RemoveInFlightPacketBytes(it->second);
+      }
+      last_ack_seq_num_ = seq_num;
+    }
+
+    auto it = history_.find(seq_num);
+    if (it == history_.end()) {
+      ++failed_lookups;
+      return;
+    }
+
+    if (it->second.sent.send_time.IsInfinite()) {
+      // TODO(srte): Fix the tests that make this happen and make this a
+      // DCHECK.
+      RTC_DLOG(LS_ERROR)
+          << "Received feedback before packet was indicated as sent";
+      return;
+    }
+
+    PacketFeedback packet_feedback = it->second;
+    if (delta_since_base.IsFinite()) {
+      packet_feedback.receive_time =
+          current_offset_ + delta_since_base.RoundDownTo(TimeDelta::Millis(1));
+      // Note: Lost packets are not removed from history because they might be
+      // reported as received by a later feedback.
+      history_.erase(it);
+    }
+    if (packet_feedback.network_route == network_route_) {
+      PacketResult result;
+      result.sent_packet = packet_feedback.sent;
+      result.receive_time = packet_feedback.receive_time;
+      packet_result_vector.push_back(result);
+    } else {
+      ++ignored;
+    }
+  });
+
+  if (failed_lookups > 0) {
+    RTC_LOG(LS_WARNING) << "Failed to lookup send time for " << failed_lookups
+                        << " packet" << (failed_lookups > 1 ? "s" : "")
+                        << ". Send time history too small?";
+  }
+  if (ignored > 0) {
+    RTC_LOG(LS_INFO) << "Ignoring " << ignored
+                     << " packets because they were sent on a different route.";
+  }
+
+  return packet_result_vector;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.h b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.h
new file mode 100644
index 0000000000..7b1243b64b
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.h
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_ADAPTER_H_
+#define MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_ADAPTER_H_
+
+#include <stddef.h>
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "api/sequence_checker.h"
+#include "api/transport/network_types.h"
+#include "api/units/timestamp.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "rtc_base/network/sent_packet.h"
+#include "rtc_base/network_route.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+struct PacketFeedback {
+  PacketFeedback() = default;
+  // Time corresponding to when this object was created.
+  Timestamp creation_time = Timestamp::MinusInfinity();
+  SentPacket sent;
+  // Time corresponding to when the packet was received. Timestamped with the
+  // receiver's clock. For unreceived packets, Timestamp::PlusInfinity() is
+  // used.
+  Timestamp receive_time = Timestamp::PlusInfinity();
+
+  // The network route that this packet is associated with.
+  rtc::NetworkRoute network_route;
+};
+
+class InFlightBytesTracker {
+ public:
+  void AddInFlightPacketBytes(const PacketFeedback& packet);
+  void RemoveInFlightPacketBytes(const PacketFeedback& packet);
+  DataSize GetOutstandingData(const rtc::NetworkRoute& network_route) const;
+
+ private:
+  struct NetworkRouteComparator {
+    bool operator()(const rtc::NetworkRoute& a,
+                    const rtc::NetworkRoute& b) const;
+  };
+  std::map<rtc::NetworkRoute, DataSize, NetworkRouteComparator>
+      in_flight_data_;
+};
+
+class TransportFeedbackAdapter {
+ public:
+  TransportFeedbackAdapter();
+
+  void AddPacket(const RtpPacketSendInfo& packet_info,
+                 size_t overhead_bytes,
+                 Timestamp creation_time);
+  absl::optional<SentPacket> ProcessSentPacket(
+      const rtc::SentPacket& sent_packet);
+
+  absl::optional<TransportPacketsFeedback> ProcessTransportFeedback(
+      const rtcp::TransportFeedback& feedback,
+      Timestamp feedback_receive_time);
+
+  void SetNetworkRoute(const rtc::NetworkRoute& network_route);
+
+  DataSize GetOutstandingData() const;
+
+ private:
+  enum class SendTimeHistoryStatus { kNotAdded, kOk, kDuplicate };
+
+  std::vector<PacketResult> ProcessTransportFeedbackInner(
+      const rtcp::TransportFeedback& feedback,
+      Timestamp feedback_receive_time);
+
+  DataSize pending_untracked_size_ = DataSize::Zero();
+  Timestamp last_send_time_ = Timestamp::MinusInfinity();
+  Timestamp last_untracked_send_time_ = Timestamp::MinusInfinity();
+  RtpSequenceNumberUnwrapper seq_num_unwrapper_;
+  std::map<int64_t, PacketFeedback> history_;
+
+  // Sequence numbers are never negative; -1 is used since it is always less
+  // than any real sequence number.
+  int64_t last_ack_seq_num_ = -1;
+  InFlightBytesTracker in_flight_;
+
+  Timestamp current_offset_ = Timestamp::MinusInfinity();
+  Timestamp last_timestamp_ = Timestamp::MinusInfinity();
+
+  rtc::NetworkRoute network_route_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_ADAPTER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter_unittest.cc
new file mode 100644
index 0000000000..14a2b13831
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter_unittest.cc
@@ -0,0 +1,407 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/congestion_controller/rtp/transport_feedback_adapter.h"
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/clock.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::_;
+using ::testing::Invoke;
+
+namespace webrtc {
+
+namespace {
+constexpr uint32_t kSsrc = 8492;
+const PacedPacketInfo kPacingInfo0(0, 5, 2000);
+const PacedPacketInfo kPacingInfo1(1, 8, 4000);
+const PacedPacketInfo kPacingInfo2(2, 14, 7000);
+const PacedPacketInfo kPacingInfo3(3, 20, 10000);
+const PacedPacketInfo kPacingInfo4(4, 22, 10000);
+
+void ComparePacketFeedbackVectors(const std::vector<PacketResult>& truth,
+                                  const std::vector<PacketResult>& input) {
+  ASSERT_EQ(truth.size(), input.size());
+  size_t len = truth.size();
+  // truth contains the input data for the test, and input is what will be
+  // sent to the bandwidth estimator. truth.arrival_time_ms is used to
+  // populate the transport feedback messages. As these times may be changed
+  // (because of resolution limits in the packets, and because of the time
+  // base adjustment performed by the TransportFeedbackAdapter at the first
+  // packet), the truth[x].arrival_time and input[x].arrival_time may not be
+  // equal. However, the difference must be the same for all x.
+  TimeDelta arrival_time_delta = truth[0].receive_time - input[0].receive_time;
+  for (size_t i = 0; i < len; ++i) {
+    RTC_CHECK(truth[i].IsReceived());
+    if (input[i].IsReceived()) {
+      EXPECT_EQ(truth[i].receive_time - input[i].receive_time,
+                arrival_time_delta);
+    }
+    EXPECT_EQ(truth[i].sent_packet.send_time, input[i].sent_packet.send_time);
+    EXPECT_EQ(truth[i].sent_packet.sequence_number,
+              input[i].sent_packet.sequence_number);
+    EXPECT_EQ(truth[i].sent_packet.size, input[i].sent_packet.size);
+    EXPECT_EQ(truth[i].sent_packet.pacing_info,
+              input[i].sent_packet.pacing_info);
+  }
+}
+
+PacketResult CreatePacket(int64_t receive_time_ms,
+                          int64_t send_time_ms,
+                          int64_t sequence_number,
+                          size_t payload_size,
+                          const PacedPacketInfo& pacing_info) {
+  PacketResult res;
+  res.receive_time = Timestamp::Millis(receive_time_ms);
+  res.sent_packet.send_time = Timestamp::Millis(send_time_ms);
+  res.sent_packet.sequence_number = sequence_number;
+  res.sent_packet.size = DataSize::Bytes(payload_size);
+  res.sent_packet.pacing_info = pacing_info;
+  return res;
+}
+
+class MockStreamFeedbackObserver : public webrtc::StreamFeedbackObserver {
+ public:
+  MOCK_METHOD(void,
+              OnPacketFeedbackVector,
+              (std::vector<StreamPacketInfo> packet_feedback_vector),
+              (override));
+};
+
+}  // namespace
+
+class TransportFeedbackAdapterTest : public ::testing::Test {
+ public:
+  TransportFeedbackAdapterTest() : clock_(0) {}
+
+  virtual ~TransportFeedbackAdapterTest() {}
+
+  virtual void SetUp() { adapter_.reset(new TransportFeedbackAdapter()); }
+
+  virtual void TearDown() { adapter_.reset(); }
+
+ protected:
+  void OnReceivedEstimatedBitrate(uint32_t bitrate) {}
+
+  void OnReceivedRtcpReceiverReport(const ReportBlockList& report_blocks,
+                                    int64_t rtt,
+                                    int64_t now_ms) {}
+
+  void OnSentPacket(const PacketResult& packet_feedback) {
+    RtpPacketSendInfo packet_info;
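+    // Mirrors the order of the real send path: the packet is first registered
+    // with the adapter (AddPacket) and then reported as sent
+    // (ProcessSentPacket).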
+    packet_info.media_ssrc = kSsrc;
+    packet_info.transport_sequence_number =
+        packet_feedback.sent_packet.sequence_number;
+    packet_info.rtp_sequence_number = 0;
+    packet_info.length = packet_feedback.sent_packet.size.bytes();
+    packet_info.pacing_info = packet_feedback.sent_packet.pacing_info;
+    packet_info.packet_type = RtpPacketMediaType::kVideo;
+    adapter_->AddPacket(RtpPacketSendInfo(packet_info), 0u,
+                        clock_.CurrentTime());
+    adapter_->ProcessSentPacket(rtc::SentPacket(
+        packet_feedback.sent_packet.sequence_number,
+        packet_feedback.sent_packet.send_time.ms(), rtc::PacketInfo()));
+  }
+
+  SimulatedClock clock_;
+  std::unique_ptr<TransportFeedbackAdapter> adapter_;
+};
+
+TEST_F(TransportFeedbackAdapterTest, AdaptsFeedbackAndPopulatesSendTimes) {
+  std::vector<PacketResult> packets;
+  packets.push_back(CreatePacket(100, 200, 0, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(110, 210, 1, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(120, 220, 2, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(130, 230, 3, 1500, kPacingInfo1));
+  packets.push_back(CreatePacket(140, 240, 4, 1500, kPacingInfo1));
+
+  for (const auto& packet : packets)
+    OnSentPacket(packet);
+
+  rtcp::TransportFeedback feedback;
+  feedback.SetBase(packets[0].sent_packet.sequence_number,
+                   packets[0].receive_time);
+
+  for (const auto& packet : packets) {
+    EXPECT_TRUE(feedback.AddReceivedPacket(packet.sent_packet.sequence_number,
+                                           packet.receive_time));
+  }
+
+  feedback.Build();
+
+  auto result =
+      adapter_->ProcessTransportFeedback(feedback, clock_.CurrentTime());
+  ComparePacketFeedbackVectors(packets, result->packet_feedbacks);
+}
+
+TEST_F(TransportFeedbackAdapterTest, FeedbackVectorReportsUnreceived) {
+  std::vector<PacketResult> sent_packets = {
+      CreatePacket(100, 220, 0, 1500, kPacingInfo0),
+      CreatePacket(110, 210, 1, 1500, kPacingInfo0),
+      CreatePacket(120, 220, 2, 1500, kPacingInfo0),
+      CreatePacket(130, 230, 3, 1500, kPacingInfo0),
+      CreatePacket(140, 240, 4, 1500, kPacingInfo0),
+      CreatePacket(150, 250, 5, 1500, kPacingInfo0),
+      CreatePacket(160, 260, 6, 1500, kPacingInfo0)};
+
+  for (const auto& packet : sent_packets)
+    OnSentPacket(packet);
+
+  // Note: Important to include the last packet, as only unreceived packets in
+  // between received packets can be inferred.
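+  // Only packets 0, 2 and 6 are reported as received below; packets 1 and
+  // 3-5 lie between received packets, so the adapter can infer and report
+  // them as lost.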
+  std::vector<PacketResult> received_packets = {
+      sent_packets[0], sent_packets[2], sent_packets[6]};
+
+  rtcp::TransportFeedback feedback;
+  feedback.SetBase(received_packets[0].sent_packet.sequence_number,
+                   received_packets[0].receive_time);
+
+  for (const auto& packet : received_packets) {
+    EXPECT_TRUE(feedback.AddReceivedPacket(packet.sent_packet.sequence_number,
+                                           packet.receive_time));
+  }
+
+  feedback.Build();
+
+  auto res = adapter_->ProcessTransportFeedback(feedback, clock_.CurrentTime());
+  ComparePacketFeedbackVectors(sent_packets, res->packet_feedbacks);
+}
+
+TEST_F(TransportFeedbackAdapterTest, HandlesDroppedPackets) {
+  std::vector<PacketResult> packets;
+  packets.push_back(CreatePacket(100, 200, 0, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(110, 210, 1, 1500, kPacingInfo1));
+  packets.push_back(CreatePacket(120, 220, 2, 1500, kPacingInfo2));
+  packets.push_back(CreatePacket(130, 230, 3, 1500, kPacingInfo3));
+  packets.push_back(CreatePacket(140, 240, 4, 1500, kPacingInfo4));
+
+  const uint16_t kSendSideDropBefore = 1;
+  const uint16_t kReceiveSideDropAfter = 3;
+
+  for (const auto& packet : packets) {
+    if (packet.sent_packet.sequence_number >= kSendSideDropBefore)
+      OnSentPacket(packet);
+  }
+
+  rtcp::TransportFeedback feedback;
+  feedback.SetBase(packets[0].sent_packet.sequence_number,
+                   packets[0].receive_time);
+
+  for (const auto& packet : packets) {
+    if (packet.sent_packet.sequence_number <= kReceiveSideDropAfter) {
+      EXPECT_TRUE(feedback.AddReceivedPacket(
+          packet.sent_packet.sequence_number, packet.receive_time));
+    }
+  }
+
+  feedback.Build();
+
+  std::vector<PacketResult> expected_packets(
+      packets.begin() + kSendSideDropBefore,
+      packets.begin() + kReceiveSideDropAfter + 1);
+  // Packets that have timed out on the send-side have lost the
+  // information stored on the send-side. And they will not be reported to
+  // observers since we won't know that they come from the same networks.
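+  // With kSendSideDropBefore = 1 and kReceiveSideDropAfter = 3, only packets
+  // 1 through 3 survive both filters and are expected in the feedback vector.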
+
+  auto res = adapter_->ProcessTransportFeedback(feedback, clock_.CurrentTime());
+  ComparePacketFeedbackVectors(expected_packets, res->packet_feedbacks);
+}
+
+TEST_F(TransportFeedbackAdapterTest, SendTimeWrapsBothWays) {
+  TimeDelta kHighArrivalTime =
+      rtcp::TransportFeedback::kDeltaTick * (1 << 8) * ((1 << 23) - 1);
+  std::vector<PacketResult> packets;
+  packets.push_back(CreatePacket(kHighArrivalTime.ms() + 64, 210, 0, 1500,
+                                 PacedPacketInfo()));
+  packets.push_back(CreatePacket(kHighArrivalTime.ms() - 64, 210, 1, 1500,
+                                 PacedPacketInfo()));
+  packets.push_back(
+      CreatePacket(kHighArrivalTime.ms(), 220, 2, 1500, PacedPacketInfo()));
+
+  for (const auto& packet : packets)
+    OnSentPacket(packet);
+
+  for (size_t i = 0; i < packets.size(); ++i) {
+    std::unique_ptr<rtcp::TransportFeedback> feedback(
+        new rtcp::TransportFeedback());
+    feedback->SetBase(packets[i].sent_packet.sequence_number,
+                      packets[i].receive_time);
+
+    EXPECT_TRUE(feedback->AddReceivedPacket(
+        packets[i].sent_packet.sequence_number, packets[i].receive_time));
+
+    rtc::Buffer raw_packet = feedback->Build();
+    feedback = rtcp::TransportFeedback::ParseFrom(raw_packet.data(),
+                                                  raw_packet.size());
+
+    std::vector<PacketResult> expected_packets;
+    expected_packets.push_back(packets[i]);
+
+    auto res = adapter_->ProcessTransportFeedback(*feedback.get(),
+                                                  clock_.CurrentTime());
+    ComparePacketFeedbackVectors(expected_packets, res->packet_feedbacks);
+  }
+}
+
+TEST_F(TransportFeedbackAdapterTest, HandlesArrivalReordering) {
+  std::vector<PacketResult> packets;
+  packets.push_back(CreatePacket(120, 200, 0, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(110, 210, 1, 1500, kPacingInfo0));
+  packets.push_back(CreatePacket(100, 220, 2, 1500, kPacingInfo0));
+
+  for (const auto& packet : packets)
+    OnSentPacket(packet);
+
+  rtcp::TransportFeedback feedback;
+  feedback.SetBase(packets[0].sent_packet.sequence_number,
+                   packets[0].receive_time);
+
+  for (const auto& packet : packets) {
+    EXPECT_TRUE(feedback.AddReceivedPacket(packet.sent_packet.sequence_number,
+                                           packet.receive_time));
+  }
+
+  feedback.Build();
+
+  // Adapter keeps the packets ordered by sequence number (which is itself
+  // assigned by the order of transmission). Reordering by some other criteria,
+  // eg. arrival time, is up to the observers.
+  auto res = adapter_->ProcessTransportFeedback(feedback, clock_.CurrentTime());
+  ComparePacketFeedbackVectors(packets, res->packet_feedbacks);
+}
+
+TEST_F(TransportFeedbackAdapterTest, TimestampDeltas) {
+  std::vector<PacketResult> sent_packets;
+  // TODO(srte): Consider using us resolution in the constants.
+  const TimeDelta kSmallDelta = (rtcp::TransportFeedback::kDeltaTick * 0xFF)
+                                    .RoundDownTo(TimeDelta::Millis(1));
+  const TimeDelta kLargePositiveDelta = (rtcp::TransportFeedback::kDeltaTick *
+                                         std::numeric_limits<int16_t>::max())
+                                            .RoundDownTo(TimeDelta::Millis(1));
+  const TimeDelta kLargeNegativeDelta = (rtcp::TransportFeedback::kDeltaTick *
+                                         std::numeric_limits<int16_t>::min())
+                                            .RoundDownTo(TimeDelta::Millis(1));
+
+  PacketResult packet_feedback;
+  packet_feedback.sent_packet.sequence_number = 1;
+  packet_feedback.sent_packet.send_time = Timestamp::Millis(100);
+  packet_feedback.receive_time = Timestamp::Millis(200);
+  packet_feedback.sent_packet.size = DataSize::Bytes(1500);
+  sent_packets.push_back(packet_feedback);
+
+  // TODO(srte): This rounding maintains previous behavior, but should not be
+  // required.
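+
+  // Each subsequent packet is offset by a delta chosen to exercise the limits
+  // of the receive-delta encoding (0xFF ticks, int16_t max and int16_t min).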
+  packet_feedback.sent_packet.send_time += kSmallDelta;
+  packet_feedback.receive_time += kSmallDelta;
+  ++packet_feedback.sent_packet.sequence_number;
+  sent_packets.push_back(packet_feedback);
+
+  packet_feedback.sent_packet.send_time += kLargePositiveDelta;
+  packet_feedback.receive_time += kLargePositiveDelta;
+  ++packet_feedback.sent_packet.sequence_number;
+  sent_packets.push_back(packet_feedback);
+
+  packet_feedback.sent_packet.send_time += kLargeNegativeDelta;
+  packet_feedback.receive_time += kLargeNegativeDelta;
+  ++packet_feedback.sent_packet.sequence_number;
+  sent_packets.push_back(packet_feedback);
+
+  // Too large a delta - will need two feedback messages.
+  packet_feedback.sent_packet.send_time +=
+      kLargePositiveDelta + TimeDelta::Millis(1);
+  packet_feedback.receive_time += kLargePositiveDelta + TimeDelta::Millis(1);
+  ++packet_feedback.sent_packet.sequence_number;
+
+  // Packets will be added to send history.
+  for (const auto& packet : sent_packets)
+    OnSentPacket(packet);
+  OnSentPacket(packet_feedback);
+
+  // Create expected feedback and send into adapter.
+  std::unique_ptr<rtcp::TransportFeedback> feedback(
+      new rtcp::TransportFeedback());
+  feedback->SetBase(sent_packets[0].sent_packet.sequence_number,
+                    sent_packets[0].receive_time);
+
+  for (const auto& packet : sent_packets) {
+    EXPECT_TRUE(feedback->AddReceivedPacket(packet.sent_packet.sequence_number,
+                                            packet.receive_time));
+  }
+  EXPECT_FALSE(
+      feedback->AddReceivedPacket(packet_feedback.sent_packet.sequence_number,
+                                  packet_feedback.receive_time));
+
+  rtc::Buffer raw_packet = feedback->Build();
+  feedback =
+      rtcp::TransportFeedback::ParseFrom(raw_packet.data(), raw_packet.size());
+
+  std::vector<PacketResult> received_feedback;
+
+  EXPECT_TRUE(feedback.get() != nullptr);
+  auto res =
+      adapter_->ProcessTransportFeedback(*feedback.get(), clock_.CurrentTime());
+  ComparePacketFeedbackVectors(sent_packets, res->packet_feedbacks);
+
+  // Create a new feedback message and add the trailing item.
+  feedback.reset(new rtcp::TransportFeedback());
+  feedback->SetBase(packet_feedback.sent_packet.sequence_number,
+                    packet_feedback.receive_time);
+  EXPECT_TRUE(
+      feedback->AddReceivedPacket(packet_feedback.sent_packet.sequence_number,
+                                  packet_feedback.receive_time));
+  raw_packet = feedback->Build();
+  feedback =
+      rtcp::TransportFeedback::ParseFrom(raw_packet.data(), raw_packet.size());
+
+  EXPECT_TRUE(feedback.get() != nullptr);
+  {
+    auto res = adapter_->ProcessTransportFeedback(*feedback.get(),
+                                                  clock_.CurrentTime());
+    std::vector<PacketResult> expected_packets;
+    expected_packets.push_back(packet_feedback);
+    ComparePacketFeedbackVectors(expected_packets, res->packet_feedbacks);
+  }
+}
+
+TEST_F(TransportFeedbackAdapterTest, IgnoreDuplicatePacketSentCalls) {
+  auto packet = CreatePacket(100, 200, 0, 1500, kPacingInfo0);
+
+  // Add a packet and then mark it as sent.
+  RtpPacketSendInfo packet_info;
+  packet_info.media_ssrc = kSsrc;
+  packet_info.transport_sequence_number = packet.sent_packet.sequence_number;
+  packet_info.length = packet.sent_packet.size.bytes();
+  packet_info.pacing_info = packet.sent_packet.pacing_info;
+  packet_info.packet_type = RtpPacketMediaType::kVideo;
+  adapter_->AddPacket(packet_info, 0u, clock_.CurrentTime());
+  absl::optional<SentPacket> sent_packet = adapter_->ProcessSentPacket(
+      rtc::SentPacket(packet.sent_packet.sequence_number,
+                      packet.sent_packet.send_time.ms(), rtc::PacketInfo()));
+  EXPECT_TRUE(sent_packet.has_value());
+
+  // Call ProcessSentPacket() again with the same sequence number. This packet
+  // has already been marked as sent and the call should be ignored.
+  absl::optional<SentPacket> duplicate_packet = adapter_->ProcessSentPacket(
+      rtc::SentPacket(packet.sent_packet.sequence_number,
+                      packet.sent_packet.send_time.ms(), rtc::PacketInfo()));
+  EXPECT_FALSE(duplicate_packet.has_value());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.cc b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.cc
new file mode 100644
index 0000000000..469c21434a
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.cc
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/congestion_controller/rtp/transport_feedback_demuxer.h"
+#include "absl/algorithm/container.h"
+#include "modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h"
+
+namespace webrtc {
+namespace {
+static const size_t kMaxPacketsInHistory = 5000;
+}
+
+TransportFeedbackDemuxer::TransportFeedbackDemuxer() {
+  // In case the construction thread is different from where the registration
+  // and callbacks occur, detach from the construction thread.
+  observer_checker_.Detach();
+}
+
+void TransportFeedbackDemuxer::RegisterStreamFeedbackObserver(
+    std::vector<uint32_t> ssrcs,
+    StreamFeedbackObserver* observer) {
+  RTC_DCHECK_RUN_ON(&observer_checker_);
+  RTC_DCHECK(observer);
+  RTC_DCHECK(absl::c_find_if(observers_, [=](const auto& pair) {
+               return pair.second == observer;
+             }) == observers_.end());
+  observers_.push_back({ssrcs, observer});
+}
+
+void TransportFeedbackDemuxer::DeRegisterStreamFeedbackObserver(
+    StreamFeedbackObserver* observer) {
+  RTC_DCHECK_RUN_ON(&observer_checker_);
+  RTC_DCHECK(observer);
+  const auto it = absl::c_find_if(
+      observers_, [=](const auto& pair) { return pair.second == observer; });
+  RTC_DCHECK(it != observers_.end());
+  observers_.erase(it);
+}
+
+void TransportFeedbackDemuxer::AddPacket(const RtpPacketSendInfo& packet_info) {
+  RTC_DCHECK_RUN_ON(&observer_checker_);
+
+  StreamFeedbackObserver::StreamPacketInfo info;
+  info.ssrc = packet_info.media_ssrc;
+  info.rtp_sequence_number = packet_info.rtp_sequence_number;
+  info.received = false;
+  info.is_retransmission =
+      packet_info.packet_type == RtpPacketMediaType::kRetransmission;
+  history_.insert(
+      {seq_num_unwrapper_.Unwrap(packet_info.transport_sequence_number), info});
+
+  while (history_.size() > kMaxPacketsInHistory) {
+    history_.erase(history_.begin());
+  }
+}
+
+void TransportFeedbackDemuxer::OnTransportFeedback(
+    const rtcp::TransportFeedback& feedback) {
+  RTC_DCHECK_RUN_ON(&observer_checker_);
+
+  std::vector<StreamFeedbackObserver::StreamPacketInfo> stream_feedbacks;
+  feedback.ForAllPackets(
+      [&](uint16_t sequence_number, TimeDelta delta_since_base) {
+        RTC_DCHECK_RUN_ON(&observer_checker_);
+        auto it = history_.find(seq_num_unwrapper_.PeekUnwrap(sequence_number));
+        if (it != history_.end()) {
+          auto packet_info = it->second;
+          packet_info.received = delta_since_base.IsFinite();
+          stream_feedbacks.push_back(std::move(packet_info));
+          if (delta_since_base.IsFinite())
+            history_.erase(it);
+        }
+      });
+
+  for (auto& observer : observers_) {
+    std::vector<StreamFeedbackObserver::StreamPacketInfo> selected_feedback;
+    for (const auto& packet_info : stream_feedbacks) {
+      if (absl::c_count(observer.first, packet_info.ssrc) > 0) {
+        selected_feedback.push_back(packet_info);
+      }
+    }
+    if (!selected_feedback.empty()) {
+      observer.second->OnPacketFeedbackVector(std::move(selected_feedback));
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.h b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.h
new file mode 100644
index 0000000000..278c144b61
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.h
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_DEMUXER_H_
+#define MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_DEMUXER_H_
+
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "api/sequence_checker.h"
+#include "modules/include/module_common_types_public.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+#include "rtc_base/system/no_unique_address.h"
+
+namespace webrtc {
+
+// Implementation of StreamFeedbackProvider that provides a way for
+// implementations of StreamFeedbackObserver to register for feedback callbacks
+// for a given set of SSRCs.
+// Registration methods need to be called from the same execution context
+// (thread or task queue) and callbacks to
+// StreamFeedbackObserver::OnPacketFeedbackVector will be made in that same
+// context.
+// TODO(tommi): This appears to be the only implementation of this interface.
+// Do we need the interface?
+class TransportFeedbackDemuxer final : public StreamFeedbackProvider {
+ public:
+  TransportFeedbackDemuxer();
+
+  // Implements StreamFeedbackProvider interface
+  void RegisterStreamFeedbackObserver(
+      std::vector<uint32_t> ssrcs,
+      StreamFeedbackObserver* observer) override;
+  void DeRegisterStreamFeedbackObserver(
+      StreamFeedbackObserver* observer) override;
+  void AddPacket(const RtpPacketSendInfo& packet_info);
+  void OnTransportFeedback(const rtcp::TransportFeedback& feedback);
+
+ private:
+  RTC_NO_UNIQUE_ADDRESS SequenceChecker observer_checker_;
+  RtpSequenceNumberUnwrapper seq_num_unwrapper_
+      RTC_GUARDED_BY(&observer_checker_);
+  std::map<int64_t, StreamFeedbackObserver::StreamPacketInfo> history_
+      RTC_GUARDED_BY(&observer_checker_);
+
+  // Maps a set of ssrcs to the corresponding observer. Vectors are used
+  // rather than set/map to ensure that the processing order is consistent
+  // independently of the randomized ssrcs.
+  std::vector<std::pair<std::vector<uint32_t>, StreamFeedbackObserver*>>
+      observers_ RTC_GUARDED_BY(&observer_checker_);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_CONGESTION_CONTROLLER_RTP_TRANSPORT_FEEDBACK_DEMUXER_H_
diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer_unittest.cc b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer_unittest.cc
new file mode 100644
index 0000000000..52d8018bff
--- /dev/null
+++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer_unittest.cc
@@ -0,0 +1,99 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/congestion_controller/rtp/transport_feedback_demuxer.h"
+
+#include "modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::AllOf;
+using ::testing::ElementsAre;
+using ::testing::Field;
+using PacketInfo = StreamFeedbackObserver::StreamPacketInfo;
+
+static constexpr uint32_t kSsrc = 8492;
+
+class MockStreamFeedbackObserver : public webrtc::StreamFeedbackObserver {
+ public:
+  MOCK_METHOD(void,
+              OnPacketFeedbackVector,
+              (std::vector<StreamPacketInfo> packet_feedback_vector),
+              (override));
+};
+
+RtpPacketSendInfo CreatePacket(uint32_t ssrc,
+                               uint16_t rtp_sequence_number,
+                               int64_t transport_sequence_number,
+                               bool is_retransmission) {
+  RtpPacketSendInfo res;
+  res.media_ssrc = ssrc;
+  res.transport_sequence_number = transport_sequence_number;
+  res.rtp_sequence_number = rtp_sequence_number;
+  res.packet_type = is_retransmission ? RtpPacketMediaType::kRetransmission
+                                      : RtpPacketMediaType::kVideo;
+  return res;
+}
+}  // namespace
+
+TEST(TransportFeedbackDemuxerTest, ObserverSanity) {
+  TransportFeedbackDemuxer demuxer;
+  MockStreamFeedbackObserver mock;
+  demuxer.RegisterStreamFeedbackObserver({kSsrc}, &mock);
+
+  const uint16_t kRtpStartSeq = 55;
+  const int64_t kTransportStartSeq = 1;
+  demuxer.AddPacket(CreatePacket(kSsrc, kRtpStartSeq, kTransportStartSeq,
+                                 /*is_retransmission=*/false));
+  demuxer.AddPacket(CreatePacket(kSsrc, kRtpStartSeq + 1,
+                                 kTransportStartSeq + 1,
+                                 /*is_retransmission=*/false));
+  demuxer.AddPacket(CreatePacket(
+      kSsrc, kRtpStartSeq + 2, kTransportStartSeq + 2,
+      /*is_retransmission=*/true));
+
+  rtcp::TransportFeedback feedback;
+  feedback.SetBase(kTransportStartSeq, Timestamp::Millis(1));
+  ASSERT_TRUE(
+      feedback.AddReceivedPacket(kTransportStartSeq, Timestamp::Millis(1)));
+  // Drop middle packet.
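+  // The dropped middle packet is still delivered to the observer below, but
+  // with received == false, since it lies between two received packets.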
+ ASSERT_TRUE( + feedback.AddReceivedPacket(kTransportStartSeq + 2, Timestamp::Millis(3))); + + EXPECT_CALL( + mock, OnPacketFeedbackVector(ElementsAre( + AllOf(Field(&PacketInfo::received, true), + Field(&PacketInfo::ssrc, kSsrc), + Field(&PacketInfo::rtp_sequence_number, kRtpStartSeq), + Field(&PacketInfo::is_retransmission, false)), + AllOf(Field(&PacketInfo::received, false), + Field(&PacketInfo::ssrc, kSsrc), + Field(&PacketInfo::rtp_sequence_number, kRtpStartSeq + 1), + Field(&PacketInfo::is_retransmission, false)), + AllOf(Field(&PacketInfo::received, true), + Field(&PacketInfo::ssrc, kSsrc), + Field(&PacketInfo::rtp_sequence_number, kRtpStartSeq + 2), + Field(&PacketInfo::is_retransmission, true))))); + demuxer.OnTransportFeedback(feedback); + + demuxer.DeRegisterStreamFeedbackObserver(&mock); + + demuxer.AddPacket( + CreatePacket(kSsrc, kRtpStartSeq + 3, kTransportStartSeq + 3, false)); + rtcp::TransportFeedback second_feedback; + second_feedback.SetBase(kTransportStartSeq + 3, Timestamp::Millis(4)); + ASSERT_TRUE(second_feedback.AddReceivedPacket(kTransportStartSeq + 3, + Timestamp::Millis(4))); + + EXPECT_CALL(mock, OnPacketFeedbackVector).Times(0); + demuxer.OnTransportFeedback(second_feedback); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_gn/moz.build b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_gn/moz.build new file mode 100644 index 0000000000..eb5a2a87e0 --- /dev/null +++ b/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_adapter.cc", + "/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_demuxer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + 
DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("transport_feedback_gn") diff --git a/third_party/libwebrtc/modules/desktop_capture/BUILD.gn b/third_party/libwebrtc/modules/desktop_capture/BUILD.gn new file mode 100644 index 0000000000..060d4e8200 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/BUILD.gn @@ -0,0 +1,652 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import("//build/config/linux/gtk/gtk.gni") +import("//build/config/ui.gni") +import("../../webrtc.gni") + +use_desktop_capture_differ_sse2 = target_cpu == "x86" || target_cpu == "x64" + +config("x11_config") { + if (rtc_use_x11_extensions) { + defines = [ "WEBRTC_USE_X11" ] + } +} + +rtc_library("primitives") { + visibility = [ "*" ] + sources = [ + "desktop_capture_types.h", + "desktop_frame.cc", + "desktop_frame.h", + "desktop_geometry.cc", + "desktop_geometry.h", + "desktop_region.cc", + "desktop_region.h", + "shared_desktop_frame.cc", + "shared_desktop_frame.h", + "shared_memory.cc", + "shared_memory.h", + ] + + deps = [ + "../../api:scoped_refptr", + "../../rtc_base:checks", + "../../rtc_base:refcount", + "../../rtc_base/system:rtc_export", + "//third_party/libyuv", + ] +} + +if (rtc_include_tests) { + rtc_library("desktop_capture_modules_tests") { + testonly = true + + defines = [] + sources = [] + deps = [ + ":desktop_capture", + "../../api:function_view", + "../../api:scoped_refptr", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:platform_thread", + "../../rtc_base:random", + "../../rtc_base:timeutils", + ] + if (rtc_desktop_capture_supported) { + deps += [ + ":desktop_capture_mock", + ":primitives", + ":screen_drawer", + "../../rtc_base/third_party/base64", + "../../system_wrappers", + "../../test:test_support", + "../../test:video_test_support", + ] + sources += [ + "screen_capturer_integration_test.cc", + "screen_drawer_unittest.cc", + "window_finder_unittest.cc", + ] + + if ((is_linux || is_chromeos) && rtc_use_pipewire) { + configs += [ "../portal:gio" ] + } + + public_configs = [ ":x11_config" ] + + if (is_win) { + deps += [ "../../rtc_base/win:windows_version" ] + } + } + } + + if ((is_linux || is_chromeos) && rtc_use_pipewire) { + rtc_test("shared_screencast_stream_test") { + testonly = true + + sources = [ + "linux/wayland/shared_screencast_stream_unittest.cc", + "linux/wayland/test/test_screencast_stream_provider.cc", + "linux/wayland/test/test_screencast_stream_provider.h", + ] + + configs += [ "../portal:pipewire_all" ] + + deps = [ + ":desktop_capture", + ":desktop_capture_mock", + ":primitives", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:random", + "../../rtc_base:timeutils", + "../portal", + + # TODO(bugs.webrtc.org/9987): Remove this dep on rtc_base:rtc_base once + # rtc_base:threading is fully defined. 
+ "../../rtc_base:task_queue_for_test", + "../../rtc_base:threading", + "../../system_wrappers", + "../../test:test_main", + "../../test:test_support", + "//api/units:time_delta", + "//rtc_base:rtc_event", + ] + + data = [ "../../third_party/pipewire" ] + public_configs = [ "../portal:pipewire_config" ] + } + } + + rtc_library("desktop_capture_unittests") { + testonly = true + + defines = [] + sources = [ + "blank_detector_desktop_capturer_wrapper_unittest.cc", + "cropped_desktop_frame_unittest.cc", + "desktop_and_cursor_composer_unittest.cc", + "desktop_capturer_differ_wrapper_unittest.cc", + "desktop_frame_rotation_unittest.cc", + "desktop_frame_unittest.cc", + "desktop_geometry_unittest.cc", + "desktop_region_unittest.cc", + "differ_block_unittest.cc", + "fallback_desktop_capturer_wrapper_unittest.cc", + "mouse_cursor_monitor_unittest.cc", + "rgba_color_unittest.cc", + "test_utils.cc", + "test_utils.h", + "test_utils_unittest.cc", + ] + + if ((is_linux || is_chromeos) && rtc_use_pipewire) { + configs += [ "../portal:gio" ] + } + + deps = [ + ":desktop_capture", + ":desktop_capture_mock", + ":primitives", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:random", + "../../rtc_base:timeutils", + + # TODO(bugs.webrtc.org/9987): Remove this dep on rtc_base:rtc_base once + # rtc_base:threading is fully defined. + "../../rtc_base:task_queue_for_test", + "../../rtc_base:threading", + "../../system_wrappers", + "../../test:test_support", + ] + + if (is_win) { + sources += [ + "win/cursor_unittest.cc", + "win/cursor_unittest_resources.h", + "win/cursor_unittest_resources.rc", + "win/screen_capture_utils_unittest.cc", + "win/screen_capturer_win_directx_unittest.cc", + "win/test_support/test_window.cc", + "win/test_support/test_window.h", + "win/window_capture_utils_unittest.cc", + ] + deps += [ + "../../rtc_base/win:scoped_com_initializer", + "../../rtc_base/win:windows_version", + ] + } + + if (rtc_desktop_capture_supported) { + sources += [ + "screen_capturer_helper_unittest.cc", + "screen_capturer_unittest.cc", + "window_capturer_unittest.cc", + ] + if (is_mac) { + sources += [ "screen_capturer_mac_unittest.cc" ] + } + if (rtc_enable_win_wgc) { + sources += [ + "win/wgc_capture_source_unittest.cc", + "win/wgc_capturer_win_unittest.cc", + ] + } + deps += [ + ":desktop_capture_mock", + "../../system_wrappers:metrics", + ] + public_configs = [ ":x11_config" ] + } + } + + rtc_library("screen_drawer") { + testonly = true + + sources = [ + "screen_drawer.cc", + "screen_drawer.h", + ] + + if (is_linux || is_chromeos) { + sources += [ "screen_drawer_linux.cc" ] + libs = [ "X11" ] + } + + if (is_mac) { + sources += [ "screen_drawer_mac.cc" ] + } + + if (is_win) { + sources += [ "screen_drawer_win.cc" ] + } + + deps = [ + ":desktop_capture", + ":primitives", + "../../api:scoped_refptr", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../system_wrappers", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + + if (is_posix || is_fuchsia) { + sources += [ + "screen_drawer_lock_posix.cc", + "screen_drawer_lock_posix.h", + ] + } + } + + rtc_library("desktop_capture_mock") { + testonly = true + + sources = [ + "mock_desktop_capturer_callback.cc", + "mock_desktop_capturer_callback.h", + ] + + if ((is_linux || is_chromeos) && rtc_use_pipewire) { + configs += [ "../portal:gio" ] + } + + deps = [ + ":desktop_capture", + ":primitives", + "../../test:test_support", + ] + } +} + +# TODO(bugs.webrtc.org/14187): remove when all users are 
gone +if ((is_linux || is_chromeos) && rtc_use_pipewire) { + config("pipewire_config") { + configs = [ "../portal:pipewire_config" ] + } +} + +rtc_library("desktop_capture") { + visibility = [ "*" ] + defines = [] + deps = [] + public_configs = [ ":x11_config" ] + sources = [ + "blank_detector_desktop_capturer_wrapper.cc", + "blank_detector_desktop_capturer_wrapper.h", + "cropped_desktop_frame.cc", + "cropped_desktop_frame.h", + "cropping_window_capturer.cc", + "cropping_window_capturer.h", + "delegated_source_list_controller.h", + "desktop_and_cursor_composer.cc", + "desktop_and_cursor_composer.h", + "desktop_capture_metrics_helper.cc", + "desktop_capture_metrics_helper.h", + "desktop_capture_options.cc", + "desktop_capture_options.h", + "desktop_capturer.cc", + "desktop_capturer.h", + "desktop_capturer_differ_wrapper.cc", + "desktop_capturer_differ_wrapper.h", + "desktop_capturer_wrapper.cc", + "desktop_capturer_wrapper.h", + "desktop_frame_generator.cc", + "desktop_frame_generator.h", + "desktop_frame_rotation.cc", + "desktop_frame_rotation.h", + "differ_block.cc", + "differ_block.h", + "fake_desktop_capturer.cc", + "fake_desktop_capturer.h", + "fallback_desktop_capturer_wrapper.cc", + "fallback_desktop_capturer_wrapper.h", + "full_screen_application_handler.cc", + "full_screen_application_handler.h", + "full_screen_window_detector.cc", + "full_screen_window_detector.h", + "mouse_cursor.cc", + "mouse_cursor.h", + "mouse_cursor_monitor.h", + "resolution_tracker.cc", + "resolution_tracker.h", + "rgba_color.cc", + "rgba_color.h", + "screen_capture_frame_queue.h", + "screen_capturer_helper.cc", + "screen_capturer_helper.h", + "window_finder.cc", + "window_finder.h", + ] + if (is_linux && !is_castos && rtc_use_pipewire) { + sources += [ "desktop_capture_metadata.h" ] + } + if (is_mac) { + sources += [ + "mac/desktop_configuration.h", + "mac/desktop_configuration_monitor.cc", + "mac/desktop_configuration_monitor.h", + "mac/full_screen_mac_application_handler.cc", + "mac/full_screen_mac_application_handler.h", + "mac/window_list_utils.cc", + "mac/window_list_utils.h", + ] + deps += [ ":desktop_capture_objc" ] + } + if (rtc_use_x11_extensions || rtc_use_pipewire) { + sources += [ + "mouse_cursor_monitor_linux.cc", + "screen_capturer_linux.cc", + "window_capturer_linux.cc", + ] + } + + if (rtc_use_x11_extensions) { + sources += [ + "linux/x11/mouse_cursor_monitor_x11.cc", + "linux/x11/mouse_cursor_monitor_x11.h", + "linux/x11/screen_capturer_x11.cc", + "linux/x11/screen_capturer_x11.h", + "linux/x11/shared_x_display.cc", + "linux/x11/shared_x_display.h", + "linux/x11/window_capturer_x11.cc", + "linux/x11/window_capturer_x11.h", + "linux/x11/window_finder_x11.cc", + "linux/x11/window_finder_x11.h", + "linux/x11/window_list_utils.cc", + "linux/x11/window_list_utils.h", + "linux/x11/x_atom_cache.cc", + "linux/x11/x_atom_cache.h", + "linux/x11/x_error_trap.cc", + "linux/x11/x_error_trap.h", + "linux/x11/x_server_pixel_buffer.cc", + "linux/x11/x_server_pixel_buffer.h", + "linux/x11/x_window_property.cc", + "linux/x11/x_window_property.h", + ] + libs = [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrender", + "Xrandr", + "Xtst", + ] + } + + if (!is_win && !is_mac && !rtc_use_x11_extensions && !rtc_use_pipewire && + !is_fuchsia) { + sources += [ + "mouse_cursor_monitor_null.cc", + "screen_capturer_null.cc", + "window_capturer_null.cc", + ] + } + + deps += [ + ":primitives", + "../../api:function_view", + "../../api:make_ref_counted", + "../../api:refcountedbase", + 
"../../api:scoped_refptr", + "../../api:sequence_checker", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:random", + "../../rtc_base:stringutils", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:arch", + "../../rtc_base/system:no_unique_address", + "../../rtc_base/system:rtc_export", + "../../system_wrappers", + "../../system_wrappers:metrics", + ] + + if (is_fuchsia) { + sources += [ + "mouse_cursor_monitor_null.cc", + "screen_capturer_fuchsia.cc", + "screen_capturer_fuchsia.h", + "window_capturer_null.cc", + ] + deps += [ + "../../rtc_base:divide_round", + "//third_party/fuchsia-sdk/sdk/fidl/fuchsia.sysmem", + "//third_party/fuchsia-sdk/sdk/fidl/fuchsia.ui.composition", + "//third_party/fuchsia-sdk/sdk/fidl/fuchsia.ui.scenic", + "//third_party/fuchsia-sdk/sdk/pkg/scenic_cpp", + "//third_party/fuchsia-sdk/sdk/pkg/sys_cpp", + ] + } + + if (is_win) { + sources += [ + "cropping_window_capturer_win.cc", + "desktop_frame_win.cc", + "desktop_frame_win.h", + "mouse_cursor_monitor_win.cc", + "screen_capturer_win.cc", + "win/cursor.cc", + "win/cursor.h", + "win/d3d_device.cc", + "win/d3d_device.h", + "win/desktop.cc", + "win/desktop.h", + "win/desktop_capture_utils.cc", + "win/desktop_capture_utils.h", + "win/display_configuration_monitor.cc", + "win/display_configuration_monitor.h", + "win/dxgi_adapter_duplicator.cc", + "win/dxgi_adapter_duplicator.h", + "win/dxgi_context.cc", + "win/dxgi_context.h", + "win/dxgi_duplicator_controller.cc", + "win/dxgi_duplicator_controller.h", + "win/dxgi_frame.cc", + "win/dxgi_frame.h", + "win/dxgi_output_duplicator.cc", + "win/dxgi_output_duplicator.h", + "win/dxgi_texture.cc", + "win/dxgi_texture.h", + "win/dxgi_texture_mapping.cc", + "win/dxgi_texture_mapping.h", + "win/dxgi_texture_staging.cc", + "win/dxgi_texture_staging.h", + "win/full_screen_win_application_handler.cc", + "win/full_screen_win_application_handler.h", + "win/scoped_gdi_object.h", + "win/scoped_thread_desktop.cc", + "win/scoped_thread_desktop.h", + "win/screen_capture_utils.cc", + "win/screen_capture_utils.h", + "win/screen_capturer_win_directx.cc", + "win/screen_capturer_win_directx.h", + "win/screen_capturer_win_gdi.cc", + "win/screen_capturer_win_gdi.h", + "win/screen_capturer_win_magnifier.cc", + "win/screen_capturer_win_magnifier.h", + "win/selected_window_context.cc", + "win/selected_window_context.h", + "win/window_capture_utils.cc", + "win/window_capture_utils.h", + "win/window_capturer_win_gdi.cc", + "win/window_capturer_win_gdi.h", + "window_capturer_win.cc", + "window_finder_win.cc", + "window_finder_win.h", + ] + libs = [ + "d3d11.lib", + "dxgi.lib", + ] + deps += [ + "../../rtc_base:win32", + "../../rtc_base/win:create_direct3d_device", + "../../rtc_base/win:get_activation_factory", + "../../rtc_base/win:windows_version", + ] + } + + absl_deps = [ + "//third_party/abseil-cpp/absl/memory", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + if (rtc_use_x11_extensions) { + deps += [ "../../rtc_base:sanitizer" ] + } + + deps += [ "//third_party/libyuv" ] + + if (use_desktop_capture_differ_sse2) { + deps += [ ":desktop_capture_differ_sse2" ] + } + + if (rtc_use_pipewire) { + sources += [ + "linux/wayland/base_capturer_pipewire.cc", + "linux/wayland/base_capturer_pipewire.h", + "linux/wayland/egl_dmabuf.cc", + "linux/wayland/egl_dmabuf.h", + "linux/wayland/mouse_cursor_monitor_pipewire.cc", + 
"linux/wayland/mouse_cursor_monitor_pipewire.h", + "linux/wayland/portal_request_response.h", + "linux/wayland/restore_token_manager.cc", + "linux/wayland/restore_token_manager.h", + "linux/wayland/scoped_glib.h", + "linux/wayland/screen_capture_portal_interface.cc", + "linux/wayland/screen_capture_portal_interface.h", + "linux/wayland/screencast_portal.cc", + "linux/wayland/screencast_portal.h", + "linux/wayland/screencast_stream_utils.cc", + "linux/wayland/screencast_stream_utils.h", + "linux/wayland/shared_screencast_stream.cc", + "linux/wayland/shared_screencast_stream.h", + "linux/wayland/xdg_desktop_portal_utils.h", + "linux/wayland/xdg_session_details.h", + ] + + configs += [ "../portal:pipewire_all" ] + + public_configs += [ "../portal:pipewire_config" ] + + deps += [ + "../../rtc_base:sanitizer", + "../portal", + ] + } + + if (rtc_enable_win_wgc) { + sources += [ + "win/wgc_capture_session.cc", + "win/wgc_capture_session.h", + "win/wgc_capture_source.cc", + "win/wgc_capture_source.h", + "win/wgc_capturer_win.cc", + "win/wgc_capturer_win.h", + "win/wgc_desktop_frame.cc", + "win/wgc_desktop_frame.h", + ] + libs += [ "dwmapi.lib" ] + deps += [ + "../../rtc_base:rtc_event", + "../../rtc_base/win:hstring", + ] + } +} + +if (is_mac) { + rtc_library("desktop_capture_objc") { + # This target, needs to be separated from ":desktop_capture" because + # that is the C++ part of the target while this one is the Obj-C++ part. + # Aside from this, both represent a "desktop_capture" target. + # This target separation based on programming languages introduces a + # dependency cycle between ":desktop_capture" and + # ":desktop_capture_objc". + # To break this, ":desktop_capture_objc" shares some .h files with + # ":desktop_capture" but when external targets need one of these + # headers, they should depend on ":desktop_capture" and consider + # this target as private. + visibility = [ ":desktop_capture" ] + sources = [ + "delegated_source_list_controller.h", + "desktop_capture_options.h", + "desktop_capturer.h", + "full_screen_application_handler.h", + "full_screen_window_detector.h", + "mac/desktop_configuration.h", + "mac/desktop_configuration.mm", + "mac/desktop_configuration_monitor.h", + "mac/desktop_frame_cgimage.h", + "mac/desktop_frame_cgimage.mm", + "mac/desktop_frame_iosurface.h", + "mac/desktop_frame_iosurface.mm", + "mac/desktop_frame_provider.h", + "mac/desktop_frame_provider.mm", + "mac/screen_capturer_mac.h", + "mac/screen_capturer_mac.mm", + "mac/window_list_utils.h", + "mouse_cursor.h", + "mouse_cursor_monitor.h", + "mouse_cursor_monitor_mac.mm", + "screen_capture_frame_queue.h", + "screen_capturer_darwin.mm", + "screen_capturer_helper.h", + "window_capturer_mac.mm", + "window_finder.h", + "window_finder_mac.h", + "window_finder_mac.mm", + ] + deps = [ + ":primitives", + "../../api:function_view", + "../../api:refcountedbase", + "../../api:scoped_refptr", + "../../api:sequence_checker", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:rtc_export", + "../../sdk:helpers_objc", + ] + frameworks = [ + "AppKit.framework", + "IOKit.framework", + "IOSurface.framework", + ] + } +} + +if (use_desktop_capture_differ_sse2) { + # Have to be compiled as a separate target because it needs to be compiled + # with SSE2 enabled. 
+ rtc_library("desktop_capture_differ_sse2") { + visibility = [ ":*" ] + sources = [ + "differ_vector_sse2.cc", + "differ_vector_sse2.h", + ] + + if (is_posix || is_fuchsia) { + cflags = [ "-msse2" ] + } + } +} diff --git a/third_party/libwebrtc/modules/desktop_capture/DEPS b/third_party/libwebrtc/modules/desktop_capture/DEPS new file mode 100644 index 0000000000..8c894c4430 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/DEPS @@ -0,0 +1,19 @@ +include_rules = [ + "+system_wrappers", + "+third_party/libyuv", +] + +specific_include_rules = { + "desktop_frame_cgimage\.h": [ + "+sdk/objc", + ], + "desktop_frame_iosurface\.h": [ + "+sdk/objc", + ], + "desktop_frame_provider\.h": [ + "+sdk/objc", + ], + "screen_capturer_mac\.mm": [ + "+sdk/objc", + ], +} diff --git a/third_party/libwebrtc/modules/desktop_capture/OWNERS b/third_party/libwebrtc/modules/desktop_capture/OWNERS new file mode 100644 index 0000000000..e3bc32ee5c --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/OWNERS @@ -0,0 +1,2 @@ +alcooper@chromium.org +mfoltz@chromium.org diff --git a/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.cc b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.cc new file mode 100644 index 0000000000..8e56ffc3fd --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h" + +#include + +#include + +#include "modules/desktop_capture/desktop_geometry.h" +#include "modules/desktop_capture/desktop_region.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +BlankDetectorDesktopCapturerWrapper::BlankDetectorDesktopCapturerWrapper( + std::unique_ptr capturer, + RgbaColor blank_pixel, + bool check_per_capture) + : capturer_(std::move(capturer)), + blank_pixel_(blank_pixel), + check_per_capture_(check_per_capture) { + RTC_DCHECK(capturer_); +} + +BlankDetectorDesktopCapturerWrapper::~BlankDetectorDesktopCapturerWrapper() = + default; + +void BlankDetectorDesktopCapturerWrapper::Start( + DesktopCapturer::Callback* callback) { + callback_ = callback; + capturer_->Start(this); +} + +void BlankDetectorDesktopCapturerWrapper::SetSharedMemoryFactory( + std::unique_ptr shared_memory_factory) { + capturer_->SetSharedMemoryFactory(std::move(shared_memory_factory)); +} + +void BlankDetectorDesktopCapturerWrapper::CaptureFrame() { + RTC_DCHECK(callback_); + capturer_->CaptureFrame(); +} + +void BlankDetectorDesktopCapturerWrapper::SetExcludedWindow(WindowId window) { + capturer_->SetExcludedWindow(window); +} + +bool BlankDetectorDesktopCapturerWrapper::GetSourceList(SourceList* sources) { + return capturer_->GetSourceList(sources); +} + +bool BlankDetectorDesktopCapturerWrapper::SelectSource(SourceId id) { + if (check_per_capture_) { + // If we start capturing a new source, we must reset these members + // so we don't short circuit the blank detection logic. 
+    is_first_frame_ = true;
+    non_blank_frame_received_ = false;
+  }
+
+  return capturer_->SelectSource(id);
+}
+
+bool BlankDetectorDesktopCapturerWrapper::FocusOnSelectedSource() {
+  return capturer_->FocusOnSelectedSource();
+}
+
+bool BlankDetectorDesktopCapturerWrapper::IsOccluded(const DesktopVector& pos) {
+  return capturer_->IsOccluded(pos);
+}
+
+void BlankDetectorDesktopCapturerWrapper::OnCaptureResult(
+    Result result,
+    std::unique_ptr<DesktopFrame> frame) {
+  RTC_DCHECK(callback_);
+  if (result != Result::SUCCESS || non_blank_frame_received_) {
+    callback_->OnCaptureResult(result, std::move(frame));
+    return;
+  }
+
+  if (!frame) {
+    // The capturer may deliver an empty frame; the blank detector treats it
+    // as a blank frame.
+    callback_->OnCaptureResult(Result::ERROR_TEMPORARY,
+                               std::unique_ptr<DesktopFrame>());
+    return;
+  }
+
+  // If nothing has changed in the current frame, we do not need to check it
+  // again.
+  if (!frame->updated_region().is_empty() || is_first_frame_) {
+    last_frame_is_blank_ = IsBlankFrame(*frame);
+    is_first_frame_ = false;
+  }
+  RTC_HISTOGRAM_BOOLEAN("WebRTC.DesktopCapture.BlankFrameDetected",
+                        last_frame_is_blank_);
+  if (!last_frame_is_blank_) {
+    non_blank_frame_received_ = true;
+    callback_->OnCaptureResult(Result::SUCCESS, std::move(frame));
+    return;
+  }
+
+  callback_->OnCaptureResult(Result::ERROR_TEMPORARY,
+                             std::unique_ptr<DesktopFrame>());
+}
+
+bool BlankDetectorDesktopCapturerWrapper::IsBlankFrame(
+    const DesktopFrame& frame) const {
+  // We will check 7489 pixels for a frame with 1024 x 768 resolution.
+  for (int i = 0; i < frame.size().width() * frame.size().height(); i += 105) {
+    const int x = i % frame.size().width();
+    const int y = i / frame.size().width();
+    if (!IsBlankPixel(frame, x, y)) {
+      return false;
+    }
+  }
+
+  // Also verify the pixel in the center, which the sampling stride above may
+  // step over.
+  return IsBlankPixel(frame, frame.size().width() / 2,
+                      frame.size().height() / 2);
+}
+
+bool BlankDetectorDesktopCapturerWrapper::IsBlankPixel(
+    const DesktopFrame& frame,
+    int x,
+    int y) const {
+  uint8_t* pixel_data = frame.GetFrameDataAtPos(DesktopVector(x, y));
+  return RgbaColor(pixel_data) == blank_pixel_;
+}
+
+}  // namespace webrtc
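The sampling scheme in IsBlankFrame() above visits every 105th pixel in row-major order (1024 * 768 / 105 = 7489 samples for the resolution quoted in its comment) and then explicitly checks the center pixel, which the fixed stride may skip. A minimal self-contained sketch of the same pattern, using made-up names and a plain pixel buffer in place of DesktopFrame:

    #include <cstdint>
    #include <vector>

    // Illustrative only: mirrors the stride-105 sampling of IsBlankFrame().
    bool AllSampledPixelsEqual(const std::vector<uint32_t>& pixels,
                               int width, int height, uint32_t blank) {
      for (int i = 0; i < width * height; i += 105) {
        if (pixels[i] != blank)
          return false;  // One non-blank sample proves the frame is not blank.
      }
      // The regular stride can step over the center, so check it explicitly.
      return pixels[(height / 2) * width + (width / 2)] == blank;
    }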
diff --git a/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h
new file mode 100644
index 0000000000..d10f9cf725
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h
@@ -0,0 +1,83 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_BLANK_DETECTOR_DESKTOP_CAPTURER_WRAPPER_H_
+#define MODULES_DESKTOP_CAPTURE_BLANK_DETECTOR_DESKTOP_CAPTURER_WRAPPER_H_
+
+#include <memory>
+
+#include "modules/desktop_capture/desktop_capture_types.h"
+#include "modules/desktop_capture/desktop_capturer.h"
+#include "modules/desktop_capture/desktop_frame.h"
+#include "modules/desktop_capture/rgba_color.h"
+#include "modules/desktop_capture/shared_memory.h"
+
+namespace webrtc {
+
+// A DesktopCapturer wrapper that inspects the frames returned by its owned
+// DesktopCapturer implementation. If the sampled pixels returned by the
+// DesktopCapturer implementation are all equal to the blank pixel, this
+// wrapper returns ERROR_TEMPORARY. If the DesktopCapturer implementation
+// fails too many times, this wrapper returns ERROR_PERMANENT.
+class BlankDetectorDesktopCapturerWrapper final
+    : public DesktopCapturer,
+      public DesktopCapturer::Callback {
+ public:
+  // Creates BlankDetectorDesktopCapturerWrapper. The wrapper takes ownership
+  // of `capturer`. `blank_pixel` is the color an unmodified (blank) frame
+  // from `capturer` is expected to contain.
+  BlankDetectorDesktopCapturerWrapper(std::unique_ptr<DesktopCapturer> capturer,
+                                      RgbaColor blank_pixel,
+                                      bool check_per_capture = false);
+  ~BlankDetectorDesktopCapturerWrapper() override;
+
+  // DesktopCapturer interface.
+  void Start(DesktopCapturer::Callback* callback) override;
+  void SetSharedMemoryFactory(
+      std::unique_ptr<SharedMemoryFactory> shared_memory_factory) override;
+  void CaptureFrame() override;
+  void SetExcludedWindow(WindowId window) override;
+  bool GetSourceList(SourceList* sources) override;
+  bool SelectSource(SourceId id) override;
+  bool FocusOnSelectedSource() override;
+  bool IsOccluded(const DesktopVector& pos) override;
+
+ private:
+  // DesktopCapturer::Callback interface.
+  void OnCaptureResult(Result result,
+                       std::unique_ptr<DesktopFrame> frame) override;
+
+  bool IsBlankFrame(const DesktopFrame& frame) const;
+
+  // Detects whether the pixel at (x, y) equals `blank_pixel_`.
+  bool IsBlankPixel(const DesktopFrame& frame, int x, int y) const;
+
+  const std::unique_ptr<DesktopCapturer> capturer_;
+  const RgbaColor blank_pixel_;
+
+  // Whether a non-blank frame has been received.
+  bool non_blank_frame_received_ = false;
+
+  // Whether the last frame is blank.
+  bool last_frame_is_blank_ = false;
+
+  // Whether the current frame is the first frame.
+  bool is_first_frame_ = true;
+
+  // Whether blank inspection is performed per capture instead of once for
+  // all screens or windows.
+  bool check_per_capture_ = false;
+
+  DesktopCapturer::Callback* callback_ = nullptr;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_BLANK_DETECTOR_DESKTOP_CAPTURER_WRAPPER_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper_unittest.cc b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper_unittest.cc
new file mode 100644
index 0000000000..25a81edd89
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper_unittest.cc
@@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/desktop_capture/blank_detector_desktop_capturer_wrapper.h" + +#include +#include + +#include "modules/desktop_capture/desktop_capturer.h" +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/desktop_frame_generator.h" +#include "modules/desktop_capture/desktop_geometry.h" +#include "modules/desktop_capture/desktop_region.h" +#include "modules/desktop_capture/fake_desktop_capturer.h" +#include "test/gtest.h" + +namespace webrtc { + +class BlankDetectorDesktopCapturerWrapperTest + : public ::testing::Test, + public DesktopCapturer::Callback { + public: + BlankDetectorDesktopCapturerWrapperTest(); + ~BlankDetectorDesktopCapturerWrapperTest() override; + + protected: + void PerfTest(DesktopCapturer* capturer); + + const int frame_width_ = 1024; + const int frame_height_ = 768; + std::unique_ptr wrapper_; + DesktopCapturer* capturer_ = nullptr; + BlackWhiteDesktopFramePainter painter_; + int num_frames_captured_ = 0; + DesktopCapturer::Result last_result_ = DesktopCapturer::Result::SUCCESS; + std::unique_ptr last_frame_; + + private: + // DesktopCapturer::Callback interface. + void OnCaptureResult(DesktopCapturer::Result result, + std::unique_ptr frame) override; + + PainterDesktopFrameGenerator frame_generator_; +}; + +BlankDetectorDesktopCapturerWrapperTest:: + BlankDetectorDesktopCapturerWrapperTest() { + frame_generator_.size()->set(frame_width_, frame_height_); + frame_generator_.set_desktop_frame_painter(&painter_); + std::unique_ptr capturer(new FakeDesktopCapturer()); + FakeDesktopCapturer* fake_capturer = + static_cast(capturer.get()); + fake_capturer->set_frame_generator(&frame_generator_); + capturer_ = fake_capturer; + wrapper_.reset(new BlankDetectorDesktopCapturerWrapper( + std::move(capturer), RgbaColor(0, 0, 0, 0))); + wrapper_->Start(this); +} + +BlankDetectorDesktopCapturerWrapperTest:: + ~BlankDetectorDesktopCapturerWrapperTest() = default; + +void BlankDetectorDesktopCapturerWrapperTest::OnCaptureResult( + DesktopCapturer::Result result, + std::unique_ptr frame) { + last_result_ = result; + last_frame_ = std::move(frame); + num_frames_captured_++; +} + +void BlankDetectorDesktopCapturerWrapperTest::PerfTest( + DesktopCapturer* capturer) { + for (int i = 0; i < 10000; i++) { + capturer->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, i + 1); + } +} + +TEST_F(BlankDetectorDesktopCapturerWrapperTest, ShouldDetectBlankFrame) { + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 1); + ASSERT_EQ(last_result_, DesktopCapturer::Result::ERROR_TEMPORARY); + ASSERT_FALSE(last_frame_); +} + +TEST_F(BlankDetectorDesktopCapturerWrapperTest, ShouldPassBlankDetection) { + painter_.updated_region()->AddRect(DesktopRect::MakeXYWH(0, 0, 100, 100)); + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 1); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + + painter_.updated_region()->AddRect( + DesktopRect::MakeXYWH(frame_width_ - 100, frame_height_ - 100, 100, 100)); + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 2); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + + painter_.updated_region()->AddRect( + DesktopRect::MakeXYWH(0, frame_height_ - 100, 100, 100)); + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 3); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + + painter_.updated_region()->AddRect( + DesktopRect::MakeXYWH(frame_width_ - 100, 0, 100, 100)); + 
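+  // Each AddRect() call above marks part of the frame as updated, which
+  // forces the wrapper to re-run its blank inspection on the next
+  // CaptureFrame().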
wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 4); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + + painter_.updated_region()->AddRect(DesktopRect::MakeXYWH( + (frame_width_ >> 1) - 50, (frame_height_ >> 1) - 50, 100, 100)); + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 5); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); +} + +TEST_F(BlankDetectorDesktopCapturerWrapperTest, + ShouldNotCheckAfterANonBlankFrameReceived) { + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 1); + ASSERT_EQ(last_result_, DesktopCapturer::Result::ERROR_TEMPORARY); + ASSERT_FALSE(last_frame_); + + painter_.updated_region()->AddRect( + DesktopRect::MakeXYWH(frame_width_ - 100, 0, 100, 100)); + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, 2); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + + for (int i = 0; i < 100; i++) { + wrapper_->CaptureFrame(); + ASSERT_EQ(num_frames_captured_, i + 3); + ASSERT_EQ(last_result_, DesktopCapturer::Result::SUCCESS); + ASSERT_TRUE(last_frame_); + } +} + +// There is no perceptible impact by using BlankDetectorDesktopCapturerWrapper. +// i.e. less than 0.2ms per frame. +// [ OK ] DISABLED_Performance (10210 ms) +// [ OK ] DISABLED_PerformanceComparison (8791 ms) +TEST_F(BlankDetectorDesktopCapturerWrapperTest, DISABLED_Performance) { + PerfTest(wrapper_.get()); +} + +TEST_F(BlankDetectorDesktopCapturerWrapperTest, + DISABLED_PerformanceComparison) { + capturer_->Start(this); + PerfTest(capturer_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.cc b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.cc new file mode 100644 index 0000000000..54488b7d62 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/desktop_capture/cropped_desktop_frame.h" + +#include +#include + +#include "modules/desktop_capture/desktop_region.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// A DesktopFrame that is a sub-rect of another DesktopFrame. 
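+// It shares the underlying pixel buffer with the wrapped frame: the data
+// pointer is advanced to the sub-rect's top-left pixel and the original
+// stride is kept, so cropping copies no pixel data.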
+class CroppedDesktopFrame : public DesktopFrame { + public: + CroppedDesktopFrame(std::unique_ptr frame, + const DesktopRect& rect); + + CroppedDesktopFrame(const CroppedDesktopFrame&) = delete; + CroppedDesktopFrame& operator=(const CroppedDesktopFrame&) = delete; + + private: + const std::unique_ptr frame_; +}; + +std::unique_ptr CreateCroppedDesktopFrame( + std::unique_ptr frame, + const DesktopRect& rect) { + RTC_DCHECK(frame); + + DesktopRect intersection = DesktopRect::MakeSize(frame->size()); + intersection.IntersectWith(rect); + if (intersection.is_empty()) { + return nullptr; + } + + if (frame->size().equals(rect.size())) { + return frame; + } + + return std::unique_ptr( + new CroppedDesktopFrame(std::move(frame), intersection)); +} + +CroppedDesktopFrame::CroppedDesktopFrame(std::unique_ptr frame, + const DesktopRect& rect) + : DesktopFrame(rect.size(), + frame->stride(), + frame->GetFrameDataAtPos(rect.top_left()), + frame->shared_memory()), + frame_(std::move(frame)) { + MoveFrameInfoFrom(frame_.get()); + set_top_left(frame_->top_left().add(rect.top_left())); + mutable_updated_region()->IntersectWith(rect); + mutable_updated_region()->Translate(-rect.left(), -rect.top()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.h b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.h new file mode 100644 index 0000000000..5c672c7d32 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_DESKTOP_CAPTURE_CROPPED_DESKTOP_FRAME_H_ +#define MODULES_DESKTOP_CAPTURE_CROPPED_DESKTOP_FRAME_H_ + +#include + +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/desktop_geometry.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +// Creates a DesktopFrame to contain only the area of `rect` in the original +// `frame`. +// `frame` should not be nullptr. `rect` is in `frame` coordinate, i.e. +// `frame`->top_left() does not impact the area of `rect`. +// Returns nullptr frame if `rect` is not contained by the bounds of `frame`. +std::unique_ptr RTC_EXPORT +CreateCroppedDesktopFrame(std::unique_ptr frame, + const DesktopRect& rect); + +} // namespace webrtc + +#endif // MODULES_DESKTOP_CAPTURE_CROPPED_DESKTOP_FRAME_H_ diff --git a/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame_unittest.cc b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame_unittest.cc new file mode 100644 index 0000000000..9becf69636 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame_unittest.cc @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/desktop_capture/cropped_desktop_frame.h" + +#include +#include + +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/shared_desktop_frame.h" +#include "test/gtest.h" + +namespace webrtc { + +std::unique_ptr CreateTestFrame() { + return std::make_unique(DesktopSize(10, 20)); +} + +TEST(CroppedDesktopFrameTest, DoNotCreateWrapperIfSizeIsNotChanged) { + std::unique_ptr original = CreateTestFrame(); + // owned by `original` and CroppedDesktopFrame. + DesktopFrame* raw_original = original.get(); + std::unique_ptr cropped = CreateCroppedDesktopFrame( + std::move(original), DesktopRect::MakeWH(10, 20)); + ASSERT_EQ(cropped.get(), raw_original); +} + +TEST(CroppedDesktopFrameTest, CropWhenPartiallyOutOfBounds) { + std::unique_ptr cropped = + CreateCroppedDesktopFrame(CreateTestFrame(), DesktopRect::MakeWH(11, 10)); + ASSERT_NE(nullptr, cropped); + ASSERT_EQ(cropped->size().width(), 10); + ASSERT_EQ(cropped->size().height(), 10); + ASSERT_EQ(cropped->top_left().x(), 0); + ASSERT_EQ(cropped->top_left().y(), 0); +} + +TEST(CroppedDesktopFrameTest, ReturnNullIfCropRegionIsOutOfBounds) { + std::unique_ptr frame = CreateTestFrame(); + frame->set_top_left(DesktopVector(100, 200)); + ASSERT_EQ(nullptr, + CreateCroppedDesktopFrame( + std::move(frame), DesktopRect::MakeLTRB(101, 203, 109, 218))); +} + +TEST(CroppedDesktopFrameTest, CropASubArea) { + std::unique_ptr cropped = CreateCroppedDesktopFrame( + CreateTestFrame(), DesktopRect::MakeLTRB(1, 2, 9, 19)); + ASSERT_EQ(cropped->size().width(), 8); + ASSERT_EQ(cropped->size().height(), 17); + ASSERT_EQ(cropped->top_left().x(), 1); + ASSERT_EQ(cropped->top_left().y(), 2); +} + +TEST(CroppedDesktopFrameTest, SetTopLeft) { + std::unique_ptr frame = CreateTestFrame(); + frame->set_top_left(DesktopVector(100, 200)); + frame = CreateCroppedDesktopFrame(std::move(frame), + DesktopRect::MakeLTRB(1, 3, 9, 18)); + ASSERT_EQ(frame->size().width(), 8); + ASSERT_EQ(frame->size().height(), 15); + ASSERT_EQ(frame->top_left().x(), 101); + ASSERT_EQ(frame->top_left().y(), 203); +} + +TEST(CroppedDesktopFrameTest, InitializedWithZeros) { + std::unique_ptr frame = CreateTestFrame(); + const DesktopVector frame_origin = frame->top_left(); + const DesktopSize frame_size = frame->size(); + std::unique_ptr cropped = CreateCroppedDesktopFrame( + std::move(frame), DesktopRect::MakeOriginSize(frame_origin, frame_size)); + for (int j = 0; j < cropped->size().height(); ++j) { + for (int i = 0; i < cropped->stride(); ++i) { + ASSERT_EQ(cropped->data()[i + j * cropped->stride()], 0); + } + } +} + +TEST(CroppedDesktopFrameTest, IccProfile) { + const uint8_t fake_icc_profile_data_array[] = {0x1a, 0x00, 0x2b, 0x00, + 0x3c, 0x00, 0x4d}; + const std::vector icc_profile( + fake_icc_profile_data_array, + fake_icc_profile_data_array + sizeof(fake_icc_profile_data_array)); + + std::unique_ptr frame = CreateTestFrame(); + EXPECT_EQ(frame->icc_profile().size(), 0UL); + + frame->set_icc_profile(icc_profile); + EXPECT_EQ(frame->icc_profile().size(), 7UL); + EXPECT_EQ(frame->icc_profile(), icc_profile); + + frame = CreateCroppedDesktopFrame(std::move(frame), + DesktopRect::MakeLTRB(2, 2, 8, 18)); + EXPECT_EQ(frame->icc_profile().size(), 7UL); + EXPECT_EQ(frame->icc_profile(), icc_profile); + + std::unique_ptr shared = + SharedDesktopFrame::Wrap(std::move(frame)); + EXPECT_EQ(shared->icc_profile().size(), 7UL); + EXPECT_EQ(shared->icc_profile(), icc_profile); + + std::unique_ptr shared_other = shared->Share(); + 
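+  // Share() should preserve frame metadata such as the ICC profile; the
+  // expectations below verify this for the shared copy as well.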
EXPECT_EQ(shared_other->icc_profile().size(), 7UL); + EXPECT_EQ(shared_other->icc_profile(), icc_profile); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.cc b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.cc new file mode 100644 index 0000000000..5e0faaade9 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/desktop_capture/cropping_window_capturer.h" + +#include + +#include + +#include "modules/desktop_capture/cropped_desktop_frame.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +CroppingWindowCapturer::CroppingWindowCapturer( + const DesktopCaptureOptions& options) + : options_(options), + callback_(NULL), + window_capturer_(DesktopCapturer::CreateRawWindowCapturer(options)), + selected_window_(kNullWindowId), + excluded_window_(kNullWindowId) {} + +CroppingWindowCapturer::~CroppingWindowCapturer() {} + +void CroppingWindowCapturer::Start(DesktopCapturer::Callback* callback) { + callback_ = callback; + window_capturer_->Start(callback); +} + +void CroppingWindowCapturer::SetSharedMemoryFactory( + std::unique_ptr shared_memory_factory) { + window_capturer_->SetSharedMemoryFactory(std::move(shared_memory_factory)); +} + +void CroppingWindowCapturer::CaptureFrame() { + if (ShouldUseScreenCapturer()) { + if (!screen_capturer_.get()) { + screen_capturer_ = DesktopCapturer::CreateRawScreenCapturer(options_); + if (excluded_window_) { + screen_capturer_->SetExcludedWindow(excluded_window_); + } + screen_capturer_->Start(this); + } + screen_capturer_->CaptureFrame(); + } else { + window_capturer_->CaptureFrame(); + } +} + +void CroppingWindowCapturer::SetExcludedWindow(WindowId window) { + excluded_window_ = window; + if (screen_capturer_.get()) { + screen_capturer_->SetExcludedWindow(window); + } +} + +bool CroppingWindowCapturer::GetSourceList(SourceList* sources) { + return window_capturer_->GetSourceList(sources); +} + +bool CroppingWindowCapturer::SelectSource(SourceId id) { + if (window_capturer_->SelectSource(id)) { + selected_window_ = id; + return true; + } + return false; +} + +bool CroppingWindowCapturer::FocusOnSelectedSource() { + return window_capturer_->FocusOnSelectedSource(); +} + +void CroppingWindowCapturer::OnCaptureResult( + DesktopCapturer::Result result, + std::unique_ptr screen_frame) { + if (!ShouldUseScreenCapturer()) { + RTC_LOG(LS_INFO) << "Window no longer on top when ScreenCapturer finishes"; + window_capturer_->CaptureFrame(); + return; + } + + if (result != Result::SUCCESS) { + RTC_LOG(LS_WARNING) << "ScreenCapturer failed to capture a frame"; + callback_->OnCaptureResult(result, nullptr); + return; + } + + DesktopRect window_rect = GetWindowRectInVirtualScreen(); + if (window_rect.is_empty()) { + RTC_LOG(LS_WARNING) << "Window rect is empty"; + callback_->OnCaptureResult(Result::ERROR_TEMPORARY, nullptr); + return; + } + + std::unique_ptr cropped_frame = + CreateCroppedDesktopFrame(std::move(screen_frame), window_rect); + + if (!cropped_frame) { + RTC_LOG(LS_WARNING) << "Window 
is outside of the captured display"; + callback_->OnCaptureResult(Result::ERROR_TEMPORARY, nullptr); + return; + } + + callback_->OnCaptureResult(Result::SUCCESS, std::move(cropped_frame)); +} + +bool CroppingWindowCapturer::IsOccluded(const DesktopVector& pos) { + // Returns true if either capturer returns true. + if (window_capturer_->IsOccluded(pos)) { + return true; + } + if (screen_capturer_ != nullptr && screen_capturer_->IsOccluded(pos)) { + return true; + } + return false; +} + +#if !defined(WEBRTC_WIN) +// CroppingWindowCapturer is implemented only for windows. On other platforms +// the regular window capturer is used. +// static +std::unique_ptr CroppingWindowCapturer::CreateCapturer( + const DesktopCaptureOptions& options) { + return DesktopCapturer::CreateWindowCapturer(options); +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.h b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.h new file mode 100644 index 0000000000..56478030b1 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_DESKTOP_CAPTURE_CROPPING_WINDOW_CAPTURER_H_ +#define MODULES_DESKTOP_CAPTURE_CROPPING_WINDOW_CAPTURER_H_ + +#include + +#include "modules/desktop_capture/desktop_capture_options.h" +#include "modules/desktop_capture/desktop_capture_types.h" +#include "modules/desktop_capture/desktop_capturer.h" +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/desktop_geometry.h" +#include "modules/desktop_capture/shared_memory.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +// WindowCapturer implementation that uses a screen capturer to capture the +// whole screen and crops the video frame to the window area when the captured +// window is on top. +class RTC_EXPORT CroppingWindowCapturer : public DesktopCapturer, + public DesktopCapturer::Callback { + public: + static std::unique_ptr CreateCapturer( + const DesktopCaptureOptions& options); + + ~CroppingWindowCapturer() override; + + // DesktopCapturer implementation. + void Start(DesktopCapturer::Callback* callback) override; + void SetSharedMemoryFactory( + std::unique_ptr shared_memory_factory) override; + void CaptureFrame() override; + void SetExcludedWindow(WindowId window) override; + bool GetSourceList(SourceList* sources) override; + bool SelectSource(SourceId id) override; + bool FocusOnSelectedSource() override; + bool IsOccluded(const DesktopVector& pos) override; + + // DesktopCapturer::Callback implementation, passed to `screen_capturer_` to + // intercept the capture result. + void OnCaptureResult(DesktopCapturer::Result result, + std::unique_ptr frame) override; + + protected: + explicit CroppingWindowCapturer(const DesktopCaptureOptions& options); + + // The platform implementation should override these methods. + + // Returns true if it is OK to capture the whole screen and crop to the + // selected window, i.e. the selected window is opaque, rectangular, and not + // occluded. 
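+  // When this returns false, CaptureFrame() falls back to the plain window
+  // capturer, so a conservative implementation costs performance rather
+  // than correctness.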
+ virtual bool ShouldUseScreenCapturer() = 0; + + // Returns the window area relative to the top left of the virtual screen + // within the bounds of the virtual screen. This function should return the + // DesktopRect in full desktop coordinates, i.e. the top-left monitor starts + // from (0, 0). + virtual DesktopRect GetWindowRectInVirtualScreen() = 0; + + WindowId selected_window() const { return selected_window_; } + WindowId excluded_window() const { return excluded_window_; } + DesktopCapturer* window_capturer() const { return window_capturer_.get(); } + + private: + DesktopCaptureOptions options_; + DesktopCapturer::Callback* callback_; + std::unique_ptr window_capturer_; + std::unique_ptr screen_capturer_; + SourceId selected_window_; + WindowId excluded_window_; +}; + +} // namespace webrtc + +#endif // MODULES_DESKTOP_CAPTURE_CROPPING_WINDOW_CAPTURER_H_ diff --git a/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer_win.cc b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer_win.cc new file mode 100644 index 0000000000..ab2f807d33 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer_win.cc @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/desktop_capture/cropping_window_capturer.h" +#include "modules/desktop_capture/desktop_capturer_differ_wrapper.h" +#include "modules/desktop_capture/win/screen_capture_utils.h" +#include "modules/desktop_capture/win/selected_window_context.h" +#include "modules/desktop_capture/win/window_capture_utils.h" +#include "rtc_base/logging.h" +#include "rtc_base/trace_event.h" +#include "rtc_base/win/windows_version.h" + +namespace webrtc { + +namespace { + +// Used to pass input data for verifying the selected window is on top. +struct TopWindowVerifierContext : public SelectedWindowContext { + TopWindowVerifierContext(HWND selected_window, + HWND excluded_window, + DesktopRect selected_window_rect, + WindowCaptureHelperWin* window_capture_helper) + : SelectedWindowContext(selected_window, + selected_window_rect, + window_capture_helper), + excluded_window(excluded_window) { + RTC_DCHECK_NE(selected_window, excluded_window); + } + + // Determines whether the selected window is on top (not occluded by any + // windows except for those it owns or any excluded window). + bool IsTopWindow() { + if (!IsSelectedWindowValid()) { + return false; + } + + // Enumerate all top-level windows above the selected window in Z-order, + // checking whether any overlaps it. This uses FindWindowEx rather than + // EnumWindows because the latter excludes certain system windows (e.g. the + // Start menu & other taskbar menus) that should be detected here to avoid + // inadvertent capture. + int num_retries = 0; + while (true) { + HWND hwnd = nullptr; + while ((hwnd = FindWindowEx(nullptr, hwnd, nullptr, nullptr))) { + if (hwnd == selected_window()) { + // Windows are enumerated in top-down Z-order, so we can stop + // enumerating upon reaching the selected window & report it's on top. + return true; + } + + // Ignore the excluded window. 
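+        // (Typically the caller's own preview or notification window,
+        // registered through SetExcludedWindow().)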
+ if (hwnd == excluded_window) { + continue; + } + + // Ignore windows that aren't visible on the current desktop. + if (!window_capture_helper()->IsWindowVisibleOnCurrentDesktop(hwnd)) { + continue; + } + + // Ignore Chrome notification windows, especially the notification for + // the ongoing window sharing. Notes: + // - This only works with notifications from Chrome, not other Apps. + // - All notifications from Chrome will be ignored. + // - This may cause part or whole of notification window being cropped + // into the capturing of the target window if there is overlapping. + if (window_capture_helper()->IsWindowChromeNotification(hwnd)) { + continue; + } + + // Ignore windows owned by the selected window since we want to capture + // them. + if (IsWindowOwnedBySelectedWindow(hwnd)) { + continue; + } + + // Check whether this window intersects with the selected window. + if (IsWindowOverlappingSelectedWindow(hwnd)) { + // If intersection is not empty, the selected window is not on top. + return false; + } + } + + DWORD lastError = GetLastError(); + if (lastError == ERROR_SUCCESS) { + // The enumeration completed successfully without finding the selected + // window (which may have been closed). + RTC_LOG(LS_WARNING) << "Failed to find selected window (only expected " + "if it was closed)"; + RTC_DCHECK(!IsWindow(selected_window())); + return false; + } else if (lastError == ERROR_INVALID_WINDOW_HANDLE) { + // This error may occur if a window is closed around the time it's + // enumerated; retry the enumeration in this case up to 10 times + // (this should be a rare race & unlikely to recur). + if (++num_retries <= 10) { + RTC_LOG(LS_WARNING) << "Enumeration failed due to race with a window " + "closing; retrying - retry #" + << num_retries; + continue; + } else { + RTC_LOG(LS_ERROR) + << "Exhausted retry allowance around window enumeration failures " + "due to races with windows closing"; + } + } + + // The enumeration failed with an unexpected error (or more repeats of + // an infrequently-expected error than anticipated). After logging this & + // firing an assert when enabled, report that the selected window isn't + // topmost to avoid inadvertent capture of other windows. + RTC_LOG(LS_ERROR) << "Failed to enumerate windows: " << lastError; + RTC_DCHECK_NOTREACHED(); + return false; + } + } + + const HWND excluded_window; +}; + +class CroppingWindowCapturerWin : public CroppingWindowCapturer { + public: + explicit CroppingWindowCapturerWin(const DesktopCaptureOptions& options) + : CroppingWindowCapturer(options), + enumerate_current_process_windows_( + options.enumerate_current_process_windows()), + full_screen_window_detector_(options.full_screen_window_detector()) {} + + void CaptureFrame() override; + + private: + bool ShouldUseScreenCapturer() override; + DesktopRect GetWindowRectInVirtualScreen() override; + + // Returns either selected by user sourceId or sourceId provided by + // FullScreenWindowDetector + WindowId GetWindowToCapture() const; + + // The region from GetWindowRgn in the desktop coordinate if the region is + // rectangular, or the rect from GetWindowRect if the region is not set. + DesktopRect window_region_rect_; + + WindowCaptureHelperWin window_capture_helper_; + + bool enumerate_current_process_windows_; + + rtc::scoped_refptr full_screen_window_detector_; + + // Used to make sure that we only log the usage of fullscreen detection once. 
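+  // `mutable` because the flag is flipped inside the const
+  // GetWindowToCapture() accessor.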
+ mutable bool fullscreen_usage_logged_ = false; +}; + +void CroppingWindowCapturerWin::CaptureFrame() { + DesktopCapturer* win_capturer = window_capturer(); + if (win_capturer) { + // Feed the actual list of windows into full screen window detector. + if (full_screen_window_detector_) { + full_screen_window_detector_->UpdateWindowListIfNeeded( + selected_window(), [this](DesktopCapturer::SourceList* sources) { + // Get the list of top level windows, including ones with empty + // title. win_capturer_->GetSourceList can't be used here + // cause it filters out the windows with empty titles and + // it uses responsiveness check which could lead to performance + // issues. + SourceList result; + int window_list_flags = + enumerate_current_process_windows_ + ? GetWindowListFlags::kNone + : GetWindowListFlags::kIgnoreCurrentProcessWindows; + + if (!webrtc::GetWindowList(window_list_flags, &result)) + return false; + + // Filter out windows not visible on current desktop + auto it = std::remove_if( + result.begin(), result.end(), [this](const auto& source) { + HWND hwnd = reinterpret_cast(source.id); + return !window_capture_helper_ + .IsWindowVisibleOnCurrentDesktop(hwnd); + }); + result.erase(it, result.end()); + + sources->swap(result); + return true; + }); + } + win_capturer->SelectSource(GetWindowToCapture()); + } + + CroppingWindowCapturer::CaptureFrame(); +} + +bool CroppingWindowCapturerWin::ShouldUseScreenCapturer() { + if (rtc::rtc_win::GetVersion() < rtc::rtc_win::Version::VERSION_WIN8 && + window_capture_helper_.IsAeroEnabled()) { + return false; + } + + const HWND selected = reinterpret_cast(GetWindowToCapture()); + // Check if the window is visible on current desktop. + if (!window_capture_helper_.IsWindowVisibleOnCurrentDesktop(selected)) { + return false; + } + + // Check if the window is a translucent layered window. + const LONG window_ex_style = GetWindowLong(selected, GWL_EXSTYLE); + if (window_ex_style & WS_EX_LAYERED) { + COLORREF color_ref_key = 0; + BYTE alpha = 0; + DWORD flags = 0; + + // GetLayeredWindowAttributes fails if the window was setup with + // UpdateLayeredWindow. We have no way to know the opacity of the window in + // that case. This happens for Stiky Note (crbug/412726). + if (!GetLayeredWindowAttributes(selected, &color_ref_key, &alpha, &flags)) + return false; + + // UpdateLayeredWindow is the only way to set per-pixel alpha and will cause + // the previous GetLayeredWindowAttributes to fail. So we only need to check + // the window wide color key or alpha. + if ((flags & LWA_COLORKEY) || ((flags & LWA_ALPHA) && (alpha < 255))) { + return false; + } + } + + if (!GetWindowRect(selected, &window_region_rect_)) { + return false; + } + + DesktopRect content_rect; + if (!GetWindowContentRect(selected, &content_rect)) { + return false; + } + + DesktopRect region_rect; + // Get the window region and check if it is rectangular. + const int region_type = + GetWindowRegionTypeWithBoundary(selected, ®ion_rect); + + // Do not use the screen capturer if the region is empty or not rectangular. + if (region_type == COMPLEXREGION || region_type == NULLREGION) { + return false; + } + + if (region_type == SIMPLEREGION) { + // The `region_rect` returned from GetRgnBox() is always in window + // coordinate. + region_rect.Translate(window_region_rect_.left(), + window_region_rect_.top()); + // MSDN: The window region determines the area *within* the window where the + // system permits drawing. 
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/dd144950(v=vs.85).aspx. + // + // `region_rect` should always be inside of `window_region_rect_`. So after + // the intersection, `window_region_rect_` == `region_rect`. If so, what's + // the point of the intersecting operations? Why cannot we directly retrieve + // `window_region_rect_` from GetWindowRegionTypeWithBoundary() function? + // TODO(zijiehe): Figure out the purpose of these intersections. + window_region_rect_.IntersectWith(region_rect); + content_rect.IntersectWith(region_rect); + } + + // Check if the client area is out of the screen area. When the window is + // maximized, only its client area is visible in the screen, the border will + // be hidden. So we are using `content_rect` here. + if (!GetFullscreenRect().ContainsRect(content_rect)) { + return false; + } + + // Check if the window is occluded by any other window, excluding the child + // windows, context menus, and `excluded_window_`. + // `content_rect` is preferred, see the comments on + // IsWindowIntersectWithSelectedWindow(). + TopWindowVerifierContext context(selected, + reinterpret_cast(excluded_window()), + content_rect, &window_capture_helper_); + return context.IsTopWindow(); +} + +DesktopRect CroppingWindowCapturerWin::GetWindowRectInVirtualScreen() { + TRACE_EVENT0("webrtc", + "CroppingWindowCapturerWin::GetWindowRectInVirtualScreen"); + DesktopRect window_rect; + HWND hwnd = reinterpret_cast(GetWindowToCapture()); + if (!GetCroppedWindowRect(hwnd, /*avoid_cropping_border*/ false, &window_rect, + /*original_rect*/ nullptr)) { + RTC_LOG(LS_WARNING) << "Failed to get window info: " << GetLastError(); + return window_rect; + } + window_rect.IntersectWith(window_region_rect_); + + // Convert `window_rect` to be relative to the top-left of the virtual screen. + DesktopRect screen_rect(GetFullscreenRect()); + window_rect.IntersectWith(screen_rect); + window_rect.Translate(-screen_rect.left(), -screen_rect.top()); + return window_rect; +} + +WindowId CroppingWindowCapturerWin::GetWindowToCapture() const { + const auto selected_source = selected_window(); + const auto full_screen_source = + full_screen_window_detector_ + ? full_screen_window_detector_->FindFullScreenWindow(selected_source) + : 0; + if (full_screen_source && full_screen_source != selected_source && + !fullscreen_usage_logged_) { + fullscreen_usage_logged_ = true; + LogDesktopCapturerFullscreenDetectorUsage(); + } + return full_screen_source ? full_screen_source : selected_source; +} + +} // namespace + +// static +std::unique_ptr CroppingWindowCapturer::CreateCapturer( + const DesktopCaptureOptions& options) { + std::unique_ptr capturer( + new CroppingWindowCapturerWin(options)); + if (capturer && options.detect_updated_region()) { + capturer.reset(new DesktopCapturerDifferWrapper(std::move(capturer))); + } + + return capturer; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/delegated_source_list_controller.h b/third_party/libwebrtc/modules/desktop_capture/delegated_source_list_controller.h new file mode 100644 index 0000000000..cada7dc817 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/delegated_source_list_controller.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_DESKTOP_CAPTURE_DELEGATED_SOURCE_LIST_CONTROLLER_H_ +#define MODULES_DESKTOP_CAPTURE_DELEGATED_SOURCE_LIST_CONTROLLER_H_ + +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +// A controller to be implemented and returned by +// GetDelegatedSourceListController in capturers that require showing their own +// source list and managing user selection there. Apart from ensuring the +// visibility of the source list, these capturers should largely be interacted +// with the same as a normal capturer, though there may be some caveats for +// some DesktopCapturer methods. See GetDelegatedSourceListController for more +// information. +class RTC_EXPORT DelegatedSourceListController { + public: + // Notifications that can be used to help drive any UI that the consumer may + // want to show around this source list (e.g. if an consumer shows their own + // UI in addition to the delegated source list). + class Observer { + public: + // Called after the user has made a selection in the delegated source list. + // Note that the consumer will still need to get the source out of the + // capturer by calling GetSourceList. + virtual void OnSelection() = 0; + + // Called when there is any user action that cancels the source selection. + virtual void OnCancelled() = 0; + + // Called when there is a system error that cancels the source selection. + virtual void OnError() = 0; + + protected: + virtual ~Observer() {} + }; + + // Observer must remain valid until the owning DesktopCapturer is destroyed. + // Only one Observer is allowed at a time, and may be cleared by passing + // nullptr. + virtual void Observe(Observer* observer) = 0; + + // Used to prompt the capturer to show the delegated source list. If the + // source list is already visible, this will be a no-op. Must be called after + // starting the DesktopCapturer. + // + // Note that any selection from a previous invocation of the source list may + // be cleared when this method is called. + virtual void EnsureVisible() = 0; + + // Used to prompt the capturer to hide the delegated source list. If the + // source list is already hidden, this will be a no-op. + virtual void EnsureHidden() = 0; + + protected: + virtual ~DelegatedSourceListController() {} +}; + +} // namespace webrtc + +#endif // MODULES_DESKTOP_CAPTURE_DELEGATED_SOURCE_LIST_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.cc new file mode 100644 index 0000000000..dd688ac5f2 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.cc @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/desktop_capture/desktop_and_cursor_composer.h" + +#include +#include + +#include +#include + +#include "modules/desktop_capture/desktop_capturer.h" +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/mouse_cursor.h" +#include "modules/desktop_capture/mouse_cursor_monitor.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Helper function that blends one image into another. Source image must be +// pre-multiplied with the alpha channel. Destination is assumed to be opaque. +void AlphaBlend(uint8_t* dest, + int dest_stride, + const uint8_t* src, + int src_stride, + const DesktopSize& size) { + for (int y = 0; y < size.height(); ++y) { + for (int x = 0; x < size.width(); ++x) { + uint32_t base_alpha = 255 - src[x * DesktopFrame::kBytesPerPixel + 3]; + if (base_alpha == 255) { + continue; + } else if (base_alpha == 0) { + memcpy(dest + x * DesktopFrame::kBytesPerPixel, + src + x * DesktopFrame::kBytesPerPixel, + DesktopFrame::kBytesPerPixel); + } else { + dest[x * DesktopFrame::kBytesPerPixel] = + dest[x * DesktopFrame::kBytesPerPixel] * base_alpha / 255 + + src[x * DesktopFrame::kBytesPerPixel]; + dest[x * DesktopFrame::kBytesPerPixel + 1] = + dest[x * DesktopFrame::kBytesPerPixel + 1] * base_alpha / 255 + + src[x * DesktopFrame::kBytesPerPixel + 1]; + dest[x * DesktopFrame::kBytesPerPixel + 2] = + dest[x * DesktopFrame::kBytesPerPixel + 2] * base_alpha / 255 + + src[x * DesktopFrame::kBytesPerPixel + 2]; + } + } + src += src_stride; + dest += dest_stride; + } +} + +// DesktopFrame wrapper that draws mouse on a frame and restores original +// content before releasing the underlying frame. +class DesktopFrameWithCursor : public DesktopFrame { + public: + // Takes ownership of `frame`. + DesktopFrameWithCursor(std::unique_ptr frame, + const MouseCursor& cursor, + const DesktopVector& position, + const DesktopRect& previous_cursor_rect, + bool cursor_changed); + ~DesktopFrameWithCursor() override; + + DesktopFrameWithCursor(const DesktopFrameWithCursor&) = delete; + DesktopFrameWithCursor& operator=(const DesktopFrameWithCursor&) = delete; + + DesktopRect cursor_rect() const { return cursor_rect_; } + + private: + const std::unique_ptr original_frame_; + + DesktopVector restore_position_; + std::unique_ptr restore_frame_; + DesktopRect cursor_rect_; +}; + +DesktopFrameWithCursor::DesktopFrameWithCursor( + std::unique_ptr frame, + const MouseCursor& cursor, + const DesktopVector& position, + const DesktopRect& previous_cursor_rect, + bool cursor_changed) + : DesktopFrame(frame->size(), + frame->stride(), + frame->data(), + frame->shared_memory()), + original_frame_(std::move(frame)) { + MoveFrameInfoFrom(original_frame_.get()); + + DesktopVector image_pos = position.subtract(cursor.hotspot()); + cursor_rect_ = DesktopRect::MakeSize(cursor.image()->size()); + cursor_rect_.Translate(image_pos); + DesktopVector cursor_origin = cursor_rect_.top_left(); + cursor_rect_.IntersectWith(DesktopRect::MakeSize(size())); + + if (!previous_cursor_rect.equals(cursor_rect_)) { + mutable_updated_region()->AddRect(cursor_rect_); + // TODO(crbug:1323241) Update this code to properly handle the case where + // |previous_cursor_rect| is outside of the boundaries of |frame|. + // Any boundary check has to take into account the fact that + // |previous_cursor_rect| can be in DPI or in pixels, based on the platform + // we're running on. 
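+    // Adding both the previous and the current cursor rects to the updated
+    // region ensures consumers repaint the area the cursor just vacated as
+    // well as its new location.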
+ mutable_updated_region()->AddRect(previous_cursor_rect); + } else if (cursor_changed) { + mutable_updated_region()->AddRect(cursor_rect_); + } + + if (cursor_rect_.is_empty()) + return; + + // Copy original screen content under cursor to `restore_frame_`. + restore_position_ = cursor_rect_.top_left(); + restore_frame_.reset(new BasicDesktopFrame(cursor_rect_.size())); + restore_frame_->CopyPixelsFrom(*this, cursor_rect_.top_left(), + DesktopRect::MakeSize(restore_frame_->size())); + + // Blit the cursor. + uint8_t* cursor_rect_data = + reinterpret_cast(data()) + cursor_rect_.top() * stride() + + cursor_rect_.left() * DesktopFrame::kBytesPerPixel; + DesktopVector origin_shift = cursor_rect_.top_left().subtract(cursor_origin); + AlphaBlend(cursor_rect_data, stride(), + cursor.image()->data() + + origin_shift.y() * cursor.image()->stride() + + origin_shift.x() * DesktopFrame::kBytesPerPixel, + cursor.image()->stride(), cursor_rect_.size()); +} + +DesktopFrameWithCursor::~DesktopFrameWithCursor() { + // Restore original content of the frame. + if (restore_frame_) { + DesktopRect target_rect = DesktopRect::MakeSize(restore_frame_->size()); + target_rect.Translate(restore_position_); + CopyPixelsFrom(restore_frame_->data(), restore_frame_->stride(), + target_rect); + } +} + +} // namespace + +DesktopAndCursorComposer::DesktopAndCursorComposer( + std::unique_ptr desktop_capturer, + const DesktopCaptureOptions& options) + : DesktopAndCursorComposer(desktop_capturer.release(), + MouseCursorMonitor::Create(options).release()) {} + +DesktopAndCursorComposer::DesktopAndCursorComposer( + DesktopCapturer* desktop_capturer, + MouseCursorMonitor* mouse_monitor) + : desktop_capturer_(desktop_capturer), mouse_monitor_(mouse_monitor) { + RTC_DCHECK(desktop_capturer_); +} + +DesktopAndCursorComposer::~DesktopAndCursorComposer() = default; + +std::unique_ptr +DesktopAndCursorComposer::CreateWithoutMouseCursorMonitor( + std::unique_ptr desktop_capturer) { + return std::unique_ptr( + new DesktopAndCursorComposer(desktop_capturer.release(), nullptr)); +} + +void DesktopAndCursorComposer::Start(DesktopCapturer::Callback* callback) { + callback_ = callback; + if (mouse_monitor_) + mouse_monitor_->Init(this, MouseCursorMonitor::SHAPE_AND_POSITION); + desktop_capturer_->Start(this); +} + +void DesktopAndCursorComposer::SetSharedMemoryFactory( + std::unique_ptr shared_memory_factory) { + desktop_capturer_->SetSharedMemoryFactory(std::move(shared_memory_factory)); +} + +void DesktopAndCursorComposer::CaptureFrame() { + if (mouse_monitor_) + mouse_monitor_->Capture(); + desktop_capturer_->CaptureFrame(); +} + +void DesktopAndCursorComposer::SetExcludedWindow(WindowId window) { + desktop_capturer_->SetExcludedWindow(window); +} + +bool DesktopAndCursorComposer::GetSourceList(SourceList* sources) { + return desktop_capturer_->GetSourceList(sources); +} + +bool DesktopAndCursorComposer::SelectSource(SourceId id) { + return desktop_capturer_->SelectSource(id); +} + +bool DesktopAndCursorComposer::FocusOnSelectedSource() { + return desktop_capturer_->FocusOnSelectedSource(); +} + +bool DesktopAndCursorComposer::IsOccluded(const DesktopVector& pos) { + return desktop_capturer_->IsOccluded(pos); +} + +#if defined(WEBRTC_USE_GIO) +DesktopCaptureMetadata DesktopAndCursorComposer::GetMetadata() { + return desktop_capturer_->GetMetadata(); +} +#endif // defined(WEBRTC_USE_GIO) + +void DesktopAndCursorComposer::OnCaptureResult( + DesktopCapturer::Result result, + std::unique_ptr frame) { + if (frame && cursor_) { + if 
(!frame->may_contain_cursor() && + frame->rect().Contains(cursor_position_) && + !desktop_capturer_->IsOccluded(cursor_position_)) { + DesktopVector relative_position = + cursor_position_.subtract(frame->top_left()); +#if defined(WEBRTC_MAC) || defined(CHROMEOS) + // On OSX, the logical(DIP) and physical coordinates are used mixingly. + // For example, the captured cursor has its size in physical pixels(2x) + // and location in logical(DIP) pixels on Retina monitor. This will cause + // problem when the desktop is mixed with Retina and non-Retina monitors. + // So we use DIP pixel for all location info and compensate with the scale + // factor of current frame to the `relative_position`. + const float scale = frame->scale_factor(); + relative_position.set(relative_position.x() * scale, + relative_position.y() * scale); +#endif + auto frame_with_cursor = std::make_unique( + std::move(frame), *cursor_, relative_position, previous_cursor_rect_, + cursor_changed_); + previous_cursor_rect_ = frame_with_cursor->cursor_rect(); + cursor_changed_ = false; + frame = std::move(frame_with_cursor); + frame->set_may_contain_cursor(true); + } + } + + callback_->OnCaptureResult(result, std::move(frame)); +} + +void DesktopAndCursorComposer::OnMouseCursor(MouseCursor* cursor) { + cursor_changed_ = true; + cursor_.reset(cursor); +} + +void DesktopAndCursorComposer::OnMouseCursorPosition( + const DesktopVector& position) { + cursor_position_ = position; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h new file mode 100644 index 0000000000..a078b3eeef --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_AND_CURSOR_COMPOSER_H_ +#define MODULES_DESKTOP_CAPTURE_DESKTOP_AND_CURSOR_COMPOSER_H_ + +#include +#if defined(WEBRTC_USE_GIO) +#include "modules/desktop_capture/desktop_capture_metadata.h" +#endif // defined(WEBRTC_USE_GIO) +#include "modules/desktop_capture/desktop_capture_options.h" +#include "modules/desktop_capture/desktop_capture_types.h" +#include "modules/desktop_capture/desktop_capturer.h" +#include "modules/desktop_capture/desktop_frame.h" +#include "modules/desktop_capture/desktop_geometry.h" +#include "modules/desktop_capture/mouse_cursor.h" +#include "modules/desktop_capture/mouse_cursor_monitor.h" +#include "modules/desktop_capture/shared_memory.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +// A wrapper for DesktopCapturer that also captures mouse using specified +// MouseCursorMonitor and renders it on the generated streams. +class RTC_EXPORT DesktopAndCursorComposer + : public DesktopCapturer, + public DesktopCapturer::Callback, + public MouseCursorMonitor::Callback { + public: + // Creates a new composer that captures mouse cursor using + // MouseCursorMonitor::Create(options) and renders it into the frames + // generated by `desktop_capturer`. 
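+  // The composer takes ownership of `desktop_capturer` and forwards every
+  // DesktopCapturer call to it, intercepting only the capture results in
+  // order to draw the cursor.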
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h
new file mode 100644
index 0000000000..a078b3eeef
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.h
@@ -0,0 +1,98 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_AND_CURSOR_COMPOSER_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_AND_CURSOR_COMPOSER_H_
+
+#include <memory>
+#if defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_metadata.h"
+#endif  // defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_options.h"
+#include "modules/desktop_capture/desktop_capture_types.h"
+#include "modules/desktop_capture/desktop_capturer.h"
+#include "modules/desktop_capture/desktop_frame.h"
+#include "modules/desktop_capture/desktop_geometry.h"
+#include "modules/desktop_capture/mouse_cursor.h"
+#include "modules/desktop_capture/mouse_cursor_monitor.h"
+#include "modules/desktop_capture/shared_memory.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace webrtc {
+
+// A wrapper for DesktopCapturer that also captures the mouse cursor using the
+// specified MouseCursorMonitor and renders it into the generated frames.
+class RTC_EXPORT DesktopAndCursorComposer
+    : public DesktopCapturer,
+      public DesktopCapturer::Callback,
+      public MouseCursorMonitor::Callback {
+ public:
+  // Creates a new composer that captures the mouse cursor using
+  // MouseCursorMonitor::Create(options) and renders it into the frames
+  // generated by `desktop_capturer`.
+  DesktopAndCursorComposer(std::unique_ptr<DesktopCapturer> desktop_capturer,
+                           const DesktopCaptureOptions& options);
+
+  ~DesktopAndCursorComposer() override;
+
+  DesktopAndCursorComposer(const DesktopAndCursorComposer&) = delete;
+  DesktopAndCursorComposer& operator=(const DesktopAndCursorComposer&) = delete;
+
+  // Creates a new composer that relies on an external source for cursor shape
+  // and position information via the MouseCursorMonitor::Callback interface.
+  static std::unique_ptr<DesktopAndCursorComposer>
+  CreateWithoutMouseCursorMonitor(
+      std::unique_ptr<DesktopCapturer> desktop_capturer);
+
+  // DesktopCapturer interface.
+  void Start(DesktopCapturer::Callback* callback) override;
+  void SetSharedMemoryFactory(
+      std::unique_ptr<SharedMemoryFactory> shared_memory_factory) override;
+  void CaptureFrame() override;
+  void SetExcludedWindow(WindowId window) override;
+  bool GetSourceList(SourceList* sources) override;
+  bool SelectSource(SourceId id) override;
+  bool FocusOnSelectedSource() override;
+  bool IsOccluded(const DesktopVector& pos) override;
+#if defined(WEBRTC_USE_GIO)
+  DesktopCaptureMetadata GetMetadata() override;
+#endif  // defined(WEBRTC_USE_GIO)
+
+  // MouseCursorMonitor::Callback interface.
+  void OnMouseCursor(MouseCursor* cursor) override;
+  void OnMouseCursorPosition(const DesktopVector& position) override;
+
+ private:
+  // Allows test cases to use a fake MouseCursorMonitor implementation.
+  friend class DesktopAndCursorComposerTest;
+
+  // Constructor that the public constructors delegate to; it also allows
+  // test cases to use a fake MouseCursorMonitor implementation.
+  DesktopAndCursorComposer(DesktopCapturer* desktop_capturer,
+                           MouseCursorMonitor* mouse_monitor);
+
+  // DesktopCapturer::Callback interface.
+  void OnCaptureResult(DesktopCapturer::Result result,
+                       std::unique_ptr<DesktopFrame> frame) override;
+
+  const std::unique_ptr<DesktopCapturer> desktop_capturer_;
+  const std::unique_ptr<MouseCursorMonitor> mouse_monitor_;
+
+  DesktopCapturer::Callback* callback_;
+
+  std::unique_ptr<MouseCursor> cursor_;
+  DesktopVector cursor_position_;
+  DesktopRect previous_cursor_rect_;
+  bool cursor_changed_ = false;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_AND_CURSOR_COMPOSER_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer_unittest.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer_unittest.cc
new file mode 100644
index 0000000000..179e002bc5
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer_unittest.cc
@@ -0,0 +1,479 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/desktop_capture/desktop_and_cursor_composer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <ostream>
+#include <utility>
+
+#include "modules/desktop_capture/desktop_capturer.h"
+#include "modules/desktop_capture/desktop_frame.h"
+#include "modules/desktop_capture/mouse_cursor.h"
+#include "modules/desktop_capture/shared_desktop_frame.h"
+#include "rtc_base/arraysize.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+using testing::ElementsAre;
+
+const int kFrameXCoord = 100;
+const int kFrameYCoord = 200;
+const int kScreenWidth = 100;
+const int kScreenHeight = 100;
+const int kCursorWidth = 10;
+const int kCursorHeight = 10;
+
+const int kTestCursorSize = 3;
+const uint32_t kTestCursorData[kTestCursorSize][kTestCursorSize] = {
+    {
+        0xffffffff,
+        0x99990000,
+        0xaa222222,
+    },
+    {
+        0x88008800,
+        0xaa0000aa,
+        0xaa333333,
+    },
+    {
+        0x00000000,
+        0xaa0000aa,
+        0xaa333333,
+    },
+};
+
+uint32_t GetFakeFramePixelValue(const DesktopVector& p) {
+  uint32_t r = 100 + p.x();
+  uint32_t g = 100 + p.y();
+  uint32_t b = 100 + p.x() + p.y();
+  return b + (g << 8) + (r << 16) + 0xff000000;
+}
+
+uint32_t GetFramePixel(const DesktopFrame& frame, const DesktopVector& pos) {
+  return *reinterpret_cast<uint32_t*>(frame.GetFrameDataAtPos(pos));
+}
+
+// Blends two pixel values taking alpha into account.
+uint32_t BlendPixels(uint32_t dest, uint32_t src) {
+  uint8_t alpha = 255 - ((src & 0xff000000) >> 24);
+  uint32_t r =
+      ((dest & 0x00ff0000) >> 16) * alpha / 255 + ((src & 0x00ff0000) >> 16);
+  uint32_t g =
+      ((dest & 0x0000ff00) >> 8) * alpha / 255 + ((src & 0x0000ff00) >> 8);
+  uint32_t b = (dest & 0x000000ff) * alpha / 255 + (src & 0x000000ff);
+  return b + (g << 8) + (r << 16) + 0xff000000;
+}
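BlendPixels() mirrors the production AlphaBlend(): the cursor bitmap is premultiplied ARGB, so the source channels are added as-is while the destination is scaled by the remaining coverage (255 - alpha). A tiny standalone check of that arithmetic — a hypothetical main(), not part of the test file, assuming the helper above is in scope:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Fully opaque cursor pixel (alpha byte 0xff): destination is ignored.
    assert(BlendPixels(0xff102030u, 0xffaabbccu) == 0xffaabbccu);
    // Fully transparent, premultiplied-to-zero pixel: destination survives.
    assert(BlendPixels(0xff102030u, 0x00000000u) == 0xff102030u);
    return 0;
  }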
+
+DesktopFrame* CreateTestFrame(int width = kScreenWidth,
+                              int height = kScreenHeight) {
+  DesktopFrame* frame = new BasicDesktopFrame(DesktopSize(width, height));
+  uint32_t* data = reinterpret_cast<uint32_t*>(frame->data());
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      *(data++) = GetFakeFramePixelValue(DesktopVector(x, y));
+    }
+  }
+  return frame;
+}
+
+MouseCursor* CreateTestCursor(DesktopVector hotspot) {
+  std::unique_ptr<DesktopFrame> image(
+      new BasicDesktopFrame(DesktopSize(kCursorWidth, kCursorHeight)));
+  uint32_t* data = reinterpret_cast<uint32_t*>(image->data());
+  // Set a kTestCursorSize x kTestCursorSize block of pixels near the hotspot
+  // and leave all others blank.
+  for (int y = 0; y < kTestCursorSize; ++y) {
+    for (int x = 0; x < kTestCursorSize; ++x) {
+      data[(hotspot.y() + y) * kCursorWidth + (hotspot.x() + x)] =
+          kTestCursorData[y][x];
+    }
+  }
+  return new MouseCursor(image.release(), hotspot);
+}
+
+class FakeScreenCapturer : public DesktopCapturer {
+ public:
+  FakeScreenCapturer() {}
+
+  void Start(Callback* callback) override { callback_ = callback; }
+
+  void CaptureFrame() override {
+    callback_->OnCaptureResult(
+        next_frame_ ? Result::SUCCESS : Result::ERROR_TEMPORARY,
+        std::move(next_frame_));
+  }
+
+  void SetNextFrame(std::unique_ptr<DesktopFrame> next_frame) {
+    next_frame_ = std::move(next_frame);
+  }
+
+  bool IsOccluded(const DesktopVector& pos) override { return is_occluded_; }
+
+  void set_is_occluded(bool value) { is_occluded_ = value; }
+
+ private:
+  Callback* callback_ = nullptr;
+
+  std::unique_ptr<DesktopFrame> next_frame_;
+  bool is_occluded_ = false;
+};
+
+class FakeMouseMonitor : public MouseCursorMonitor {
+ public:
+  FakeMouseMonitor() : changed_(true) {}
+
+  void SetState(CursorState state, const DesktopVector& pos) {
+    state_ = state;
+    position_ = pos;
+  }
+
+  void SetHotspot(const DesktopVector& hotspot) {
+    if (!hotspot_.equals(hotspot))
+      changed_ = true;
+    hotspot_ = hotspot;
+  }
+
+  void Init(Callback* callback, Mode mode) override { callback_ = callback; }
+
+  void Capture() override {
+    if (changed_) {
+      callback_->OnMouseCursor(CreateTestCursor(hotspot_));
+    }
+    callback_->OnMouseCursorPosition(position_);
+  }
+
+ private:
+  Callback* callback_;
+  CursorState state_;
+  DesktopVector position_;
+  DesktopVector hotspot_;
+  bool changed_;
+};
+
+void VerifyFrame(const DesktopFrame& frame,
+                 MouseCursorMonitor::CursorState state,
+                 const DesktopVector& pos) {
+  // Verify that all other pixels are set to their original values.
+  DesktopRect image_rect =
+      DesktopRect::MakeWH(kTestCursorSize, kTestCursorSize);
+  image_rect.Translate(pos);
+
+  for (int y = 0; y < kScreenHeight; ++y) {
+    for (int x = 0; x < kScreenWidth; ++x) {
+      DesktopVector p(x, y);
+      if (state == MouseCursorMonitor::INSIDE && image_rect.Contains(p)) {
+        EXPECT_EQ(BlendPixels(GetFakeFramePixelValue(p),
+                              kTestCursorData[y - pos.y()][x - pos.x()]),
+                  GetFramePixel(frame, p));
+      } else {
+        EXPECT_EQ(GetFakeFramePixelValue(p), GetFramePixel(frame, p));
+      }
+    }
+  }
+}
+
+}  // namespace
+
+bool operator==(const DesktopRect& left, const DesktopRect& right) {
+  return left.equals(right);
+}
+
+std::ostream& operator<<(std::ostream& out, const DesktopRect& rect) {
+  out << "{" << rect.left() << "+" << rect.top() << "-" << rect.width() << "x"
+      << rect.height() << "}";
+  return out;
+}
+
+class DesktopAndCursorComposerTest : public ::testing::Test,
+                                     public DesktopCapturer::Callback {
+ public:
+  explicit DesktopAndCursorComposerTest(bool include_cursor = true)
+      : fake_screen_(new FakeScreenCapturer()),
+        fake_cursor_(include_cursor ? new FakeMouseMonitor() : nullptr),
+        blender_(fake_screen_, fake_cursor_) {
+    blender_.Start(this);
+  }
+
+  // DesktopCapturer::Callback interface
+  void OnCaptureResult(DesktopCapturer::Result result,
+                       std::unique_ptr<DesktopFrame> frame) override {
+    frame_ = std::move(frame);
+  }
+
+ protected:
+  // Owned by `blender_`.
+  FakeScreenCapturer* fake_screen_;
+  FakeMouseMonitor* fake_cursor_;
+
+  DesktopAndCursorComposer blender_;
+  std::unique_ptr<DesktopFrame> frame_;
+};
+
+class DesktopAndCursorComposerNoCursorMonitorTest
+    : public DesktopAndCursorComposerTest {
+ public:
+  DesktopAndCursorComposerNoCursorMonitorTest()
+      : DesktopAndCursorComposerTest(false) {}
+};
+
+TEST_F(DesktopAndCursorComposerTest, CursorShouldBeIgnoredIfNoFrameCaptured) {
+  struct {
+    int x, y;
+    int hotspot_x, hotspot_y;
+    bool inside;
+  } tests[] = {
+      {0, 0, 0, 0, true},    {50, 50, 0, 0, true},   {100, 50, 0, 0, true},
+      {50, 100, 0, 0, true}, {100, 100, 0, 0, true}, {0, 0, 2, 5, true},
+      {1, 1, 2, 5, true},    {50, 50, 2, 5, true},   {100, 100, 2, 5, true},
+      {0, 0, 5, 2, true},    {50, 50, 5, 2, true},   {100, 100, 5, 2, true},
+      {0, 0, 0, 0, false},
+  };
+
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    SCOPED_TRACE(i);
+
+    DesktopVector hotspot(tests[i].hotspot_x, tests[i].hotspot_y);
+    fake_cursor_->SetHotspot(hotspot);
+
+    MouseCursorMonitor::CursorState state = tests[i].inside
+                                                ? MouseCursorMonitor::INSIDE
+                                                : MouseCursorMonitor::OUTSIDE;
+    DesktopVector pos(tests[i].x, tests[i].y);
+    fake_cursor_->SetState(state, pos);
+
+    std::unique_ptr<SharedDesktopFrame> frame(
+        SharedDesktopFrame::Wrap(CreateTestFrame()));
+
+    blender_.CaptureFrame();
+    // If the capturer captured nothing, then the cursor should be ignored,
+    // no matter its state or position.
+    EXPECT_EQ(frame_, nullptr);
+  }
+}
+
+TEST_F(DesktopAndCursorComposerTest, CursorShouldBeIgnoredIfFrameMayContainIt) {
+  // We can't use a shared frame because we need to detect modifications
+  // compared to a control.
+  std::unique_ptr<DesktopFrame> control_frame(CreateTestFrame());
+  control_frame->set_top_left(DesktopVector(kFrameXCoord, kFrameYCoord));
+
+  struct {
+    int x;
+    int y;
+    bool may_contain_cursor;
+  } tests[] = {
+      {100, 200, true},
+      {100, 200, false},
+      {150, 250, true},
+      {150, 250, false},
+  };
+
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    SCOPED_TRACE(i);
+
+    std::unique_ptr<DesktopFrame> frame(CreateTestFrame());
+    frame->set_top_left(DesktopVector(kFrameXCoord, kFrameYCoord));
+    frame->set_may_contain_cursor(tests[i].may_contain_cursor);
+    fake_screen_->SetNextFrame(std::move(frame));
+
+    const DesktopVector abs_pos(tests[i].x, tests[i].y);
+    fake_cursor_->SetState(MouseCursorMonitor::INSIDE, abs_pos);
+    blender_.CaptureFrame();
+
+    // If the frame may already have contained the cursor, then
+    // `CaptureFrame()` should not have modified it, so it should be the same
+    // as the control.
+    EXPECT_TRUE(frame_);
+    const DesktopVector rel_pos(abs_pos.subtract(control_frame->top_left()));
+    if (tests[i].may_contain_cursor) {
+      EXPECT_EQ(
+          *reinterpret_cast<uint32_t*>(frame_->GetFrameDataAtPos(rel_pos)),
+          *reinterpret_cast<uint32_t*>(
+              control_frame->GetFrameDataAtPos(rel_pos)));
+
+    } else {
+      // `CaptureFrame()` should have modified the frame to have the cursor.
+      EXPECT_NE(
+          *reinterpret_cast<uint32_t*>(frame_->GetFrameDataAtPos(rel_pos)),
+          *reinterpret_cast<uint32_t*>(
+              control_frame->GetFrameDataAtPos(rel_pos)));
+      EXPECT_TRUE(frame_->may_contain_cursor());
+    }
+  }
+}
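The remaining tests rely on DesktopFrame::top_left() placing the frame in full-desktop coordinates, with the composer translating the absolute cursor position into frame-local coordinates. A one-line restatement of that mapping with hypothetical values:

  // abs_pos is in desktop coordinates; the composer blits the cursor at
  // abs_pos - frame->top_left() (frame-local), i.e. relative_position in
  // OnCaptureResult() above.
  webrtc::DesktopVector abs_pos(150, 250);
  webrtc::DesktopVector top_left(100, 200);
  webrtc::DesktopVector rel_pos = abs_pos.subtract(top_left);  // (50, 50)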
+
+TEST_F(DesktopAndCursorComposerTest,
+       CursorShouldBeIgnoredIfItIsOutOfDesktopFrame) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  frame->set_top_left(DesktopVector(kFrameXCoord, kFrameYCoord));
+  // The frame covers (100, 200) - (200, 300).
+
+  struct {
+    int x;
+    int y;
+  } tests[] = {
+      {0, 0},    {50, 50},         {50, 150},      {100, 150}, {50, 200},
+      {99, 200}, {100, 199},       {200, 300},     {200, 299}, {199, 300},
+      {-1, -1},  {-10000, -10000}, {10000, 10000},
+  };
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    SCOPED_TRACE(i);
+
+    fake_screen_->SetNextFrame(frame->Share());
+    // The CursorState is ignored when using an absolute cursor position.
+    fake_cursor_->SetState(MouseCursorMonitor::OUTSIDE,
+                           DesktopVector(tests[i].x, tests[i].y));
+    blender_.CaptureFrame();
+    VerifyFrame(*frame_, MouseCursorMonitor::OUTSIDE, DesktopVector(0, 0));
+  }
+}
+
+TEST_F(DesktopAndCursorComposerTest, IsOccludedShouldBeConsidered) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  frame->set_top_left(DesktopVector(kFrameXCoord, kFrameYCoord));
+  // The frame covers (100, 200) - (200, 300).
+
+  struct {
+    int x;
+    int y;
+  } tests[] = {
+      {100, 200}, {101, 200}, {100, 201}, {101, 201}, {150, 250}, {199, 299},
+  };
+  fake_screen_->set_is_occluded(true);
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    SCOPED_TRACE(i);
+
+    fake_screen_->SetNextFrame(frame->Share());
+    // The CursorState is ignored when using an absolute cursor position.
+    fake_cursor_->SetState(MouseCursorMonitor::OUTSIDE,
+                           DesktopVector(tests[i].x, tests[i].y));
+    blender_.CaptureFrame();
+    VerifyFrame(*frame_, MouseCursorMonitor::OUTSIDE, DesktopVector());
+  }
+}
+
+TEST_F(DesktopAndCursorComposerTest, CursorIncluded) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  frame->set_top_left(DesktopVector(kFrameXCoord, kFrameYCoord));
+  // The frame covers (100, 200) - (200, 300).
+
+  struct {
+    int x;
+    int y;
+  } tests[] = {
+      {100, 200}, {101, 200}, {100, 201}, {101, 201}, {150, 250}, {199, 299},
+  };
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    SCOPED_TRACE(i);
+
+    const DesktopVector abs_pos(tests[i].x, tests[i].y);
+    const DesktopVector rel_pos(abs_pos.subtract(frame->top_left()));
+
+    fake_screen_->SetNextFrame(frame->Share());
+    // The CursorState is ignored when using an absolute cursor position.
+    fake_cursor_->SetState(MouseCursorMonitor::OUTSIDE, abs_pos);
+    blender_.CaptureFrame();
+    VerifyFrame(*frame_, MouseCursorMonitor::INSIDE, rel_pos);
+
+    // Verify that the cursor is erased before the frame buffer is returned
+    // to the screen capturer.
+    frame_.reset();
+    VerifyFrame(*frame, MouseCursorMonitor::OUTSIDE, DesktopVector());
+  }
+}
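The three tests that follow pin down the dirty-region contract: when only the cursor moves or changes shape, the delivered frame's updated_region() is exactly the union of the old and new cursor rectangles, and it stays empty when nothing changed. A sketch (assuming nothing beyond the DesktopFrame/DesktopRegion API already used in this patch) of how a consumer could exploit that for incremental encoding:

  // Sketch: walk only the dirty rectangles of a delivered frame.
  void EncodeDirtyRects(const webrtc::DesktopFrame& frame) {
    for (webrtc::DesktopRegion::Iterator it(frame.updated_region());
         !it.IsAtEnd(); it.Advance()) {
      const webrtc::DesktopRect& rect = it.rect();
      // Re-encode just `rect` instead of the whole frame, e.g. by reading
      // frame.GetFrameDataAtPos(rect.top_left()) row by row.
    }
  }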
+
+TEST_F(DesktopAndCursorComposerNoCursorMonitorTest,
+       UpdatedRegionIncludesOldAndNewCursorRectsIfMoved) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  DesktopRect first_cursor_rect;
+  {
+    // Block to scope test_cursor, which is invalidated by OnMouseCursor.
+    MouseCursor* test_cursor = CreateTestCursor(DesktopVector(0, 0));
+    first_cursor_rect = DesktopRect::MakeSize(test_cursor->image()->size());
+    blender_.OnMouseCursor(test_cursor);
+  }
+  blender_.OnMouseCursorPosition(DesktopVector(0, 0));
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+
+  DesktopVector cursor_move_offset(1, 1);
+  DesktopRect second_cursor_rect = first_cursor_rect;
+  second_cursor_rect.Translate(cursor_move_offset);
+  blender_.OnMouseCursorPosition(cursor_move_offset);
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+
+  EXPECT_TRUE(frame->updated_region().is_empty());
+  DesktopRegion expected_region;
+  expected_region.AddRect(first_cursor_rect);
+  expected_region.AddRect(second_cursor_rect);
+  EXPECT_TRUE(frame_->updated_region().Equals(expected_region));
+}
+
+TEST_F(DesktopAndCursorComposerNoCursorMonitorTest,
+       UpdatedRegionIncludesOldAndNewCursorRectsIfShapeChanged) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  DesktopRect first_cursor_rect;
+  {
+    // Block to scope test_cursor, which is invalidated by OnMouseCursor.
+    MouseCursor* test_cursor = CreateTestCursor(DesktopVector(0, 0));
+    first_cursor_rect = DesktopRect::MakeSize(test_cursor->image()->size());
+    blender_.OnMouseCursor(test_cursor);
+  }
+  blender_.OnMouseCursorPosition(DesktopVector(0, 0));
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+
+  // Create a second cursor with the same shape as the first. Since the code
+  // doesn't compare the cursor pixels, this is sufficient, and it avoids
+  // needing two test cursor bitmaps.
+  DesktopRect second_cursor_rect;
+  {
+    MouseCursor* test_cursor = CreateTestCursor(DesktopVector(0, 0));
+    second_cursor_rect = DesktopRect::MakeSize(test_cursor->image()->size());
+    blender_.OnMouseCursor(test_cursor);
+  }
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+
+  EXPECT_TRUE(frame->updated_region().is_empty());
+  DesktopRegion expected_region;
+  expected_region.AddRect(first_cursor_rect);
+  expected_region.AddRect(second_cursor_rect);
+  EXPECT_TRUE(frame_->updated_region().Equals(expected_region));
+}
+
+TEST_F(DesktopAndCursorComposerNoCursorMonitorTest,
+       UpdatedRegionUnchangedIfCursorUnchanged) {
+  std::unique_ptr<SharedDesktopFrame> frame(
+      SharedDesktopFrame::Wrap(CreateTestFrame()));
+  blender_.OnMouseCursor(CreateTestCursor(DesktopVector(0, 0)));
+  blender_.OnMouseCursorPosition(DesktopVector(0, 0));
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+  fake_screen_->SetNextFrame(frame->Share());
+  blender_.CaptureFrame();
+
+  EXPECT_TRUE(frame->updated_region().is_empty());
+  EXPECT_TRUE(frame_->updated_region().is_empty());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn/moz.build
new file mode 100644
index 0000000000..06e178901e
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn/moz.build
@@ -0,0 +1,153 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/differ_vector_sse2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Darwin": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + 
DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2", + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +Library("desktop_capture_differ_sse2_gn") diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build new file mode 100644 index 0000000000..8c56b6b8e5 --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build @@ -0,0 +1,678 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### +if CONFIG["MOZ_WIDGET_TOOLKIT"] == "gtk": + CXXFLAGS += CONFIG["MOZ_GTK3_CFLAGS"] + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/media/libyuv/", + "/media/libyuv/libyuv/include/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/fallback_desktop_capturer_wrapper.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/blank_detector_desktop_capturer_wrapper.cc", + "/third_party/libwebrtc/modules/desktop_capture/cropped_desktop_frame.cc", + "/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_and_cursor_composer.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_capturer.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_wrapper.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_frame_generator.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_frame_rotation.cc", + "/third_party/libwebrtc/modules/desktop_capture/differ_block.cc", + "/third_party/libwebrtc/modules/desktop_capture/fake_desktop_capturer.cc", + "/third_party/libwebrtc/modules/desktop_capture/full_screen_application_handler.cc", + "/third_party/libwebrtc/modules/desktop_capture/full_screen_window_detector.cc", + "/third_party/libwebrtc/modules/desktop_capture/mouse_cursor.cc", + "/third_party/libwebrtc/modules/desktop_capture/resolution_tracker.cc", + "/third_party/libwebrtc/modules/desktop_capture/rgba_color.cc", + "/third_party/libwebrtc/modules/desktop_capture/screen_capturer_helper.cc", + "/third_party/libwebrtc/modules/desktop_capture/window_finder.cc" +] + +if 
not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration_monitor.cc", + "/third_party/libwebrtc/modules/desktop_capture/mac/full_screen_mac_application_handler.cc", + "/third_party/libwebrtc/modules/desktop_capture/mac/window_list_utils.cc" + ] + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_linux.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/screen_capturer_linux.cc", + "/third_party/libwebrtc/modules/desktop_capture/window_capturer_linux.cc" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["WEBRTC_USE_X11"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_linux.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc", + "/third_party/libwebrtc/modules/desktop_capture/screen_capturer_linux.cc", + "/third_party/libwebrtc/modules/desktop_capture/window_capturer_linux.cc" + ] + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + 
DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "d3d11", + "dxgi", + "iphlpapi", + "secur32", + "winmm" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_gdi.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/cropping_window_capturer_win.cc", + "/third_party/libwebrtc/modules/desktop_capture/desktop_frame_win.cc", + "/third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_win.cc", + "/third_party/libwebrtc/modules/desktop_capture/screen_capturer_win.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/cursor.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/d3d_device.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/desktop.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/desktop_capture_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/display_configuration_monitor.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_adapter_duplicator.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_context.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_duplicator_controller.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_frame.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_output_duplicator.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_mapping.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/dxgi_texture_staging.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/full_screen_win_application_handler.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/scoped_thread_desktop.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/screen_capture_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_directx.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/screen_capturer_win_magnifier.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/selected_window_context.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/window_capture_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/win/window_capturer_win_gdi.cc", + "/third_party/libwebrtc/modules/desktop_capture/window_capturer_win.cc", + "/third_party/libwebrtc/modules/desktop_capture/window_finder_win.cc" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + 
] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "ppc64": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + 
"/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "riscv64": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + 
"/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_USE_PIPEWIRE"] = True + DEFINES["_GNU_SOURCE"] = True + + LOCAL_INCLUDES += [ + "/gfx/angle/checkout/include/", + "/third_party/drm/drm/", + "/third_party/drm/drm/include/", + "/third_party/drm/drm/include/libdrm/", + "/third_party/gbm/gbm/", + "/third_party/libepoxy/libepoxy/include/", + "/third_party/pipewire/" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/base_capturer_pipewire.cc" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/mouse_cursor_monitor_pipewire.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/restore_token_manager.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screen_capture_portal_interface.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_portal.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/screencast_stream_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + 
"/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips32" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + 
"/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True + + OS_LIBS += [ + "X11", + "Xcomposite", + "Xdamage", + "Xext", + "Xfixes", + "Xrandr", + "Xrender", + "Xtst" + ] + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + +Library("desktop_capture_gn") diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metadata.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metadata.h new file mode 100644 index 0000000000..49a20e729c --- /dev/null +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metadata.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METADATA_H_ +#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METADATA_H_ + +#if defined(WEBRTC_USE_GIO) +#include "modules/portal/xdg_session_details.h" +#endif // defined(WEBRTC_USE_GIO) + +namespace webrtc { + +// Container for the metadata associated with a desktop capturer. 
+struct DesktopCaptureMetadata {
+#if defined(WEBRTC_USE_GIO)
+  // Details about the XDG desktop session handle (used by the Wayland
+  // implementation in remoting).
+  xdg_portal::SessionDetails session_details;
+#endif  // defined(WEBRTC_USE_GIO)
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METADATA_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.cc
new file mode 100644
index 0000000000..6b741ef4bb
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.cc
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/desktop_capture/desktop_capture_metrics_helper.h"
+
+#include "modules/desktop_capture/desktop_capture_types.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+// This enum is logged via UMA so entries should not be reordered or have
+// their values changed. This should also be kept in sync with the values in
+// the DesktopCapturerId namespace.
+enum class SequentialDesktopCapturerId {
+  kUnknown = 0,
+  kWgcCapturerWin = 1,
+  kScreenCapturerWinMagnifier = 2,
+  kWindowCapturerWinGdi = 3,
+  kScreenCapturerWinGdi = 4,
+  kScreenCapturerWinDirectx = 5,
+  kMaxValue = kScreenCapturerWinDirectx
+};
+}  // namespace
+
+void RecordCapturerImpl(uint32_t capturer_id) {
+  SequentialDesktopCapturerId sequential_id;
+  switch (capturer_id) {
+    case DesktopCapturerId::kWgcCapturerWin:
+      sequential_id = SequentialDesktopCapturerId::kWgcCapturerWin;
+      break;
+    case DesktopCapturerId::kScreenCapturerWinMagnifier:
+      sequential_id = SequentialDesktopCapturerId::kScreenCapturerWinMagnifier;
+      break;
+    case DesktopCapturerId::kWindowCapturerWinGdi:
+      sequential_id = SequentialDesktopCapturerId::kWindowCapturerWinGdi;
+      break;
+    case DesktopCapturerId::kScreenCapturerWinGdi:
+      sequential_id = SequentialDesktopCapturerId::kScreenCapturerWinGdi;
+      break;
+    case DesktopCapturerId::kScreenCapturerWinDirectx:
+      sequential_id = SequentialDesktopCapturerId::kScreenCapturerWinDirectx;
+      break;
+    case DesktopCapturerId::kUnknown:
+    default:
+      sequential_id = SequentialDesktopCapturerId::kUnknown;
+  }
+  RTC_HISTOGRAM_ENUMERATION(
+      "WebRTC.DesktopCapture.Win.DesktopCapturerImpl",
+      static_cast<int>(sequential_id),
+      static_cast<int>(SequentialDesktopCapturerId::kMaxValue));
+}
+
+}  // namespace webrtc
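The sparse constants in the DesktopCapturerId namespace are remapped to this dense 0..kMaxValue range because UMA enumeration histograms expect contiguous values. A hypothetical call site (e.g. in a Windows capturer's Start()), using the id constants declared in desktop_capture_types.h:

  webrtc::RecordCapturerImpl(
      webrtc::DesktopCapturerId::kScreenCapturerWinDirectx);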
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.h
new file mode 100644
index 0000000000..37542b84bb
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_metrics_helper.h
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METRICS_HELPER_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METRICS_HELPER_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+
+void RecordCapturerImpl(uint32_t capturer_id);
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_METRICS_HELPER_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_objc_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_objc_gn/moz.build
new file mode 100644
index 0000000000..2706bed4a6
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_objc_gn/moz.build
@@ -0,0 +1,77 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+CMMFLAGS += [
+    "-fobjc-arc"
+]
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MAC"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_POSIX"] = True
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+DEFINES["__STDC_CONSTANT_MACROS"] = True
+DEFINES["__STDC_FORMAT_MACROS"] = True
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/sdk/objc/",
+    "/third_party/libwebrtc/sdk/objc/base/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/desktop_capture/mac/desktop_configuration.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_cgimage.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_iosurface.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/mac/desktop_frame_provider.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/mac/screen_capturer_mac.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/mouse_cursor_monitor_mac.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/screen_capturer_darwin.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/window_capturer_mac.mm",
+    "/third_party/libwebrtc/modules/desktop_capture/window_finder_mac.mm"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+Library("desktop_capture_objc_gn")
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.cc
new file mode 100644
index 0000000000..22c59ef4cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.cc
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/desktop_capture/desktop_capture_options.h"
+
+#include "api/make_ref_counted.h"
+
+#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
+#include "modules/desktop_capture/mac/full_screen_mac_application_handler.h"
+#elif defined(WEBRTC_WIN)
+#include "modules/desktop_capture/win/full_screen_win_application_handler.h"
+#endif
+#if defined(WEBRTC_USE_PIPEWIRE)
+#include "modules/desktop_capture/linux/wayland/shared_screencast_stream.h"
+#endif
+
+namespace webrtc {
+
+DesktopCaptureOptions::DesktopCaptureOptions() {}
+DesktopCaptureOptions::DesktopCaptureOptions(
+    const DesktopCaptureOptions& options) = default;
+DesktopCaptureOptions::DesktopCaptureOptions(DesktopCaptureOptions&& options) =
+    default;
+DesktopCaptureOptions::~DesktopCaptureOptions() {}
+
+DesktopCaptureOptions& DesktopCaptureOptions::operator=(
+    const DesktopCaptureOptions& options) = default;
+DesktopCaptureOptions& DesktopCaptureOptions::operator=(
+    DesktopCaptureOptions&& options) = default;
+
+// static
+DesktopCaptureOptions DesktopCaptureOptions::CreateDefault() {
+  DesktopCaptureOptions result;
+#if defined(WEBRTC_USE_X11)
+  result.set_x_display(SharedXDisplay::CreateDefault());
+#endif
+#if defined(WEBRTC_USE_PIPEWIRE)
+  result.set_screencast_stream(SharedScreenCastStream::CreateDefault());
+#endif
+#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
+  result.set_configuration_monitor(
+      rtc::make_ref_counted<DesktopConfigurationMonitor>());
+  result.set_full_screen_window_detector(
+      rtc::make_ref_counted<FullScreenWindowDetector>(
+          CreateFullScreenMacApplicationHandler));
+#elif defined(WEBRTC_WIN)
+  result.set_full_screen_window_detector(
+      rtc::make_ref_counted<FullScreenWindowDetector>(
+          CreateFullScreenWinApplicationHandler));
+#endif
+  return result;
+}
+
+}  // namespace webrtc
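To make the option plumbing concrete, a minimal sketch, not part of the patch, of how an embedder might configure the options defined in the header below before creating a capturer; CreateWindowCapturer() is the factory declared on DesktopCapturer:

  #include <memory>

  #include "modules/desktop_capture/desktop_capture_options.h"
  #include "modules/desktop_capture/desktop_capturer.h"

  std::unique_ptr<webrtc::DesktopCapturer> MakeConfiguredCapturer() {
    webrtc::DesktopCaptureOptions options =
        webrtc::DesktopCaptureOptions::CreateDefault();
    // Ask the capturer to maintain updated_region() and to blend the cursor
    // into the frame when it can.
    options.set_detect_updated_region(true);
    options.set_prefer_cursor_embedded(true);
    return webrtc::DesktopCapturer::CreateWindowCapturer(options);
  }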
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.h
new file mode 100644
index 0000000000..67dffee08a
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_options.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_OPTIONS_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_OPTIONS_H_
+
+#include "api/scoped_refptr.h"
+#include "rtc_base/system/rtc_export.h"
+
+#if defined(WEBRTC_USE_X11)
+#include "modules/desktop_capture/linux/x11/shared_x_display.h"
+#endif
+
+#if defined(WEBRTC_USE_PIPEWIRE)
+#include "modules/desktop_capture/linux/wayland/shared_screencast_stream.h"
+#endif
+
+#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
+#include "modules/desktop_capture/mac/desktop_configuration_monitor.h"
+#endif
+
+#include "modules/desktop_capture/full_screen_window_detector.h"
+
+namespace webrtc {
+
+// An object that stores initialization parameters for screen and window
+// capturers.
+class RTC_EXPORT DesktopCaptureOptions {
+ public:
+  // Returns an instance of DesktopCaptureOptions with default parameters. On
+  // Linux it also initializes the X window connection. x_display() will be set
+  // to null if the X11 connection failed (e.g. DISPLAY isn't set).
+  static DesktopCaptureOptions CreateDefault();
+
+  DesktopCaptureOptions();
+  DesktopCaptureOptions(const DesktopCaptureOptions& options);
+  DesktopCaptureOptions(DesktopCaptureOptions&& options);
+  ~DesktopCaptureOptions();
+
+  DesktopCaptureOptions& operator=(const DesktopCaptureOptions& options);
+  DesktopCaptureOptions& operator=(DesktopCaptureOptions&& options);
+
+#if defined(WEBRTC_USE_X11)
+  const rtc::scoped_refptr<SharedXDisplay>& x_display() const {
+    return x_display_;
+  }
+  void set_x_display(rtc::scoped_refptr<SharedXDisplay> x_display) {
+    x_display_ = x_display;
+  }
+#endif
+
+#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
+  // TODO(zijiehe): Move both DesktopConfigurationMonitor and
+  // FullScreenChromeWindowDetector out of DesktopCaptureOptions. It's not
+  // reasonable for external consumers to set these two parameters.
+  const rtc::scoped_refptr<DesktopConfigurationMonitor>& configuration_monitor()
+      const {
+    return configuration_monitor_;
+  }
+  // If nullptr is set, ScreenCapturer won't work and WindowCapturer may return
+  // an inaccurate result from the IsOccluded() function.
+  void set_configuration_monitor(
+      rtc::scoped_refptr<DesktopConfigurationMonitor> m) {
+    configuration_monitor_ = m;
+  }
+
+  bool allow_iosurface() const { return allow_iosurface_; }
+  void set_allow_iosurface(bool allow) { allow_iosurface_ = allow; }
+#endif
+
+  const rtc::scoped_refptr<FullScreenWindowDetector>&
+  full_screen_window_detector() const {
+    return full_screen_window_detector_;
+  }
+  void set_full_screen_window_detector(
+      rtc::scoped_refptr<FullScreenWindowDetector> detector) {
+    full_screen_window_detector_ = detector;
+  }
+
+  // Flag indicating that the capturer should use screen change notifications.
+  // Enables/disables use of XDAMAGE in the X11 capturer.
+  bool use_update_notifications() const { return use_update_notifications_; }
+  void set_use_update_notifications(bool use_update_notifications) {
+    use_update_notifications_ = use_update_notifications;
+  }
+
+  // Flag indicating if desktop effects (e.g. Aero) should be disabled when the
+  // capturer is active. Currently used only on Windows.
+  bool disable_effects() const { return disable_effects_; }
+  void set_disable_effects(bool disable_effects) {
+    disable_effects_ = disable_effects;
+  }
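// [Editor's note: illustrative sketch, not part of the upstream patch.]
// On an X11 session a consumer might share one display connection and turn on
// damage-based update notifications using the setters above, e.g.:
//
//   DesktopCaptureOptions options = DesktopCaptureOptions::CreateDefault();
//   #if defined(WEBRTC_USE_X11)
//   options.set_use_update_notifications(true);  // use XDAMAGE
//   #endif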
+  // Flag that should be set if the consumer uses updated_region() and the
+  // capturer should try to provide a correct updated_region() for the frames
+  // it generates (e.g. by comparing each frame with the previous one).
+  bool detect_updated_region() const { return detect_updated_region_; }
+  void set_detect_updated_region(bool detect_updated_region) {
+    detect_updated_region_ = detect_updated_region;
+  }
+
+  // Indicates that the capturer should try to include the cursor in the frame.
+  // If it is able to do so it will set `DesktopFrame::may_contain_cursor()`.
+  // Not all capturers will support including the cursor. If this value is
+  // false, or the cursor otherwise cannot be included in the frame, then
+  // cursor metadata will be sent, though the capturer may choose to always
+  // send cursor metadata.
+  bool prefer_cursor_embedded() const { return prefer_cursor_embedded_; }
+  void set_prefer_cursor_embedded(bool prefer_cursor_embedded) {
+    prefer_cursor_embedded_ = prefer_cursor_embedded;
+  }
+
+#if defined(WEBRTC_WIN)
+  // Enumerating windows owned by the current process on Windows has some
+  // complications due to |GetWindowText*()| APIs potentially causing a
+  // deadlock (see the comments in the `GetWindowListHandler()` function in
+  // window_capture_utils.cc for more details on the deadlock).
+  // To avoid this issue, consumers can either ensure that the thread that runs
+  // their message loop never waits on `GetSourceList()`, or they can set this
+  // flag to false which will prevent windows running in the current process
+  // from being enumerated and included in the results. Consumers can still
+  // provide the WindowId for their own windows to `SelectSource()` and capture
+  // them.
+  bool enumerate_current_process_windows() const {
+    return enumerate_current_process_windows_;
+  }
+  void set_enumerate_current_process_windows(
+      bool enumerate_current_process_windows) {
+    enumerate_current_process_windows_ = enumerate_current_process_windows;
+  }
+
+  bool allow_use_magnification_api() const {
+    return allow_use_magnification_api_;
+  }
+  void set_allow_use_magnification_api(bool allow) {
+    allow_use_magnification_api_ = allow;
+  }
+
+  // Controls whether the DirectX-based capturer may be used. This capturer
+  // works on Windows 7 with the platform update, and on Windows 8 or later.
+  bool allow_directx_capturer() const { return allow_directx_capturer_; }
+  void set_allow_directx_capturer(bool enabled) {
+    allow_directx_capturer_ = enabled;
+  }
+
+  // Flag that may be set to allow use of the cropping window capturer (which
+  // captures the screen & crops that to the window region in some cases). An
+  // advantage of using this is significantly higher capture frame rates than
+  // capturing the window directly. A disadvantage of using this is the
+  // possibility of capturing unrelated content (e.g. overlapping windows that
+  // aren't detected properly, or neighboring regions when moving/resizing the
+  // captured window). Note: this flag influences the behavior of calls to
+  // DesktopCapturer::CreateWindowCapturer; calls to
+  // CroppingWindowCapturer::CreateCapturer ignore the flag (treat it as true).
+  bool allow_cropping_window_capturer() const {
+    return allow_cropping_window_capturer_;
+  }
+  void set_allow_cropping_window_capturer(bool allow) {
+    allow_cropping_window_capturer_ = allow;
+  }
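// [Editor's note: illustrative sketch, not part of the upstream patch.]
// The cropping capturer trades isolation for frame rate; a consumer that
// prefers smoother window capture on Windows might opt in like this:
//
//   DesktopCaptureOptions options = DesktopCaptureOptions::CreateDefault();
//   options.set_allow_cropping_window_capturer(true);
//   auto capturer = DesktopCapturer::CreateWindowCapturer(options);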
+#if defined(RTC_ENABLE_WIN_WGC)
+  // This flag enables the WGC capturer for both window and screen capture.
+  // This capturer should offer similar or better performance than the cropping
+  // capturer without the disadvantages listed above. However, the WGC capturer
+  // is only available on Windows 10 version 1809 (Redstone 5) and up. This
+  // flag will have no effect on older versions.
+  // If set, and running a supported version of Win10, this flag will take
+  // precedence over the cropping, directx, and magnification flags.
+  bool allow_wgc_capturer() const { return allow_wgc_capturer_; }
+  void set_allow_wgc_capturer(bool allow) { allow_wgc_capturer_ = allow; }
+
+  // This flag enables the WGC capturer as a fallback capturer.
+  // The flag is useful when the first capturer (e.g. WindowCapturerWinGdi) is
+  // unreliable on certain devices where WGC is supported, but not used by
+  // default.
+  bool allow_wgc_capturer_fallback() const {
+    return allow_wgc_capturer_fallback_;
+  }
+  void set_allow_wgc_capturer_fallback(bool allow) {
+    allow_wgc_capturer_fallback_ = allow;
+  }
+#endif  // defined(RTC_ENABLE_WIN_WGC)
+#endif  // defined(WEBRTC_WIN)
+
+#if defined(WEBRTC_USE_PIPEWIRE)
+  bool allow_pipewire() const { return allow_pipewire_; }
+  void set_allow_pipewire(bool allow) { allow_pipewire_ = allow; }
+
+  const rtc::scoped_refptr<SharedScreenCastStream>& screencast_stream() const {
+    return screencast_stream_;
+  }
+  void set_screencast_stream(
+      rtc::scoped_refptr<SharedScreenCastStream> stream) {
+    screencast_stream_ = stream;
+  }
+
+  void set_width(uint32_t width) { width_ = width; }
+  uint32_t get_width() const { return width_; }
+
+  void set_height(uint32_t height) { height_ = height; }
+  uint32_t get_height() const { return height_; }
+
+  void set_pipewire_use_damage_region(bool use_damage_regions) {
+    pipewire_use_damage_region_ = use_damage_regions;
+  }
+  bool pipewire_use_damage_region() const {
+    return pipewire_use_damage_region_;
+  }
+#endif
+
+ private:
+#if defined(WEBRTC_USE_X11)
+  rtc::scoped_refptr<SharedXDisplay> x_display_;
+#endif
+#if defined(WEBRTC_USE_PIPEWIRE)
+  // An instance of the shared PipeWire ScreenCast stream, shared between
+  // BaseCapturerPipeWire and MouseCursorMonitorPipeWire, as cursor information
+  // is sent together with screen content.
+  rtc::scoped_refptr<SharedScreenCastStream> screencast_stream_;
+#endif
+#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
+  rtc::scoped_refptr<DesktopConfigurationMonitor> configuration_monitor_;
+  bool allow_iosurface_ = false;
+#endif
+
+  rtc::scoped_refptr<FullScreenWindowDetector> full_screen_window_detector_;
+
+#if defined(WEBRTC_WIN)
+  bool enumerate_current_process_windows_ = true;
+  bool allow_use_magnification_api_ = false;
+  bool allow_directx_capturer_ = false;
+  bool allow_cropping_window_capturer_ = false;
+#if defined(RTC_ENABLE_WIN_WGC)
+  bool allow_wgc_capturer_ = false;
+  bool allow_wgc_capturer_fallback_ = false;
+#endif
+#endif
+#if defined(WEBRTC_USE_X11)
+  bool use_update_notifications_ = false;
+#else
+  bool use_update_notifications_ = true;
+#endif
+  bool disable_effects_ = true;
+  bool detect_updated_region_ = false;
+  bool prefer_cursor_embedded_ = false;
+#if defined(WEBRTC_USE_PIPEWIRE)
+  bool allow_pipewire_ = false;
+  bool pipewire_use_damage_region_ = true;
+  uint32_t width_ = 0;
+  uint32_t height_ = 0;
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_OPTIONS_H_
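// [Editor's sketch: illustrative consumer code, not part of the upstream
// patch. It only uses the PipeWire options declared above and the
// DesktopCapturer factory declared later in this patch.]
#include <memory>

#include "modules/desktop_capture/desktop_capture_options.h"
#include "modules/desktop_capture/desktop_capturer.h"

std::unique_ptr<webrtc::DesktopCapturer> CreateWaylandCapturerSketch() {
  webrtc::DesktopCaptureOptions options =
      webrtc::DesktopCaptureOptions::CreateDefault();
#if defined(WEBRTC_USE_PIPEWIRE)
  if (webrtc::DesktopCapturer::IsRunningUnderWayland()) {
    options.set_allow_pipewire(true);  // opt in to the PipeWire backend
    options.set_width(1920);           // requested stream dimensions
    options.set_height(1080);
  }
#endif
  return webrtc::DesktopCapturer::CreateGenericCapturer(options);
}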
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_types.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_types.h
new file mode 100644
index 0000000000..e777a45f92
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_types.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_TYPES_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_TYPES_H_
+
+#include <stdint.h>
+
+#ifdef XP_WIN       // Moving this into the global namespace
+typedef int pid_t;  // matching what used to be in
+#endif              // video_capture_defines.h
+
+namespace webrtc {
+
+enum class CaptureType { kWindow, kScreen, kAnyScreenContent };
+
+// Type used to identify windows on the desktop. Values are platform-specific:
+//  - On Windows: HWND cast to intptr_t.
+//  - On Linux (with X11): X11 Window (unsigned long) type cast to intptr_t.
+//  - On OSX: integer window number.
+typedef intptr_t WindowId;
+
+const WindowId kNullWindowId = 0;
+
+const int64_t kInvalidDisplayId = -1;
+
+// Type used to identify screens on the desktop. Values are platform-specific:
+//  - On Windows: integer display device index.
+//  - On OSX: CGDirectDisplayID cast to intptr_t.
+//  - On Linux (with X11): TBD.
+//  - On ChromeOS: display::Display::id() is an int64_t.
+// On Windows, ScreenId is implementation dependent: sending a ScreenId from one
+// implementation to another usually won't work correctly.
+#if defined(CHROMEOS)
+typedef int64_t ScreenId;
+#else
+typedef intptr_t ScreenId;
+#endif
+
+// The screen id that corresponds to all screens combined together.
+const ScreenId kFullDesktopScreenId = -1;
+
+const ScreenId kInvalidScreenId = -2;
+
+// Integers to attach to each DesktopFrame to differentiate the generator of
+// the frame. The entries in this namespace should remain in sync with the
+// SequentialDesktopCapturerId enum, which is logged via UMA.
+// `kScreenCapturerWinGdi` and `kScreenCapturerWinDirectx` values are preserved
+// to maintain compatibility.
+namespace DesktopCapturerId {
+constexpr uint32_t CreateFourCC(char a, char b, char c, char d) {
+  return ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) |
+          (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24));
+}
+
+constexpr uint32_t kUnknown = 0;
+constexpr uint32_t kWgcCapturerWin = 1;
+constexpr uint32_t kScreenCapturerWinMagnifier = 2;
+constexpr uint32_t kWindowCapturerWinGdi = 3;
+constexpr uint32_t kScreenCapturerWinGdi = CreateFourCC('G', 'D', 'I', ' ');
+constexpr uint32_t kScreenCapturerWinDirectx = CreateFourCC('D', 'X', 'G', 'I');
+constexpr uint32_t kX11CapturerLinux = CreateFourCC('X', '1', '1', ' ');
+constexpr uint32_t kWaylandCapturerLinux = CreateFourCC('W', 'L', ' ', ' ');
+}  // namespace DesktopCapturerId
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURE_TYPES_H_
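// [Editor's sketch: a worked example of the FourCC encoding above; not part
// of the upstream patch.] The first character lands in the least significant
// byte, so 'G','D','I',' ' (0x47, 0x44, 0x49, 0x20) packs to 0x20494447:
#include "modules/desktop_capture/desktop_capture_types.h"

static_assert(webrtc::DesktopCapturerId::kScreenCapturerWinGdi == 0x20494447u,
              "CreateFourCC packs its arguments little-endian");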
+ */ + +#include "modules/desktop_capture/desktop_capturer.h" + +#include +#include + +#include +#include + +#include "modules/desktop_capture/cropping_window_capturer.h" +#include "modules/desktop_capture/desktop_capture_options.h" +#include "modules/desktop_capture/desktop_capturer_differ_wrapper.h" +#include "system_wrappers/include/metrics.h" + +#if defined(RTC_ENABLE_WIN_WGC) +#include "modules/desktop_capture/win/wgc_capturer_win.h" +#include "rtc_base/win/windows_version.h" +#endif // defined(RTC_ENABLE_WIN_WGC) + +#if defined(WEBRTC_USE_PIPEWIRE) +#include "modules/desktop_capture/linux/wayland/base_capturer_pipewire.h" +#endif + +namespace webrtc { + +void LogDesktopCapturerFullscreenDetectorUsage() { + RTC_HISTOGRAM_BOOLEAN("WebRTC.Screenshare.DesktopCapturerFullscreenDetector", + true); +} + +DesktopCapturer::~DesktopCapturer() = default; + +DelegatedSourceListController* +DesktopCapturer::GetDelegatedSourceListController() { + return nullptr; +} + +void DesktopCapturer::SetSharedMemoryFactory( + std::unique_ptr shared_memory_factory) {} + +void DesktopCapturer::SetExcludedWindow(WindowId window) {} + +bool DesktopCapturer::GetSourceList(SourceList* sources) { + return true; +} + +bool DesktopCapturer::SelectSource(SourceId id) { + return false; +} + +bool DesktopCapturer::FocusOnSelectedSource() { + return false; +} + +bool DesktopCapturer::IsOccluded(const DesktopVector& pos) { + return false; +} + +// static +std::unique_ptr DesktopCapturer::CreateWindowCapturer( + const DesktopCaptureOptions& options) { +#if defined(RTC_ENABLE_WIN_WGC) + if (options.allow_wgc_capturer() && IsWgcSupported(CaptureType::kWindow)) { + return WgcCapturerWin::CreateRawWindowCapturer(options); + } +#endif // defined(RTC_ENABLE_WIN_WGC) + +#if defined(WEBRTC_WIN) + if (options.allow_cropping_window_capturer()) { + return CroppingWindowCapturer::CreateCapturer(options); + } +#endif // defined(WEBRTC_WIN) + + std::unique_ptr capturer = CreateRawWindowCapturer(options); + if (capturer && options.detect_updated_region()) { + capturer.reset(new DesktopCapturerDifferWrapper(std::move(capturer))); + } + + return capturer; +} + +// static +std::unique_ptr DesktopCapturer::CreateGenericCapturer( + const DesktopCaptureOptions& options) { + std::unique_ptr capturer = CreateRawGenericCapturer(options); + if (capturer && options.detect_updated_region()) { + capturer.reset(new DesktopCapturerDifferWrapper(std::move(capturer))); + } + + return capturer; +} + +std::unique_ptr DesktopCapturer::CreateRawGenericCapturer( + const DesktopCaptureOptions& options) { +#if defined(WEBRTC_USE_PIPEWIRE) + if (options.allow_pipewire() && DesktopCapturer::IsRunningUnderWayland()) { + return std::make_unique(options, + CaptureType::kAnyScreenContent); + } +#endif // defined(WEBRTC_USE_PIPEWIRE) + + return nullptr; +} + +// static +std::unique_ptr DesktopCapturer::CreateScreenCapturer( + const DesktopCaptureOptions& options) { +#if defined(RTC_ENABLE_WIN_WGC) + if (options.allow_wgc_capturer() && IsWgcSupported(CaptureType::kScreen)) { + return WgcCapturerWin::CreateRawScreenCapturer(options); + } +#endif // defined(RTC_ENABLE_WIN_WGC) + + std::unique_ptr capturer = CreateRawScreenCapturer(options); + if (capturer && options.detect_updated_region()) { + capturer.reset(new DesktopCapturerDifferWrapper(std::move(capturer))); + } + + return capturer; +} + +#if defined(WEBRTC_USE_PIPEWIRE) || defined(WEBRTC_USE_X11) +bool DesktopCapturer::IsRunningUnderWayland() { + const char* xdg_session_type = getenv("XDG_SESSION_TYPE"); + if 
+#if defined(WEBRTC_USE_PIPEWIRE) || defined(WEBRTC_USE_X11)
+bool DesktopCapturer::IsRunningUnderWayland() {
+  const char* xdg_session_type = getenv("XDG_SESSION_TYPE");
+  if (!xdg_session_type || strncmp(xdg_session_type, "wayland", 7) != 0)
+    return false;
+
+  if (!(getenv("WAYLAND_DISPLAY")))
+    return false;
+
+  return true;
+}
+#endif  // defined(WEBRTC_USE_PIPEWIRE) || defined(WEBRTC_USE_X11)
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capturer.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer.h
new file mode 100644
index 0000000000..5c3420710f
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+// TODO(alcooper): Update include usage in downstream consumers and then change
+// this to a forward declaration.
+#include "modules/desktop_capture/delegated_source_list_controller.h"
+#if defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_metadata.h"
+#endif  // defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_types.h"
+#include "modules/desktop_capture/desktop_frame.h"
+#include "modules/desktop_capture/shared_memory.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace webrtc {
+
+void RTC_EXPORT LogDesktopCapturerFullscreenDetectorUsage();
+
+class DesktopCaptureOptions;
+class DesktopFrame;
+
+// Abstract interface for screen and window capturers.
+class RTC_EXPORT DesktopCapturer {
+ public:
+  enum class Result {
+    // The frame was captured successfully.
+    SUCCESS,
+
+    // There was a temporary error. The caller should continue calling
+    // CaptureFrame(), in the expectation that it will eventually recover.
+    ERROR_TEMPORARY,
+
+    // Capture has failed and will keep failing if the caller tries calling
+    // CaptureFrame() again.
+    ERROR_PERMANENT,
+
+    MAX_VALUE = ERROR_PERMANENT
+  };
+
+  // Interface that must be implemented by the DesktopCapturer consumers.
+  class Callback {
+   public:
+    // Called after a frame has been captured. `frame` is not nullptr if and
+    // only if `result` is SUCCESS.
+    virtual void OnCaptureResult(Result result,
+                                 std::unique_ptr<DesktopFrame> frame) = 0;
+
+   protected:
+    virtual ~Callback() {}
+  };
+
+#if defined(CHROMEOS)
+  typedef int64_t SourceId;
+#else
+  typedef intptr_t SourceId;
+#endif
+
+  static_assert(std::is_same<SourceId, ScreenId>::value,
+                "SourceId should be the same type as ScreenId.");
+
+  struct Source {
+    // The unique id to represent a Source of the current DesktopCapturer.
+    SourceId id;
+    pid_t pid;
+
+    // Title of the window or screen in UTF-8 encoding, may be empty. This
+    // field should not be used to identify a source.
+    std::string title;
+
+#if defined(CHROMEOS)
+    // TODO(https://crbug.com/1369162): Remove or refactor this value.
+    WindowId in_process_id = kNullWindowId;
+#endif
+
+    // The display's unique ID. If no ID is defined, it will hold the value
+    // kInvalidDisplayId.
+    int64_t display_id = kInvalidDisplayId;
+  };
+
+  typedef std::vector<Source> SourceList;
+
+  virtual ~DesktopCapturer();
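// [Editor's note: illustrative sketch, not part of the upstream patch.]
// A consumer typically drives the interface declared below like this
// (MyCallback is a hypothetical Callback implementation):
//
//   class MyCallback : public DesktopCapturer::Callback {
//     void OnCaptureResult(DesktopCapturer::Result result,
//                          std::unique_ptr<DesktopFrame> frame) override {
//       if (result == DesktopCapturer::Result::SUCCESS) {
//         // consume frame->data() here
//       }
//     }
//   };
//
//   MyCallback callback;       // must outlive the capturer
//   capturer->Start(&callback);
//   capturer->CaptureFrame();  // answered via OnCaptureResult()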
+  // Called at the beginning of a capturing session. `callback` must remain
+  // valid until the capturer is destroyed.
+  virtual void Start(Callback* callback) = 0;
+
+  // Returns a valid pointer if the capturer requires the user to make a
+  // selection from a source list provided by the capturer.
+  // Returns nullptr if the capturer does not provide a UI for the user to make
+  // a selection.
+  //
+  // Callers should not take ownership of the returned pointer, but it is
+  // guaranteed to be valid as long as the desktop_capturer is valid.
+  // Note that consumers should still use GetSourceList and SelectSource, but
+  // their behavior may be modified if this returns a value. See those methods
+  // for a more in-depth discussion of those potential modifications.
+  virtual DelegatedSourceListController* GetDelegatedSourceListController();
+
+  // Sets the SharedMemoryFactory that will be used to create buffers for the
+  // captured frames. The factory can be invoked on a thread other than the one
+  // where CaptureFrame() is called. It will be destroyed on the same thread.
+  // Shared memory is currently supported only by some DesktopCapturer
+  // implementations.
+  virtual void SetSharedMemoryFactory(
+      std::unique_ptr<SharedMemoryFactory> shared_memory_factory);
+
+  // Captures the next frame and invokes the callback provided by Start().
+  // Pending capture requests are canceled when the DesktopCapturer is deleted.
+  virtual void CaptureFrame() = 0;
+
+  // Sets the window to be excluded from the captured image in future
+  // Capture calls. Used to exclude the screenshare notification window for
+  // screen capturing.
+  virtual void SetExcludedWindow(WindowId window);
+
+  // TODO(zijiehe): Following functions should be pure virtual. The default
+  // implementations are for backward compatibility only. Remove default
+  // implementations once all DesktopCapturer implementations in Chromium have
+  // implemented these functions.
+
+  // Gets a list of sources the current capturer supports. Returns false in
+  // case of a failure.
+  // For DesktopCapturer implementations to capture screens, this function
+  // should return monitors.
+  // For DesktopCapturer implementations to capture windows, this function
+  // should only return root windows owned by applications.
+  //
+  // Note that capturers which use a delegated source list will return a
+  // SourceList with exactly one value, but it may not be viable for capture
+  // (e.g. CaptureFrame will return ERROR_TEMPORARY) until a selection has been
+  // made.
+  virtual bool GetSourceList(SourceList* sources);
+
+  // Selects a source to be captured. Returns false in case of a failure (e.g.
+  // if there is no source with the specified type and id.)
+  //
+  // Note that some capturers with delegated source lists may also support
+  // selecting a SourceID that is not in the returned source list as a form of
+  // restore token.
+  virtual bool SelectSource(SourceId id);
+
+  // Brings the selected source to the front and sets the input focus on it.
+  // Returns false in case of a failure, if no source has been selected, or if
+  // the implementation does not support this functionality.
+  virtual bool FocusOnSelectedSource();
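// [Editor's note: illustrative sketch, not part of the upstream patch.]
// Typical source selection using the three methods above, before starting
// capture:
//
//   DesktopCapturer::SourceList sources;
//   if (capturer->GetSourceList(&sources) && !sources.empty()) {
//     capturer->SelectSource(sources[0].id);
//     capturer->FocusOnSelectedSource();  // optional, may be unsupported
//   }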
+  // Returns true if the `pos` on the selected source is covered by other
+  // elements on the display, and is not visible to the users.
+  // `pos` is in full desktop coordinates, i.e. the top-left monitor always
+  // starts from (0, 0).
+  // The return value if `pos` is out of the scope of the source is undefined.
+  virtual bool IsOccluded(const DesktopVector& pos);
+
+  // Creates a DesktopCapturer instance intended to capture windows and
+  // screens.
+  static std::unique_ptr<DesktopCapturer> CreateGenericCapturer(
+      const DesktopCaptureOptions& options);
+
+  // Creates a DesktopCapturer instance intended to capture windows.
+  static std::unique_ptr<DesktopCapturer> CreateWindowCapturer(
+      const DesktopCaptureOptions& options);
+
+  // Creates a DesktopCapturer instance intended to capture screens.
+  static std::unique_ptr<DesktopCapturer> CreateScreenCapturer(
+      const DesktopCaptureOptions& options);
+
+#if defined(WEBRTC_USE_PIPEWIRE) || defined(WEBRTC_USE_X11)
+  static bool IsRunningUnderWayland();
+
+  virtual void UpdateResolution(uint32_t width, uint32_t height) {}
+#endif  // defined(WEBRTC_USE_PIPEWIRE) || defined(WEBRTC_USE_X11)
+
+#if defined(WEBRTC_USE_GIO)
+  // Populates implementation specific metadata into the passed in pointer.
+  // Classes can choose to override it or use the default no-op implementation.
+  virtual DesktopCaptureMetadata GetMetadata() { return {}; }
+#endif  // defined(WEBRTC_USE_GIO)
+
+ protected:
+  // CroppingWindowCapturer needs to create raw capturers without wrappers, so
+  // the following three functions are protected.
+
+  // Creates a platform-specific DesktopCapturer instance that captures
+  // windows and screens.
+  static std::unique_ptr<DesktopCapturer> CreateRawGenericCapturer(
+      const DesktopCaptureOptions& options);
+
+  // Creates a platform-specific DesktopCapturer instance that captures
+  // windows.
+  static std::unique_ptr<DesktopCapturer> CreateRawWindowCapturer(
+      const DesktopCaptureOptions& options);
+
+  // Creates a platform-specific DesktopCapturer instance that captures
+  // screens.
+  static std::unique_ptr<DesktopCapturer> CreateRawScreenCapturer(
+      const DesktopCaptureOptions& options);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.cc
new file mode 100644
index 0000000000..77543e4060
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.cc
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/desktop_capture/desktop_capturer_differ_wrapper.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <utility>
+
+#include "modules/desktop_capture/desktop_geometry.h"
+#include "modules/desktop_capture/desktop_region.h"
+#include "modules/desktop_capture/differ_block.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+
+namespace {
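// [Editor's note: illustrative arithmetic, not part of the upstream patch.]
// The helpers below walk the frame in kBlockSize x kBlockSize cells; for a
// 65-pixel-wide row with kBlockSize == 32, the number of full blocks is
// (65 - 1) / 32 = 2, and the trailing partial block is 65 - 2 * 32 = 1 pixel
// wide, which is why a separate partial-block comparison exists.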
+// Returns true if the (0, 0) - (`width`, `height`) rectangles in `old_buffer`
+// and `new_buffer` are equal. `width` should be less than 32
+// (defined by kBlockSize), otherwise BlockDifference() should be used.
+bool PartialBlockDifference(const uint8_t* old_buffer,
+                            const uint8_t* new_buffer,
+                            int width,
+                            int height,
+                            int stride) {
+  RTC_DCHECK_LT(width, kBlockSize);
+  const int width_bytes = width * DesktopFrame::kBytesPerPixel;
+  for (int i = 0; i < height; i++) {
+    if (memcmp(old_buffer, new_buffer, width_bytes) != 0) {
+      return true;
+    }
+    old_buffer += stride;
+    new_buffer += stride;
+  }
+  return false;
+}
+
+// Compares columns in the range of [`left`, `right`), in a row in the
+// range of [`top`, `top` + `height`), starting from `old_buffer` and
+// `new_buffer`, and outputs updated regions into `output`. `stride` is the
+// DesktopFrame::stride().
+void CompareRow(const uint8_t* old_buffer,
+                const uint8_t* new_buffer,
+                const int left,
+                const int right,
+                const int top,
+                const int bottom,
+                const int stride,
+                DesktopRegion* const output) {
+  const int block_x_offset = kBlockSize * DesktopFrame::kBytesPerPixel;
+  const int width = right - left;
+  const int height = bottom - top;
+  const int block_count = (width - 1) / kBlockSize;
+  const int last_block_width = width - block_count * kBlockSize;
+  RTC_DCHECK_GT(last_block_width, 0);
+  RTC_DCHECK_LE(last_block_width, kBlockSize);
+
+  // The first block-column in a continuous dirty area in the current
+  // block-row.
+  int first_dirty_x_block = -1;
+
+  // We always need to add the dirty area into `output` in the last block, so
+  // we handle it separately.
+  for (int x = 0; x < block_count; x++) {
+    if (BlockDifference(old_buffer, new_buffer, height, stride)) {
+      if (first_dirty_x_block == -1) {
+        // This is the first dirty block in a continuous dirty area.
+        first_dirty_x_block = x;
+      }
+    } else if (first_dirty_x_block != -1) {
+      // The block on the left is the last dirty block in a continuous
+      // dirty area.
+      output->AddRect(
+          DesktopRect::MakeLTRB(first_dirty_x_block * kBlockSize + left, top,
+                                x * kBlockSize + left, bottom));
+      first_dirty_x_block = -1;
+    }
+    old_buffer += block_x_offset;
+    new_buffer += block_x_offset;
+  }
+
+  bool last_block_diff;
+  if (last_block_width < kBlockSize) {
+    // The last one is a partial block.
+    last_block_diff = PartialBlockDifference(old_buffer, new_buffer,
+                                             last_block_width, height, stride);
+  } else {
+    last_block_diff = BlockDifference(old_buffer, new_buffer, height, stride);
+  }
+  if (last_block_diff) {
+    if (first_dirty_x_block == -1) {
+      first_dirty_x_block = block_count;
+    }
+    output->AddRect(DesktopRect::MakeLTRB(
+        first_dirty_x_block * kBlockSize + left, top, right, bottom));
+  } else if (first_dirty_x_block != -1) {
+    output->AddRect(
+        DesktopRect::MakeLTRB(first_dirty_x_block * kBlockSize + left, top,
+                              block_count * kBlockSize + left, bottom));
+  }
+}
+
+// Compares the `rect` area in `old_frame` and `new_frame`, and outputs dirty
+// regions into `output`.
+void CompareFrames(const DesktopFrame& old_frame,
+                   const DesktopFrame& new_frame,
+                   DesktopRect rect,
+                   DesktopRegion* const output) {
+  RTC_DCHECK(old_frame.size().equals(new_frame.size()));
+  RTC_DCHECK_EQ(old_frame.stride(), new_frame.stride());
+  rect.IntersectWith(DesktopRect::MakeSize(old_frame.size()));
+
+  const int y_block_count = (rect.height() - 1) / kBlockSize;
+  const int last_y_block_height = rect.height() - y_block_count * kBlockSize;
+  // Offset from the start of one block-row to the next.
+  const int block_y_stride = old_frame.stride() * kBlockSize;
+  const uint8_t* prev_block_row_start =
+      old_frame.GetFrameDataAtPos(rect.top_left());
+  const uint8_t* curr_block_row_start =
+      new_frame.GetFrameDataAtPos(rect.top_left());
+
+  int top = rect.top();
+  // The last row may have a different height, so we handle it separately.
+  for (int y = 0; y < y_block_count; y++) {
+    CompareRow(prev_block_row_start, curr_block_row_start, rect.left(),
+               rect.right(), top, top + kBlockSize, old_frame.stride(),
+               output);
+    top += kBlockSize;
+    prev_block_row_start += block_y_stride;
+    curr_block_row_start += block_y_stride;
+  }
+  CompareRow(prev_block_row_start, curr_block_row_start, rect.left(),
+             rect.right(), top, top + last_y_block_height, old_frame.stride(),
+             output);
+}
+
+}  // namespace
+
+DesktopCapturerDifferWrapper::DesktopCapturerDifferWrapper(
+    std::unique_ptr<DesktopCapturer> base_capturer)
+    : base_capturer_(std::move(base_capturer)) {
+  RTC_DCHECK(base_capturer_);
+}
+
+DesktopCapturerDifferWrapper::~DesktopCapturerDifferWrapper() {}
+
+void DesktopCapturerDifferWrapper::Start(DesktopCapturer::Callback* callback) {
+  callback_ = callback;
+  base_capturer_->Start(this);
+}
+
+void DesktopCapturerDifferWrapper::SetSharedMemoryFactory(
+    std::unique_ptr<SharedMemoryFactory> shared_memory_factory) {
+  base_capturer_->SetSharedMemoryFactory(std::move(shared_memory_factory));
+}
+
+void DesktopCapturerDifferWrapper::CaptureFrame() {
+  base_capturer_->CaptureFrame();
+}
+
+void DesktopCapturerDifferWrapper::SetExcludedWindow(WindowId window) {
+  base_capturer_->SetExcludedWindow(window);
+}
+
+bool DesktopCapturerDifferWrapper::GetSourceList(SourceList* sources) {
+  return base_capturer_->GetSourceList(sources);
+}
+
+bool DesktopCapturerDifferWrapper::SelectSource(SourceId id) {
+  return base_capturer_->SelectSource(id);
+}
+
+bool DesktopCapturerDifferWrapper::FocusOnSelectedSource() {
+  return base_capturer_->FocusOnSelectedSource();
+}
+
+bool DesktopCapturerDifferWrapper::IsOccluded(const DesktopVector& pos) {
+  return base_capturer_->IsOccluded(pos);
+}
+
+#if defined(WEBRTC_USE_GIO)
+DesktopCaptureMetadata DesktopCapturerDifferWrapper::GetMetadata() {
+  return base_capturer_->GetMetadata();
+}
+#endif  // defined(WEBRTC_USE_GIO)
+
+void DesktopCapturerDifferWrapper::OnCaptureResult(
+    Result result,
+    std::unique_ptr<DesktopFrame> input_frame) {
+  int64_t start_time_nanos = rtc::TimeNanos();
+  if (!input_frame) {
+    callback_->OnCaptureResult(result, nullptr);
+    return;
+  }
+  RTC_DCHECK(result == Result::SUCCESS);
+
+  std::unique_ptr<SharedDesktopFrame> frame =
+      SharedDesktopFrame::Wrap(std::move(input_frame));
+  if (last_frame_ && (last_frame_->size().width() != frame->size().width() ||
+                      last_frame_->size().height() != frame->size().height() ||
+                      last_frame_->stride() != frame->stride())) {
+    last_frame_.reset();
+  }
+
+  if (last_frame_) {
+    DesktopRegion hints;
+    hints.Swap(frame->mutable_updated_region());
+    for (DesktopRegion::Iterator it(hints); !it.IsAtEnd(); it.Advance()) {
+      CompareFrames(*last_frame_, *frame, it.rect(),
+                    frame->mutable_updated_region());
+    }
+  } else {
+    frame->mutable_updated_region()->SetRect(
+        DesktopRect::MakeSize(frame->size()));
+  }
+  last_frame_ = frame->Share();
+
+  frame->set_capture_time_ms(frame->capture_time_ms() +
+                             (rtc::TimeNanos() - start_time_nanos) /
+                                 rtc::kNumNanosecsPerMillisec);
+  callback_->OnCaptureResult(result, std::move(frame));
+}
+
+}  // namespace webrtc
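// [Editor's sketch: hypothetical consumer code, not part of the upstream
// patch.] Wrapping a capturer by hand mirrors what the
// DesktopCapturer::Create*Capturer factories do when
// options.detect_updated_region() is set:
#include <memory>
#include <utility>

#include "modules/desktop_capture/desktop_capturer_differ_wrapper.h"

std::unique_ptr<webrtc::DesktopCapturer> WrapWithDifferSketch(
    std::unique_ptr<webrtc::DesktopCapturer> raw_capturer) {
  // Every frame emitted by the wrapper carries an updated_region() computed
  // by comparing 32x32 pixel blocks against the previous frame.
  return std::make_unique<webrtc::DesktopCapturerDifferWrapper>(
      std::move(raw_capturer));
}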
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.h b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.h
new file mode 100644
index 0000000000..6ebb5d7bc3
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_DIFFER_WRAPPER_H_
+#define MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_DIFFER_WRAPPER_H_
+
+#include <memory>
+
+#if defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_metadata.h"
+#endif  // defined(WEBRTC_USE_GIO)
+#include "modules/desktop_capture/desktop_capture_types.h"
+#include "modules/desktop_capture/desktop_capturer.h"
+#include "modules/desktop_capture/desktop_frame.h"
+#include "modules/desktop_capture/desktop_geometry.h"
+#include "modules/desktop_capture/shared_desktop_frame.h"
+#include "modules/desktop_capture/shared_memory.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace webrtc {
+
+// DesktopCapturer wrapper that calculates updated_region() by comparing frame
+// content. This class always expects the underlying DesktopCapturer
+// implementation to return a superset of updated regions in the DesktopFrame.
+// If a DesktopCapturer implementation does not know the updated region, it
+// should set updated_region() to the full frame.
+//
+// This class marks the entire frame as updated if the frame size or frame
+// stride has changed.
+class RTC_EXPORT DesktopCapturerDifferWrapper
+    : public DesktopCapturer,
+      public DesktopCapturer::Callback {
+ public:
+  // Creates a DesktopCapturerDifferWrapper with a DesktopCapturer
+  // implementation, and takes ownership of it.
+  explicit DesktopCapturerDifferWrapper(
+      std::unique_ptr<DesktopCapturer> base_capturer);
+
+  ~DesktopCapturerDifferWrapper() override;
+
+  // DesktopCapturer interface.
+  void Start(DesktopCapturer::Callback* callback) override;
+  void SetSharedMemoryFactory(
+      std::unique_ptr<SharedMemoryFactory> shared_memory_factory) override;
+  void CaptureFrame() override;
+  void SetExcludedWindow(WindowId window) override;
+  bool GetSourceList(SourceList* screens) override;
+  bool SelectSource(SourceId id) override;
+  bool FocusOnSelectedSource() override;
+  bool IsOccluded(const DesktopVector& pos) override;
+#if defined(WEBRTC_USE_GIO)
+  DesktopCaptureMetadata GetMetadata() override;
+#endif  // defined(WEBRTC_USE_GIO)
+
+ private:
+  // DesktopCapturer::Callback interface.
+  void OnCaptureResult(Result result,
+                       std::unique_ptr<DesktopFrame> frame) override;
+
+  const std::unique_ptr<DesktopCapturer> base_capturer_;
+  DesktopCapturer::Callback* callback_;
+  std::unique_ptr<SharedDesktopFrame> last_frame_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_DESKTOP_CAPTURE_DESKTOP_CAPTURER_DIFFER_WRAPPER_H_
diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper_unittest.cc b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper_unittest.cc
new file mode 100644
index 0000000000..9ccef3cc10
--- /dev/null
+++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capturer_differ_wrapper_unittest.cc
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/desktop_capture/desktop_capturer_differ_wrapper.h"
+
+#include <string.h>
+
+#include <initializer_list>
+#include <memory>
+#include <utility>
+
+#include "modules/desktop_capture/desktop_geometry.h"
+#include "modules/desktop_capture/desktop_region.h"
+#include "modules/desktop_capture/differ_block.h"
+#include "modules/desktop_capture/fake_desktop_capturer.h"
+#include "modules/desktop_capture/mock_desktop_capturer_callback.h"
+#include "rtc_base/random.h"
+#include "rtc_base/time_utils.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+// Compares and asserts that `frame`.updated_region() equals `rects`. This
+// function does not care about the order of the `rects` and it does not expect
+// DesktopRegion to return an exact area for each rectangle in `rects`.
+template